author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000
commit    | f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree      | 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/cmd/internal/obj/x86
parent    | Initial commit. (diff)
Adding upstream version 1.22.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/internal/obj/x86')
-rw-r--r-- | src/cmd/internal/obj/x86/a.out.go            |  426
-rw-r--r-- | src/cmd/internal/obj/x86/aenum.go            | 1610
-rw-r--r-- | src/cmd/internal/obj/x86/anames.go           | 1608
-rw-r--r-- | src/cmd/internal/obj/x86/asm6.go             | 5473
-rw-r--r-- | src/cmd/internal/obj/x86/asm_test.go         |  342
-rw-r--r-- | src/cmd/internal/obj/x86/avx_optabs.go       | 4628
-rw-r--r-- | src/cmd/internal/obj/x86/evex.go             |  383
-rw-r--r-- | src/cmd/internal/obj/x86/list6.go            |  264
-rw-r--r-- | src/cmd/internal/obj/x86/obj6.go             | 1546
-rw-r--r-- | src/cmd/internal/obj/x86/obj6_test.go        |  167
-rw-r--r-- | src/cmd/internal/obj/x86/pcrelative_test.go  |  105
-rw-r--r-- | src/cmd/internal/obj/x86/seh.go              |  165
-rw-r--r-- | src/cmd/internal/obj/x86/ytab.go             |   44
13 files changed, 16761 insertions(+), 0 deletions(-)
diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go
new file mode 100644
index 0000000..b121f6d
--- /dev/null
+++ b/src/cmd/internal/obj/x86/a.out.go
@@ -0,0 +1,426 @@
+// Inferno utils/6c/6.out.h
+// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6c/6.out.h
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+ +package x86 + +import "cmd/internal/obj" + +const ( + REG_NONE = 0 +) + +const ( + REG_AL = obj.RBaseAMD64 + iota + REG_CL + REG_DL + REG_BL + REG_SPB + REG_BPB + REG_SIB + REG_DIB + REG_R8B + REG_R9B + REG_R10B + REG_R11B + REG_R12B + REG_R13B + REG_R14B + REG_R15B + + REG_AX + REG_CX + REG_DX + REG_BX + REG_SP + REG_BP + REG_SI + REG_DI + REG_R8 + REG_R9 + REG_R10 + REG_R11 + REG_R12 + REG_R13 + REG_R14 + REG_R15 + + REG_AH + REG_CH + REG_DH + REG_BH + + REG_F0 + REG_F1 + REG_F2 + REG_F3 + REG_F4 + REG_F5 + REG_F6 + REG_F7 + + REG_M0 + REG_M1 + REG_M2 + REG_M3 + REG_M4 + REG_M5 + REG_M6 + REG_M7 + + REG_K0 + REG_K1 + REG_K2 + REG_K3 + REG_K4 + REG_K5 + REG_K6 + REG_K7 + + REG_X0 + REG_X1 + REG_X2 + REG_X3 + REG_X4 + REG_X5 + REG_X6 + REG_X7 + REG_X8 + REG_X9 + REG_X10 + REG_X11 + REG_X12 + REG_X13 + REG_X14 + REG_X15 + REG_X16 + REG_X17 + REG_X18 + REG_X19 + REG_X20 + REG_X21 + REG_X22 + REG_X23 + REG_X24 + REG_X25 + REG_X26 + REG_X27 + REG_X28 + REG_X29 + REG_X30 + REG_X31 + + REG_Y0 + REG_Y1 + REG_Y2 + REG_Y3 + REG_Y4 + REG_Y5 + REG_Y6 + REG_Y7 + REG_Y8 + REG_Y9 + REG_Y10 + REG_Y11 + REG_Y12 + REG_Y13 + REG_Y14 + REG_Y15 + REG_Y16 + REG_Y17 + REG_Y18 + REG_Y19 + REG_Y20 + REG_Y21 + REG_Y22 + REG_Y23 + REG_Y24 + REG_Y25 + REG_Y26 + REG_Y27 + REG_Y28 + REG_Y29 + REG_Y30 + REG_Y31 + + REG_Z0 + REG_Z1 + REG_Z2 + REG_Z3 + REG_Z4 + REG_Z5 + REG_Z6 + REG_Z7 + REG_Z8 + REG_Z9 + REG_Z10 + REG_Z11 + REG_Z12 + REG_Z13 + REG_Z14 + REG_Z15 + REG_Z16 + REG_Z17 + REG_Z18 + REG_Z19 + REG_Z20 + REG_Z21 + REG_Z22 + REG_Z23 + REG_Z24 + REG_Z25 + REG_Z26 + REG_Z27 + REG_Z28 + REG_Z29 + REG_Z30 + REG_Z31 + + REG_CS + REG_SS + REG_DS + REG_ES + REG_FS + REG_GS + + REG_GDTR // global descriptor table register + REG_IDTR // interrupt descriptor table register + REG_LDTR // local descriptor table register + REG_MSW // machine status word + REG_TASK // task register + + REG_CR0 + REG_CR1 + REG_CR2 + REG_CR3 + REG_CR4 + REG_CR5 + REG_CR6 + REG_CR7 + REG_CR8 + REG_CR9 + REG_CR10 + REG_CR11 + REG_CR12 + REG_CR13 + REG_CR14 + REG_CR15 + + REG_DR0 + REG_DR1 + REG_DR2 + REG_DR3 + REG_DR4 + REG_DR5 + REG_DR6 + REG_DR7 + + REG_TR0 + REG_TR1 + REG_TR2 + REG_TR3 + REG_TR4 + REG_TR5 + REG_TR6 + REG_TR7 + + REG_TLS + + MAXREG + + REG_CR = REG_CR0 + REG_DR = REG_DR0 + REG_TR = REG_TR0 + + REGARG = -1 + REGRET = REG_AX + FREGRET = REG_X0 + REGSP = REG_SP + REGCTXT = REG_DX + REGENTRYTMP0 = REG_R12 // scratch register available at function entry in ABIInternal + REGENTRYTMP1 = REG_R13 // scratch register available at function entry in ABIInternal + REGG = REG_R14 // g register in ABIInternal + REGEXT = REG_R15 // compiler allocates external registers R15 down + FREGMIN = REG_X0 + 5 // first register variable + FREGEXT = REG_X0 + 15 // first external register + T_TYPE = 1 << 0 + T_INDEX = 1 << 1 + T_OFFSET = 1 << 2 + T_FCONST = 1 << 3 + T_SYM = 1 << 4 + T_SCONST = 1 << 5 + T_64 = 1 << 6 + T_GOTYPE = 1 << 7 +) + +// https://www.uclibc.org/docs/psABI-x86_64.pdf, figure 3.36 +var AMD64DWARFRegisters = map[int16]int16{ + REG_AX: 0, + REG_DX: 1, + REG_CX: 2, + REG_BX: 3, + REG_SI: 4, + REG_DI: 5, + REG_BP: 6, + REG_SP: 7, + REG_R8: 8, + REG_R9: 9, + REG_R10: 10, + REG_R11: 11, + REG_R12: 12, + REG_R13: 13, + REG_R14: 14, + REG_R15: 15, + // 16 is "Return Address RA", whatever that is. + // 17-24 vector registers (X/Y/Z). + REG_X0: 17, + REG_X1: 18, + REG_X2: 19, + REG_X3: 20, + REG_X4: 21, + REG_X5: 22, + REG_X6: 23, + REG_X7: 24, + // 25-32 extended vector registers (X/Y/Z). 
+ REG_X8: 25, + REG_X9: 26, + REG_X10: 27, + REG_X11: 28, + REG_X12: 29, + REG_X13: 30, + REG_X14: 31, + REG_X15: 32, + // ST registers. %stN => FN. + REG_F0: 33, + REG_F1: 34, + REG_F2: 35, + REG_F3: 36, + REG_F4: 37, + REG_F5: 38, + REG_F6: 39, + REG_F7: 40, + // MMX registers. %mmN => MN. + REG_M0: 41, + REG_M1: 42, + REG_M2: 43, + REG_M3: 44, + REG_M4: 45, + REG_M5: 46, + REG_M6: 47, + REG_M7: 48, + // 48 is flags, which doesn't have a name. + REG_ES: 50, + REG_CS: 51, + REG_SS: 52, + REG_DS: 53, + REG_FS: 54, + REG_GS: 55, + // 58 and 59 are {fs,gs}base, which don't have names. + REG_TR: 62, + REG_LDTR: 63, + // 64-66 are mxcsr, fcw, fsw, which don't have names. + + // 67-82 upper vector registers (X/Y/Z). + REG_X16: 67, + REG_X17: 68, + REG_X18: 69, + REG_X19: 70, + REG_X20: 71, + REG_X21: 72, + REG_X22: 73, + REG_X23: 74, + REG_X24: 75, + REG_X25: 76, + REG_X26: 77, + REG_X27: 78, + REG_X28: 79, + REG_X29: 80, + REG_X30: 81, + REG_X31: 82, + + // 118-125 vector mask registers. %kN => KN. + REG_K0: 118, + REG_K1: 119, + REG_K2: 120, + REG_K3: 121, + REG_K4: 122, + REG_K5: 123, + REG_K6: 124, + REG_K7: 125, +} + +// https://www.uclibc.org/docs/psABI-i386.pdf, table 2.14 +var X86DWARFRegisters = map[int16]int16{ + REG_AX: 0, + REG_CX: 1, + REG_DX: 2, + REG_BX: 3, + REG_SP: 4, + REG_BP: 5, + REG_SI: 6, + REG_DI: 7, + // 8 is "Return Address RA", whatever that is. + // 9 is flags, which doesn't have a name. + // ST registers. %stN => FN. + REG_F0: 11, + REG_F1: 12, + REG_F2: 13, + REG_F3: 14, + REG_F4: 15, + REG_F5: 16, + REG_F6: 17, + REG_F7: 18, + // XMM registers. %xmmN => XN. + REG_X0: 21, + REG_X1: 22, + REG_X2: 23, + REG_X3: 24, + REG_X4: 25, + REG_X5: 26, + REG_X6: 27, + REG_X7: 28, + // MMX registers. %mmN => MN. + REG_M0: 29, + REG_M1: 30, + REG_M2: 31, + REG_M3: 32, + REG_M4: 33, + REG_M5: 34, + REG_M6: 35, + REG_M7: 36, + // 39 is mxcsr, which doesn't have a name. + REG_ES: 40, + REG_CS: 41, + REG_SS: 42, + REG_DS: 43, + REG_FS: 44, + REG_GS: 45, + REG_TR: 48, + REG_LDTR: 49, +} diff --git a/src/cmd/internal/obj/x86/aenum.go b/src/cmd/internal/obj/x86/aenum.go new file mode 100644 index 0000000..79cdd24 --- /dev/null +++ b/src/cmd/internal/obj/x86/aenum.go @@ -0,0 +1,1610 @@ +// Code generated by x86avxgen. DO NOT EDIT. 
+ +package x86 + +import "cmd/internal/obj" + +//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p x86 + +const ( + AAAA = obj.ABaseAMD64 + obj.A_ARCHSPECIFIC + iota + AAAD + AAAM + AAAS + AADCB + AADCL + AADCQ + AADCW + AADCXL + AADCXQ + AADDB + AADDL + AADDPD + AADDPS + AADDQ + AADDSD + AADDSS + AADDSUBPD + AADDSUBPS + AADDW + AADJSP + AADOXL + AADOXQ + AAESDEC + AAESDECLAST + AAESENC + AAESENCLAST + AAESIMC + AAESKEYGENASSIST + AANDB + AANDL + AANDNL + AANDNPD + AANDNPS + AANDNQ + AANDPD + AANDPS + AANDQ + AANDW + AARPL + ABEXTRL + ABEXTRQ + ABLENDPD + ABLENDPS + ABLENDVPD + ABLENDVPS + ABLSIL + ABLSIQ + ABLSMSKL + ABLSMSKQ + ABLSRL + ABLSRQ + ABOUNDL + ABOUNDW + ABSFL + ABSFQ + ABSFW + ABSRL + ABSRQ + ABSRW + ABSWAPL + ABSWAPQ + ABTCL + ABTCQ + ABTCW + ABTL + ABTQ + ABTRL + ABTRQ + ABTRW + ABTSL + ABTSQ + ABTSW + ABTW + ABYTE + ABZHIL + ABZHIQ + ACBW + ACDQ + ACDQE + ACLAC + ACLC + ACLD + ACLDEMOTE + ACLFLUSH + ACLFLUSHOPT + ACLI + ACLTS + ACLWB + ACMC + ACMOVLCC + ACMOVLCS + ACMOVLEQ + ACMOVLGE + ACMOVLGT + ACMOVLHI + ACMOVLLE + ACMOVLLS + ACMOVLLT + ACMOVLMI + ACMOVLNE + ACMOVLOC + ACMOVLOS + ACMOVLPC + ACMOVLPL + ACMOVLPS + ACMOVQCC + ACMOVQCS + ACMOVQEQ + ACMOVQGE + ACMOVQGT + ACMOVQHI + ACMOVQLE + ACMOVQLS + ACMOVQLT + ACMOVQMI + ACMOVQNE + ACMOVQOC + ACMOVQOS + ACMOVQPC + ACMOVQPL + ACMOVQPS + ACMOVWCC + ACMOVWCS + ACMOVWEQ + ACMOVWGE + ACMOVWGT + ACMOVWHI + ACMOVWLE + ACMOVWLS + ACMOVWLT + ACMOVWMI + ACMOVWNE + ACMOVWOC + ACMOVWOS + ACMOVWPC + ACMOVWPL + ACMOVWPS + ACMPB + ACMPL + ACMPPD + ACMPPS + ACMPQ + ACMPSB + ACMPSD + ACMPSL + ACMPSQ + ACMPSS + ACMPSW + ACMPW + ACMPXCHG16B + ACMPXCHG8B + ACMPXCHGB + ACMPXCHGL + ACMPXCHGQ + ACMPXCHGW + ACOMISD + ACOMISS + ACPUID + ACQO + ACRC32B + ACRC32L + ACRC32Q + ACRC32W + ACVTPD2PL + ACVTPD2PS + ACVTPL2PD + ACVTPL2PS + ACVTPS2PD + ACVTPS2PL + ACVTSD2SL + ACVTSD2SQ + ACVTSD2SS + ACVTSL2SD + ACVTSL2SS + ACVTSQ2SD + ACVTSQ2SS + ACVTSS2SD + ACVTSS2SL + ACVTSS2SQ + ACVTTPD2PL + ACVTTPS2PL + ACVTTSD2SL + ACVTTSD2SQ + ACVTTSS2SL + ACVTTSS2SQ + ACWD + ACWDE + ADAA + ADAS + ADECB + ADECL + ADECQ + ADECW + ADIVB + ADIVL + ADIVPD + ADIVPS + ADIVQ + ADIVSD + ADIVSS + ADIVW + ADPPD + ADPPS + AEMMS + AENTER + AEXTRACTPS + AF2XM1 + AFABS + AFADDD + AFADDDP + AFADDF + AFADDL + AFADDW + AFBLD + AFBSTP + AFCHS + AFCLEX + AFCMOVB + AFCMOVBE + AFCMOVCC + AFCMOVCS + AFCMOVE + AFCMOVEQ + AFCMOVHI + AFCMOVLS + AFCMOVNB + AFCMOVNBE + AFCMOVNE + AFCMOVNU + AFCMOVU + AFCMOVUN + AFCOMD + AFCOMDP + AFCOMDPP + AFCOMF + AFCOMFP + AFCOMI + AFCOMIP + AFCOML + AFCOMLP + AFCOMW + AFCOMWP + AFCOS + AFDECSTP + AFDIVD + AFDIVDP + AFDIVF + AFDIVL + AFDIVRD + AFDIVRDP + AFDIVRF + AFDIVRL + AFDIVRW + AFDIVW + AFFREE + AFINCSTP + AFINIT + AFLD1 + AFLDCW + AFLDENV + AFLDL2E + AFLDL2T + AFLDLG2 + AFLDLN2 + AFLDPI + AFLDZ + AFMOVB + AFMOVBP + AFMOVD + AFMOVDP + AFMOVF + AFMOVFP + AFMOVL + AFMOVLP + AFMOVV + AFMOVVP + AFMOVW + AFMOVWP + AFMOVX + AFMOVXP + AFMULD + AFMULDP + AFMULF + AFMULL + AFMULW + AFNOP + AFPATAN + AFPREM + AFPREM1 + AFPTAN + AFRNDINT + AFRSTOR + AFSAVE + AFSCALE + AFSIN + AFSINCOS + AFSQRT + AFSTCW + AFSTENV + AFSTSW + AFSUBD + AFSUBDP + AFSUBF + AFSUBL + AFSUBRD + AFSUBRDP + AFSUBRF + AFSUBRL + AFSUBRW + AFSUBW + AFTST + AFUCOM + AFUCOMI + AFUCOMIP + AFUCOMP + AFUCOMPP + AFXAM + AFXCHD + AFXRSTOR + AFXRSTOR64 + AFXSAVE + AFXSAVE64 + AFXTRACT + AFYL2X + AFYL2XP1 + AHADDPD + AHADDPS + AHLT + AHSUBPD + AHSUBPS + AICEBP + AIDIVB + AIDIVL + AIDIVQ + AIDIVW + AIMUL3L + AIMUL3Q + AIMUL3W + AIMULB + AIMULL + AIMULQ + AIMULW + AINB + AINCB + AINCL + AINCQ 
+ AINCW + AINL + AINSB + AINSERTPS + AINSL + AINSW + AINT + AINTO + AINVD + AINVLPG + AINVPCID + AINW + AIRETL + AIRETQ + AIRETW + AJCC // >= unsigned + AJCS // < unsigned + AJCXZL + AJCXZQ + AJCXZW + AJEQ // == (zero) + AJGE // >= signed + AJGT // > signed + AJHI // > unsigned + AJLE // <= signed + AJLS // <= unsigned + AJLT // < signed + AJMI // sign bit set (negative) + AJNE // != (nonzero) + AJOC // overflow clear + AJOS // overflow set + AJPC // parity clear + AJPL // sign bit clear (positive) + AJPS // parity set + AKADDB + AKADDD + AKADDQ + AKADDW + AKANDB + AKANDD + AKANDNB + AKANDND + AKANDNQ + AKANDNW + AKANDQ + AKANDW + AKMOVB + AKMOVD + AKMOVQ + AKMOVW + AKNOTB + AKNOTD + AKNOTQ + AKNOTW + AKORB + AKORD + AKORQ + AKORTESTB + AKORTESTD + AKORTESTQ + AKORTESTW + AKORW + AKSHIFTLB + AKSHIFTLD + AKSHIFTLQ + AKSHIFTLW + AKSHIFTRB + AKSHIFTRD + AKSHIFTRQ + AKSHIFTRW + AKTESTB + AKTESTD + AKTESTQ + AKTESTW + AKUNPCKBW + AKUNPCKDQ + AKUNPCKWD + AKXNORB + AKXNORD + AKXNORQ + AKXNORW + AKXORB + AKXORD + AKXORQ + AKXORW + ALAHF + ALARL + ALARQ + ALARW + ALDDQU + ALDMXCSR + ALEAL + ALEAQ + ALEAVEL + ALEAVEQ + ALEAVEW + ALEAW + ALFENCE + ALFSL + ALFSQ + ALFSW + ALGDT + ALGSL + ALGSQ + ALGSW + ALIDT + ALLDT + ALMSW + ALOCK + ALODSB + ALODSL + ALODSQ + ALODSW + ALONG + ALOOP + ALOOPEQ + ALOOPNE + ALSLL + ALSLQ + ALSLW + ALSSL + ALSSQ + ALSSW + ALTR + ALZCNTL + ALZCNTQ + ALZCNTW + AMASKMOVOU + AMASKMOVQ + AMAXPD + AMAXPS + AMAXSD + AMAXSS + AMFENCE + AMINPD + AMINPS + AMINSD + AMINSS + AMONITOR + AMOVAPD + AMOVAPS + AMOVB + AMOVBEL + AMOVBEQ + AMOVBEW + AMOVBLSX + AMOVBLZX + AMOVBQSX + AMOVBQZX + AMOVBWSX + AMOVBWZX + AMOVDDUP + AMOVHLPS + AMOVHPD + AMOVHPS + AMOVL + AMOVLHPS + AMOVLPD + AMOVLPS + AMOVLQSX + AMOVLQZX + AMOVMSKPD + AMOVMSKPS + AMOVNTDQA + AMOVNTIL + AMOVNTIQ + AMOVNTO + AMOVNTPD + AMOVNTPS + AMOVNTQ + AMOVO + AMOVOU + AMOVQ + AMOVQL + AMOVQOZX + AMOVSB + AMOVSD + AMOVSHDUP + AMOVSL + AMOVSLDUP + AMOVSQ + AMOVSS + AMOVSW + AMOVSWW + AMOVUPD + AMOVUPS + AMOVW + AMOVWLSX + AMOVWLZX + AMOVWQSX + AMOVWQZX + AMOVZWW + AMPSADBW + AMULB + AMULL + AMULPD + AMULPS + AMULQ + AMULSD + AMULSS + AMULW + AMULXL + AMULXQ + AMWAIT + ANEGB + ANEGL + ANEGQ + ANEGW + ANOPL + ANOPW + ANOTB + ANOTL + ANOTQ + ANOTW + AORB + AORL + AORPD + AORPS + AORQ + AORW + AOUTB + AOUTL + AOUTSB + AOUTSL + AOUTSW + AOUTW + APABSB + APABSD + APABSW + APACKSSLW + APACKSSWB + APACKUSDW + APACKUSWB + APADDB + APADDL + APADDQ + APADDSB + APADDSW + APADDUSB + APADDUSW + APADDW + APALIGNR + APAND + APANDN + APAUSE + APAVGB + APAVGW + APBLENDVB + APBLENDW + APCLMULQDQ + APCMPEQB + APCMPEQL + APCMPEQQ + APCMPEQW + APCMPESTRI + APCMPESTRM + APCMPGTB + APCMPGTL + APCMPGTQ + APCMPGTW + APCMPISTRI + APCMPISTRM + APDEPL + APDEPQ + APEXTL + APEXTQ + APEXTRB + APEXTRD + APEXTRQ + APEXTRW + APHADDD + APHADDSW + APHADDW + APHMINPOSUW + APHSUBD + APHSUBSW + APHSUBW + APINSRB + APINSRD + APINSRQ + APINSRW + APMADDUBSW + APMADDWL + APMAXSB + APMAXSD + APMAXSW + APMAXUB + APMAXUD + APMAXUW + APMINSB + APMINSD + APMINSW + APMINUB + APMINUD + APMINUW + APMOVMSKB + APMOVSXBD + APMOVSXBQ + APMOVSXBW + APMOVSXDQ + APMOVSXWD + APMOVSXWQ + APMOVZXBD + APMOVZXBQ + APMOVZXBW + APMOVZXDQ + APMOVZXWD + APMOVZXWQ + APMULDQ + APMULHRSW + APMULHUW + APMULHW + APMULLD + APMULLW + APMULULQ + APOPAL + APOPAW + APOPCNTL + APOPCNTQ + APOPCNTW + APOPFL + APOPFQ + APOPFW + APOPL + APOPQ + APOPW + APOR + APREFETCHNTA + APREFETCHT0 + APREFETCHT1 + APREFETCHT2 + APSADBW + APSHUFB + APSHUFD + APSHUFHW + APSHUFL + APSHUFLW + APSHUFW + APSIGNB + APSIGND + 
APSIGNW + APSLLL + APSLLO + APSLLQ + APSLLW + APSRAL + APSRAW + APSRLL + APSRLO + APSRLQ + APSRLW + APSUBB + APSUBL + APSUBQ + APSUBSB + APSUBSW + APSUBUSB + APSUBUSW + APSUBW + APTEST + APUNPCKHBW + APUNPCKHLQ + APUNPCKHQDQ + APUNPCKHWL + APUNPCKLBW + APUNPCKLLQ + APUNPCKLQDQ + APUNPCKLWL + APUSHAL + APUSHAW + APUSHFL + APUSHFQ + APUSHFW + APUSHL + APUSHQ + APUSHW + APXOR + AQUAD + ARCLB + ARCLL + ARCLQ + ARCLW + ARCPPS + ARCPSS + ARCRB + ARCRL + ARCRQ + ARCRW + ARDFSBASEL + ARDFSBASEQ + ARDGSBASEL + ARDGSBASEQ + ARDMSR + ARDPID + ARDPKRU + ARDPMC + ARDRANDL + ARDRANDQ + ARDRANDW + ARDSEEDL + ARDSEEDQ + ARDSEEDW + ARDTSC + ARDTSCP + AREP + AREPN + ARETFL + ARETFQ + ARETFW + AROLB + AROLL + AROLQ + AROLW + ARORB + ARORL + ARORQ + ARORW + ARORXL + ARORXQ + AROUNDPD + AROUNDPS + AROUNDSD + AROUNDSS + ARSM + ARSQRTPS + ARSQRTSS + ASAHF + ASALB + ASALL + ASALQ + ASALW + ASARB + ASARL + ASARQ + ASARW + ASARXL + ASARXQ + ASBBB + ASBBL + ASBBQ + ASBBW + ASCASB + ASCASL + ASCASQ + ASCASW + ASETCC + ASETCS + ASETEQ + ASETGE + ASETGT + ASETHI + ASETLE + ASETLS + ASETLT + ASETMI + ASETNE + ASETOC + ASETOS + ASETPC + ASETPL + ASETPS + ASFENCE + ASGDT + ASHA1MSG1 + ASHA1MSG2 + ASHA1NEXTE + ASHA1RNDS4 + ASHA256MSG1 + ASHA256MSG2 + ASHA256RNDS2 + ASHLB + ASHLL + ASHLQ + ASHLW + ASHLXL + ASHLXQ + ASHRB + ASHRL + ASHRQ + ASHRW + ASHRXL + ASHRXQ + ASHUFPD + ASHUFPS + ASIDT + ASLDTL + ASLDTQ + ASLDTW + ASMSWL + ASMSWQ + ASMSWW + ASQRTPD + ASQRTPS + ASQRTSD + ASQRTSS + ASTAC + ASTC + ASTD + ASTI + ASTMXCSR + ASTOSB + ASTOSL + ASTOSQ + ASTOSW + ASTRL + ASTRQ + ASTRW + ASUBB + ASUBL + ASUBPD + ASUBPS + ASUBQ + ASUBSD + ASUBSS + ASUBW + ASWAPGS + ASYSCALL + ASYSENTER + ASYSENTER64 + ASYSEXIT + ASYSEXIT64 + ASYSRET + ATESTB + ATESTL + ATESTQ + ATESTW + ATPAUSE + ATZCNTL + ATZCNTQ + ATZCNTW + AUCOMISD + AUCOMISS + AUD1 + AUD2 + AUMWAIT + AUNPCKHPD + AUNPCKHPS + AUNPCKLPD + AUNPCKLPS + AUMONITOR + AV4FMADDPS + AV4FMADDSS + AV4FNMADDPS + AV4FNMADDSS + AVADDPD + AVADDPS + AVADDSD + AVADDSS + AVADDSUBPD + AVADDSUBPS + AVAESDEC + AVAESDECLAST + AVAESENC + AVAESENCLAST + AVAESIMC + AVAESKEYGENASSIST + AVALIGND + AVALIGNQ + AVANDNPD + AVANDNPS + AVANDPD + AVANDPS + AVBLENDMPD + AVBLENDMPS + AVBLENDPD + AVBLENDPS + AVBLENDVPD + AVBLENDVPS + AVBROADCASTF128 + AVBROADCASTF32X2 + AVBROADCASTF32X4 + AVBROADCASTF32X8 + AVBROADCASTF64X2 + AVBROADCASTF64X4 + AVBROADCASTI128 + AVBROADCASTI32X2 + AVBROADCASTI32X4 + AVBROADCASTI32X8 + AVBROADCASTI64X2 + AVBROADCASTI64X4 + AVBROADCASTSD + AVBROADCASTSS + AVCMPPD + AVCMPPS + AVCMPSD + AVCMPSS + AVCOMISD + AVCOMISS + AVCOMPRESSPD + AVCOMPRESSPS + AVCVTDQ2PD + AVCVTDQ2PS + AVCVTPD2DQ + AVCVTPD2DQX + AVCVTPD2DQY + AVCVTPD2PS + AVCVTPD2PSX + AVCVTPD2PSY + AVCVTPD2QQ + AVCVTPD2UDQ + AVCVTPD2UDQX + AVCVTPD2UDQY + AVCVTPD2UQQ + AVCVTPH2PS + AVCVTPS2DQ + AVCVTPS2PD + AVCVTPS2PH + AVCVTPS2QQ + AVCVTPS2UDQ + AVCVTPS2UQQ + AVCVTQQ2PD + AVCVTQQ2PS + AVCVTQQ2PSX + AVCVTQQ2PSY + AVCVTSD2SI + AVCVTSD2SIQ + AVCVTSD2SS + AVCVTSD2USI + AVCVTSD2USIL + AVCVTSD2USIQ + AVCVTSI2SDL + AVCVTSI2SDQ + AVCVTSI2SSL + AVCVTSI2SSQ + AVCVTSS2SD + AVCVTSS2SI + AVCVTSS2SIQ + AVCVTSS2USI + AVCVTSS2USIL + AVCVTSS2USIQ + AVCVTTPD2DQ + AVCVTTPD2DQX + AVCVTTPD2DQY + AVCVTTPD2QQ + AVCVTTPD2UDQ + AVCVTTPD2UDQX + AVCVTTPD2UDQY + AVCVTTPD2UQQ + AVCVTTPS2DQ + AVCVTTPS2QQ + AVCVTTPS2UDQ + AVCVTTPS2UQQ + AVCVTTSD2SI + AVCVTTSD2SIQ + AVCVTTSD2USI + AVCVTTSD2USIL + AVCVTTSD2USIQ + AVCVTTSS2SI + AVCVTTSS2SIQ + AVCVTTSS2USI + AVCVTTSS2USIL + AVCVTTSS2USIQ + AVCVTUDQ2PD + AVCVTUDQ2PS + AVCVTUQQ2PD + AVCVTUQQ2PS + AVCVTUQQ2PSX + 
AVCVTUQQ2PSY + AVCVTUSI2SD + AVCVTUSI2SDL + AVCVTUSI2SDQ + AVCVTUSI2SS + AVCVTUSI2SSL + AVCVTUSI2SSQ + AVDBPSADBW + AVDIVPD + AVDIVPS + AVDIVSD + AVDIVSS + AVDPPD + AVDPPS + AVERR + AVERW + AVEXP2PD + AVEXP2PS + AVEXPANDPD + AVEXPANDPS + AVEXTRACTF128 + AVEXTRACTF32X4 + AVEXTRACTF32X8 + AVEXTRACTF64X2 + AVEXTRACTF64X4 + AVEXTRACTI128 + AVEXTRACTI32X4 + AVEXTRACTI32X8 + AVEXTRACTI64X2 + AVEXTRACTI64X4 + AVEXTRACTPS + AVFIXUPIMMPD + AVFIXUPIMMPS + AVFIXUPIMMSD + AVFIXUPIMMSS + AVFMADD132PD + AVFMADD132PS + AVFMADD132SD + AVFMADD132SS + AVFMADD213PD + AVFMADD213PS + AVFMADD213SD + AVFMADD213SS + AVFMADD231PD + AVFMADD231PS + AVFMADD231SD + AVFMADD231SS + AVFMADDSUB132PD + AVFMADDSUB132PS + AVFMADDSUB213PD + AVFMADDSUB213PS + AVFMADDSUB231PD + AVFMADDSUB231PS + AVFMSUB132PD + AVFMSUB132PS + AVFMSUB132SD + AVFMSUB132SS + AVFMSUB213PD + AVFMSUB213PS + AVFMSUB213SD + AVFMSUB213SS + AVFMSUB231PD + AVFMSUB231PS + AVFMSUB231SD + AVFMSUB231SS + AVFMSUBADD132PD + AVFMSUBADD132PS + AVFMSUBADD213PD + AVFMSUBADD213PS + AVFMSUBADD231PD + AVFMSUBADD231PS + AVFNMADD132PD + AVFNMADD132PS + AVFNMADD132SD + AVFNMADD132SS + AVFNMADD213PD + AVFNMADD213PS + AVFNMADD213SD + AVFNMADD213SS + AVFNMADD231PD + AVFNMADD231PS + AVFNMADD231SD + AVFNMADD231SS + AVFNMSUB132PD + AVFNMSUB132PS + AVFNMSUB132SD + AVFNMSUB132SS + AVFNMSUB213PD + AVFNMSUB213PS + AVFNMSUB213SD + AVFNMSUB213SS + AVFNMSUB231PD + AVFNMSUB231PS + AVFNMSUB231SD + AVFNMSUB231SS + AVFPCLASSPD + AVFPCLASSPDX + AVFPCLASSPDY + AVFPCLASSPDZ + AVFPCLASSPS + AVFPCLASSPSX + AVFPCLASSPSY + AVFPCLASSPSZ + AVFPCLASSSD + AVFPCLASSSS + AVGATHERDPD + AVGATHERDPS + AVGATHERPF0DPD + AVGATHERPF0DPS + AVGATHERPF0QPD + AVGATHERPF0QPS + AVGATHERPF1DPD + AVGATHERPF1DPS + AVGATHERPF1QPD + AVGATHERPF1QPS + AVGATHERQPD + AVGATHERQPS + AVGETEXPPD + AVGETEXPPS + AVGETEXPSD + AVGETEXPSS + AVGETMANTPD + AVGETMANTPS + AVGETMANTSD + AVGETMANTSS + AVGF2P8AFFINEINVQB + AVGF2P8AFFINEQB + AVGF2P8MULB + AVHADDPD + AVHADDPS + AVHSUBPD + AVHSUBPS + AVINSERTF128 + AVINSERTF32X4 + AVINSERTF32X8 + AVINSERTF64X2 + AVINSERTF64X4 + AVINSERTI128 + AVINSERTI32X4 + AVINSERTI32X8 + AVINSERTI64X2 + AVINSERTI64X4 + AVINSERTPS + AVLDDQU + AVLDMXCSR + AVMASKMOVDQU + AVMASKMOVPD + AVMASKMOVPS + AVMAXPD + AVMAXPS + AVMAXSD + AVMAXSS + AVMINPD + AVMINPS + AVMINSD + AVMINSS + AVMOVAPD + AVMOVAPS + AVMOVD + AVMOVDDUP + AVMOVDQA + AVMOVDQA32 + AVMOVDQA64 + AVMOVDQU + AVMOVDQU16 + AVMOVDQU32 + AVMOVDQU64 + AVMOVDQU8 + AVMOVHLPS + AVMOVHPD + AVMOVHPS + AVMOVLHPS + AVMOVLPD + AVMOVLPS + AVMOVMSKPD + AVMOVMSKPS + AVMOVNTDQ + AVMOVNTDQA + AVMOVNTPD + AVMOVNTPS + AVMOVQ + AVMOVSD + AVMOVSHDUP + AVMOVSLDUP + AVMOVSS + AVMOVUPD + AVMOVUPS + AVMPSADBW + AVMULPD + AVMULPS + AVMULSD + AVMULSS + AVORPD + AVORPS + AVP4DPWSSD + AVP4DPWSSDS + AVPABSB + AVPABSD + AVPABSQ + AVPABSW + AVPACKSSDW + AVPACKSSWB + AVPACKUSDW + AVPACKUSWB + AVPADDB + AVPADDD + AVPADDQ + AVPADDSB + AVPADDSW + AVPADDUSB + AVPADDUSW + AVPADDW + AVPALIGNR + AVPAND + AVPANDD + AVPANDN + AVPANDND + AVPANDNQ + AVPANDQ + AVPAVGB + AVPAVGW + AVPBLENDD + AVPBLENDMB + AVPBLENDMD + AVPBLENDMQ + AVPBLENDMW + AVPBLENDVB + AVPBLENDW + AVPBROADCASTB + AVPBROADCASTD + AVPBROADCASTMB2Q + AVPBROADCASTMW2D + AVPBROADCASTQ + AVPBROADCASTW + AVPCLMULQDQ + AVPCMPB + AVPCMPD + AVPCMPEQB + AVPCMPEQD + AVPCMPEQQ + AVPCMPEQW + AVPCMPESTRI + AVPCMPESTRM + AVPCMPGTB + AVPCMPGTD + AVPCMPGTQ + AVPCMPGTW + AVPCMPISTRI + AVPCMPISTRM + AVPCMPQ + AVPCMPUB + AVPCMPUD + AVPCMPUQ + AVPCMPUW + AVPCMPW + AVPCOMPRESSB + AVPCOMPRESSD + AVPCOMPRESSQ + AVPCOMPRESSW + AVPCONFLICTD + 
AVPCONFLICTQ + AVPDPBUSD + AVPDPBUSDS + AVPDPWSSD + AVPDPWSSDS + AVPERM2F128 + AVPERM2I128 + AVPERMB + AVPERMD + AVPERMI2B + AVPERMI2D + AVPERMI2PD + AVPERMI2PS + AVPERMI2Q + AVPERMI2W + AVPERMILPD + AVPERMILPS + AVPERMPD + AVPERMPS + AVPERMQ + AVPERMT2B + AVPERMT2D + AVPERMT2PD + AVPERMT2PS + AVPERMT2Q + AVPERMT2W + AVPERMW + AVPEXPANDB + AVPEXPANDD + AVPEXPANDQ + AVPEXPANDW + AVPEXTRB + AVPEXTRD + AVPEXTRQ + AVPEXTRW + AVPGATHERDD + AVPGATHERDQ + AVPGATHERQD + AVPGATHERQQ + AVPHADDD + AVPHADDSW + AVPHADDW + AVPHMINPOSUW + AVPHSUBD + AVPHSUBSW + AVPHSUBW + AVPINSRB + AVPINSRD + AVPINSRQ + AVPINSRW + AVPLZCNTD + AVPLZCNTQ + AVPMADD52HUQ + AVPMADD52LUQ + AVPMADDUBSW + AVPMADDWD + AVPMASKMOVD + AVPMASKMOVQ + AVPMAXSB + AVPMAXSD + AVPMAXSQ + AVPMAXSW + AVPMAXUB + AVPMAXUD + AVPMAXUQ + AVPMAXUW + AVPMINSB + AVPMINSD + AVPMINSQ + AVPMINSW + AVPMINUB + AVPMINUD + AVPMINUQ + AVPMINUW + AVPMOVB2M + AVPMOVD2M + AVPMOVDB + AVPMOVDW + AVPMOVM2B + AVPMOVM2D + AVPMOVM2Q + AVPMOVM2W + AVPMOVMSKB + AVPMOVQ2M + AVPMOVQB + AVPMOVQD + AVPMOVQW + AVPMOVSDB + AVPMOVSDW + AVPMOVSQB + AVPMOVSQD + AVPMOVSQW + AVPMOVSWB + AVPMOVSXBD + AVPMOVSXBQ + AVPMOVSXBW + AVPMOVSXDQ + AVPMOVSXWD + AVPMOVSXWQ + AVPMOVUSDB + AVPMOVUSDW + AVPMOVUSQB + AVPMOVUSQD + AVPMOVUSQW + AVPMOVUSWB + AVPMOVW2M + AVPMOVWB + AVPMOVZXBD + AVPMOVZXBQ + AVPMOVZXBW + AVPMOVZXDQ + AVPMOVZXWD + AVPMOVZXWQ + AVPMULDQ + AVPMULHRSW + AVPMULHUW + AVPMULHW + AVPMULLD + AVPMULLQ + AVPMULLW + AVPMULTISHIFTQB + AVPMULUDQ + AVPOPCNTB + AVPOPCNTD + AVPOPCNTQ + AVPOPCNTW + AVPOR + AVPORD + AVPORQ + AVPROLD + AVPROLQ + AVPROLVD + AVPROLVQ + AVPRORD + AVPRORQ + AVPRORVD + AVPRORVQ + AVPSADBW + AVPSCATTERDD + AVPSCATTERDQ + AVPSCATTERQD + AVPSCATTERQQ + AVPSHLDD + AVPSHLDQ + AVPSHLDVD + AVPSHLDVQ + AVPSHLDVW + AVPSHLDW + AVPSHRDD + AVPSHRDQ + AVPSHRDVD + AVPSHRDVQ + AVPSHRDVW + AVPSHRDW + AVPSHUFB + AVPSHUFBITQMB + AVPSHUFD + AVPSHUFHW + AVPSHUFLW + AVPSIGNB + AVPSIGND + AVPSIGNW + AVPSLLD + AVPSLLDQ + AVPSLLQ + AVPSLLVD + AVPSLLVQ + AVPSLLVW + AVPSLLW + AVPSRAD + AVPSRAQ + AVPSRAVD + AVPSRAVQ + AVPSRAVW + AVPSRAW + AVPSRLD + AVPSRLDQ + AVPSRLQ + AVPSRLVD + AVPSRLVQ + AVPSRLVW + AVPSRLW + AVPSUBB + AVPSUBD + AVPSUBQ + AVPSUBSB + AVPSUBSW + AVPSUBUSB + AVPSUBUSW + AVPSUBW + AVPTERNLOGD + AVPTERNLOGQ + AVPTEST + AVPTESTMB + AVPTESTMD + AVPTESTMQ + AVPTESTMW + AVPTESTNMB + AVPTESTNMD + AVPTESTNMQ + AVPTESTNMW + AVPUNPCKHBW + AVPUNPCKHDQ + AVPUNPCKHQDQ + AVPUNPCKHWD + AVPUNPCKLBW + AVPUNPCKLDQ + AVPUNPCKLQDQ + AVPUNPCKLWD + AVPXOR + AVPXORD + AVPXORQ + AVRANGEPD + AVRANGEPS + AVRANGESD + AVRANGESS + AVRCP14PD + AVRCP14PS + AVRCP14SD + AVRCP14SS + AVRCP28PD + AVRCP28PS + AVRCP28SD + AVRCP28SS + AVRCPPS + AVRCPSS + AVREDUCEPD + AVREDUCEPS + AVREDUCESD + AVREDUCESS + AVRNDSCALEPD + AVRNDSCALEPS + AVRNDSCALESD + AVRNDSCALESS + AVROUNDPD + AVROUNDPS + AVROUNDSD + AVROUNDSS + AVRSQRT14PD + AVRSQRT14PS + AVRSQRT14SD + AVRSQRT14SS + AVRSQRT28PD + AVRSQRT28PS + AVRSQRT28SD + AVRSQRT28SS + AVRSQRTPS + AVRSQRTSS + AVSCALEFPD + AVSCALEFPS + AVSCALEFSD + AVSCALEFSS + AVSCATTERDPD + AVSCATTERDPS + AVSCATTERPF0DPD + AVSCATTERPF0DPS + AVSCATTERPF0QPD + AVSCATTERPF0QPS + AVSCATTERPF1DPD + AVSCATTERPF1DPS + AVSCATTERPF1QPD + AVSCATTERPF1QPS + AVSCATTERQPD + AVSCATTERQPS + AVSHUFF32X4 + AVSHUFF64X2 + AVSHUFI32X4 + AVSHUFI64X2 + AVSHUFPD + AVSHUFPS + AVSQRTPD + AVSQRTPS + AVSQRTSD + AVSQRTSS + AVSTMXCSR + AVSUBPD + AVSUBPS + AVSUBSD + AVSUBSS + AVTESTPD + AVTESTPS + AVUCOMISD + AVUCOMISS + AVUNPCKHPD + AVUNPCKHPS + AVUNPCKLPD + AVUNPCKLPS + AVXORPD + AVXORPS + AVZEROALL + AVZEROUPPER 
+ AWAIT + AWBINVD + AWORD + AWRFSBASEL + AWRFSBASEQ + AWRGSBASEL + AWRGSBASEQ + AWRMSR + AWRPKRU + AXABORT + AXACQUIRE + AXADDB + AXADDL + AXADDQ + AXADDW + AXBEGIN + AXCHGB + AXCHGL + AXCHGQ + AXCHGW + AXEND + AXGETBV + AXLAT + AXORB + AXORL + AXORPD + AXORPS + AXORQ + AXORW + AXRELEASE + AXRSTOR + AXRSTOR64 + AXRSTORS + AXRSTORS64 + AXSAVE + AXSAVE64 + AXSAVEC + AXSAVEC64 + AXSAVEOPT + AXSAVEOPT64 + AXSAVES + AXSAVES64 + AXSETBV + AXTEST + ALAST +) diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go new file mode 100644 index 0000000..3966381 --- /dev/null +++ b/src/cmd/internal/obj/x86/anames.go @@ -0,0 +1,1608 @@ +// Code generated by stringer -i aenum.go -o anames.go -p x86; DO NOT EDIT. + +package x86 + +import "cmd/internal/obj" + +var Anames = []string{ + obj.A_ARCHSPECIFIC: "AAA", + "AAD", + "AAM", + "AAS", + "ADCB", + "ADCL", + "ADCQ", + "ADCW", + "ADCXL", + "ADCXQ", + "ADDB", + "ADDL", + "ADDPD", + "ADDPS", + "ADDQ", + "ADDSD", + "ADDSS", + "ADDSUBPD", + "ADDSUBPS", + "ADDW", + "ADJSP", + "ADOXL", + "ADOXQ", + "AESDEC", + "AESDECLAST", + "AESENC", + "AESENCLAST", + "AESIMC", + "AESKEYGENASSIST", + "ANDB", + "ANDL", + "ANDNL", + "ANDNPD", + "ANDNPS", + "ANDNQ", + "ANDPD", + "ANDPS", + "ANDQ", + "ANDW", + "ARPL", + "BEXTRL", + "BEXTRQ", + "BLENDPD", + "BLENDPS", + "BLENDVPD", + "BLENDVPS", + "BLSIL", + "BLSIQ", + "BLSMSKL", + "BLSMSKQ", + "BLSRL", + "BLSRQ", + "BOUNDL", + "BOUNDW", + "BSFL", + "BSFQ", + "BSFW", + "BSRL", + "BSRQ", + "BSRW", + "BSWAPL", + "BSWAPQ", + "BTCL", + "BTCQ", + "BTCW", + "BTL", + "BTQ", + "BTRL", + "BTRQ", + "BTRW", + "BTSL", + "BTSQ", + "BTSW", + "BTW", + "BYTE", + "BZHIL", + "BZHIQ", + "CBW", + "CDQ", + "CDQE", + "CLAC", + "CLC", + "CLD", + "CLDEMOTE", + "CLFLUSH", + "CLFLUSHOPT", + "CLI", + "CLTS", + "CLWB", + "CMC", + "CMOVLCC", + "CMOVLCS", + "CMOVLEQ", + "CMOVLGE", + "CMOVLGT", + "CMOVLHI", + "CMOVLLE", + "CMOVLLS", + "CMOVLLT", + "CMOVLMI", + "CMOVLNE", + "CMOVLOC", + "CMOVLOS", + "CMOVLPC", + "CMOVLPL", + "CMOVLPS", + "CMOVQCC", + "CMOVQCS", + "CMOVQEQ", + "CMOVQGE", + "CMOVQGT", + "CMOVQHI", + "CMOVQLE", + "CMOVQLS", + "CMOVQLT", + "CMOVQMI", + "CMOVQNE", + "CMOVQOC", + "CMOVQOS", + "CMOVQPC", + "CMOVQPL", + "CMOVQPS", + "CMOVWCC", + "CMOVWCS", + "CMOVWEQ", + "CMOVWGE", + "CMOVWGT", + "CMOVWHI", + "CMOVWLE", + "CMOVWLS", + "CMOVWLT", + "CMOVWMI", + "CMOVWNE", + "CMOVWOC", + "CMOVWOS", + "CMOVWPC", + "CMOVWPL", + "CMOVWPS", + "CMPB", + "CMPL", + "CMPPD", + "CMPPS", + "CMPQ", + "CMPSB", + "CMPSD", + "CMPSL", + "CMPSQ", + "CMPSS", + "CMPSW", + "CMPW", + "CMPXCHG16B", + "CMPXCHG8B", + "CMPXCHGB", + "CMPXCHGL", + "CMPXCHGQ", + "CMPXCHGW", + "COMISD", + "COMISS", + "CPUID", + "CQO", + "CRC32B", + "CRC32L", + "CRC32Q", + "CRC32W", + "CVTPD2PL", + "CVTPD2PS", + "CVTPL2PD", + "CVTPL2PS", + "CVTPS2PD", + "CVTPS2PL", + "CVTSD2SL", + "CVTSD2SQ", + "CVTSD2SS", + "CVTSL2SD", + "CVTSL2SS", + "CVTSQ2SD", + "CVTSQ2SS", + "CVTSS2SD", + "CVTSS2SL", + "CVTSS2SQ", + "CVTTPD2PL", + "CVTTPS2PL", + "CVTTSD2SL", + "CVTTSD2SQ", + "CVTTSS2SL", + "CVTTSS2SQ", + "CWD", + "CWDE", + "DAA", + "DAS", + "DECB", + "DECL", + "DECQ", + "DECW", + "DIVB", + "DIVL", + "DIVPD", + "DIVPS", + "DIVQ", + "DIVSD", + "DIVSS", + "DIVW", + "DPPD", + "DPPS", + "EMMS", + "ENTER", + "EXTRACTPS", + "F2XM1", + "FABS", + "FADDD", + "FADDDP", + "FADDF", + "FADDL", + "FADDW", + "FBLD", + "FBSTP", + "FCHS", + "FCLEX", + "FCMOVB", + "FCMOVBE", + "FCMOVCC", + "FCMOVCS", + "FCMOVE", + "FCMOVEQ", + "FCMOVHI", + "FCMOVLS", + "FCMOVNB", + "FCMOVNBE", + "FCMOVNE", + "FCMOVNU", + 
"FCMOVU", + "FCMOVUN", + "FCOMD", + "FCOMDP", + "FCOMDPP", + "FCOMF", + "FCOMFP", + "FCOMI", + "FCOMIP", + "FCOML", + "FCOMLP", + "FCOMW", + "FCOMWP", + "FCOS", + "FDECSTP", + "FDIVD", + "FDIVDP", + "FDIVF", + "FDIVL", + "FDIVRD", + "FDIVRDP", + "FDIVRF", + "FDIVRL", + "FDIVRW", + "FDIVW", + "FFREE", + "FINCSTP", + "FINIT", + "FLD1", + "FLDCW", + "FLDENV", + "FLDL2E", + "FLDL2T", + "FLDLG2", + "FLDLN2", + "FLDPI", + "FLDZ", + "FMOVB", + "FMOVBP", + "FMOVD", + "FMOVDP", + "FMOVF", + "FMOVFP", + "FMOVL", + "FMOVLP", + "FMOVV", + "FMOVVP", + "FMOVW", + "FMOVWP", + "FMOVX", + "FMOVXP", + "FMULD", + "FMULDP", + "FMULF", + "FMULL", + "FMULW", + "FNOP", + "FPATAN", + "FPREM", + "FPREM1", + "FPTAN", + "FRNDINT", + "FRSTOR", + "FSAVE", + "FSCALE", + "FSIN", + "FSINCOS", + "FSQRT", + "FSTCW", + "FSTENV", + "FSTSW", + "FSUBD", + "FSUBDP", + "FSUBF", + "FSUBL", + "FSUBRD", + "FSUBRDP", + "FSUBRF", + "FSUBRL", + "FSUBRW", + "FSUBW", + "FTST", + "FUCOM", + "FUCOMI", + "FUCOMIP", + "FUCOMP", + "FUCOMPP", + "FXAM", + "FXCHD", + "FXRSTOR", + "FXRSTOR64", + "FXSAVE", + "FXSAVE64", + "FXTRACT", + "FYL2X", + "FYL2XP1", + "HADDPD", + "HADDPS", + "HLT", + "HSUBPD", + "HSUBPS", + "ICEBP", + "IDIVB", + "IDIVL", + "IDIVQ", + "IDIVW", + "IMUL3L", + "IMUL3Q", + "IMUL3W", + "IMULB", + "IMULL", + "IMULQ", + "IMULW", + "INB", + "INCB", + "INCL", + "INCQ", + "INCW", + "INL", + "INSB", + "INSERTPS", + "INSL", + "INSW", + "INT", + "INTO", + "INVD", + "INVLPG", + "INVPCID", + "INW", + "IRETL", + "IRETQ", + "IRETW", + "JCC", + "JCS", + "JCXZL", + "JCXZQ", + "JCXZW", + "JEQ", + "JGE", + "JGT", + "JHI", + "JLE", + "JLS", + "JLT", + "JMI", + "JNE", + "JOC", + "JOS", + "JPC", + "JPL", + "JPS", + "KADDB", + "KADDD", + "KADDQ", + "KADDW", + "KANDB", + "KANDD", + "KANDNB", + "KANDND", + "KANDNQ", + "KANDNW", + "KANDQ", + "KANDW", + "KMOVB", + "KMOVD", + "KMOVQ", + "KMOVW", + "KNOTB", + "KNOTD", + "KNOTQ", + "KNOTW", + "KORB", + "KORD", + "KORQ", + "KORTESTB", + "KORTESTD", + "KORTESTQ", + "KORTESTW", + "KORW", + "KSHIFTLB", + "KSHIFTLD", + "KSHIFTLQ", + "KSHIFTLW", + "KSHIFTRB", + "KSHIFTRD", + "KSHIFTRQ", + "KSHIFTRW", + "KTESTB", + "KTESTD", + "KTESTQ", + "KTESTW", + "KUNPCKBW", + "KUNPCKDQ", + "KUNPCKWD", + "KXNORB", + "KXNORD", + "KXNORQ", + "KXNORW", + "KXORB", + "KXORD", + "KXORQ", + "KXORW", + "LAHF", + "LARL", + "LARQ", + "LARW", + "LDDQU", + "LDMXCSR", + "LEAL", + "LEAQ", + "LEAVEL", + "LEAVEQ", + "LEAVEW", + "LEAW", + "LFENCE", + "LFSL", + "LFSQ", + "LFSW", + "LGDT", + "LGSL", + "LGSQ", + "LGSW", + "LIDT", + "LLDT", + "LMSW", + "LOCK", + "LODSB", + "LODSL", + "LODSQ", + "LODSW", + "LONG", + "LOOP", + "LOOPEQ", + "LOOPNE", + "LSLL", + "LSLQ", + "LSLW", + "LSSL", + "LSSQ", + "LSSW", + "LTR", + "LZCNTL", + "LZCNTQ", + "LZCNTW", + "MASKMOVOU", + "MASKMOVQ", + "MAXPD", + "MAXPS", + "MAXSD", + "MAXSS", + "MFENCE", + "MINPD", + "MINPS", + "MINSD", + "MINSS", + "MONITOR", + "MOVAPD", + "MOVAPS", + "MOVB", + "MOVBEL", + "MOVBEQ", + "MOVBEW", + "MOVBLSX", + "MOVBLZX", + "MOVBQSX", + "MOVBQZX", + "MOVBWSX", + "MOVBWZX", + "MOVDDUP", + "MOVHLPS", + "MOVHPD", + "MOVHPS", + "MOVL", + "MOVLHPS", + "MOVLPD", + "MOVLPS", + "MOVLQSX", + "MOVLQZX", + "MOVMSKPD", + "MOVMSKPS", + "MOVNTDQA", + "MOVNTIL", + "MOVNTIQ", + "MOVNTO", + "MOVNTPD", + "MOVNTPS", + "MOVNTQ", + "MOVO", + "MOVOU", + "MOVQ", + "MOVQL", + "MOVQOZX", + "MOVSB", + "MOVSD", + "MOVSHDUP", + "MOVSL", + "MOVSLDUP", + "MOVSQ", + "MOVSS", + "MOVSW", + "MOVSWW", + "MOVUPD", + "MOVUPS", + "MOVW", + "MOVWLSX", + "MOVWLZX", + "MOVWQSX", + "MOVWQZX", + "MOVZWW", + "MPSADBW", + 
"MULB", + "MULL", + "MULPD", + "MULPS", + "MULQ", + "MULSD", + "MULSS", + "MULW", + "MULXL", + "MULXQ", + "MWAIT", + "NEGB", + "NEGL", + "NEGQ", + "NEGW", + "NOPL", + "NOPW", + "NOTB", + "NOTL", + "NOTQ", + "NOTW", + "ORB", + "ORL", + "ORPD", + "ORPS", + "ORQ", + "ORW", + "OUTB", + "OUTL", + "OUTSB", + "OUTSL", + "OUTSW", + "OUTW", + "PABSB", + "PABSD", + "PABSW", + "PACKSSLW", + "PACKSSWB", + "PACKUSDW", + "PACKUSWB", + "PADDB", + "PADDL", + "PADDQ", + "PADDSB", + "PADDSW", + "PADDUSB", + "PADDUSW", + "PADDW", + "PALIGNR", + "PAND", + "PANDN", + "PAUSE", + "PAVGB", + "PAVGW", + "PBLENDVB", + "PBLENDW", + "PCLMULQDQ", + "PCMPEQB", + "PCMPEQL", + "PCMPEQQ", + "PCMPEQW", + "PCMPESTRI", + "PCMPESTRM", + "PCMPGTB", + "PCMPGTL", + "PCMPGTQ", + "PCMPGTW", + "PCMPISTRI", + "PCMPISTRM", + "PDEPL", + "PDEPQ", + "PEXTL", + "PEXTQ", + "PEXTRB", + "PEXTRD", + "PEXTRQ", + "PEXTRW", + "PHADDD", + "PHADDSW", + "PHADDW", + "PHMINPOSUW", + "PHSUBD", + "PHSUBSW", + "PHSUBW", + "PINSRB", + "PINSRD", + "PINSRQ", + "PINSRW", + "PMADDUBSW", + "PMADDWL", + "PMAXSB", + "PMAXSD", + "PMAXSW", + "PMAXUB", + "PMAXUD", + "PMAXUW", + "PMINSB", + "PMINSD", + "PMINSW", + "PMINUB", + "PMINUD", + "PMINUW", + "PMOVMSKB", + "PMOVSXBD", + "PMOVSXBQ", + "PMOVSXBW", + "PMOVSXDQ", + "PMOVSXWD", + "PMOVSXWQ", + "PMOVZXBD", + "PMOVZXBQ", + "PMOVZXBW", + "PMOVZXDQ", + "PMOVZXWD", + "PMOVZXWQ", + "PMULDQ", + "PMULHRSW", + "PMULHUW", + "PMULHW", + "PMULLD", + "PMULLW", + "PMULULQ", + "POPAL", + "POPAW", + "POPCNTL", + "POPCNTQ", + "POPCNTW", + "POPFL", + "POPFQ", + "POPFW", + "POPL", + "POPQ", + "POPW", + "POR", + "PREFETCHNTA", + "PREFETCHT0", + "PREFETCHT1", + "PREFETCHT2", + "PSADBW", + "PSHUFB", + "PSHUFD", + "PSHUFHW", + "PSHUFL", + "PSHUFLW", + "PSHUFW", + "PSIGNB", + "PSIGND", + "PSIGNW", + "PSLLL", + "PSLLO", + "PSLLQ", + "PSLLW", + "PSRAL", + "PSRAW", + "PSRLL", + "PSRLO", + "PSRLQ", + "PSRLW", + "PSUBB", + "PSUBL", + "PSUBQ", + "PSUBSB", + "PSUBSW", + "PSUBUSB", + "PSUBUSW", + "PSUBW", + "PTEST", + "PUNPCKHBW", + "PUNPCKHLQ", + "PUNPCKHQDQ", + "PUNPCKHWL", + "PUNPCKLBW", + "PUNPCKLLQ", + "PUNPCKLQDQ", + "PUNPCKLWL", + "PUSHAL", + "PUSHAW", + "PUSHFL", + "PUSHFQ", + "PUSHFW", + "PUSHL", + "PUSHQ", + "PUSHW", + "PXOR", + "QUAD", + "RCLB", + "RCLL", + "RCLQ", + "RCLW", + "RCPPS", + "RCPSS", + "RCRB", + "RCRL", + "RCRQ", + "RCRW", + "RDFSBASEL", + "RDFSBASEQ", + "RDGSBASEL", + "RDGSBASEQ", + "RDMSR", + "RDPID", + "RDPKRU", + "RDPMC", + "RDRANDL", + "RDRANDQ", + "RDRANDW", + "RDSEEDL", + "RDSEEDQ", + "RDSEEDW", + "RDTSC", + "RDTSCP", + "REP", + "REPN", + "RETFL", + "RETFQ", + "RETFW", + "ROLB", + "ROLL", + "ROLQ", + "ROLW", + "RORB", + "RORL", + "RORQ", + "RORW", + "RORXL", + "RORXQ", + "ROUNDPD", + "ROUNDPS", + "ROUNDSD", + "ROUNDSS", + "RSM", + "RSQRTPS", + "RSQRTSS", + "SAHF", + "SALB", + "SALL", + "SALQ", + "SALW", + "SARB", + "SARL", + "SARQ", + "SARW", + "SARXL", + "SARXQ", + "SBBB", + "SBBL", + "SBBQ", + "SBBW", + "SCASB", + "SCASL", + "SCASQ", + "SCASW", + "SETCC", + "SETCS", + "SETEQ", + "SETGE", + "SETGT", + "SETHI", + "SETLE", + "SETLS", + "SETLT", + "SETMI", + "SETNE", + "SETOC", + "SETOS", + "SETPC", + "SETPL", + "SETPS", + "SFENCE", + "SGDT", + "SHA1MSG1", + "SHA1MSG2", + "SHA1NEXTE", + "SHA1RNDS4", + "SHA256MSG1", + "SHA256MSG2", + "SHA256RNDS2", + "SHLB", + "SHLL", + "SHLQ", + "SHLW", + "SHLXL", + "SHLXQ", + "SHRB", + "SHRL", + "SHRQ", + "SHRW", + "SHRXL", + "SHRXQ", + "SHUFPD", + "SHUFPS", + "SIDT", + "SLDTL", + "SLDTQ", + "SLDTW", + "SMSWL", + "SMSWQ", + "SMSWW", + "SQRTPD", + "SQRTPS", + "SQRTSD", + "SQRTSS", 
+ "STAC", + "STC", + "STD", + "STI", + "STMXCSR", + "STOSB", + "STOSL", + "STOSQ", + "STOSW", + "STRL", + "STRQ", + "STRW", + "SUBB", + "SUBL", + "SUBPD", + "SUBPS", + "SUBQ", + "SUBSD", + "SUBSS", + "SUBW", + "SWAPGS", + "SYSCALL", + "SYSENTER", + "SYSENTER64", + "SYSEXIT", + "SYSEXIT64", + "SYSRET", + "TESTB", + "TESTL", + "TESTQ", + "TESTW", + "TPAUSE", + "TZCNTL", + "TZCNTQ", + "TZCNTW", + "UCOMISD", + "UCOMISS", + "UD1", + "UD2", + "UMWAIT", + "UNPCKHPD", + "UNPCKHPS", + "UNPCKLPD", + "UNPCKLPS", + "UMONITOR", + "V4FMADDPS", + "V4FMADDSS", + "V4FNMADDPS", + "V4FNMADDSS", + "VADDPD", + "VADDPS", + "VADDSD", + "VADDSS", + "VADDSUBPD", + "VADDSUBPS", + "VAESDEC", + "VAESDECLAST", + "VAESENC", + "VAESENCLAST", + "VAESIMC", + "VAESKEYGENASSIST", + "VALIGND", + "VALIGNQ", + "VANDNPD", + "VANDNPS", + "VANDPD", + "VANDPS", + "VBLENDMPD", + "VBLENDMPS", + "VBLENDPD", + "VBLENDPS", + "VBLENDVPD", + "VBLENDVPS", + "VBROADCASTF128", + "VBROADCASTF32X2", + "VBROADCASTF32X4", + "VBROADCASTF32X8", + "VBROADCASTF64X2", + "VBROADCASTF64X4", + "VBROADCASTI128", + "VBROADCASTI32X2", + "VBROADCASTI32X4", + "VBROADCASTI32X8", + "VBROADCASTI64X2", + "VBROADCASTI64X4", + "VBROADCASTSD", + "VBROADCASTSS", + "VCMPPD", + "VCMPPS", + "VCMPSD", + "VCMPSS", + "VCOMISD", + "VCOMISS", + "VCOMPRESSPD", + "VCOMPRESSPS", + "VCVTDQ2PD", + "VCVTDQ2PS", + "VCVTPD2DQ", + "VCVTPD2DQX", + "VCVTPD2DQY", + "VCVTPD2PS", + "VCVTPD2PSX", + "VCVTPD2PSY", + "VCVTPD2QQ", + "VCVTPD2UDQ", + "VCVTPD2UDQX", + "VCVTPD2UDQY", + "VCVTPD2UQQ", + "VCVTPH2PS", + "VCVTPS2DQ", + "VCVTPS2PD", + "VCVTPS2PH", + "VCVTPS2QQ", + "VCVTPS2UDQ", + "VCVTPS2UQQ", + "VCVTQQ2PD", + "VCVTQQ2PS", + "VCVTQQ2PSX", + "VCVTQQ2PSY", + "VCVTSD2SI", + "VCVTSD2SIQ", + "VCVTSD2SS", + "VCVTSD2USI", + "VCVTSD2USIL", + "VCVTSD2USIQ", + "VCVTSI2SDL", + "VCVTSI2SDQ", + "VCVTSI2SSL", + "VCVTSI2SSQ", + "VCVTSS2SD", + "VCVTSS2SI", + "VCVTSS2SIQ", + "VCVTSS2USI", + "VCVTSS2USIL", + "VCVTSS2USIQ", + "VCVTTPD2DQ", + "VCVTTPD2DQX", + "VCVTTPD2DQY", + "VCVTTPD2QQ", + "VCVTTPD2UDQ", + "VCVTTPD2UDQX", + "VCVTTPD2UDQY", + "VCVTTPD2UQQ", + "VCVTTPS2DQ", + "VCVTTPS2QQ", + "VCVTTPS2UDQ", + "VCVTTPS2UQQ", + "VCVTTSD2SI", + "VCVTTSD2SIQ", + "VCVTTSD2USI", + "VCVTTSD2USIL", + "VCVTTSD2USIQ", + "VCVTTSS2SI", + "VCVTTSS2SIQ", + "VCVTTSS2USI", + "VCVTTSS2USIL", + "VCVTTSS2USIQ", + "VCVTUDQ2PD", + "VCVTUDQ2PS", + "VCVTUQQ2PD", + "VCVTUQQ2PS", + "VCVTUQQ2PSX", + "VCVTUQQ2PSY", + "VCVTUSI2SD", + "VCVTUSI2SDL", + "VCVTUSI2SDQ", + "VCVTUSI2SS", + "VCVTUSI2SSL", + "VCVTUSI2SSQ", + "VDBPSADBW", + "VDIVPD", + "VDIVPS", + "VDIVSD", + "VDIVSS", + "VDPPD", + "VDPPS", + "VERR", + "VERW", + "VEXP2PD", + "VEXP2PS", + "VEXPANDPD", + "VEXPANDPS", + "VEXTRACTF128", + "VEXTRACTF32X4", + "VEXTRACTF32X8", + "VEXTRACTF64X2", + "VEXTRACTF64X4", + "VEXTRACTI128", + "VEXTRACTI32X4", + "VEXTRACTI32X8", + "VEXTRACTI64X2", + "VEXTRACTI64X4", + "VEXTRACTPS", + "VFIXUPIMMPD", + "VFIXUPIMMPS", + "VFIXUPIMMSD", + "VFIXUPIMMSS", + "VFMADD132PD", + "VFMADD132PS", + "VFMADD132SD", + "VFMADD132SS", + "VFMADD213PD", + "VFMADD213PS", + "VFMADD213SD", + "VFMADD213SS", + "VFMADD231PD", + "VFMADD231PS", + "VFMADD231SD", + "VFMADD231SS", + "VFMADDSUB132PD", + "VFMADDSUB132PS", + "VFMADDSUB213PD", + "VFMADDSUB213PS", + "VFMADDSUB231PD", + "VFMADDSUB231PS", + "VFMSUB132PD", + "VFMSUB132PS", + "VFMSUB132SD", + "VFMSUB132SS", + "VFMSUB213PD", + "VFMSUB213PS", + "VFMSUB213SD", + "VFMSUB213SS", + "VFMSUB231PD", + "VFMSUB231PS", + "VFMSUB231SD", + "VFMSUB231SS", + "VFMSUBADD132PD", + "VFMSUBADD132PS", + "VFMSUBADD213PD", + 
"VFMSUBADD213PS", + "VFMSUBADD231PD", + "VFMSUBADD231PS", + "VFNMADD132PD", + "VFNMADD132PS", + "VFNMADD132SD", + "VFNMADD132SS", + "VFNMADD213PD", + "VFNMADD213PS", + "VFNMADD213SD", + "VFNMADD213SS", + "VFNMADD231PD", + "VFNMADD231PS", + "VFNMADD231SD", + "VFNMADD231SS", + "VFNMSUB132PD", + "VFNMSUB132PS", + "VFNMSUB132SD", + "VFNMSUB132SS", + "VFNMSUB213PD", + "VFNMSUB213PS", + "VFNMSUB213SD", + "VFNMSUB213SS", + "VFNMSUB231PD", + "VFNMSUB231PS", + "VFNMSUB231SD", + "VFNMSUB231SS", + "VFPCLASSPD", + "VFPCLASSPDX", + "VFPCLASSPDY", + "VFPCLASSPDZ", + "VFPCLASSPS", + "VFPCLASSPSX", + "VFPCLASSPSY", + "VFPCLASSPSZ", + "VFPCLASSSD", + "VFPCLASSSS", + "VGATHERDPD", + "VGATHERDPS", + "VGATHERPF0DPD", + "VGATHERPF0DPS", + "VGATHERPF0QPD", + "VGATHERPF0QPS", + "VGATHERPF1DPD", + "VGATHERPF1DPS", + "VGATHERPF1QPD", + "VGATHERPF1QPS", + "VGATHERQPD", + "VGATHERQPS", + "VGETEXPPD", + "VGETEXPPS", + "VGETEXPSD", + "VGETEXPSS", + "VGETMANTPD", + "VGETMANTPS", + "VGETMANTSD", + "VGETMANTSS", + "VGF2P8AFFINEINVQB", + "VGF2P8AFFINEQB", + "VGF2P8MULB", + "VHADDPD", + "VHADDPS", + "VHSUBPD", + "VHSUBPS", + "VINSERTF128", + "VINSERTF32X4", + "VINSERTF32X8", + "VINSERTF64X2", + "VINSERTF64X4", + "VINSERTI128", + "VINSERTI32X4", + "VINSERTI32X8", + "VINSERTI64X2", + "VINSERTI64X4", + "VINSERTPS", + "VLDDQU", + "VLDMXCSR", + "VMASKMOVDQU", + "VMASKMOVPD", + "VMASKMOVPS", + "VMAXPD", + "VMAXPS", + "VMAXSD", + "VMAXSS", + "VMINPD", + "VMINPS", + "VMINSD", + "VMINSS", + "VMOVAPD", + "VMOVAPS", + "VMOVD", + "VMOVDDUP", + "VMOVDQA", + "VMOVDQA32", + "VMOVDQA64", + "VMOVDQU", + "VMOVDQU16", + "VMOVDQU32", + "VMOVDQU64", + "VMOVDQU8", + "VMOVHLPS", + "VMOVHPD", + "VMOVHPS", + "VMOVLHPS", + "VMOVLPD", + "VMOVLPS", + "VMOVMSKPD", + "VMOVMSKPS", + "VMOVNTDQ", + "VMOVNTDQA", + "VMOVNTPD", + "VMOVNTPS", + "VMOVQ", + "VMOVSD", + "VMOVSHDUP", + "VMOVSLDUP", + "VMOVSS", + "VMOVUPD", + "VMOVUPS", + "VMPSADBW", + "VMULPD", + "VMULPS", + "VMULSD", + "VMULSS", + "VORPD", + "VORPS", + "VP4DPWSSD", + "VP4DPWSSDS", + "VPABSB", + "VPABSD", + "VPABSQ", + "VPABSW", + "VPACKSSDW", + "VPACKSSWB", + "VPACKUSDW", + "VPACKUSWB", + "VPADDB", + "VPADDD", + "VPADDQ", + "VPADDSB", + "VPADDSW", + "VPADDUSB", + "VPADDUSW", + "VPADDW", + "VPALIGNR", + "VPAND", + "VPANDD", + "VPANDN", + "VPANDND", + "VPANDNQ", + "VPANDQ", + "VPAVGB", + "VPAVGW", + "VPBLENDD", + "VPBLENDMB", + "VPBLENDMD", + "VPBLENDMQ", + "VPBLENDMW", + "VPBLENDVB", + "VPBLENDW", + "VPBROADCASTB", + "VPBROADCASTD", + "VPBROADCASTMB2Q", + "VPBROADCASTMW2D", + "VPBROADCASTQ", + "VPBROADCASTW", + "VPCLMULQDQ", + "VPCMPB", + "VPCMPD", + "VPCMPEQB", + "VPCMPEQD", + "VPCMPEQQ", + "VPCMPEQW", + "VPCMPESTRI", + "VPCMPESTRM", + "VPCMPGTB", + "VPCMPGTD", + "VPCMPGTQ", + "VPCMPGTW", + "VPCMPISTRI", + "VPCMPISTRM", + "VPCMPQ", + "VPCMPUB", + "VPCMPUD", + "VPCMPUQ", + "VPCMPUW", + "VPCMPW", + "VPCOMPRESSB", + "VPCOMPRESSD", + "VPCOMPRESSQ", + "VPCOMPRESSW", + "VPCONFLICTD", + "VPCONFLICTQ", + "VPDPBUSD", + "VPDPBUSDS", + "VPDPWSSD", + "VPDPWSSDS", + "VPERM2F128", + "VPERM2I128", + "VPERMB", + "VPERMD", + "VPERMI2B", + "VPERMI2D", + "VPERMI2PD", + "VPERMI2PS", + "VPERMI2Q", + "VPERMI2W", + "VPERMILPD", + "VPERMILPS", + "VPERMPD", + "VPERMPS", + "VPERMQ", + "VPERMT2B", + "VPERMT2D", + "VPERMT2PD", + "VPERMT2PS", + "VPERMT2Q", + "VPERMT2W", + "VPERMW", + "VPEXPANDB", + "VPEXPANDD", + "VPEXPANDQ", + "VPEXPANDW", + "VPEXTRB", + "VPEXTRD", + "VPEXTRQ", + "VPEXTRW", + "VPGATHERDD", + "VPGATHERDQ", + "VPGATHERQD", + "VPGATHERQQ", + "VPHADDD", + "VPHADDSW", + "VPHADDW", + "VPHMINPOSUW", + "VPHSUBD", 
+ "VPHSUBSW", + "VPHSUBW", + "VPINSRB", + "VPINSRD", + "VPINSRQ", + "VPINSRW", + "VPLZCNTD", + "VPLZCNTQ", + "VPMADD52HUQ", + "VPMADD52LUQ", + "VPMADDUBSW", + "VPMADDWD", + "VPMASKMOVD", + "VPMASKMOVQ", + "VPMAXSB", + "VPMAXSD", + "VPMAXSQ", + "VPMAXSW", + "VPMAXUB", + "VPMAXUD", + "VPMAXUQ", + "VPMAXUW", + "VPMINSB", + "VPMINSD", + "VPMINSQ", + "VPMINSW", + "VPMINUB", + "VPMINUD", + "VPMINUQ", + "VPMINUW", + "VPMOVB2M", + "VPMOVD2M", + "VPMOVDB", + "VPMOVDW", + "VPMOVM2B", + "VPMOVM2D", + "VPMOVM2Q", + "VPMOVM2W", + "VPMOVMSKB", + "VPMOVQ2M", + "VPMOVQB", + "VPMOVQD", + "VPMOVQW", + "VPMOVSDB", + "VPMOVSDW", + "VPMOVSQB", + "VPMOVSQD", + "VPMOVSQW", + "VPMOVSWB", + "VPMOVSXBD", + "VPMOVSXBQ", + "VPMOVSXBW", + "VPMOVSXDQ", + "VPMOVSXWD", + "VPMOVSXWQ", + "VPMOVUSDB", + "VPMOVUSDW", + "VPMOVUSQB", + "VPMOVUSQD", + "VPMOVUSQW", + "VPMOVUSWB", + "VPMOVW2M", + "VPMOVWB", + "VPMOVZXBD", + "VPMOVZXBQ", + "VPMOVZXBW", + "VPMOVZXDQ", + "VPMOVZXWD", + "VPMOVZXWQ", + "VPMULDQ", + "VPMULHRSW", + "VPMULHUW", + "VPMULHW", + "VPMULLD", + "VPMULLQ", + "VPMULLW", + "VPMULTISHIFTQB", + "VPMULUDQ", + "VPOPCNTB", + "VPOPCNTD", + "VPOPCNTQ", + "VPOPCNTW", + "VPOR", + "VPORD", + "VPORQ", + "VPROLD", + "VPROLQ", + "VPROLVD", + "VPROLVQ", + "VPRORD", + "VPRORQ", + "VPRORVD", + "VPRORVQ", + "VPSADBW", + "VPSCATTERDD", + "VPSCATTERDQ", + "VPSCATTERQD", + "VPSCATTERQQ", + "VPSHLDD", + "VPSHLDQ", + "VPSHLDVD", + "VPSHLDVQ", + "VPSHLDVW", + "VPSHLDW", + "VPSHRDD", + "VPSHRDQ", + "VPSHRDVD", + "VPSHRDVQ", + "VPSHRDVW", + "VPSHRDW", + "VPSHUFB", + "VPSHUFBITQMB", + "VPSHUFD", + "VPSHUFHW", + "VPSHUFLW", + "VPSIGNB", + "VPSIGND", + "VPSIGNW", + "VPSLLD", + "VPSLLDQ", + "VPSLLQ", + "VPSLLVD", + "VPSLLVQ", + "VPSLLVW", + "VPSLLW", + "VPSRAD", + "VPSRAQ", + "VPSRAVD", + "VPSRAVQ", + "VPSRAVW", + "VPSRAW", + "VPSRLD", + "VPSRLDQ", + "VPSRLQ", + "VPSRLVD", + "VPSRLVQ", + "VPSRLVW", + "VPSRLW", + "VPSUBB", + "VPSUBD", + "VPSUBQ", + "VPSUBSB", + "VPSUBSW", + "VPSUBUSB", + "VPSUBUSW", + "VPSUBW", + "VPTERNLOGD", + "VPTERNLOGQ", + "VPTEST", + "VPTESTMB", + "VPTESTMD", + "VPTESTMQ", + "VPTESTMW", + "VPTESTNMB", + "VPTESTNMD", + "VPTESTNMQ", + "VPTESTNMW", + "VPUNPCKHBW", + "VPUNPCKHDQ", + "VPUNPCKHQDQ", + "VPUNPCKHWD", + "VPUNPCKLBW", + "VPUNPCKLDQ", + "VPUNPCKLQDQ", + "VPUNPCKLWD", + "VPXOR", + "VPXORD", + "VPXORQ", + "VRANGEPD", + "VRANGEPS", + "VRANGESD", + "VRANGESS", + "VRCP14PD", + "VRCP14PS", + "VRCP14SD", + "VRCP14SS", + "VRCP28PD", + "VRCP28PS", + "VRCP28SD", + "VRCP28SS", + "VRCPPS", + "VRCPSS", + "VREDUCEPD", + "VREDUCEPS", + "VREDUCESD", + "VREDUCESS", + "VRNDSCALEPD", + "VRNDSCALEPS", + "VRNDSCALESD", + "VRNDSCALESS", + "VROUNDPD", + "VROUNDPS", + "VROUNDSD", + "VROUNDSS", + "VRSQRT14PD", + "VRSQRT14PS", + "VRSQRT14SD", + "VRSQRT14SS", + "VRSQRT28PD", + "VRSQRT28PS", + "VRSQRT28SD", + "VRSQRT28SS", + "VRSQRTPS", + "VRSQRTSS", + "VSCALEFPD", + "VSCALEFPS", + "VSCALEFSD", + "VSCALEFSS", + "VSCATTERDPD", + "VSCATTERDPS", + "VSCATTERPF0DPD", + "VSCATTERPF0DPS", + "VSCATTERPF0QPD", + "VSCATTERPF0QPS", + "VSCATTERPF1DPD", + "VSCATTERPF1DPS", + "VSCATTERPF1QPD", + "VSCATTERPF1QPS", + "VSCATTERQPD", + "VSCATTERQPS", + "VSHUFF32X4", + "VSHUFF64X2", + "VSHUFI32X4", + "VSHUFI64X2", + "VSHUFPD", + "VSHUFPS", + "VSQRTPD", + "VSQRTPS", + "VSQRTSD", + "VSQRTSS", + "VSTMXCSR", + "VSUBPD", + "VSUBPS", + "VSUBSD", + "VSUBSS", + "VTESTPD", + "VTESTPS", + "VUCOMISD", + "VUCOMISS", + "VUNPCKHPD", + "VUNPCKHPS", + "VUNPCKLPD", + "VUNPCKLPS", + "VXORPD", + "VXORPS", + "VZEROALL", + "VZEROUPPER", + "WAIT", + "WBINVD", + "WORD", + 
"WRFSBASEL", + "WRFSBASEQ", + "WRGSBASEL", + "WRGSBASEQ", + "WRMSR", + "WRPKRU", + "XABORT", + "XACQUIRE", + "XADDB", + "XADDL", + "XADDQ", + "XADDW", + "XBEGIN", + "XCHGB", + "XCHGL", + "XCHGQ", + "XCHGW", + "XEND", + "XGETBV", + "XLAT", + "XORB", + "XORL", + "XORPD", + "XORPS", + "XORQ", + "XORW", + "XRELEASE", + "XRSTOR", + "XRSTOR64", + "XRSTORS", + "XRSTORS64", + "XSAVE", + "XSAVE64", + "XSAVEC", + "XSAVEC64", + "XSAVEOPT", + "XSAVEOPT64", + "XSAVES", + "XSAVES64", + "XSETBV", + "XTEST", + "LAST", +} diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go new file mode 100644 index 0000000..bdd75b4 --- /dev/null +++ b/src/cmd/internal/obj/x86/asm6.go @@ -0,0 +1,5473 @@ +// Inferno utils/6l/span.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package x86 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "encoding/binary" + "fmt" + "internal/buildcfg" + "log" + "strings" +) + +var ( + plan9privates *obj.LSym +) + +// Instruction layout. + +// Loop alignment constants: +// want to align loop entry to loopAlign-byte boundary, +// and willing to insert at most maxLoopPad bytes of NOP to do so. +// We define a loop entry as the target of a backward jump. +// +// gcc uses maxLoopPad = 10 for its 'generic x86-64' config, +// and it aligns all jump targets, not just backward jump targets. +// +// As of 6/1/2012, the effect of setting maxLoopPad = 10 here +// is very slight but negative, so the alignment is disabled by +// setting MaxLoopPad = 0. The code is here for reference and +// for future experiments. +const ( + loopAlign = 16 + maxLoopPad = 0 +) + +// Bit flags that are used to express jump target properties. +const ( + // branchBackwards marks targets that are located behind. + // Used to express jumps to loop headers. 
+ branchBackwards = (1 << iota) + // branchShort marks branches those target is close, + // with offset is in -128..127 range. + branchShort + // branchLoopHead marks loop entry. + // Used to insert padding for misaligned loops. + branchLoopHead +) + +// opBytes holds optab encoding bytes. +// Each ytab reserves fixed amount of bytes in this array. +// +// The size should be the minimal number of bytes that +// are enough to hold biggest optab op lines. +type opBytes [31]uint8 + +type Optab struct { + as obj.As + ytab []ytab + prefix uint8 + op opBytes +} + +type movtab struct { + as obj.As + ft uint8 + f3t uint8 + tt uint8 + code uint8 + op [4]uint8 +} + +const ( + Yxxx = iota + Ynone + Yi0 // $0 + Yi1 // $1 + Yu2 // $x, x fits in uint2 + Yi8 // $x, x fits in int8 + Yu8 // $x, x fits in uint8 + Yu7 // $x, x in 0..127 (fits in both int8 and uint8) + Ys32 + Yi32 + Yi64 + Yiauto + Yal + Ycl + Yax + Ycx + Yrb + Yrl + Yrl32 // Yrl on 32-bit system + Yrf + Yf0 + Yrx + Ymb + Yml + Ym + Ybr + Ycs + Yss + Yds + Yes + Yfs + Ygs + Ygdtr + Yidtr + Yldtr + Ymsw + Ytask + Ycr0 + Ycr1 + Ycr2 + Ycr3 + Ycr4 + Ycr5 + Ycr6 + Ycr7 + Ycr8 + Ydr0 + Ydr1 + Ydr2 + Ydr3 + Ydr4 + Ydr5 + Ydr6 + Ydr7 + Ytr0 + Ytr1 + Ytr2 + Ytr3 + Ytr4 + Ytr5 + Ytr6 + Ytr7 + Ymr + Ymm + Yxr0 // X0 only. "<XMM0>" notation in Intel manual. + YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex + Yxr // X0..X15 + YxrEvex // X0..X31 + Yxm + YxmEvex // YxrEvex+Ym + Yxvm // VSIB vector array; vm32x/vm64x + YxvmEvex // Yxvm which permits High-16 X register as index. + YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex + Yyr // Y0..Y15 + YyrEvex // Y0..Y31 + Yym + YymEvex // YyrEvex+Ym + Yyvm // VSIB vector array; vm32y/vm64y + YyvmEvex // Yyvm which permits High-16 Y register as index. + YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex + Yzr // Z0..Z31 + Yzm // Yzr+Ym + Yzvm // VSIB vector array; vm32z/vm64z + Yk0 // K0 + Yknot0 // K1..K7; write mask + Yk // K0..K7; used for KOP + Ykm // Yk+Ym; used for KOP + Ytls + Ytextsize + Yindir + Ymax +) + +const ( + Zxxx = iota + Zlit + Zlitm_r + Zlitr_m + Zlit_m_r + Z_rp + Zbr + Zcall + Zcallcon + Zcallduff + Zcallind + Zcallindreg + Zib_ + Zib_rp + Zibo_m + Zibo_m_xm + Zil_ + Zil_rp + Ziq_rp + Zilo_m + Zjmp + Zjmpcon + Zloop + Zo_iw + Zm_o + Zm_r + Z_m_r + Zm2_r + Zm_r_xm + Zm_r_i_xm + Zm_r_xm_nr + Zr_m_xm_nr + Zibm_r // mmx1,mmx2/mem64,imm8 + Zibr_m + Zmb_r + Zaut_r + Zo_m + Zo_m64 + Zpseudo + Zr_m + Zr_m_xm + Zrp_ + Z_ib + Z_il + Zm_ibo + Zm_ilo + Zib_rr + Zil_rr + Zbyte + + Zvex_rm_v_r + Zvex_rm_v_ro + Zvex_r_v_rm + Zvex_i_rm_vo + Zvex_v_rm_r + Zvex_i_rm_r + Zvex_i_r_v + Zvex_i_rm_v_r + Zvex + Zvex_rm_r_vo + Zvex_i_r_rm + Zvex_hr_rm_v_r + + Zevex_first + Zevex_i_r_k_rm + Zevex_i_r_rm + Zevex_i_rm_k_r + Zevex_i_rm_k_vo + Zevex_i_rm_r + Zevex_i_rm_v_k_r + Zevex_i_rm_v_r + Zevex_i_rm_vo + Zevex_k_rmo + Zevex_r_k_rm + Zevex_r_v_k_rm + Zevex_r_v_rm + Zevex_rm_k_r + Zevex_rm_v_k_r + Zevex_rm_v_r + Zevex_last + + Zmax +) + +const ( + Px = 0 + Px1 = 1 // symbolic; exact value doesn't matter + P32 = 0x32 // 32-bit only + Pe = 0x66 // operand escape + Pm = 0x0f // 2byte opcode escape + Pq = 0xff // both escapes: 66 0f + Pb = 0xfe // byte operands + Pf2 = 0xf2 // xmm escape 1: f2 0f + Pf3 = 0xf3 // xmm escape 2: f3 0f + Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f + Pq3 = 0x67 // xmm escape 3: 66 48 0f + Pq4 = 0x68 // xmm escape 4: 66 0F 38 + Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38 + Pq5 = 0x6a // xmm escape 5: F3 0F 38 + Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38 + Pfw = 0xf4 // Pf3 
with Rex.w: f3 48 0f + Pw = 0x48 // Rex.w + Pw8 = 0x90 // symbolic; exact value doesn't matter + Py = 0x80 // defaults to 64-bit mode + Py1 = 0x81 // symbolic; exact value doesn't matter + Py3 = 0x83 // symbolic; exact value doesn't matter + Pavx = 0x84 // symbolic; exact value doesn't matter + + RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R + Rxw = 1 << 3 // =1, 64-bit operand size + Rxr = 1 << 2 // extend modrm reg + Rxx = 1 << 1 // extend sib index + Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg +) + +const ( + // Encoding for VEX prefix in tables. + // The P, L, and W fields are chosen to match + // their eventual locations in the VEX prefix bytes. + + // Encoding for VEX prefix in tables. + // The P, L, and W fields are chosen to match + // their eventual locations in the VEX prefix bytes. + + // Using spare bit to make leading [E]VEX encoding byte different from + // 0x0f even if all other VEX fields are 0. + avxEscape = 1 << 6 + + // P field - 2 bits + vex66 = 1 << 0 + vexF3 = 2 << 0 + vexF2 = 3 << 0 + // L field - 1 bit + vexLZ = 0 << 2 + vexLIG = 0 << 2 + vex128 = 0 << 2 + vex256 = 1 << 2 + // W field - 1 bit + vexWIG = 0 << 7 + vexW0 = 0 << 7 + vexW1 = 1 << 7 + // M field - 5 bits, but mostly reserved; we can store up to 3 + vex0F = 1 << 3 + vex0F38 = 2 << 3 + vex0F3A = 3 << 3 +) + +var ycover [Ymax * Ymax]uint8 + +var reg [MAXREG]int + +var regrex [MAXREG + 1]int + +var ynone = []ytab{ + {Zlit, 1, argList{}}, +} + +var ytext = []ytab{ + {Zpseudo, 0, argList{Ymb, Ytextsize}}, + {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}}, +} + +var ynop = []ytab{ + {Zpseudo, 0, argList{}}, + {Zpseudo, 0, argList{Yiauto}}, + {Zpseudo, 0, argList{Yml}}, + {Zpseudo, 0, argList{Yrf}}, + {Zpseudo, 0, argList{Yxr}}, + {Zpseudo, 0, argList{Yiauto}}, + {Zpseudo, 0, argList{Yml}}, + {Zpseudo, 0, argList{Yrf}}, + {Zpseudo, 1, argList{Yxr}}, +} + +var yfuncdata = []ytab{ + {Zpseudo, 0, argList{Yi32, Ym}}, +} + +var ypcdata = []ytab{ + {Zpseudo, 0, argList{Yi32, Yi32}}, +} + +var yxorb = []ytab{ + {Zib_, 1, argList{Yi32, Yal}}, + {Zibo_m, 2, argList{Yi32, Ymb}}, + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, +} + +var yaddl = []ytab{ + {Zibo_m, 2, argList{Yi8, Yml}}, + {Zil_, 1, argList{Yi32, Yax}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var yincl = []ytab{ + {Z_rp, 1, argList{Yrl}}, + {Zo_m, 2, argList{Yml}}, +} + +var yincq = []ytab{ + {Zo_m, 2, argList{Yml}}, +} + +var ycmpb = []ytab{ + {Z_ib, 1, argList{Yal, Yi32}}, + {Zm_ibo, 2, argList{Ymb, Yi32}}, + {Zm_r, 1, argList{Ymb, Yrb}}, + {Zr_m, 1, argList{Yrb, Ymb}}, +} + +var ycmpl = []ytab{ + {Zm_ibo, 2, argList{Yml, Yi8}}, + {Z_il, 1, argList{Yax, Yi32}}, + {Zm_ilo, 2, argList{Yml, Yi32}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var yshb = []ytab{ + {Zo_m, 2, argList{Yi1, Ymb}}, + {Zibo_m, 2, argList{Yu8, Ymb}}, + {Zo_m, 2, argList{Ycx, Ymb}}, +} + +var yshl = []ytab{ + {Zo_m, 2, argList{Yi1, Yml}}, + {Zibo_m, 2, argList{Yu8, Yml}}, + {Zo_m, 2, argList{Ycl, Yml}}, + {Zo_m, 2, argList{Ycx, Yml}}, +} + +var ytestl = []ytab{ + {Zil_, 1, argList{Yi32, Yax}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var ymovb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, + {Zib_rp, 1, argList{Yi32, Yrb}}, + {Zibo_m, 2, argList{Yi32, Ymb}}, +} + +var ybtl = []ytab{ + {Zibo_m, 2, argList{Yi8, Yml}}, + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var ymovw = []ytab{ + 
{Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zil_rp, 1, argList{Yi32, Yrl}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zaut_r, 2, argList{Yiauto, Yrl}}, +} + +var ymovl = []ytab{ + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, + {Zil_rp, 1, argList{Yi32, Yrl}}, + {Zilo_m, 2, argList{Yi32, Yml}}, + {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD + {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD + {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit) + {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit) + {Zaut_r, 2, argList{Yiauto, Yrl}}, +} + +var yret = []ytab{ + {Zo_iw, 1, argList{}}, + {Zo_iw, 1, argList{Yi32}}, +} + +var ymovq = []ytab{ + // valid in 32-bit mode + {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding) + {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ + {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q + {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2 + {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64 + + // valid only in 64-bit mode, usually with 64-bit prefix + {Zr_m, 1, argList{Yrl, Yml}}, // 0x89 + {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b + {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0) + {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate + {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0) + {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 0x6e MMX MOVD + {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD + {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load + {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store + {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ +} + +var ymovbe = []ytab{ + {Zlitm_r, 3, argList{Ym, Yrl}}, + {Zlitr_m, 3, argList{Yrl, Ym}}, +} + +var ym_rl = []ytab{ + {Zm_r, 1, argList{Ym, Yrl}}, +} + +var yrl_m = []ytab{ + {Zr_m, 1, argList{Yrl, Ym}}, +} + +var ymb_rl = []ytab{ + {Zmb_r, 1, argList{Ymb, Yrl}}, +} + +var yml_rl = []ytab{ + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var yrl_ml = []ytab{ + {Zr_m, 1, argList{Yrl, Yml}}, +} + +var yml_mb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, + {Zm_r, 1, argList{Ymb, Yrb}}, +} + +var yrb_mb = []ytab{ + {Zr_m, 1, argList{Yrb, Ymb}}, +} + +var yxchg = []ytab{ + {Z_rp, 1, argList{Yax, Yrl}}, + {Zrp_, 1, argList{Yrl, Yax}}, + {Zr_m, 1, argList{Yrl, Yml}}, + {Zm_r, 1, argList{Yml, Yrl}}, +} + +var ydivl = []ytab{ + {Zm_o, 2, argList{Yml}}, +} + +var ydivb = []ytab{ + {Zm_o, 2, argList{Ymb}}, +} + +var yimul = []ytab{ + {Zm_o, 2, argList{Yml}}, + {Zib_rr, 1, argList{Yi8, Yrl}}, + {Zil_rr, 1, argList{Yi32, Yrl}}, + {Zm_r, 2, argList{Yml, Yrl}}, +} + +var yimul3 = []ytab{ + {Zibm_r, 2, argList{Yi8, Yml, Yrl}}, + {Zibm_r, 2, argList{Yi32, Yml, Yrl}}, +} + +var ybyte = []ytab{ + {Zbyte, 1, argList{Yi64}}, +} + +var yin = []ytab{ + {Zib_, 1, argList{Yi32}}, + {Zlit, 1, argList{}}, +} + +var yint = []ytab{ + {Zib_, 1, argList{Yi32}}, +} + +var ypushl = []ytab{ + {Zrp_, 1, argList{Yrl}}, + {Zm_o, 2, argList{Ym}}, + {Zib_, 1, argList{Yi8}}, + {Zil_, 1, argList{Yi32}}, +} + +var ypopl = []ytab{ + {Z_rp, 1, argList{Yrl}}, + {Zo_m, 2, argList{Ym}}, +} + +var ywrfsbase = []ytab{ + {Zm_o, 2, argList{Yrl}}, +} + +var yrdrand = []ytab{ + {Zo_m, 2, argList{Yrl}}, +} + +var yclflush = []ytab{ + {Zo_m, 2, argList{Ym}}, +} + +var ybswap = []ytab{ + {Z_rp, 2, argList{Yrl}}, +} + +var yscond = []ytab{ + {Zo_m, 2, argList{Ymb}}, +} + +var yjcond = []ytab{ + {Zbr, 0, argList{Ybr}}, + {Zbr, 0, argList{Yi0, Ybr}}, + {Zbr, 1, argList{Yi1, Ybr}}, +} + +var yloop = []ytab{ + {Zloop, 1, argList{Ybr}}, +} + +var ycall = 
[]ytab{ + {Zcallindreg, 0, argList{Yml}}, + {Zcallindreg, 2, argList{Yrx, Yrx}}, + {Zcallind, 2, argList{Yindir}}, + {Zcall, 0, argList{Ybr}}, + {Zcallcon, 1, argList{Yi32}}, +} + +var yduff = []ytab{ + {Zcallduff, 1, argList{Yi32}}, +} + +var yjmp = []ytab{ + {Zo_m64, 2, argList{Yml}}, + {Zjmp, 0, argList{Ybr}}, + {Zjmpcon, 1, argList{Yi32}}, +} + +var yfmvd = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zo_m, 2, argList{Yf0, Ym}}, + {Zm_o, 2, argList{Yrf, Yf0}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfmvdp = []ytab{ + {Zo_m, 2, argList{Yf0, Ym}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfmvf = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zo_m, 2, argList{Yf0, Ym}}, +} + +var yfmvx = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, +} + +var yfmvp = []ytab{ + {Zo_m, 2, argList{Yf0, Ym}}, +} + +var yfcmv = []ytab{ + {Zm_o, 2, argList{Yrf, Yf0}}, +} + +var yfadd = []ytab{ + {Zm_o, 2, argList{Ym, Yf0}}, + {Zm_o, 2, argList{Yrf, Yf0}}, + {Zo_m, 2, argList{Yf0, Yrf}}, +} + +var yfxch = []ytab{ + {Zo_m, 2, argList{Yf0, Yrf}}, + {Zm_o, 2, argList{Yrf, Yf0}}, +} + +var ycompp = []ytab{ + {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1 +} + +var ystsw = []ytab{ + {Zo_m, 2, argList{Ym}}, + {Zlit, 1, argList{Yax}}, +} + +var ysvrs_mo = []ytab{ + {Zm_o, 2, argList{Ym}}, +} + +// unaryDst version of "ysvrs_mo". +var ysvrs_om = []ytab{ + {Zo_m, 2, argList{Ym}}, +} + +var ymm = []ytab{ + {Zm_r_xm, 1, argList{Ymm, Ymr}}, + {Zm_r_xm, 2, argList{Yxm, Yxr}}, +} + +var yxm = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yxr}}, +} + +var yxm_q4 = []ytab{ + {Zm_r, 1, argList{Yxm, Yxr}}, +} + +var yxcvm1 = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zm_r_xm, 2, argList{Yxm, Ymr}}, +} + +var yxcvm2 = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zm_r_xm, 2, argList{Ymm, Yxr}}, +} + +var yxr = []ytab{ + {Zm_r_xm, 1, argList{Yxr, Yxr}}, +} + +var yxr_ml = []ytab{ + {Zr_m_xm, 1, argList{Yxr, Yml}}, +} + +var ymr = []ytab{ + {Zm_r, 1, argList{Ymr, Ymr}}, +} + +var ymr_ml = []ytab{ + {Zr_m_xm, 1, argList{Ymr, Yml}}, +} + +var yxcmpi = []ytab{ + {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}}, +} + +var yxmov = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yxr}}, + {Zr_m_xm, 1, argList{Yxr, Yxm}}, +} + +var yxcvfl = []ytab{ + {Zm_r_xm, 1, argList{Yxm, Yrl}}, +} + +var yxcvlf = []ytab{ + {Zm_r_xm, 1, argList{Yml, Yxr}}, +} + +var yxcvfq = []ytab{ + {Zm_r_xm, 2, argList{Yxm, Yrl}}, +} + +var yxcvqf = []ytab{ + {Zm_r_xm, 2, argList{Yml, Yxr}}, +} + +var yps = []ytab{ + {Zm_r_xm, 1, argList{Ymm, Ymr}}, + {Zibo_m_xm, 2, argList{Yi8, Ymr}}, + {Zm_r_xm, 2, argList{Yxm, Yxr}}, + {Zibo_m_xm, 3, argList{Yi8, Yxr}}, +} + +var yxrrl = []ytab{ + {Zm_r, 1, argList{Yxr, Yrl}}, +} + +var ymrxr = []ytab{ + {Zm_r, 1, argList{Ymr, Yxr}}, + {Zm_r_xm, 1, argList{Yxm, Yxr}}, +} + +var ymshuf = []ytab{ + {Zibm_r, 2, argList{Yi8, Ymm, Ymr}}, +} + +var ymshufb = []ytab{ + {Zm2_r, 2, argList{Yxm, Yxr}}, +} + +// It should never have more than 1 entry, +// because some optab entries have opcode sequences that +// are longer than 2 bytes (zoffset=2 here), +// ROUNDPD and ROUNDPS and recently added BLENDPD, +// to name a few. 
+var yxshuf = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, +} + +var yextrw = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxr, Yrl}}, + {Zibr_m, 2, argList{Yu8, Yxr, Yml}}, +} + +var yextr = []ytab{ + {Zibr_m, 3, argList{Yu8, Yxr, Ymm}}, +} + +var yinsrw = []ytab{ + {Zibm_r, 2, argList{Yu8, Yml, Yxr}}, +} + +var yinsr = []ytab{ + {Zibm_r, 3, argList{Yu8, Ymm, Yxr}}, +} + +var ypsdq = []ytab{ + {Zibo_m, 2, argList{Yi8, Yxr}}, +} + +var ymskb = []ytab{ + {Zm_r_xm, 2, argList{Yxr, Yrl}}, + {Zm_r_xm, 1, argList{Ymr, Yrl}}, +} + +var ycrc32l = []ytab{ + {Zlitm_r, 0, argList{Yml, Yrl}}, +} + +var ycrc32b = []ytab{ + {Zlitm_r, 0, argList{Ymb, Yrl}}, +} + +var yprefetch = []ytab{ + {Zm_o, 2, argList{Ym}}, +} + +var yaes = []ytab{ + {Zlitm_r, 2, argList{Yxm, Yxr}}, +} + +var yxbegin = []ytab{ + {Zjmp, 1, argList{Ybr}}, +} + +var yxabort = []ytab{ + {Zib_, 1, argList{Yu8}}, +} + +var ylddqu = []ytab{ + {Zm_r, 1, argList{Ym, Yxr}}, +} + +var ypalignr = []ytab{ + {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, +} + +var ysha256rnds2 = []ytab{ + {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}}, +} + +var yblendvpd = []ytab{ + {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}}, +} + +var ymmxmm0f38 = []ytab{ + {Zlitm_r, 3, argList{Ymm, Ymr}}, + {Zlitm_r, 5, argList{Yxm, Yxr}}, +} + +var yextractps = []ytab{ + {Zibr_m, 2, argList{Yu2, Yxr, Yml}}, +} + +var ysha1rnds4 = []ytab{ + {Zibm_r, 2, argList{Yu2, Yxm, Yxr}}, +} + +// You are doasm, holding in your hand a *obj.Prog with p.As set to, say, +// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab +// to find the entry with the given p.As and then looks through the ytable for +// that instruction (the second field in the optab struct) for a line whose +// first two values match the Ytypes of the p.From and p.To operands. The +// function oclass computes the specific Ytype of an operand and then the set +// of more general Ytypes that it satisfies is implied by the ycover table, set +// up in instinit. For example, oclass distinguishes the constants 0 and 1 +// from the more general 8-bit constants, but instinit says +// +// ycover[Yi0*Ymax+Ys32] = 1 +// ycover[Yi1*Ymax+Ys32] = 1 +// ycover[Yi8*Ymax+Ys32] = 1 +// +// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32) +// if that's what an instruction can handle. +// +// In parallel with the scan through the ytable for the appropriate line, there +// is a z pointer that starts out pointing at the strange magic byte list in +// the Optab struct. With each step past a non-matching ytable line, z +// advances by the 4th entry in the line. When a matching line is found, that +// z pointer has the extra data to use in laying down the instruction bytes. +// The actual bytes laid down are a function of the 3rd entry in the line (that +// is, the Ztype) and the z bytes. +// +// For example, let's look at AADDL. 
The optab line says: +// +// {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, +// +// and yaddl says +// +// var yaddl = []ytab{ +// {Yi8, Ynone, Yml, Zibo_m, 2}, +// {Yi32, Ynone, Yax, Zil_, 1}, +// {Yi32, Ynone, Yml, Zilo_m, 2}, +// {Yrl, Ynone, Yml, Zr_m, 1}, +// {Yml, Ynone, Yrl, Zm_r, 1}, +// } +// +// so there are 5 possible types of ADDL instruction that can be laid down, and +// possible states used to lay them down (Ztype and z pointer, assuming z +// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are: +// +// Yi8, Yml -> Zibo_m, z (0x83, 00) +// Yi32, Yax -> Zil_, z+2 (0x05) +// Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) +// Yrl, Yml -> Zr_m, z+2+1+2 (0x01) +// Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) +// +// The Pconstant in the optab line controls the prefix bytes to emit. That's +// relatively straightforward as this program goes. +// +// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for +// example, is an opcode byte (z[0]) then an asmando (which is some kind of +// encoded addressing mode for the Yml arg), and then a single immediate byte. +// Zilo_m is the same but a long (32-bit) immediate. +var optab = +// as, ytab, andproto, opcode +[...]Optab{ + {obj.AXXX, nil, 0, opBytes{}}, + {AAAA, ynone, P32, opBytes{0x37}}, + {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, + {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, + {AAAS, ynone, P32, opBytes{0x3f}}, + {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, + {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, + {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, + {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, + {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, + {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADDPD, yxm, Pq, opBytes{0x58}}, + {AADDPS, yxm, Pm, opBytes{0x58}}, + {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADDSD, yxm, Pf2, opBytes{0x58}}, + {AADDSS, yxm, Pf3, opBytes{0x58}}, + {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, + {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, + {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, + {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, + {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, + {AADJSP, nil, 0, opBytes{}}, + {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, + {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AANDNPD, yxm, Pq, opBytes{0x55}}, + {AANDNPS, yxm, Pm, opBytes{0x55}}, + {AANDPD, yxm, Pq, opBytes{0x54}}, + {AANDPS, yxm, Pm, opBytes{0x54}}, + {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, + {AARPL, yrl_ml, P32, opBytes{0x63}}, + {ABOUNDL, yrl_m, P32, opBytes{0x62}}, + {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, + {ABSFL, yml_rl, Pm, opBytes{0xbc}}, + {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, + {ABSFW, yml_rl, Pq, opBytes{0xbc}}, + {ABSRL, yml_rl, Pm, opBytes{0xbd}}, + {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}}, + {ABSRW, yml_rl, Pq, opBytes{0xbd}}, + {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, + {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, + {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, + {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, + {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, + {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, + {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, + {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, + {ABTRQ, ybtl, Pw, 
opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, + {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, + {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, + {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, + {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, + {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, + {ABYTE, ybyte, Px, opBytes{1}}, + {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, + {ACBW, ynone, Pe, opBytes{0x98}}, + {ACDQ, ynone, Px, opBytes{0x99}}, + {ACDQE, ynone, Pw, opBytes{0x98}}, + {ACLAC, ynone, Pm, opBytes{01, 0xca}}, + {ACLC, ynone, Px, opBytes{0xf8}}, + {ACLD, ynone, Px, opBytes{0xfc}}, + {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, + {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, + {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, + {ACLI, ynone, Px, opBytes{0xfa}}, + {ACLTS, ynone, Pm, opBytes{0x06}}, + {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, + {ACMC, ynone, Px, opBytes{0xf5}}, + {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, + {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, + {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, + {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, + {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, + {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, + {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, + {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, + {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, + {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, + {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, + {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, + {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, + {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, + {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, + {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, + {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, + {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, + {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, + {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, + {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, + {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, + {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, + {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, + {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, + {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, + {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, + {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, + {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, + {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, + {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, + {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, + {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, + {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, + {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, + {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, + {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, + {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, + {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, + {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, + {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, + {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, + {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, + {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, + {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, + {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, + {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, + {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, + {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, + {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, + {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, + {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + {ACMPSB, ynone, Pb, opBytes{0xa6}}, + {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}}, + {ACMPSL, ynone, Px, opBytes{0xa7}}, + {ACMPSQ, ynone, Pw, opBytes{0xa7}}, + {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, + {ACMPSW, ynone, Pe, opBytes{0xa7}}, + {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, + 
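// A hand-worked sketch (derived by hand from the Intel encodings, not part
// of the upstream table) making the AADDL walkthrough in the comment above
// concrete. With yaddl's z pointer into
// opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}, the assembler lays down:
//
//	ADDL $7, BX    -> 83 c3 07        Zibo_m: z[0]=0x83, ModRM /0 with rm=BX, then imm8
//	ADDL $1000, AX -> 05 e8 03 00 00  Zil_:   z+2 gives 0x05, then imm32, little-endian
//	ADDL BX, CX    -> 01 d9           Zr_m:   z+2+1+2 gives 0x01, ModRM reg=BX, rm=CX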
{ACOMISD, yxm, Pe, opBytes{0x2f}}, + {ACOMISS, yxm, Pm, opBytes{0x2f}}, + {ACPUID, ynone, Pm, opBytes{0xa2}}, + {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, + {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, + {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, + {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, + {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, + {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, + {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, + {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, + {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, + {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, + {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, + {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, + {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, + {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, + {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, + {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, + {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, + {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, + {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, + {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, + {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, + {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, + {ACWD, ynone, Pe, opBytes{0x99}}, + {ACWDE, ynone, Px, opBytes{0x98}}, + {ACQO, ynone, Pw, opBytes{0x99}}, + {ADAA, ynone, P32, opBytes{0x27}}, + {ADAS, ynone, P32, opBytes{0x2f}}, + {ADECB, yscond, Pb, opBytes{0xfe, 01}}, + {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, + {ADECQ, yincq, Pw, opBytes{0xff, 01}}, + {ADECW, yincq, Pe, opBytes{0xff, 01}}, + {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, + {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, + {ADIVPD, yxm, Pe, opBytes{0x5e}}, + {ADIVPS, yxm, Pm, opBytes{0x5e}}, + {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, + {ADIVSD, yxm, Pf2, opBytes{0x5e}}, + {ADIVSS, yxm, Pf3, opBytes{0x5e}}, + {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, + {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, + {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, + {AEMMS, ynone, Pm, opBytes{0x77}}, + {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, + {AENTER, nil, 0, opBytes{}}, // botch + {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, + {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, + {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, + {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, + {AHLT, ynone, Px, opBytes{0xf4}}, + {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, + {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, + {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, + {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, + {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, + {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, + {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, + {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, + {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, + {AINB, yin, Pb, opBytes{0xe4, 0xec}}, + {AINW, yin, Pe, opBytes{0xe5, 0xed}}, + {AINL, yin, Px, opBytes{0xe5, 0xed}}, + {AINCB, yscond, Pb, opBytes{0xfe, 00}}, + {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, + {AINCQ, yincq, Pw, opBytes{0xff, 00}}, + {AINCW, yincq, Pe, opBytes{0xff, 00}}, + {AINSB, ynone, Pb, opBytes{0x6c}}, + {AINSL, ynone, Px, opBytes{0x6d}}, + {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, + {AINSW, ynone, Pe, opBytes{0x6d}}, + {AICEBP, ynone, Px, opBytes{0xf1}}, + {AINT, yint, Px, opBytes{0xcd}}, + {AINTO, ynone, P32, opBytes{0xce}}, + {AIRETL, ynone, Px, opBytes{0xcf}}, + {AIRETQ, ynone, Pw, opBytes{0xcf}}, + {AIRETW, 
ynone, Pe, opBytes{0xcf}}, + {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, + {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, + {AJCXZL, yloop, Px, opBytes{0xe3}}, + {AJCXZW, yloop, Px, opBytes{0xe3}}, + {AJCXZQ, yloop, Px, opBytes{0xe3}}, + {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, + {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, + {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, + {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, + {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, + {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, + {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, + {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, + {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, + {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, + {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, + {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, + {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, + {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, + {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, + {AHADDPD, yxm, Pq, opBytes{0x7c}}, + {AHADDPS, yxm, Pf2, opBytes{0x7c}}, + {AHSUBPD, yxm, Pq, opBytes{0x7d}}, + {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, + {ALAHF, ynone, Px, opBytes{0x9f}}, + {ALARL, yml_rl, Pm, opBytes{0x02}}, + {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, + {ALARW, yml_rl, Pq, opBytes{0x02}}, + {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, + {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, + {ALEAL, ym_rl, Px, opBytes{0x8d}}, + {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, + {ALEAVEL, ynone, P32, opBytes{0xc9}}, + {ALEAVEQ, ynone, Py, opBytes{0xc9}}, + {ALEAVEW, ynone, Pe, opBytes{0xc9}}, + {ALEAW, ym_rl, Pe, opBytes{0x8d}}, + {ALOCK, ynone, Px, opBytes{0xf0}}, + {ALODSB, ynone, Pb, opBytes{0xac}}, + {ALODSL, ynone, Px, opBytes{0xad}}, + {ALODSQ, ynone, Pw, opBytes{0xad}}, + {ALODSW, ynone, Pe, opBytes{0xad}}, + {ALONG, ybyte, Px, opBytes{4}}, + {ALOOP, yloop, Px, opBytes{0xe2}}, + {ALOOPEQ, yloop, Px, opBytes{0xe1}}, + {ALOOPNE, yloop, Px, opBytes{0xe0}}, + {ALTR, ydivl, Pm, opBytes{0x00, 03}}, + {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, + {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, + {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, + {ALSLL, yml_rl, Pm, opBytes{0x03}}, + {ALSLW, yml_rl, Pq, opBytes{0x03}}, + {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, + {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, + {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, + {AMAXPD, yxm, Pe, opBytes{0x5f}}, + {AMAXPS, yxm, Pm, opBytes{0x5f}}, + {AMAXSD, yxm, Pf2, opBytes{0x5f}}, + {AMAXSS, yxm, Pf3, opBytes{0x5f}}, + {AMINPD, yxm, Pe, opBytes{0x5d}}, + {AMINPS, yxm, Pm, opBytes{0x5d}}, + {AMINSD, yxm, Pf2, opBytes{0x5d}}, + {AMINSS, yxm, Pf3, opBytes{0x5d}}, + {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, + {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, + {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, + {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, + {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, + {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, + {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, + {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, + {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, + {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, + {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, + {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, + {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, + {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, + {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, + {AMOVHLPS, yxr, Pm, opBytes{0x12}}, + {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, + {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, + {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, + {AMOVLHPS, yxr, Pm, opBytes{0x16}}, + {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, + {AMOVLPS, yxmov, Pm, 
opBytes{0x12, 0x13}}, + {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, + {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, + {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}}, + {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, + {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, + {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, + {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, + {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, + {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, + {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, + {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, + {AMOVSB, ynone, Pb, opBytes{0xa4}}, + {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, + {AMOVSL, ynone, Px, opBytes{0xa5}}, + {AMOVSQ, ynone, Pw, opBytes{0xa5}}, + {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, + {AMOVSW, ynone, Pe, opBytes{0xa5}}, + {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, + {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, + {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, + {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, + {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, + {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, + {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, + {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, + {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, + {AMULL, ydivl, Px, opBytes{0xf7, 04}}, + {AMULPD, yxm, Pe, opBytes{0x59}}, + {AMULPS, yxm, Ym, opBytes{0x59}}, + {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, + {AMULSD, yxm, Pf2, opBytes{0x59}}, + {AMULSS, yxm, Pf3, opBytes{0x59}}, + {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, + {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, + {ANEGL, yscond, Px, opBytes{0xf7, 03}}, + {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, + {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, + {obj.ANOP, ynop, Px, opBytes{0, 0}}, + {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, + {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
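// A hand-worked sketch of what the prefix classes on the ANOT* rows around
// this point mean for the emitted bytes: the shared recipe is opcode 0xf7
// with ModRM /2, and Px, Pw, and Pe contribute nothing, a REX.W byte, and
// the 0x66 operand-size prefix respectively:
//
//	NOTL BX -> f7 d3
//	NOTQ BX -> 48 f7 d3
//	NOTW BX -> 66 f7 d3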
+ {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, + {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, + {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, + {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AORPD, yxm, Pq, opBytes{0x56}}, + {AORPS, yxm, Pm, opBytes{0x56}}, + {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, + {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, + {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, + {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, + {AOUTSB, ynone, Pb, opBytes{0x6e}}, + {AOUTSL, ynone, Px, opBytes{0x6f}}, + {AOUTSW, ynone, Pe, opBytes{0x6f}}, + {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, + {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, + {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, + {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, + {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, + {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, + {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, + {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, + {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, + {APADDQ, yxm, Pe, opBytes{0xd4}}, + {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, + {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, + {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, + {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, + {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, + {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, + {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, + {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, + {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, + {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, + {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, + {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, + {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, + {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, + {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, + {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, + {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}}, + {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, + {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, + {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, + {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, + {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, + {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, + {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, + {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, + {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, + {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, + {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, + {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, + {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, + {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, + {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, + {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, + {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, + {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, + {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, + {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, + {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, + {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, + {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, + {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, + {APMAXSW, yxm, Pe, opBytes{0xee}}, + {APMAXUB, yxm, Pe, opBytes{0xde}}, + {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, + {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, + {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, + {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, + {APMINSW, yxm, Pe, opBytes{0xea}}, + {APMINUB, yxm, Pe, opBytes{0xda}}, + {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, + {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, + {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, + {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, + 
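// A hand-worked sketch of two encoding patterns visible in the nearby rows:
// Py1 rows such as APADDB list an MMX form followed, after the embedded Pe
// marker, by the 66-prefixed XMM form, while Pq4 rows such as APMOVSXBD are
// reached through the three-byte 66 0f 38 escape:
//
//	PADDB M0, M1    -> 0f fc c8
//	PADDB X0, X1    -> 66 0f fc c8
//	PMOVSXBD X0, X1 -> 66 0f 38 21 c8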
{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, + {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, + {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, + {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, + {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}}, + {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, + {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, + {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, + {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, + {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, + {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, + {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, + {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, + {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, + {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, + {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, + {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, + {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, + {APOPAL, ynone, P32, opBytes{0x61}}, + {APOPAW, ynone, Pe, opBytes{0x61}}, + {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, + {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, + {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, + {APOPFL, ynone, P32, opBytes{0x9d}}, + {APOPFQ, ynone, Py, opBytes{0x9d}}, + {APOPFW, ynone, Pe, opBytes{0x9d}}, + {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, + {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, + {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, + {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, + {APSADBW, yxm, Pq, opBytes{0xf6}}, + {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, + {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, + {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, + {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, + {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, + {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, + {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, + {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, + {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, + {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, + {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, + {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, + {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, + {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}}, + {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, + {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, + {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, + {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, + {APSUBB, yxm, Pe, opBytes{0xf8}}, + {APSUBL, yxm, Pe, opBytes{0xfa}}, + {APSUBQ, yxm, Pe, opBytes{0xfb}}, + {APSUBSB, yxm, Pe, opBytes{0xe8}}, + {APSUBSW, yxm, Pe, opBytes{0xe9}}, + {APSUBUSB, yxm, Pe, opBytes{0xd8}}, + {APSUBUSW, yxm, Pe, opBytes{0xd9}}, + {APSUBW, yxm, Pe, opBytes{0xf9}}, + {APTEST, yxm_q4, Pq4, opBytes{0x17}}, + {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, + {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, + {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, + {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, + {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, + {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, + {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, + {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, + {APUSHAL, ynone, P32, opBytes{0x60}}, + {APUSHAW, ynone, Pe, opBytes{0x60}}, + {APUSHFL, ynone, P32, opBytes{0x9c}}, + {APUSHFQ, ynone, Py, opBytes{0x9c}}, + {APUSHFW, ynone, Pe, opBytes{0x9c}}, + {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, + {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, + {AQUAD, ybyte, Px, opBytes{8}}, + {ARCLB, yshb, Pb, opBytes{0xd0, 
02, 0xc0, 02, 0xd2, 02}}, + {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, + {ARCPPS, yxm, Pm, opBytes{0x53}}, + {ARCPSS, yxm, Pf3, opBytes{0x53}}, + {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, + {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, + {AREP, ynone, Px, opBytes{0xf3}}, + {AREPN, ynone, Px, opBytes{0xf2}}, + {obj.ARET, ynone, Px, opBytes{0xc3}}, + {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, + {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, + {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, + {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, + {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, + {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, + {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, + {ARSQRTPS, yxm, Pm, opBytes{0x52}}, + {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, + {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL + {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, + {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, + {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, + {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, + {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, + {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, + {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, + {ASCASB, ynone, Pb, opBytes{0xae}}, + {ASCASL, ynone, Px, opBytes{0xaf}}, + {ASCASQ, ynone, Pw, opBytes{0xaf}}, + {ASCASW, ynone, Pe, opBytes{0xaf}}, + {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, + {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, + {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, + {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, + {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, + {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, + {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, + {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, + {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, + {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, + {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, + {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, + {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, + {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, + {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, + {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, + {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, + {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, + 
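// A hand-worked sketch of the three shift encodings that yshl distinguishes
// for the ASHL* rows above; all share ModRM /4, matching the 04 bytes in
// opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}:
//
//	SHLL $1, AX -> d1 e0     shift-by-one form
//	SHLL $3, AX -> c1 e0 03  imm8 form
//	SHLL CX, AX -> d3 e0     count taken from CL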
{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, + {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, + {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, + {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, + {ASQRTPD, yxm, Pe, opBytes{0x51}}, + {ASQRTPS, yxm, Pm, opBytes{0x51}}, + {ASQRTSD, yxm, Pf2, opBytes{0x51}}, + {ASQRTSS, yxm, Pf3, opBytes{0x51}}, + {ASTC, ynone, Px, opBytes{0xf9}}, + {ASTD, ynone, Px, opBytes{0xfd}}, + {ASTI, ynone, Px, opBytes{0xfb}}, + {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}}, + {ASTOSB, ynone, Pb, opBytes{0xaa}}, + {ASTOSL, ynone, Px, opBytes{0xab}}, + {ASTOSQ, ynone, Pw, opBytes{0xab}}, + {ASTOSW, ynone, Pe, opBytes{0xab}}, + {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, + {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASUBPD, yxm, Pe, opBytes{0x5c}}, + {ASUBPS, yxm, Pm, opBytes{0x5c}}, + {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASUBSD, yxm, Pf2, opBytes{0x5c}}, + {ASUBSS, yxm, Pf3, opBytes{0x5c}}, + {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, + {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, + {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall + {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, + {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, + {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, + {obj.ATEXT, ytext, Px, opBytes{}}, + {AUCOMISD, yxm, Pe, opBytes{0x2e}}, + {AUCOMISS, yxm, Pm, opBytes{0x2e}}, + {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, + {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, + {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, + {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, + {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, + {AVERR, ydivl, Pm, opBytes{0x00, 04}}, + {AVERW, ydivl, Pm, opBytes{0x00, 05}}, + {AWAIT, ynone, Px, opBytes{0x9b}}, + {AWORD, ybyte, Px, opBytes{2}}, + {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, + {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, + {AXLAT, ynone, Px, opBytes{0xd7}}, + {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, + {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AXORPD, yxm, Pe, opBytes{0x57}}, + {AXORPS, yxm, Pm, opBytes{0x57}}, + {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, + {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, + {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, + {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, + {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, + {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, + {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, + {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, + {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, + {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, + {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, + {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, + {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, + {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, + {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, + {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, + {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, + {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, + {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, + {AFCMOVLS, yfcmv, 
Px, opBytes{0xda, 02}}, + {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, + {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, + {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, + {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, + {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, + {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, + {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, + {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, + {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, + {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch + {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch + {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, + {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, + {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, + {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, + {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, + {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, + {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}}, + {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, + {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, + {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, + {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, + {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, + {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, + {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, + {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, + {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, + {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, + {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, + {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, + {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, + {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, + {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, + {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, + {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, + {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, + {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, + {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, + {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, + {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, + {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, + {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, + {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, + {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, + {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, + {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, + {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, + {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, + {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, + {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, + {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, + {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, + {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, + {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, + {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, + {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, + {AFFREE, nil, 0, opBytes{}}, + {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, + {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, + {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, + {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, + {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, + {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, + {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, + {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, + {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, + {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, + {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, + {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, + {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, + {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, + {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, + {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, + {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, + {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, + {AFLDL2E, ynone, Px, 
opBytes{0xd9, 0xea}}, + {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, + {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, + {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, + {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, + {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, + {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, + {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, + {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, + {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, + {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, + {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, + {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, + {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, + {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, + {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, + {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, + {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, + {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, + {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, + {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, + {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, + {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, + {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}}, + {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, + {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, + {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, + {AINVD, ynone, Pm, opBytes{0x08}}, + {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, + {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, + {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, + {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, + {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, + {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, + {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, + {ARDMSR, ynone, Pm, opBytes{0x32}}, + {ARDPMC, ynone, Pm, opBytes{0x33}}, + {ARDTSC, ynone, Pm, opBytes{0x31}}, + {ARSM, ynone, Pm, opBytes{0xaa}}, + {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, + {ASYSRET, ynone, Pm, opBytes{0x07}}, + {AWBINVD, ynone, Pm, opBytes{0x09}}, + {AWRMSR, ynone, Pm, opBytes{0x30}}, + {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, + {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, + {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, + {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, + {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, + {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, + {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, + {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, + {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, + {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, + {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, + {AMOVQL, yrl_ml, Px, opBytes{0x89}}, + {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, + {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, + {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, + {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, + {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, + {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, + {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, + {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, + {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, + {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, + {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, + {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, + {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, + {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, + {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, + {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, + {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, + {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, + {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, + {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 
0}}, + {AUD1, ynone, Pm, opBytes{0xb9, 0}}, + {AUD2, ynone, Pm, opBytes{0x0b, 0}}, + {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, + {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, + {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, + {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, + {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, + {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, + {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, + {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, + {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, + {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, + {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, + {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, + {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, + {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, + {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, + {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, + {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, + {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, + {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, + {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, + {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}}, + {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, + {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, + {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, + {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, + {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, + {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, + {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, + {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, + {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, + {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, + {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, + {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, + {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, + {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, + {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, + {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, + {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, + {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, + {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, + {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, + {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, + {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, + {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, + {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, + {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, + {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, + {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, + {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, + {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, + {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, + {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, + {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, + {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, + {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, + {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, + {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, + {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}}, + {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, + {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, + {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, + {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, + {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, + {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, + {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, + {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, + {ALGSW, 
ym_rl, Pe, opBytes{0x0f, 0xb5}}, + {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, + {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, + {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, + {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, + {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, + {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}}, + + {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, + {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, + {AXACQUIRE, ynone, Px, opBytes{0xf2}}, + {AXRELEASE, ynone, Px, opBytes{0xf3}}, + {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, + {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, + {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, + {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, + {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, + {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, + {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, + {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, + {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, + + {obj.AEND, nil, 0, opBytes{}}, + {0, nil, 0, opBytes{}}, +} + +var opindex [(ALAST + 1) & obj.AMask]*Optab + +// useAbs reports whether s describes a symbol that must avoid pc-relative addressing. +// This happens on systems like Solaris that call .so functions instead of system calls. +// It does not seem to be necessary for any other systems. This is probably working +// around a Solaris-specific bug that should be fixed differently, but we don't know +// what that bug is. And this does fix it. +func useAbs(ctxt *obj.Link, s *obj.LSym) bool { + if ctxt.Headtype == objabi.Hsolaris { + // All the Solaris dynamic imports from libc.so begin with "libc_". + return strings.HasPrefix(s.Name, "libc_") + } + return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared +} + +// single-instruction no-ops of various lengths. +// constructed by hand and disassembled with gdb to verify. +// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. +var nop = [][16]uint8{ + {0x90}, + {0x66, 0x90}, + {0x0F, 0x1F, 0x00}, + {0x0F, 0x1F, 0x40, 0x00}, + {0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, +} + +// Native Client rejects the repeated 0x66 prefix. +// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, +func fillnop(p []byte, n int) { + var m int + + for n > 0 { + m = n + if m > len(nop) { + m = len(nop) + } + copy(p[:m], nop[m-1][:m]) + p = p[m:] + n -= m + } +} + +func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { + s.Grow(int64(c) + int64(pad)) + fillnop(s.P[c:], int(pad)) + return c + pad +} + +func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { + if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { + return l + } + return q +} + +// isJump returns whether p is a jump instruction. +// It is used to ensure that no standalone or macro-fused jump will straddle +// or end on a 32 byte boundary by inserting NOPs before the jumps. +func isJump(p *obj.Prog) bool { + return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || + p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO +} + +// lookForJCC returns the first real instruction starting from p, if that instruction is a conditional +// jump. Otherwise, nil is returned. 
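// For example, in the sequence CMPQ AX, BX; PCDATA $1, $0; JEQ target,
// calling lookForJCC on the CMPQ skips over the PCDATA and returns the JEQ,
// which is what lets fusedJump below treat CMPQ+JEQ as a single macro-fused
// pair when sizing jump padding.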
+func lookForJCC(p *obj.Prog) *obj.Prog { + // Skip any PCDATA, FUNCDATA or NOP instructions + var q *obj.Prog + for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { + } + + if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { + return nil + } + + switch q.As { + case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, + AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: + default: + return nil + } + + return q +} + +// fusedJump determines whether p can be fused with a subsequent conditional jump instruction. +// If it can, we return true followed by the total size of the fused jump. If it can't, we return false. +// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. +func fusedJump(p *obj.Prog) (bool, uint8) { + var fusedSize uint8 + + // The first instruction in a macro fused pair may be preceded by the LOCK prefix, + // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we + // need to be careful to insert any padding before the locks rather than directly after them. + + if p.As == AXRELEASE || p.As == AXACQUIRE { + fusedSize += p.Isize + for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { + } + if p == nil { + return false, 0 + } + } + if p.As == ALOCK { + fusedSize += p.Isize + for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { + } + if p == nil { + return false, 0 + } + } + cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW + + cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || + p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp + + testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || + p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW + + incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || + p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW + + if !cmpAddSub && !testAnd && !incDec { + return false, 0 + } + + if !incDec { + var argOne obj.AddrType + var argTwo obj.AddrType + if cmp { + argOne = p.From.Type + argTwo = p.To.Type + } else { + argOne = p.To.Type + argTwo = p.From.Type + } + if argOne == obj.TYPE_REG { + if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { + return false, 0 + } + } else if argOne == obj.TYPE_MEM { + if argTwo != obj.TYPE_REG { + return false, 0 + } + } else { + return false, 0 + } + } + + fusedSize += p.Isize + jmp := lookForJCC(p) + if jmp == nil { + return false, 0 + } + + fusedSize += jmp.Isize + + if testAnd { + return true, fusedSize + } + + if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || + jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { + return false, 0 + } + + if cmpAddSub { + return true, fusedSize + } + + if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { + return false, 0 + } + + return true, fusedSize +} + +type padJumpsCtx int32 + +func makePjcCtx(ctxt *obj.Link) padJumpsCtx { + // Disable jump padding on 32 bit builds by setting + // padJumps to 0. + if ctxt.Arch.Family == sys.I386 { + return padJumpsCtx(0) + } + + // Disable jump padding for hand written assembly code. + if ctxt.IsAsm { + return padJumpsCtx(0) + } + + return padJumpsCtx(32) +} + +// padJump detects whether the instruction being assembled is a standalone or a macro-fused +// jump that needs to be padded. 
If it is, NOPs are inserted to ensure that the jump does +// not cross or end on a 32 byte boundary. +func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { + if pjc == 0 { + return c + } + + var toPad int32 + fj, fjSize := fusedJump(p) + mask := int32(pjc - 1) + if fj { + if (c&mask)+int32(fjSize) >= int32(pjc) { + toPad = int32(pjc) - (c & mask) + } + } else if isJump(p) { + if (c&mask)+int32(p.Isize) >= int32(pjc) { + toPad = int32(pjc) - (c & mask) + } + } + if toPad <= 0 { + return c + } + + return noppad(ctxt, s, c, toPad) +} + +// reAssemble is called if an instruction's size changes during assembly. If +// it does and the instruction is a standalone or a macro-fused jump we need to +// reassemble. +func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool { + if pjc == 0 { + return false + } + + fj, _ := fusedJump(p) + return fj || isJump(p) +} + +type nopPad struct { + p *obj.Prog // Instruction before the pad + n int32 // Size of the pad +} + +// Padding bytes to add to align code as requested. +// Alignment is restricted to powers of 2 between 8 and 2048 inclusive. +// +// pc: current offset in function, in bytes +// a: requested alignment, in bytes +// cursym: current function being assembled +// returns number of bytes of padding needed +func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { + if !((a&(a-1) == 0) && 8 <= a && a <= 2048) { + ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a) + return 0 + } + + // By default function alignment is 32 bytes for amd64 + if cursym.Func().Align < int32(a) { + cursym.Func().Align = int32(a) + } + + if pc&(a-1) != 0 { + return int(a - (pc & (a - 1))) + } + + return 0 +} + +func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 { + ctxt.Diag("-spectre=ret not supported on 386") + ctxt.Retpoline = false // don't keep printing + } + + pjc := makePjcCtx(ctxt) + + if s.P != nil { + return + } + + if ycover[0] == 0 { + ctxt.Diag("x86 tables not initialized, call x86.instinit first") + } + + for p := s.Func().Text; p != nil; p = p.Link { + if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { + p.To.SetTarget(p) + } + if p.As == AADJSP { + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_SP + // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive. + // One exception: It is smaller to encode $-0x80 than $0x80. + // For that case, flip the sign and the op: + // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'. 
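+ // Illustrative mapping: ADJSP $16 becomes SUBQ $16, SP and
+ // ADJSP $-16 becomes ADDQ $16, SP; the boundary cases
+ // ADJSP $0x80 -> ADDQ $-0x80, SP and ADJSP $-0x80 -> SUBQ $-0x80, SP
+ // keep the immediate within the sign-extended byte range.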
+ switch v := p.From.Offset; { + case v == 0: + p.As = obj.ANOP + case v == 0x80 || (v < 0 && v != -0x80): + p.As = spadjop(ctxt, AADDL, AADDQ) + p.From.Offset *= -1 + default: + p.As = spadjop(ctxt, ASUBL, ASUBQ) + } + } + if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) { + if p.To.Type != obj.TYPE_REG { + ctxt.Diag("non-retpoline-compatible: %v", p) + continue + } + p.To.Type = obj.TYPE_BRANCH + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg))) + p.To.Reg = 0 + p.To.Offset = 0 + } + } + + var count int64 // rough count of number of instructions + for p := s.Func().Text; p != nil; p = p.Link { + count++ + p.Back = branchShort // use short branches first time through + if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { + p.Back |= branchBackwards + q.Back |= branchLoopHead + } + } + s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction + + var ab AsmBuf + var n int + var c int32 + errors := ctxt.Errors + var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) + nrelocs0 := len(s.R) + for { + // This loop continues while there are reasons to re-assemble + // whole block, like the presence of long forward jumps. + reAssemble := false + for i := range s.R[nrelocs0:] { + s.R[nrelocs0+i] = obj.Reloc{} + } + s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler + s.P = s.P[:0] + c = 0 + var pPrev *obj.Prog + nops = nops[:0] + for p := s.Func().Text; p != nil; p = p.Link { + c0 := c + c = pjc.padJump(ctxt, s, p, c) + + if p.As == obj.APCALIGN { + aln := p.From.Offset + v := addpad(int64(c), aln, ctxt, s) + if v > 0 { + s.Grow(int64(c) + int64(v)) + fillnop(s.P[c:], int(v)) + } + + c += int32(v) + pPrev = p + continue + } + + if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 { + // pad with NOPs + v := -c & (loopAlign - 1) + + if v <= maxLoopPad { + s.Grow(int64(c) + int64(v)) + fillnop(s.P[c:], int(v)) + c += v + } + } + + p.Pc = int64(c) + + // process forward jumps to p + for q := p.Rel; q != nil; q = q.Forwd { + v := int32(p.Pc - (q.Pc + int64(q.Isize))) + if q.Back&branchShort != 0 { + if v > 127 { + reAssemble = true + q.Back ^= branchShort + } + + if q.As == AJCXZL || q.As == AXBEGIN { + s.P[q.Pc+2] = byte(v) + } else { + s.P[q.Pc+1] = byte(v) + } + } else { + binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) + } + } + + p.Rel = nil + + p.Pc = int64(c) + ab.asmins(ctxt, s, p) + m := ab.Len() + if int(p.Isize) != m { + p.Isize = uint8(m) + if pjc.reAssemble(p) { + // We need to re-assemble here to check for jumps and fused jumps + // that span or end on 32 byte boundaries. + reAssemble = true + } + } + + s.Grow(p.Pc + int64(m)) + copy(s.P[p.Pc:], ab.Bytes()) + // If there was padding, remember it. 
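+ // (The pad is spliced back into the Prog list as an explicit NOP
+ // once the instruction sizes converge; see the nops loop below.)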
+ if pPrev != nil && !ctxt.IsAsm && c > c0 { + nops = append(nops, nopPad{p: pPrev, n: c - c0}) + } + c += int32(m) + pPrev = p + } + + n++ + if n > 1000 { + ctxt.Diag("span must be looping") + log.Fatalf("loop") + } + if !reAssemble { + break + } + if ctxt.Errors > errors { + return + } + } + // splice padding nops into Progs + for _, n := range nops { + pp := n.p + np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)} + pp.Link = np + } + + s.Size = int64(c) + + if false { /* debug['a'] > 1 */ + fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) + var i int + for i = 0; i < len(s.P); i++ { + fmt.Printf(" %.2x", s.P[i]) + if i%16 == 15 { + fmt.Printf("\n %.6x", uint(i+1)) + } + } + + if i%16 != 0 { + fmt.Printf("\n") + } + + for i := 0; i < len(s.R); i++ { + r := &s.R[i] + fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) + } + } + + // Mark nonpreemptible instruction sequences. + // The 2-instruction TLS access sequence + // MOVQ TLS, BX + // MOVQ 0(BX)(TLS*1), BX + // is not async preemptible, as if it is preempted and resumed on + // a different thread, the TLS address may become invalid. + if !CanUse1InsnTLS(ctxt) { + useTLS := func(p *obj.Prog) bool { + // Only need to mark the second instruction, which has + // REG_TLS as Index. (It is okay to interrupt and restart + // the first instruction.) + return p.From.Index == REG_TLS + } + obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) + } + + // Now that we know byte offsets, we can generate jump table entries. + // TODO: could this live in obj instead of obj/$ARCH? + for _, jt := range s.Func().JumpTables { + for i, p := range jt.Targets { + // The ith jumptable entry points to the p.Pc'th + // byte in the function symbol s. + jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc) + } + } +} + +func instinit(ctxt *obj.Link) { + if ycover[0] != 0 { + // Already initialized; stop now. + // This happens in the cmd/asm tests, + // each of which re-initializes the arch. 
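+ // ycover[0] doubles as the "initialized" flag: the identity loop
+ // below sets it, and span6 diagnoses any attempt to assemble
+ // before that has happened.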
+ return + } + + switch ctxt.Headtype { + case objabi.Hplan9: + plan9privates = ctxt.Lookup("_privates") + } + + for i := range avxOptab { + c := avxOptab[i].as + if opindex[c&obj.AMask] != nil { + ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) + } + opindex[c&obj.AMask] = &avxOptab[i] + } + for i := 1; optab[i].as != 0; i++ { + c := optab[i].as + if opindex[c&obj.AMask] != nil { + ctxt.Diag("phase error in optab: %d (%v)", i, c) + } + opindex[c&obj.AMask] = &optab[i] + } + + for i := 0; i < Ymax; i++ { + ycover[i*Ymax+i] = 1 + } + + ycover[Yi0*Ymax+Yu2] = 1 + ycover[Yi1*Ymax+Yu2] = 1 + + ycover[Yi0*Ymax+Yi8] = 1 + ycover[Yi1*Ymax+Yi8] = 1 + ycover[Yu2*Ymax+Yi8] = 1 + ycover[Yu7*Ymax+Yi8] = 1 + + ycover[Yi0*Ymax+Yu7] = 1 + ycover[Yi1*Ymax+Yu7] = 1 + ycover[Yu2*Ymax+Yu7] = 1 + + ycover[Yi0*Ymax+Yu8] = 1 + ycover[Yi1*Ymax+Yu8] = 1 + ycover[Yu2*Ymax+Yu8] = 1 + ycover[Yu7*Ymax+Yu8] = 1 + + ycover[Yi0*Ymax+Ys32] = 1 + ycover[Yi1*Ymax+Ys32] = 1 + ycover[Yu2*Ymax+Ys32] = 1 + ycover[Yu7*Ymax+Ys32] = 1 + ycover[Yu8*Ymax+Ys32] = 1 + ycover[Yi8*Ymax+Ys32] = 1 + + ycover[Yi0*Ymax+Yi32] = 1 + ycover[Yi1*Ymax+Yi32] = 1 + ycover[Yu2*Ymax+Yi32] = 1 + ycover[Yu7*Ymax+Yi32] = 1 + ycover[Yu8*Ymax+Yi32] = 1 + ycover[Yi8*Ymax+Yi32] = 1 + ycover[Ys32*Ymax+Yi32] = 1 + + ycover[Yi0*Ymax+Yi64] = 1 + ycover[Yi1*Ymax+Yi64] = 1 + ycover[Yu7*Ymax+Yi64] = 1 + ycover[Yu2*Ymax+Yi64] = 1 + ycover[Yu8*Ymax+Yi64] = 1 + ycover[Yi8*Ymax+Yi64] = 1 + ycover[Ys32*Ymax+Yi64] = 1 + ycover[Yi32*Ymax+Yi64] = 1 + + ycover[Yal*Ymax+Yrb] = 1 + ycover[Ycl*Ymax+Yrb] = 1 + ycover[Yax*Ymax+Yrb] = 1 + ycover[Ycx*Ymax+Yrb] = 1 + ycover[Yrx*Ymax+Yrb] = 1 + ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 + + ycover[Ycl*Ymax+Ycx] = 1 + + ycover[Yax*Ymax+Yrx] = 1 + ycover[Ycx*Ymax+Yrx] = 1 + + ycover[Yax*Ymax+Yrl] = 1 + ycover[Ycx*Ymax+Yrl] = 1 + ycover[Yrx*Ymax+Yrl] = 1 + ycover[Yrl32*Ymax+Yrl] = 1 + + ycover[Yf0*Ymax+Yrf] = 1 + + ycover[Yal*Ymax+Ymb] = 1 + ycover[Ycl*Ymax+Ymb] = 1 + ycover[Yax*Ymax+Ymb] = 1 + ycover[Ycx*Ymax+Ymb] = 1 + ycover[Yrx*Ymax+Ymb] = 1 + ycover[Yrb*Ymax+Ymb] = 1 + ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 + ycover[Ym*Ymax+Ymb] = 1 + + ycover[Yax*Ymax+Yml] = 1 + ycover[Ycx*Ymax+Yml] = 1 + ycover[Yrx*Ymax+Yml] = 1 + ycover[Yrl*Ymax+Yml] = 1 + ycover[Yrl32*Ymax+Yml] = 1 + ycover[Ym*Ymax+Yml] = 1 + + ycover[Yax*Ymax+Ymm] = 1 + ycover[Ycx*Ymax+Ymm] = 1 + ycover[Yrx*Ymax+Ymm] = 1 + ycover[Yrl*Ymax+Ymm] = 1 + ycover[Yrl32*Ymax+Ymm] = 1 + ycover[Ym*Ymax+Ymm] = 1 + ycover[Ymr*Ymax+Ymm] = 1 + + ycover[Yxr0*Ymax+Yxr] = 1 + + ycover[Ym*Ymax+Yxm] = 1 + ycover[Yxr0*Ymax+Yxm] = 1 + ycover[Yxr*Ymax+Yxm] = 1 + + ycover[Ym*Ymax+Yym] = 1 + ycover[Yyr*Ymax+Yym] = 1 + + ycover[Yxr0*Ymax+YxrEvex] = 1 + ycover[Yxr*Ymax+YxrEvex] = 1 + + ycover[Ym*Ymax+YxmEvex] = 1 + ycover[Yxr0*Ymax+YxmEvex] = 1 + ycover[Yxr*Ymax+YxmEvex] = 1 + ycover[YxrEvex*Ymax+YxmEvex] = 1 + + ycover[Yyr*Ymax+YyrEvex] = 1 + + ycover[Ym*Ymax+YymEvex] = 1 + ycover[Yyr*Ymax+YymEvex] = 1 + ycover[YyrEvex*Ymax+YymEvex] = 1 + + ycover[Ym*Ymax+Yzm] = 1 + ycover[Yzr*Ymax+Yzm] = 1 + + ycover[Yk0*Ymax+Yk] = 1 + ycover[Yknot0*Ymax+Yk] = 1 + + ycover[Yk0*Ymax+Ykm] = 1 + ycover[Yknot0*Ymax+Ykm] = 1 + ycover[Yk*Ymax+Ykm] = 1 + ycover[Ym*Ymax+Ykm] = 1 + + ycover[Yxvm*Ymax+YxvmEvex] = 1 + + ycover[Yyvm*Ymax+YyvmEvex] = 1 + + for i := 0; i < MAXREG; i++ { + reg[i] = -1 + if i >= REG_AL && i <= REG_R15B { + reg[i] = (i - REG_AL) & 7 + if i >= REG_SPB && i <= REG_DIB { + regrex[i] = 0x40 + } + if i >= REG_R8B && i <= REG_R15B { + regrex[i] = Rxr | Rxx | Rxb + } + } + + if i >= REG_AH && i <= 
REG_BH { + reg[i] = 4 + ((i - REG_AH) & 7) + } + if i >= REG_AX && i <= REG_R15 { + reg[i] = (i - REG_AX) & 7 + if i >= REG_R8 { + regrex[i] = Rxr | Rxx | Rxb + } + } + + if i >= REG_F0 && i <= REG_F0+7 { + reg[i] = (i - REG_F0) & 7 + } + if i >= REG_M0 && i <= REG_M0+7 { + reg[i] = (i - REG_M0) & 7 + } + if i >= REG_K0 && i <= REG_K0+7 { + reg[i] = (i - REG_K0) & 7 + } + if i >= REG_X0 && i <= REG_X0+15 { + reg[i] = (i - REG_X0) & 7 + if i >= REG_X0+8 { + regrex[i] = Rxr | Rxx | Rxb + } + } + if i >= REG_X16 && i <= REG_X16+15 { + reg[i] = (i - REG_X16) & 7 + if i >= REG_X16+8 { + regrex[i] = Rxr | Rxx | Rxb | RxrEvex + } else { + regrex[i] = RxrEvex + } + } + if i >= REG_Y0 && i <= REG_Y0+15 { + reg[i] = (i - REG_Y0) & 7 + if i >= REG_Y0+8 { + regrex[i] = Rxr | Rxx | Rxb + } + } + if i >= REG_Y16 && i <= REG_Y16+15 { + reg[i] = (i - REG_Y16) & 7 + if i >= REG_Y16+8 { + regrex[i] = Rxr | Rxx | Rxb | RxrEvex + } else { + regrex[i] = RxrEvex + } + } + if i >= REG_Z0 && i <= REG_Z0+15 { + reg[i] = (i - REG_Z0) & 7 + if i > REG_Z0+7 { + regrex[i] = Rxr | Rxx | Rxb + } + } + if i >= REG_Z16 && i <= REG_Z16+15 { + reg[i] = (i - REG_Z16) & 7 + if i >= REG_Z16+8 { + regrex[i] = Rxr | Rxx | Rxb | RxrEvex + } else { + regrex[i] = RxrEvex + } + } + + if i >= REG_CR+8 && i <= REG_CR+15 { + regrex[i] = Rxr + } + } +} + +var isAndroid = buildcfg.GOOS == "android" + +func prefixof(ctxt *obj.Link, a *obj.Addr) int { + if a.Reg < REG_CS && a.Index < REG_CS { // fast path + return 0 + } + if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { + switch a.Reg { + case REG_CS: + return 0x2e + + case REG_DS: + return 0x3e + + case REG_ES: + return 0x26 + + case REG_FS: + return 0x64 + + case REG_GS: + return 0x65 + + case REG_TLS: + // NOTE: Systems listed here should be only systems that + // support direct TLS references like 8(TLS) implemented as + // direct references from FS or GS. Systems that require + // the initial-exec model, where you load the TLS base into + // a register and then index from that register, do not reach + // this code and should not be listed. + if ctxt.Arch.Family == sys.I386 { + switch ctxt.Headtype { + default: + if isAndroid { + return 0x65 // GS + } + log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) + + case objabi.Hdarwin, + objabi.Hdragonfly, + objabi.Hfreebsd, + objabi.Hnetbsd, + objabi.Hopenbsd: + return 0x65 // GS + } + } + + switch ctxt.Headtype { + default: + log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) + + case objabi.Hlinux: + if isAndroid { + return 0x64 // FS + } + + if ctxt.Flag_shared { + log.Fatalf("unknown TLS base register for linux with -shared") + } else { + return 0x64 // FS + } + + case objabi.Hdragonfly, + objabi.Hfreebsd, + objabi.Hnetbsd, + objabi.Hopenbsd, + objabi.Hsolaris: + return 0x64 // FS + + case objabi.Hdarwin: + return 0x65 // GS + } + } + } + + switch a.Index { + case REG_CS: + return 0x2e + + case REG_DS: + return 0x3e + + case REG_ES: + return 0x26 + + case REG_TLS: + if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows { + // When building for inclusion into a shared library, an instruction of the form + // MOV off(CX)(TLS*1), AX + // becomes + // mov %gs:off(%ecx), %eax // on i386 + // mov %fs:off(%rcx), %rax // on amd64 + // which assumes that the correct TLS offset has been loaded into CX (today + // there is only one TLS variable -- g -- so this is OK). 
When not building for + // a shared library the instruction it becomes + // mov 0x0(%ecx), %eax // on i386 + // mov 0x0(%rcx), %rax // on amd64 + // and a R_TLS_LE relocation, and so does not require a prefix. + if ctxt.Arch.Family == sys.I386 { + return 0x65 // GS + } + return 0x64 // FS + } + + case REG_FS: + return 0x64 + + case REG_GS: + return 0x65 + } + + return 0 +} + +// oclassRegList returns multisource operand class for addr. +func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { + // TODO(quasilyte): when oclass register case is refactored into + // lookup table, use it here to get register kind more easily. + // Helper functions like regIsXmm should go away too (they will become redundant). + + regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } + regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } + regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } + + reg0, reg1 := decodeRegisterRange(addr.Offset) + low := regIndex(int16(reg0)) + high := regIndex(int16(reg1)) + + if ctxt.Arch.Family == sys.I386 { + if low >= 8 || high >= 8 { + return Yxxx + } + } + + switch high - low { + case 3: + switch { + case regIsXmm(reg0) && regIsXmm(reg1): + return YxrEvexMulti4 + case regIsYmm(reg0) && regIsYmm(reg1): + return YyrEvexMulti4 + case regIsZmm(reg0) && regIsZmm(reg1): + return YzrMulti4 + default: + return Yxxx + } + default: + return Yxxx + } +} + +// oclassVMem returns V-mem (vector memory with VSIB) operand class. +// For addr that is not V-mem returns (Yxxx, false). +func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { + switch addr.Index { + case REG_X0 + 0, + REG_X0 + 1, + REG_X0 + 2, + REG_X0 + 3, + REG_X0 + 4, + REG_X0 + 5, + REG_X0 + 6, + REG_X0 + 7: + return Yxvm, true + case REG_X8 + 0, + REG_X8 + 1, + REG_X8 + 2, + REG_X8 + 3, + REG_X8 + 4, + REG_X8 + 5, + REG_X8 + 6, + REG_X8 + 7: + if ctxt.Arch.Family == sys.I386 { + return Yxxx, true + } + return Yxvm, true + case REG_X16 + 0, + REG_X16 + 1, + REG_X16 + 2, + REG_X16 + 3, + REG_X16 + 4, + REG_X16 + 5, + REG_X16 + 6, + REG_X16 + 7, + REG_X16 + 8, + REG_X16 + 9, + REG_X16 + 10, + REG_X16 + 11, + REG_X16 + 12, + REG_X16 + 13, + REG_X16 + 14, + REG_X16 + 15: + if ctxt.Arch.Family == sys.I386 { + return Yxxx, true + } + return YxvmEvex, true + + case REG_Y0 + 0, + REG_Y0 + 1, + REG_Y0 + 2, + REG_Y0 + 3, + REG_Y0 + 4, + REG_Y0 + 5, + REG_Y0 + 6, + REG_Y0 + 7: + return Yyvm, true + case REG_Y8 + 0, + REG_Y8 + 1, + REG_Y8 + 2, + REG_Y8 + 3, + REG_Y8 + 4, + REG_Y8 + 5, + REG_Y8 + 6, + REG_Y8 + 7: + if ctxt.Arch.Family == sys.I386 { + return Yxxx, true + } + return Yyvm, true + case REG_Y16 + 0, + REG_Y16 + 1, + REG_Y16 + 2, + REG_Y16 + 3, + REG_Y16 + 4, + REG_Y16 + 5, + REG_Y16 + 6, + REG_Y16 + 7, + REG_Y16 + 8, + REG_Y16 + 9, + REG_Y16 + 10, + REG_Y16 + 11, + REG_Y16 + 12, + REG_Y16 + 13, + REG_Y16 + 14, + REG_Y16 + 15: + if ctxt.Arch.Family == sys.I386 { + return Yxxx, true + } + return YyvmEvex, true + + case REG_Z0 + 0, + REG_Z0 + 1, + REG_Z0 + 2, + REG_Z0 + 3, + REG_Z0 + 4, + REG_Z0 + 5, + REG_Z0 + 6, + REG_Z0 + 7: + return Yzvm, true + case REG_Z8 + 0, + REG_Z8 + 1, + REG_Z8 + 2, + REG_Z8 + 3, + REG_Z8 + 4, + REG_Z8 + 5, + REG_Z8 + 6, + REG_Z8 + 7, + REG_Z8 + 8, + REG_Z8 + 9, + REG_Z8 + 10, + REG_Z8 + 11, + REG_Z8 + 12, + REG_Z8 + 13, + REG_Z8 + 14, + REG_Z8 + 15, + REG_Z8 + 16, + REG_Z8 + 17, + REG_Z8 + 18, + REG_Z8 + 19, + REG_Z8 + 20, + REG_Z8 + 21, + REG_Z8 + 22, + REG_Z8 + 23: + if ctxt.Arch.Family == sys.I386 { + return Yxxx, true + } + return Yzvm, true + } + 
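+ // Reaching here means addr.Index is not an X/Y/Z register, so addr
+ // is not a V-mem operand. For example (illustrative), (BP)(SI*4)
+ // falls through, while (BP)(X2*4) was classified Yxvm above.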
+ return Yxxx, false +} + +func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { + switch a.Type { + case obj.TYPE_REGLIST: + return oclassRegList(ctxt, a) + + case obj.TYPE_NONE: + return Ynone + + case obj.TYPE_BRANCH: + return Ybr + + case obj.TYPE_INDIR: + if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { + return Yindir + } + return Yxxx + + case obj.TYPE_MEM: + // Pseudo registers have negative index, but SP is + // not pseudo on x86, hence REG_SP check is not redundant. + if a.Index == REG_SP || a.Index < 0 { + // Can't use FP/SB/PC/SP as the index register. + return Yxxx + } + + if vmem, ok := oclassVMem(ctxt, a); ok { + return vmem + } + + if ctxt.Arch.Family == sys.AMD64 { + switch a.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: + // Global variables can't use index registers and their + // base register is %rip (%rip is encoded as REG_NONE). + if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { + return Yxxx + } + case obj.NAME_AUTO, obj.NAME_PARAM: + // These names must have a base of SP. The old compiler + // uses 0 for the base register. SSA uses REG_SP. + if a.Reg != REG_SP && a.Reg != 0 { + return Yxxx + } + case obj.NAME_NONE: + // everything is ok + default: + // unknown name + return Yxxx + } + } + return Ym + + case obj.TYPE_ADDR: + switch a.Name { + case obj.NAME_GOTREF: + ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") + return Yxxx + + case obj.NAME_EXTERN, + obj.NAME_STATIC: + if a.Sym != nil && useAbs(ctxt, a.Sym) { + return Yi32 + } + return Yiauto // use pc-relative addressing + + case obj.NAME_AUTO, + obj.NAME_PARAM: + return Yiauto + } + + // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index + // and got Yi32 in an earlier version of this code. + // Keep doing that until we fix yduff etc. 
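+ // Concretely, a $runtime.duffzero or $runtime.duffcopy address is
+ // forced to Yi32 here instead of the pc-relative Yiauto it would
+ // usually get.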
+ if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { + return Yi32 + } + + if a.Sym != nil || a.Name != obj.NAME_NONE { + ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) + } + fallthrough + + case obj.TYPE_CONST: + if a.Sym != nil { + ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) + } + + v := a.Offset + if ctxt.Arch.Family == sys.I386 { + v = int64(int32(v)) + } + switch { + case v == 0: + return Yi0 + case v == 1: + return Yi1 + case v >= 0 && v <= 3: + return Yu2 + case v >= 0 && v <= 127: + return Yu7 + case v >= 0 && v <= 255: + return Yu8 + case v >= -128 && v <= 127: + return Yi8 + } + if ctxt.Arch.Family == sys.I386 { + return Yi32 + } + l := int32(v) + if int64(l) == v { + return Ys32 // can sign extend + } + if v>>32 == 0 { + return Yi32 // unsigned + } + return Yi64 + + case obj.TYPE_TEXTSIZE: + return Ytextsize + } + + if a.Type != obj.TYPE_REG { + ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) + return Yxxx + } + + switch a.Reg { + case REG_AL: + return Yal + + case REG_AX: + return Yax + + /* + case REG_SPB: + */ + case REG_BPB, + REG_SIB, + REG_DIB, + REG_R8B, + REG_R9B, + REG_R10B, + REG_R11B, + REG_R12B, + REG_R13B, + REG_R14B, + REG_R15B: + if ctxt.Arch.Family == sys.I386 { + return Yxxx + } + fallthrough + + case REG_DL, + REG_BL, + REG_AH, + REG_CH, + REG_DH, + REG_BH: + return Yrb + + case REG_CL: + return Ycl + + case REG_CX: + return Ycx + + case REG_DX, REG_BX: + return Yrx + + case REG_R8, // not really Yrl + REG_R9, + REG_R10, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15: + if ctxt.Arch.Family == sys.I386 { + return Yxxx + } + fallthrough + + case REG_SP, REG_BP, REG_SI, REG_DI: + if ctxt.Arch.Family == sys.I386 { + return Yrl32 + } + return Yrl + + case REG_F0 + 0: + return Yf0 + + case REG_F0 + 1, + REG_F0 + 2, + REG_F0 + 3, + REG_F0 + 4, + REG_F0 + 5, + REG_F0 + 6, + REG_F0 + 7: + return Yrf + + case REG_M0 + 0, + REG_M0 + 1, + REG_M0 + 2, + REG_M0 + 3, + REG_M0 + 4, + REG_M0 + 5, + REG_M0 + 6, + REG_M0 + 7: + return Ymr + + case REG_X0: + return Yxr0 + + case REG_X0 + 1, + REG_X0 + 2, + REG_X0 + 3, + REG_X0 + 4, + REG_X0 + 5, + REG_X0 + 6, + REG_X0 + 7, + REG_X0 + 8, + REG_X0 + 9, + REG_X0 + 10, + REG_X0 + 11, + REG_X0 + 12, + REG_X0 + 13, + REG_X0 + 14, + REG_X0 + 15: + return Yxr + + case REG_X0 + 16, + REG_X0 + 17, + REG_X0 + 18, + REG_X0 + 19, + REG_X0 + 20, + REG_X0 + 21, + REG_X0 + 22, + REG_X0 + 23, + REG_X0 + 24, + REG_X0 + 25, + REG_X0 + 26, + REG_X0 + 27, + REG_X0 + 28, + REG_X0 + 29, + REG_X0 + 30, + REG_X0 + 31: + return YxrEvex + + case REG_Y0 + 0, + REG_Y0 + 1, + REG_Y0 + 2, + REG_Y0 + 3, + REG_Y0 + 4, + REG_Y0 + 5, + REG_Y0 + 6, + REG_Y0 + 7, + REG_Y0 + 8, + REG_Y0 + 9, + REG_Y0 + 10, + REG_Y0 + 11, + REG_Y0 + 12, + REG_Y0 + 13, + REG_Y0 + 14, + REG_Y0 + 15: + return Yyr + + case REG_Y0 + 16, + REG_Y0 + 17, + REG_Y0 + 18, + REG_Y0 + 19, + REG_Y0 + 20, + REG_Y0 + 21, + REG_Y0 + 22, + REG_Y0 + 23, + REG_Y0 + 24, + REG_Y0 + 25, + REG_Y0 + 26, + REG_Y0 + 27, + REG_Y0 + 28, + REG_Y0 + 29, + REG_Y0 + 30, + REG_Y0 + 31: + return YyrEvex + + case REG_Z0 + 0, + REG_Z0 + 1, + REG_Z0 + 2, + REG_Z0 + 3, + REG_Z0 + 4, + REG_Z0 + 5, + REG_Z0 + 6, + REG_Z0 + 7: + return Yzr + + case REG_Z0 + 8, + REG_Z0 + 9, + REG_Z0 + 10, + REG_Z0 + 11, + REG_Z0 + 12, + REG_Z0 + 13, + REG_Z0 + 14, + REG_Z0 + 15, + REG_Z0 + 16, + REG_Z0 + 17, + REG_Z0 + 18, + REG_Z0 + 19, + REG_Z0 + 20, + REG_Z0 + 21, + REG_Z0 + 22, + REG_Z0 + 23, + REG_Z0 + 24, + REG_Z0 + 25, + REG_Z0 + 26, + REG_Z0 + 27, + REG_Z0 + 28, + REG_Z0 + 29, + 
REG_Z0 + 30, + REG_Z0 + 31: + if ctxt.Arch.Family == sys.I386 { + return Yxxx + } + return Yzr + + case REG_K0: + return Yk0 + + case REG_K0 + 1, + REG_K0 + 2, + REG_K0 + 3, + REG_K0 + 4, + REG_K0 + 5, + REG_K0 + 6, + REG_K0 + 7: + return Yknot0 + + case REG_CS: + return Ycs + case REG_SS: + return Yss + case REG_DS: + return Yds + case REG_ES: + return Yes + case REG_FS: + return Yfs + case REG_GS: + return Ygs + case REG_TLS: + return Ytls + + case REG_GDTR: + return Ygdtr + case REG_IDTR: + return Yidtr + case REG_LDTR: + return Yldtr + case REG_MSW: + return Ymsw + case REG_TASK: + return Ytask + + case REG_CR + 0: + return Ycr0 + case REG_CR + 1: + return Ycr1 + case REG_CR + 2: + return Ycr2 + case REG_CR + 3: + return Ycr3 + case REG_CR + 4: + return Ycr4 + case REG_CR + 5: + return Ycr5 + case REG_CR + 6: + return Ycr6 + case REG_CR + 7: + return Ycr7 + case REG_CR + 8: + return Ycr8 + + case REG_DR + 0: + return Ydr0 + case REG_DR + 1: + return Ydr1 + case REG_DR + 2: + return Ydr2 + case REG_DR + 3: + return Ydr3 + case REG_DR + 4: + return Ydr4 + case REG_DR + 5: + return Ydr5 + case REG_DR + 6: + return Ydr6 + case REG_DR + 7: + return Ydr7 + + case REG_TR + 0: + return Ytr0 + case REG_TR + 1: + return Ytr1 + case REG_TR + 2: + return Ytr2 + case REG_TR + 3: + return Ytr3 + case REG_TR + 4: + return Ytr4 + case REG_TR + 5: + return Ytr5 + case REG_TR + 6: + return Ytr6 + case REG_TR + 7: + return Ytr7 + } + + return Yxxx +} + +// AsmBuf is a simple buffer to assemble variable-length x86 instructions into +// and hold assembly state. +type AsmBuf struct { + buf [100]byte + off int + rexflag int + vexflag bool // Per inst: true for VEX-encoded + evexflag bool // Per inst: true for EVEX-encoded + rep bool + repn bool + lock bool + + evex evexBits // Initialized when evexflag is true +} + +// Put1 appends one byte to the end of the buffer. +func (ab *AsmBuf) Put1(x byte) { + ab.buf[ab.off] = x + ab.off++ +} + +// Put2 appends two bytes to the end of the buffer. +func (ab *AsmBuf) Put2(x, y byte) { + ab.buf[ab.off+0] = x + ab.buf[ab.off+1] = y + ab.off += 2 +} + +// Put3 appends three bytes to the end of the buffer. +func (ab *AsmBuf) Put3(x, y, z byte) { + ab.buf[ab.off+0] = x + ab.buf[ab.off+1] = y + ab.buf[ab.off+2] = z + ab.off += 3 +} + +// Put4 appends four bytes to the end of the buffer. +func (ab *AsmBuf) Put4(x, y, z, w byte) { + ab.buf[ab.off+0] = x + ab.buf[ab.off+1] = y + ab.buf[ab.off+2] = z + ab.buf[ab.off+3] = w + ab.off += 4 +} + +// PutInt16 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt16(v int16) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.off += 2 +} + +// PutInt32 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt32(v int32) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.buf[ab.off+2] = byte(v >> 16) + ab.buf[ab.off+3] = byte(v >> 24) + ab.off += 4 +} + +// PutInt64 writes v into the buffer using little-endian encoding. +func (ab *AsmBuf) PutInt64(v int64) { + ab.buf[ab.off+0] = byte(v) + ab.buf[ab.off+1] = byte(v >> 8) + ab.buf[ab.off+2] = byte(v >> 16) + ab.buf[ab.off+3] = byte(v >> 24) + ab.buf[ab.off+4] = byte(v >> 32) + ab.buf[ab.off+5] = byte(v >> 40) + ab.buf[ab.off+6] = byte(v >> 48) + ab.buf[ab.off+7] = byte(v >> 56) + ab.off += 8 +} + +// Put copies b into the buffer. 
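+// For example (illustrative):
+//
+//	var ab AsmBuf
+//	ab.Put([]byte{0x0F, 0x05}) // SYSCALL
+//	_ = ab.Bytes()             // -> [0x0F 0x05]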
+func (ab *AsmBuf) Put(b []byte) { + copy(ab.buf[ab.off:], b) + ab.off += len(b) +} + +// PutOpBytesLit writes zero terminated sequence of bytes from op, +// starting at specified offset (e.g. z counter value). +// Trailing 0 is not written. +// +// Intended to be used for literal Z cases. +// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r). +func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) { + for int(op[offset]) != 0 { + ab.Put1(byte(op[offset])) + offset++ + } +} + +// Insert inserts b at offset i. +func (ab *AsmBuf) Insert(i int, b byte) { + ab.off++ + copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1]) + ab.buf[i] = b +} + +// Last returns the byte at the end of the buffer. +func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] } + +// Len returns the length of the buffer. +func (ab *AsmBuf) Len() int { return ab.off } + +// Bytes returns the contents of the buffer. +func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] } + +// Reset empties the buffer. +func (ab *AsmBuf) Reset() { ab.off = 0 } + +// At returns the byte at offset i. +func (ab *AsmBuf) At(i int) byte { return ab.buf[i] } + +// asmidx emits SIB byte. +func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) { + var i int + + // X/Y index register is used in VSIB. + switch index { + default: + goto bad + + case REG_NONE: + i = 4 << 3 + goto bas + + case REG_R8, + REG_R9, + REG_R10, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15, + REG_X8, + REG_X9, + REG_X10, + REG_X11, + REG_X12, + REG_X13, + REG_X14, + REG_X15, + REG_X16, + REG_X17, + REG_X18, + REG_X19, + REG_X20, + REG_X21, + REG_X22, + REG_X23, + REG_X24, + REG_X25, + REG_X26, + REG_X27, + REG_X28, + REG_X29, + REG_X30, + REG_X31, + REG_Y8, + REG_Y9, + REG_Y10, + REG_Y11, + REG_Y12, + REG_Y13, + REG_Y14, + REG_Y15, + REG_Y16, + REG_Y17, + REG_Y18, + REG_Y19, + REG_Y20, + REG_Y21, + REG_Y22, + REG_Y23, + REG_Y24, + REG_Y25, + REG_Y26, + REG_Y27, + REG_Y28, + REG_Y29, + REG_Y30, + REG_Y31, + REG_Z8, + REG_Z9, + REG_Z10, + REG_Z11, + REG_Z12, + REG_Z13, + REG_Z14, + REG_Z15, + REG_Z16, + REG_Z17, + REG_Z18, + REG_Z19, + REG_Z20, + REG_Z21, + REG_Z22, + REG_Z23, + REG_Z24, + REG_Z25, + REG_Z26, + REG_Z27, + REG_Z28, + REG_Z29, + REG_Z30, + REG_Z31: + if ctxt.Arch.Family == sys.I386 { + goto bad + } + fallthrough + + case REG_AX, + REG_CX, + REG_DX, + REG_BX, + REG_BP, + REG_SI, + REG_DI, + REG_X0, + REG_X1, + REG_X2, + REG_X3, + REG_X4, + REG_X5, + REG_X6, + REG_X7, + REG_Y0, + REG_Y1, + REG_Y2, + REG_Y3, + REG_Y4, + REG_Y5, + REG_Y6, + REG_Y7, + REG_Z0, + REG_Z1, + REG_Z2, + REG_Z3, + REG_Z4, + REG_Z5, + REG_Z6, + REG_Z7: + i = reg[index] << 3 + } + + switch scale { + default: + goto bad + + case 1: + break + + case 2: + i |= 1 << 6 + + case 4: + i |= 2 << 6 + + case 8: + i |= 3 << 6 + } + +bas: + switch base { + default: + goto bad + + case REG_NONE: // must be mod=00 + i |= 5 + + case REG_R8, + REG_R9, + REG_R10, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15: + if ctxt.Arch.Family == sys.I386 { + goto bad + } + fallthrough + + case REG_AX, + REG_CX, + REG_DX, + REG_BX, + REG_SP, + REG_BP, + REG_SI, + REG_DI: + i |= reg[base] + } + + ab.Put1(byte(i)) + return + +bad: + ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base) + ab.Put1(0) +} + +func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) { + var rel obj.Reloc + + v := vaddr(ctxt, p, a, &rel) + if rel.Siz != 0 { + if rel.Siz != 4 { + ctxt.Diag("bad reloc") + } + r := obj.Addrel(cursym) + *r = rel + r.Off = 
int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt32(int32(v)) +} + +func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 { + if r != nil { + *r = obj.Reloc{} + } + + switch a.Name { + case obj.NAME_STATIC, + obj.NAME_GOTREF, + obj.NAME_EXTERN: + s := a.Sym + if r == nil { + ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) + log.Fatalf("reloc") + } + + if a.Name == obj.NAME_GOTREF { + r.Siz = 4 + r.Type = objabi.R_GOTPCREL + } else if useAbs(ctxt, s) { + r.Siz = 4 + r.Type = objabi.R_ADDR + } else { + r.Siz = 4 + r.Type = objabi.R_PCREL + } + + r.Off = -1 // caller must fill in + r.Sym = s + r.Add = a.Offset + + return 0 + } + + if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS { + if r == nil { + ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) + log.Fatalf("reloc") + } + + if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin { + r.Type = objabi.R_TLS_LE + r.Siz = 4 + r.Off = -1 // caller must fill in + r.Add = a.Offset + } + return 0 + } + + return a.Offset +} + +func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) { + var base int + var rel obj.Reloc + + rex &= 0x40 | Rxr + if a.Offset != int64(int32(a.Offset)) { + // The rules are slightly different for 386 and AMD64, + // mostly for historical reasons. We may unify them later, + // but it must be discussed beforehand. + // + // For 64bit mode only LEAL is allowed to overflow. + // It's how https://golang.org/cl/59630 made it. + // crypto/sha1/sha1block_amd64.s depends on this feature. + // + // For 32bit mode rules are more permissive. + // If offset fits uint32, it's permitted. + // This is allowed for assembly that wants to use 32-bit hex + // constants, e.g. LEAL 0x99999999(AX), AX. + overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) || + (ctxt.Arch.Family != sys.AMD64 && + int64(uint32(a.Offset)) == a.Offset && + ab.rexflag&Rxw == 0) + if !overflowOK { + ctxt.Diag("offset too large in %s", p) + } + } + v := int32(a.Offset) + rel.Siz = 0 + + switch a.Type { + case obj.TYPE_ADDR: + if a.Name == obj.NAME_NONE { + ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE") + } + if a.Index == REG_TLS { + ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS") + } + goto bad + + case obj.TYPE_REG: + const regFirst = REG_AL + const regLast = REG_Z31 + if a.Reg < regFirst || regLast < a.Reg { + goto bad + } + if v != 0 { + goto bad + } + ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3)) + ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex + return + } + + if a.Type != obj.TYPE_MEM { + goto bad + } + + if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) { + base := int(a.Reg) + switch a.Name { + case obj.NAME_EXTERN, + obj.NAME_GOTREF, + obj.NAME_STATIC: + if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 { + goto bad + } + if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { + // The base register has already been set. It holds the PC + // of this instruction returned by a PC-reading thunk. + // See obj6.go:rewriteToPcrel. 
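+ // Otherwise the base collapses to REG_NONE below, which encodes
+ // as RIP-relative addressing on amd64 (absolute disp32 on 386),
+ // with vaddr supplying the value and relocation.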
+ } else { + base = REG_NONE + } + v = int32(vaddr(ctxt, p, a, &rel)) + + case obj.NAME_AUTO, + obj.NAME_PARAM: + base = REG_SP + } + + ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex + if base == REG_NONE { + ab.Put1(byte(0<<6 | 4<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) + goto putrelv + } + + if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { + ab.Put1(byte(0<<6 | 4<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) + return + } + + if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { + ab.Put1(byte(1<<6 | 4<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) + ab.Put1(disp8) + return + } + + ab.Put1(byte(2<<6 | 4<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) + goto putrelv + } + + base = int(a.Reg) + switch a.Name { + case obj.NAME_STATIC, + obj.NAME_GOTREF, + obj.NAME_EXTERN: + if a.Sym == nil { + ctxt.Diag("bad addr: %v", p) + } + if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { + // The base register has already been set. It holds the PC + // of this instruction returned by a PC-reading thunk. + // See obj6.go:rewriteToPcrel. + } else { + base = REG_NONE + } + v = int32(vaddr(ctxt, p, a, &rel)) + + case obj.NAME_AUTO, + obj.NAME_PARAM: + base = REG_SP + } + + if base == REG_TLS { + v = int32(vaddr(ctxt, p, a, &rel)) + } + + ab.rexflag |= regrex[base]&Rxb | rex + if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS { + if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 { + if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) { + ctxt.Diag("%v has offset against gotref", p) + } + ab.Put1(byte(0<<6 | 5<<0 | r<<3)) + goto putrelv + } + + // temporary + ab.Put2( + byte(0<<6|4<<0|r<<3), // sib present + 0<<6|4<<3|5<<0, // DS:d32 + ) + goto putrelv + } + + if base == REG_SP || base == REG_R12 { + if v == 0 { + ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) + return + } + + if disp8, ok := toDisp8(v, p, ab); ok { + ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) + ab.Put1(disp8) + return + } + + ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) + ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) + goto putrelv + } + + if REG_AX <= base && base <= REG_R15 { + if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid && + ctxt.Headtype != objabi.Hwindows { + rel = obj.Reloc{} + rel.Type = objabi.R_TLS_LE + rel.Siz = 4 + rel.Sym = nil + rel.Add = int64(v) + v = 0 + } + + if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { + ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) + return + } + + if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { + ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8) + return + } + + ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) + goto putrelv + } + + goto bad + +putrelv: + if rel.Siz != 0 { + if rel.Siz != 4 { + ctxt.Diag("bad rel") + goto bad + } + + r := obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt32(v) + return + +bad: + ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a)) +} + +func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) { + ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0) +} + +func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) { + ab.asmandsz(ctxt, cursym, 
p, a, o, 0, 0) +} + +func bytereg(a *obj.Addr, t *uint8) { + if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) { + a.Reg += REG_AL - REG_AX + *t = 0 + } +} + +func unbytereg(a *obj.Addr, t *uint8) { + if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) { + a.Reg += REG_AX - REG_AL + *t = 0 + } +} + +const ( + movLit uint8 = iota // Like Zlit + movRegMem + movMemReg + movRegMem2op + movMemReg2op + movFullPtr // Load full pointer, trash heap (unsupported) + movDoubleShift + movTLSReg +) + +var ymovtab = []movtab{ + // push + {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}}, + {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}}, + {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}}, + {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}}, + {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, + {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, + {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, + {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, + {APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}}, + {APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}}, + {APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}}, + {APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}}, + {APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}}, + {APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}}, + + // pop + {APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}}, + {APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}}, + {APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}}, + {APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, + {APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, + {APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, + {APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, + {APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}}, + {APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}}, + {APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}}, + {APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}}, + {APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}}, + + // mov seg + {AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}}, + {AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}}, + {AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}}, + {AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}}, + {AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}}, + {AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}}, + {AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}}, + {AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}}, + {AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}}, + {AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}}, + {AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}}, + {AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}}, + + // mov cr + {AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, + {AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, + {AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, + {AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, + {AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, + {AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, + {AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, + {AMOVQ, Ycr3, Ynone, Yrl, 
movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, + {AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, + {AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, + {AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, + {AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, + {AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, + {AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, + {AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, + {AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, + {AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, + {AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, + {AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, + {AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, + + // mov dr + {AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, + {AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}}, + {AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, + {AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, + {AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}}, + {AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}}, + {AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}}, + {AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, + {AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, + {AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, + {AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, + {AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, + {AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}}, + {AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}}, + {AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, + {AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, + + // mov tr + {AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}}, + {AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}}, + {AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}}, + {AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}}, + + // lgdt, sgdt, lidt, sidt + {AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, + {AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, + {AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, + {AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, + {AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, + {AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, + {AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, + {AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, + + // lldt, sldt + {AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}}, + {AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}}, + + // lmsw, smsw + {AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}}, + {AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}}, + + // ltr, str + {AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}}, + {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}}, + + /* load full pointer - unsupported + {AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}}, + {AMOVW, Yml, Ycol, 
movFullPtr, [4]uint8{Pe, 0, 0, 0}}, + */ + + // double shift + {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, + {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, + {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, + {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, + {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, + {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, + {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, + {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, + {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, + {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, + {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, + {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, + {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, + {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, + {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, + {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, + {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, + {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, + + // load TLS base + {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, + {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, + {0, 0, 0, 0, 0, [4]uint8{}}, +} + +func isax(a *obj.Addr) bool { + switch a.Reg { + case REG_AX, REG_AL, REG_AH: + return true + } + + return a.Index == REG_AX +} + +func subreg(p *obj.Prog, from int, to int) { + if false { /* debug['Q'] */ + fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to)) + } + + if int(p.From.Reg) == from { + p.From.Reg = int16(to) + p.Ft = 0 + } + + if int(p.To.Reg) == from { + p.To.Reg = int16(to) + p.Tt = 0 + } + + if int(p.From.Index) == from { + p.From.Index = int16(to) + p.Ft = 0 + } + + if int(p.To.Index) == from { + p.To.Index = int16(to) + p.Tt = 0 + } + + if false { /* debug['Q'] */ + fmt.Printf("%v\n", p) + } +} + +func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int { + switch op { + case Pm, Pe, Pf2, Pf3: + if osize != 1 { + if op != Pm { + ab.Put1(byte(op)) + } + ab.Put1(Pm) + z++ + op = int(o.op[z]) + break + } + fallthrough + + default: + if ab.Len() == 0 || ab.Last() != Pm { + ab.Put1(Pm) + } + } + + ab.Put1(byte(op)) + return z +} + +var bpduff1 = []byte{ + 0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP) + 0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP +} + +var bpduff2 = []byte{ + 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP +} + +// asmevex emits EVEX pregis and opcode byte. +// In addition to asmvex r/m, vvvv and reg fields also requires optional +// K-masking register. +// +// Expects asmbuf.evex to be properly initialized. +func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) { + ab.evexflag = true + evex := ab.evex + + rexR := byte(1) + evexR := byte(1) + rexX := byte(1) + rexB := byte(1) + if r != nil { + if regrex[r.Reg]&Rxr != 0 { + rexR = 0 // "ModR/M.reg" selector 4th bit. + } + if regrex[r.Reg]&RxrEvex != 0 { + evexR = 0 // "ModR/M.reg" selector 5th bit. 
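+ // Note that EVEX stores these selector bits inverted: a 0 bit
+ // selects the extended register range. E.g. (illustrative) for
+ // REG_Z30 both Rxr and RxrEvex are set in regrex, so rexR and
+ // evexR both drop to 0 here.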
+ } + } + if rm != nil { + if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 { + rexX = 0 + } else if regrex[rm.Index]&Rxx != 0 { + rexX = 0 + } + if regrex[rm.Reg]&Rxb != 0 { + rexB = 0 + } + } + // P0 = [R][X][B][R'][00][mm] + p0 := (rexR << 7) | + (rexX << 6) | + (rexB << 5) | + (evexR << 4) | + (0 << 2) | + (evex.M() << 0) + + vexV := byte(0) + if v != nil { + // 4bit-wide reg index. + vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF + } + vexV ^= 0x0F + // P1 = [W][vvvv][1][pp] + p1 := (evex.W() << 7) | + (vexV << 3) | + (1 << 2) | + (evex.P() << 0) + + suffix := evexSuffixMap[p.Scond] + evexZ := byte(0) + evexLL := evex.L() + evexB := byte(0) + evexV := byte(1) + evexA := byte(0) + if suffix.zeroing { + if !evex.ZeroingEnabled() { + ctxt.Diag("unsupported zeroing: %v", p) + } + if k == nil { + // When you request zeroing you must specify a mask register. + // See issue 57952. + ctxt.Diag("mask register must be specified for .Z instructions: %v", p) + } else if k.Reg == REG_K0 { + // The mask register must not be K0. That restriction is already + // handled by the Yknot0 restriction in the opcode tables, so we + // won't ever reach here. But put something sensible here just in case. + ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p) + } + evexZ = 1 + } + switch { + case suffix.rounding != rcUnset: + if rm != nil && rm.Type == obj.TYPE_MEM { + ctxt.Diag("illegal rounding with memory argument: %v", p) + } else if !evex.RoundingEnabled() { + ctxt.Diag("unsupported rounding: %v", p) + } + evexB = 1 + evexLL = suffix.rounding + case suffix.broadcast: + if rm == nil || rm.Type != obj.TYPE_MEM { + ctxt.Diag("illegal broadcast without memory argument: %v", p) + } else if !evex.BroadcastEnabled() { + ctxt.Diag("unsupported broadcast: %v", p) + } + evexB = 1 + case suffix.sae: + if rm != nil && rm.Type == obj.TYPE_MEM { + ctxt.Diag("illegal SAE with memory argument: %v", p) + } else if !evex.SaeEnabled() { + ctxt.Diag("unsupported SAE: %v", p) + } + evexB = 1 + } + if rm != nil && regrex[rm.Index]&RxrEvex != 0 { + evexV = 0 + } else if v != nil && regrex[v.Reg]&RxrEvex != 0 { + evexV = 0 // VSR selector 5th bit. + } + if k != nil { + evexA = byte(reg[k.Reg]) + } + // P2 = [z][L'L][b][V'][aaa] + p2 := (evexZ << 7) | + (evexLL << 5) | + (evexB << 4) | + (evexV << 3) | + (evexA << 0) + + const evexEscapeByte = 0x62 + ab.Put4(evexEscapeByte, p0, p1, p2) + ab.Put1(evex.opcode) +} + +// Emit VEX prefix and opcode byte. +// The three addresses are the r/m, vvvv, and reg fields. +// The reg and rm arguments appear in the same order as the +// arguments to asmand, which typically follows the call to asmvex. +// The final two arguments are the VEX prefix (see encoding above) +// and the opcode byte. +// For details about vex prefix see: +// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description +func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) { + ab.vexflag = true + rexR := 0 + if r != nil { + rexR = regrex[r.Reg] & Rxr + } + rexB := 0 + rexX := 0 + if rm != nil { + rexB = regrex[rm.Reg] & Rxb + rexX = regrex[rm.Index] & Rxx + } + vexM := (vex >> 3) & 0x7 + vexWLP := vex & 0x87 + vexV := byte(0) + if v != nil { + vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF + } + vexV ^= 0xF + if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 { + // Can use 2-byte encoding. + ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP) + } else { + // Must use 3-byte encoding. 
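+ // The C5 form is available only for the implied 0F opcode map
+ // (vexM == 1) with VEX.W0 and no X/B extension; e.g. (illustrative)
+ // a 0F38-map opcode, or REG_R8 as base or index, forces the C4 form.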
+ ab.Put3(0xc4, + (byte(rexR|rexX|rexB)<<5)^0xE0|vexM, + vexV<<3|vexWLP, + ) + } + ab.Put1(opcode) +} + +// regIndex returns register index that fits in 5 bits. +// +// R : 3 bit | legacy instructions | N/A +// [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr +// EVEX.R : 1 bit | EVEX extension bit | RxrEvex +// +// Examples: +// +// REG_Z30 => 30 +// REG_X15 => 15 +// REG_R9 => 9 +// REG_AX => 0 +func regIndex(r int16) int { + lower3bits := reg[r] + high4bit := regrex[r] & Rxr << 1 + high5bit := regrex[r] & RxrEvex << 0 + return lower3bits | high4bit | high5bit +} + +// avx2gatherValid reports whether p satisfies AVX2 gather constraints. +// Reports errors via ctxt. +func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool { + // If any pair of the index, mask, or destination registers + // are the same, illegal instruction trap (#UD) is triggered. + index := regIndex(p.GetFrom3().Index) + mask := regIndex(p.From.Reg) + dest := regIndex(p.To.Reg) + if dest == mask || dest == index || mask == index { + ctxt.Diag("mask, index, and destination registers should be distinct: %v", p) + return false + } + + return true +} + +// avx512gatherValid reports whether p satisfies AVX512 gather constraints. +// Reports errors via ctxt. +func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool { + // Illegal instruction trap (#UD) is triggered if the destination vector + // register is the same as index vector in VSIB. + index := regIndex(p.From.Index) + dest := regIndex(p.To.Reg) + if dest == index { + ctxt.Diag("index and destination registers should be distinct: %v", p) + return false + } + + return true +} + +func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { + o := opindex[p.As&obj.AMask] + + if o == nil { + ctxt.Diag("asmins: missing op %v", p) + return + } + + if pre := prefixof(ctxt, &p.From); pre != 0 { + ab.Put1(byte(pre)) + } + if pre := prefixof(ctxt, &p.To); pre != 0 { + ab.Put1(byte(pre)) + } + + // Checks to warn about instruction/arguments combinations that + // will unconditionally trigger illegal instruction trap (#UD). + switch p.As { + case AVGATHERDPD, + AVGATHERQPD, + AVGATHERDPS, + AVGATHERQPS, + AVPGATHERDD, + AVPGATHERQD, + AVPGATHERDQ, + AVPGATHERQQ: + if p.GetFrom3() == nil { + // gathers need a 3rd arg. See issue 58822. + ctxt.Diag("need a third arg for gather instruction: %v", p) + return + } + // AVX512 gather requires explicit K mask. + if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 { + if !avx512gatherValid(ctxt, p) { + return + } + } else { + if !avx2gatherValid(ctxt, p) { + return + } + } + } + + if p.Ft == 0 { + p.Ft = uint8(oclass(ctxt, p, &p.From)) + } + if p.Tt == 0 { + p.Tt = uint8(oclass(ctxt, p, &p.To)) + } + + ft := int(p.Ft) * Ymax + var f3t int + tt := int(p.Tt) * Ymax + + xo := obj.Bool2int(o.op[0] == 0x0f) + z := 0 + var a *obj.Addr + var l int + var op int + var q *obj.Prog + var r *obj.Reloc + var rel obj.Reloc + var v int64 + + args := make([]int, 0, argListMax) + if ft != Ynone*Ymax { + args = append(args, ft) + } + for i := range p.RestArgs { + args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax) + } + if tt != Ynone*Ymax { + args = append(args, tt) + } + + for _, yt := range o.ytab { + // ytab matching is purely args-based, + // but AVX512 suffixes like "Z" or "RU_SAE" will + // add EVEX-only filter that will reject non-EVEX matches. + // + // Consider "VADDPD.BCST 2032(DX), X0, X0". + // Without this rule, operands will lead to VEX-encoded form + // and produce "c5b15813" encoding. 
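+ // The Scond check below rejects such a VEX match (its zcase is
+ // not an EVEX one) and keeps scanning, so the EVEX-encoded form
+ // is chosen and the suffix can be honored.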
+ if !yt.match(args) { + // "xo" is always zero for VEX/EVEX encoded insts. + z += int(yt.zoffset) + xo + } else { + if p.Scond != 0 && !evexZcase(yt.zcase) { + // Do not signal error and continue to search + // for matching EVEX-encoded form. + z += int(yt.zoffset) + continue + } + + switch o.prefix { + case Px1: // first option valid only in 32-bit mode + if ctxt.Arch.Family == sys.AMD64 && z == 0 { + z += int(yt.zoffset) + xo + continue + } + case Pq: // 16 bit escape and opcode escape + ab.Put2(Pe, Pm) + + case Pq3: // 16 bit escape and opcode escape + REX.W + ab.rexflag |= Pw + ab.Put2(Pe, Pm) + + case Pq4: // 66 0F 38 + ab.Put3(0x66, 0x0F, 0x38) + + case Pq4w: // 66 0F 38 + REX.W + ab.rexflag |= Pw + ab.Put3(0x66, 0x0F, 0x38) + + case Pq5: // F3 0F 38 + ab.Put3(0xF3, 0x0F, 0x38) + + case Pq5w: // F3 0F 38 + REX.W + ab.rexflag |= Pw + ab.Put3(0xF3, 0x0F, 0x38) + + case Pf2, // xmm opcode escape + Pf3: + ab.Put2(o.prefix, Pm) + + case Pef3: + ab.Put3(Pe, Pf3, Pm) + + case Pfw: // xmm opcode escape + REX.W + ab.rexflag |= Pw + ab.Put2(Pf3, Pm) + + case Pm: // opcode escape + ab.Put1(Pm) + + case Pe: // 16 bit escape + ab.Put1(Pe) + + case Pw: // 64-bit escape + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + + case Pw8: // 64-bit escape if z >= 8 + if z >= 8 { + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + } + + case Pb: // botch + if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) { + goto bad + } + // NOTE(rsc): This is probably safe to do always, + // but when enabled it chooses different encodings + // than the old cmd/internal/obj/i386 code did, + // which breaks our "same bits out" checks. + // In particular, CMPB AX, $0 encodes as 80 f8 00 + // in the original obj/i386, and it would encode + // (using a valid, shorter form) as 3c 00 if we enabled + // the call to bytereg here. 
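+ // (A further constraint: only AMD64's REX prefix makes
+ // SPB/BPB/SIB/DIB encodable at all; on 386 such operands are
+ // rejected by the isbadbyte check above.)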
+ if ctxt.Arch.Family == sys.AMD64 { + bytereg(&p.From, &p.Ft) + bytereg(&p.To, &p.Tt) + } + + case P32: // 32 bit but illegal if 64-bit mode + if ctxt.Arch.Family == sys.AMD64 { + ctxt.Diag("asmins: illegal in 64-bit mode: %v", p) + } + + case Py: // 64-bit only, no prefix + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + + case Py1: // 64-bit only if z < 1, no prefix + if z < 1 && ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + + case Py3: // 64-bit only if z < 3, no prefix + if z < 3 && ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) + } + } + + if z >= len(o.op) { + log.Fatalf("asmins bad table %v", p) + } + op = int(o.op[z]) + if op == 0x0f { + ab.Put1(byte(op)) + z++ + op = int(o.op[z]) + } + + switch yt.zcase { + default: + ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p) + return + + case Zpseudo: + break + + case Zlit: + ab.PutOpBytesLit(z, &o.op) + + case Zlitr_m: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zlitm_r: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zlit_m_r: + ab.PutOpBytesLit(z, &o.op) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zmb_r: + bytereg(&p.From, &p.Ft) + fallthrough + + case Zm_r: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Z_m_r: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zm2_r: + ab.Put2(byte(op), o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_xm_nr: + ab.rexflag = 0 + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zm_r_i_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) + ab.Put1(byte(p.To.Offset)) + + case Zibm_r, Zibr_m: + ab.PutOpBytesLit(z, &o.op) + if yt.zcase == Zibr_m { + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + } else { + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + } + switch { + default: + ab.Put1(byte(p.From.Offset)) + case yt.args[0] == Yi32 && o.prefix == Pe: + ab.PutInt16(int16(p.From.Offset)) + case yt.args[0] == Yi32: + ab.PutInt32(int32(p.From.Offset)) + } + + case Zaut_r: + ab.Put1(0x8d) // leal + if p.From.Type != obj.TYPE_ADDR { + ctxt.Diag("asmins: Zaut sb type ADDR") + } + p.From.Type = obj.TYPE_MEM + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + p.From.Type = obj.TYPE_ADDR + + case Zm_o: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + + case Zr_m: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zvex: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + + case Zvex_rm_v_r: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zvex_rm_v_ro: + ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + + case Zvex_i_rm_vo: + ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) + ab.Put1(byte(p.From.Offset)) + + case Zvex_i_r_v: + ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) + regnum := byte(0x7) + if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { + regnum 
&= byte(p.GetFrom3().Reg - REG_X0) + } else { + regnum &= byte(p.GetFrom3().Reg - REG_Y0) + } + ab.Put1(o.op[z+2] | regnum) + ab.Put1(byte(p.From.Offset)) + + case Zvex_i_rm_v_r: + imm, from, from3, to := unpackOps4(p) + ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zvex_i_rm_r: + ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + ab.Put1(byte(p.From.Offset)) + + case Zvex_v_rm_r: + ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + + case Zvex_r_v_rm: + ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zvex_rm_r_vo: + ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + + case Zvex_i_r_rm: + ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + ab.Put1(byte(p.From.Offset)) + + case Zvex_hr_rm_v_r: + hr, from, from3, to := unpackOps4(p) + ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(regIndex(hr.Reg) << 4)) + + case Zevex_k_rmo: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) + + case Zevex_i_rm_vo: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) + ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_rm_k_vo: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, to, nil, kmask) + ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_r_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) + ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_r_k_rm: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, to, nil, from, kmask) + ab.asmand(ctxt, cursym, p, to, from) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) + ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) + ab.Put1(byte(p.From.Offset)) + + case Zevex_i_rm_k_r: + imm, from, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, nil, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_v_r: + imm, from, from3, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, nil) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_i_rm_v_k_r: + imm, from, from3, kmask, to := unpackOps5(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + ab.Put1(byte(imm.Offset)) + + case Zevex_r_v_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zevex_rm_v_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zevex_rm_k_r: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) + 
ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case Zevex_r_k_rm: + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zevex_rm_v_k_r: + from, from3, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, from, from3, to, kmask) + ab.asmand(ctxt, cursym, p, from, to) + + case Zevex_r_v_k_rm: + from, from3, kmask, to := unpackOps4(p) + ab.evex = newEVEXBits(z, &o.op) + ab.asmevex(ctxt, p, to, from3, from, kmask) + ab.asmand(ctxt, cursym, p, to, from) + + case Zr_m_xm: + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zr_m_xm_nr: + ab.rexflag = 0 + ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmand(ctxt, cursym, p, &p.To, &p.From) + + case Zo_m: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + + case Zcallindreg: + r = obj.Addrel(cursym) + r.Off = int32(p.Pc) + r.Type = objabi.R_CALLIND + r.Siz = 0 + fallthrough + + case Zo_m64: + ab.Put1(byte(op)) + ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) + + case Zm_ibo: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) + + case Zibo_m: + ab.Put1(byte(op)) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Zibo_m_xm: + z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Z_ib, Zib_: + if yt.zcase == Zib_ { + a = &p.From + } else { + a = &p.To + } + ab.Put1(byte(op)) + if p.As == AXABORT { + ab.Put1(o.op[z+1]) + } + ab.Put1(byte(vaddr(ctxt, p, a, nil))) + + case Zib_rp: + ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) + ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) + + case Zil_rp: + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(op + reg[p.To.Reg])) + if o.prefix == Pe { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, &p.From) + } + + case Zo_iw: + ab.Put1(byte(op)) + if p.From.Type != obj.TYPE_NONE { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } + + case Ziq_rp: + v = vaddr(ctxt, p, &p.From, &rel) + l = int(v >> 32) + if l == 0 && rel.Siz != 8 { + ab.rexflag &^= (0x40 | Rxw) + + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(0xb8 + reg[p.To.Reg])) + if rel.Type != 0 { + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt32(int32(v)) + } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend + ab.Put1(0xc7) + ab.asmando(ctxt, cursym, p, &p.To, 0) + + ab.PutInt32(int32(v)) // need all 8 + } else { + ab.rexflag |= regrex[p.To.Reg] & Rxb + ab.Put1(byte(op + reg[p.To.Reg])) + if rel.Type != 0 { + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.PutInt64(v) + } + + case Zib_rr: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.To) + ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) + + case Z_il, Zil_: + if yt.zcase == Zil_ { + a = &p.From + } else { + a = &p.To + } + ab.Put1(byte(op)) + if o.prefix == Pe { + v = vaddr(ctxt, p, a, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, a) + } + + case Zm_ilo, Zilo_m: + ab.Put1(byte(op)) + if yt.zcase == Zilo_m { + a = &p.From + ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) + } else { + a = &p.To + ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) + } + + if 
o.prefix == Pe { + v = vaddr(ctxt, p, a, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, a) + } + + case Zil_rr: + ab.Put1(byte(op)) + ab.asmand(ctxt, cursym, p, &p.To, &p.To) + if o.prefix == Pe { + v = vaddr(ctxt, p, &p.From, nil) + ab.PutInt16(int16(v)) + } else { + ab.relput4(ctxt, cursym, p, &p.From) + } + + case Z_rp: + ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) + ab.Put1(byte(op + reg[p.To.Reg])) + + case Zrp_: + ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) + ab.Put1(byte(op + reg[p.From.Reg])) + + case Zcallcon, Zjmpcon: + if yt.zcase == Zcallcon { + ab.Put1(byte(op)) + } else { + ab.Put1(o.op[z+1]) + } + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_PCREL + r.Siz = 4 + r.Add = p.To.Offset + ab.PutInt32(0) + + case Zcallind: + ab.Put2(byte(op), o.op[z+1]) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + if ctxt.Arch.Family == sys.AMD64 { + r.Type = objabi.R_PCREL + } else { + r.Type = objabi.R_ADDR + } + r.Siz = 4 + r.Add = p.To.Offset + r.Sym = p.To.Sym + ab.PutInt32(0) + + case Zcall, Zcallduff: + if p.To.Sym == nil { + ctxt.Diag("call without target") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + if yt.zcase == Zcallduff && ctxt.Flag_dynlink { + ctxt.Diag("directly calling duff when dynamically linking Go") + } + + if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { + // Maintain BP around call, since duffcopy/duffzero can't do it + // (the call jumps into the middle of the function). + // This makes it possible to see call sites for duffcopy/duffzero in + // BP-based profiling tools like Linux perf (which is the + // whole point of maintaining frame pointers in Go). + // MOVQ BP, -16(SP) + // LEAQ -16(SP), BP + ab.Put(bpduff1) + } + ab.Put1(byte(op)) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Sym = p.To.Sym + r.Add = p.To.Offset + r.Type = objabi.R_CALL + r.Siz = 4 + ab.PutInt32(0) + + if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { + // Pop BP pushed above. + // MOVQ 0(BP), BP + ab.Put(bpduff2) + } + + // TODO: jump across functions needs reloc + case Zbr, Zjmp, Zloop: + if p.As == AXBEGIN { + ab.Put1(byte(op)) + } + if p.To.Sym != nil { + if yt.zcase != Zjmp { + ctxt.Diag("branch to ATEXT") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + ab.Put1(o.op[z+1]) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Sym = p.To.Sym + // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that + // it can point to a trampoline instead of the destination itself. + r.Type = objabi.R_CALL + r.Siz = 4 + ab.PutInt32(0) + break + } + + // Assumes q is in this function. + // TODO: Check in input, preserve in brchain. + + // Fill in backward jump now. + q = p.To.Target() + + if q == nil { + ctxt.Diag("jmp/branch/loop without target") + ctxt.DiagFlush() + log.Fatalf("bad code") + } + + if p.Back&branchBackwards != 0 { + v = q.Pc - (p.Pc + 2) + if v >= -128 && p.As != AXBEGIN { + if p.As == AJCXZL { + ab.Put1(0x67) + } + ab.Put2(byte(op), byte(v)) + } else if yt.zcase == Zloop { + ctxt.Diag("loop too far: %v", p) + } else { + v -= 5 - 2 + if p.As == AXBEGIN { + v-- + } + if yt.zcase == Zbr { + ab.Put1(0x0f) + v-- + } + + ab.Put1(o.op[z+1]) + ab.PutInt32(int32(v)) + } + + break + } + + // Annotate target; will fill in later. 
+ p.Forwd = q.Rel + + q.Rel = p + if p.Back&branchShort != 0 && p.As != AXBEGIN { + if p.As == AJCXZL { + ab.Put1(0x67) + } + ab.Put2(byte(op), 0) + } else if yt.zcase == Zloop { + ctxt.Diag("loop too far: %v", p) + } else { + if yt.zcase == Zbr { + ab.Put1(0x0f) + } + ab.Put1(o.op[z+1]) + ab.PutInt32(0) + } + + case Zbyte: + v = vaddr(ctxt, p, &p.From, &rel) + if rel.Siz != 0 { + rel.Siz = uint8(op) + r = obj.Addrel(cursym) + *r = rel + r.Off = int32(p.Pc + int64(ab.Len())) + } + + ab.Put1(byte(v)) + if op > 1 { + ab.Put1(byte(v >> 8)) + if op > 2 { + ab.PutInt16(int16(v >> 16)) + if op > 4 { + ab.PutInt32(int32(v >> 32)) + } + } + } + } + + return + } + } + f3t = Ynone * Ymax + if p.GetFrom3() != nil { + f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax + } + for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { + var pp obj.Prog + var t []byte + if p.As == mo[0].as { + if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { + t = mo[0].op[:] + switch mo[0].code { + default: + ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) + + case movLit: + for z = 0; t[z] != 0; z++ { + ab.Put1(t[z]) + } + + case movRegMem: + ab.Put1(t[0]) + ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) + + case movMemReg: + ab.Put1(t[0]) + ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) + + case movRegMem2op: // r,m - 2op + ab.Put2(t[0], t[1]) + ab.asmando(ctxt, cursym, p, &p.To, int(t[2])) + ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) + + case movMemReg2op: + ab.Put2(t[0], t[1]) + ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) + ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) + + case movFullPtr: + if t[0] != 0 { + ab.Put1(t[0]) + } + switch p.To.Index { + default: + goto bad + + case REG_DS: + ab.Put1(0xc5) + + case REG_SS: + ab.Put2(0x0f, 0xb2) + + case REG_ES: + ab.Put1(0xc4) + + case REG_FS: + ab.Put2(0x0f, 0xb4) + + case REG_GS: + ab.Put2(0x0f, 0xb5) + } + + ab.asmand(ctxt, cursym, p, &p.From, &p.To) + + case movDoubleShift: + if t[0] == Pw { + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal 64: %v", p) + } + ab.rexflag |= Pw + t = t[1:] + } else if t[0] == Pe { + ab.Put1(Pe) + t = t[1:] + } + + switch p.From.Type { + default: + goto bad + + case obj.TYPE_CONST: + ab.Put2(0x0f, t[0]) + ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) + ab.Put1(byte(p.From.Offset)) + + case obj.TYPE_REG: + switch p.From.Reg { + default: + goto bad + + case REG_CL, REG_CX: + ab.Put2(0x0f, t[1]) + ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) + } + } + + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. + case movTLSReg: + if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { + ctxt.Diag("invalid load of TLS: %v", p) + } + + if ctxt.Arch.Family == sys.I386 { + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. 
+ switch ctxt.Headtype { + default: + log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) + + case objabi.Hlinux, objabi.Hfreebsd: + if ctxt.Flag_shared { + // Note that this is not generating the same insns as the other cases. + // MOV TLS, dst + // becomes + // call __x86.get_pc_thunk.dst + // movl (gotpc + g@gotntpoff)(dst), dst + // which is encoded as + // call __x86.get_pc_thunk.dst + // movq 0(dst), dst + // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access + // is g, which we can't check here, but will when we assemble the second + // instruction. + dst := p.To.Reg + ab.Put1(0xe8) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_CALL + r.Siz = 4 + r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) + ab.PutInt32(0) + + ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_TLS_IE + r.Siz = 4 + r.Add = 2 + ab.PutInt32(0) + } else { + // ELF TLS base is 0(GS). + pp.From = p.From + + pp.From.Type = obj.TYPE_MEM + pp.From.Reg = REG_GS + pp.From.Offset = 0 + pp.From.Index = REG_NONE + pp.From.Scale = 0 + ab.Put2(0x65, // GS + 0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + case objabi.Hplan9: + pp.From = obj.Addr{} + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_EXTERN + pp.From.Sym = plan9privates + pp.From.Offset = 0 + pp.From.Index = REG_NONE + ab.Put1(0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + break + } + + switch ctxt.Headtype { + default: + log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) + + case objabi.Hlinux, objabi.Hfreebsd: + if !ctxt.Flag_shared { + log.Fatalf("unknown TLS base location for linux/freebsd without -shared") + } + // Note that this is not generating the same insn as the other cases. + // MOV TLS, R_to + // becomes + // movq g@gottpoff(%rip), R_to + // which is encoded as + // movq 0(%rip), R_to + // and a R_TLS_IE reloc. This all assumes the only tls variable we access + // is g, which we can't check here, but will when we assemble the second + // instruction. + ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) + + ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) + r = obj.Addrel(cursym) + r.Off = int32(p.Pc + int64(ab.Len())) + r.Type = objabi.R_TLS_IE + r.Siz = 4 + r.Add = -4 + ab.PutInt32(0) + + case objabi.Hplan9: + pp.From = obj.Addr{} + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_EXTERN + pp.From.Sym = plan9privates + pp.From.Offset = 0 + pp.From.Index = REG_NONE + ab.rexflag |= Pw + ab.Put1(0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + + case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. + // TLS base is 0(FS). + pp.From = p.From + + pp.From.Type = obj.TYPE_MEM + pp.From.Name = obj.NAME_NONE + pp.From.Reg = REG_NONE + pp.From.Offset = 0 + pp.From.Index = REG_NONE + pp.From.Scale = 0 + ab.rexflag |= Pw + ab.Put2(0x64, // FS + 0x8B) + ab.asmand(ctxt, cursym, p, &pp.From, &p.To) + } + } + return + } + } + } + goto bad + +bad: + if ctxt.Arch.Family != sys.AMD64 { + // here, the assembly has failed. + // if it's a byte instruction that has + // unaddressable registers, try to + // exchange registers and reissue the + // instruction with the operands renamed. + pp := *p + + unbytereg(&pp.From, &pp.Ft) + unbytereg(&pp.To, &pp.Tt) + + z := int(p.From.Reg) + if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { + // TODO(rsc): Use this code for x86-64 too. 
It has bug fixes not present in the amd64 code base. + // For now, different to keep bit-for-bit compatibility. + if ctxt.Arch.Family == sys.I386 { + breg := byteswapreg(ctxt, &p.To) + if breg != REG_AX { + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) + subreg(&pp, z, breg) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax + } + return + } + + if isax(&p.To) || p.To.Type == obj.TYPE_NONE { + // We certainly don't want to exchange + // with AX if the op is MUL or DIV. + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) + subreg(&pp, z, REG_BX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg lhs,bx + ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax + } + return + } + + z = int(p.To.Reg) + if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { + // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. + // For now, different to keep bit-for-bit compatibility. + if ctxt.Arch.Family == sys.I386 { + breg := byteswapreg(ctxt, &p.From) + if breg != REG_AX { + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) + subreg(&pp, z, breg) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax + } + return + } + + if isax(&p.From) { + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) + subreg(&pp, z, REG_BX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(0x87) // xchg rhs,bx + ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) + } else { + ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax + subreg(&pp, z, REG_AX) + ab.doasm(ctxt, cursym, &pp) + ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax + } + return + } + } + + ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p) +} + +// byteswapreg returns a byte-addressable register (AX, BX, CX, DX) +// which is not referenced in a. +// If a is empty, it returns BX to account for MULB-like instructions +// that might use DX and AX. 
+func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { + cana, canb, canc, cand := true, true, true, true + if a.Type == obj.TYPE_NONE { + cana, cand = false, false + } + + if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { + switch a.Reg { + case REG_NONE: + cana, cand = false, false + case REG_AX, REG_AL, REG_AH: + cana = false + case REG_BX, REG_BL, REG_BH: + canb = false + case REG_CX, REG_CL, REG_CH: + canc = false + case REG_DX, REG_DL, REG_DH: + cand = false + } + } + + if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { + switch a.Index { + case REG_AX: + cana = false + case REG_BX: + canb = false + case REG_CX: + canc = false + case REG_DX: + cand = false + } + } + + switch { + case cana: + return REG_AX + case canb: + return REG_BX + case canc: + return REG_CX + case cand: + return REG_DX + default: + ctxt.Diag("impossible byte register") + ctxt.DiagFlush() + log.Fatalf("bad code") + return 0 + } +} + +func isbadbyte(a *obj.Addr) bool { + return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) +} + +func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { + ab.Reset() + + ab.rexflag = 0 + ab.vexflag = false + ab.evexflag = false + mark := ab.Len() + ab.doasm(ctxt, cursym, p) + if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { + // as befits the whole approach of the architecture, + // the rex prefix must appear before the first opcode byte + // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but + // before the 0f opcode escape!), or it might be ignored. + // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. + if ctxt.Arch.Family != sys.AMD64 { + ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) + } + n := ab.Len() + var np int + for np = mark; np < n; np++ { + c := ab.At(np) + if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { + break + } + } + ab.Insert(np, byte(0x40|ab.rexflag)) + } + + n := ab.Len() + for i := len(cursym.R) - 1; i >= 0; i-- { + r := &cursym.R[i] + if int64(r.Off) < p.Pc { + break + } + if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { + r.Off++ + } + if r.Type == objabi.R_PCREL { + if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { + // PC-relative addressing is relative to the end of the instruction, + // but the relocations applied by the linker are relative to the end + // of the relocation. Because immediate instruction + // arguments can follow the PC-relative memory reference in the + // instruction encoding, the two may not coincide. In this case, + // adjust addend so that linker can keep relocating relative to the + // end of the relocation. + r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) + } else if ctxt.Arch.Family == sys.I386 { + // On 386 PC-relative addressing (for non-call/jmp instructions) + // assumes that the previous instruction loaded the PC of the end + // of that instruction into CX, so the adjustment is relative to + // that. + r.Add += int64(r.Off) - p.Pc + int64(r.Siz) + } + } + if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { + // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. + r.Add += int64(r.Off) - p.Pc + int64(r.Siz) + } + + } +} + +// unpackOps4 extracts 4 operands from p. 
+func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To +} + +// unpackOps5 extracts 5 operands from p. +func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To +} diff --git a/src/cmd/internal/obj/x86/asm_test.go b/src/cmd/internal/obj/x86/asm_test.go new file mode 100644 index 0000000..458a912 --- /dev/null +++ b/src/cmd/internal/obj/x86/asm_test.go @@ -0,0 +1,342 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x86 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "testing" +) + +type oclassTest struct { + arg *obj.Addr + want int // Expected oclass return value for a given arg +} + +// Filled inside init, because it's easier to do with helper functions. +var ( + oclassTestsAMD64 []*oclassTest + oclassTests386 []*oclassTest +) + +func init() { + // Required for tests that access any of + // opindex/ycover/reg/regrex global tables. + var ctxt obj.Link + instinit(&ctxt) + + regAddr := func(reg int16) *obj.Addr { + return &obj.Addr{Type: obj.TYPE_REG, Reg: reg} + } + immAddr := func(v int64) *obj.Addr { + return &obj.Addr{Type: obj.TYPE_CONST, Offset: v} + } + regListAddr := func(regFrom, regTo int16) *obj.Addr { + return &obj.Addr{Type: obj.TYPE_REGLIST, Offset: EncodeRegisterRange(regFrom, regTo)} + } + memAddr := func(base, index int16) *obj.Addr { + return &obj.Addr{Type: obj.TYPE_MEM, Reg: base, Index: index} + } + + // TODO(quasilyte): oclass doesn't return Yxxx for X/Y regs with + // ID higher than 7. We don't encode such instructions, but this + // behavior seems inconsistent. It should probably either + // never check for arch or do it in all cases. 
+ + oclassTestsCommon := []*oclassTest{ + {&obj.Addr{Type: obj.TYPE_NONE}, Ynone}, + {&obj.Addr{Type: obj.TYPE_BRANCH}, Ybr}, + {&obj.Addr{Type: obj.TYPE_TEXTSIZE}, Ytextsize}, + + {&obj.Addr{Type: obj.TYPE_INDIR, Name: obj.NAME_EXTERN}, Yindir}, + {&obj.Addr{Type: obj.TYPE_INDIR, Name: obj.NAME_GOTREF}, Yindir}, + + {&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_AUTO}, Yiauto}, + {&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_PARAM}, Yiauto}, + {&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN}, Yiauto}, + {&obj.Addr{Type: obj.TYPE_ADDR, Sym: &obj.LSym{Name: "runtime.duff"}}, Yi32}, + {&obj.Addr{Type: obj.TYPE_ADDR, Offset: 4}, Yu7}, + {&obj.Addr{Type: obj.TYPE_ADDR, Offset: 255}, Yu8}, + + {immAddr(0), Yi0}, + {immAddr(1), Yi1}, + {immAddr(2), Yu2}, + {immAddr(3), Yu2}, + {immAddr(4), Yu7}, + {immAddr(86), Yu7}, + {immAddr(127), Yu7}, + {immAddr(128), Yu8}, + {immAddr(200), Yu8}, + {immAddr(255), Yu8}, + {immAddr(-1), Yi8}, + {immAddr(-100), Yi8}, + {immAddr(-128), Yi8}, + + {regAddr(REG_AL), Yal}, + {regAddr(REG_AX), Yax}, + {regAddr(REG_DL), Yrb}, + {regAddr(REG_DH), Yrb}, + {regAddr(REG_BH), Yrb}, + {regAddr(REG_CL), Ycl}, + {regAddr(REG_CX), Ycx}, + {regAddr(REG_DX), Yrx}, + {regAddr(REG_BX), Yrx}, + {regAddr(REG_F0), Yf0}, + {regAddr(REG_F3), Yrf}, + {regAddr(REG_F7), Yrf}, + {regAddr(REG_M0), Ymr}, + {regAddr(REG_M3), Ymr}, + {regAddr(REG_M7), Ymr}, + {regAddr(REG_X0), Yxr0}, + {regAddr(REG_X6), Yxr}, + {regAddr(REG_X13), Yxr}, + {regAddr(REG_X20), YxrEvex}, + {regAddr(REG_X31), YxrEvex}, + {regAddr(REG_Y0), Yyr}, + {regAddr(REG_Y6), Yyr}, + {regAddr(REG_Y13), Yyr}, + {regAddr(REG_Y20), YyrEvex}, + {regAddr(REG_Y31), YyrEvex}, + {regAddr(REG_Z0), Yzr}, + {regAddr(REG_Z6), Yzr}, + {regAddr(REG_K0), Yk0}, + {regAddr(REG_K5), Yknot0}, + {regAddr(REG_K7), Yknot0}, + {regAddr(REG_CS), Ycs}, + {regAddr(REG_SS), Yss}, + {regAddr(REG_DS), Yds}, + {regAddr(REG_ES), Yes}, + {regAddr(REG_FS), Yfs}, + {regAddr(REG_GS), Ygs}, + {regAddr(REG_TLS), Ytls}, + {regAddr(REG_GDTR), Ygdtr}, + {regAddr(REG_IDTR), Yidtr}, + {regAddr(REG_LDTR), Yldtr}, + {regAddr(REG_MSW), Ymsw}, + {regAddr(REG_TASK), Ytask}, + {regAddr(REG_CR0), Ycr0}, + {regAddr(REG_CR5), Ycr5}, + {regAddr(REG_CR8), Ycr8}, + {regAddr(REG_DR0), Ydr0}, + {regAddr(REG_DR5), Ydr5}, + {regAddr(REG_DR7), Ydr7}, + {regAddr(REG_TR0), Ytr0}, + {regAddr(REG_TR5), Ytr5}, + {regAddr(REG_TR7), Ytr7}, + + {regListAddr(REG_X0, REG_X3), YxrEvexMulti4}, + {regListAddr(REG_X4, REG_X7), YxrEvexMulti4}, + {regListAddr(REG_Y0, REG_Y3), YyrEvexMulti4}, + {regListAddr(REG_Y4, REG_Y7), YyrEvexMulti4}, + {regListAddr(REG_Z0, REG_Z3), YzrMulti4}, + {regListAddr(REG_Z4, REG_Z7), YzrMulti4}, + + {memAddr(REG_AL, REG_NONE), Ym}, + {memAddr(REG_AL, REG_SI), Ym}, + {memAddr(REG_SI, REG_CX), Ym}, + {memAddr(REG_DI, REG_X0), Yxvm}, + {memAddr(REG_DI, REG_X7), Yxvm}, + {memAddr(REG_DI, REG_Y0), Yyvm}, + {memAddr(REG_DI, REG_Y7), Yyvm}, + {memAddr(REG_DI, REG_Z0), Yzvm}, + {memAddr(REG_DI, REG_Z7), Yzvm}, + } + + oclassTestsAMD64 = []*oclassTest{ + {immAddr(-200), Ys32}, + {immAddr(500), Ys32}, + {immAddr(0x7FFFFFFF), Ys32}, + {immAddr(0x7FFFFFFF + 1), Yi32}, + {immAddr(0xFFFFFFFF), Yi32}, + {immAddr(0xFFFFFFFF + 1), Yi64}, + + {regAddr(REG_BPB), Yrb}, + {regAddr(REG_SIB), Yrb}, + {regAddr(REG_DIB), Yrb}, + {regAddr(REG_R8B), Yrb}, + {regAddr(REG_R12B), Yrb}, + {regAddr(REG_R8), Yrl}, + {regAddr(REG_R13), Yrl}, + {regAddr(REG_R15), Yrl}, + {regAddr(REG_SP), Yrl}, + {regAddr(REG_SI), Yrl}, + {regAddr(REG_DI), Yrl}, + {regAddr(REG_Z13), Yzr}, + {regAddr(REG_Z20), Yzr}, 
+ {regAddr(REG_Z31), Yzr}, + + {regListAddr(REG_X10, REG_X13), YxrEvexMulti4}, + {regListAddr(REG_X24, REG_X27), YxrEvexMulti4}, + {regListAddr(REG_Y10, REG_Y13), YyrEvexMulti4}, + {regListAddr(REG_Y24, REG_Y27), YyrEvexMulti4}, + {regListAddr(REG_Z10, REG_Z13), YzrMulti4}, + {regListAddr(REG_Z24, REG_Z27), YzrMulti4}, + + {memAddr(REG_DI, REG_X20), YxvmEvex}, + {memAddr(REG_DI, REG_X27), YxvmEvex}, + {memAddr(REG_DI, REG_Y20), YyvmEvex}, + {memAddr(REG_DI, REG_Y27), YyvmEvex}, + {memAddr(REG_DI, REG_Z20), Yzvm}, + {memAddr(REG_DI, REG_Z27), Yzvm}, + } + + oclassTests386 = []*oclassTest{ + {&obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: &obj.LSym{}}, Yi32}, + + {immAddr(-200), Yi32}, + + {regAddr(REG_SP), Yrl32}, + {regAddr(REG_SI), Yrl32}, + {regAddr(REG_DI), Yrl32}, + } + + // Add tests that are arch-independent for all sets. + oclassTestsAMD64 = append(oclassTestsAMD64, oclassTestsCommon...) + oclassTests386 = append(oclassTests386, oclassTestsCommon...) +} + +func TestOclass(t *testing.T) { + runTest := func(t *testing.T, ctxt *obj.Link, tests []*oclassTest) { + var p obj.Prog + for _, test := range tests { + have := oclass(ctxt, &p, test.arg) + if have != test.want { + t.Errorf("oclass(%q):\nhave: %d\nwant: %d", + obj.Dconv(&p, test.arg), have, test.want) + } + } + } + + // TODO(quasilyte): test edge cases for Hsolaris, etc? + + t.Run("linux/AMD64", func(t *testing.T) { + ctxtAMD64 := obj.Linknew(&Linkamd64) + ctxtAMD64.Headtype = objabi.Hlinux // See #32028 + runTest(t, ctxtAMD64, oclassTestsAMD64) + }) + + t.Run("linux/386", func(t *testing.T) { + ctxt386 := obj.Linknew(&Link386) + ctxt386.Headtype = objabi.Hlinux // See #32028 + runTest(t, ctxt386, oclassTests386) + }) +} + +func TestRegisterListEncDec(t *testing.T) { + tests := []struct { + printed string + reg0 int16 + reg1 int16 + }{ + {"[R10-R13]", REG_R10, REG_R13}, + {"[X0-AX]", REG_X0, REG_AX}, + + {"[X0-X3]", REG_X0, REG_X3}, + {"[X21-X24]", REG_X21, REG_X24}, + + {"[Y0-Y3]", REG_Y0, REG_Y3}, + {"[Y21-Y24]", REG_Y21, REG_Y24}, + + {"[Z0-Z3]", REG_Z0, REG_Z3}, + {"[Z21-Z24]", REG_Z21, REG_Z24}, + } + + for _, test := range tests { + enc := EncodeRegisterRange(test.reg0, test.reg1) + reg0, reg1 := decodeRegisterRange(enc) + + if int16(reg0) != test.reg0 { + t.Errorf("%s reg0 mismatch: have %d, want %d", + test.printed, reg0, test.reg0) + } + if int16(reg1) != test.reg1 { + t.Errorf("%s reg1 mismatch: have %d, want %d", + test.printed, reg1, test.reg1) + } + wantPrinted := test.printed + if rlconv(enc) != wantPrinted { + t.Errorf("%s string mismatch: have %s, want %s", + test.printed, rlconv(enc), wantPrinted) + } + } +} + +func TestRegIndex(t *testing.T) { + tests := []struct { + regFrom int + regTo int + }{ + {REG_AL, REG_R15B}, + {REG_AX, REG_R15}, + {REG_M0, REG_M7}, + {REG_K0, REG_K7}, + {REG_X0, REG_X31}, + {REG_Y0, REG_Y31}, + {REG_Z0, REG_Z31}, + } + + for _, test := range tests { + for index, reg := 0, test.regFrom; reg <= test.regTo; index, reg = index+1, reg+1 { + have := regIndex(int16(reg)) + want := index + if have != want { + regName := rconv(int(reg)) + t.Errorf("regIndex(%s):\nhave: %d\nwant: %d", + regName, have, want) + } + } + } +} + +// TestPCALIGN verifies the correctness of the PCALIGN by checking if the +// code can be aligned to the alignment value. 
+func TestPCALIGN(t *testing.T) { + testenv.MustHaveGoBuild(t) + dir := t.TempDir() + tmpfile := filepath.Join(dir, "test.s") + tmpout := filepath.Join(dir, "test.o") + + var testCases = []struct { + name string + code string + out string + }{ + { + name: "8-byte alignment", + code: "TEXT ·foo(SB),$0-0\nMOVQ $0, AX\nPCALIGN $8\nMOVQ $1, BX\nRET\n", + out: `0x0008\s00008\s\(.*\)\tMOVQ\t\$1,\sBX`, + }, + { + name: "16-byte alignment", + code: "TEXT ·foo(SB),$0-0\nMOVQ $0, AX\nPCALIGN $16\nMOVQ $2, CX\nRET\n", + out: `0x0010\s00016\s\(.*\)\tMOVQ\t\$2,\sCX`, + }, + } + + for _, test := range testCases { + if err := os.WriteFile(tmpfile, []byte(test.code), 0644); err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=amd64", "GOOS=linux") + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("The %s build failed: %v, output: %s", test.name, err, out) + continue + } + + matched, err := regexp.MatchString(test.out, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The %s testing failed!\ninput: %s\noutput: %s\n", test.name, test.code, out) + } + } +} diff --git a/src/cmd/internal/obj/x86/avx_optabs.go b/src/cmd/internal/obj/x86/avx_optabs.go new file mode 100644 index 0000000..b8ff469 --- /dev/null +++ b/src/cmd/internal/obj/x86/avx_optabs.go @@ -0,0 +1,4628 @@ +// Code generated by x86avxgen. DO NOT EDIT. + +package x86 + +// VEX instructions that come in two forms: +// VTHING xmm2/m128, xmmV, xmm1 +// VTHING ymm2/m256, ymmV, ymm1 +// +// The opcode array in the corresponding Optab entry +// should contain the (VEX prefixes, opcode byte) pair +// for each of the two forms. +// For example, the entries for VPXOR are: +// +// VPXOR xmm2/m128, xmmV, xmm1 +// VEX.NDS.128.66.0F.WIG EF /r +// +// VPXOR ymm2/m256, ymmV, ymm1 +// VEX.NDS.256.66.0F.WIG EF /r +// +// Produce this optab entry: +// +// {AVPXOR, yvex_xy3, Pavx, opBytes{vex128|vex66|vex0F|vexWIG, 0xEF, vex256|vex66|vex0F|vexWIG, 0xEF}} +// +// VEX requires at least 2 bytes inside opBytes: +// - VEX prefixes (vex-prefixed constants) +// - Opcode byte +// +// EVEX instructions extend VEX form variety: +// VTHING zmm2/m512, zmmV, zmm1 -- implicit K0 (merging) +// VTHING zmm2/m512, zmmV, K, zmm1 -- explicit K mask (can't use K0) +// +// EVEX requires at least 3 bytes inside opBytes: +// - EVEX prefixes (evex-prefixed constants); similar to VEX +// - Displacement multiplier info (scale / broadcast scale) +// - Opcode byte; similar to VEX +// +// Both VEX and EVEX instructions may have opdigit (opcode extension) byte +// which follows the primary opcode byte. +// Because it can only have value of 0-7, it is written in octal notation. +// +// x86.csv can be very useful for figuring out proper [E]VEX parts. 
+ +var _yandnl = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yrl, Yrl}}, +} + +var _ybextrl = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yrl, Yml, Yrl}}, +} + +var _yblsil = []ytab{ + {zcase: Zvex_rm_r_vo, zoffset: 3, args: argList{Yml, Yrl}}, +} + +var _ykaddb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yk, Yk}}, +} + +var _ykmovb = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yk, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yrl}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ykm, Yk}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yrl, Yk}}, +} + +var _yknotb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yk, Yk}}, +} + +var _ykshiftlb = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yu8, Yk, Yk}}, +} + +var _yrorxl = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yml, Yrl}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yml, Yrl}}, +} + +var _yv4fmaddps = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YzrMulti4, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Ym, YzrMulti4, Yknot0, Yzr}}, +} + +var _yv4fmaddss = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YxrEvexMulti4, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Ym, YxrEvexMulti4, Yknot0, YxrEvex}}, +} + +var _yvaddpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, +} + +var _yvaddsd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvaddsubpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, +} + +var _yvaesdec = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yzm, Yzr, Yzr}}, +} + +var _yvaesimc = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, +} + +var _yvaeskeygenassist = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, +} + +var _yvalignd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + 
{zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvandnpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvblendmpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvblendpd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, +} + +var _yvblendvpd = []ytab{ + {zcase: Zvex_hr_rm_v_r, zoffset: 2, args: argList{Yxr, Yxm, Yxr, Yxr}}, + {zcase: Zvex_hr_rm_v_r, zoffset: 2, args: argList{Yyr, Yym, Yyr, Yyr}}, +} + +var _yvbroadcastf128 = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, +} + +var _yvbroadcastf32x2 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastf32x4 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, Yzr}}, +} + +var _yvbroadcastf32x8 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, Yzr}}, +} + +var _yvbroadcasti32x2 = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastsd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvbroadcastss = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: 
argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvcmppd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, Yk}}, +} + +var _yvcmpsd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, Yk}}, +} + +var _yvcomisd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex}}, +} + +var _yvcompresspd = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, +} + +var _yvcvtdq2pd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, +} + +var _yvcvtdq2ps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2dq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2dqx = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, +} 
+ +var _yvcvtpd2dqy = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtpd2qq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtpd2udqx = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtpd2udqy = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YxrEvex}}, +} + +var _yvcvtph2ps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtps2ph = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yyr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yyr, Yxm}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YymEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, Yknot0, YymEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YxrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YyrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YyrEvex, Yknot0, YxmEvex}}, +} + +var _yvcvtps2qq = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, +} + +var _yvcvtsd2si = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yrl}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, Yrl}}, +} + +var _yvcvtsd2usil = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, Yrl}}, +} + +var _yvcvtsi2sdl = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex, YxrEvex}}, +} + +var _yvcvtudq2pd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: 
argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, Yzr}}, +} + +var _yvcvtusi2sdl = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex, YxrEvex}}, +} + +var _yvdppd = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, +} + +var _yvexp2pd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvexpandpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvextractf128 = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yyr, Yxm}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yyr, Yxm}}, +} + +var _yvextractf32x4 = []ytab{ + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, YyrEvex, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YxmEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, Yknot0, YxmEvex}}, +} + +var _yvextractf32x8 = []ytab{ + {zcase: Zevex_i_r_rm, zoffset: 0, args: argList{Yu8, Yzr, YymEvex}}, + {zcase: Zevex_i_r_k_rm, zoffset: 3, args: argList{Yu8, Yzr, Yknot0, YymEvex}}, +} + +var _yvextractps = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yml}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yml}}, + {zcase: Zevex_i_r_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yml}}, +} + +var _yvfixupimmpd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, +} + +var _yvfixupimmsd = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvfpclasspdx = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, Yk}}, +} + +var _yvfpclasspdy = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, Yk}}, +} + +var _yvfpclasspdz = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yk}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yk}}, +} + +var _yvgatherdpd = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yyr, Yxvm, Yyr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: 
argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, Yzr}}, +} + +var _yvgatherdps = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yyr, Yyvm, Yyr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzvm, Yknot0, Yzr}}, +} + +var _yvgatherpf0dpd = []ytab{ + {zcase: Zevex_k_rmo, zoffset: 4, args: argList{Yknot0, YyvmEvex}}, +} + +var _yvgatherpf0dps = []ytab{ + {zcase: Zevex_k_rmo, zoffset: 4, args: argList{Yknot0, Yzvm}}, +} + +var _yvgatherqps = []ytab{ + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yxvm, Yxr}}, + {zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Yxr, Yyvm, Yxr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YyvmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzvm, Yknot0, YyrEvex}}, +} + +var _yvgetexpsd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvgetmantpd = []ytab{ + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, +} + +var _yvgf2p8affineinvqb = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertf128 = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yyr, Yyr}}, +} + +var _yvinsertf32x4 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertf32x8 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvinsertps = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, +} + +var _yvlddqu = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: 
argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, +} + +var _yvldmxcsr = []ytab{ + {zcase: Zvex_rm_v_ro, zoffset: 3, args: argList{Ym}}, +} + +var _yvmaskmovdqu = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr}}, +} + +var _yvmaskmovpd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxr, Ym}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yyr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr, Yyr}}, +} + +var _yvmovapd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yml}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Yml}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex}}, +} + +var _yvmovddup = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovdqa = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Yym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, +} + +var _yvmovdqa32 = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YymEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, Yzm}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzm}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: 
argList{YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yzm, Yknot0, Yzr}}, +} + +var _yvmovhlps = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxrEvex, YxrEvex, YxrEvex}}, +} + +var _yvmovhpd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YxrEvex, YxrEvex}}, +} + +var _yvmovmskpd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yrl}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yyr, Yrl}}, +} + +var _yvmovntdq = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yyr, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YyrEvex, Ym}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{Yzr, Ym}}, +} + +var _yvmovntdqa = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Ym, Yzr}}, +} + +var _yvmovq = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yml}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxm}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yml, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, Yml}}, + {zcase: Zevex_r_v_rm, zoffset: 3, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yml, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxmEvex, YxrEvex}}, +} + +var _yvmovsd = []ytab{ + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Yxr, Ym}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxr, Yxr, Yxr}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_r_v_k_rm, zoffset: 3, args: argList{YxrEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, Ym}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, Ym}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Ym, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Ym, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxrEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxrEvex, YxrEvex, Yknot0, YxrEvex}}, +} + +var _yvpbroadcastb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yrl, YxrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yrl, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yrl, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{Yrl, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex}}, + 
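+ // The zoffset-0 row above reuses the opcode bytes of its masked (Yknot0)
+ // twin below; the tables pair unmasked and masked EVEX forms this way
+ // throughout. The Yrl rows above them are the GPR-source broadcasts
+ // (VPBROADCASTB x/y/zmm, r32), which exist only as EVEX encodings.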
{zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr}}, + {zcase: Zevex_rm_k_r, zoffset: 3, args: argList{YxmEvex, Yknot0, Yzr}}, +} + +var _yvpbroadcastmb2q = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yk, Yzr}}, +} + +var _yvpclmulqdq = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yxm, Yxr, Yxr}}, + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yzr}}, +} + +var _yvpcmpb = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yk}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yk}}, +} + +var _yvpcmpeqb = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yk}}, +} + +var _yvperm2f128 = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yym, Yyr, Yyr}}, +} + +var _yvpermd = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermilpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, 
Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpermq = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvpextrw = []ytab{ + {zcase: Zvex_i_r_rm, zoffset: 0, args: argList{Yu8, Yxr, Yml}}, + {zcase: Zvex_i_r_rm, zoffset: 2, args: argList{Yi8, Yxr, Yml}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxr, Yrl}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxr, Yrl}}, + {zcase: Zevex_i_r_rm, zoffset: 3, args: argList{Yu8, YxrEvex, Yml}}, + {zcase: Zevex_i_rm_r, zoffset: 3, args: argList{Yu8, YxrEvex, Yrl}}, +} + +var _yvpinsrb = []ytab{ + {zcase: Zvex_i_rm_v_r, zoffset: 2, args: argList{Yu8, Yml, Yxr, Yxr}}, + {zcase: Zevex_i_rm_v_r, zoffset: 3, args: argList{Yu8, Yml, YxrEvex, YxrEvex}}, +} + +var _yvpmovb2m = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YxrEvex, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{YyrEvex, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 3, args: argList{Yzr, Yk}}, +} + +var _yvpmovdb = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YxmEvex}}, +} + +var _yvpmovdw = []ytab{ + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YxrEvex, YxmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{YyrEvex, YxmEvex}}, + 
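+ // In the VPMOV* down-conversion tables the register operand comes first
+ // because it is the source: Zevex_r_v_rm and Zevex_r_k_rm write the
+ // narrowed elements to the reg/mem operand on the right. The VPSCATTER*
+ // tables below likewise list only Yknot0 rows, since EVEX scatters cannot
+ // be encoded without a mask register.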
{zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxmEvex}}, + {zcase: Zevex_r_v_rm, zoffset: 0, args: argList{Yzr, YymEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YymEvex}}, +} + +var _yvprold = []ytab{ + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, +} + +var _yvpscatterdd = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YyvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, Yzvm}}, +} + +var _yvpscatterdq = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{Yzr, Yknot0, YyvmEvex}}, +} + +var _yvpscatterqd = []ytab{ + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YxvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YxrEvex, Yknot0, YyvmEvex}}, + {zcase: Zevex_r_k_rm, zoffset: 3, args: argList{YyrEvex, Yknot0, Yzvm}}, +} + +var _yvpshufbitqmb = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, Yk}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yk}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yk}}, +} + +var _yvpshufd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yknot0, Yzr}}, +} + +var _yvpslld = []ytab{ + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yyr, Yyr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yyr, Yyr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yyr, Yyr}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, 
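+ // The *_vo cases carry the destination in VEX/EVEX.vvvv and append a
+ // /digit opcode extension, so the immediate-count shift and rotate rows
+ // consume one extra opcode byte (zoffset 3 for VEX, 4 for masked EVEX).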
args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvpslldq = []ytab{ + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yxr, Yxr}}, + {zcase: Zvex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yyr, Yyr}}, + {zcase: Zvex_i_rm_vo, zoffset: 3, args: argList{Yi8, Yyr, Yyr}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 4, args: argList{Yu8, Yzm, Yzr}}, +} + +var _yvpsraq = []ytab{ + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YxmEvex, YxrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YxmEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, YymEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_vo, zoffset: 0, args: argList{Yu8, Yzm, Yzr}}, + {zcase: Zevex_i_rm_k_vo, zoffset: 4, args: argList{Yu8, Yzm, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, Yzr, Yknot0, Yzr}}, +} + +var _yvptest = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr}}, + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yym, Yyr}}, +} + +var _yvrcpss = []ytab{ + {zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Yxm, Yxr, Yxr}}, +} + +var _yvroundpd = []ytab{ + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yxm, Yxr}}, + {zcase: Zvex_i_rm_r, zoffset: 0, args: argList{Yu8, Yym, Yyr}}, + {zcase: Zvex_i_rm_r, zoffset: 2, args: argList{Yi8, Yym, Yyr}}, +} + +var _yvscalefpd = []ytab{ + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{Yzm, Yzr, Yzr}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{Yzm, Yzr, Yknot0, Yzr}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YxmEvex, YxrEvex, YxrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YxmEvex, YxrEvex, Yknot0, YxrEvex}}, + {zcase: Zevex_rm_v_r, zoffset: 0, args: argList{YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_rm_v_k_r, zoffset: 3, args: argList{YymEvex, YyrEvex, Yknot0, YyrEvex}}, +} + +var _yvshuff32x4 = []ytab{ + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, YymEvex, YyrEvex, YyrEvex}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, YymEvex, YyrEvex, Yknot0, YyrEvex}}, + {zcase: Zevex_i_rm_v_r, zoffset: 0, args: argList{Yu8, Yzm, Yzr, Yzr}}, + {zcase: Zevex_i_rm_v_k_r, zoffset: 3, args: argList{Yu8, Yzm, Yzr, Yknot0, Yzr}}, +} + +var _yvzeroall = 
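+ // VZEROALL and VZEROUPPER take no operands at all, hence the empty argList.
+ // The avxOptab table that follows maps each VEX/EVEX mnemonic to one of
+ // these ytabs plus its opcode byte groups.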
[]ytab{ + {zcase: Zvex, zoffset: 2, args: argList{}}, +} + +var avxOptab = [...]Optab{ + {as: AANDNL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF2, + }}, + {as: AANDNQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF2, + }}, + {as: ABEXTRL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF7, + }}, + {as: ABEXTRQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF7, + }}, + {as: ABLSIL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 03, + }}, + {as: ABLSIQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 03, + }}, + {as: ABLSMSKL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 02, + }}, + {as: ABLSMSKQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 02, + }}, + {as: ABLSRL, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF3, 01, + }}, + {as: ABLSRQ, ytab: _yblsil, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF3, 01, + }}, + {as: ABZHIL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW0, 0xF5, + }}, + {as: ABZHIQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F38 | vexW1, 0xF5, + }}, + {as: AKADDB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x4A, + }}, + {as: AKADDD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x4A, + }}, + {as: AKADDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x4A, + }}, + {as: AKADDW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x4A, + }}, + {as: AKANDB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x41, + }}, + {as: AKANDD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x41, + }}, + {as: AKANDNB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x42, + }}, + {as: AKANDND, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x42, + }}, + {as: AKANDNQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x42, + }}, + {as: AKANDNW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x42, + }}, + {as: AKANDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x41, + }}, + {as: AKANDW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x41, + }}, + {as: AKMOVB, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x91, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x90, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x92, + }}, + {as: AKMOVD, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x91, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x90, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x92, + }}, + {as: AKMOVQ, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x91, + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x93, + avxEscape | vex128 | vex0F | vexW1, 0x90, + avxEscape | vex128 | vexF2 | vex0F | vexW1, 
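+ // opBytes holds one group per ytab row, in row order. For _ykmovb that is
+ // four two-byte groups: 0x91 store-to-memory, 0x93 GPR<-K, 0x90 K<-K/mem,
+ // 0x92 K<-GPR; per-row groups are what let KMOVD mix 66/W1 for its
+ // K-register encodings with F2/W0 for its GPR ones.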
0x92, + }}, + {as: AKMOVW, ytab: _ykmovb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x91, + avxEscape | vex128 | vex0F | vexW0, 0x93, + avxEscape | vex128 | vex0F | vexW0, 0x90, + avxEscape | vex128 | vex0F | vexW0, 0x92, + }}, + {as: AKNOTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x44, + }}, + {as: AKNOTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x44, + }}, + {as: AKNOTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x44, + }}, + {as: AKNOTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x44, + }}, + {as: AKORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x45, + }}, + {as: AKORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x45, + }}, + {as: AKORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x45, + }}, + {as: AKORTESTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x98, + }}, + {as: AKORTESTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x98, + }}, + {as: AKORTESTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x98, + }}, + {as: AKORTESTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x98, + }}, + {as: AKORW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x45, + }}, + {as: AKSHIFTLB, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x32, + }}, + {as: AKSHIFTLD, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x33, + }}, + {as: AKSHIFTLQ, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x33, + }}, + {as: AKSHIFTLW, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x32, + }}, + {as: AKSHIFTRB, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x30, + }}, + {as: AKSHIFTRD, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x31, + }}, + {as: AKSHIFTRQ, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x31, + }}, + {as: AKSHIFTRW, ytab: _ykshiftlb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x30, + }}, + {as: AKTESTB, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x99, + }}, + {as: AKTESTD, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x99, + }}, + {as: AKTESTQ, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW1, 0x99, + }}, + {as: AKTESTW, ytab: _yknotb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x99, + }}, + {as: AKUNPCKBW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x4B, + }}, + {as: AKUNPCKDQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x4B, + }}, + {as: AKUNPCKWD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x4B, + }}, + {as: AKXNORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x46, + }}, + {as: AKXNORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | 
vex256 | vex66 | vex0F | vexW1, 0x46, + }}, + {as: AKXNORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x46, + }}, + {as: AKXNORW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x46, + }}, + {as: AKXORB, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x47, + }}, + {as: AKXORD, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW1, 0x47, + }}, + {as: AKXORQ, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW1, 0x47, + }}, + {as: AKXORW, ytab: _ykaddb, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x47, + }}, + {as: AMULXL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0xF6, + }}, + {as: AMULXQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF6, + }}, + {as: APDEPL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0xF5, + }}, + {as: APDEPQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF5, + }}, + {as: APEXTL, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0xF5, + }}, + {as: APEXTQ, ytab: _yandnl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW1, 0xF5, + }}, + {as: ARORXL, ytab: _yrorxl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F3A | vexW0, 0xF0, + }}, + {as: ARORXQ, ytab: _yrorxl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F3A | vexW1, 0xF0, + }}, + {as: ASARXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0xF7, + }}, + {as: ASARXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F38 | vexW1, 0xF7, + }}, + {as: ASHLXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xF7, + }}, + {as: ASHLXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xF7, + }}, + {as: ASHRXL, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0xF7, + }}, + {as: ASHRXQ, ytab: _ybextrl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F38 | vexW1, 0xF7, + }}, + {as: AV4FMADDPS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x9A, + }}, + {as: AV4FMADDSS, ytab: _yv4fmaddss, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x9B, + }}, + {as: AV4FNMADDPS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xAA, + }}, + {as: AV4FNMADDSS, ytab: _yv4fmaddss, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xAB, + }}, + {as: AVADDPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x58, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x58, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x58, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x58, + }}, + {as: AVADDPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 
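+ // The first entry of every group ORs together the prefix fields the
+ // encoder needs: avxEscape, a vector length (vex128/vex256,
+ // evex128/256/512), an optional mandatory prefix (66/F2/F3), the opcode
+ // map (0F/0F38/0F3A), and the W width bit (W0/W1).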
| vex0F | vexW0, 0x58, + avxEscape | vex256 | vex0F | vexW0, 0x58, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x58, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x58, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x58, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x58, + }}, + {as: AVADDSUBPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD0, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD0, + }}, + {as: AVADDSUBPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xD0, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xD0, + }}, + {as: AVAESDEC, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDE, + }}, + {as: AVAESDECLAST, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDF, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDF, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDF, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDF, + }}, + {as: AVAESENC, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDC, + }}, + {as: AVAESENCLAST, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDD, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xDD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0xDD, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0xDD, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0xDD, + }}, + {as: AVAESIMC, ytab: _yvaesimc, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xDB, + }}, + {as: AVAESKEYGENASSIST, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0xDF, + }}, + {as: AVALIGND, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x03, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x03, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x03, + }}, + {as: AVALIGNQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x03, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | 
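+ // Within an EVEX flags byte, evexN* gives the disp8 compression scale for
+ // the plain memory form and evexBcstN* the element size used when the
+ // operand is an embedded broadcast; evexZeroingEnabled means the
+ // instruction accepts the .Z zeroing-masking suffix.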
evexZeroingEnabled, 0x03, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x03, + }}, + {as: AVANDNPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x55, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x55, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x55, + }}, + {as: AVANDNPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x55, + avxEscape | vex256 | vex0F | vexW0, 0x55, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x55, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x55, + }}, + {as: AVANDPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x54, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x54, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x54, + }}, + {as: AVANDPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x54, + avxEscape | vex256 | vex0F | vexW0, 0x54, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x54, + }}, + {as: AVBLENDMPD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x65, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x65, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x65, + }}, + {as: AVBLENDMPS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x65, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x65, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x65, + }}, + {as: AVBLENDPD, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0D, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0D, + }}, + {as: AVBLENDPS, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0C, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0C, + }}, + {as: AVBLENDVPD, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4B, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x4B, + }}, + {as: AVBLENDVPS, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4A, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x4A, + }}, + {as: AVBROADCASTF128, ytab: _yvbroadcastf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1A, + }}, + {as: AVBROADCASTF32X2, ytab: 
_yvbroadcastf32x2, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x19, + }}, + {as: AVBROADCASTF32X4, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1A, + }}, + {as: AVBROADCASTF32X8, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVBROADCASTF64X2, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x1A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x1A, + }}, + {as: AVBROADCASTF64X4, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVBROADCASTI128, ytab: _yvbroadcastf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x5A, + }}, + {as: AVBROADCASTI32X2, ytab: _yvbroadcasti32x2, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x59, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x59, + }}, + {as: AVBROADCASTI32X4, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x5A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x5A, + }}, + {as: AVBROADCASTI32X8, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x5B, + }}, + {as: AVBROADCASTI64X2, ytab: _yvbroadcastf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x5A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x5A, + }}, + {as: AVBROADCASTI64X4, ytab: _yvbroadcastf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x5B, + }}, + {as: AVBROADCASTSD, ytab: _yvbroadcastsd, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x19, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x19, + }}, + {as: AVBROADCASTSS, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x18, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x18, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x18, + }}, + {as: AVCMPPD, ytab: _yvcmppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xC2, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled, 0xC2, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8, 0xC2, + avxEscape | evex256 | evex66 | evex0F | 
evexW1, evexN32 | evexBcstN8, 0xC2, + }}, + {as: AVCMPPS, ytab: _yvcmppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xC2, + avxEscape | vex256 | vex0F | vexW0, 0xC2, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled, 0xC2, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4, 0xC2, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4, 0xC2, + }}, + {as: AVCMPSD, ytab: _yvcmpsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xC2, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0xC2, + }}, + {as: AVCMPSS, ytab: _yvcmpsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0xC2, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0xC2, + }}, + {as: AVCOMISD, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x2F, + }}, + {as: AVCOMISS, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2F, + avxEscape | evex128 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2F, + }}, + {as: AVCOMPRESSPD, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8A, + }}, + {as: AVCOMPRESSPS, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8A, + }}, + {as: AVCVTDQ2PD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0xE6, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0xE6, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xE6, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTDQ2PS, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTPD2DQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2DQX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2DQY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xE6, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTPD2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 
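+ // The X/Y-suffixed variants below exist because Intel syntax infers the
+ // source size of these narrowing conversions from the register operands;
+ // Go spells it out instead: X forms take a 128-bit source, Y forms a
+ // 256-bit one.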
| evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2PSX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2PSY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5A, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPD2QQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x7B, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7B, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7B, + }}, + {as: AVCVTPD2UDQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UDQX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UDQY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPD2UQQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPH2PS, ytab: _yvcvtph2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x13, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x13, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexSaeEnabled | evexZeroingEnabled, 0x13, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x13, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x13, + }}, + {as: AVCVTPS2DQ, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTPS2PD, ytab: _yvcvtph2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5A, + avxEscape | vex256 | vex0F | vexW0, 0x5A, + avxEscape | evex512 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5A, + avxEscape | evex128 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x5A, + avxEscape | evex256 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTPS2PH, ytab: _yvcvtps2ph, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x1D, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x1D, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexSaeEnabled | evexZeroingEnabled, 0x1D, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN8 | 
evexZeroingEnabled, 0x1D, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x1D, + }}, + {as: AVCVTPS2QQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x7B, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7B, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7B, + }}, + {as: AVCVTPS2UDQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTPS2UQQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x79, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x79, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x79, + }}, + {as: AVCVTQQ2PD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xE6, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTQQ2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTQQ2PSX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTQQ2PSY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTSD2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2D, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSD2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2D, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSD2SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTSD2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSD2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSI2SDL, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2A, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN4, 0x2A, + }}, + {as: AVCVTSI2SDQ, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2A, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSI2SSL, 
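+ // As elsewhere in the assembler, the L/Q suffix selects the GPR width:
+ // L is the 32-bit (W0) form, Q the 64-bit (W1) form.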
ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2A, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSI2SSQ, ytab: _yvcvtsi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2A, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x2A, + }}, + {as: AVCVTSS2SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5A, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5A, + }}, + {as: AVCVTSS2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2D, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSS2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2D, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexRoundingEnabled, 0x2D, + }}, + {as: AVCVTSS2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTSS2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexRoundingEnabled, 0x79, + }}, + {as: AVCVTTPD2DQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2DQX, ytab: _yvcvtpd2dqx, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2DQY, ytab: _yvcvtpd2dqy, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE6, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xE6, + }}, + {as: AVCVTTPD2QQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTTPD2UDQ, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UDQX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UDQY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPD2UQQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPS2DQ, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5B, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x5B, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | 
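+ // The truncating CVTT* conversions carry evexSaeEnabled rather than
+ // evexRoundingEnabled: truncation fixes the rounding mode, so only
+ // suppress-all-exceptions remains meaningful in EVEX.b.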
evexZeroingEnabled, 0x5B, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5B, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5B, + }}, + {as: AVCVTTPS2QQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTTPS2UDQ, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTPS2UQQ, ytab: _yvcvtps2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x78, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x78, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x78, + }}, + {as: AVCVTTSD2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x2C, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSD2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW1, 0x2C, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSD2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN8 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSD2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSS2SI, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x2C, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSS2SIQ, ytab: _yvcvtsd2si, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW1, 0x2C, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexSaeEnabled, 0x2C, + }}, + {as: AVCVTTSS2USIL, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTTSS2USIQ, ytab: _yvcvtsd2usil, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN4 | evexSaeEnabled, 0x78, + }}, + {as: AVCVTUDQ2PD, ytab: _yvcvtudq2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN8 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUDQ2PS, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexBcstN4 | 
evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x7A, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PS, ytab: _yvcvtpd2dq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PSX, ytab: _yvcvtpd2udqx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUQQ2PSY, ytab: _yvcvtpd2udqy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7A, + }}, + {as: AVCVTUSI2SDL, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN4, 0x7B, + }}, + {as: AVCVTUSI2SDQ, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x7B, + }}, + {as: AVCVTUSI2SSL, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled, 0x7B, + }}, + {as: AVCVTUSI2SSQ, ytab: _yvcvtusi2sdl, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8 | evexRoundingEnabled, 0x7B, + }}, + {as: AVDBPSADBW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x42, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexZeroingEnabled, 0x42, + }}, + {as: AVDIVPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5E, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5E, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5E, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5E, + avxEscape | vex256 | vex0F | vexW0, 0x5E, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5E, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5E, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + }}, + {as: AVDIVSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5E, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5E, + }}, + {as: AVDPPD, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x41, + }}, + {as: AVDPPS, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x40, + avxEscape | vex256 | 
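+ // DPPD and DPPS were never promoted to EVEX, so their entries stop at the
+ // VEX groups; DPPD also has no 256-bit form, which is why _yvdppd holds a
+ // single 128-bit row.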
vex66 | vex0F3A | vexW0, 0x40, + }}, + {as: AVEXP2PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xC8, + }}, + {as: AVEXP2PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xC8, + }}, + {as: AVEXPANDPD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x88, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x88, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x88, + }}, + {as: AVEXPANDPS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x88, + }}, + {as: AVEXTRACTF128, ytab: _yvextractf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x19, + }}, + {as: AVEXTRACTF32X4, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x19, + }}, + {as: AVEXTRACTF32X8, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVEXTRACTF64X2, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x19, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x19, + }}, + {as: AVEXTRACTF64X4, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x1B, + }}, + {as: AVEXTRACTI128, ytab: _yvextractf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x39, + }}, + {as: AVEXTRACTI32X4, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x39, + }}, + {as: AVEXTRACTI32X8, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x3B, + }}, + {as: AVEXTRACTI64X2, ytab: _yvextractf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x39, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x39, + }}, + {as: AVEXTRACTI64X4, ytab: _yvextractf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x3B, + }}, + {as: AVEXTRACTPS, ytab: _yvextractps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x17, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x17, + }}, + {as: AVFIXUPIMMPD, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x54, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, 
evexN32 | evexBcstN8 | evexZeroingEnabled, 0x54, + }}, + {as: AVFIXUPIMMPS, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x54, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x54, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x54, + }}, + {as: AVFIXUPIMMSD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x55, + }}, + {as: AVFIXUPIMMSS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x55, + }}, + {as: AVFMADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x98, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x98, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x98, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x98, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x98, + }}, + {as: AVFMADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x98, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x98, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x98, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x98, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x98, + }}, + {as: AVFMADD132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x99, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x99, + }}, + {as: AVFMADD132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x99, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x99, + }}, + {as: AVFMADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA8, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA8, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA8, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA8, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA8, + }}, + {as: AVFMADD213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA8, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA8, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA8, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xA8, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA8, + }}, + {as: AVFMADD213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA9, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA9, + }}, + {as: AVFMADD213SS, ytab: 
_yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA9, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA9, + }}, + {as: AVFMADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB8, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB8, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB8, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB8, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB8, + }}, + {as: AVFMADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB8, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB8, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB8, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB8, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB8, + }}, + {as: AVFMADD231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB9, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB9, + }}, + {as: AVFMADD231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB9, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB9, + }}, + {as: AVFMADDSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x96, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x96, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x96, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x96, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x96, + }}, + {as: AVFMADDSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x96, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x96, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x96, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x96, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x96, + }}, + {as: AVFMADDSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA6, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA6, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA6, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA6, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA6, + }}, + {as: AVFMADDSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA6, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA6, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA6, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 
| evexBcstN4 | evexZeroingEnabled, 0xA6, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA6, + }}, + {as: AVFMADDSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB6, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB6, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB6, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB6, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB6, + }}, + {as: AVFMADDSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB6, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB6, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB6, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB6, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB6, + }}, + {as: AVFMSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9A, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9A, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9A, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9A, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9A, + }}, + {as: AVFMSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9A, + }}, + {as: AVFMSUB132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9B, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9B, + }}, + {as: AVFMSUB132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9B, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9B, + }}, + {as: AVFMSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAA, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAA, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAA, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAA, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAA, + }}, + {as: AVFMSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAA, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAA, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAA, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAA, + avxEscape | evex256 | 
evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAA, + }}, + {as: AVFMSUB213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAB, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAB, + }}, + {as: AVFMSUB213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAB, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAB, + }}, + {as: AVFMSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBA, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBA, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBA, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBA, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBA, + }}, + {as: AVFMSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBA, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBA, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBA, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBA, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBA, + }}, + {as: AVFMSUB231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBB, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBB, + }}, + {as: AVFMSUB231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBB, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBB, + }}, + {as: AVFMSUBADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x97, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x97, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x97, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x97, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x97, + }}, + {as: AVFMSUBADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x97, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x97, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x97, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x97, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x97, + }}, + {as: AVFMSUBADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xA7, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xA7, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xA7, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xA7, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xA7, + }}, + {as: 
AVFMSUBADD213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xA7, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xA7, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xA7, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xA7, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xA7, + }}, + {as: AVFMSUBADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xB7, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xB7, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xB7, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB7, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB7, + }}, + {as: AVFMSUBADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xB7, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xB7, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xB7, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xB7, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xB7, + }}, + {as: AVFNMADD132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9C, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9C, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9C, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9C, + }}, + {as: AVFNMADD132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9C, + }}, + {as: AVFNMADD132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9D, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9D, + }}, + {as: AVFNMADD132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9D, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9D, + }}, + {as: AVFNMADD213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAC, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAC, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAC, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAC, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAC, + }}, + {as: AVFNMADD213PS, ytab: _yvaddpd, prefix: Pavx, op: 
opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAC, + }}, + {as: AVFNMADD213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAD, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAD, + }}, + {as: AVFNMADD213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAD, + }}, + {as: AVFNMADD231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBC, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBC, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBC, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBC, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBC, + }}, + {as: AVFNMADD231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBC, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBC, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBC, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBC, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBC, + }}, + {as: AVFNMADD231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBD, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBD, + }}, + {as: AVFNMADD231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBD, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBD, + }}, + {as: AVFNMSUB132PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9E, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x9E, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9E, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x9E, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x9E, + }}, + {as: AVFNMSUB132PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x9E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9E, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x9E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x9E, + }}, + {as: AVFNMSUB132SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x9F, + avxEscape | evex128 | evex66 | 
evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x9F, + }}, + {as: AVFNMSUB132SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x9F, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x9F, + }}, + {as: AVFNMSUB213PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAE, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xAE, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAE, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xAE, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xAE, + }}, + {as: AVFNMSUB213PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xAE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xAE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xAE, + }}, + {as: AVFNMSUB213SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xAF, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xAF, + }}, + {as: AVFNMSUB213SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xAF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xAF, + }}, + {as: AVFNMSUB231PD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBE, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0xBE, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBE, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xBE, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xBE, + }}, + {as: AVFNMSUB231PS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBE, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xBE, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBE, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xBE, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xBE, + }}, + {as: AVFNMSUB231SD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0xBF, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0xBF, + }}, + {as: AVFNMSUB231SS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xBF, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0xBF, + }}, + {as: AVFPCLASSPDX, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPDY, ytab: _yvfpclasspdy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | 
evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPDZ, ytab: _yvfpclasspdz, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8, 0x66, + }}, + {as: AVFPCLASSPSX, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSPSY, ytab: _yvfpclasspdy, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSPSZ, ytab: _yvfpclasspdz, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x66, + }}, + {as: AVFPCLASSSD, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x67, + }}, + {as: AVFPCLASSSS, ytab: _yvfpclasspdx, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x67, + }}, + {as: AVGATHERDPD, ytab: _yvgatherdpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x92, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x92, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x92, + }}, + {as: AVGATHERDPS, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x92, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x92, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x92, + }}, + {as: AVGATHERPF0DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 01, + }}, + {as: AVGATHERPF0DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 01, + }}, + {as: AVGATHERPF0QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 01, + }}, + {as: AVGATHERPF0QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 01, + }}, + {as: AVGATHERPF1DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 02, + }}, + {as: AVGATHERPF1DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 02, + }}, + {as: AVGATHERPF1QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 02, + }}, + {as: AVGATHERPF1QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 02, + }}, + {as: AVGATHERQPD, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x93, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x93, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x93, + }}, + {as: AVGATHERQPS, ytab: _yvgatherqps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x93, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x93, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x93, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 
0x93, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x93, + }}, + {as: AVGETEXPPD, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x42, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x42, + }}, + {as: AVGETEXPPS, ytab: _yvcvtpd2qq, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x42, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x42, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x42, + }}, + {as: AVGETEXPSD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x43, + }}, + {as: AVGETEXPSS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x43, + }}, + {as: AVGETMANTPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x26, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x26, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x26, + }}, + {as: AVGETMANTPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x26, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x26, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x26, + }}, + {as: AVGETMANTSD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x27, + }}, + {as: AVGETMANTSS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x27, + }}, + {as: AVGF2P8AFFINEINVQB, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0xCF, + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0xCF, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xCF, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xCF, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xCF, + }}, + {as: AVGF2P8AFFINEQB, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0xCE, + avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0xCE, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xCE, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xCE, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xCE, + }}, + {as: AVGF2P8MULB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0xCF, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0xCF, + avxEscape | evex128 | evex66 | 
evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0xCF, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0xCF, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0xCF, + }}, + {as: AVHADDPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7C, + }}, + {as: AVHADDPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x7C, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x7C, + }}, + {as: AVHSUBPD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7D, + }}, + {as: AVHSUBPS, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x7D, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x7D, + }}, + {as: AVINSERTF128, ytab: _yvinsertf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x18, + }}, + {as: AVINSERTF32X4, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x18, + }}, + {as: AVINSERTF32X8, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x1A, + }}, + {as: AVINSERTF64X2, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x18, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x18, + }}, + {as: AVINSERTF64X4, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x1A, + }}, + {as: AVINSERTI128, ytab: _yvinsertf128, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x38, + }}, + {as: AVINSERTI32X4, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x38, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x38, + }}, + {as: AVINSERTI32X8, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x3A, + }}, + {as: AVINSERTI64X2, ytab: _yvinsertf32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x38, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x38, + }}, + {as: AVINSERTI64X4, ytab: _yvinsertf32x8, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x3A, + }}, + {as: AVINSERTPS, ytab: _yvinsertps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x21, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x21, + }}, + {as: AVLDDQU, ytab: _yvlddqu, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0xF0, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0xF0, + }}, + {as: AVLDMXCSR, ytab: _yvldmxcsr, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xAE, 02, + }}, + {as: AVMASKMOVDQU, ytab: _yvmaskmovdqu, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF7, + }}, + {as: AVMASKMOVPD, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | 
vex66 | vex0F38 | vexW0, 0x2F, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2F, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2D, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2D, + }}, + {as: AVMASKMOVPS, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2E, + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2C, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2C, + }}, + {as: AVMAXPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5F, + avxEscape | vex256 | vex0F | vexW0, 0x5F, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5F, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5F, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + }}, + {as: AVMAXSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5F, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5F, + }}, + {as: AVMINPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5D, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5D, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5D, + avxEscape | vex256 | vex0F | vexW0, 0x5D, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5D, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5D, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + }}, + {as: AVMINSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5D, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x5D, + }}, + {as: AVMOVAPD, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x29, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x29, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x28, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x28, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | 
evexZeroingEnabled, 0x29, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x29, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x29, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x28, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x28, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x28, + }}, + {as: AVMOVAPS, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x29, + avxEscape | vex256 | vex0F | vexW0, 0x29, + avxEscape | vex128 | vex0F | vexW0, 0x28, + avxEscape | vex256 | vex0F | vexW0, 0x28, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x29, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x29, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x29, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x28, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x28, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x28, + }}, + {as: AVMOVD, ytab: _yvmovd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7E, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6E, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN4, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN4, 0x6E, + }}, + {as: AVMOVDDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x12, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVMOVDQA, ytab: _yvmovdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x7F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x7F, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6F, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6F, + }}, + {as: AVMOVDQA32, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQA64, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU, ytab: _yvmovdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x7F, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x7F, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 
0x6F, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x6F, + }}, + {as: AVMOVDQU16, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF2 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF2 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU32, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU64, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF3 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF3 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVDQU8, ytab: _yvmovdqa32, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x7F, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x7F, + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x7F, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x6F, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x6F, + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x6F, + }}, + {as: AVMOVHLPS, ytab: _yvmovhlps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex0F | evexW0, 0, 0x12, + }}, + {as: AVMOVHPD, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x17, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x17, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x16, + }}, + {as: AVMOVHPS, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x17, + avxEscape | vex128 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x17, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x16, + }}, + {as: AVMOVLHPS, ytab: _yvmovhlps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evex0F | evexW0, 0, 0x16, + }}, + {as: AVMOVLPD, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x13, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x13, + avxEscape | evex128 | 
evex66 | evex0F | evexW1, evexN8, 0x12, + }}, + {as: AVMOVLPS, ytab: _yvmovhpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x13, + avxEscape | vex128 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x13, + avxEscape | evex128 | evex0F | evexW0, evexN8, 0x12, + }}, + {as: AVMOVMSKPD, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x50, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x50, + }}, + {as: AVMOVMSKPS, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x50, + avxEscape | vex256 | vex0F | vexW0, 0x50, + }}, + {as: AVMOVNTDQ, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE7, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE7, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0xE7, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0xE7, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0xE7, + }}, + {as: AVMOVNTDQA, ytab: _yvmovntdqa, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2A, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x2A, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x2A, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x2A, + }}, + {as: AVMOVNTPD, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2B, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x2B, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16, 0x2B, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32, 0x2B, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64, 0x2B, + }}, + {as: AVMOVNTPS, ytab: _yvmovntdq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2B, + avxEscape | vex256 | vex0F | vexW0, 0x2B, + avxEscape | evex128 | evex0F | evexW0, evexN16, 0x2B, + avxEscape | evex256 | evex0F | evexW0, evexN32, 0x2B, + avxEscape | evex512 | evex0F | evexW0, evexN64, 0x2B, + }}, + {as: AVMOVQ, ytab: _yvmovq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x7E, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD6, + avxEscape | vex128 | vex66 | vex0F | vexW1, 0x6E, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x7E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0xD6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8, 0x6E, + avxEscape | evex128 | evexF3 | evex0F | evexW1, evexN8, 0x7E, + }}, + {as: AVMOVSD, ytab: _yvmovsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x10, + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexZeroingEnabled, 0x11, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8, 0x11, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexZeroingEnabled, 0x10, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexZeroingEnabled, 0x10, + }}, + {as: AVMOVSHDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x16, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x16, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x16, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x16, + avxEscape | evex512 | 
evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x16, + }}, + {as: AVMOVSLDUP, ytab: _yvmovddup, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x12, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x12, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVMOVSS, ytab: _yvmovsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x10, + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexZeroingEnabled, 0x11, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4, 0x11, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexZeroingEnabled, 0x10, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexZeroingEnabled, 0x10, + }}, + {as: AVMOVUPD, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x11, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x10, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x11, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVMOVUPS, ytab: _yvmovapd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x11, + avxEscape | vex256 | vex0F | vexW0, 0x11, + avxEscape | vex128 | vex0F | vexW0, 0x10, + avxEscape | vex256 | vex0F | vexW0, 0x10, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x11, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVMPSADBW, ytab: _yvblendpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x42, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x42, + }}, + {as: AVMULPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x59, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x59, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x59, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x59, + }}, + {as: AVMULPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x59, + avxEscape | vex256 | vex0F | vexW0, 0x59, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x59, + avxEscape | evex128 | evex0F | evexW0, evexN16 | 
evexBcstN4 | evexZeroingEnabled, 0x59,
+ avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x59,
+ }},
+ {as: AVMULSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x59,
+ avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x59,
+ }},
+ {as: AVMULSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x59,
+ avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x59,
+ }},
+ {as: AVORPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x56,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x56,
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x56,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x56,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x56,
+ }},
+ {as: AVORPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex0F | vexW0, 0x56,
+ avxEscape | vex256 | vex0F | vexW0, 0x56,
+ avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x56,
+ avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x56,
+ avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x56,
+ }},
+ {as: AVP4DPWSSD, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{
+ avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x52,
+ }},
+ {as: AVP4DPWSSDS, ytab: _yv4fmaddps, prefix: Pavx, op: opBytes{
+ avxEscape | evex512 | evexF2 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x53,
+ }},
+ {as: AVPABSB, ytab: _yvmovddup, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1C,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1C,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1C,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1C,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x1C,
+ }},
+ {as: AVPABSD, ytab: _yvmovddup, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1E,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1E,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x1E,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x1E,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x1E,
+ }},
+ {as: AVPABSQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x1F,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x1F,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x1F,
+ }},
+ {as: AVPABSW, ytab: _yvmovddup, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x1D,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x1D,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x1D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x1D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x1D,
+ }},
+ {as: AVPACKSSDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6B,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6B,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x6B,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x6B,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x6B,
+ }},
+ {as: AVPACKSSWB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x63,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x63,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x63,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x63,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x63,
+ }},
+ {as: AVPACKUSDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x2B,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x2B,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x2B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x2B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x2B,
+ }},
+ {as: AVPACKUSWB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x67,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x67,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x67,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x67,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x67,
+ }},
+ {as: AVPADDB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFC,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFC,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xFC,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xFC,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xFC,
+ }},
+ {as: AVPADDD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFE,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFE,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xFE,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xFE,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xFE,
+ }},
+ {as: AVPADDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD4,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD4,
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xD4,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xD4,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xD4,
+ }},
+ {as: AVPADDSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEC,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEC,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEC,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEC,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEC,
+ }},
+ {as: AVPADDSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xED,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xED,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xED,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xED,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xED,
+ }},
+ {as: AVPADDUSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDC,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDC,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDC,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDC,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDC,
+ }},
+ {as: AVPADDUSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDD,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDD,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDD,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDD,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDD,
+ }},
+ {as: AVPADDW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFD,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFD,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xFD,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xFD,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xFD,
+ }},
+ {as: AVPALIGNR, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0F,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0F,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexZeroingEnabled, 0x0F,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexZeroingEnabled, 0x0F,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexZeroingEnabled, 0x0F,
+ }},
+ {as: AVPAND, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDB,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDB,
+ }},
+ {as: AVPANDD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xDB,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xDB,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xDB,
+ }},
+ {as: AVPANDN, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDF,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDF,
+ }},
+ {as: AVPANDND, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xDF,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xDF,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xDF,
+ }},
+ {as: AVPANDNQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xDF,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xDF,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xDF,
+ }},
+ {as: AVPANDQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xDB,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xDB,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xDB,
+ }},
+ {as: AVPAVGB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE0,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE0,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE0,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE0,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE0,
+ }},
+ {as: AVPAVGW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE3,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE3,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE3,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE3,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE3,
+ }},
+ {as: AVPBLENDD, ytab: _yvblendpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x02,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x02,
+ }},
+ {as: AVPBLENDMB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x66,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x66,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x66,
+ }},
+ {as: AVPBLENDMD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x64,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x64,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x64,
+ }},
+ {as: AVPBLENDMQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x64,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x64,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x64,
+ }},
+ {as: AVPBLENDMW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x66,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x66,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x66,
+ }},
+ {as: AVPBLENDVB, ytab: _yvblendvpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x4C,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x4C,
+ }},
+ {as: AVPBLENDW, ytab: _yvblendpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0E,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x0E,
+ }},
+ {as: AVPBROADCASTB, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x78,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x78,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7A,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x78,
+ }},
+ {as: AVPBROADCASTD, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x58,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x58,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7C,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x58,
+ }},
+ {as: AVPBROADCASTMB2Q, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x2A,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x2A,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x2A,
+ }},
+ {as: AVPBROADCASTMW2D, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x3A,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x3A,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x3A,
+ }},
+ {as: AVPBROADCASTQ, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x59,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x59,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexZeroingEnabled, 0x7C,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x59,
+ }},
+ {as: AVPBROADCASTW, ytab: _yvpbroadcastb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x79,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x79,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexZeroingEnabled, 0x7B,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x79,
+ }},
+ {as: AVPCLMULQDQ, ytab: _yvpclmulqdq, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x44,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x44,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x44,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32, 0x44,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x44,
+ }},
+ {as: AVPCMPB, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x3F,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32, 0x3F,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x3F,
+ }},
+ {as: AVPCMPD, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x1F,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x1F,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x1F,
+ }},
+ {as: AVPCMPEQB, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x74,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x74,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x74,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x74,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x74,
+ }},
+ {as: AVPCMPEQD, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x76,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x76,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4, 0x76,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4, 0x76,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4, 0x76,
+ }},
+ {as: AVPCMPEQQ, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x29,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x29,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x29,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x29,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x29,
+ }},
+ {as: AVPCMPEQW, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x75,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x75,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x75,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x75,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x75,
+ }},
+ {as: AVPCMPESTRI, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x61,
+ }},
+ {as: AVPCMPESTRM, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x60,
+ }},
+ {as: AVPCMPGTB, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x64,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x64,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x64,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x64,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x64,
+ }},
+ {as: AVPCMPGTD, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x66,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x66,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4, 0x66,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4, 0x66,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4, 0x66,
+ }},
+ {as: AVPCMPGTQ, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x37,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x37,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x37,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x37,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x37,
+ }},
+ {as: AVPCMPGTW, ytab: _yvpcmpeqb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0x65,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0x65,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x65,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x65,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x65,
+ }},
+ {as: AVPCMPISTRI, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexWIG, 0x63,
+ }},
+ {as: AVPCMPISTRM, ytab: _yvaeskeygenassist, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x62,
+ }},
+ {as: AVPCMPQ, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x1F,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8, 0x1F,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8, 0x1F,
+ }},
+ {as: AVPCMPUB, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16, 0x3E,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32, 0x3E,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64, 0x3E,
+ }},
+ {as: AVPCMPUD, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4, 0x1E,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4, 0x1E,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4, 0x1E,
+ }},
+ {as: AVPCMPUQ, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8, 0x1E,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8, 0x1E,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8, 0x1E,
+ }},
+ {as: AVPCMPUW, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16, 0x3E,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32, 0x3E,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64, 0x3E,
+ }},
+ {as: AVPCMPW, ytab: _yvpcmpb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16, 0x3F,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32, 0x3F,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64, 0x3F,
+ }},
+ {as: AVPCOMPRESSB, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x63,
+ }},
+ {as: AVPCOMPRESSD, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x8B,
+ }},
+ {as: AVPCOMPRESSQ, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x8B,
+ }},
+ {as: AVPCOMPRESSW, ytab: _yvcompresspd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x63,
+ }},
+ {as: AVPCONFLICTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xC4,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xC4,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xC4,
+ }},
+ {as: AVPCONFLICTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xC4,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xC4,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xC4,
+ }},
+ {as: AVPDPBUSD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x50,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x50,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x50,
+ }},
+ {as: AVPDPBUSDS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x51,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x51,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x51,
+ }},
+ {as: AVPDPWSSD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x52,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x52,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x52,
+ }},
+ {as: AVPDPWSSDS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x53,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x53,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x53,
+ }},
+ {as: AVPERM2F128, ytab: _yvperm2f128, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x06,
+ }},
+ {as: AVPERM2I128, ytab: _yvperm2f128, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x46,
+ }},
+ {as: AVPERMB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x8D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x8D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x8D,
+ }},
+ {as: AVPERMD, ytab: _yvpermd, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x36,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x36,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x36,
+ }},
+ {as: AVPERMI2B, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x75,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x75,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x75,
+ }},
+ {as: AVPERMI2D, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x76,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x76,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x76,
+ }},
+ {as: AVPERMI2PD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x77,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x77,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x77,
+ }},
+ {as: AVPERMI2PS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x77,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x77,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x77,
+ }},
+ {as: AVPERMI2Q, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x76,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x76,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x76,
+ }},
+ {as: AVPERMI2W, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x75,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x75,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x75,
+ }},
+ {as: AVPERMILPD, ytab: _yvpermilpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x05,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x05,
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0D,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0D,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x05,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x05,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x05,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x0D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x0D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x0D,
+ }},
+ {as: AVPERMILPS, ytab: _yvpermilpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x04,
+ avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x04,
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0C,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0C,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x04,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x04,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x04,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x0C,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x0C,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x0C,
+ }},
+ {as: AVPERMPD, ytab: _yvpermq, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0x01,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x01,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x01,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x16,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x16,
+ }},
+ {as: AVPERMPS, ytab: _yvpermd, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x16,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x16,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x16,
+ }},
+ {as: AVPERMQ, ytab: _yvpermq, prefix: Pavx, op: opBytes{
+ avxEscape | vex256 | vex66 | vex0F3A | vexW1, 0x00,
+ avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x00,
+ avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x00,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x36,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x36,
+ }},
+ {as: AVPERMT2B, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x7D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x7D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x7D,
+ }},
+ {as: AVPERMT2D, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7E,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7E,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x7E,
+ }},
+ {as: AVPERMT2PD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7F,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7F,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x7F,
+ }},
+ {as: AVPERMT2PS, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x7F,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x7F,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x7F,
+ }},
+ {as: AVPERMT2Q, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x7E,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x7E,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x7E,
+ }},
+ {as: AVPERMT2W, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x7D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x7D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x7D,
+ }},
+ {as: AVPERMW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x8D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x8D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x8D,
+ }},
+ {as: AVPEXPANDB, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN1 | evexZeroingEnabled, 0x62,
+ }},
+ {as: AVPEXPANDD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x89,
+ }},
+ {as: AVPEXPANDQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x89,
+ }},
+ {as: AVPEXPANDW, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN2 | evexZeroingEnabled, 0x62,
+ }},
+ {as: AVPEXTRB, ytab: _yvextractps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x14,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN1, 0x14,
+ }},
+ {as: AVPEXTRD, ytab: _yvextractps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x16,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x16,
+ }},
+ {as: AVPEXTRQ, ytab: _yvextractps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x16,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x16,
+ }},
+ {as: AVPEXTRW, ytab: _yvpextrw, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x15,
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC5,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN2, 0x15,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, 0, 0xC5,
+ }},
+ {as: AVPGATHERDD, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x90,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x90,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x90,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x90,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x90,
+ }},
+ {as: AVPGATHERDQ, ytab: _yvgatherdpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x90,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x90,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x90,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x90,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x90,
+ }},
+ {as: AVPGATHERQD, ytab: _yvgatherqps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x91,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x91,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0x91,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0x91,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0x91,
+ }},
+ {as: AVPGATHERQQ, ytab: _yvgatherdps, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x91,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x91,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0x91,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0x91,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0x91,
+ }},
+ {as: AVPHADDD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x02,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x02,
+ }},
+ {as: AVPHADDSW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x03,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x03,
+ }},
+ {as: AVPHADDW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x01,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x01,
+ }},
+ {as: AVPHMINPOSUW, ytab: _yvaesimc, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x41,
+ }},
+ {as: AVPHSUBD, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x06,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x06,
+ }},
+ {as: AVPHSUBSW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x07,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x07,
+ }},
+ {as: AVPHSUBW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x05,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x05,
+ }},
+ {as: AVPINSRB, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x20,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN1, 0x20,
+ }},
+ {as: AVPINSRD, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x22,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4, 0x22,
+ }},
+ {as: AVPINSRQ, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F3A | vexW1, 0x22,
+ avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8, 0x22,
+ }},
+ {as: AVPINSRW, ytab: _yvpinsrb, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC4,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN2, 0xC4,
+ }},
+ {as: AVPLZCNTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x44,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x44,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x44,
+ }},
+ {as: AVPLZCNTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x44,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x44,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x44,
+ }},
+ {as: AVPMADD52HUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB5,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB5,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xB5,
+ }},
+ {as: AVPMADD52LUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xB4,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xB4,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xB4,
+ }},
+ {as: AVPMADDUBSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x04,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x04,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x04,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x04,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x04,
+ }},
+ {as: AVPMADDWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF5,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF5,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF5,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xF5,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF5,
+ }},
+ {as: AVPMASKMOVD, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x8E,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x8E,
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x8C,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x8C,
+ }},
+ {as: AVPMASKMOVQ, ytab: _yvmaskmovpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x8E,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x8E,
+ avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x8C,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x8C,
+ }},
+ {as: AVPMAXSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3C,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3C,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x3C,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3C,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3C,
+ }},
+ {as: AVPMAXSD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3D,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3D,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3D,
+ }},
+ {as: AVPMAXSQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3D,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3D,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3D,
+ }},
+ {as: AVPMAXSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEE,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEE,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEE,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEE,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEE,
+ }},
+ {as: AVPMAXUB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDE,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDE,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDE,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDE,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDE,
+ }},
+ {as: AVPMAXUD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3F,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3F,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3F,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3F,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3F,
+ }},
+ {as: AVPMAXUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3F,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3F,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3F,
+ }},
+ {as: AVPMAXUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3E,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3E,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x3E,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3E,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3E,
+ }},
+ {as: AVPMINSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x38,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x38,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x38,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x38,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x38,
+ }},
+ {as: AVPMINSD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x39,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x39,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x39,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x39,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x39,
+ }},
+ {as: AVPMINSQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x39,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x39,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x39,
+ }},
+ {as: AVPMINSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEA,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEA,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xEA,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xEA,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xEA,
+ }},
+ {as: AVPMINUB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xDA,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xDA,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xDA,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xDA,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xDA,
+ }},
+ {as: AVPMINUD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3B,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3B,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x3B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x3B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x3B,
+ }},
+ {as: AVPMINUQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x3B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x3B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x3B,
+ }},
+ {as: AVPMINUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x3A,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x3A,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x3A,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x3A,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x3A,
+ }},
+ {as: AVPMOVB2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x29,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x29,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x29,
+ }},
+ {as: AVPMOVD2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x39,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x39,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x39,
+ }},
+ {as: AVPMOVDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x31,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x31,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x31,
+ }},
+ {as: AVPMOVDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x33,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x33,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x33,
+ }},
+ {as: AVPMOVM2B, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x28,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x28,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x28,
+ }},
+ {as: AVPMOVM2D, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, 0, 0x38,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, 0, 0x38,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, 0, 0x38,
+ }},
+ {as: AVPMOVM2Q, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x38,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x38,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x38,
+ }},
+ {as: AVPMOVM2W, ytab: _yvpbroadcastmb2q, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x28,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x28,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x28,
+ }},
+ {as: AVPMOVMSKB, ytab: _yvmovmskpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD7,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD7,
+ }},
+ {as: AVPMOVQ2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x39,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x39,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x39,
+ }},
+ {as: AVPMOVQB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x32,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x32,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x32,
+ }},
+ {as: AVPMOVQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x35,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x35,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x35,
+ }},
+ {as: AVPMOVQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x34,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x34,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x34,
+ }},
+ {as: AVPMOVSDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x21,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x21,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x21,
+ }},
+ {as: AVPMOVSDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x23,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x23,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x23,
+ }},
+ {as: AVPMOVSQB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x22,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x22,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x22,
+ }},
+ {as: AVPMOVSQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x25,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x25,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x25,
+ }},
+ {as: AVPMOVSQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x24,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x24,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x24,
+ }},
+ {as: AVPMOVSWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x20,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x20,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x20,
+ }},
+ {as: AVPMOVSXBD, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x21,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x21,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x21,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x21,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x21,
+ }},
+ {as: AVPMOVSXBQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x22,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x22,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x22,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x22,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x22,
+ }},
+ {as: AVPMOVSXBW, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x20,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x20,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x20,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x20,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x20,
+ }},
+ {as: AVPMOVSXDQ, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x25,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x25,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x25,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x25,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x25,
+ }},
+ {as: AVPMOVSXWD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x23,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x23,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x23,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x23,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x23,
+ }},
+ {as: AVPMOVSXWQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x24,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x24,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x24,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x24,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x24,
+ }},
+ {as: AVPMOVUSDB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x11,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x11,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x11,
+ }},
+ {as: AVPMOVUSDW, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x13,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x13,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x13,
+ }},
+ {as: AVPMOVUSQB, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x12,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x12,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x12,
+ }},
+ {as: AVPMOVUSQD, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x15,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x15,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x15,
+ }},
+ {as: AVPMOVUSQW, ytab: _yvpmovdb, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x14,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x14,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x14,
+ }},
+ {as: AVPMOVUSWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x10,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x10,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x10,
+ }},
+ {as: AVPMOVW2M, ytab: _yvpmovb2m, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW1, 0, 0x29,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW1, 0, 0x29,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW1, 0, 0x29,
+ }},
+ {as: AVPMOVWB, ytab: _yvpmovdw, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x30,
+ avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x30,
+ avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x30,
+ }},
+ {as: AVPMOVZXBD, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x31,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x31,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x31,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x31,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x31,
+ }},
+ {as: AVPMOVZXBQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x32,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x32,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN2 | evexZeroingEnabled, 0x32,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x32,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x32,
+ }},
+ {as: AVPMOVZXBW, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x30,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x30,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x30,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x30,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x30,
+ }},
+ {as: AVPMOVZXDQ, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x35,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x35,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x35,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x35,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x35,
+ }},
+ {as: AVPMOVZXWD, ytab: _yvcvtdq2pd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x33,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x33,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x33,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x33,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x33,
+ }},
+ {as: AVPMOVZXWQ, ytab: _yvbroadcastss, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x34,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x34,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x34,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN8 | evexZeroingEnabled, 0x34,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x34,
+ }},
+ {as: AVPMULDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x28,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x28,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x28,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x28,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x28,
+ }},
+ {as: AVPMULHRSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0B,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0B,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x0B,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x0B,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x0B,
+ }},
+ {as: AVPMULHUW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE4,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE4,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE4,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE4,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE4,
+ }},
+ {as: AVPMULHW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE5,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE5,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE5,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE5,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE5,
+ }},
+ {as: AVPMULLD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x40,
+ avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x40,
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x40,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x40,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x40,
+ }},
+ {as: AVPMULLQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x40,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x40,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x40,
+ }},
+ {as: AVPMULLW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD5,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD5,
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD5,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD5,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD5,
+ }},
+ {as: AVPMULTISHIFTQB, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x83,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x83,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x83,
+ }},
+ {as: AVPMULUDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF4,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF4,
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xF4,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xF4,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xF4,
+ }},
+ {as: AVPOPCNTB, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x54,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x54,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x54,
+ }},
+ {as: AVPOPCNTD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x55,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x55,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x55,
+ }},
+ {as: AVPOPCNTQ, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x55,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x55,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x55,
+ }},
+ {as: AVPOPCNTW, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x54,
+ avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x54,
+ avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x54,
+ }},
+ {as: AVPOR, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{
+ avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEB,
+ avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEB,
+ }},
+ {as: AVPORD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xEB,
+ avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xEB,
+ avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xEB,
+ }},
+ {as: AVPORQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xEB,
+ avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xEB,
+ avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xEB,
+ }},
+ {as: AVPROLD, ytab: _yvprold, prefix: Pavx, op: opBytes{
+ avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16
| evexBcstN4 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 01, + }}, + {as: AVPROLQ, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x72, 01, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 01, + }}, + {as: AVPROLVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x15, + }}, + {as: AVPROLVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x15, + }}, + {as: AVPRORD, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 00, + }}, + {as: AVPRORQ, ytab: _yvprold, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 00, + }}, + {as: AVPRORVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x14, + }}, + {as: AVPRORVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x14, + }}, + {as: AVPSADBW, ytab: _yvaesdec, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF6, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF6, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0xF6, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0xF6, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0xF6, + }}, + {as: AVPSCATTERDD, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA0, + }}, + {as: AVPSCATTERDQ, ytab: _yvpscatterdq, 
prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA0, + }}, + {as: AVPSCATTERQD, ytab: _yvpscatterqd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA1, + }}, + {as: AVPSCATTERQQ, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA1, + }}, + {as: AVPSHLDD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x71, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x71, + }}, + {as: AVPSHLDVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHLDW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHRDD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 
0x73, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, + }}, + {as: AVPSHRDVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x72, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x72, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x72, + }}, + {as: AVPSHRDW, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexZeroingEnabled, 0x72, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexZeroingEnabled, 0x72, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexZeroingEnabled, 0x72, + }}, + {as: AVPSHUFB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x00, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x00, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexZeroingEnabled, 0x00, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexZeroingEnabled, 0x00, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexZeroingEnabled, 0x00, + }}, + {as: AVPSHUFBITQMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x8F, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x8F, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x8F, + }}, + {as: AVPSHUFD, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHUFHW, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vexF3 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evexF3 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evexF3 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x70, + }}, + {as: AVPSHUFLW, ytab: _yvpshufd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x70, + avxEscape | vex256 | vexF2 | vex0F | vexW0, 0x70, + avxEscape | evex128 | evexF2 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x70, + avxEscape | evex256 | evexF2 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x70, + avxEscape | evex512 | evexF2 | evex0F | evexW0, evexN64 | 
evexZeroingEnabled, 0x70, + }}, + {as: AVPSIGNB, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x08, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x08, + }}, + {as: AVPSIGND, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0A, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0A, + }}, + {as: AVPSIGNW, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x09, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x09, + }}, + {as: AVPSLLD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 06, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF2, + }}, + {as: AVPSLLDQ, ytab: _yvpslldq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 07, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 07, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x73, 07, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x73, 07, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x73, 07, + }}, + {as: AVPSLLQ, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF3, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF3, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, 06, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xF3, + }}, + {as: AVPSLLVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x47, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x47, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x47, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x47, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x47, + }}, + {as: AVPSLLVQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x47, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x47, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x47, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x47, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 
| evexBcstN8 | evexZeroingEnabled, 0x47, + }}, + {as: AVPSLLVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x12, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x12, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x12, + }}, + {as: AVPSLLW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 06, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 06, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 06, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF1, + }}, + {as: AVPSRAD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 04, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 04, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE2, + }}, + {as: AVPSRAQ, ytab: _yvpsraq, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x72, 04, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xE2, + }}, + {as: AVPSRAVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x46, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x46, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x46, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x46, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x46, + }}, + {as: AVPSRAVQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x46, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x46, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x46, + }}, + {as: AVPSRAVW, ytab: _yvblendmpd, 
prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x11, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x11, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x11, + }}, + {as: AVPSRAW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 04, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 04, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 04, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE1, + }}, + {as: AVPSRLD, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x72, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x72, 02, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD2, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD2, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x72, 02, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD2, + }}, + {as: AVPSRLDQ, ytab: _yvpslldq, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 03, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 03, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16, 0x73, 03, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32, 0x73, 03, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64, 0x73, 03, + }}, + {as: AVPSRLQ, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x73, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x73, 02, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD3, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD3, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x73, 02, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x73, 02, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x73, 02, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN16 | evexZeroingEnabled, 0xD3, + }}, + {as: AVPSRLVD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x45, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x45, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x45, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 
0x45, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x45, + }}, + {as: AVPSRLVQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW1, 0x45, + avxEscape | vex256 | vex66 | vex0F38 | vexW1, 0x45, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x45, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x45, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x45, + }}, + {as: AVPSRLVW, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexZeroingEnabled, 0x10, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexZeroingEnabled, 0x10, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexZeroingEnabled, 0x10, + }}, + {as: AVPSRLW, ytab: _yvpslld, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x71, 02, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x71, 02, + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD1, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD1, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x71, 02, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD1, + }}, + {as: AVPSUBB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xF8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF8, + }}, + {as: AVPSUBD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFA, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFA, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xFA, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xFA, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xFA, + }}, + {as: AVPSUBQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xFB, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xFB, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xFB, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xFB, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xFB, + }}, + {as: AVPSUBSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE8, + }}, + {as: AVPSUBSW, ytab: 
_yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xE9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xE9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xE9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xE9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xE9, + }}, + {as: AVPSUBUSB, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD8, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD8, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD8, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD8, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD8, + }}, + {as: AVPSUBUSW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xD9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xD9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xD9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xD9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xD9, + }}, + {as: AVPSUBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xF9, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xF9, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0xF9, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0xF9, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0xF9, + }}, + {as: AVPTERNLOGD, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x25, + }}, + {as: AVPTERNLOGQ, ytab: _yvalignd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x25, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x25, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x25, + }}, + {as: AVPTEST, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x17, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x17, + }}, + {as: AVPTESTMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16, 0x26, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32, 0x26, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64, 0x26, + }}, + {as: AVPTESTMD, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4, 0x27, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4, 0x27, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4, 0x27, + }}, + {as: AVPTESTMQ, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x27, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x27, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x27, + }}, + {as: AVPTESTMW, ytab: _yvpshufbitqmb, 
prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16, 0x26, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32, 0x26, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64, 0x26, + }}, + {as: AVPTESTNMB, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN16, 0x26, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN32, 0x26, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN64, 0x26, + }}, + {as: AVPTESTNMD, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW0, evexN16 | evexBcstN4, 0x27, + avxEscape | evex256 | evexF3 | evex0F38 | evexW0, evexN32 | evexBcstN4, 0x27, + avxEscape | evex512 | evexF3 | evex0F38 | evexW0, evexN64 | evexBcstN4, 0x27, + }}, + {as: AVPTESTNMQ, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, evexN16 | evexBcstN8, 0x27, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, evexN32 | evexBcstN8, 0x27, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, evexN64 | evexBcstN8, 0x27, + }}, + {as: AVPTESTNMW, ytab: _yvpshufbitqmb, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evexF3 | evex0F38 | evexW1, evexN16, 0x26, + avxEscape | evex256 | evexF3 | evex0F38 | evexW1, evexN32, 0x26, + avxEscape | evex512 | evexF3 | evex0F38 | evexW1, evexN64, 0x26, + }}, + {as: AVPUNPCKHBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x68, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x68, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x68, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x68, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x68, + }}, + {as: AVPUNPCKHDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6A, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6A, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x6A, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x6A, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x6A, + }}, + {as: AVPUNPCKHQDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6D, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6D, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x6D, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x6D, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x6D, + }}, + {as: AVPUNPCKHWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x69, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x69, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x69, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x69, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x69, + }}, + {as: AVPUNPCKLBW, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x60, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x60, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x60, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 
0x60, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x60, + }}, + {as: AVPUNPCKLDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x62, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x62, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x62, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x62, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x62, + }}, + {as: AVPUNPCKLQDQ, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x6C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x6C, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x6C, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x6C, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x6C, + }}, + {as: AVPUNPCKLWD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x61, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x61, + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexZeroingEnabled, 0x61, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexZeroingEnabled, 0x61, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexZeroingEnabled, 0x61, + }}, + {as: AVPXOR, ytab: _yvaddsubpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xEF, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xEF, + }}, + {as: AVPXORD, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xEF, + avxEscape | evex256 | evex66 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xEF, + avxEscape | evex512 | evex66 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xEF, + }}, + {as: AVPXORQ, ytab: _yvblendmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xEF, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xEF, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xEF, + }}, + {as: AVRANGEPD, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x50, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x50, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x50, + }}, + {as: AVRANGEPS, ytab: _yvfixupimmpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x50, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x50, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x50, + }}, + {as: AVRANGESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVRANGESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVRCP14PD, ytab: _yvexpandpd, prefix: 
Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x4C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x4C, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x4C, + }}, + {as: AVRCP14PS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x4C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x4C, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x4C, + }}, + {as: AVRCP14SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x4D, + }}, + {as: AVRCP14SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x4D, + }}, + {as: AVRCP28PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xCA, + }}, + {as: AVRCP28PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xCA, + }}, + {as: AVRCP28SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0xCB, + }}, + {as: AVRCP28SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0xCB, + }}, + {as: AVRCPPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x53, + avxEscape | vex256 | vex0F | vexW0, 0x53, + }}, + {as: AVRCPSS, ytab: _yvrcpss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x53, + }}, + {as: AVREDUCEPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x56, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x56, + }}, + {as: AVREDUCEPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x56, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x56, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x56, + }}, + {as: AVREDUCESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x57, + }}, + {as: AVREDUCESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x57, + }}, + {as: AVRNDSCALEPD, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0x09, + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x09, + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x09, + }}, + {as: 
AVRNDSCALEPS, ytab: _yvgetmantpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0x08, + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x08, + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x08, + }}, + {as: AVRNDSCALESD, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0x0B, + }}, + {as: AVRNDSCALESS, ytab: _yvfixupimmsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F3A | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0x0A, + }}, + {as: AVROUNDPD, ytab: _yvroundpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x09, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x09, + }}, + {as: AVROUNDPS, ytab: _yvroundpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x08, + avxEscape | vex256 | vex66 | vex0F3A | vexW0, 0x08, + }}, + {as: AVROUNDSD, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0B, + }}, + {as: AVROUNDSS, ytab: _yvdppd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F3A | vexW0, 0x0A, + }}, + {as: AVRSQRT14PD, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x4E, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x4E, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x4E, + }}, + {as: AVRSQRT14PS, ytab: _yvexpandpd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x4E, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x4E, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x4E, + }}, + {as: AVRSQRT14SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexZeroingEnabled, 0x4F, + }}, + {as: AVRSQRT14SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexZeroingEnabled, 0x4F, + }}, + {as: AVRSQRT28PD, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | evexSaeEnabled | evexZeroingEnabled, 0xCC, + }}, + {as: AVRSQRT28PS, ytab: _yvexp2pd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexSaeEnabled | evexZeroingEnabled, 0xCC, + }}, + {as: AVRSQRT28SD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexSaeEnabled | evexZeroingEnabled, 0xCD, + }}, + {as: AVRSQRT28SS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexSaeEnabled | evexZeroingEnabled, 0xCD, + }}, + {as: AVRSQRTPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x52, + avxEscape | vex256 | vex0F | vexW0, 0x52, + }}, + {as: AVRSQRTSS, ytab: _yvrcpss, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x52, + }}, + {as: AVSCALEFPD, ytab: _yvscalefpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN64 | evexBcstN8 | 
evexRoundingEnabled | evexZeroingEnabled, 0x2C, + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x2C, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x2C, + }}, + {as: AVSCALEFPS, ytab: _yvscalefpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x2C, + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x2C, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x2C, + }}, + {as: AVSCALEFSD, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x2D, + }}, + {as: AVSCALEFSS, ytab: _yvgetexpsd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x2D, + }}, + {as: AVSCATTERDPD, ytab: _yvpscatterdq, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA2, + }}, + {as: AVSCATTERDPS, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA2, + }}, + {as: AVSCATTERPF0DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 05, + }}, + {as: AVSCATTERPF0DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 05, + }}, + {as: AVSCATTERPF0QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 05, + }}, + {as: AVSCATTERPF0QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 05, + }}, + {as: AVSCATTERPF1DPD, ytab: _yvgatherpf0dpd, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC6, 06, + }}, + {as: AVSCATTERPF1DPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC6, 06, + }}, + {as: AVSCATTERPF1QPD, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xC7, 06, + }}, + {as: AVSCATTERPF1QPS, ytab: _yvgatherpf0dps, prefix: Pavx, op: opBytes{ + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xC7, 06, + }}, + {as: AVSCATTERQPD, ytab: _yvpscatterdd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + avxEscape | evex256 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + avxEscape | evex512 | evex66 | evex0F38 | evexW1, evexN8, 0xA3, + }}, + {as: AVSCATTERQPS, ytab: _yvpscatterqd, prefix: Pavx, op: opBytes{ + avxEscape | evex128 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + avxEscape | evex256 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + avxEscape | evex512 | evex66 | evex0F38 | evexW0, evexN4, 0xA3, + }}, + {as: AVSHUFF32X4, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | 
evexZeroingEnabled, 0x23, + }}, + {as: AVSHUFF64X2, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x23, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x23, + }}, + {as: AVSHUFI32X4, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x43, + avxEscape | evex512 | evex66 | evex0F3A | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x43, + }}, + {as: AVSHUFI64X2, ytab: _yvshuff32x4, prefix: Pavx, op: opBytes{ + avxEscape | evex256 | evex66 | evex0F3A | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x43, + avxEscape | evex512 | evex66 | evex0F3A | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x43, + }}, + {as: AVSHUFPD, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0xC6, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0xC6, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0xC6, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0xC6, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0xC6, + }}, + {as: AVSHUFPS, ytab: _yvgf2p8affineinvqb, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xC6, + avxEscape | vex256 | vex0F | vexW0, 0xC6, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0xC6, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0xC6, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0xC6, + }}, + {as: AVSQRTPD, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x51, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x51, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x51, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTPS, ytab: _yvcvtdq2ps, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x51, + avxEscape | vex256 | vex0F | vexW0, 0x51, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x51, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x51, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVSQRTSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x51, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x51, + }}, + {as: AVSTMXCSR, ytab: _yvldmxcsr, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0xAE, 03, + }}, + {as: AVSUBPD, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x5C, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x5C, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexRoundingEnabled | 
evexZeroingEnabled, 0x5C, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x5C, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBPS, ytab: _yvaddpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x5C, + avxEscape | vex256 | vex0F | vexW0, 0x5C, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x5C, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBSD, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF2 | vex0F | vexW0, 0x5C, + avxEscape | evex128 | evexF2 | evex0F | evexW1, evexN8 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + }}, + {as: AVSUBSS, ytab: _yvaddsd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vexF3 | vex0F | vexW0, 0x5C, + avxEscape | evex128 | evexF3 | evex0F | evexW0, evexN4 | evexRoundingEnabled | evexZeroingEnabled, 0x5C, + }}, + {as: AVTESTPD, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0F, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0F, + }}, + {as: AVTESTPS, ytab: _yvptest, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x0E, + avxEscape | vex256 | vex66 | vex0F38 | vexW0, 0x0E, + }}, + {as: AVUCOMISD, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x2E, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN8 | evexSaeEnabled, 0x2E, + }}, + {as: AVUCOMISS, ytab: _yvcomisd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x2E, + avxEscape | evex128 | evex0F | evexW0, evexN4 | evexSaeEnabled, 0x2E, + }}, + {as: AVUNPCKHPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x15, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x15, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x15, + }}, + {as: AVUNPCKHPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x15, + avxEscape | vex256 | vex0F | vexW0, 0x15, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x15, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x15, + }}, + {as: AVUNPCKLPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x14, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x14, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x14, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x14, + }}, + {as: AVUNPCKLPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x14, + avxEscape | vex256 | vex0F | vexW0, 0x14, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x14, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x14, 
+ avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x14, + }}, + {as: AVXORPD, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex66 | vex0F | vexW0, 0x57, + avxEscape | vex256 | vex66 | vex0F | vexW0, 0x57, + avxEscape | evex128 | evex66 | evex0F | evexW1, evexN16 | evexBcstN8 | evexZeroingEnabled, 0x57, + avxEscape | evex256 | evex66 | evex0F | evexW1, evexN32 | evexBcstN8 | evexZeroingEnabled, 0x57, + avxEscape | evex512 | evex66 | evex0F | evexW1, evexN64 | evexBcstN8 | evexZeroingEnabled, 0x57, + }}, + {as: AVXORPS, ytab: _yvandnpd, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x57, + avxEscape | vex256 | vex0F | vexW0, 0x57, + avxEscape | evex128 | evex0F | evexW0, evexN16 | evexBcstN4 | evexZeroingEnabled, 0x57, + avxEscape | evex256 | evex0F | evexW0, evexN32 | evexBcstN4 | evexZeroingEnabled, 0x57, + avxEscape | evex512 | evex0F | evexW0, evexN64 | evexBcstN4 | evexZeroingEnabled, 0x57, + }}, + {as: AVZEROALL, ytab: _yvzeroall, prefix: Pavx, op: opBytes{ + avxEscape | vex256 | vex0F | vexW0, 0x77, + }}, + {as: AVZEROUPPER, ytab: _yvzeroall, prefix: Pavx, op: opBytes{ + avxEscape | vex128 | vex0F | vexW0, 0x77, + }}, +} diff --git a/src/cmd/internal/obj/x86/evex.go b/src/cmd/internal/obj/x86/evex.go new file mode 100644 index 0000000..aa93cd8 --- /dev/null +++ b/src/cmd/internal/obj/x86/evex.go @@ -0,0 +1,383 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x86 + +import ( + "cmd/internal/obj" + "errors" + "fmt" + "strings" +) + +// evexBits stores EVEX prefix info that is used during instruction encoding. +type evexBits struct { + b1 byte // [W1mmLLpp] + b2 byte // [NNNbbZRS] + + // Associated instruction opcode. + opcode byte +} + +// newEVEXBits creates evexBits object from enc bytes at z position. +func newEVEXBits(z int, enc *opBytes) evexBits { + return evexBits{ + b1: enc[z+0], + b2: enc[z+1], + opcode: enc[z+2], + } +} + +// P returns EVEX.pp value. +func (evex evexBits) P() byte { return (evex.b1 & evexP) >> 0 } + +// L returns EVEX.L'L value. +func (evex evexBits) L() byte { return (evex.b1 & evexL) >> 2 } + +// M returns EVEX.mm value. +func (evex evexBits) M() byte { return (evex.b1 & evexM) >> 4 } + +// W returns EVEX.W value. +func (evex evexBits) W() byte { return (evex.b1 & evexW) >> 7 } + +// BroadcastEnabled reports whether BCST suffix is permitted. +func (evex evexBits) BroadcastEnabled() bool { + return evex.b2&evexBcst != 0 +} + +// ZeroingEnabled reports whether Z suffix is permitted. +func (evex evexBits) ZeroingEnabled() bool { + return (evex.b2&evexZeroing)>>2 != 0 +} + +// RoundingEnabled reports whether RN_SAE, RZ_SAE, RD_SAE and RU_SAE suffixes +// are permitted. +func (evex evexBits) RoundingEnabled() bool { + return (evex.b2&evexRounding)>>1 != 0 +} + +// SaeEnabled reports whether SAE suffix is permitted. +func (evex evexBits) SaeEnabled() bool { + return (evex.b2&evexSae)>>0 != 0 +} + +// DispMultiplier returns displacement multiplier that is calculated +// based on tuple type, EVEX.W and input size. +// If embedded broadcast is used, bcst should be true. 
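+// For example, an instruction tagged evexN32 scales its 8-bit memory +// displacement by 32, so disp8 = 3 addresses byte offset 96; with embedded +// broadcast the element size (evexBcstN4 or evexBcstN8) is used instead.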
+func (evex evexBits) DispMultiplier(bcst bool) int32 { + if bcst { + switch evex.b2 & evexBcst { + case evexBcstN4: + return 4 + case evexBcstN8: + return 8 + } + return 1 + } + + switch evex.b2 & evexN { + case evexN1: + return 1 + case evexN2: + return 2 + case evexN4: + return 4 + case evexN8: + return 8 + case evexN16: + return 16 + case evexN32: + return 32 + case evexN64: + return 64 + case evexN128: + return 128 + } + return 1 +} + +// EVEX is described using a 2-byte sequence. +// See evexBits for more details. +const ( + evexW = 0x80 // b1[W... ....] + evexWIG = 0 << 7 + evexW0 = 0 << 7 + evexW1 = 1 << 7 + + evexM = 0x30 // b1[..mm ....] + evex0F = 1 << 4 + evex0F38 = 2 << 4 + evex0F3A = 3 << 4 + + evexL = 0x0C // b1[.... LL..] + evexLIG = 0 << 2 + evex128 = 0 << 2 + evex256 = 1 << 2 + evex512 = 2 << 2 + + evexP = 0x03 // b1[.... ..pp] + evex66 = 1 << 0 + evexF3 = 2 << 0 + evexF2 = 3 << 0 + + // Precalculated Disp8 N value. + // N acts like a multiplier for 8-bit displacement. + // Note that some N are not used, but their bits are reserved. + evexN = 0xE0 // b2[NNN. ....] + evexN1 = 0 << 5 + evexN2 = 1 << 5 + evexN4 = 2 << 5 + evexN8 = 3 << 5 + evexN16 = 4 << 5 + evexN32 = 5 << 5 + evexN64 = 6 << 5 + evexN128 = 7 << 5 + + // Disp8 for broadcasts. + evexBcst = 0x18 // b2[...b b...] + evexBcstN4 = 1 << 3 + evexBcstN8 = 2 << 3 + + // Flags that permit certain AVX512 features. + // It's semantically illegal to combine evexZeroing and evexSae. + evexZeroing = 0x4 // b2[.... .Z..] + evexZeroingEnabled = 1 << 2 + evexRounding = 0x2 // b2[.... ..R.] + evexRoundingEnabled = 1 << 1 + evexSae = 0x1 // b2[.... ...S] + evexSaeEnabled = 1 << 0 +) + +// compressedDisp8 calculates EVEX compressed displacement, if applicable. +func compressedDisp8(disp, elemSize int32) (disp8 byte, ok bool) { + if disp%elemSize == 0 { + v := disp / elemSize + if v >= -128 && v <= 127 { + return byte(v), true + } + } + return 0, false +} + +// evexZcase reports whether given Z-case belongs to EVEX group. +func evexZcase(zcase uint8) bool { + return zcase > Zevex_first && zcase < Zevex_last +} + +// evexSuffix carries instruction EVEX suffix set flags. +// +// Examples: +// +// "RU_SAE.Z" => {rounding: 3, zeroing: true} +// "Z" => {zeroing: true} +// "BCST" => {broadcast: true} +// "SAE.Z" => {sae: true, zeroing: true} +type evexSuffix struct { + rounding byte + sae bool + zeroing bool + broadcast bool +} + +// Rounding control values. +// Match exact value for EVEX.L'L field (with exception of rcUnset). +const ( + rcRNSAE = 0 // Round towards nearest + rcRDSAE = 1 // Round towards -Inf + rcRUSAE = 2 // Round towards +Inf + rcRZSAE = 3 // Round towards zero + rcUnset = 4 +) + +// newEVEXSuffix returns proper zero value for evexSuffix. +func newEVEXSuffix() evexSuffix { + return evexSuffix{rounding: rcUnset} +} + +// evexSuffixMap maps obj.X86suffix to its decoded version. +// Filled during init(). +var evexSuffixMap [255]evexSuffix + +func init() { + // Decode all valid suffixes for later use.
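+ // For example, the table entry "RU_SAE.Z" decodes to + // evexSuffix{rounding: rcRUSAE, zeroing: true}.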
+ for i := range opSuffixTable { + suffix := newEVEXSuffix() + parts := strings.Split(opSuffixTable[i], ".") + for j := range parts { + switch parts[j] { + case "Z": + suffix.zeroing = true + case "BCST": + suffix.broadcast = true + case "SAE": + suffix.sae = true + + case "RN_SAE": + suffix.rounding = rcRNSAE + case "RD_SAE": + suffix.rounding = rcRDSAE + case "RU_SAE": + suffix.rounding = rcRUSAE + case "RZ_SAE": + suffix.rounding = rcRZSAE + } + } + evexSuffixMap[i] = suffix + } +} + +// toDisp8 tries to convert disp to proper 8-bit displacement value. +func toDisp8(disp int32, p *obj.Prog, asmbuf *AsmBuf) (disp8 byte, ok bool) { + if asmbuf.evexflag { + bcst := evexSuffixMap[p.Scond].broadcast + elemSize := asmbuf.evex.DispMultiplier(bcst) + return compressedDisp8(disp, elemSize) + } + return byte(disp), disp >= -128 && disp < 128 +} + +// EncodeRegisterRange packs [reg0-reg1] list into 64-bit value that +// is intended to be stored inside obj.Addr.Offset with TYPE_REGLIST. +func EncodeRegisterRange(reg0, reg1 int16) int64 { + return (int64(reg0) << 0) | + (int64(reg1) << 16) | + obj.RegListX86Lo +} + +// decodeRegisterRange unpacks [reg0-reg1] list from 64-bit value created by EncodeRegisterRange. +func decodeRegisterRange(list int64) (reg0, reg1 int) { + return int((list >> 0) & 0xFFFF), + int((list >> 16) & 0xFFFF) +} + +// ParseSuffix handles the special suffix for the 386/AMD64. +// Suffix bits are stored into p.Scond. +// +// Leading "." in cond is ignored. +func ParseSuffix(p *obj.Prog, cond string) error { + cond = strings.TrimPrefix(cond, ".") + + suffix := newOpSuffix(cond) + if !suffix.IsValid() { + return inferSuffixError(cond) + } + + p.Scond = uint8(suffix) + return nil +} + +// inferSuffixError returns non-nil error that describes what could be +// the cause of suffix parse failure. +// +// At the point this function is executed there is already assembly error, +// so we can burn some clocks to construct good error message. +// +// Reported issues: +// - duplicated suffixes +// - illegal rounding/SAE+broadcast combinations +// - unknown suffixes +// - misplaced suffix (e.g. wrong Z suffix position) +func inferSuffixError(cond string) error { + suffixSet := make(map[string]bool) // Set for duplicates detection. + unknownSet := make(map[string]bool) // Set of unknown suffixes. + hasBcst := false + hasRoundSae := false + var msg []string // Error message parts + + suffixes := strings.Split(cond, ".") + for i, suffix := range suffixes { + switch suffix { + case "Z": + if i != len(suffixes)-1 { + msg = append(msg, "Z suffix should be the last") + } + case "BCST": + hasBcst = true + case "SAE", "RN_SAE", "RZ_SAE", "RD_SAE", "RU_SAE": + hasRoundSae = true + default: + if !unknownSet[suffix] { + msg = append(msg, fmt.Sprintf("unknown suffix %q", suffix)) + } + unknownSet[suffix] = true + } + + if suffixSet[suffix] { + msg = append(msg, fmt.Sprintf("duplicate suffix %q", suffix)) + } + suffixSet[suffix] = true + } + + if hasBcst && hasRoundSae { + msg = append(msg, "can't combine rounding/SAE and broadcast") + } + + if len(msg) == 0 { + return errors.New("bad suffix combination") + } + return errors.New(strings.Join(msg, "; ")) +} + +// opSuffixTable is a complete list of possible opcode suffix combinations. +// It "maps" uint8 suffix bits to their string representation. +// With the exception of first and last elements, order is not important. +var opSuffixTable = [...]string{ + "", // Map empty suffix to empty string. 
+ + "Z", + + "SAE", + "SAE.Z", + + "RN_SAE", + "RZ_SAE", + "RD_SAE", + "RU_SAE", + "RN_SAE.Z", + "RZ_SAE.Z", + "RD_SAE.Z", + "RU_SAE.Z", + + "BCST", + "BCST.Z", + + "<bad suffix>", +} + +// opSuffix represents instruction opcode suffix. +// Compound (multi-part) suffixes expressed with single opSuffix value. +// +// uint8 type is used to fit obj.Prog.Scond. +type opSuffix uint8 + +// badOpSuffix is used to represent all invalid suffix combinations. +const badOpSuffix = opSuffix(len(opSuffixTable) - 1) + +// newOpSuffix returns opSuffix object that matches suffixes string. +// +// If no matching suffix is found, special "invalid" suffix is returned. +// Use IsValid method to check against this case. +func newOpSuffix(suffixes string) opSuffix { + for i := range opSuffixTable { + if opSuffixTable[i] == suffixes { + return opSuffix(i) + } + } + return badOpSuffix +} + +// IsValid reports whether suffix is valid. +// Empty suffixes are valid. +func (suffix opSuffix) IsValid() bool { + return suffix != badOpSuffix +} + +// String returns suffix printed representation. +// +// It matches the string that was used to create suffix with NewX86Suffix() +// for valid suffixes. +// For all invalid suffixes, special marker is returned. +func (suffix opSuffix) String() string { + return opSuffixTable[suffix] +} diff --git a/src/cmd/internal/obj/x86/list6.go b/src/cmd/internal/obj/x86/list6.go new file mode 100644 index 0000000..6028031 --- /dev/null +++ b/src/cmd/internal/obj/x86/list6.go @@ -0,0 +1,264 @@ +// Inferno utils/6c/list.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6c/list.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package x86 + +import ( + "cmd/internal/obj" + "fmt" +) + +var Register = []string{ + "AL", // [D_AL] + "CL", + "DL", + "BL", + "SPB", + "BPB", + "SIB", + "DIB", + "R8B", + "R9B", + "R10B", + "R11B", + "R12B", + "R13B", + "R14B", + "R15B", + "AX", // [D_AX] + "CX", + "DX", + "BX", + "SP", + "BP", + "SI", + "DI", + "R8", + "R9", + "R10", + "R11", + "R12", + "R13", + "R14", + "R15", + "AH", + "CH", + "DH", + "BH", + "F0", // [D_F0] + "F1", + "F2", + "F3", + "F4", + "F5", + "F6", + "F7", + "M0", + "M1", + "M2", + "M3", + "M4", + "M5", + "M6", + "M7", + "K0", + "K1", + "K2", + "K3", + "K4", + "K5", + "K6", + "K7", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + "X9", + "X10", + "X11", + "X12", + "X13", + "X14", + "X15", + "X16", + "X17", + "X18", + "X19", + "X20", + "X21", + "X22", + "X23", + "X24", + "X25", + "X26", + "X27", + "X28", + "X29", + "X30", + "X31", + "Y0", + "Y1", + "Y2", + "Y3", + "Y4", + "Y5", + "Y6", + "Y7", + "Y8", + "Y9", + "Y10", + "Y11", + "Y12", + "Y13", + "Y14", + "Y15", + "Y16", + "Y17", + "Y18", + "Y19", + "Y20", + "Y21", + "Y22", + "Y23", + "Y24", + "Y25", + "Y26", + "Y27", + "Y28", + "Y29", + "Y30", + "Y31", + "Z0", + "Z1", + "Z2", + "Z3", + "Z4", + "Z5", + "Z6", + "Z7", + "Z8", + "Z9", + "Z10", + "Z11", + "Z12", + "Z13", + "Z14", + "Z15", + "Z16", + "Z17", + "Z18", + "Z19", + "Z20", + "Z21", + "Z22", + "Z23", + "Z24", + "Z25", + "Z26", + "Z27", + "Z28", + "Z29", + "Z30", + "Z31", + "CS", // [D_CS] + "SS", + "DS", + "ES", + "FS", + "GS", + "GDTR", // [D_GDTR] + "IDTR", // [D_IDTR] + "LDTR", // [D_LDTR] + "MSW", // [D_MSW] + "TASK", // [D_TASK] + "CR0", // [D_CR] + "CR1", + "CR2", + "CR3", + "CR4", + "CR5", + "CR6", + "CR7", + "CR8", + "CR9", + "CR10", + "CR11", + "CR12", + "CR13", + "CR14", + "CR15", + "DR0", // [D_DR] + "DR1", + "DR2", + "DR3", + "DR4", + "DR5", + "DR6", + "DR7", + "TR0", // [D_TR] + "TR1", + "TR2", + "TR3", + "TR4", + "TR5", + "TR6", + "TR7", + "TLS", // [D_TLS] + "MAXREG", // [MAXREG] +} + +func init() { + obj.RegisterRegister(REG_AL, REG_AL+len(Register), rconv) + obj.RegisterOpcode(obj.ABaseAMD64, Anames) + obj.RegisterRegisterList(obj.RegListX86Lo, obj.RegListX86Hi, rlconv) + obj.RegisterOpSuffix("386", opSuffixString) + obj.RegisterOpSuffix("amd64", opSuffixString) +} + +func rconv(r int) string { + if REG_AL <= r && r-REG_AL < len(Register) { + return Register[r-REG_AL] + } + return fmt.Sprintf("Rgok(%d)", r-obj.RBaseAMD64) +} + +func rlconv(bits int64) string { + reg0, reg1 := decodeRegisterRange(bits) + return fmt.Sprintf("[%s-%s]", rconv(reg0), rconv(reg1)) +} + +func opSuffixString(s uint8) string { + return "." + opSuffix(s).String() +} diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go new file mode 100644 index 0000000..e6ea898 --- /dev/null +++ b/src/cmd/internal/obj/x86/obj6.go @@ -0,0 +1,1546 @@ +// Inferno utils/6l/pass.c +// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package x86 + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/src" + "cmd/internal/sys" + "internal/abi" + "log" + "math" + "path" + "strings" +) + +func CanUse1InsnTLS(ctxt *obj.Link) bool { + if isAndroid { + // Android uses a global variable for the tls offset. + return false + } + + if ctxt.Arch.Family == sys.I386 { + switch ctxt.Headtype { + case objabi.Hlinux, + objabi.Hplan9, + objabi.Hwindows: + return false + } + + return true + } + + switch ctxt.Headtype { + case objabi.Hplan9, objabi.Hwindows: + return false + case objabi.Hlinux, objabi.Hfreebsd: + return !ctxt.Flag_shared + } + + return true +} + +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + // Thread-local storage references use the TLS pseudo-register. + // As a register, TLS refers to the thread-local storage base, and it + // can only be loaded into another register: + // + // MOVQ TLS, AX + // + // An offset from the thread-local storage base is written off(reg)(TLS*1). + // Semantically it is off(reg), but the (TLS*1) annotation marks this as + // indexing from the loaded TLS base. This emits a relocation so that + // if the linker needs to adjust the offset, it can. For example: + // + // MOVQ TLS, AX + // MOVQ 0(AX)(TLS*1), CX // load g into CX + // + // On systems that support direct access to the TLS memory, this + // pair of instructions can be reduced to a direct TLS memory reference: + // + // MOVQ 0(TLS), CX // load g into CX + // + // The 2-instruction and 1-instruction forms correspond to the two code + // sequences for loading a TLS variable in the local exec model given in "ELF + // Handling For Thread-Local Storage". + // + // We apply this rewrite on systems that support the 1-instruction form. + // The decision is made using only the operating system and the -shared flag, + // not the link mode. If some link modes on a particular operating system + // require the 2-instruction form, then all builds for that operating system + // will use the 2-instruction form, so that the link mode decision can be + // delayed to link time. + // + // In this way, all supported systems use identical instructions to + // access TLS, and they are rewritten appropriately first here in + // liblink and then finally using relocations in the linker. 
+ // + // When -shared is passed, we leave the code in the 2-instruction form but + // assemble (and relocate) them in different ways to generate the initial + // exec code sequence. It's a bit of a fluke that this is possible without + // rewriting the instructions more comprehensively, and it only works because + // we only support a single TLS variable (g). + + if CanUse1InsnTLS(ctxt) { + // Reduce 2-instruction sequence to 1-instruction sequence. + // Sequences like + // MOVQ TLS, BX + // ... off(BX)(TLS*1) ... + // become + // NOP + // ... off(TLS) ... + // + // TODO(rsc): Remove the Hsolaris special case. It exists only to + // guarantee we are producing byte-identical binaries as before this code. + // But it should be unnecessary. + if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris { + obj.Nopout(p) + } + if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 { + p.From.Reg = REG_TLS + p.From.Scale = 0 + p.From.Index = REG_NONE + } + + if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + p.To.Reg = REG_TLS + p.To.Scale = 0 + p.To.Index = REG_NONE + } + } else { + // load_g, below, always inserts the 1-instruction sequence. Rewrite it + // as the 2-instruction sequence if necessary. + // MOVQ 0(TLS), BX + // becomes + // MOVQ TLS, BX + // MOVQ 0(BX)(TLS*1), BX + if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + q := obj.Appendp(p, newprog) + q.As = p.As + q.From = p.From + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Index = REG_TLS + q.From.Scale = 2 // TODO: use 1 + q.To = p.To + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_TLS + p.From.Index = REG_NONE + p.From.Offset = 0 + } + } + + // Android and Windows use a tls offset determined at runtime. Rewrite + // MOVQ TLS, BX + // to + // MOVQ runtime.tls_g(SB), BX + if (isAndroid || ctxt.Headtype == objabi.Hwindows) && + (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Reg = REG_NONE + p.From.Sym = ctxt.Lookup("runtime.tls_g") + p.From.Index = REG_NONE + if ctxt.Headtype == objabi.Hwindows { + // Windows requires an additional indirection + // to retrieve the TLS pointer, + // as runtime.tls_g contains the TLS offset from GS or FS. + // on AMD64 add + // MOVQ 0(BX)(GS*1), BX + // on 386 add + // MOVL 0(BX)(FS*1), BX + q := obj.Appendp(p, newprog) + q.As = p.As + q.From = obj.Addr{} + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + if ctxt.Arch.Family == sys.AMD64 { + q.From.Index = REG_GS + } else { + q.From.Index = REG_FS + } + q.From.Scale = 1 + q.From.Offset = 0 + q.To = p.To + } + } + + // TODO: Remove. + if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 { + if p.From.Scale == 1 && p.From.Index == REG_TLS { + p.From.Scale = 2 + } + if p.To.Scale == 1 && p.To.Index == REG_TLS { + p.To.Scale = 2 + } + } + + // Rewrite 0 to $0 in 3rd argument to CMPPS etc. + // That's what the tables expect.
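+ // A bare 0 in that position parses as TYPE_MEM with no base, index, + // name, or symbol; the check below retypes exactly that shape as + // TYPE_CONST so it matches the optab entries.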
+ switch p.As { + case ACMPPD, ACMPPS, ACMPSD, ACMPSS: + if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil { + p.To.Type = obj.TYPE_CONST + } + } + + // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH. + switch p.As { + case obj.ACALL, obj.AJMP, obj.ARET: + if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil { + p.To.Type = obj.TYPE_BRANCH + } + } + + // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ. + if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) { + switch p.As { + case AMOVL: + p.As = ALEAL + p.From.Type = obj.TYPE_MEM + case AMOVQ: + p.As = ALEAQ + p.From.Type = obj.TYPE_MEM + } + } + + // Rewrite float constants to values stored in memory. + switch p.As { + // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx + case AMOVSS: + if p.From.Type == obj.TYPE_FCONST { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { + if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { + p.As = AXORPS + p.From = p.To + break + } + } + } + fallthrough + + case AFMOVF, + AFADDF, + AFSUBF, + AFSUBRF, + AFMULF, + AFDIVF, + AFDIVRF, + AFCOMF, + AFCOMFP, + AADDSS, + ASUBSS, + AMULSS, + ADIVSS, + ACOMISS, + AUCOMISS: + if p.From.Type == obj.TYPE_FCONST { + f32 := float32(p.From.Val.(float64)) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Sym = ctxt.Float32Sym(f32) + p.From.Offset = 0 + } + + case AMOVSD: + // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx + if p.From.Type == obj.TYPE_FCONST { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { + if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { + p.As = AXORPS + p.From = p.To + break + } + } + } + fallthrough + + case AFMOVD, + AFADDD, + AFSUBD, + AFSUBRD, + AFMULD, + AFDIVD, + AFDIVRD, + AFCOMD, + AFCOMDP, + AADDSD, + ASUBSD, + AMULSD, + ADIVSD, + ACOMISD, + AUCOMISD: + if p.From.Type == obj.TYPE_FCONST { + f64 := p.From.Val.(float64) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Sym = ctxt.Float64Sym(f64) + p.From.Offset = 0 + } + } + + if ctxt.Flag_dynlink { + rewriteToUseGot(ctxt, p, newprog) + } + + if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 { + rewriteToPcrel(ctxt, p, newprog) + } +} + +// Rewrite p, if necessary, to access global data via the global offset table. +func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + var lea, mov obj.As + var reg int16 + if ctxt.Arch.Family == sys.AMD64 { + lea = ALEAQ + mov = AMOVQ + reg = REG_R15 + } else { + lea = ALEAL + mov = AMOVL + reg = REG_CX + if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index { + // Special case: clobber the destination register with + // the PC so we don't have to clobber CX. + // The SSA backend depends on CX not being clobbered across LEAL. + // See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared). + reg = p.To.Reg + } + } + + if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { + // ADUFFxxx $offset + // becomes + // $MOV runtime.duffxxx@GOT, $reg + // $LEA $offset($reg), $reg + // CALL $reg + // (we use LEAx rather than ADDx because ADDx clobbers + // flags and duffzero on 386 does not otherwise do so). 
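+ // Here $reg is R15 on AMD64 and CX on 386; the ALEAL special case + // above does not apply to the DUFF opcodes.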
+ var sym *obj.LSym + if p.As == obj.ADUFFZERO { + sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) + } else { + sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) + } + offset := p.To.Offset + p.As = mov + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + p.From.Sym = sym + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + p.To.Offset = 0 + p.To.Sym = nil + p1 := obj.Appendp(p, newprog) + p1.As = lea + p1.From.Type = obj.TYPE_MEM + p1.From.Offset = offset + p1.From.Reg = reg + p1.To.Type = obj.TYPE_REG + p1.To.Reg = reg + p2 := obj.Appendp(p1, newprog) + p2.As = obj.ACALL + p2.To.Type = obj.TYPE_REG + p2.To.Reg = reg + } + + // We only care about global data: NAME_EXTERN means a global + // symbol in the Go sense, and p.Sym.Local is true for a few + // internally defined symbols. + if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below + p.As = mov + p.From.Type = obj.TYPE_ADDR + } + if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + // $MOV $sym, Rx becomes $MOV sym@GOT, Rx + // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx + // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX + cmplxdest := false + pAs := p.As + var dest obj.Addr + if p.To.Type != obj.TYPE_REG || pAs != mov { + if ctxt.Arch.Family == sys.AMD64 { + ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p) + } + cmplxdest = true + dest = p.To + p.As = mov + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + p.To.Sym = nil + p.To.Name = obj.NAME_NONE + } + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_GOTREF + q := p + if p.From.Offset != 0 { + q = obj.Appendp(p, newprog) + q.As = lea + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Offset = p.From.Offset + q.To = p.To + p.From.Offset = 0 + } + if cmplxdest { + q = obj.Appendp(q, newprog) + q.As = pAs + q.To = dest + q.From.Type = obj.TYPE_REG + q.From.Reg = reg + } + } + if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { + ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + var source *obj.Addr + // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry + // MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15) + // An addition may be inserted between the two MOVs if there is an offset. + if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { + if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) + } + source = &p.From + } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { + source = &p.To + } else { + return + } + if p.As == obj.ACALL { + // When dynlinking on 386, almost any call might end up being a call + // to a PLT, so make sure the GOT pointer is loaded into BX. + // RegTo2 is set on the replacement call insn to stop it being + // processed when it is in turn passed to progedit. + // + // We disable open-coded defers in buildssa() on 386 ONLY with shared + // libraries because of this extra code added before deferreturn calls. 
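+ // Concretely (p1 and p2 below), CALL f is rewritten roughly as + // LEAL _GLOBAL_OFFSET_TABLE_<>(SB), BX + // CALL f + // with the original CALL nopped out.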
+ if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 { + return + } + p1 := obj.Appendp(p, newprog) + p2 := obj.Appendp(p1, newprog) + + p1.As = ALEAL + p1.From.Type = obj.TYPE_MEM + p1.From.Name = obj.NAME_STATIC + p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_") + p1.To.Type = obj.TYPE_REG + p1.To.Reg = REG_BX + + p2.As = p.As + p2.Scond = p.Scond + p2.From = p.From + if p.RestArgs != nil { + p2.RestArgs = append(p2.RestArgs, p.RestArgs...) + } + p2.Reg = p.Reg + p2.To = p.To + // p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr + // in ../pass.go complain, so set it back to TYPE_MEM here, until p2 + // itself gets passed to progedit. + p2.To.Type = obj.TYPE_MEM + p2.RegTo2 = 1 + + obj.Nopout(p) + return + + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP { + return + } + if source.Type != obj.TYPE_MEM { + ctxt.Diag("don't know how to handle %v with -dynlink", p) + } + p1 := obj.Appendp(p, newprog) + p2 := obj.Appendp(p1, newprog) + + p1.As = mov + p1.From.Type = obj.TYPE_MEM + p1.From.Sym = source.Sym + p1.From.Name = obj.NAME_GOTREF + p1.To.Type = obj.TYPE_REG + p1.To.Reg = reg + + p2.As = p.As + p2.From = p.From + p2.To = p.To + if from3 := p.GetFrom3(); from3 != nil { + p2.AddRestSource(*from3) + } + if p.From.Name == obj.NAME_EXTERN { + p2.From.Reg = reg + p2.From.Name = obj.NAME_NONE + p2.From.Sym = nil + } else if p.To.Name == obj.NAME_EXTERN { + p2.To.Reg = reg + p2.To.Name = obj.NAME_NONE + p2.To.Sym = nil + } else { + return + } + obj.Nopout(p) +} + +func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + // RegTo2 is set on the instructions we insert here so they don't get + // processed twice. + if p.RegTo2 != 0 { + return + } + if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { + return + } + // Any Prog (aside from the above special cases) with an Addr with Name == + // NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX + // inserted before it. + isName := func(a *obj.Addr) bool { + if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 { + return false + } + if a.Sym.Type == objabi.STLSBSS { + return false + } + return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF + } + + if isName(&p.From) && p.From.Type == obj.TYPE_ADDR { + // Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting + // to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX" + // respectively. + if p.To.Type != obj.TYPE_REG { + q := obj.Appendp(p, newprog) + q.As = p.As + q.From.Type = obj.TYPE_REG + q.From.Reg = REG_CX + q.To = p.To + p.As = AMOVL + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_CX + p.To.Sym = nil + p.To.Name = obj.NAME_NONE + } + } + + if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) { + return + } + var dst int16 = REG_CX + if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index { + dst = p.To.Reg + // Why? See the comment near the top of rewriteToUseGot above. + // AMOVLs might be introduced by the GOT rewrites. + } + q := obj.Appendp(p, newprog) + q.RegTo2 = 1 + r := obj.Appendp(q, newprog) + r.RegTo2 = 1 + q.As = obj.ACALL + thunkname := "__x86.get_pc_thunk." 
+ strings.ToLower(rconv(int(dst))) + q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) }) + q.To.Type = obj.TYPE_MEM + q.To.Name = obj.NAME_EXTERN + r.As = p.As + r.Scond = p.Scond + r.From = p.From + r.RestArgs = p.RestArgs + r.Reg = p.Reg + r.To = p.To + if isName(&p.From) { + r.From.Reg = dst + } + if isName(&p.To) { + r.To.Reg = dst + } + if p.GetFrom3() != nil && isName(p.GetFrom3()) { + r.GetFrom3().Reg = dst + } + obj.Nopout(p) +} + +// Prog.mark +const ( + markBit = 1 << 0 // used in errorCheck to avoid duplicate work +) + +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + p := cursym.Func().Text + autoffset := int32(p.To.Offset) + if autoffset < 0 { + autoffset = 0 + } + + hasCall := false + for q := p; q != nil; q = q.Link { + if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO { + hasCall = true + break + } + } + + var bpsize int + if ctxt.Arch.Family == sys.AMD64 && + !p.From.Sym.NoFrame() && // (1) below + !(autoffset == 0 && !hasCall) { // (2) below + // Make room to save a base pointer. + // There are 2 cases we must avoid: + // 1) If noframe is set (which we do for functions which tail call). + // For performance, we also want to avoid: + // 2) Frameless leaf functions + bpsize = ctxt.Arch.PtrSize + autoffset += int32(bpsize) + p.To.Offset += int64(bpsize) + } else { + bpsize = 0 + p.From.Sym.Set(obj.AttrNoFrame, true) + } + + textarg := int64(p.To.Val.(int32)) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) + + // TODO(rsc): Remove. + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 + } + + // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. + if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() { + leaf := true + LeafSearch: + for q := p; q != nil; q = q.Link { + switch q.As { + case obj.ACALL: + // Treat common runtime calls that take no arguments + // the same as duffcopy and duffzero. + if !isZeroArgRuntimeCall(q.To.Sym) { + leaf = false + break LeafSearch + } + fallthrough + case obj.ADUFFCOPY, obj.ADUFFZERO: + if autoffset >= abi.StackSmall-8 { + leaf = false + break LeafSearch + } + } + } + + if leaf { + p.From.Sym.Set(obj.AttrNoSplit, true) + } + } + + var regEntryTmp0, regEntryTmp1 int16 + if ctxt.Arch.Family == sys.AMD64 { + regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1 + } else { + regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI + } + + var regg int16 + if !p.From.Sym.NoSplit() { + // Emit split check and load G register + p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) + } else if p.From.Sym.Wrapper() { + // Load G register for the wrapper code + p, regg = loadG(ctxt, cursym, p, newprog) + } + + if bpsize > 0 { + // Save caller's BP + p = obj.Appendp(p, newprog) + + p.As = APUSHQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_BP + + // Move current frame to BP + p = obj.Appendp(p, newprog) + + p.As = AMOVQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_BP + } + + if autoffset%int32(ctxt.Arch.RegSize) != 0 { + ctxt.Diag("unaligned stack size %d", autoffset) + } + + // localoffset is autoffset discounting the frame pointer, + // which has already been allocated in the stack. 
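+ // For example, in a 40-byte AMD64 frame with a saved BP (bpsize 8), + // the PUSHQ BP above has already covered 8 bytes, so ADJSP allocates + // the remaining 32.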
+ localoffset := autoffset - int32(bpsize) + if localoffset != 0 { + p = obj.Appendp(p, newprog) + p.As = AADJSP + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(localoffset) + p.Spadj = localoffset + } + + // Delve debugger would like the next instruction to be noted as the end of the function prologue. + // TODO: are there other cases (e.g., wrapper functions) that need marking? + if autoffset != 0 { + p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd) + } + + if cursym.Func().Text.From.Sym.Wrapper() { + // if g._panic != nil && g._panic.argp == FP { + // g._panic.argp = bottom-of-frame + // } + // + // MOVQ g_panic(g), regEntryTmp0 + // TESTQ regEntryTmp0, regEntryTmp0 + // JNE checkargp + // end: + // NOP + // ... rest of function ... + // checkargp: + // LEAQ (autoffset+8)(SP), regEntryTmp1 + // CMPQ panic_argp(regEntryTmp0), regEntryTmp1 + // JNE end + // MOVQ SP, panic_argp(regEntryTmp0) + // JMP end + // + // The NOP is needed to give the jumps somewhere to land. + // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes. + // + // The layout is chosen to help static branch prediction: + // Both conditional jumps are unlikely, so they are arranged to be forward jumps. + + // MOVQ g_panic(g), regEntryTmp0 + p = obj.Appendp(p, newprog) + p.As = AMOVQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = regg + p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp0 + if ctxt.Arch.Family == sys.I386 { + p.As = AMOVL + } + + // TESTQ regEntryTmp0, regEntryTmp0 + p = obj.Appendp(p, newprog) + p.As = ATESTQ + p.From.Type = obj.TYPE_REG + p.From.Reg = regEntryTmp0 + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp0 + if ctxt.Arch.Family == sys.I386 { + p.As = ATESTL + } + + // JNE checkargp (checkargp to be resolved later) + jne := obj.Appendp(p, newprog) + jne.As = AJNE + jne.To.Type = obj.TYPE_BRANCH + + // end: + // NOP + end := obj.Appendp(jne, newprog) + end.As = obj.ANOP + + // Fast forward to end of function. + var last *obj.Prog + for last = end; last.Link != nil; last = last.Link { + } + + // LEAQ (autoffset+8)(SP), regEntryTmp1 + p = obj.Appendp(last, newprog) + p.As = ALEAQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize) + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp1 + if ctxt.Arch.Family == sys.I386 { + p.As = ALEAL + } + + // Set jne branch target. + jne.To.SetTarget(p) + + // CMPQ panic_argp(regEntryTmp0), regEntryTmp1 + p = obj.Appendp(p, newprog) + p.As = ACMPQ + p.From.Type = obj.TYPE_MEM + p.From.Reg = regEntryTmp0 + p.From.Offset = 0 // Panic.argp + p.To.Type = obj.TYPE_REG + p.To.Reg = regEntryTmp1 + if ctxt.Arch.Family == sys.I386 { + p.As = ACMPL + } + + // JNE end + p = obj.Appendp(p, newprog) + p.As = AJNE + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // MOVQ SP, panic_argp(regEntryTmp0) + p = obj.Appendp(p, newprog) + p.As = AMOVQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_MEM + p.To.Reg = regEntryTmp0 + p.To.Offset = 0 // Panic.argp + if ctxt.Arch.Family == sys.I386 { + p.As = AMOVL + } + + // JMP end + p = obj.Appendp(p, newprog) + p.As = obj.AJMP + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(end) + + // Reset p for following code. 
+ p = end + } + + var deltasp int32 + for p = cursym.Func().Text; p != nil; p = p.Link { + pcsize := ctxt.Arch.RegSize + switch p.From.Name { + case obj.NAME_AUTO: + p.From.Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.From.Offset += int64(deltasp) + int64(pcsize) + } + if p.GetFrom3() != nil { + switch p.GetFrom3().Name { + case obj.NAME_AUTO: + p.GetFrom3().Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.GetFrom3().Offset += int64(deltasp) + int64(pcsize) + } + } + switch p.To.Name { + case obj.NAME_AUTO: + p.To.Offset += int64(deltasp) - int64(bpsize) + case obj.NAME_PARAM: + p.To.Offset += int64(deltasp) + int64(pcsize) + } + + switch p.As { + default: + if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ { + f := cursym.Func() + if f.FuncFlag&abi.FuncFlagSPWrite == 0 { + f.FuncFlag |= abi.FuncFlagSPWrite + if ctxt.Debugvlog || !ctxt.IsAsm { + ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p) + if !ctxt.IsAsm { + ctxt.Diag("invalid auto-SPWRITE in non-assembly") + ctxt.DiagFlush() + log.Fatalf("bad SPWRITE") + } + } + } + } + continue + + case APUSHL, APUSHFL: + deltasp += 4 + p.Spadj = 4 + continue + + case APUSHQ, APUSHFQ: + deltasp += 8 + p.Spadj = 8 + continue + + case APUSHW, APUSHFW: + deltasp += 2 + p.Spadj = 2 + continue + + case APOPL, APOPFL: + deltasp -= 4 + p.Spadj = -4 + continue + + case APOPQ, APOPFQ: + deltasp -= 8 + p.Spadj = -8 + continue + + case APOPW, APOPFW: + deltasp -= 2 + p.Spadj = -2 + continue + + case AADJSP: + p.Spadj = int32(p.From.Offset) + deltasp += int32(p.From.Offset) + continue + + case obj.ARET: + // do nothing + } + + if autoffset != deltasp { + ctxt.Diag("%s: unbalanced PUSH/POP", cursym) + } + + if autoffset != 0 { + to := p.To // Keep To attached to RET for retjmp below + p.To = obj.Addr{} + if localoffset != 0 { + p.As = AADJSP + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(-localoffset) + p.Spadj = -localoffset + p = obj.Appendp(p, newprog) + } + + if bpsize > 0 { + // Restore caller's BP + p.As = APOPQ + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_BP + p.Spadj = -int32(bpsize) + p = obj.Appendp(p, newprog) + } + + p.As = obj.ARET + p.To = to + + // If there are instructions following + // this ARET, they come from a branch + // with the same stackframe, so undo + // the cleanup. + p.Spadj = +autoffset + } + + if p.To.Sym != nil { // retjmp + p.As = obj.AJMP + } + } +} + +func isZeroArgRuntimeCall(s *obj.LSym) bool { + if s == nil { + return false + } + switch s.Name { + case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift": + return true + } + if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") { + // These functions do take arguments (in registers), + // but use no stack before they do a stack check. We + // should include them. See issue 31219. + return true + } + return false +} + +func indir_cx(ctxt *obj.Link, a *obj.Addr) { + a.Type = obj.TYPE_MEM + a.Reg = REG_CX +} + +// loadG ensures the G is loaded into a register (either CX or REGG), +// appending instructions to p if necessary. It returns the new last +// instruction and the G register. 
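+// On AMD64 under ABIInternal, g is permanently in R14 (REGG) and no code is +// emitted; otherwise g is loaded from TLS into CX on 386 or R14 on AMD64.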
+func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) { + if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal { + // Use the G register directly in ABIInternal + return p, REGG + } + + var regg int16 = REG_CX + if ctxt.Arch.Family == sys.AMD64 { + regg = REGG // == REG_R14 + } + + p = obj.Appendp(p, newprog) + p.As = AMOVQ + if ctxt.Arch.PtrSize == 4 { + p.As = AMOVL + } + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_TLS + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = regg + + // Rewrite TLS instruction if necessary. + next := p.Link + progedit(ctxt, p, newprog) + for p.Link != next { + p = p.Link + progedit(ctxt, p, newprog) + } + + if p.From.Index == REG_TLS { + p.From.Scale = 2 + } + + return p, regg +} + +// Append code to p to check for stack split. +// Appends to (does not overwrite) p. +// Assumes g is in rg. +// Returns last new instruction and G register. +func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) { + cmp := ACMPQ + lea := ALEAQ + mov := AMOVQ + sub := ASUBQ + push, pop := APUSHQ, APOPQ + + if ctxt.Arch.Family == sys.I386 { + cmp = ACMPL + lea = ALEAL + mov = AMOVL + sub = ASUBL + push, pop = APUSHL, APOPL + } + + tmp := int16(REG_AX) // use AX for 32-bit + if ctxt.Arch.Family == sys.AMD64 { + // Avoid register parameters. + tmp = int16(REGENTRYTMP0) + } + + if ctxt.Flag_maymorestack != "" { + p = cursym.Func().SpillRegisterArgs(p, newprog) + + if cursym.Func().Text.From.Sym.NeedCtxt() { + p = obj.Appendp(p, newprog) + p.As = push + p.From.Type = obj.TYPE_REG + p.From.Reg = REGCTXT + } + + // We call maymorestack with an ABI matching the + // caller's ABI. Since this is the first thing that + // happens in the function, we have to be consistent + // with the caller about CPU state (notably, + // fixed-meaning registers). + + p = obj.Appendp(p, newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI()) + + if cursym.Func().Text.From.Sym.NeedCtxt() { + p = obj.Appendp(p, newprog) + p.As = pop + p.To.Type = obj.TYPE_REG + p.To.Reg = REGCTXT + } + + p = cursym.Func().UnspillRegisterArgs(p, newprog) + } + + // Jump back to here after morestack returns. + startPred := p + + // Load G register + var rg int16 + p, rg = loadG(ctxt, cursym, p, newprog) + + var q1 *obj.Prog + if framesize <= abi.StackSmall { + // small stack: SP <= stackguard + // CMPQ SP, stackguard + p = obj.Appendp(p, newprog) + + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + + // Mark the stack bound check and morestack call async nonpreemptible. + // If we get preempted here, when resumed the preemption request is + // cleared, but we'll still call morestack, which will double the stack + // unnecessarily. See issue #35470. 
+ p = ctxt.StartUnsafePoint(p, newprog) + } else if framesize <= abi.StackBig { + // large stack: SP-framesize <= stackguard-StackSmall + // LEAQ -xxx(SP), tmp + // CMPQ tmp, stackguard + p = obj.Appendp(p, newprog) + + p.As = lea + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + p.From.Offset = -(int64(framesize) - abi.StackSmall) + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = obj.Appendp(p, newprog) + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = tmp + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + + p = ctxt.StartUnsafePoint(p, newprog) // see the comment above + } else { + // Such a large stack we need to protect against underflow. + // The runtime guarantees SP > objabi.StackBig, but + // framesize is large enough that SP-framesize may + // underflow, causing a direct comparison with the + // stack guard to incorrectly succeed. We explicitly + // guard against underflow. + // + // MOVQ SP, tmp + // SUBQ $(framesize - StackSmall), tmp + // // If subtraction wrapped (carry set), morestack. + // JCS label-of-call-to-morestack + // CMPQ tmp, stackguard + + p = obj.Appendp(p, newprog) + + p.As = mov + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = ctxt.StartUnsafePoint(p, newprog) // see the comment above + + p = obj.Appendp(p, newprog) + p.As = sub + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) - abi.StackSmall + p.To.Type = obj.TYPE_REG + p.To.Reg = tmp + + p = obj.Appendp(p, newprog) + p.As = AJCS + p.To.Type = obj.TYPE_BRANCH + q1 = p + + p = obj.Appendp(p, newprog) + p.As = cmp + p.From.Type = obj.TYPE_REG + p.From.Reg = tmp + p.To.Type = obj.TYPE_MEM + p.To.Reg = rg + p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + } + + // common + jls := obj.Appendp(p, newprog) + jls.As = AJLS + jls.To.Type = obj.TYPE_BRANCH + + end := ctxt.EndUnsafePoint(jls, newprog, -1) + + var last *obj.Prog + for last = cursym.Func().Text; last.Link != nil; last = last.Link { + } + + // Now we are at the end of the function, but logically + // we are still in function prologue. We need to fix the + // SP data and PCDATA. + spfix := obj.Appendp(last, newprog) + spfix.As = obj.ANOP + spfix.Spadj = -framesize + + pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog) + spill := ctxt.StartUnsafePoint(pcdata, newprog) + pcdata = cursym.Func().SpillRegisterArgs(spill, newprog) + + call := obj.Appendp(pcdata, newprog) + call.Pos = cursym.Func().Text.Pos + call.As = obj.ACALL + call.To.Type = obj.TYPE_BRANCH + call.To.Name = obj.NAME_EXTERN + morestack := "runtime.morestack" + switch { + case cursym.CFunc(): + morestack = "runtime.morestackc" + case !cursym.Func().Text.From.Sym.NeedCtxt(): + morestack = "runtime.morestack_noctxt" + } + call.To.Sym = ctxt.Lookup(morestack) + // When compiling 386 code for dynamic linking, the call needs to be adjusted + // to follow PIC rules. This in turn can insert more instructions, so we need + // to keep track of the start of the call (where the jump will be to) and the + // end (which following instructions are appended to). + callend := call + progedit(ctxt, callend, newprog) + for ; callend.Link != nil; callend = callend.Link { + progedit(ctxt, callend.Link, newprog) + } + + // The instructions which unspill regs should be preemptible. 
+ pcdata = ctxt.EndUnsafePoint(callend, newprog, -1) + unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog) + + jmp := obj.Appendp(unspill, newprog) + jmp.As = obj.AJMP + jmp.To.Type = obj.TYPE_BRANCH + jmp.To.SetTarget(startPred.Link) + jmp.Spadj = +framesize + + jls.To.SetTarget(spill) + if q1 != nil { + q1.To.SetTarget(spill) + } + + return end, rg +} + +func isR15(r int16) bool { + return r == REG_R15 || r == REG_R15B +} +func addrMentionsR15(a *obj.Addr) bool { + if a == nil { + return false + } + return isR15(a.Reg) || isR15(a.Index) +} +func progMentionsR15(p *obj.Prog) bool { + return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3()) +} + +func addrUsesGlobal(a *obj.Addr) bool { + if a == nil { + return false + } + return a.Name == obj.NAME_EXTERN && !a.Sym.Local() +} +func progUsesGlobal(p *obj.Prog) bool { + if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP { + // These opcodes don't use a GOT to access their argument (see rewriteToUseGot), + // or R15 would be dead at them anyway. + return false + } + if p.As == ALEAQ { + // The GOT entry is placed directly in the destination register; R15 is not used. + return false + } + return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3()) +} + +type rwMask int + +const ( + readFrom rwMask = 1 << iota + readTo + readReg + readFrom3 + writeFrom + writeTo + writeReg + writeFrom3 +) + +// progRW returns a mask describing the effects of the instruction p. +// Note: this isn't exhaustively accurate. It is only currently used for detecting +// reads/writes to R15, so SSE register behavior isn't fully correct, and +// other weird cases (e.g. writes to DX by CLD) also aren't captured. +func progRW(p *obj.Prog) rwMask { + var m rwMask + // Default for most instructions + if p.From.Type != obj.TYPE_NONE { + m |= readFrom + } + if p.To.Type != obj.TYPE_NONE { + // Most x86 instructions update the To value + m |= readTo | writeTo + } + if p.Reg != 0 { + m |= readReg + } + if p.GetFrom3() != nil { + m |= readFrom3 + } + + // Lots of exceptions to the above defaults. + name := p.As.String() + if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") { + // MOV instructions don't read To. + m &^= readTo + } + switch p.As { + case APOPW, APOPL, APOPQ, + ALEAL, ALEAQ, + AIMUL3W, AIMUL3L, AIMUL3Q, + APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS, + ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ, + ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ: + // These instructions are pure writes to To. They don't use its old value. + m &^= readTo + case AXORL, AXORQ: + // Register-clearing idiom doesn't read previous value. + if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg { + m &^= readFrom | readTo + } + case AMULXL, AMULXQ: + // These are write-only to both To and From3. + m &^= readTo | readFrom3 + m |= writeFrom3 + } + return m +} + +// progReadsR15 reports whether p reads the register R15. 
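+// For example, MOVQ AX, (R15) reads R15: memory operands count as reads of +// their base and index registers, even though MOV does not read the old +// value of its destination.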
+func progReadsR15(p *obj.Prog) bool { + m := progRW(p) + if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) { + return true + } + if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) { + return true + } + if m&readReg != 0 && isR15(p.Reg) { + return true + } + if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) { + return true + } + // reads of the index registers + if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) { + return true + } + if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) { + return true + } + if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) { + return true + } + return false +} + +// progWritesR15 reports whether p writes the register R15. +func progWritesR15(p *obj.Prog) bool { + m := progRW(p) + if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) { + return true + } + if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) { + return true + } + if m&writeReg != 0 && isR15(p.Reg) { + return true + } + if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) { + return true + } + return false +} + +func errorCheck(ctxt *obj.Link, s *obj.LSym) { + // When dynamic linking, R15 is used to access globals. Reject code that + // uses R15 after a global variable access. + if !ctxt.Flag_dynlink { + return + } + + // Flood fill all the instructions where R15's value is junk. + // If there are any uses of R15 in that set, report an error. + var work []*obj.Prog + var mentionsR15 bool + for p := s.Func().Text; p != nil; p = p.Link { + if progUsesGlobal(p) { + work = append(work, p) + p.Mark |= markBit + } + if progMentionsR15(p) { + mentionsR15 = true + } + } + if mentionsR15 { + for len(work) > 0 { + p := work[len(work)-1] + work = work[:len(work)-1] + if progReadsR15(p) { + pos := ctxt.PosTable.Pos(p.Pos) + ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p) + break // only report one error + } + if progWritesR15(p) { + // R15 is overwritten by this instruction. Its value is not junk any more. + continue + } + if q := p.To.Target(); q != nil && q.Mark&markBit == 0 { + q.Mark |= markBit + work = append(work, q) + } + if p.As == obj.AJMP || p.As == obj.ARET { + continue // no fallthrough + } + if q := p.Link; q != nil && q.Mark&markBit == 0 { + q.Mark |= markBit + work = append(work, q) + } + } + } + + // Clean up. 
+ for p := s.Func().Text; p != nil; p = p.Link { + p.Mark &^= markBit + } +} + +var unaryDst = map[obj.As]bool{ + ABSWAPL: true, + ABSWAPQ: true, + ACLDEMOTE: true, + ACLFLUSH: true, + ACLFLUSHOPT: true, + ACLWB: true, + ACMPXCHG16B: true, + ACMPXCHG8B: true, + ADECB: true, + ADECL: true, + ADECQ: true, + ADECW: true, + AFBSTP: true, + AFFREE: true, + AFLDENV: true, + AFSAVE: true, + AFSTCW: true, + AFSTENV: true, + AFSTSW: true, + AFXSAVE64: true, + AFXSAVE: true, + AINCB: true, + AINCL: true, + AINCQ: true, + AINCW: true, + ANEGB: true, + ANEGL: true, + ANEGQ: true, + ANEGW: true, + ANOTB: true, + ANOTL: true, + ANOTQ: true, + ANOTW: true, + APOPL: true, + APOPQ: true, + APOPW: true, + ARDFSBASEL: true, + ARDFSBASEQ: true, + ARDGSBASEL: true, + ARDGSBASEQ: true, + ARDPID: true, + ARDRANDL: true, + ARDRANDQ: true, + ARDRANDW: true, + ARDSEEDL: true, + ARDSEEDQ: true, + ARDSEEDW: true, + ASETCC: true, + ASETCS: true, + ASETEQ: true, + ASETGE: true, + ASETGT: true, + ASETHI: true, + ASETLE: true, + ASETLS: true, + ASETLT: true, + ASETMI: true, + ASETNE: true, + ASETOC: true, + ASETOS: true, + ASETPC: true, + ASETPL: true, + ASETPS: true, + ASGDT: true, + ASIDT: true, + ASLDTL: true, + ASLDTQ: true, + ASLDTW: true, + ASMSWL: true, + ASMSWQ: true, + ASMSWW: true, + ASTMXCSR: true, + ASTRL: true, + ASTRQ: true, + ASTRW: true, + AXSAVE64: true, + AXSAVE: true, + AXSAVEC64: true, + AXSAVEC: true, + AXSAVEOPT64: true, + AXSAVEOPT: true, + AXSAVES64: true, + AXSAVES: true, +} + +var Linkamd64 = obj.LinkArch{ + Arch: sys.ArchAMD64, + Init: instinit, + ErrorCheck: errorCheck, + Preprocess: preprocess, + Assemble: span6, + Progedit: progedit, + SEH: populateSeh, + UnaryDst: unaryDst, + DWARFRegisters: AMD64DWARFRegisters, +} + +var Link386 = obj.LinkArch{ + Arch: sys.Arch386, + Init: instinit, + Preprocess: preprocess, + Assemble: span6, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: X86DWARFRegisters, +} diff --git a/src/cmd/internal/obj/x86/obj6_test.go b/src/cmd/internal/obj/x86/obj6_test.go new file mode 100644 index 0000000..d1246be --- /dev/null +++ b/src/cmd/internal/obj/x86/obj6_test.go @@ -0,0 +1,167 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package x86_test + +import ( + "bufio" + "bytes" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "testing" +) + +const testdata = ` +MOVQ AX, AX -> MOVQ AX, AX + +LEAQ name(SB), AX -> MOVQ name@GOT(SB), AX +LEAQ name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX +MOVQ $name(SB), AX -> MOVQ name@GOT(SB), AX +MOVQ $name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX + +MOVQ name(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ (R15), AX +MOVQ name+10(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ 10(R15), AX + +CMPQ name(SB), $0 -> NOP; MOVQ name@GOT(SB), R15; CMPQ (R15), $0 + +MOVQ $1, name(SB) -> NOP; MOVQ name@GOT(SB), R15; MOVQ $1, (R15) +MOVQ $1, name+10(SB) -> NOP; MOVQ name@GOT(SB), R15; MOVQ $1, 10(R15) +` + +type ParsedTestData struct { + input string + marks []int + marker_to_input map[int][]string + marker_to_expected map[int][]string + marker_to_output map[int][]string +} + +const marker_start = 1234 + +func parseTestData(t *testing.T) *ParsedTestData { + r := &ParsedTestData{} + scanner := bufio.NewScanner(strings.NewReader(testdata)) + r.marker_to_input = make(map[int][]string) + r.marker_to_expected = make(map[int][]string) + marker := marker_start + input_insns := []string{} + for scanner.Scan() { + line := scanner.Text() + if len(strings.TrimSpace(line)) == 0 { + continue + } + parts := strings.Split(line, "->") + if len(parts) != 2 { + t.Fatalf("malformed line %v", line) + } + r.marks = append(r.marks, marker) + marker_insn := fmt.Sprintf("MOVQ $%d, AX", marker) + input_insns = append(input_insns, marker_insn) + for _, input_insn := range strings.Split(parts[0], ";") { + input_insns = append(input_insns, input_insn) + r.marker_to_input[marker] = append(r.marker_to_input[marker], normalize(input_insn)) + } + for _, expected_insn := range strings.Split(parts[1], ";") { + r.marker_to_expected[marker] = append(r.marker_to_expected[marker], normalize(expected_insn)) + } + marker++ + } + r.input = "TEXT ·foo(SB),$0\n" + strings.Join(input_insns, "\n") + "\n" + return r +} + +var spaces_re *regexp.Regexp = regexp.MustCompile(`\s+`) + +func normalize(s string) string { + return spaces_re.ReplaceAllLiteralString(strings.TrimSpace(s), " ") +} + +func asmOutput(t *testing.T, s string) []byte { + tmpdir, err := os.MkdirTemp("", "progedittest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + tmpfile, err := os.Create(filepath.Join(tmpdir, "input.s")) + if err != nil { + t.Fatal(err) + } + defer tmpfile.Close() + _, err = tmpfile.WriteString(s) + if err != nil { + t.Fatal(err) + } + cmd := testenv.Command(t, + testenv.GoToolPath(t), "tool", "asm", "-S", "-dynlink", + "-o", filepath.Join(tmpdir, "output.6"), tmpfile.Name()) + + cmd.Env = append(os.Environ(), + "GOARCH=amd64", "GOOS=linux", "GOPATH="+filepath.Join(tmpdir, "_gopath")) + asmout, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("error %s output %s", err, asmout) + } + return asmout +} + +func parseOutput(t *testing.T, td *ParsedTestData, asmout []byte) { + scanner := bufio.NewScanner(bytes.NewReader(asmout)) + marker := regexp.MustCompile(`MOVQ \$([0-9]+), AX`) + mark := -1 + td.marker_to_output = make(map[int][]string) + for scanner.Scan() { + line := scanner.Text() + if line[0] != '\t' { + continue + } + parts := strings.SplitN(line, "\t", 3) + if len(parts) != 3 { + continue + } + n := normalize(parts[2]) + mark_matches := marker.FindStringSubmatch(n) + if mark_matches != nil { + mark, _ = strconv.Atoi(mark_matches[1]) + if _, ok := 
td.marker_to_input[mark]; !ok {
+ t.Fatalf("unexpected marker %d", mark)
+ }
+ } else if mark != -1 {
+ td.marker_to_output[mark] = append(td.marker_to_output[mark], n)
+ }
+ }
+}
+
+func TestDynlink(t *testing.T) {
+ testenv.MustHaveGoBuild(t)
+
+ if os.Getenv("GOHOSTARCH") != "" {
+ // TODO: make this work? It was failing due to the
+ // GOARCH= filtering above and skipping is easiest for
+ // now.
+ t.Skip("skipping when GOHOSTARCH is set")
+ }
+
+ testdata := parseTestData(t)
+ asmout := asmOutput(t, testdata.input)
+ parseOutput(t, testdata, asmout)
+ for _, m := range testdata.marks {
+ i := strings.Join(testdata.marker_to_input[m], "; ")
+ o := strings.Join(testdata.marker_to_output[m], "; ")
+ e := strings.Join(testdata.marker_to_expected[m], "; ")
+ if o != e {
+ if o == i {
+ t.Errorf("%s was unchanged; should have become %s", i, e)
+ } else {
+ t.Errorf("%s became %s; should have become %s", i, o, e)
+ }
+ } else if i != e {
+ t.Logf("%s correctly became %s", i, o)
+ }
+ }
+}
diff --git a/src/cmd/internal/obj/x86/pcrelative_test.go b/src/cmd/internal/obj/x86/pcrelative_test.go
new file mode 100644
index 0000000..3827100
--- /dev/null
+++ b/src/cmd/internal/obj/x86/pcrelative_test.go
@@ -0,0 +1,105 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86_test
+
+import (
+ "bytes"
+ "fmt"
+ "internal/testenv"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+const asmData = `
+GLOBL zeros<>(SB),8,$64
+TEXT ·testASM(SB),4,$0
+VMOVUPS zeros<>(SB), %s // PC-relative relocation is off by 1 for Y8-Y15, Z8-Z15 and Z24-Z31
+RET
+`
+
+const goData = `
+package main
+
+func testASM()
+
+func main() {
+ testASM()
+}
+`
+
+func objdumpOutput(t *testing.T, mname, source string) []byte {
+ tmpdir, err := os.MkdirTemp("", mname)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(tmpdir)
+ err = os.WriteFile(filepath.Join(tmpdir, "go.mod"), []byte(fmt.Sprintf("module %s\n", mname)), 0666)
+ if err != nil {
+ t.Fatal(err)
+ }
+ tmpfile, err := os.Create(filepath.Join(tmpdir, "input.s"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer tmpfile.Close()
+ _, err = tmpfile.WriteString(source)
+ if err != nil {
+ t.Fatal(err)
+ }
+ tmpfile2, err := os.Create(filepath.Join(tmpdir, "input.go"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer tmpfile2.Close()
+ _, err = tmpfile2.WriteString(goData)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ cmd := testenv.Command(t,
+ testenv.GoToolPath(t), "build", "-o",
+ filepath.Join(tmpdir, "output"))
+
+ cmd.Env = append(os.Environ(),
+ "GOARCH=amd64", "GOOS=linux", "GOPATH="+filepath.Join(tmpdir, "_gopath"))
+ cmd.Dir = tmpdir
+
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("error %s output %s", err, out)
+ }
+ cmd2 := testenv.Command(t,
+ testenv.GoToolPath(t), "tool", "objdump", "-s", "testASM",
+ filepath.Join(tmpdir, "output"))
+ cmd2.Env = cmd.Env
+ cmd2.Dir = tmpdir
+ objout, err := cmd2.CombinedOutput()
+ if err != nil {
+ t.Fatalf("error %s output %s", err, objout)
+ }
+
+ return objout
+}
+
+func TestVexEvexPCrelative(t *testing.T) {
+ testenv.MustHaveGoBuild(t)
+LOOP:
+ for _, reg := range []string{"Y0", "Y8", "Z0", "Z8", "Z16", "Z24"} {
+ asm := fmt.Sprintf(asmData, reg)
+ objout := objdumpOutput(t, "pcrelative", asm)
+ data := bytes.Split(objout, []byte("\n"))
+ for idx := len(data) - 1; idx >= 0; idx-- {
+ // Check that RET wasn't overwritten.
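+ // If the PC-relative relocation were written at the wrong
+ // offset (the off-by-1 case named above), it would clobber
+ // the RET that follows the VMOVUPS in the disassembly.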
+ if bytes.Index(data[idx], []byte("RET")) != -1 {
+ if testing.Short() {
+ break LOOP
+ }
+ continue LOOP
+ }
+ }
+ t.Errorf("VMOVUPS zeros<>(SB), %s overwrote RET", reg)
+ }
+}
diff --git a/src/cmd/internal/obj/x86/seh.go b/src/cmd/internal/obj/x86/seh.go
new file mode 100644
index 0000000..71cdd36
--- /dev/null
+++ b/src/cmd/internal/obj/x86/seh.go
@@ -0,0 +1,165 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+ "encoding/base64"
+ "fmt"
+ "math"
+)
+
+type sehbuf struct {
+ ctxt *obj.Link
+ data []byte
+ off int
+}
+
+func newsehbuf(ctxt *obj.Link, nodes uint8) sehbuf {
+ // - 8 bytes for the fixed header and the exception handler RVA
+ // - 2 bytes for each node
+ // - 2 bytes of padding in case nodes is not even
+ size := 8 + nodes*2
+ if nodes%2 != 0 {
+ size += 2
+ }
+ return sehbuf{ctxt, make([]byte, size), 0}
+}
+
+func (b *sehbuf) write8(v uint8) {
+ b.data[b.off] = v
+ b.off++
+}
+
+func (b *sehbuf) write32(v uint32) {
+ b.ctxt.Arch.ByteOrder.PutUint32(b.data[b.off:], v)
+ b.off += 4
+}
+
+func (b *sehbuf) writecode(op, value uint8) {
+ b.write8(value<<4 | op)
+}
+
+// populateSeh generates the SEH unwind information for s.
+func populateSeh(ctxt *obj.Link, s *obj.LSym) (sehsym *obj.LSym) {
+ if s.NoFrame() {
+ return
+ }
+
+ // This implementation expects the following function prologue layout:
+ // - Stack split code (optional)
+ // - PUSHQ BP
+ // - MOVQ SP, BP
+ //
+ // If the prologue layout changes, the unwind information should be updated
+ // accordingly.
+
+ // Search for the PUSHQ BP instruction inside the prologue.
+ var pushbp *obj.Prog
+ for p := s.Func().Text; p != nil; p = p.Link {
+ if p.As == APUSHQ && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_BP {
+ pushbp = p
+ break
+ }
+ if p.Pos.Xlogue() == src.PosPrologueEnd {
+ break
+ }
+ }
+ if pushbp == nil {
+ ctxt.Diag("missing frame pointer instruction: PUSHQ BP")
+ return
+ }
+
+ // It must be followed by a MOVQ SP, BP.
+ movbp := pushbp.Link
+ if movbp == nil {
+ ctxt.Diag("missing frame pointer instruction: MOVQ SP, BP")
+ return
+ }
+ if !(movbp.As == AMOVQ && movbp.From.Type == obj.TYPE_REG && movbp.From.Reg == REG_SP &&
+ movbp.To.Type == obj.TYPE_REG && movbp.To.Reg == REG_BP && movbp.From.Offset == 0) {
+ ctxt.Diag("unexpected frame pointer instruction\n%v", movbp)
+ return
+ }
+ if movbp.Link.Pc > math.MaxUint8 {
+ // SEH unwind information doesn't support prologues that are more than 255 bytes long.
+ // These are very rare, but still possible, e.g., when compiling functions with many
+ // parameters with -gcflags=-d=maymorestack=runtime.mayMoreStackPreempt.
+ // Return without reporting an error.
+ return
+ }
+
+ // Reference:
+ // https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info
+
+ const (
+ UWOP_PUSH_NONVOL = 0
+ UWOP_SET_FPREG = 3
+ SEH_REG_BP = 5
+ UNW_FLAG_EHANDLER = 1 << 3
+ )
+
+ var exceptionHandler *obj.LSym
+ var flags uint8
+ if s.Name == "runtime.asmcgocall_landingpad" {
+ // Most cgo calls go through runtime.asmcgocall_landingpad,
+ // so we can use it to catch exceptions from C code.
+ // TODO: use a more generic approach to identify which calls need an exception handler.
+ exceptionHandler = ctxt.Lookup("runtime.sehtramp")
+ if exceptionHandler == nil {
+ ctxt.Diag("missing runtime.sehtramp\n")
+ return
+ }
+ flags = UNW_FLAG_EHANDLER
+ }
+
+ // For now we only support operations that are encoded
+ // using a single 2-byte node, so the number of nodes
+ // is the number of operations.
+ nodes := uint8(2)
+ buf := newsehbuf(ctxt, nodes)
+ buf.write8(flags | 1) // Flags + version
+ buf.write8(uint8(movbp.Link.Pc)) // Size of prolog
+ buf.write8(nodes) // Count of nodes
+ buf.write8(SEH_REG_BP) // FP register
+
+ // Nodes are written in reverse order of appearance.
+ buf.write8(uint8(movbp.Link.Pc))
+ buf.writecode(UWOP_SET_FPREG, 0)
+
+ buf.write8(uint8(pushbp.Link.Pc))
+ buf.writecode(UWOP_PUSH_NONVOL, SEH_REG_BP)
+
+ // The following 4 bytes reference the RVA of the exception handler.
+ // The value is set to 0 for now; if an exception handler is needed,
+ // it will be updated later with an R_PEIMAGEOFF relocation to the
+ // exception handler.
+ buf.write32(0)
+
+ // The list of unwind infos in a PE binary has very low cardinality,
+ // as each info only contains frame pointer operations,
+ // which are very similar across functions.
+ // Dedup them when possible.
+ hash := base64.StdEncoding.EncodeToString(buf.data)
+ symname := fmt.Sprintf("%d.%s", len(buf.data), hash)
+ return ctxt.LookupInit("go:sehuw."+symname, func(s *obj.LSym) {
+ s.WriteBytes(ctxt, 0, buf.data)
+ s.Type = objabi.SSEHUNWINDINFO
+ s.Set(obj.AttrDuplicateOK, true)
+ s.Set(obj.AttrLocal, true)
+ if exceptionHandler != nil {
+ r := obj.Addrel(s)
+ r.Off = int32(len(buf.data) - 4)
+ r.Siz = 4
+ r.Sym = exceptionHandler
+ r.Type = objabi.R_PEIMAGEOFF
+ }
+ // Note: AttrContentAddressable cannot be set here,
+ // because the content-addressable-handling code
+ // does not know about aux symbols.
+ })
+}
diff --git a/src/cmd/internal/obj/x86/ytab.go b/src/cmd/internal/obj/x86/ytab.go
new file mode 100644
index 0000000..7d0b75b
--- /dev/null
+++ b/src/cmd/internal/obj/x86/ytab.go
@@ -0,0 +1,44 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86
+
+// argListMax specifies the upper arg count limit expected to be carried by obj.Prog.
+// Max len(obj.Prog.RestArgs) can be inferred from this to be 4.
+const argListMax int = 6
+
+type argList [argListMax]uint8
+
+type ytab struct {
+ zcase uint8
+ zoffset uint8
+
+ // Last arg is usually the destination.
+ // For unary instructions unaryDst is used to determine
+ // if the single argument is a source or a destination.
+ args argList
+}
+
+// match reports whether yt is compatible with args.
+//
+// Elements from args and yt.args are used
+// to index the ycover table like `ycover[args[i]+yt.args[i]]`.
+// This means that args should contain values that are already
+// multiplied by Ymax.
+func (yt *ytab) match(args []int) bool {
+ // The trailing Yxxx check is required to avoid a case
+ // where a shorter arg list is matched.
+ // If we had the exact yt.args length, it could be `yt.argc != len(args)`.
+ if len(args) < len(yt.args) && yt.args[len(args)] != Yxxx {
+ return false
+ }
+
+ for i := range args {
+ if ycover[args[i]+int(yt.args[i])] == 0 {
+ return false
+ }
+ }
+
+ return true
+}
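A minimal sketch may help with the ycover convention documented on match above. firstMatch below is hypothetical and not part of this package (the real table search lives in asm6.go); it only illustrates that each operand class must be pre-multiplied by Ymax before calling match:

func firstMatch(yts []ytab, classes []int) *ytab {
	// Pre-multiply each operand class by Ymax so that
	// ycover[args[i]+int(yt.args[i])] indexes the intended row.
	args := make([]int, len(classes))
	for i, class := range classes {
		args[i] = class * Ymax
	}
	// Return the first table entry whose expected classes cover args.
	for i := range yts {
		if yts[i].match(args) {
			return &yts[i]
		}
	}
	return nil
}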
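Similarly, here is a worked example of the unwind info built by populateSeh in seh.go above, assuming (hypothetically) that PUSHQ BP sits at offset 0 and encodes in one byte, that MOVQ SP, BP encodes in three bytes so the prologue ends at offset 4, and that no exception handler is attached; the offsets and instruction sizes are illustrative, not guaranteed encodings:

// Hypothetical contents of the 12-byte sehbuf.data for that prologue.
var unwindInfo = []byte{
	0x01,       // flags (0) | version (1)
	0x04,       // size of prologue: movbp.Link.Pc
	0x02,       // count of unwind codes (nodes)
	0x05,       // frame pointer register: BP (SEH_REG_BP)
	0x04, 0x03, // offset 4: UWOP_SET_FPREG (value<<4 | op = 0<<4 | 3)
	0x01, 0x50, // offset 1: UWOP_PUSH_NONVOL BP (5<<4 | 0)
	0x00, 0x00, 0x00, 0x00, // exception handler RVA, zero when no handler is attached
}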