summaryrefslogtreecommitdiffstats
path: root/src/cmd/asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/asm')
-rw-r--r--src/cmd/asm/doc.go56
-rw-r--r--src/cmd/asm/internal/arch/arch.go736
-rw-r--r--src/cmd/asm/internal/arch/arm.go257
-rw-r--r--src/cmd/asm/internal/arch/arm64.go362
-rw-r--r--src/cmd/asm/internal/arch/mips.go72
-rw-r--r--src/cmd/asm/internal/arch/ppc64.go102
-rw-r--r--src/cmd/asm/internal/arch/riscv64.go28
-rw-r--r--src/cmd/asm/internal/arch/s390x.go81
-rw-r--r--src/cmd/asm/internal/asm/asm.go915
-rw-r--r--src/cmd/asm/internal/asm/endtoend_test.go464
-rw-r--r--src/cmd/asm/internal/asm/expr_test.go121
-rw-r--r--src/cmd/asm/internal/asm/line_test.go55
-rw-r--r--src/cmd/asm/internal/asm/operand_test.go944
-rw-r--r--src/cmd/asm/internal/asm/parse.go1437
-rw-r--r--src/cmd/asm/internal/asm/pseudo_test.go103
-rw-r--r--src/cmd/asm/internal/asm/testdata/386.s98
-rw-r--r--src/cmd/asm/internal/asm/testdata/386enc.s37
-rw-r--r--src/cmd/asm/internal/asm/testdata/amd64.s152
-rw-r--r--src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s68
-rw-r--r--src/cmd/asm/internal/asm/testdata/amd64enc.s10700
-rw-r--r--src/cmd/asm/internal/asm/testdata/amd64enc_extra.s1059
-rw-r--r--src/cmd/asm/internal/asm/testdata/amd64error.s143
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm.s1595
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64.s1626
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64enc.s766
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64error.s435
-rw-r--r--src/cmd/asm/internal/asm/testdata/armerror.s264
-rw-r--r--src/cmd/asm/internal/asm/testdata/armv6.s110
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/aes_avx512f.s336
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4fmaps.s66
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4vnniw.s30
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_bitalg.s154
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_ifma.s194
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi.s415
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi2.s1386
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vnni.s400
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vpopcntdq.s82
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512bw.s1939
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512cd.s190
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512dq.s2668
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512er.s331
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512f.s5639
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/avx512pf.s54
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/gfni_avx512f.s324
-rw-r--r--src/cmd/asm/internal/asm/testdata/avx512enc/vpclmulqdq_avx512f.s94
-rw-r--r--src/cmd/asm/internal/asm/testdata/buildtagerror.s8
-rw-r--r--src/cmd/asm/internal/asm/testdata/mips.s443
-rw-r--r--src/cmd/asm/internal/asm/testdata/mips64.s633
-rw-r--r--src/cmd/asm/internal/asm/testdata/ppc64.s767
-rw-r--r--src/cmd/asm/internal/asm/testdata/riscv64.s394
-rw-r--r--src/cmd/asm/internal/asm/testdata/riscv64error.s26
-rw-r--r--src/cmd/asm/internal/asm/testdata/s390x.s524
-rw-r--r--src/cmd/asm/internal/flags/flags.go91
-rw-r--r--src/cmd/asm/internal/lex/input.go502
-rw-r--r--src/cmd/asm/internal/lex/lex.go141
-rw-r--r--src/cmd/asm/internal/lex/lex_test.go365
-rw-r--r--src/cmd/asm/internal/lex/slice.go74
-rw-r--r--src/cmd/asm/internal/lex/stack.go61
-rw-r--r--src/cmd/asm/internal/lex/tokenizer.go153
-rw-r--r--src/cmd/asm/main.go117
60 files changed, 41387 insertions, 0 deletions
diff --git a/src/cmd/asm/doc.go b/src/cmd/asm/doc.go
new file mode 100644
index 0000000..4a0c785
--- /dev/null
+++ b/src/cmd/asm/doc.go
@@ -0,0 +1,56 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Asm, typically invoked as ``go tool asm'', assembles the source file into an object
+file named for the basename of the argument source file with a .o suffix. The
+object file can then be combined with other objects into a package archive.
+
+Command Line
+
+Usage:
+
+ go tool asm [flags] file
+
+The specified file must be a Go assembly file.
+The same assembler is used for all target operating systems and architectures.
+The GOOS and GOARCH environment variables set the desired target.
+
+Flags:
+
+ -D name[=value]
+ Predefine symbol name with an optional simple value.
+ Can be repeated to define multiple symbols.
+ -I dir1 -I dir2
+ Search for #include files in dir1, dir2, etc,
+ after consulting $GOROOT/pkg/$GOOS_$GOARCH.
+ -S
+ Print assembly and machine code.
+ -V
+ Print assembler version and exit.
+ -debug
+ Dump instructions as they are parsed.
+ -dynlink
+ Support references to Go symbols defined in other shared libraries.
+ -gensymabis
+ Write symbol ABI information to output file. Don't assemble.
+ -o file
+ Write output to file. The default is foo.o for /a/b/c/foo.s.
+ -shared
+ Generate code that can be linked into a shared library.
+ -spectre list
+ Enable spectre mitigations in list (all, ret).
+ -trimpath prefix
+ Remove prefix from recorded source file paths.
+
+Input language:
+
+The assembler uses mostly the same syntax for all architectures,
+the main variation having to do with addressing modes. Input is
+run through a simplified C preprocessor that implements #include,
+#define, #ifdef/endif, but not #if or ##.
+
+For more information, see https://golang.org/doc/asm.
+*/
+package main
diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go
new file mode 100644
index 0000000..4d374cb
--- /dev/null
+++ b/src/cmd/asm/internal/arch/arch.go
@@ -0,0 +1,736 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package arch defines architecture-specific information and support functions.
+package arch
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/obj/arm"
+ "cmd/internal/obj/arm64"
+ "cmd/internal/obj/mips"
+ "cmd/internal/obj/ppc64"
+ "cmd/internal/obj/riscv"
+ "cmd/internal/obj/s390x"
+ "cmd/internal/obj/wasm"
+ "cmd/internal/obj/x86"
+ "fmt"
+ "strings"
+)
+
+// Pseudo-registers whose names are the constant name without the leading R (FP, SB, SP, PC); all values are negative.
+const (
+ RFP = -(iota + 1) // -1; the expression is repeated implicitly for each constant below
+ RSB // -2
+ RSP // -3
+ RPC // -4
+)
+
+// Arch wraps the link architecture object with more architecture-specific information.
+type Arch struct {
+ *obj.LinkArch // embedded link architecture description
+ // Instructions maps instruction names (mnemonics and their aliases) to opcode values.
+ Instructions map[string]obj.As
+ // Register maps register names to register numbers; pseudo-registers use the negative RFP/RSB/RSP/RPC values.
+ Register map[string]int16
+ // RegisterPrefix is the table of register prefix names, things like R for R(0) and SPR for SPR(268); it is nil on architectures without prefixes.
+ RegisterPrefix map[string]bool
+ // RegisterNumber converts a prefix and number such as R(10) into a register such as arm.REG_R10; the bool result reports success.
+ RegisterNumber func(string, int16) (int16, bool)
+ // IsJump reports whether word names an instruction the assembler treats as a jump (the per-architecture tables also list calls and branches).
+ IsJump func(word string) bool
+}
+
+// nilRegisterNumber is the register number function for architectures
+// that do not accept the R(N) notation. It always returns failure.
+func nilRegisterNumber(name string, n int16) (int16, bool) {
+ return 0, false
+}
+
+// Set configures the architecture specified by GOARCH and returns its representation.
+// It returns nil if GOARCH is not recognized.
+func Set(GOARCH string, shared bool) *Arch {
+ switch GOARCH {
+ case "386":
+ return archX86(&x86.Link386)
+ case "amd64":
+ return archX86(&x86.Linkamd64)
+ case "arm":
+ return archArm()
+ case "arm64":
+ return archArm64()
+ case "mips":
+ return archMips(&mips.Linkmips)
+ case "mipsle":
+ return archMips(&mips.Linkmipsle)
+ case "mips64":
+ return archMips64(&mips.Linkmips64)
+ case "mips64le":
+ return archMips64(&mips.Linkmips64le)
+ case "ppc64":
+ return archPPC64(&ppc64.Linkppc64)
+ case "ppc64le":
+ return archPPC64(&ppc64.Linkppc64le)
+ case "riscv64":
+ return archRISCV64(shared)
+ case "s390x":
+ return archS390x()
+ case "wasm":
+ return archWasm()
+ }
+ return nil
+}
+
+// jumpX86 reports whether word names an x86 instruction that the assembler
+// treats as a jump: any mnemonic beginning with J, CALL, anything beginning
+// with LOOP, or XBEGIN. An empty word is not a jump; the leading-J test uses
+// strings.HasPrefix so an empty string cannot trigger an index-out-of-range panic.
+func jumpX86(word string) bool {
+	return strings.HasPrefix(word, "J") || word == "CALL" || strings.HasPrefix(word, "LOOP") || word == "XBEGIN"
+}
+
+// jumpRISCV reports whether word is one of the RISC-V branch, jump or
+// call mnemonics listed below.
+func jumpRISCV(word string) bool {
+	switch word {
+	case "CALL", "JAL", "JALR", "JMP":
+		return true
+	case "BEQ", "BEQZ", "BNE", "BNEZ":
+		return true
+	case "BGE", "BGEU", "BGEZ", "BGT", "BGTU", "BGTZ":
+		return true
+	case "BLE", "BLEU", "BLEZ", "BLT", "BLTU", "BLTZ":
+		return true
+	default:
+		return false
+	}
+}
+
+// jumpWasm reports whether word is one of the WebAssembly jump or call
+// names: JMP, CALL, Call, Br or BrIf. The match is case-sensitive.
+func jumpWasm(word string) bool {
+	switch word {
+	case "JMP", "CALL", "Call", "Br", "BrIf":
+		return true
+	}
+	return false
+}
+
+func archX86(linkArch *obj.LinkArch) *Arch { // builds the Arch used for 386 and amd64; linkArch selects which of the two
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ for i, s := range x86.Register {
+ register[s] = int16(i + x86.REG_AL) // table index i corresponds to register number REG_AL+i
+ }
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ if linkArch == &x86.Linkamd64 { // the g alias is registered only for amd64
+ // Alias g to R14
+ register["g"] = x86.REGG
+ }
+ // Register prefix not used on this architecture.
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range x86.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseAMD64
+ }
+ }
+ // Annoying aliases. Assigned after the loops, so they add to or override the table-derived entries.
+ instructions["JA"] = x86.AJHI /* alternate */
+ instructions["JAE"] = x86.AJCC /* alternate */
+ instructions["JB"] = x86.AJCS /* alternate */
+ instructions["JBE"] = x86.AJLS /* alternate */
+ instructions["JC"] = x86.AJCS /* alternate */
+ instructions["JCC"] = x86.AJCC /* carry clear (CF = 0) */
+ instructions["JCS"] = x86.AJCS /* carry set (CF = 1) */
+ instructions["JE"] = x86.AJEQ /* alternate */
+ instructions["JEQ"] = x86.AJEQ /* equal (ZF = 1) */
+ instructions["JG"] = x86.AJGT /* alternate */
+ instructions["JGE"] = x86.AJGE /* greater than or equal (signed) (SF = OF) */
+ instructions["JGT"] = x86.AJGT /* greater than (signed) (ZF = 0 && SF = OF) */
+ instructions["JHI"] = x86.AJHI /* higher (unsigned) (CF = 0 && ZF = 0) */
+ instructions["JHS"] = x86.AJCC /* alternate */
+ instructions["JL"] = x86.AJLT /* alternate */
+ instructions["JLE"] = x86.AJLE /* less than or equal (signed) (ZF = 1 || SF != OF) */
+ instructions["JLO"] = x86.AJCS /* alternate */
+ instructions["JLS"] = x86.AJLS /* lower or same (unsigned) (CF = 1 || ZF = 1) */
+ instructions["JLT"] = x86.AJLT /* less than (signed) (SF != OF) */
+ instructions["JMI"] = x86.AJMI /* negative (minus) (SF = 1) */
+ instructions["JNA"] = x86.AJLS /* alternate */
+ instructions["JNAE"] = x86.AJCS /* alternate */
+ instructions["JNB"] = x86.AJCC /* alternate */
+ instructions["JNBE"] = x86.AJHI /* alternate */
+ instructions["JNC"] = x86.AJCC /* alternate */
+ instructions["JNE"] = x86.AJNE /* not equal (ZF = 0) */
+ instructions["JNG"] = x86.AJLE /* alternate */
+ instructions["JNGE"] = x86.AJLT /* alternate */
+ instructions["JNL"] = x86.AJGE /* alternate */
+ instructions["JNLE"] = x86.AJGT /* alternate */
+ instructions["JNO"] = x86.AJOC /* alternate */
+ instructions["JNP"] = x86.AJPC /* alternate */
+ instructions["JNS"] = x86.AJPL /* alternate */
+ instructions["JNZ"] = x86.AJNE /* alternate */
+ instructions["JO"] = x86.AJOS /* alternate */
+ instructions["JOC"] = x86.AJOC /* overflow clear (OF = 0) */
+ instructions["JOS"] = x86.AJOS /* overflow set (OF = 1) */
+ instructions["JP"] = x86.AJPS /* alternate */
+ instructions["JPC"] = x86.AJPC /* parity clear (PF = 0) */
+ instructions["JPE"] = x86.AJPS /* alternate */
+ instructions["JPL"] = x86.AJPL /* non-negative (plus) (SF = 0) */
+ instructions["JPO"] = x86.AJPC /* alternate */
+ instructions["JPS"] = x86.AJPS /* parity set (PF = 1) */
+ instructions["JS"] = x86.AJMI /* alternate */
+ instructions["JZ"] = x86.AJEQ /* alternate */
+ instructions["MASKMOVDQU"] = x86.AMASKMOVOU
+ instructions["MOVD"] = x86.AMOVQ
+ instructions["MOVDQ2Q"] = x86.AMOVQ
+ instructions["MOVNTDQ"] = x86.AMOVNTO
+ instructions["MOVOA"] = x86.AMOVO
+ instructions["PSLLDQ"] = x86.APSLLO
+ instructions["PSRLDQ"] = x86.APSRLO
+ instructions["PADDD"] = x86.APADDL
+ // Spellings originally used in CL 97235.
+ instructions["MOVBELL"] = x86.AMOVBEL
+ instructions["MOVBEQQ"] = x86.AMOVBEQ
+ instructions["MOVBEWW"] = x86.AMOVBEW
+
+ return &Arch{
+ LinkArch: linkArch,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: nil, // no prefix table on x86
+ RegisterNumber: nilRegisterNumber, // always reports failure
+ IsJump: jumpX86,
+ }
+}
+
+func archArm() *Arch { // builds the Arch description for 32-bit ARM (arm.Linkarm)
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for x86.
+ for i := arm.REG_R0; i < arm.REG_SPSR; i++ { // REG_SPSR itself is excluded; names come from obj.Rconv
+ register[obj.Rconv(i)] = int16(i)
+ }
+ // Avoid unintentionally clobbering g using R10.
+ delete(register, "R10")
+ register["g"] = arm.REG_R10 // the register stays reachable, but only under the name g
+ for i := 0; i < 16; i++ {
+ register[fmt.Sprintf("C%d", i)] = int16(i) // C0..C15 map to the plain numbers 0..15
+ }
+
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ register["SP"] = RSP
+ registerPrefix := map[string]bool{
+ "F": true,
+ "R": true,
+ }
+
+ // special operands for DMB/DSB instructions
+ register["MB_SY"] = arm.REG_MB_SY
+ register["MB_ST"] = arm.REG_MB_ST
+ register["MB_ISH"] = arm.REG_MB_ISH
+ register["MB_ISHST"] = arm.REG_MB_ISHST
+ register["MB_NSH"] = arm.REG_MB_NSH
+ register["MB_NSHST"] = arm.REG_MB_NSHST
+ register["MB_OSH"] = arm.REG_MB_OSH
+ register["MB_OSHST"] = arm.REG_MB_OSHST
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range arm.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseARM
+ }
+ }
+ // Annoying aliases.
+ instructions["B"] = obj.AJMP
+ instructions["BL"] = obj.ACALL
+ // MCR differs from MRC by the way fields of the word are encoded.
+ // (Details in arm.go). Here we add the instruction so parse will find
+ // it, but give it an opcode number known only to us.
+ instructions["MCR"] = aMCR
+
+ return &Arch{
+ LinkArch: &arm.Linkarm,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: armRegisterNumber,
+ IsJump: jumpArm,
+ }
+}
+
+func archArm64() *Arch { // builds the Arch description for arm64 (arm64.Linkarm64)
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for 386 and amd64.
+ register[obj.Rconv(arm64.REGSP)] = int16(arm64.REGSP)
+ for i := arm64.REG_R0; i <= arm64.REG_R31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ // Rename R18 to R18_PLATFORM to avoid accidental use.
+ register["R18_PLATFORM"] = register["R18"] // copy the number first, then drop the old name
+ delete(register, "R18")
+ for i := arm64.REG_F0; i <= arm64.REG_F31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := arm64.REG_V0; i <= arm64.REG_V31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+
+ // System registers.
+ for i := 0; i < len(arm64.SystemReg); i++ {
+ register[arm64.SystemReg[i].Name] = arm64.SystemReg[i].Reg
+ }
+
+ register["LR"] = arm64.REGLINK
+ register["DAIFSet"] = arm64.REG_DAIFSet
+ register["DAIFClr"] = arm64.REG_DAIFClr
+ register["PLDL1KEEP"] = arm64.REG_PLDL1KEEP
+ register["PLDL1STRM"] = arm64.REG_PLDL1STRM
+ register["PLDL2KEEP"] = arm64.REG_PLDL2KEEP
+ register["PLDL2STRM"] = arm64.REG_PLDL2STRM
+ register["PLDL3KEEP"] = arm64.REG_PLDL3KEEP
+ register["PLDL3STRM"] = arm64.REG_PLDL3STRM
+ register["PLIL1KEEP"] = arm64.REG_PLIL1KEEP
+ register["PLIL1STRM"] = arm64.REG_PLIL1STRM
+ register["PLIL2KEEP"] = arm64.REG_PLIL2KEEP
+ register["PLIL2STRM"] = arm64.REG_PLIL2STRM
+ register["PLIL3KEEP"] = arm64.REG_PLIL3KEEP
+ register["PLIL3STRM"] = arm64.REG_PLIL3STRM
+ register["PSTL1KEEP"] = arm64.REG_PSTL1KEEP
+ register["PSTL1STRM"] = arm64.REG_PSTL1STRM
+ register["PSTL2KEEP"] = arm64.REG_PSTL2KEEP
+ register["PSTL2STRM"] = arm64.REG_PSTL2STRM
+ register["PSTL3KEEP"] = arm64.REG_PSTL3KEEP
+ register["PSTL3STRM"] = arm64.REG_PSTL3STRM
+
+ // Conditional operators, like EQ, NE, etc. They are stored in the register table alongside real registers.
+ register["EQ"] = arm64.COND_EQ
+ register["NE"] = arm64.COND_NE
+ register["HS"] = arm64.COND_HS
+ register["CS"] = arm64.COND_HS // CS is a second name for HS
+ register["LO"] = arm64.COND_LO
+ register["CC"] = arm64.COND_LO // CC is a second name for LO
+ register["MI"] = arm64.COND_MI
+ register["PL"] = arm64.COND_PL
+ register["VS"] = arm64.COND_VS
+ register["VC"] = arm64.COND_VC
+ register["HI"] = arm64.COND_HI
+ register["LS"] = arm64.COND_LS
+ register["GE"] = arm64.COND_GE
+ register["LT"] = arm64.COND_LT
+ register["GT"] = arm64.COND_GT
+ register["LE"] = arm64.COND_LE
+ register["AL"] = arm64.COND_AL
+ register["NV"] = arm64.COND_NV
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ register["SP"] = RSP
+ // Avoid unintentionally clobbering g using R28.
+ delete(register, "R28")
+ register["g"] = arm64.REG_R28 // the register stays reachable, but only under the name g
+ registerPrefix := map[string]bool{
+ "F": true,
+ "R": true,
+ "V": true,
+ }
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range arm64.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseARM64
+ }
+ }
+ // Annoying aliases.
+ instructions["B"] = arm64.AB
+ instructions["BL"] = arm64.ABL
+
+ return &Arch{
+ LinkArch: &arm64.Linkarm64,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: arm64RegisterNumber,
+ IsJump: jumpArm64,
+ }
+
+}
+
+func archPPC64(linkArch *obj.LinkArch) *Arch { // builds the Arch for ppc64 and ppc64le; linkArch selects which
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for x86.
+ for i := ppc64.REG_R0; i <= ppc64.REG_R31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_F0; i <= ppc64.REG_F31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_V0; i <= ppc64.REG_V31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_VS0; i <= ppc64.REG_VS63; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_CR0; i <= ppc64.REG_CR7; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_MSR; i <= ppc64.REG_CR; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := ppc64.REG_CR0LT; i <= ppc64.REG_CR7SO; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ register["CR"] = ppc64.REG_CR // explicit names for the special registers
+ register["XER"] = ppc64.REG_XER
+ register["LR"] = ppc64.REG_LR
+ register["CTR"] = ppc64.REG_CTR
+ register["FPSCR"] = ppc64.REG_FPSCR
+ register["MSR"] = ppc64.REG_MSR
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ // Avoid unintentionally clobbering g using R30.
+ delete(register, "R30")
+ register["g"] = ppc64.REG_R30 // the register stays reachable, but only under the name g
+ registerPrefix := map[string]bool{
+ "CR": true,
+ "F": true,
+ "R": true,
+ "SPR": true,
+ }
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range ppc64.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABasePPC64
+ }
+ }
+ // Annoying aliases.
+ instructions["BR"] = ppc64.ABR
+ instructions["BL"] = ppc64.ABL
+
+ return &Arch{
+ LinkArch: linkArch,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: ppc64RegisterNumber,
+ IsJump: jumpPPC64,
+ }
+}
+
+func archMips(linkArch *obj.LinkArch) *Arch { // builds the Arch for mips and mipsle; linkArch selects which
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for x86.
+ for i := mips.REG_R0; i <= mips.REG_R31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+
+ for i := mips.REG_F0; i <= mips.REG_F31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_M0; i <= mips.REG_M31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_FCR0; i <= mips.REG_FCR31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ register["HI"] = mips.REG_HI
+ register["LO"] = mips.REG_LO
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ // Avoid unintentionally clobbering g using R30.
+ delete(register, "R30")
+ register["g"] = mips.REG_R30 // the register stays reachable, but only under the name g
+
+ registerPrefix := map[string]bool{
+ "F": true,
+ "FCR": true,
+ "M": true,
+ "R": true,
+ }
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range mips.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseMIPS
+ }
+ }
+ // Annoying alias.
+ instructions["JAL"] = mips.AJAL
+
+ return &Arch{
+ LinkArch: linkArch,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: mipsRegisterNumber,
+ IsJump: jumpMIPS,
+ }
+}
+
+func archMips64(linkArch *obj.LinkArch) *Arch { // builds the Arch for mips64 and mips64le; linkArch selects which
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for x86.
+ for i := mips.REG_R0; i <= mips.REG_R31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_F0; i <= mips.REG_F31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_M0; i <= mips.REG_M31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_FCR0; i <= mips.REG_FCR31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := mips.REG_W0; i <= mips.REG_W31; i++ { // W registers are added here but not in archMips
+ register[obj.Rconv(i)] = int16(i)
+ }
+ register["HI"] = mips.REG_HI
+ register["LO"] = mips.REG_LO
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ // Avoid unintentionally clobbering g using R30.
+ delete(register, "R30")
+ register["g"] = mips.REG_R30 // the register stays reachable, but only under the name g
+ // Avoid unintentionally clobbering RSB using R28.
+ delete(register, "R28")
+ register["RSB"] = mips.REG_R28 // the register name "RSB", distinct from the pseudo-register "SB" above
+ registerPrefix := map[string]bool{
+ "F": true,
+ "FCR": true,
+ "M": true,
+ "R": true,
+ "W": true,
+ }
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range mips.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseMIPS
+ }
+ }
+ // Annoying alias.
+ instructions["JAL"] = mips.AJAL
+
+ return &Arch{
+ LinkArch: linkArch,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: mipsRegisterNumber,
+ IsJump: jumpMIPS,
+ }
+}
+
+func archRISCV64(shared bool) *Arch { // builds the Arch for riscv64; shared controls whether the name X3 is registered
+ register := make(map[string]int16)
+
+ // Standard register names.
+ for i := riscv.REG_X0; i <= riscv.REG_X31; i++ {
+ // Disallow X3 in shared mode, as this will likely be used as the
+ // GP register, which could result in problems in non-Go code,
+ // including signal handlers.
+ if shared && i == riscv.REG_GP {
+ continue
+ }
+ if i == riscv.REG_TP || i == riscv.REG_G { // these two get no X<n> name; they are added below as TP and g
+ continue
+ }
+ name := fmt.Sprintf("X%d", i-riscv.REG_X0)
+ register[name] = int16(i)
+ }
+ for i := riscv.REG_F0; i <= riscv.REG_F31; i++ {
+ name := fmt.Sprintf("F%d", i-riscv.REG_F0)
+ register[name] = int16(i)
+ }
+
+ // General registers with ABI names.
+ register["ZERO"] = riscv.REG_ZERO
+ register["RA"] = riscv.REG_RA
+ register["SP"] = riscv.REG_SP
+ register["GP"] = riscv.REG_GP // registered unconditionally, even when shared is true
+ register["TP"] = riscv.REG_TP
+ register["T0"] = riscv.REG_T0
+ register["T1"] = riscv.REG_T1
+ register["T2"] = riscv.REG_T2
+ register["S0"] = riscv.REG_S0
+ register["S1"] = riscv.REG_S1
+ register["A0"] = riscv.REG_A0
+ register["A1"] = riscv.REG_A1
+ register["A2"] = riscv.REG_A2
+ register["A3"] = riscv.REG_A3
+ register["A4"] = riscv.REG_A4
+ register["A5"] = riscv.REG_A5
+ register["A6"] = riscv.REG_A6
+ register["A7"] = riscv.REG_A7
+ register["S2"] = riscv.REG_S2
+ register["S3"] = riscv.REG_S3
+ register["S4"] = riscv.REG_S4
+ register["S5"] = riscv.REG_S5
+ register["S6"] = riscv.REG_S6
+ register["S7"] = riscv.REG_S7
+ register["S8"] = riscv.REG_S8
+ register["S9"] = riscv.REG_S9
+ register["S10"] = riscv.REG_S10
+ // Skip S11 as it is the g register.
+ register["T3"] = riscv.REG_T3
+ register["T4"] = riscv.REG_T4
+ register["T5"] = riscv.REG_T5
+ register["T6"] = riscv.REG_T6
+
+ // Go runtime register names.
+ register["g"] = riscv.REG_G
+ register["CTXT"] = riscv.REG_CTXT
+ register["TMP"] = riscv.REG_TMP
+
+ // ABI names for floating point register.
+ register["FT0"] = riscv.REG_FT0
+ register["FT1"] = riscv.REG_FT1
+ register["FT2"] = riscv.REG_FT2
+ register["FT3"] = riscv.REG_FT3
+ register["FT4"] = riscv.REG_FT4
+ register["FT5"] = riscv.REG_FT5
+ register["FT6"] = riscv.REG_FT6
+ register["FT7"] = riscv.REG_FT7
+ register["FS0"] = riscv.REG_FS0
+ register["FS1"] = riscv.REG_FS1
+ register["FA0"] = riscv.REG_FA0
+ register["FA1"] = riscv.REG_FA1
+ register["FA2"] = riscv.REG_FA2
+ register["FA3"] = riscv.REG_FA3
+ register["FA4"] = riscv.REG_FA4
+ register["FA5"] = riscv.REG_FA5
+ register["FA6"] = riscv.REG_FA6
+ register["FA7"] = riscv.REG_FA7
+ register["FS2"] = riscv.REG_FS2
+ register["FS3"] = riscv.REG_FS3
+ register["FS4"] = riscv.REG_FS4
+ register["FS5"] = riscv.REG_FS5
+ register["FS6"] = riscv.REG_FS6
+ register["FS7"] = riscv.REG_FS7
+ register["FS8"] = riscv.REG_FS8
+ register["FS9"] = riscv.REG_FS9
+ register["FS10"] = riscv.REG_FS10
+ register["FS11"] = riscv.REG_FS11
+ register["FT8"] = riscv.REG_FT8
+ register["FT9"] = riscv.REG_FT9
+ register["FT10"] = riscv.REG_FT10
+ register["FT11"] = riscv.REG_FT11
+
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range riscv.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseRISCV
+ }
+ }
+
+ return &Arch{
+ LinkArch: &riscv.LinkRISCV64,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: nil, // no prefix table on riscv64
+ RegisterNumber: nilRegisterNumber, // always reports failure
+ IsJump: jumpRISCV,
+ }
+}
+
+func archS390x() *Arch { // builds the Arch description for s390x (s390x.Links390x)
+ register := make(map[string]int16)
+ // Create maps for easy lookup of instruction names etc.
+ // Note that there is no list of names as there is for x86.
+ for i := s390x.REG_R0; i <= s390x.REG_R15; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := s390x.REG_F0; i <= s390x.REG_F15; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := s390x.REG_V0; i <= s390x.REG_V31; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ for i := s390x.REG_AR0; i <= s390x.REG_AR15; i++ {
+ register[obj.Rconv(i)] = int16(i)
+ }
+ register["LR"] = s390x.REG_LR
+ // Pseudo-registers.
+ register["SB"] = RSB
+ register["FP"] = RFP
+ register["PC"] = RPC
+ // Avoid unintentionally clobbering g using R13.
+ delete(register, "R13")
+ register["g"] = s390x.REG_R13 // the register stays reachable, but only under the name g
+ registerPrefix := map[string]bool{ // V is not listed here although V0-V31 are registered by name above
+ "AR": true,
+ "F": true,
+ "R": true,
+ }
+
+ instructions := make(map[string]obj.As)
+ for i, s := range obj.Anames { // architecture-independent names, numbered by table index
+ instructions[s] = obj.As(i)
+ }
+ for i, s := range s390x.Anames {
+ if obj.As(i) >= obj.A_ARCHSPECIFIC { // entries below A_ARCHSPECIFIC are not taken from this table
+ instructions[s] = obj.As(i) + obj.ABaseS390X
+ }
+ }
+ // Annoying aliases.
+ instructions["BR"] = s390x.ABR
+ instructions["BL"] = s390x.ABL
+
+ return &Arch{
+ LinkArch: &s390x.Links390x,
+ Instructions: instructions,
+ Register: register,
+ RegisterPrefix: registerPrefix,
+ RegisterNumber: s390xRegisterNumber,
+ IsJump: jumpS390x,
+ }
+}
+
+// archWasm builds the Arch description for WebAssembly. The instruction
+// table combines the architecture-independent names with the wasm-specific
+// ones (offset by obj.ABaseWasm); the register table is wasm.Register
+// itself, and there is no register prefix notation.
+func archWasm() *Arch {
+	opcodes := make(map[string]obj.As)
+	for idx, name := range obj.Anames {
+		opcodes[name] = obj.As(idx)
+	}
+	for idx, name := range wasm.Anames {
+		if obj.As(idx) < obj.A_ARCHSPECIFIC {
+			continue
+		}
+		opcodes[name] = obj.As(idx) + obj.ABaseWasm
+	}
+
+	a := new(Arch)
+	a.LinkArch = &wasm.Linkwasm
+	a.Instructions = opcodes
+	a.Register = wasm.Register
+	a.RegisterPrefix = nil
+	a.RegisterNumber = nilRegisterNumber
+	a.IsJump = jumpWasm
+	return a
+}
diff --git a/src/cmd/asm/internal/arch/arm.go b/src/cmd/asm/internal/arch/arm.go
new file mode 100644
index 0000000..b0e985f
--- /dev/null
+++ b/src/cmd/asm/internal/arch/arm.go
@@ -0,0 +1,257 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the ARM
+// instruction set, to minimize its interaction with the core of the
+// assembler.
+
+package arch
+
+import (
+ "strings"
+
+ "cmd/internal/obj"
+ "cmd/internal/obj/arm"
+)
+
+var armLS = map[string]uint8{ // passed by ParseARMCondition as the ls table; bits of a matching name are OR-ed into the result
+ "U": arm.C_UBIT,
+ "S": arm.C_SBIT,
+ "W": arm.C_WBIT,
+ "P": arm.C_PBIT,
+ "PW": arm.C_WBIT | arm.C_PBIT, // PW and WP set the same two bits
+ "WP": arm.C_WBIT | arm.C_PBIT,
+}
+
+var armSCOND = map[string]uint8{ // passed by ParseARMCondition as the scond table; a matching name replaces the arm.C_SCOND bits of the result
+ "EQ": arm.C_SCOND_EQ,
+ "NE": arm.C_SCOND_NE,
+ "CS": arm.C_SCOND_HS, // CS and HS are the same condition
+ "HS": arm.C_SCOND_HS,
+ "CC": arm.C_SCOND_LO, // CC and LO are the same condition
+ "LO": arm.C_SCOND_LO,
+ "MI": arm.C_SCOND_MI,
+ "PL": arm.C_SCOND_PL,
+ "VS": arm.C_SCOND_VS,
+ "VC": arm.C_SCOND_VC,
+ "HI": arm.C_SCOND_HI,
+ "LS": arm.C_SCOND_LS,
+ "GE": arm.C_SCOND_GE,
+ "LT": arm.C_SCOND_LT,
+ "GT": arm.C_SCOND_GT,
+ "LE": arm.C_SCOND_LE,
+ "AL": arm.C_SCOND_NONE,
+ "U": arm.C_UBIT, // U, S, W, P, PW and WP also appear in armLS, which parseARMCondition consults first
+ "S": arm.C_SBIT,
+ "W": arm.C_WBIT,
+ "P": arm.C_PBIT,
+ "PW": arm.C_WBIT | arm.C_PBIT,
+ "WP": arm.C_WBIT | arm.C_PBIT,
+ "F": arm.C_FBIT,
+ "IBW": arm.C_WBIT | arm.C_PBIT | arm.C_UBIT,
+ "IAW": arm.C_WBIT | arm.C_UBIT,
+ "DBW": arm.C_WBIT | arm.C_PBIT,
+ "DAW": arm.C_WBIT,
+ "IB": arm.C_PBIT | arm.C_UBIT,
+ "IA": arm.C_UBIT,
+ "DB": arm.C_PBIT,
+ "DA": 0, // recognized, but contributes no bits
+}
+
+var armJump = map[string]bool{ // set of ARM instruction names that jumpArm reports as jumps
+ "B": true,
+ "BL": true,
+ "BX": true,
+ "BEQ": true,
+ "BNE": true,
+ "BCS": true,
+ "BHS": true,
+ "BCC": true,
+ "BLO": true,
+ "BMI": true,
+ "BPL": true,
+ "BVS": true,
+ "BVC": true,
+ "BHI": true,
+ "BLS": true,
+ "BGE": true,
+ "BLT": true,
+ "BGT": true,
+ "BLE": true,
+ "CALL": true,
+ "JMP": true,
+}
+
+// jumpArm reports whether word is marked as a jump in the armJump table.
+// Names missing from the table yield false.
+func jumpArm(word string) bool {
+	isJump := armJump[word]
+	return isJump
+}
+
+// IsARMCMP reports whether op (an arm.A* constant) is one of the
+// comparison instructions CMN, CMP, TEQ or TST, which require special
+// handling.
+func IsARMCMP(op obj.As) bool {
+	return op == arm.ACMN || op == arm.ACMP || op == arm.ATEQ || op == arm.ATST
+}
+
+// IsARMSTREX reports whether op (an arm.A* constant) is one of the
+// STREX-like instructions STREX, STREXD, SWPW or SWPBU, which require
+// special handling.
+func IsARMSTREX(op obj.As) bool {
+	return op == arm.ASTREX || op == arm.ASTREXD || op == arm.ASWPW || op == arm.ASWPBU
+}
+
+// MCR is not defined by the obj/arm; instead we define it privately here.
+// It is encoded as an MRC with a bit inside the instruction word,
+// passed to arch.ARMMRCOffset.
+const aMCR = arm.ALAST + 1 // one past arm.ALAST; presumably chosen so it cannot collide with an obj/arm opcode
+
+// IsARMMRC reports whether op is MRC (arm.AMRC) or MCR. MCR has no
+// obj/arm opcode; it is identified by the package-private aMCR value.
+func IsARMMRC(op obj.As) bool {
+	return op == arm.AMRC || op == aMCR
+}
+
+// IsARMBFX reports whether op (an arm.A* constant) is one of the BFX-like
+// instructions BFX, BFXU, BFC or BFI, which are written in the form
+// "op $width, $LSB, (Reg,) Reg".
+func IsARMBFX(op obj.As) bool {
+	return op == arm.ABFX || op == arm.ABFXU || op == arm.ABFC || op == arm.ABFI
+}
+
+// IsARMFloatCmp reports whether op is one of the floating-point
+// comparison instructions CMPF or CMPD.
+func IsARMFloatCmp(op obj.As) bool {
+	return op == arm.ACMPF || op == arm.ACMPD
+}
+
+// ARMMRCOffset implements the peculiar encoding of the MRC and MCR instructions.
+// The difference between MRC and MCR is represented by a bit high in the word, not
+// in the usual way by the opcode itself. Asm must use AMRC for both instructions, so
+// we return the opcode for MRC so that asm doesn't need to import obj/arm.
+func ARMMRCOffset(op obj.As, cond string, x0, x1, x2, x3, x4, x5 int64) (offset int64, op0 obj.As, ok bool) {
+ op1 := int64(0)
+ if op == arm.AMRC {
+ op1 = 1
+ }
+ bits, ok := ParseARMCondition(cond)
+ if !ok {
+ return
+ }
+ offset = (0xe << 24) | // opcode
+ (op1 << 20) | // MCR/MRC
+ ((int64(bits) ^ arm.C_SCOND_XOR) << 28) | // scond
+ ((x0 & 15) << 8) | //coprocessor number
+ ((x1 & 7) << 21) | // coprocessor operation
+ ((x2 & 15) << 12) | // ARM register
+ ((x3 & 15) << 16) | // Crn
+ ((x4 & 15) << 0) | // Crm
+ ((x5 & 7) << 5) | // coprocessor information
+ (1 << 4) /* must be set */
+ return offset, arm.AMRC, true
+}
+
+// IsARMMULA reports whether op (an arm.A* constant) is one of the
+// 4-operand instructions MULA, MULS, MMULA, MMULS, MULABB, MULAWB or MULAWT.
+func IsARMMULA(op obj.As) bool {
+	return op == arm.AMULA || op == arm.AMULS ||
+		op == arm.AMMULA || op == arm.AMMULS ||
+		op == arm.AMULABB || op == arm.AMULAWB || op == arm.AMULAWT
+}
+
+var bcode = []obj.As{ // indexed by ARMConditionCodes with (bits^arm.C_SCOND_XOR)&0xf to pick the opcode that replaces a conditional B
+ arm.ABEQ,
+ arm.ABNE,
+ arm.ABCS,
+ arm.ABCC,
+ arm.ABMI,
+ arm.ABPL,
+ arm.ABVS,
+ arm.ABVC,
+ arm.ABHI,
+ arm.ABLS,
+ arm.ABGE,
+ arm.ABLT,
+ arm.ABGT,
+ arm.ABLE,
+ arm.AB,
+ obj.ANOP, // 16th entry, so every 4-bit index is in range
+}
+
+// ARMConditionCodes applies the condition suffix cond to prog.
+// An empty cond leaves prog untouched and succeeds. Otherwise the parsed
+// bits are stored in prog.Scond. For a plain B instruction, the condition
+// is instead folded into the opcode (so that B.NE etc. work) by choosing
+// the matching entry of bcode, and the condition field of the stored bits
+// is reset to arm.C_SCOND_NONE. The result is false, with prog unmodified,
+// when cond is not recognized.
+func ARMConditionCodes(prog *obj.Prog, cond string) bool {
+	if len(cond) == 0 {
+		return true
+	}
+	scond, ok := ParseARMCondition(cond)
+	if ok {
+		if prog.As == arm.AB {
+			// Turn the conditional B into the corresponding conditional branch opcode.
+			prog.As = bcode[(scond^arm.C_SCOND_XOR)&0xf]
+			scond = (scond &^ 0xf) | arm.C_SCOND_NONE
+		}
+		prog.Scond = scond
+	}
+	return ok
+}
+
+// ParseARMCondition decodes the condition suffix attached to an ARM
+// instruction. cond is a single string of period-separated codes, such as
+// ".P.W"; one leading period is ignored. It returns the combined bits and
+// reports whether every code was recognized, using the armLS and armSCOND
+// tables.
+func ParseARMCondition(cond string) (uint8, bool) {
+	bits, ok := parseARMCondition(cond, armLS, armSCOND)
+	return bits, ok
+}
+
+func parseARMCondition(cond string, ls, scond map[string]uint8) (uint8, bool) {
+ cond = strings.TrimPrefix(cond, ".")
+ if cond == "" {
+ return arm.C_SCOND_NONE, true
+ }
+ names := strings.Split(cond, ".")
+ bits := uint8(0)
+ for _, name := range names {
+ if b, present := ls[name]; present {
+ bits |= b
+ continue
+ }
+ if b, present := scond[name]; present {
+ bits = (bits &^ arm.C_SCOND) | b
+ continue
+ }
+ return 0, false
+ }
+ return bits, true
+}
+
+// armRegisterNumber resolves the R(n) and F(n) register notations for ARM.
+// n must lie in 0..15 and name must be "R" or "F"; the result is then
+// arm.REG_R0+n or arm.REG_F0+n. Any other input yields (0, false).
+func armRegisterNumber(name string, n int16) (int16, bool) {
+	if n >= 0 && n <= 15 {
+		if name == "R" {
+			return arm.REG_R0 + n, true
+		}
+		if name == "F" {
+			return arm.REG_F0 + n, true
+		}
+	}
+	return 0, false
+}
diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go
new file mode 100644
index 0000000..24689c5
--- /dev/null
+++ b/src/cmd/asm/internal/arch/arm64.go
@@ -0,0 +1,362 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the ARM64
+// instruction set, to minimize its interaction with the core of the
+// assembler.
+
+package arch
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/obj/arm64"
+ "errors"
+)
+
+ // arm64LS maps ARM64 instruction suffix names to their Scond bits.
+ // parseARM64Suffix passes it to parseARMCondition as the ls table.
+ var arm64LS = map[string]uint8{
+ "P": arm64.C_XPOST,
+ "W": arm64.C_XPRE,
+ }
+
+ // arm64Jump is the set of mnemonics that jumpArm64 reports as jump
+ // instructions. Every listed mnemonic maps to true; absent ones read as false.
+ var arm64Jump = map[string]bool{
+ "B": true,
+ "BL": true,
+ "BEQ": true,
+ "BNE": true,
+ "BCS": true,
+ "BHS": true,
+ "BCC": true,
+ "BLO": true,
+ "BMI": true,
+ "BPL": true,
+ "BVS": true,
+ "BVC": true,
+ "BHI": true,
+ "BLS": true,
+ "BGE": true,
+ "BLT": true,
+ "BGT": true,
+ "BLE": true,
+ "CALL": true,
+ "CBZ": true,
+ "CBZW": true,
+ "CBNZ": true,
+ "CBNZW": true,
+ "JMP": true,
+ "TBNZ": true,
+ "TBZ": true,
+ }
+
+ // jumpArm64 reports whether word is listed as a jump mnemonic in arm64Jump.
+ func jumpArm64(word string) bool {
+ 	isJump, listed := arm64Jump[word]
+ 	return listed && isJump
+ }
+
+ // IsARM64CMP reports whether the op (as defined by an arm64.A* constant) is
+ // one of the comparison instructions that require special handling.
+ func IsARM64CMP(op obj.As) bool {
+ 	switch op {
+ 	case arm64.ACMN, arm64.ACMNW, arm64.ACMP, arm64.ACMPW, arm64.ATST, arm64.ATSTW:
+ 		// Integer compare and test forms.
+ 		return true
+ 	case arm64.AFCMPS, arm64.AFCMPD, arm64.AFCMPES, arm64.AFCMPED:
+ 		// Floating-point compare forms.
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // IsARM64STLXR reports whether the op (as defined by an arm64.A* constant)
+ // is one of the STLXR-like instructions that require special handling.
+ // Anything arm64.IsAtomicInstruction accepts is also treated as STLXR-like.
+ func IsARM64STLXR(op obj.As) bool {
+ 	switch op {
+ 	case arm64.ASTLXRB, arm64.ASTLXRH, arm64.ASTLXRW, arm64.ASTLXR:
+ 		return true
+ 	case arm64.ASTXRB, arm64.ASTXRH, arm64.ASTXRW, arm64.ASTXR:
+ 		return true
+ 	case arm64.ASTXP, arm64.ASTXPW, arm64.ASTLXP, arm64.ASTLXPW:
+ 		return true
+ 	}
+ 	// LDADDx/SWPx/CASx atomic instructions
+ 	return arm64.IsAtomicInstruction(op)
+ }
+
+ // IsARM64TBL reports whether the op (as defined by an arm64.A* constant)
+ // is one of the TBL-like instructions, one of whose inputs does not fit
+ // into prog.Reg and therefore requires special handling.
+ func IsARM64TBL(op obj.As) bool {
+ 	return op == arm64.AVTBL || op == arm64.AVMOVQ
+ }
+
+ // IsARM64CASP reports whether the op (as defined by an arm64.A* constant)
+ // is one of the CASP-like instructions, whose second destination is a
+ // register pair that requires special handling.
+ func IsARM64CASP(op obj.As) bool {
+ 	return op == arm64.ACASPD || op == arm64.ACASPW
+ }
+
+ // ARM64Suffix applies the ARM64 instruction suffix cond to prog.
+ // An empty cond is accepted and leaves prog unchanged. The result is false,
+ // with prog unchanged, when cond is not a recognized suffix.
+ func ARM64Suffix(prog *obj.Prog, cond string) bool {
+ 	if cond != "" {
+ 		bits, ok := parseARM64Suffix(cond)
+ 		if !ok {
+ 			return false
+ 		}
+ 		prog.Scond = bits
+ 	}
+ 	return true
+ }
+
+ // parseARM64Suffix decodes the suffix attached to an ARM64 instruction.
+ // cond is one string of period-separated codes, such as ".P.W"; a leading
+ // period is ignored. Only the names in arm64LS are accepted. An empty cond
+ // yields zero bits and success.
+ func parseARM64Suffix(cond string) (uint8, bool) {
+ 	if len(cond) > 0 {
+ 		return parseARMCondition(cond, arm64LS, nil)
+ 	}
+ 	return 0, true
+ }
+
+ // arm64RegisterNumber converts a register family name and index into the
+ // corresponding arm64 register constant. "F" and "V" accept 0..31, while
+ // "R" accepts only 0..30. The boolean result is false for an unknown
+ // family or an out-of-range index.
+ func arm64RegisterNumber(name string, n int16) (int16, bool) {
+ 	var base, last int16
+ 	switch name {
+ 	case "F":
+ 		base, last = arm64.REG_F0, 31
+ 	case "R":
+ 		base, last = arm64.REG_R0, 30 // not 31
+ 	case "V":
+ 		base, last = arm64.REG_V0, 31
+ 	default:
+ 		return 0, false
+ 	}
+ 	if n < 0 || n > last {
+ 		return 0, false
+ 	}
+ 	return base + n, true
+ }
+
+ // ARM64RegisterShift constructs an ARM64 register with shift operation.
+ // The result packs reg&31 at bit 16, op at bit 22 and count (as an
+ // unsigned 16-bit value) in the low bits. reg must lie between
+ // arm64.REG_R0 and arm64.REG_R31, otherwise an error is returned.
+ func ARM64RegisterShift(reg, op, count int16) (int64, error) {
+ 	// the base register of shift operations must be general register.
+ 	isGeneral := arm64.REG_R0 <= reg && reg <= arm64.REG_R31
+ 	if !isGeneral {
+ 		return 0, errors.New("invalid register for shift operation")
+ 	}
+ 	regField := int64(reg&31) << 16
+ 	opField := int64(op) << 22
+ 	countField := int64(uint16(count))
+ 	return regField | opField | countField, nil
+ }
+
+ // ARM64RegisterExtension constructs an ARM64 register with extension or arrangement.
+ //
+ // The encoded register is stored into a (a.Reg or a.Index, depending on the
+ // extension and on a.Type); nothing else is returned but an error.
+ //   - reg is the base register; it must be a general register
+ //     (REG_R0..REG_R31) or a vector register (REG_V0..REG_V31).
+ //   - ext is the extension or arrangement name, e.g. "UXTW", "LSL", "B16", "S".
+ //   - num is the shift amount (general registers) or element index (vector
+ //     registers with isIndex set).
+ //   - isAmount says a shift amount was supplied; it is required for general
+ //     registers and num must then be in 0..7.
+ //   - isIndex says an element index was supplied; it is rejected for the
+ //     arrangement names (B8, B16, ...) and expected for the element names
+ //     (B, H, S, D).
+ func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error {
+ // Rnum packs the low five bits of the register with num shifted above them.
+ Rnum := (reg & 31) + int16(num<<5)
+ if isAmount {
+ if num < 0 || num > 7 {
+ return errors.New("index shift amount is out of range")
+ }
+ }
+ if reg <= arm64.REG_R31 && reg >= arm64.REG_R0 {
+ if !isAmount {
+ return errors.New("invalid register extension")
+ }
+ // For memory operands only UXTW, SXTW, SXTX and LSL are accepted, and
+ // they are recorded in a.Index rather than a.Reg.
+ switch ext {
+ case "UXTB":
+ if a.Type == obj.TYPE_MEM {
+ return errors.New("invalid shift for the register offset addressing mode")
+ }
+ a.Reg = arm64.REG_UXTB + Rnum
+ case "UXTH":
+ if a.Type == obj.TYPE_MEM {
+ return errors.New("invalid shift for the register offset addressing mode")
+ }
+ a.Reg = arm64.REG_UXTH + Rnum
+ case "UXTW":
+ // effective address of memory is a base register value and an offset register value.
+ if a.Type == obj.TYPE_MEM {
+ a.Index = arm64.REG_UXTW + Rnum
+ } else {
+ a.Reg = arm64.REG_UXTW + Rnum
+ }
+ case "UXTX":
+ if a.Type == obj.TYPE_MEM {
+ return errors.New("invalid shift for the register offset addressing mode")
+ }
+ a.Reg = arm64.REG_UXTX + Rnum
+ case "SXTB":
+ if a.Type == obj.TYPE_MEM {
+ return errors.New("invalid shift for the register offset addressing mode")
+ }
+ a.Reg = arm64.REG_SXTB + Rnum
+ case "SXTH":
+ if a.Type == obj.TYPE_MEM {
+ return errors.New("invalid shift for the register offset addressing mode")
+ }
+ a.Reg = arm64.REG_SXTH + Rnum
+ case "SXTW":
+ if a.Type == obj.TYPE_MEM {
+ a.Index = arm64.REG_SXTW + Rnum
+ } else {
+ a.Reg = arm64.REG_SXTW + Rnum
+ }
+ case "SXTX":
+ if a.Type == obj.TYPE_MEM {
+ a.Index = arm64.REG_SXTX + Rnum
+ } else {
+ a.Reg = arm64.REG_SXTX + Rnum
+ }
+ case "LSL":
+ // LSL is always stored in a.Index, regardless of a.Type.
+ a.Index = arm64.REG_LSL + Rnum
+ default:
+ return errors.New("unsupported general register extension type: " + ext)
+
+ }
+ } else if reg <= arm64.REG_V31 && reg >= arm64.REG_V0 {
+ // Vector registers: arrangement names encode REG_ARNG plus the register
+ // and the arrangement constant shifted above it; element names encode
+ // REG_ELEM the same way and put the element index in a.Index.
+ switch ext {
+ case "B8":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8B & 15) << 5)
+ case "B16":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_16B & 15) << 5)
+ case "H4":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4H & 15) << 5)
+ case "H8":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8H & 15) << 5)
+ case "S2":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2S & 15) << 5)
+ case "S4":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4S & 15) << 5)
+ case "D1":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_1D & 15) << 5)
+ case "D2":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2D & 15) << 5)
+ case "Q1":
+ if isIndex {
+ return errors.New("invalid register extension")
+ }
+ a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_1Q & 15) << 5)
+ case "B":
+ // NOTE(review): without an index this returns success and leaves a
+ // untouched (same for H, S, D below) — confirm callers rely on that.
+ if !isIndex {
+ return nil
+ }
+ a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_B & 15) << 5)
+ a.Index = num
+ case "H":
+ if !isIndex {
+ return nil
+ }
+ a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_H & 15) << 5)
+ a.Index = num
+ case "S":
+ if !isIndex {
+ return nil
+ }
+ a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_S & 15) << 5)
+ a.Index = num
+ case "D":
+ if !isIndex {
+ return nil
+ }
+ a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_D & 15) << 5)
+ a.Index = num
+ default:
+ return errors.New("unsupported simd register extension type: " + ext)
+ }
+ } else {
+ return errors.New("invalid register and extension combination")
+ }
+ return nil
+ }
+
+ // ARM64RegisterArrangement constructs an ARM64 vector register arrangement.
+ //
+ // name is the textual register name and must begin with 'V'; reg is the
+ // register number and must not be negative; arng is the arrangement name
+ // (B8, B16, H4, H8, S2, S4, D1 or D2). The result carries the arrangement's
+ // Q bit at bit 30 and its two-bit size at bits 10-11. An error is returned
+ // for an empty or non-V name, a negative reg, or an unknown arrangement.
+ func ARM64RegisterArrangement(reg int16, name, arng string) (int64, error) {
+ 	// Check the length first: indexing name[0] on an empty name would panic.
+ 	if len(name) == 0 || name[0] != 'V' {
+ 		return 0, errors.New("expect V0 through V31; found: " + name)
+ 	}
+ 	if reg < 0 {
+ 		return 0, errors.New("invalid register number: " + name)
+ 	}
+ 	var curQ, curSize uint16
+ 	switch arng {
+ 	case "B8":
+ 		curSize, curQ = 0, 0
+ 	case "B16":
+ 		curSize, curQ = 0, 1
+ 	case "H4":
+ 		curSize, curQ = 1, 0
+ 	case "H8":
+ 		curSize, curQ = 1, 1
+ 	case "S2":
+ 		curSize, curQ = 2, 0
+ 	case "S4":
+ 		curSize, curQ = 2, 1
+ 	case "D1":
+ 		curSize, curQ = 3, 0
+ 	case "D2":
+ 		curSize, curQ = 3, 1
+ 	default:
+ 		return 0, errors.New("invalid arrangement in ARM64 register list")
+ 	}
+ 	// In Go, & and << share a precedence level and associate left to right;
+ 	// the parentheses make the intended grouping explicit.
+ 	return (int64(curQ)&1)<<30 | int64(curSize&3)<<10, nil
+ }
+
+ // ARM64RegisterListOffset generates offset encoding according to AArch64 specification.
+ // The result combines firstReg in the low bits, a register-count code at
+ // bit 12, the caller's arrangement bits, and bit 60 as an arm64 marker.
+ // regCnt must be between 1 and 4, otherwise an error is returned.
+ func ARM64RegisterListOffset(firstReg, regCnt int, arrangement int64) (int64, error) {
+ 	var countCode int64
+ 	switch regCnt {
+ 	case 1:
+ 		countCode = 0x7
+ 	case 2:
+ 		countCode = 0xa
+ 	case 3:
+ 		countCode = 0x6
+ 	case 4:
+ 		countCode = 0x2
+ 	default:
+ 		return 0, errors.New("invalid register numbers in ARM64 register list")
+ 	}
+ 	// arm64 uses the 60th bit to differentiate from other archs
+ 	// For more details, refer to: obj/arm64/list7.go
+ 	const arm64Marker = int64(1) << 60
+ 	return int64(firstReg) | countCode<<12 | arrangement | arm64Marker, nil
+ }
diff --git a/src/cmd/asm/internal/arch/mips.go b/src/cmd/asm/internal/arch/mips.go
new file mode 100644
index 0000000..5d71f40
--- /dev/null
+++ b/src/cmd/asm/internal/arch/mips.go
@@ -0,0 +1,72 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the
+// MIPS (MIPS64) instruction set, to minimize its interaction
+// with the core of the assembler.
+
+package arch
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/obj/mips"
+)
+
+ // jumpMIPS reports whether word is the mnemonic of a MIPS jump, branch or
+ // call instruction.
+ func jumpMIPS(word string) bool {
+ 	switch word {
+ 	case "JMP", "JAL", "CALL":
+ 		return true
+ 	case "BEQ", "BNE", "BFPF", "BFPT":
+ 		return true
+ 	case "BGEZ", "BGEZAL", "BGTZ", "BLEZ", "BLTZ", "BLTZAL":
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // IsMIPSCMP reports whether the op (as defined by a mips.A* constant) is
+ // one of the CMP instructions that require special handling.
+ func IsMIPSCMP(op obj.As) bool {
+ 	switch op {
+ 	case mips.ACMPEQF, mips.ACMPGEF, mips.ACMPGTF:
+ 		// Single-precision comparisons.
+ 		return true
+ 	case mips.ACMPEQD, mips.ACMPGED, mips.ACMPGTD:
+ 		// Double-precision comparisons.
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // IsMIPSMUL reports whether the op (as defined by a mips.A* constant) is
+ // one of the MUL/DIV/REM/MADD/MSUB instructions that require special handling.
+ func IsMIPSMUL(op obj.As) bool {
+ 	switch op {
+ 	case mips.AMUL, mips.AMULU, mips.AMULV, mips.AMULVU:
+ 		return true
+ 	case mips.ADIV, mips.ADIVU, mips.ADIVV, mips.ADIVVU:
+ 		return true
+ 	case mips.AREM, mips.AREMU, mips.AREMV, mips.AREMVU:
+ 		return true
+ 	case mips.AMADD, mips.AMSUB:
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // mipsRegisterNumber converts a register family name ("F", "FCR", "M", "R"
+ // or "W") and an index in the range 0..31 into the corresponding mips
+ // register constant. The boolean result is false for an unknown family or
+ // an out-of-range index.
+ func mipsRegisterNumber(name string, n int16) (int16, bool) {
+ 	// Every family shares the same 0..31 index range, so check it once.
+ 	if n < 0 || n > 31 {
+ 		return 0, false
+ 	}
+ 	switch name {
+ 	case "F":
+ 		return mips.REG_F0 + n, true
+ 	case "FCR":
+ 		return mips.REG_FCR0 + n, true
+ 	case "M":
+ 		return mips.REG_M0 + n, true
+ 	case "R":
+ 		return mips.REG_R0 + n, true
+ 	case "W":
+ 		return mips.REG_W0 + n, true
+ 	default:
+ 		return 0, false
+ 	}
+ }
diff --git a/src/cmd/asm/internal/arch/ppc64.go b/src/cmd/asm/internal/arch/ppc64.go
new file mode 100644
index 0000000..3139665
--- /dev/null
+++ b/src/cmd/asm/internal/arch/ppc64.go
@@ -0,0 +1,102 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the
+// 64-bit PowerPC (PPC64) instruction set, to minimize its interaction
+// with the core of the assembler.
+
+package arch
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/obj/ppc64"
+)
+
+ // jumpPPC64 reports whether word is the mnemonic of a PPC64 jump, branch
+ // or call instruction.
+ func jumpPPC64(word string) bool {
+ 	switch word {
+ 	case "BR", "BL", "BC", "BCL", "CALL", "JMP":
+ 		return true
+ 	case "BEQ", "BNE", "BGE", "BGT", "BLE", "BLT", "BVC", "BVS":
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // IsPPC64RLD reports whether the op (as defined by a ppc64.A* constant) is
+ // one of the RLD-like instructions that require special handling.
+ // The FMADD-like instructions behave similarly and are reported too.
+ func IsPPC64RLD(op obj.As) bool {
+ 	switch op {
+ 	// Rotate instructions.
+ 	case ppc64.ARLDC, ppc64.ARLDCCC, ppc64.ARLDCL, ppc64.ARLDCLCC:
+ 		return true
+ 	case ppc64.ARLDCR, ppc64.ARLDCRCC, ppc64.ARLDMI, ppc64.ARLDMICC:
+ 		return true
+ 	case ppc64.ARLWMI, ppc64.ARLWMICC, ppc64.ARLWNM, ppc64.ARLWNMCC:
+ 		return true
+ 	// Fused multiply-add family.
+ 	case ppc64.AFMADD, ppc64.AFMADDCC, ppc64.AFMADDS, ppc64.AFMADDSCC:
+ 		return true
+ 	case ppc64.AFMSUB, ppc64.AFMSUBCC, ppc64.AFMSUBS, ppc64.AFMSUBSCC:
+ 		return true
+ 	case ppc64.AFNMADD, ppc64.AFNMADDCC, ppc64.AFNMADDS, ppc64.AFNMADDSCC:
+ 		return true
+ 	case ppc64.AFNMSUB, ppc64.AFNMSUBCC, ppc64.AFNMSUBS, ppc64.AFNMSUBSCC:
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // IsPPC64ISEL reports whether the op (as defined by a ppc64.A* constant)
+ // is the ISEL instruction.
+ func IsPPC64ISEL(op obj.As) bool {
+ 	switch op {
+ 	case ppc64.AISEL:
+ 		return true
+ 	}
+ 	return false
+ }
+
+ // IsPPC64CMP reports whether the op (as defined by a ppc64.A* constant) is
+ // one of the CMP instructions that require special handling.
+ func IsPPC64CMP(op obj.As) bool {
+ 	return op == ppc64.ACMP ||
+ 		op == ppc64.ACMPU ||
+ 		op == ppc64.ACMPW ||
+ 		op == ppc64.ACMPWU ||
+ 		op == ppc64.AFCMPU
+ }
+
+ // IsPPC64NEG reports whether the op (as defined by a ppc64.A* constant) is
+ // one of the NEG-like instructions that require special handling.
+ func IsPPC64NEG(op obj.As) bool {
+ 	switch op {
+ 	case ppc64.AADDME, ppc64.AADDMECC, ppc64.AADDMEV, ppc64.AADDMEVCC:
+ 		return true
+ 	case ppc64.AADDZE, ppc64.AADDZECC, ppc64.AADDZEV, ppc64.AADDZEVCC:
+ 		return true
+ 	case ppc64.ACNTLZD, ppc64.ACNTLZDCC, ppc64.ACNTLZW, ppc64.ACNTLZWCC:
+ 		return true
+ 	case ppc64.AEXTSB, ppc64.AEXTSBCC, ppc64.AEXTSH, ppc64.AEXTSHCC, ppc64.AEXTSW, ppc64.AEXTSWCC:
+ 		return true
+ 	case ppc64.ANEG, ppc64.ANEGCC, ppc64.ANEGV, ppc64.ANEGVCC:
+ 		return true
+ 	case ppc64.ASLBMFEE, ppc64.ASLBMFEV, ppc64.ASLBMTE:
+ 		return true
+ 	case ppc64.ASUBME, ppc64.ASUBMECC, ppc64.ASUBMEV, ppc64.ASUBMEVCC:
+ 		return true
+ 	case ppc64.ASUBZE, ppc64.ASUBZECC, ppc64.ASUBZEV, ppc64.ASUBZEVCC:
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // ppc64RegisterNumber converts a register family name and index into the
+ // corresponding ppc64 register constant. Accepted families and inclusive
+ // index ranges: CR 0..7, VS 0..63, V/F/R 0..31, SPR 0..1024. The boolean
+ // result is false for an unknown family or an out-of-range index.
+ func ppc64RegisterNumber(name string, n int16) (int16, bool) {
+ 	var first, last int16
+ 	switch name {
+ 	case "CR":
+ 		first, last = ppc64.REG_CR0, 7
+ 	case "VS":
+ 		first, last = ppc64.REG_VS0, 63
+ 	case "V":
+ 		first, last = ppc64.REG_V0, 31
+ 	case "F":
+ 		first, last = ppc64.REG_F0, 31
+ 	case "R":
+ 		first, last = ppc64.REG_R0, 31
+ 	case "SPR":
+ 		// NOTE(review): the inclusive bound 1024 allows 1025 indices;
+ 		// confirm whether 1023 was intended.
+ 		first, last = ppc64.REG_SPR0, 1024
+ 	default:
+ 		return 0, false
+ 	}
+ 	if n < 0 || n > last {
+ 		return 0, false
+ 	}
+ 	return first + n, true
+ }
diff --git a/src/cmd/asm/internal/arch/riscv64.go b/src/cmd/asm/internal/arch/riscv64.go
new file mode 100644
index 0000000..27a66c5
--- /dev/null
+++ b/src/cmd/asm/internal/arch/riscv64.go
@@ -0,0 +1,28 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the RISCV64
+// instruction set, to minimize its interaction with the core of the
+// assembler.
+
+package arch
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/obj/riscv"
+)
+
+ // IsRISCV64AMO reports whether the op (as defined by a riscv.A* constant)
+ // is one of the AMO instructions that requires special handling.
+ // The SCW and SCD instructions are included in that set.
+ func IsRISCV64AMO(op obj.As) bool {
+ 	switch op {
+ 	case riscv.ASCW, riscv.ASCD:
+ 		return true
+ 	case riscv.AAMOSWAPW, riscv.AAMOSWAPD, riscv.AAMOADDW, riscv.AAMOADDD:
+ 		return true
+ 	case riscv.AAMOANDW, riscv.AAMOANDD, riscv.AAMOORW, riscv.AAMOORD, riscv.AAMOXORW, riscv.AAMOXORD:
+ 		return true
+ 	case riscv.AAMOMINW, riscv.AAMOMIND, riscv.AAMOMINUW, riscv.AAMOMINUD:
+ 		return true
+ 	case riscv.AAMOMAXW, riscv.AAMOMAXD, riscv.AAMOMAXUW, riscv.AAMOMAXUD:
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
diff --git a/src/cmd/asm/internal/arch/s390x.go b/src/cmd/asm/internal/arch/s390x.go
new file mode 100644
index 0000000..519d208
--- /dev/null
+++ b/src/cmd/asm/internal/arch/s390x.go
@@ -0,0 +1,81 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file encapsulates some of the odd characteristics of the
+// s390x instruction set, to minimize its interaction
+// with the core of the assembler.
+
+package arch
+
+import (
+ "cmd/internal/obj/s390x"
+)
+
+ // jumpS390x reports whether word is the mnemonic of an s390x jump, branch,
+ // compare-and-branch or call instruction.
+ func jumpS390x(word string) bool {
+ 	switch word {
+ 	case "CALL", "JMP", "BR", "BL":
+ 		return true
+ 	case "BRC", "BC", "BCL", "BRCT", "BRCTG":
+ 		return true
+ 	case "BEQ", "BNE", "BGE", "BGT", "BLE", "BLEU", "BLT", "BLTU", "BVC", "BVS":
+ 		return true
+ 	case "CMPBEQ", "CMPBGE", "CMPBGT", "CMPBLE", "CMPBLT", "CMPBNE":
+ 		return true
+ 	case "CMPUBEQ", "CMPUBGE", "CMPUBGT", "CMPUBLE", "CMPUBLT", "CMPUBNE":
+ 		return true
+ 	case "CRJ", "CGRJ", "CLRJ", "CLGRJ":
+ 		return true
+ 	case "CIJ", "CGIJ", "CLIJ", "CLGIJ":
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // s390xRegisterNumber converts a register family name and index into the
+ // corresponding s390x register constant. "AR", "F" and "R" accept 0..15;
+ // "V" accepts 0..31. The boolean result is false for an unknown family or
+ // an out-of-range index.
+ func s390xRegisterNumber(name string, n int16) (int16, bool) {
+ 	var first, last int16
+ 	switch name {
+ 	case "AR":
+ 		first, last = s390x.REG_AR0, 15
+ 	case "F":
+ 		first, last = s390x.REG_F0, 15
+ 	case "R":
+ 		first, last = s390x.REG_R0, 15
+ 	case "V":
+ 		first, last = s390x.REG_V0, 31
+ 	default:
+ 		return 0, false
+ 	}
+ 	if n < 0 || n > last {
+ 		return 0, false
+ 	}
+ 	return first + n, true
+ }
diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go
new file mode 100644
index 0000000..d0cb632
--- /dev/null
+++ b/src/cmd/asm/internal/asm/asm.go
@@ -0,0 +1,915 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+ "text/scanner"
+
+ "cmd/asm/internal/arch"
+ "cmd/asm/internal/flags"
+ "cmd/asm/internal/lex"
+ "cmd/internal/obj"
+ "cmd/internal/obj/x86"
+ "cmd/internal/objabi"
+ "cmd/internal/sys"
+)
+
+// TODO: configure the architecture
+
+var testOut *bytes.Buffer // Gathers output when testing.
+
+ // append adds the Prog to the end of the program-thus-far.
+ // If doLabel is set, it also defines the labels collected for this Prog.
+ //
+ // cond is the instruction suffix, if any. It is interpreted per architecture
+ // (ARM condition codes, ARM64 suffixes, x86 suffixes); any other
+ // architecture rejects a non-empty suffix. On a suffix error the Prog is
+ // not linked into the list.
+ func (p *Parser) append(prog *obj.Prog, cond string, doLabel bool) {
+ if cond != "" {
+ switch p.arch.Family {
+ case sys.ARM:
+ if !arch.ARMConditionCodes(prog, cond) {
+ p.errorf("unrecognized condition code .%q", cond)
+ return
+ }
+
+ case sys.ARM64:
+ if !arch.ARM64Suffix(prog, cond) {
+ p.errorf("unrecognized suffix .%q", cond)
+ return
+ }
+
+ case sys.AMD64, sys.I386:
+ if err := x86.ParseSuffix(prog, cond); err != nil {
+ p.errorf("%v", err)
+ return
+ }
+
+ default:
+ p.errorf("unrecognized suffix .%q", cond)
+ return
+ }
+ }
+ // Link prog onto the tail of the singly linked Prog list.
+ if p.firstProg == nil {
+ p.firstProg = prog
+ } else {
+ p.lastProg.Link = prog
+ }
+ p.lastProg = prog
+ if doLabel {
+ // The pc advances only for label-defining Progs; all pending labels
+ // are bound to this Prog and the pending list is emptied in place.
+ p.pc++
+ for _, label := range p.pendingLabels {
+ if p.labels[label] != nil {
+ p.errorf("label %q multiply defined", label)
+ return
+ }
+ p.labels[label] = prog
+ }
+ p.pendingLabels = p.pendingLabels[0:0]
+ }
+ prog.Pc = p.pc
+ if *flags.Debug {
+ fmt.Println(p.lineNum, prog)
+ }
+ // testOut, when set, collects one printed Prog per line.
+ if testOut != nil {
+ fmt.Fprintln(testOut, prog)
+ }
+ }
+
+ // validSymbol reports whether addr is a valid symbol operand for the
+ // pseudo-op named pseudo: it must carry a symbol, be named relative to SB
+ // (extern or static), and have no scale or register. Unless offsetOk is
+ // set, the offset must also be zero. A diagnostic is issued on failure.
+ func (p *Parser) validSymbol(pseudo string, addr *obj.Addr, offsetOk bool) bool {
+ 	onSB := addr.Name == obj.NAME_EXTERN || addr.Name == obj.NAME_STATIC
+ 	switch {
+ 	case addr.Sym == nil, !onSB, addr.Scale != 0, addr.Reg != 0:
+ 		p.errorf("%s symbol %q must be a symbol(SB)", pseudo, symbolName(addr))
+ 		return false
+ 	case !offsetOk && addr.Offset != 0:
+ 		p.errorf("%s symbol %q must not be offset from SB", pseudo, symbolName(addr))
+ 		return false
+ 	}
+ 	return true
+ }
+
+ // evalInteger parses operands as an address and returns the integer
+ // constant it denotes, on behalf of the pseudo-op named pseudo.
+ func (p *Parser) evalInteger(pseudo string, operands []lex.Token) int64 {
+ 	operand := p.address(operands)
+ 	value := p.getConstantPseudo(pseudo, &operand)
+ 	return value
+ }
+
+ // validImmediate reports whether addr is a plain immediate constant, that
+ // is a TYPE_CONST with no name, register or index. Otherwise it issues a
+ // diagnostic on behalf of the pseudo-op named pseudo and returns false.
+ func (p *Parser) validImmediate(pseudo string, addr *obj.Addr) bool {
+ 	if addr.Type == obj.TYPE_CONST && addr.Name == 0 && addr.Reg == 0 && addr.Index == 0 {
+ 		return true
+ 	}
+ 	p.errorf("%s: expected immediate constant; found %s", pseudo, obj.Dconv(&emptyProg, addr))
+ 	return false
+ }
+
+ // asmText assembles a TEXT pseudo-op.
+ // TEXT runtime·sigtramp(SB),4,$0-0
+ //
+ // operands holds two or three token lists: the symbol, an optional integer
+ // flag, and the $frameSize[-argSize] operand. On success a TEXT Prog is
+ // appended and recorded as the symbol's function text.
+ func (p *Parser) asmText(operands [][]lex.Token) {
+ if len(operands) != 2 && len(operands) != 3 {
+ p.errorf("expect two or three operands for TEXT")
+ return
+ }
+
+ // Labels are function scoped. Patch existing labels and
+ // create a new label space for this TEXT.
+ p.patch()
+ p.labels = make(map[string]*obj.Prog)
+
+ // Operand 0 is the symbol name in the form foo(SB).
+ // That means symbol plus indirect on SB and no offset.
+ nameAddr := p.address(operands[0])
+ if !p.validSymbol("TEXT", &nameAddr, false) {
+ return
+ }
+ name := symbolName(&nameAddr)
+ // next indexes the frame-size operand; it moves past the flag if present.
+ next := 1
+
+ // Next operand is the optional text flag, a literal integer.
+ var flag = int64(0)
+ if len(operands) == 3 {
+ flag = p.evalInteger("TEXT", operands[1])
+ next++
+ }
+
+ // Issue an error if we see a function defined as ABIInternal
+ // without NOSPLIT. In ABIInternal, obj needs to know the function
+ // signature in order to construct the morestack path, so this
+ // currently isn't supported for asm functions.
+ // Note that assembly continues after this diagnostic.
+ if nameAddr.Sym.ABI() == obj.ABIInternal && flag&obj.NOSPLIT == 0 {
+ p.errorf("TEXT %q: ABIInternal requires NOSPLIT", name)
+ }
+
+ // Next operand is the frame and arg size.
+ // Bizarre syntax: $frameSize-argSize is two words, not subtraction.
+ // Both frameSize and argSize must be simple integers; only frameSize
+ // can be negative.
+ // The "-argSize" may be missing; if so, set it to objabi.ArgsSizeUnknown.
+ // Parse left to right.
+ op := operands[next]
+ if len(op) < 2 || op[0].ScanToken != '$' {
+ p.errorf("TEXT %s: frame size must be an immediate constant", name)
+ return
+ }
+ // Drop the '$'; at least one token is guaranteed to remain.
+ op = op[1:]
+ negative := false
+ if op[0].ScanToken == '-' {
+ negative = true
+ op = op[1:]
+ }
+ if len(op) == 0 || op[0].ScanToken != scanner.Int {
+ p.errorf("TEXT %s: frame size must be an immediate constant", name)
+ return
+ }
+ frameSize := p.positiveAtoi(op[0].String())
+ if negative {
+ frameSize = -frameSize
+ }
+ op = op[1:]
+ argSize := int64(objabi.ArgsSizeUnknown)
+ if len(op) > 0 {
+ // There is an argument size. It must be a minus sign followed by a non-negative integer literal.
+ if len(op) != 2 || op[0].ScanToken != '-' || op[1].ScanToken != scanner.Int {
+ p.errorf("TEXT %s: argument size must be of form -integer", name)
+ return
+ }
+ argSize = p.positiveAtoi(op[1].String())
+ }
+ p.ctxt.InitTextSym(nameAddr.Sym, int(flag))
+ prog := &obj.Prog{
+ Ctxt: p.ctxt,
+ As: obj.ATEXT,
+ Pos: p.pos(),
+ From: nameAddr,
+ To: obj.Addr{
+ Type: obj.TYPE_TEXTSIZE,
+ Offset: frameSize,
+ // Argsize set below.
+ },
+ }
+ nameAddr.Sym.Func().Text = prog
+ // The argument size travels in To.Val as an int32.
+ prog.To.Val = int32(argSize)
+ p.append(prog, "", true)
+ }
+
+ // asmData assembles a DATA pseudo-op.
+ // DATA masks<>+0x00(SB)/4, $0x00000000
+ //
+ // operands must hold exactly two token lists: the destination in the form
+ // symbol+offset(SB)/size and the value, which may be an integer, float or
+ // string constant or an address. The value is written into the symbol's
+ // data at the given offset. Entries for one symbol must be given in
+ // non-decreasing address order.
+ func (p *Parser) asmData(operands [][]lex.Token) {
+ 	if len(operands) != 2 {
+ 		p.errorf("expect two operands for DATA")
+ 		return
+ 	}
+
+ 	// Operand 0 has the general form foo<>+0x04(SB)/4.
+ 	op := operands[0]
+ 	n := len(op)
+ 	if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int {
+ 		p.errorf("expect /size for DATA argument")
+ 		return
+ 	}
+ 	szop := op[n-1].String()
+ 	sz, err := strconv.Atoi(szop)
+ 	if err != nil {
+ 		p.errorf("bad size for DATA argument: %q", szop)
+ 		// Stop here: carrying on with sz == 0 would record a bogus end
+ 		// address for the symbol and report a second, misleading error.
+ 		return
+ 	}
+ 	op = op[:n-2]
+ 	nameAddr := p.address(op)
+ 	if !p.validSymbol("DATA", &nameAddr, true) {
+ 		return
+ 	}
+ 	name := symbolName(&nameAddr)
+
+ 	// Operand 1 is an immediate constant or address.
+ 	valueAddr := p.address(operands[1])
+ 	switch valueAddr.Type {
+ 	case obj.TYPE_CONST, obj.TYPE_FCONST, obj.TYPE_SCONST, obj.TYPE_ADDR:
+ 		// OK
+ 	default:
+ 		p.errorf("DATA value must be an immediate constant or address")
+ 		return
+ 	}
+
+ 	// The addresses must not overlap. Easiest test: require monotonicity.
+ 	if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr {
+ 		p.errorf("overlapping DATA entry for %s", name)
+ 		return
+ 	}
+ 	p.dataAddr[name] = nameAddr.Offset + int64(sz)
+
+ 	// Emit the value; the set of legal sizes depends on the value's type.
+ 	switch valueAddr.Type {
+ 	case obj.TYPE_CONST:
+ 		switch sz {
+ 		case 1, 2, 4, 8:
+ 			nameAddr.Sym.WriteInt(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Offset)
+ 		default:
+ 			p.errorf("bad int size for DATA argument: %d", sz)
+ 		}
+ 	case obj.TYPE_FCONST:
+ 		switch sz {
+ 		case 4:
+ 			nameAddr.Sym.WriteFloat32(p.ctxt, nameAddr.Offset, float32(valueAddr.Val.(float64)))
+ 		case 8:
+ 			nameAddr.Sym.WriteFloat64(p.ctxt, nameAddr.Offset, valueAddr.Val.(float64))
+ 		default:
+ 			p.errorf("bad float size for DATA argument: %d", sz)
+ 		}
+ 	case obj.TYPE_SCONST:
+ 		nameAddr.Sym.WriteString(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Val.(string))
+ 	case obj.TYPE_ADDR:
+ 		// An address must occupy exactly one pointer.
+ 		if sz == p.arch.PtrSize {
+ 			nameAddr.Sym.WriteAddr(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Sym, valueAddr.Offset)
+ 		} else {
+ 			p.errorf("bad addr size for DATA argument: %d", sz)
+ 		}
+ 	}
+ }
+
+ // asmGlobl assembles a GLOBL pseudo-op.
+ // GLOBL shifts<>(SB),8,$256
+ // GLOBL shifts<>(SB),$256
+ //
+ // operands holds two or three token lists: the symbol (no offset allowed),
+ // an optional integer flag, and an immediate constant that is passed to
+ // p.ctxt.Globl together with the symbol and flag.
+ func (p *Parser) asmGlobl(operands [][]lex.Token) {
+ if len(operands) != 2 && len(operands) != 3 {
+ p.errorf("expect two or three operands for GLOBL")
+ return
+ }
+
+ // Operand 0 has the general form foo<>+0x04(SB).
+ nameAddr := p.address(operands[0])
+ if !p.validSymbol("GLOBL", &nameAddr, false) {
+ return
+ }
+ // next indexes the final operand; it moves past the flag if present.
+ next := 1
+
+ // Next operand is the optional flag, a literal integer.
+ var flag = int64(0)
+ if len(operands) == 3 {
+ flag = p.evalInteger("GLOBL", operands[1])
+ next++
+ }
+
+ // Final operand is an immediate constant.
+ addr := p.address(operands[next])
+ if !p.validImmediate("GLOBL", &addr) {
+ return
+ }
+
+ // log.Printf("GLOBL %s %d, $%d", name, flag, size)
+ p.ctxt.Globl(nameAddr.Sym, addr.Offset, int(flag))
+ }
+
+ // asmPCData assembles a PCDATA pseudo-op.
+ // PCDATA $2, $705
+ //
+ // Both operands must be immediate constants; they become the From and To
+ // addresses of the appended PCDATA Prog.
+ func (p *Parser) asmPCData(operands [][]lex.Token) {
+ 	if n := len(operands); n != 2 {
+ 		p.errorf("expect two operands for PCDATA")
+ 		return
+ 	}
+
+ 	// First operand: the immediate key.
+ 	key := p.address(operands[0])
+ 	if ok := p.validImmediate("PCDATA", &key); !ok {
+ 		return
+ 	}
+
+ 	// Second operand: the immediate value.
+ 	value := p.address(operands[1])
+ 	if ok := p.validImmediate("PCDATA", &value); !ok {
+ 		return
+ 	}
+
+ 	// log.Printf("PCDATA $%d, $%d", key.Offset, value.Offset)
+ 	p.append(&obj.Prog{
+ 		Ctxt: p.ctxt,
+ 		As:   obj.APCDATA,
+ 		Pos:  p.pos(),
+ 		From: key,
+ 		To:   value,
+ 	}, "", true)
+ }
+
+ // asmPCAlign assembles a PCALIGN pseudo-op.
+ // PCALIGN $16
+ //
+ // The single operand must be an immediate constant; it becomes the From
+ // address of the appended PCALIGN Prog.
+ func (p *Parser) asmPCAlign(operands [][]lex.Token) {
+ 	if len(operands) != 1 {
+ 		p.errorf("expect one operand for PCALIGN")
+ 		return
+ 	}
+
+ 	// Operand 0 must be an immediate constant.
+ 	key := p.address(operands[0])
+ 	if !p.validImmediate("PCALIGN", &key) {
+ 		return
+ 	}
+
+ 	prog := &obj.Prog{
+ 		Ctxt: p.ctxt,
+ 		As:   obj.APCALIGN,
+ 		// Record the source position, as the other pseudo-ops in this
+ 		// file do.
+ 		Pos:  p.pos(),
+ 		From: key,
+ 	}
+ 	p.append(prog, "", true)
+ }
+
+ // asmFuncData assembles a FUNCDATA pseudo-op.
+ // FUNCDATA $1, funcdata<>+4(SB)
+ //
+ // The first operand must be an immediate constant and the second a symbol
+ // relative to SB (an offset is allowed); they become the From and To
+ // addresses of the appended FUNCDATA Prog.
+ func (p *Parser) asmFuncData(operands [][]lex.Token) {
+ 	if n := len(operands); n != 2 {
+ 		p.errorf("expect two operands for FUNCDATA")
+ 		return
+ 	}
+
+ 	// First operand: the immediate constant.
+ 	index := p.address(operands[0])
+ 	if ok := p.validImmediate("FUNCDATA", &index); !ok {
+ 		return
+ 	}
+
+ 	// Second operand: a symbol name in the form foo(SB).
+ 	sym := p.address(operands[1])
+ 	if ok := p.validSymbol("FUNCDATA", &sym, true); !ok {
+ 		return
+ 	}
+
+ 	p.append(&obj.Prog{
+ 		Ctxt: p.ctxt,
+ 		As:   obj.AFUNCDATA,
+ 		Pos:  p.pos(),
+ 		From: index,
+ 		To:   sym,
+ 	}, "", true)
+ }
+
+ // asmJump assembles a jump instruction.
+ // JMP R1
+ // JMP exit
+ // JMP 3(PC)
+ //
+ // op is the opcode, cond the optional instruction suffix and a the parsed
+ // operands. The last operand is the jump target; earlier operands are
+ // distributed over From, Reg and From3 according to the architecture.
+ // Jumps to labels not yet defined are queued in p.toPatch.
+ func (p *Parser) asmJump(op obj.As, cond string, a []obj.Addr) {
+ 	var target *obj.Addr
+ 	prog := &obj.Prog{
+ 		Ctxt: p.ctxt,
+ 		Pos:  p.pos(),
+ 		As:   op,
+ 	}
+ 	switch len(a) {
+ 	case 0:
+ 		if p.arch.Family == sys.Wasm {
+ 			target = &obj.Addr{Type: obj.TYPE_NONE}
+ 			break
+ 		}
+ 		p.errorf("wrong number of arguments to %s instruction", op)
+ 		return
+ 	case 1:
+ 		target = &a[0]
+ 	case 2:
+ 		// Special 2-operand jumps.
+ 		target = &a[1]
+ 		prog.From = a[0]
+ 	case 3:
+ 		if p.arch.Family == sys.PPC64 {
+ 			// Special 3-operand jumps.
+ 			// First two must be constants; a[1] is a register number.
+ 			target = &a[2]
+ 			prog.From = obj.Addr{
+ 				Type:   obj.TYPE_CONST,
+ 				Offset: p.getConstant(prog, op, &a[0]),
+ 			}
+ 			// Keep the number the user wrote in its own variable so the
+ 			// diagnostic below reports it, not the failed lookup result.
+ 			regNum := int16(p.getConstant(prog, op, &a[1]))
+ 			reg, ok := p.arch.RegisterNumber("R", regNum)
+ 			if !ok {
+ 				p.errorf("bad register number %d", regNum)
+ 				return
+ 			}
+ 			prog.Reg = reg
+ 			break
+ 		}
+ 		if p.arch.Family == sys.MIPS || p.arch.Family == sys.MIPS64 || p.arch.Family == sys.RISCV64 {
+ 			// 3-operand jumps.
+ 			// First two must be registers
+ 			target = &a[2]
+ 			prog.From = a[0]
+ 			prog.Reg = p.getRegister(prog, op, &a[1])
+ 			break
+ 		}
+ 		if p.arch.Family == sys.S390X {
+ 			// 3-operand jumps.
+ 			target = &a[2]
+ 			prog.From = a[0]
+ 			if a[1].Reg != 0 {
+ 				// Compare two registers and jump.
+ 				prog.Reg = p.getRegister(prog, op, &a[1])
+ 			} else {
+ 				// Compare register with immediate and jump.
+ 				prog.SetFrom3(a[1])
+ 			}
+ 			break
+ 		}
+ 		if p.arch.Family == sys.ARM64 {
+ 			// Special 3-operand jumps.
+ 			// a[0] must be immediate constant; a[1] is a register.
+ 			if a[0].Type != obj.TYPE_CONST {
+ 				p.errorf("%s: expected immediate constant; found %s", op, obj.Dconv(prog, &a[0]))
+ 				return
+ 			}
+ 			prog.From = a[0]
+ 			prog.Reg = p.getRegister(prog, op, &a[1])
+ 			target = &a[2]
+ 			break
+ 		}
+ 		p.errorf("wrong number of arguments to %s instruction", op)
+ 		return
+ 	case 4:
+ 		if p.arch.Family == sys.S390X {
+ 			// 4-operand compare-and-branch.
+ 			prog.From = a[0]
+ 			prog.Reg = p.getRegister(prog, op, &a[1])
+ 			prog.SetFrom3(a[2])
+ 			target = &a[3]
+ 			break
+ 		}
+ 		p.errorf("wrong number of arguments to %s instruction", op)
+ 		return
+ 	default:
+ 		p.errorf("wrong number of arguments to %s instruction", op)
+ 		return
+ 	}
+ 	// Translate the target operand into prog.To according to its form.
+ 	switch {
+ 	case target.Type == obj.TYPE_BRANCH:
+ 		// JMP 4(PC)
+ 		prog.To = obj.Addr{
+ 			Type:   obj.TYPE_BRANCH,
+ 			Offset: p.pc + 1 + target.Offset, // +1 because p.pc is incremented in append, below.
+ 		}
+ 	case target.Type == obj.TYPE_REG:
+ 		// JMP R1
+ 		prog.To = *target
+ 	case target.Type == obj.TYPE_MEM && (target.Name == obj.NAME_EXTERN || target.Name == obj.NAME_STATIC):
+ 		// JMP main·morestack(SB)
+ 		prog.To = *target
+ 	case target.Type == obj.TYPE_INDIR && (target.Name == obj.NAME_EXTERN || target.Name == obj.NAME_STATIC):
+ 		// JMP *main·morestack(SB)
+ 		prog.To = *target
+ 		prog.To.Type = obj.TYPE_INDIR
+ 	case target.Type == obj.TYPE_MEM && target.Reg == 0 && target.Offset == 0:
+ 		// JMP exit
+ 		if target.Sym == nil {
+ 			// Parse error left name unset.
+ 			return
+ 		}
+ 		targetProg := p.labels[target.Sym.Name]
+ 		if targetProg == nil {
+ 			// Forward reference: resolved later by patch.
+ 			p.toPatch = append(p.toPatch, Patch{prog, target.Sym.Name})
+ 		} else {
+ 			p.branch(prog, targetProg)
+ 		}
+ 	case target.Type == obj.TYPE_MEM && target.Name == obj.NAME_NONE:
+ 		// JMP 4(R0)
+ 		prog.To = *target
+ 		// On the ppc64, 9a encodes BR (CTR) as BR CTR. We do the same.
+ 		if p.arch.Family == sys.PPC64 && target.Offset == 0 {
+ 			prog.To.Type = obj.TYPE_REG
+ 		}
+ 	case target.Type == obj.TYPE_CONST:
+ 		// JMP $4
+ 		// NOTE(review): this copies a[0], which equals the target only for
+ 		// single-operand jumps — confirm that is intended.
+ 		prog.To = a[0]
+ 	case target.Type == obj.TYPE_NONE:
+ 		// JMP
+ 	default:
+ 		p.errorf("cannot assemble jump %+v", target)
+ 		return
+ 	}
+
+ 	p.append(prog, cond, true)
+ }
+
+ // patch resolves every jump queued in p.toPatch against the labels defined
+ // so far and then empties the queue. If a label is undefined it reports the
+ // error and stops, leaving the queue as it was.
+ func (p *Parser) patch() {
+ 	for i := range p.toPatch {
+ 		pending := p.toPatch[i]
+ 		dest := p.labels[pending.label]
+ 		if dest == nil {
+ 			p.errorf("undefined label %s", pending.label)
+ 			return
+ 		}
+ 		p.branch(pending.prog, dest)
+ 	}
+ 	p.toPatch = p.toPatch[:0]
+ }
+
+ // branch makes jmp a branch to target: jmp.To is replaced by a fresh
+ // TYPE_BRANCH address whose Val is the target Prog.
+ func (p *Parser) branch(jmp, target *obj.Prog) {
+ 	dest := obj.Addr{Type: obj.TYPE_BRANCH}
+ 	dest.Val = target
+ 	jmp.To = dest
+ }
+
+ // asmInstruction assembles an instruction.
+ // MOVW R9, (R10)
+ //
+ // op is the opcode, cond is the condition text handed on to p.append, and a
+ // holds the parsed operands in source order. A new Prog is built and the
+ // operands are distributed over prog.From, prog.Reg, prog.To, prog.RegTo2
+ // and the SetFrom3/SetTo2/SetRestArgs slots, depending on len(a), the
+ // architecture family and, for some opcodes, the opcode class.
+ // Operand counts or addressing modes with no mapping are reported through
+ // p.errorf and the function returns without calling p.append.
+ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
+ // fmt.Printf("%s %+v\n", op, a)
+ prog := &obj.Prog{
+ Ctxt: p.ctxt,
+ Pos: p.pos(),
+ As: op,
+ }
+ switch len(a) {
+ case 0:
+ // Nothing to do.
+ case 1:
+ // A single operand is a destination only for opcodes flagged in
+ // UnaryDst, RET and GETCALLERPC; otherwise it is the source.
+ if p.arch.UnaryDst[op] || op == obj.ARET || op == obj.AGETCALLERPC {
+ // prog.From is no address.
+ prog.To = a[0]
+ } else {
+ prog.From = a[0]
+ // prog.To is no address.
+ }
+ if p.arch.Family == sys.PPC64 && arch.IsPPC64NEG(op) {
+ // NEG: From and To are both a[0].
+ prog.To = a[0]
+ prog.From = a[0]
+ break
+ }
+ case 2:
+ // Compare-style opcodes put the second operand in prog.Reg and leave
+ // prog.To unset; everything else is a plain From, To pair.
+ if p.arch.Family == sys.ARM {
+ if arch.IsARMCMP(op) {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ break
+ }
+ // Strange special cases.
+ if arch.IsARMFloatCmp(op) {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ break
+ }
+ } else if p.arch.Family == sys.ARM64 && arch.IsARM64CMP(op) {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ break
+ } else if p.arch.Family == sys.MIPS || p.arch.Family == sys.MIPS64 {
+ if arch.IsMIPSCMP(op) || arch.IsMIPSMUL(op) {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ break
+ }
+ }
+ prog.From = a[0]
+ prog.To = a[1]
+ case 3:
+ switch p.arch.Family {
+ case sys.MIPS, sys.MIPS64:
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.To = a[2]
+ case sys.ARM:
+ // Special cases.
+ if arch.IsARMSTREX(op) {
+ /*
+ STREX x, (y), z
+ from=(y) reg=x to=z
+ */
+ prog.From = a[1]
+ prog.Reg = p.getRegister(prog, op, &a[0])
+ prog.To = a[2]
+ break
+ }
+ if arch.IsARMBFX(op) {
+ // a[0] and a[1] must be constants, a[2] must be a register
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.To = a[2]
+ break
+ }
+ // Otherwise the 2nd operand (a[1]) must be a register.
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.To = a[2]
+ case sys.AMD64:
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.To = a[2]
+ case sys.ARM64:
+ switch {
+ case arch.IsARM64STLXR(op):
+ // ARM64 instructions with one input and two outputs.
+ prog.From = a[0]
+ prog.To = a[1]
+ if a[2].Type != obj.TYPE_REG {
+ p.errorf("invalid addressing modes for third operand to %s instruction, must be register", op)
+ return
+ }
+ prog.RegTo2 = a[2].Reg
+ case arch.IsARM64TBL(op):
+ // one of its inputs does not fit into prog.Reg.
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.To = a[2]
+ case arch.IsARM64CASP(op):
+ prog.From = a[0]
+ prog.To = a[1]
+ // both 1st operand and 3rd operand are (Rs, Rs+1) register pair.
+ // And the register pair must be contiguous.
+ if (a[0].Type != obj.TYPE_REGREG) || (a[2].Type != obj.TYPE_REGREG) {
+ p.errorf("invalid addressing modes for 1st or 3rd operand to %s instruction, must be register pair", op)
+ return
+ }
+ // For ARM64 CASP-like instructions, its 2nd destination operand is register pair(Rt, Rt+1) that can
+ // not fit into prog.RegTo2, so save it to the prog.RestArgs.
+ prog.SetTo2(a[2])
+ default:
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.To = a[2]
+ }
+ case sys.I386:
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.To = a[2]
+ case sys.PPC64:
+ if arch.IsPPC64CMP(op) {
+ // CMPW etc.; third argument is a CR register that goes into prog.Reg.
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[2])
+ prog.To = a[1]
+ break
+ }
+ // Arithmetic. Choices are:
+ // reg reg reg
+ // imm reg reg
+ // reg imm reg
+ // If the immediate is the middle argument, use From3.
+ switch a[1].Type {
+ case obj.TYPE_REG:
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.To = a[2]
+ case obj.TYPE_CONST:
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.To = a[2]
+ default:
+ p.errorf("invalid addressing modes for %s instruction", op)
+ return
+ }
+ case sys.RISCV64:
+ // RISCV64 instructions with one input and two outputs.
+ if arch.IsRISCV64AMO(op) {
+ prog.From = a[0]
+ prog.To = a[1]
+ if a[2].Type != obj.TYPE_REG {
+ p.errorf("invalid addressing modes for third operand to %s instruction, must be register", op)
+ return
+ }
+ prog.RegTo2 = a[2].Reg
+ break
+ }
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.To = a[2]
+ case sys.S390X:
+ // A register middle operand goes in prog.Reg; anything else in From3.
+ prog.From = a[0]
+ if a[1].Type == obj.TYPE_REG {
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ } else {
+ prog.SetFrom3(a[1])
+ }
+ prog.To = a[2]
+ default:
+ p.errorf("TODO: implement three-operand instructions for this architecture")
+ return
+ }
+ case 4:
+ if p.arch.Family == sys.ARM {
+ if arch.IsARMBFX(op) {
+ // a[0] and a[1] must be constants, a[2] and a[3] must be registers
+ prog.From = a[0]
+ prog.SetFrom3(a[1])
+ prog.Reg = p.getRegister(prog, op, &a[2])
+ prog.To = a[3]
+ break
+ }
+ if arch.IsARMMULA(op) {
+ // All must be registers.
+ // The calls whose results are discarded are made only for
+ // their operand validation.
+ p.getRegister(prog, op, &a[0])
+ r1 := p.getRegister(prog, op, &a[1])
+ r2 := p.getRegister(prog, op, &a[2])
+ p.getRegister(prog, op, &a[3])
+ prog.From = a[0]
+ prog.To = a[3]
+ prog.To.Type = obj.TYPE_REGREG2
+ prog.To.Offset = int64(r2)
+ prog.Reg = r1
+ break
+ }
+ }
+ if p.arch.Family == sys.AMD64 {
+ prog.From = a[0]
+ prog.SetRestArgs([]obj.Addr{a[1], a[2]})
+ prog.To = a[3]
+ break
+ }
+ if p.arch.Family == sys.ARM64 {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetFrom3(a[2])
+ prog.To = a[3]
+ break
+ }
+ if p.arch.Family == sys.PPC64 {
+ if arch.IsPPC64RLD(op) {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetFrom3(a[2])
+ prog.To = a[3]
+ break
+ } else if arch.IsPPC64ISEL(op) {
+ // ISEL BC,RB,RA,RT becomes isel rt,ra,rb,bc
+ prog.SetFrom3(a[2]) // ra
+ prog.From = a[0] // bc
+ prog.Reg = p.getRegister(prog, op, &a[1]) // rb
+ prog.To = a[3] // rt
+ break
+ }
+ // Else, it is a VA-form instruction
+ // reg reg reg reg
+ // imm reg reg reg
+ // Or a VX-form instruction
+ // imm imm reg reg
+ if a[1].Type == obj.TYPE_REG {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetFrom3(a[2])
+ prog.To = a[3]
+ break
+ } else if a[1].Type == obj.TYPE_CONST {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[2])
+ prog.SetFrom3(a[1])
+ prog.To = a[3]
+ break
+ } else {
+ p.errorf("invalid addressing modes for %s instruction", op)
+ return
+ }
+ }
+ if p.arch.Family == sys.RISCV64 {
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetRestArgs([]obj.Addr{a[2]})
+ prog.To = a[3]
+ break
+ }
+ if p.arch.Family == sys.S390X {
+ if a[1].Type != obj.TYPE_REG {
+ p.errorf("second operand must be a register in %s instruction", op)
+ return
+ }
+ prog.From = a[0]
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetFrom3(a[2])
+ prog.To = a[3]
+ break
+ }
+ p.errorf("can't handle %s instruction with 4 operands", op)
+ return
+ case 5:
+ if p.arch.Family == sys.PPC64 {
+ prog.From = a[0]
+ // Second arg is always a register type on ppc64.
+ prog.Reg = p.getRegister(prog, op, &a[1])
+ prog.SetRestArgs([]obj.Addr{a[2], a[3]})
+ prog.To = a[4]
+ break
+ }
+ if p.arch.Family == sys.AMD64 {
+ prog.From = a[0]
+ prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]})
+ prog.To = a[4]
+ break
+ }
+ if p.arch.Family == sys.S390X {
+ prog.From = a[0]
+ prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]})
+ prog.To = a[4]
+ break
+ }
+ p.errorf("can't handle %s instruction with 5 operands", op)
+ return
+ case 6:
+ if p.arch.Family == sys.ARM && arch.IsARMMRC(op) {
+ // Strange special case: MCR, MRC.
+ prog.To.Type = obj.TYPE_CONST
+ x0 := p.getConstant(prog, op, &a[0])
+ x1 := p.getConstant(prog, op, &a[1])
+ x2 := int64(p.getRegister(prog, op, &a[2]))
+ x3 := int64(p.getRegister(prog, op, &a[3]))
+ x4 := int64(p.getRegister(prog, op, &a[4]))
+ x5 := p.getConstant(prog, op, &a[5])
+ // Cond is handled specially for this instruction.
+ offset, MRC, ok := arch.ARMMRCOffset(op, cond, x0, x1, x2, x3, x4, x5)
+ if !ok {
+ // NOTE: no return here; the instruction is still appended below.
+ p.errorf("unrecognized condition code .%q", cond)
+ }
+ prog.To.Offset = offset
+ // The condition has been folded into the offset, so clear it
+ // before it reaches p.append.
+ cond = ""
+ prog.As = MRC // Both instructions are coded as MRC.
+ break
+ }
+ fallthrough
+ default:
+ p.errorf("can't handle %s instruction with %d operands", op, len(a))
+ return
+ }
+
+ p.append(prog, cond, true)
+ }
+
+ // newAddr allocates an Addr holding a copy of x and returns a pointer to it.
+ func newAddr(x obj.Addr) *obj.Addr {
+ copied := x
+ return &copied
+ }
+
+ // symbolName returns the name of addr's symbol, or a placeholder string
+ // when addr has no symbol attached.
+ func symbolName(addr *obj.Addr) string {
+ if addr.Sym == nil {
+ return "<erroneous symbol>"
+ }
+ return addr.Sym.Name
+ }
+
+ // emptyProg is a zero-valued Prog handed to obj.Dconv when an address has
+ // to be formatted without an instruction of its own.
+ var emptyProg obj.Prog
+
+ // getConstantPseudo returns addr.Offset as the constant operand of the
+ // pseudo-instruction named pseudo. An error is reported unless addr is a
+ // TYPE_MEM address with no name, register or index; the offset is returned
+ // in either case.
+ func (p *Parser) getConstantPseudo(pseudo string, addr *obj.Addr) int64 {
+ plain := addr.Type == obj.TYPE_MEM && addr.Name == 0 && addr.Reg == 0 && addr.Index == 0
+ if !plain {
+ p.errorf("%s: expected integer constant; found %s", pseudo, obj.Dconv(&emptyProg, addr))
+ }
+ return addr.Offset
+ }
+
+ // getConstant returns addr.Offset as a constant operand of op. An error is
+ // reported unless addr is a TYPE_MEM address with no name, register or
+ // index; the offset is returned in either case.
+ func (p *Parser) getConstant(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
+ plain := addr.Type == obj.TYPE_MEM && addr.Name == 0 && addr.Reg == 0 && addr.Index == 0
+ if !plain {
+ p.errorf("%s: expected integer constant; found %s", op, obj.Dconv(prog, addr))
+ }
+ return addr.Offset
+ }
+
+ // getImmediate returns addr.Offset as an immediate operand of op. An error
+ // is reported unless addr is a TYPE_CONST address with no name, register
+ // or index; the offset is returned in either case.
+ func (p *Parser) getImmediate(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
+ immediate := addr.Type == obj.TYPE_CONST && addr.Name == 0 && addr.Reg == 0 && addr.Index == 0
+ if !immediate {
+ p.errorf("%s: expected immediate constant; found %s", op, obj.Dconv(prog, addr))
+ }
+ return addr.Offset
+ }
+
+ // getRegister returns addr.Reg as a register operand of op. An error is
+ // reported unless addr is a TYPE_REG address with zero offset, no name and
+ // no index; the register is returned in either case.
+ func (p *Parser) getRegister(prog *obj.Prog, op obj.As, addr *obj.Addr) int16 {
+ bare := addr.Type == obj.TYPE_REG && addr.Offset == 0 && addr.Name == 0 && addr.Index == 0
+ if !bare {
+ p.errorf("%s: expected register; found %s", op, obj.Dconv(prog, addr))
+ }
+ return addr.Reg
+ }
diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go
new file mode 100644
index 0000000..ead8b27
--- /dev/null
+++ b/src/cmd/asm/internal/asm/endtoend_test.go
@@ -0,0 +1,464 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "internal/buildcfg"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "testing"
+
+ "cmd/asm/internal/lex"
+ "cmd/internal/obj"
+)
+
+// An end-to-end test for the assembler: Do we print what we parse?
+// Output is generated by, in effect, turning on -S and comparing the
+// result against a golden file.
+
+ // testEndToEnd assembles testdata/<file>.s for goarch and checks two things.
+ //
+ // First, the listing the assembler wrote to testOut is compared line by
+ // line with the form expected from each source line. The expected form is
+ // the instruction text itself, or the "// printed form" comment when one
+ // is present, with spacing canonicalized and relative N(PC) operands
+ // converted to absolute ones.
+ //
+ // Second, after obj.Flushplist has run, the bytes encoded for each
+ // instruction are compared with the hex encoding given in a trailing
+ // comment, if any. Alternatives may be separated by " or ", and "empty"
+ // stands for no bytes.
+ //
+ // All mismatches are reported through t.Errorf.
+ func testEndToEnd(t *testing.T, goarch, file string) {
+ input := filepath.Join("testdata", file+".s")
+ architecture, ctxt := setArch(goarch)
+ architecture.Init(ctxt)
+ lexer := lex.NewLexer(input)
+ parser := NewParser(ctxt, architecture, lexer, false)
+ pList := new(obj.Plist)
+ var ok bool
+ testOut = new(bytes.Buffer) // The assembler writes test output to this buffer.
+ ctxt.Bso = bufio.NewWriter(os.Stdout)
+ ctxt.IsAsm = true
+ defer ctxt.Bso.Flush()
+ failed := false
+ ctxt.DiagFunc = func(format string, args ...interface{}) {
+ failed = true
+ t.Errorf(format, args...)
+ }
+ pList.Firstpc, ok = parser.Parse()
+ if !ok || failed {
+ t.Errorf("asm: %s assembly failed", goarch)
+ return
+ }
+ output := strings.Split(testOut.String(), "\n")
+
+ // Reconstruct expected output by independently "parsing" the input.
+ data, err := ioutil.ReadFile(input)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ lineno := 0
+ seq := 0
+ // hexByLine maps "file:line" to the expected hex encoding for that line.
+ hexByLine := map[string]string{}
+ lines := strings.SplitAfter(string(data), "\n")
+ Diff:
+ for _, line := range lines {
+ lineno++
+
+ // Ignore include of textflag.h.
+ if strings.HasPrefix(line, "#include ") {
+ continue
+ }
+
+ // The general form of a test input line is:
+ // // comment
+ // INST args [// printed form] [// hex encoding]
+ parts := strings.Split(line, "//")
+ printed := strings.TrimSpace(parts[0])
+ if printed == "" || strings.HasSuffix(printed, ":") { // empty or label
+ continue
+ }
+ seq++
+
+ var hexes string
+ switch len(parts) {
+ default:
+ t.Errorf("%s:%d: unable to understand comments: %s", input, lineno, line)
+ case 1:
+ // no comment
+ case 2:
+ // might be printed form or hex
+ note := strings.TrimSpace(parts[1])
+ if isHexes(note) {
+ hexes = note
+ } else {
+ printed = note
+ }
+ case 3:
+ // printed form, then hex
+ printed = strings.TrimSpace(parts[1])
+ hexes = strings.TrimSpace(parts[2])
+ if !isHexes(hexes) {
+ t.Errorf("%s:%d: malformed hex instruction encoding: %s", input, lineno, line)
+ }
+ }
+
+ if hexes != "" {
+ hexByLine[fmt.Sprintf("%s:%d", input, lineno)] = hexes
+ }
+
+ // Canonicalize spacing in printed form.
+ // First field is opcode, then tab, then arguments separated by spaces.
+ // Canonicalize spaces after commas first.
+ // Comma to separate argument gets a space; comma within does not.
+ var buf []byte
+ nest := 0
+ for i := 0; i < len(printed); i++ {
+ c := printed[i]
+ switch c {
+ case '{', '[':
+ nest++
+ case '}', ']':
+ nest--
+ case ',':
+ buf = append(buf, ',')
+ if nest == 0 {
+ buf = append(buf, ' ')
+ }
+ for i+1 < len(printed) && (printed[i+1] == ' ' || printed[i+1] == '\t') {
+ i++
+ }
+ continue
+ }
+ buf = append(buf, c)
+ }
+
+ f := strings.Fields(string(buf))
+
+ // Turn relative (PC) into absolute (PC) automatically,
+ // so that most branch instructions don't need comments
+ // giving the absolute form.
+ if len(f) > 0 && strings.HasSuffix(printed, "(PC)") {
+ last := f[len(f)-1]
+ n, err := strconv.Atoi(last[:len(last)-len("(PC)")])
+ if err == nil {
+ f[len(f)-1] = fmt.Sprintf("%d(PC)", seq+n)
+ }
+ }
+
+ if len(f) == 1 {
+ printed = f[0]
+ } else {
+ printed = f[0] + "\t" + strings.Join(f[1:], " ")
+ }
+
+ want := fmt.Sprintf("%05d (%s:%d)\t%s", seq, input, lineno, printed)
+ // Consume output lines that sort before want, or that carry the same
+ // five-digit sequence number but differ from it.
+ for len(output) > 0 && (output[0] < want || output[0] != want && len(output[0]) >= 5 && output[0][:5] == want[:5]) {
+ if len(output[0]) >= 5 && output[0][:5] == want[:5] {
+ t.Errorf("mismatched output:\nhave %s\nwant %s", output[0], want)
+ output = output[1:]
+ continue Diff
+ }
+ t.Errorf("unexpected output: %q", output[0])
+ output = output[1:]
+ }
+ if len(output) > 0 && output[0] == want {
+ output = output[1:]
+ } else {
+ t.Errorf("missing output: %q", want)
+ }
+ }
+ for len(output) > 0 {
+ if output[0] == "" {
+ // spurious blank caused by Split on "\n"
+ output = output[1:]
+ continue
+ }
+ t.Errorf("unexpected output: %q", output[0])
+ output = output[1:]
+ }
+
+ // Checked printing.
+ // Now check machine code layout.
+
+ top := pList.Firstpc
+ var text *obj.LSym
+ ok = true
+ ctxt.DiagFunc = func(format string, args ...interface{}) {
+ t.Errorf(format, args...)
+ ok = false
+ }
+ obj.Flushplist(ctxt, pList, nil, "")
+
+ for p := top; p != nil; p = p.Link {
+ if p.As == obj.ATEXT {
+ text = p.From.Sym
+ }
+ hexes := hexByLine[p.Line()]
+ if hexes == "" {
+ continue
+ }
+ delete(hexByLine, p.Line())
+ if text == nil {
+ t.Errorf("%s: instruction outside TEXT", p)
+ // There is no symbol to read code from; skip this instruction
+ // rather than dereference a nil symbol below.
+ continue
+ }
+ // The instruction's size is the distance to the next instruction,
+ // or Isize / the rest of the symbol for the last one.
+ size := int64(len(text.P)) - p.Pc
+ if p.Link != nil {
+ size = p.Link.Pc - p.Pc
+ } else if p.Isize != 0 {
+ size = int64(p.Isize)
+ }
+ var code []byte
+ if p.Pc < int64(len(text.P)) {
+ code = text.P[p.Pc:]
+ if size < int64(len(code)) {
+ code = code[:size]
+ }
+ }
+ codeHex := fmt.Sprintf("%x", code)
+ if codeHex == "" {
+ codeHex = "empty"
+ }
+ ok := false
+ for _, hex := range strings.Split(hexes, " or ") {
+ if codeHex == hex {
+ ok = true
+ break
+ }
+ }
+ if !ok {
+ t.Errorf("%s: have encoding %s, want %s", p, codeHex, hexes)
+ }
+ }
+
+ // Anything left in hexByLine had an expected encoding but no
+ // instruction was seen for that line.
+ if len(hexByLine) > 0 {
+ var missing []string
+ for key := range hexByLine {
+ missing = append(missing, key)
+ }
+ sort.Strings(missing)
+ for _, line := range missing {
+ t.Errorf("%s: did not find instruction encoding", line)
+ }
+ }
+
+ }
+
+ // isHexes reports whether s looks like an expected-encoding note: either
+ // the word "empty", or one or more alternatives separated by " or ", each
+ // a non-empty, even-length run of lower-case hexadecimal digits.
+ func isHexes(s string) bool {
+ switch s {
+ case "":
+ return false
+ case "empty":
+ return true
+ }
+ alternatives := strings.Split(s, " or ")
+ for i := range alternatives {
+ alt := alternatives[i]
+ if len(alt) == 0 || len(alt)%2 == 1 {
+ return false
+ }
+ for j := 0; j < len(alt); j++ {
+ c := alt[j]
+ if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
+ return false
+ }
+ }
+ }
+ return true
+ }
+
+ // It would be nice if the error messages always began with
+ // the standard file:line: prefix,
+ // but that's not where we are today.
+ // It might be at the beginning but it might be in the middle of the printed instruction.
+ //
+ // fileLineRE captures a "testdata/<name>.s:<line>" position, where <name>
+ // consists of digits and lower-case letters. The position must start the
+ // string or follow "(", and must end the string or be followed by ")" or ":".
+ var fileLineRE = regexp.MustCompile(`(?:^|\()(testdata[/\\][0-9a-z]+\.s:[0-9]+)(?:$|\)|:)`)
+
+ // Same as in test/run.go
+ var (
+ // errRE matches a "// ERROR" annotation and captures the text after it.
+ errRE = regexp.MustCompile(`// ERROR ?(.*)`)
+ // errQuotesRE captures the contents of a double-quoted string.
+ errQuotesRE = regexp.MustCompile(`"([^"]*)"`)
+ )
+
+ // testErrors assembles testdata/<file>.s for goarch, expecting it to fail,
+ // and checks the reported errors against the "// ERROR "..."" annotations
+ // in the source. Every annotated line must produce an error containing the
+ // quoted text, and no unannotated line may produce one.
+ // The only flag recognized in flags is "dynlink", which sets ctxt.Flag_dynlink.
+ func testErrors(t *testing.T, goarch, file string, flags ...string) {
+ input := filepath.Join("testdata", file+".s")
+ architecture, ctxt := setArch(goarch)
+ lexer := lex.NewLexer(input)
+ parser := NewParser(ctxt, architecture, lexer, false)
+ pList := new(obj.Plist)
+ var ok bool
+ testOut = new(bytes.Buffer) // The assembler writes test output to this buffer.
+ ctxt.Bso = bufio.NewWriter(os.Stdout)
+ ctxt.IsAsm = true
+ defer ctxt.Bso.Flush()
+ failed := false
+ // Parser errors and obj diagnostics are both collected in errBuf.
+ var errBuf bytes.Buffer
+ parser.errorWriter = &errBuf
+ ctxt.DiagFunc = func(format string, args ...interface{}) {
+ failed = true
+ s := fmt.Sprintf(format, args...)
+ if !strings.HasSuffix(s, "\n") {
+ s += "\n"
+ }
+ errBuf.WriteString(s)
+ }
+ for _, flag := range flags {
+ switch flag {
+ case "dynlink":
+ ctxt.Flag_dynlink = true
+ default:
+ t.Errorf("unknown flag %s", flag)
+ }
+ }
+ pList.Firstpc, ok = parser.Parse()
+ obj.Flushplist(ctxt, pList, nil, "")
+ if ok && !failed {
+ t.Errorf("asm: %s had no errors", file)
+ }
+
+ // errors maps "file:line" to the error line reported for that position.
+ errors := map[string]string{}
+ for _, line := range strings.Split(errBuf.String(), "\n") {
+ // Blank lines and tab-indented lines are not treated as errors.
+ if line == "" || strings.HasPrefix(line, "\t") {
+ continue
+ }
+ m := fileLineRE.FindStringSubmatch(line)
+ if m == nil {
+ t.Errorf("unexpected error: %v", line)
+ continue
+ }
+ fileline := m[1]
+ if errors[fileline] != "" && errors[fileline] != line {
+ t.Errorf("multiple errors on %s:\n\t%s\n\t%s", fileline, errors[fileline], line)
+ continue
+ }
+ errors[fileline] = line
+ }
+
+ // Reconstruct expected errors by independently "parsing" the input.
+ data, err := ioutil.ReadFile(input)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ lineno := 0
+ lines := strings.Split(string(data), "\n")
+ for _, line := range lines {
+ lineno++
+
+ fileline := fmt.Sprintf("%s:%d", input, lineno)
+ if m := errRE.FindStringSubmatch(line); m != nil {
+ all := m[1]
+ mm := errQuotesRE.FindAllStringSubmatch(all, -1)
+ // Exactly one quoted string is allowed per annotation.
+ if len(mm) != 1 {
+ t.Errorf("%s: invalid errorcheck line:\n%s", fileline, line)
+ } else if err := errors[fileline]; err == "" {
+ t.Errorf("%s: missing error, want %s", fileline, all)
+ } else if !strings.Contains(err, mm[0][1]) {
+ t.Errorf("%s: wrong error for %s:\n%s", fileline, all, err)
+ }
+ } else {
+ if errors[fileline] != "" {
+ t.Errorf("unexpected error on %s: %v", fileline, errors[fileline])
+ }
+ }
+ delete(errors, fileline)
+ }
+ // Whatever remains refers to positions that were not visited above.
+ var extra []string
+ for key := range errors {
+ extra = append(extra, key)
+ }
+ sort.Strings(extra)
+ for _, fileline := range extra {
+ t.Errorf("unexpected error on %s: %v", fileline, errors[fileline])
+ }
+ }
+
+ // Test386EndToEnd runs the end-to-end check on testdata/386.s for 386.
+ func Test386EndToEnd(t *testing.T) {
+ testEndToEnd(t, "386", "386")
+ }
+
+ // TestARMEndToEnd runs the end-to-end check on testdata/arm.s with
+ // buildcfg.GOARM set to 5, 6 and 7 in turn, and additionally on
+ // testdata/armv6.s when GOARM is 6. The original GOARM value is restored
+ // when the test returns.
+ func TestARMEndToEnd(t *testing.T) {
+ saved := buildcfg.GOARM
+ defer func() { buildcfg.GOARM = saved }()
+ for goarm := 5; goarm <= 7; goarm++ {
+ t.Logf("GOARM=%d", goarm)
+ buildcfg.GOARM = goarm
+ testEndToEnd(t, "arm", "arm")
+ if goarm != 6 {
+ continue
+ }
+ testEndToEnd(t, "arm", "armv6")
+ }
+ }
+
+ // TestGoBuildErrors checks the errors expected from testdata/buildtagerror.s on amd64.
+ func TestGoBuildErrors(t *testing.T) {
+ testErrors(t, "amd64", "buildtagerror")
+ }
+
+ // TestARMErrors checks the errors expected from testdata/armerror.s on arm.
+ func TestARMErrors(t *testing.T) {
+ testErrors(t, "arm", "armerror")
+ }
+
+ // TestARM64EndToEnd runs the end-to-end check on testdata/arm64.s for arm64.
+ func TestARM64EndToEnd(t *testing.T) {
+ testEndToEnd(t, "arm64", "arm64")
+ }
+
+ // TestARM64Encoder runs the end-to-end check on testdata/arm64enc.s for arm64.
+ func TestARM64Encoder(t *testing.T) {
+ testEndToEnd(t, "arm64", "arm64enc")
+ }
+
+ // TestARM64Errors checks the errors expected from testdata/arm64error.s on arm64.
+ func TestARM64Errors(t *testing.T) {
+ testErrors(t, "arm64", "arm64error")
+ }
+
+ // TestAMD64EndToEnd runs the end-to-end check on testdata/amd64.s for amd64.
+ func TestAMD64EndToEnd(t *testing.T) {
+ testEndToEnd(t, "amd64", "amd64")
+ }
+
+ // Test386Encoder runs the end-to-end check on testdata/386enc.s for 386.
+ func Test386Encoder(t *testing.T) {
+ testEndToEnd(t, "386", "386enc")
+ }
+
+ // TestAMD64Encoder runs the end-to-end check for amd64 on each of the
+ // encoder test files listed below (paths relative to testdata, without
+ // the .s extension), in order.
+ func TestAMD64Encoder(t *testing.T) {
+ encoderFiles := []string{
+ "amd64enc",
+ "amd64enc_extra",
+ "avx512enc/aes_avx512f",
+ "avx512enc/gfni_avx512f",
+ "avx512enc/vpclmulqdq_avx512f",
+ "avx512enc/avx512bw",
+ "avx512enc/avx512cd",
+ "avx512enc/avx512dq",
+ "avx512enc/avx512er",
+ "avx512enc/avx512f",
+ "avx512enc/avx512pf",
+ "avx512enc/avx512_4fmaps",
+ "avx512enc/avx512_4vnniw",
+ "avx512enc/avx512_bitalg",
+ "avx512enc/avx512_ifma",
+ "avx512enc/avx512_vbmi",
+ "avx512enc/avx512_vbmi2",
+ "avx512enc/avx512_vnni",
+ "avx512enc/avx512_vpopcntdq",
+ }
+ for i := range encoderFiles {
+ testEndToEnd(t, "amd64", encoderFiles[i])
+ }
+ }
+
+ // TestAMD64Errors checks the errors expected from testdata/amd64error.s on amd64.
+ func TestAMD64Errors(t *testing.T) {
+ testErrors(t, "amd64", "amd64error")
+ }
+
+ // TestAMD64DynLinkErrors checks the errors expected from
+ // testdata/amd64dynlinkerror.s on amd64 with the "dynlink" flag enabled.
+ func TestAMD64DynLinkErrors(t *testing.T) {
+ testErrors(t, "amd64", "amd64dynlinkerror", "dynlink")
+ }
+
+ // TestMIPSEndToEnd runs the end-to-end check on testdata/mips.s for mips
+ // and then on testdata/mips64.s for mips64.
+ func TestMIPSEndToEnd(t *testing.T) {
+ for _, variant := range []string{"mips", "mips64"} {
+ testEndToEnd(t, variant, variant)
+ }
+ }
+
+ // TestPPC64EndToEnd runs the end-to-end check on testdata/ppc64.s for ppc64.
+ func TestPPC64EndToEnd(t *testing.T) {
+ testEndToEnd(t, "ppc64", "ppc64")
+ }
+
+ // TestRISCVEndToEnd runs the end-to-end check on testdata/riscv64.s for riscv64.
+ func TestRISCVEndToEnd(t *testing.T) {
+ testEndToEnd(t, "riscv64", "riscv64")
+ }
+
+ // TestRISCVErrors checks the errors expected from testdata/riscv64error.s on riscv64.
+ func TestRISCVErrors(t *testing.T) {
+ testErrors(t, "riscv64", "riscv64error")
+ }
+
+ // TestS390XEndToEnd runs the end-to-end check on testdata/s390x.s for s390x.
+ func TestS390XEndToEnd(t *testing.T) {
+ testEndToEnd(t, "s390x", "s390x")
+ }
diff --git a/src/cmd/asm/internal/asm/expr_test.go b/src/cmd/asm/internal/asm/expr_test.go
new file mode 100644
index 0000000..e9c92df
--- /dev/null
+++ b/src/cmd/asm/internal/asm/expr_test.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "cmd/asm/internal/lex"
+ "strings"
+ "testing"
+ "text/scanner"
+)
+
+ // exprTest is one case for TestExpr.
+ type exprTest struct {
+ input string // expression text to evaluate
+ output int64 // expected value
+ atEOF bool // whether the token after the expression should be EOF
+ }
+
+ // exprTests lists expressions that must evaluate without error, together
+ // with their expected values.
+ var exprTests = []exprTest{
+ // Simple
+ {"0", 0, true},
+ {"3", 3, true},
+ {"070", 8 * 7, true},
+ {"0x0f", 15, true},
+ {"0xFF", 255, true},
+ {"9223372036854775807", 9223372036854775807, true}, // max int64
+ // Unary
+ {"-0", 0, true},
+ {"~0", -1, true},
+ {"~0*0", 0, true},
+ {"+3", 3, true},
+ {"-3", -3, true},
+ {"-9223372036854775808", -9223372036854775808, true}, // min int64
+ // Binary
+ {"3+4", 3 + 4, true},
+ {"3-4", 3 - 4, true},
+ {"2|5", 2 | 5, true},
+ {"3^4", 3 ^ 4, true},
+ {"3*4", 3 * 4, true},
+ {"14/4", 14 / 4, true},
+ {"3<<4", 3 << 4, true},
+ {"48>>3", 48 >> 3, true},
+ {"3&9", 3 & 9, true},
+ // General
+ {"3*2+3", 3*2 + 3, true},
+ {"3+2*3", 3 + 2*3, true},
+ {"3*(2+3)", 3 * (2 + 3), true},
+ {"3*-(2+3)", 3 * -(2 + 3), true},
+ {"3<<2+4", 3<<2 + 4, true},
+ {"3<<2+4", 3<<2 + 4, true},
+ {"3<<(2+4)", 3 << (2 + 4), true},
+ // Junk at EOF.
+ {"3 x", 3, false},
+ // Big number
+ {"4611686018427387904", 4611686018427387904, true},
+ }
+
+ // TestExpr evaluates every entry of exprTests with the parser's expression
+ // evaluator and checks both the resulting value and whether the next token
+ // is EOF, as the entry demands.
+ func TestExpr(t *testing.T) {
+ p := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser.
+ for i := range exprTests {
+ tc := exprTests[i]
+ p.start(lex.Tokenize(tc.input))
+ if got := int64(p.expr()); got != tc.output {
+ t.Errorf("%d: %q evaluated to %d; expected %d", i, tc.input, got, tc.output)
+ }
+ tok := p.next()
+ sawEOF := tok.ScanToken == scanner.EOF
+ switch {
+ case tc.atEOF && !sawEOF:
+ t.Errorf("%d: %q: at EOF got %s", i, tc.input, tok)
+ case !tc.atEOF && sawEOF:
+ t.Errorf("%d: %q: expected not EOF but at EOF", i, tc.input)
+ }
+ }
+ }
+
+ // badExprTest is one case for TestBadExpr.
+ type badExprTest struct {
+ input string // expression text to evaluate
+ error string // Empty means no error.
+ }
+
+ // badExprTests lists expressions together with a substring of the error
+ // each is expected to produce; an empty substring means no error.
+ var badExprTests = []badExprTest{
+ {"0/0", "division by zero"},
+ {"3/0", "division by zero"},
+ {"(1<<63)/0", "divide of value with high bit set"},
+ {"3%0", "modulo by zero"},
+ {"(1<<63)%0", "modulo of value with high bit set"},
+ {"3<<-4", "negative left shift count"},
+ {"3<<(1<<63)", "negative left shift count"},
+ {"3>>-4", "negative right shift count"},
+ {"3>>(1<<63)", "negative right shift count"},
+ {"(1<<63)>>2", "right shift of value with high bit set"},
+ {"(1<<62)>>2", ""},
+ {`'\x80'`, "illegal UTF-8 encoding for character constant"},
+ {"(23*4", "missing closing paren"},
+ {")23*4", "unexpected ) evaluating expression"},
+ {"18446744073709551616", "value out of range"},
+ }
+
+ // TestBadExpr evaluates every entry of badExprTests and checks the outcome
+ // against the entry's expected error text:
+ // - a non-empty expectation requires an error containing that text;
+ // - an empty expectation requires that no error occurs.
+ func TestBadExpr(t *testing.T) {
+ for i, test := range badExprTests {
+ err := runBadTest(i, test, t)
+ switch {
+ case err == nil && test.error != "":
+ t.Errorf("#%d: %q: expected error %q; got none", i, test.input, test.error)
+ case err != nil && test.error == "":
+ // Every string contains "", so a substring check alone would let
+ // an unexpected error pass silently.
+ t.Errorf("#%d: %q: expected no error; got %q", i, test.input, err)
+ case err != nil && !strings.Contains(err.Error(), test.error):
+ t.Errorf("#%d: expected error %q; got %q", i, test.error, err)
+ }
+ }
+ }
+
+ // runBadTest evaluates test.input as an expression under tryParse and
+ // returns whatever error tryParse reports. The index i is not used.
+ func runBadTest(i int, test badExprTest, t *testing.T) (err error) {
+ parser := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser.
+ parser.start(lex.Tokenize(test.input))
+ evaluate := func() {
+ parser.expr()
+ }
+ return tryParse(t, evaluate)
+ }
diff --git a/src/cmd/asm/internal/asm/line_test.go b/src/cmd/asm/internal/asm/line_test.go
new file mode 100644
index 0000000..da857ce
--- /dev/null
+++ b/src/cmd/asm/internal/asm/line_test.go
@@ -0,0 +1,55 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "cmd/asm/internal/lex"
+ "strings"
+ "testing"
+)
+
+ // badInstTest pairs an instruction line with a substring of the error that
+ // parsing it is expected to produce.
+ type badInstTest struct {
+ input, error string
+ }
+
+ // TestAMD64BadInstParser checks that each malformed amd64 instruction
+ // below is rejected with the expected error text.
+ func TestAMD64BadInstParser(t *testing.T) {
+ testBadInstParser(t, "amd64", []badInstTest{
+ // Test AVX512 suffixes.
+ {"VADDPD.A X0, X1, X2", `unknown suffix "A"`},
+ {"VADDPD.A.A X0, X1, X2", `unknown suffix "A"; duplicate suffix "A"`},
+ {"VADDPD.A.A.A X0, X1, X2", `unknown suffix "A"; duplicate suffix "A"`},
+ {"VADDPD.A.B X0, X1, X2", `unknown suffix "A"; unknown suffix "B"`},
+ {"VADDPD.Z.A X0, X1, X2", `Z suffix should be the last; unknown suffix "A"`},
+ {"VADDPD.Z.Z X0, X1, X2", `Z suffix should be the last; duplicate suffix "Z"`},
+ {"VADDPD.SAE.BCST X0, X1, X2", `can't combine rounding/SAE and broadcast`},
+ {"VADDPD.BCST.SAE X0, X1, X2", `can't combine rounding/SAE and broadcast`},
+ {"VADDPD.BCST.Z.SAE X0, X1, X2", `Z suffix should be the last; can't combine rounding/SAE and broadcast`},
+ {"VADDPD.SAE.SAE X0, X1, X2", `duplicate suffix "SAE"`},
+ {"VADDPD.RZ_SAE.SAE X0, X1, X2", `bad suffix combination`},
+
+ // BSWAP on 16-bit registers is undefined. See #29167.
+ {"BSWAPW DX", `unrecognized instruction`},
+ {"BSWAPW R11", `unrecognized instruction`},
+ })
+ }
+
+ // testBadInstParser parses each test input as a one-line program for
+ // goarch, using a fresh architecture, context and parser per case, and
+ // reports a failure when parsing yields no error or an error that does not
+ // contain the expected text.
+ func testBadInstParser(t *testing.T, goarch string, tests []badInstTest) {
+ for i, test := range tests {
+ arch, ctxt := setArch(goarch)
+ source := strings.NewReader(test.input + "\n")
+ tokenizer := lex.NewTokenizer("", source, nil)
+ parser := NewParser(ctxt, arch, tokenizer, false)
+
+ err := tryParse(t, func() {
+ parser.Parse()
+ })
+
+ if err == nil {
+ t.Errorf("#%d: %q: want error %q; have none", i, test.input, test.error)
+ continue
+ }
+ if !strings.Contains(err.Error(), test.error) {
+ t.Errorf("#%d: %q: want error %q; have %q", i, test.input, test.error, err)
+ }
+ }
+ }
diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go
new file mode 100644
index 0000000..c1295a0
--- /dev/null
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -0,0 +1,944 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "internal/buildcfg"
+ "strings"
+ "testing"
+
+ "cmd/asm/internal/arch"
+ "cmd/asm/internal/lex"
+ "cmd/internal/obj"
+)
+
+// A simple in-out test: Do we print what we parse?
+
+ // setArch sets buildcfg.GOOS to "linux" and buildcfg.GOARCH to goarch,
+ // then returns the architecture description for goarch together with a
+ // new link context for it. It panics if goarch is not recognized.
+ func setArch(goarch string) (*arch.Arch, *obj.Link) {
+ buildcfg.GOOS = "linux" // obj can handle this OS for all architectures.
+ buildcfg.GOARCH = goarch
+ if architecture := arch.Set(goarch, false); architecture != nil {
+ return architecture, obj.Linknew(architecture.LinkArch)
+ }
+ panic("asm: unrecognized architecture " + goarch)
+ }
+
+ // newParser returns a Parser for goarch that has no lexer attached.
+ func newParser(goarch string) *Parser {
+ a, linkCtxt := setArch(goarch)
+ return NewParser(linkCtxt, a, nil, false)
+ }
+
+ // tryParse executes parse func in panicOnError=true context.
+ // parse is expected to call any parsing methods that may panic.
+ // Returns error gathered from recover; nil if no parse errors occurred.
+ //
+ // For unexpected panics, calls t.Fatal.
+ func tryParse(t *testing.T, parse func()) (err error) {
+ panicOnError = true
+ defer func() {
+ // Always restore the flag, whether or not parse panicked.
+ panicOnError = false
+
+ e := recover()
+ var ok bool
+ // A recovered error value becomes the named result err; any other
+ // non-nil recovered value is treated as an unexpected panic.
+ if err, ok = e.(error); e != nil && !ok {
+ t.Fatal(e)
+ }
+ }()
+
+ parse()
+
+ return nil
+ }
+
+func testBadOperandParser(t *testing.T, parser *Parser, tests []badOperandTest) {
+ for _, test := range tests {
+ err := tryParse(t, func() {
+ parser.start(lex.Tokenize(test.input))
+ addr := obj.Addr{}
+ parser.operand(&addr)
+ })
+
+ switch {
+ case err == nil:
+ t.Errorf("fail at %s: got no errors; expected %s\n", test.input, test.error)
+ case !strings.Contains(err.Error(), test.error):
+ t.Errorf("fail at %s: got %s; expected %s", test.input, err, test.error)
+ }
+ }
+}
+
+ // testOperandParser parses each test input as a single operand, formats
+ // the resulting address, and compares the text with the expected output.
+ // When parser.compilingRuntime is set the address is formatted with
+ // obj.DconvWithABIDetail, otherwise with obj.Dconv.
+ func testOperandParser(t *testing.T, parser *Parser, tests []operandTest) {
+ for _, test := range tests {
+ parser.start(lex.Tokenize(test.input))
+ var addr obj.Addr
+ parser.operand(&addr)
+ var got string
+ if !parser.compilingRuntime {
+ got = obj.Dconv(&emptyProg, &addr)
+ } else {
+ got = obj.DconvWithABIDetail(&emptyProg, &addr)
+ }
+ if got == test.output {
+ continue
+ }
+ t.Errorf("fail at %s: got %s; expected %s\n", test.input, got, test.output)
+ }
+ }
+
+ // TestAMD64OperandParser runs the amd64 good and bad operand tables, then
+ // repeats with parser.compilingRuntime set using the runtime-specific tables.
+ func TestAMD64OperandParser(t *testing.T) {
+ parser := newParser("amd64")
+ testOperandParser(t, parser, amd64OperandTests)
+ testBadOperandParser(t, parser, amd64BadOperandTests)
+ // Switch the same parser into runtime mode for the second pair of tables.
+ parser.compilingRuntime = true
+ testOperandParser(t, parser, amd64RuntimeOperandTests)
+ testBadOperandParser(t, parser, amd64BadOperandRuntimeTests)
+ }
+
+ // Test386OperandParser runs the x86 operand table through a 386 parser.
+ func Test386OperandParser(t *testing.T) {
+ parser := newParser("386")
+ testOperandParser(t, parser, x86OperandTests)
+ }
+
+ // TestARMOperandParser runs the arm operand table through an arm parser.
+ func TestARMOperandParser(t *testing.T) {
+ parser := newParser("arm")
+ testOperandParser(t, parser, armOperandTests)
+ }
+ // TestARM64OperandParser runs the arm64 operand table through an arm64 parser.
+ func TestARM64OperandParser(t *testing.T) {
+ parser := newParser("arm64")
+ testOperandParser(t, parser, arm64OperandTests)
+ }
+
+ // TestPPC64OperandParser runs the ppc64 operand table through a ppc64 parser.
+ func TestPPC64OperandParser(t *testing.T) {
+ parser := newParser("ppc64")
+ testOperandParser(t, parser, ppc64OperandTests)
+ }
+
+ // TestMIPSOperandParser runs the mips operand table through a mips parser.
+ func TestMIPSOperandParser(t *testing.T) {
+ parser := newParser("mips")
+ testOperandParser(t, parser, mipsOperandTests)
+ }
+
+ // TestMIPS64OperandParser runs the mips64 operand table through a mips64 parser.
+ func TestMIPS64OperandParser(t *testing.T) {
+ parser := newParser("mips64")
+ testOperandParser(t, parser, mips64OperandTests)
+ }
+
+ // TestS390XOperandParser runs the s390x operand table through an s390x parser.
+ func TestS390XOperandParser(t *testing.T) {
+ parser := newParser("s390x")
+ testOperandParser(t, parser, s390xOperandTests)
+ }
+
+ // TestFuncAddress feeds the inputs of the per-architecture operand tables
+ // to parser.funcAddress, one subtest per architecture. An input is expected
+ // to be recognized as a function address exactly when it ends in "(SB)" and
+ // does not contain "<>"; the expected name is derived from the table's
+ // output string.
+ func TestFuncAddress(t *testing.T) {
+ type subtest struct {
+ arch string
+ tests []operandTest
+ }
+ for _, sub := range []subtest{
+ {"amd64", amd64OperandTests},
+ {"386", x86OperandTests},
+ {"arm", armOperandTests},
+ {"arm64", arm64OperandTests},
+ {"ppc64", ppc64OperandTests},
+ {"mips", mipsOperandTests},
+ {"mips64", mips64OperandTests},
+ {"s390x", s390xOperandTests},
+ } {
+ t.Run(sub.arch, func(t *testing.T) {
+ parser := newParser(sub.arch)
+ for _, test := range sub.tests {
+ parser.start(lex.Tokenize(test.input))
+ name, _, ok := parser.funcAddress()
+
+ isFuncSym := strings.HasSuffix(test.input, "(SB)") &&
+ // Ignore static symbols.
+ !strings.Contains(test.input, "<>")
+
+ wantName := ""
+ if isFuncSym {
+ // Strip $|* and (SB) and +Int.
+ // The slice drops the last four bytes of the output, the
+ // length of "(SB)".
+ wantName = test.output[:len(test.output)-4]
+ if strings.HasPrefix(wantName, "$") || strings.HasPrefix(wantName, "*") {
+ wantName = wantName[1:]
+ }
+ if i := strings.Index(wantName, "+"); i >= 0 {
+ wantName = wantName[:i]
+ }
+ }
+ if ok != isFuncSym || name != wantName {
+ t.Errorf("fail at %s as function address: got %s, %v; expected %s, %v", test.input, name, ok, wantName, isFuncSym)
+ }
+ }
+ })
+ }
+ }
+
+// operandTest is one row of an operand table: input is the operand's source
+// text and output is the string the tests expect for it. In the tables
+// below, deliberately malformed inputs carry an empty output.
+// NOTE(review): output is presumably the printed form of the parsed
+// operand; the helper that compares it is outside this view.
+type operandTest struct {
+ input, output string
+}
+
+// badOperandTest is one row of a table of malformed operands: input is the
+// operand's source text and error is the message associated with it.
+// NOTE(review): error is presumably the exact text the parser must report;
+// the helper that checks it is outside this view.
+type badOperandTest struct {
+ input, error string
+}
+
+// Examples collected by scanning all the assembly in the standard repo.
+
+var amd64OperandTests = []operandTest{
+ {"$(-1.0)", "$(-1.0)"},
+ {"$(0.0)", "$(0.0)"},
+ {"$(0x2000000+116)", "$33554548"},
+ {"$(0x3F<<7)", "$8064"},
+ {"$(112+8)", "$120"},
+ {"$(1<<63)", "$-9223372036854775808"},
+ {"$-1", "$-1"},
+ {"$0", "$0"},
+ {"$0-0", "$0"},
+ {"$0-16", "$-16"},
+ {"$0x000FFFFFFFFFFFFF", "$4503599627370495"},
+ {"$0x01", "$1"},
+ {"$0x02", "$2"},
+ {"$0x04", "$4"},
+ {"$0x3FE", "$1022"},
+ {"$0x7fffffe00000", "$140737486258176"},
+ {"$0xfffffffffffff001", "$-4095"},
+ {"$1", "$1"},
+ {"$1.0", "$(1.0)"},
+ {"$10", "$10"},
+ {"$1000", "$1000"},
+ {"$1000000", "$1000000"},
+ {"$1000000000", "$1000000000"},
+ {"$__tsan_func_enter(SB)", "$__tsan_func_enter(SB)"},
+ {"$main(SB)", "$main(SB)"},
+ {"$masks<>(SB)", "$masks<>(SB)"},
+ {"$setg_gcc<>(SB)", "$setg_gcc<>(SB)"},
+ {"$shifts<>(SB)", "$shifts<>(SB)"},
+ {"$~(1<<63)", "$9223372036854775807"},
+ {"$~0x3F", "$-64"},
+ {"$~15", "$-16"},
+ {"(((8)&0xf)*4)(SP)", "32(SP)"},
+ {"(((8-14)&0xf)*4)(SP)", "40(SP)"},
+ {"(6+8)(AX)", "14(AX)"},
+ {"(8*4)(BP)", "32(BP)"},
+ {"(AX)", "(AX)"},
+ {"(AX)(CX*8)", "(AX)(CX*8)"},
+ {"(BP)(CX*4)", "(BP)(CX*4)"},
+ {"(BP)(DX*4)", "(BP)(DX*4)"},
+ {"(BP)(R8*4)", "(BP)(R8*4)"},
+ {"(BX)", "(BX)"},
+ {"(DI)", "(DI)"},
+ {"(DI)(BX*1)", "(DI)(BX*1)"},
+ {"(DX)", "(DX)"},
+ {"(R9)", "(R9)"},
+ {"(R9)(BX*8)", "(R9)(BX*8)"},
+ {"(SI)", "(SI)"},
+ {"(SI)(BX*1)", "(SI)(BX*1)"},
+ {"(SI)(DX*1)", "(SI)(DX*1)"},
+ {"(SP)", "(SP)"},
+ {"(SP)(AX*4)", "(SP)(AX*4)"},
+ {"32(SP)(BX*2)", "32(SP)(BX*2)"},
+ {"32323(SP)(R8*4)", "32323(SP)(R8*4)"},
+ {"+3(PC)", "3(PC)"},
+ {"-1(DI)(BX*1)", "-1(DI)(BX*1)"},
+ {"-3(PC)", "-3(PC)"},
+ {"-64(SI)(BX*1)", "-64(SI)(BX*1)"},
+ {"-96(SI)(BX*1)", "-96(SI)(BX*1)"},
+ {"AL", "AL"},
+ {"AX", "AX"},
+ {"BP", "BP"},
+ {"BX", "BX"},
+ {"CX", "CX"},
+ {"DI", "DI"},
+ {"DX", "DX"},
+ {"R10", "R10"},
+ {"R10", "R10"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ {"R13", "R13"},
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"R8", "R8"},
+ {"R9", "R9"},
+ {"g", "R14"},
+ {"SI", "SI"},
+ {"SP", "SP"},
+ {"X0", "X0"},
+ {"X1", "X1"},
+ {"X10", "X10"},
+ {"X11", "X11"},
+ {"X12", "X12"},
+ {"X13", "X13"},
+ {"X14", "X14"},
+ {"X15", "X15"},
+ {"X2", "X2"},
+ {"X3", "X3"},
+ {"X4", "X4"},
+ {"X5", "X5"},
+ {"X6", "X6"},
+ {"X7", "X7"},
+ {"X8", "X8"},
+ {"X9", "X9"},
+ {"_expand_key_128<>(SB)", "_expand_key_128<>(SB)"},
+ {"_seek<>(SB)", "_seek<>(SB)"},
+ {"a2+16(FP)", "a2+16(FP)"},
+ {"addr2+24(FP)", "addr2+24(FP)"},
+ {"asmcgocall<>(SB)", "asmcgocall<>(SB)"},
+ {"b+24(FP)", "b+24(FP)"},
+ {"b_len+32(FP)", "b_len+32(FP)"},
+ {"racecall<>(SB)", "racecall<>(SB)"},
+ {"rcv_name+20(FP)", "rcv_name+20(FP)"},
+ {"retoffset+28(FP)", "retoffset+28(FP)"},
+ {"runtime·_GetStdHandle(SB)", "runtime._GetStdHandle(SB)"},
+ {"sync\u2215atomic·AddInt64(SB)", "sync/atomic.AddInt64(SB)"},
+ {"timeout+20(FP)", "timeout+20(FP)"},
+ {"ts+16(FP)", "ts+16(FP)"},
+ {"x+24(FP)", "x+24(FP)"},
+ {"x·y(SB)", "x.y(SB)"},
+ {"x·y(SP)", "x.y(SP)"},
+ {"x·y+8(SB)", "x.y+8(SB)"},
+ {"x·y+8(SP)", "x.y+8(SP)"},
+ {"y+56(FP)", "y+56(FP)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·callReflect(SB)", "\"\".callReflect(SB)"},
+ {"[X0-X0]", "[X0-X0]"},
+ {"[ Z9 - Z12 ]", "[Z9-Z12]"},
+ {"[X0-AX]", "[X0-AX]"},
+ {"[AX-X0]", "[AX-X0]"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+// amd64RuntimeOperandTests lists amd64 operands that carry an explicit ABI
+// selector (<ABI0>, <ABIInternal>).
+// NOTE(review): presumably run with a parser that is compiling the runtime,
+// since amd64BadOperandTests expects such selectors to be rejected
+// otherwise; confirm against the test driver.
+var amd64RuntimeOperandTests = []operandTest{
+ {"$bar<ABI0>(SB)", "$bar<ABI0>(SB)"},
+ {"$foo<ABIInternal>(SB)", "$foo<ABIInternal>(SB)"},
+}
+
+// amd64BadOperandTests pairs malformed amd64 operands with an error message.
+// All rows but the last exercise the [lo-hi] register list syntax; the last
+// uses an ABI selector outside the runtime.
+var amd64BadOperandTests = []badOperandTest{
+ {"[", "register list: expected ']', found EOF"},
+ {"[4", "register list: bad low register in `[4`"},
+ {"[]", "register list: bad low register in `[]`"},
+ {"[f-x]", "register list: bad low register in `[f`"},
+ {"[r10-r13]", "register list: bad low register in `[r10`"},
+ {"[k3-k6]", "register list: bad low register in `[k3`"},
+ {"[X0]", "register list: expected '-' after `[X0`, found ']'"},
+ {"[X0-]", "register list: bad high register in `[X0-]`"},
+ {"[X0-x]", "register list: bad high register in `[X0-x`"},
+ {"[X0-X1-X2]", "register list: expected ']' after `[X0-X1`, found '-'"},
+ {"[X0,X3]", "register list: expected '-' after `[X0`, found ','"},
+ {"[X0,X1,X2,X3]", "register list: expected '-' after `[X0`, found ','"},
+ {"$foo<ABI0>", "ABI selector only permitted when compiling runtime, reference was to \"foo\""},
+}
+
+// amd64BadOperandRuntimeTests pairs a malformed ABI selector with its error
+// message.
+// NOTE(review): presumably run with a parser that is compiling the runtime,
+// where selectors are permitted but must be well formed; confirm against
+// the test driver.
+var amd64BadOperandRuntimeTests = []badOperandTest{
+ {"$foo<bletch>", "malformed ABI selector \"bletch\" in reference to \"foo\""},
+}
+
+var x86OperandTests = []operandTest{
+ {"$(2.928932188134524e-01)", "$(0.29289321881345243)"},
+ {"$-1", "$-1"},
+ {"$0", "$0"},
+ {"$0x00000000", "$0"},
+ {"$runtime·badmcall(SB)", "$runtime.badmcall(SB)"},
+ {"$setg_gcc<>(SB)", "$setg_gcc<>(SB)"},
+ {"$~15", "$-16"},
+ {"(-64*1024+104)(SP)", "-65432(SP)"},
+ {"(0*4)(BP)", "(BP)"},
+ {"(1*4)(DI)", "4(DI)"},
+ {"(4*4)(BP)", "16(BP)"},
+ {"(AX)", "(AX)"},
+ {"(BP)(CX*4)", "(BP)(CX*4)"},
+ {"(BP*8)", "0(BP*8)"},
+ {"(BX)", "(BX)"},
+ {"(SP)", "(SP)"},
+ {"*AX", "AX"}, // TODO: Should make * illegal here; a simple alias for JMP AX.
+ {"*runtime·_GetStdHandle(SB)", "*runtime._GetStdHandle(SB)"},
+ {"-(4+12)(DI)", "-16(DI)"},
+ {"-1(DI)(BX*1)", "-1(DI)(BX*1)"},
+ {"-96(DI)(BX*1)", "-96(DI)(BX*1)"},
+ {"0(AX)", "(AX)"},
+ {"0(BP)", "(BP)"},
+ {"0(BX)", "(BX)"},
+ {"4(AX)", "4(AX)"},
+ {"AL", "AL"},
+ {"AX", "AX"},
+ {"BP", "BP"},
+ {"BX", "BX"},
+ {"CX", "CX"},
+ {"DI", "DI"},
+ {"DX", "DX"},
+ {"F0", "F0"},
+ {"GS", "GS"},
+ {"SI", "SI"},
+ {"SP", "SP"},
+ {"X0", "X0"},
+ {"X1", "X1"},
+ {"X2", "X2"},
+ {"X3", "X3"},
+ {"X4", "X4"},
+ {"X5", "X5"},
+ {"X6", "X6"},
+ {"X7", "X7"},
+ {"asmcgocall<>(SB)", "asmcgocall<>(SB)"},
+ {"ax+4(FP)", "ax+4(FP)"},
+ {"ptime-12(SP)", "ptime-12(SP)"},
+ {"runtime·_NtWaitForSingleObject(SB)", "runtime._NtWaitForSingleObject(SB)"},
+ {"s(FP)", "s(FP)"},
+ {"sec+4(FP)", "sec+4(FP)"},
+ {"shifts<>(SB)(CX*8)", "shifts<>(SB)(CX*8)"},
+ {"x+4(FP)", "x+4(FP)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·reflectcall(SB)", "\"\".reflectcall(SB)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+var armOperandTests = []operandTest{
+ {"$0", "$0"},
+ {"$256", "$256"},
+ {"(R0)", "(R0)"},
+ {"(R11)", "(R11)"},
+ {"(g)", "(g)"},
+ {"-12(R4)", "-12(R4)"},
+ {"0(PC)", "0(PC)"},
+ {"1024", "1024"},
+ {"12(R(1))", "12(R1)"},
+ {"12(R13)", "12(R13)"},
+ {"R0", "R0"},
+ {"R0->(32-1)", "R0->31"},
+ {"R0<<R1", "R0<<R1"},
+ {"R0>>R(1)", "R0>>R1"},
+ {"R0@>(32-1)", "R0@>31"},
+ {"R1", "R1"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ {"R13", "R13"},
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"R1<<2(R3)", "R1<<2(R3)"},
+ {"R(1)<<2(R(3))", "R1<<2(R3)"},
+ {"R2", "R2"},
+ {"R3", "R3"},
+ {"R4", "R4"},
+ {"R(4)", "R4"},
+ {"R5", "R5"},
+ {"R6", "R6"},
+ {"R7", "R7"},
+ {"R8", "R8"},
+ {"[R0,R1,g,R15]", "[R0,R1,g,R15]"},
+ {"[R0-R7]", "[R0,R1,R2,R3,R4,R5,R6,R7]"},
+ {"[R(0)-R(7)]", "[R0,R1,R2,R3,R4,R5,R6,R7]"},
+ {"[R0]", "[R0]"},
+ {"[R1-R12]", "[R1,R2,R3,R4,R5,R6,R7,R8,R9,g,R11,R12]"},
+ {"armCAS64(SB)", "armCAS64(SB)"},
+ {"asmcgocall<>(SB)", "asmcgocall<>(SB)"},
+ {"c+28(FP)", "c+28(FP)"},
+ {"g", "g"},
+ {"gosave<>(SB)", "gosave<>(SB)"},
+ {"retlo+12(FP)", "retlo+12(FP)"},
+ {"runtime·gogo(SB)", "runtime.gogo(SB)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"(R1, R3)", "(R1, R3)"},
+ {"[R0,R1,g,R15", ""}, // Issue 11764 - asm hung parsing ']' missing register lists.
+ {"[):[o-FP", ""}, // Issue 12469 - there was no infinite loop for ARM; these are just sanity checks.
+ {"[):[R0-FP", ""},
+ {"(", ""}, // Issue 12466 - backed up before beginning of line.
+}
+
+var ppc64OperandTests = []operandTest{
+ {"$((1<<63)-1)", "$9223372036854775807"},
+ {"$(-64*1024)", "$-65536"},
+ {"$(1024 * 8)", "$8192"},
+ {"$-1", "$-1"},
+ {"$-24(R4)", "$-24(R4)"},
+ {"$0", "$0"},
+ {"$0(R1)", "$(R1)"},
+ {"$0.5", "$(0.5)"},
+ {"$0x7000", "$28672"},
+ {"$0x88888eef", "$2290650863"},
+ {"$1", "$1"},
+ {"$_main<>(SB)", "$_main<>(SB)"},
+ {"$argframe(FP)", "$argframe(FP)"},
+ {"$runtime·tlsg(SB)", "$runtime.tlsg(SB)"},
+ {"$~3", "$-4"},
+ {"(-288-3*8)(R1)", "-312(R1)"},
+ {"(16)(R7)", "16(R7)"},
+ {"(8)(g)", "8(g)"},
+ {"(CTR)", "(CTR)"},
+ {"(R0)", "(R0)"},
+ {"(R3)", "(R3)"},
+ {"(R4)", "(R4)"},
+ {"(R5)", "(R5)"},
+ {"(R5)(R6*1)", "(R5)(R6*1)"},
+ {"(R5+R6)", "(R5)(R6*1)"}, // Old syntax.
+ {"-1(R4)", "-1(R4)"},
+ {"-1(R5)", "-1(R5)"},
+ {"6(PC)", "6(PC)"},
+ {"CR7", "CR7"},
+ {"CTR", "CTR"},
+ {"VS0", "VS0"},
+ {"VS1", "VS1"},
+ {"VS2", "VS2"},
+ {"VS3", "VS3"},
+ {"VS4", "VS4"},
+ {"VS5", "VS5"},
+ {"VS6", "VS6"},
+ {"VS7", "VS7"},
+ {"VS8", "VS8"},
+ {"VS9", "VS9"},
+ {"VS10", "VS10"},
+ {"VS11", "VS11"},
+ {"VS12", "VS12"},
+ {"VS13", "VS13"},
+ {"VS14", "VS14"},
+ {"VS15", "VS15"},
+ {"VS16", "VS16"},
+ {"VS17", "VS17"},
+ {"VS18", "VS18"},
+ {"VS19", "VS19"},
+ {"VS20", "VS20"},
+ {"VS21", "VS21"},
+ {"VS22", "VS22"},
+ {"VS23", "VS23"},
+ {"VS24", "VS24"},
+ {"VS25", "VS25"},
+ {"VS26", "VS26"},
+ {"VS27", "VS27"},
+ {"VS28", "VS28"},
+ {"VS29", "VS29"},
+ {"VS30", "VS30"},
+ {"VS31", "VS31"},
+ {"VS32", "VS32"},
+ {"VS33", "VS33"},
+ {"VS34", "VS34"},
+ {"VS35", "VS35"},
+ {"VS36", "VS36"},
+ {"VS37", "VS37"},
+ {"VS38", "VS38"},
+ {"VS39", "VS39"},
+ {"VS40", "VS40"},
+ {"VS41", "VS41"},
+ {"VS42", "VS42"},
+ {"VS43", "VS43"},
+ {"VS44", "VS44"},
+ {"VS45", "VS45"},
+ {"VS46", "VS46"},
+ {"VS47", "VS47"},
+ {"VS48", "VS48"},
+ {"VS49", "VS49"},
+ {"VS50", "VS50"},
+ {"VS51", "VS51"},
+ {"VS52", "VS52"},
+ {"VS53", "VS53"},
+ {"VS54", "VS54"},
+ {"VS55", "VS55"},
+ {"VS56", "VS56"},
+ {"VS57", "VS57"},
+ {"VS58", "VS58"},
+ {"VS59", "VS59"},
+ {"VS60", "VS60"},
+ {"VS61", "VS61"},
+ {"VS62", "VS62"},
+ {"VS63", "VS63"},
+ {"V0", "V0"},
+ {"V1", "V1"},
+ {"V2", "V2"},
+ {"V3", "V3"},
+ {"V4", "V4"},
+ {"V5", "V5"},
+ {"V6", "V6"},
+ {"V7", "V7"},
+ {"V8", "V8"},
+ {"V9", "V9"},
+ {"V10", "V10"},
+ {"V11", "V11"},
+ {"V12", "V12"},
+ {"V13", "V13"},
+ {"V14", "V14"},
+ {"V15", "V15"},
+ {"V16", "V16"},
+ {"V17", "V17"},
+ {"V18", "V18"},
+ {"V19", "V19"},
+ {"V20", "V20"},
+ {"V21", "V21"},
+ {"V22", "V22"},
+ {"V23", "V23"},
+ {"V24", "V24"},
+ {"V25", "V25"},
+ {"V26", "V26"},
+ {"V27", "V27"},
+ {"V28", "V28"},
+ {"V29", "V29"},
+ {"V30", "V30"},
+ {"V31", "V31"},
+ {"F14", "F14"},
+ {"F15", "F15"},
+ {"F16", "F16"},
+ {"F17", "F17"},
+ {"F18", "F18"},
+ {"F19", "F19"},
+ {"F20", "F20"},
+ {"F21", "F21"},
+ {"F22", "F22"},
+ {"F23", "F23"},
+ {"F24", "F24"},
+ {"F25", "F25"},
+ {"F26", "F26"},
+ {"F27", "F27"},
+ {"F28", "F28"},
+ {"F29", "F29"},
+ {"F30", "F30"},
+ {"F31", "F31"},
+ {"LR", "LR"},
+ {"R0", "R0"},
+ {"R1", "R1"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ {"R13", "R13"},
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"R16", "R16"},
+ {"R17", "R17"},
+ {"R18", "R18"},
+ {"R19", "R19"},
+ {"R2", "R2"},
+ {"R20", "R20"},
+ {"R21", "R21"},
+ {"R22", "R22"},
+ {"R23", "R23"},
+ {"R24", "R24"},
+ {"R25", "R25"},
+ {"R26", "R26"},
+ {"R27", "R27"},
+ {"R28", "R28"},
+ {"R29", "R29"},
+ {"R3", "R3"},
+ {"R31", "R31"},
+ {"R4", "R4"},
+ {"R5", "R5"},
+ {"R6", "R6"},
+ {"R7", "R7"},
+ {"R8", "R8"},
+ {"R9", "R9"},
+ {"SPR(269)", "SPR(269)"},
+ {"a(FP)", "a(FP)"},
+ {"g", "g"},
+ {"ret+8(FP)", "ret+8(FP)"},
+ {"runtime·abort(SB)", "runtime.abort(SB)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·trunc(SB)", "\"\".trunc(SB)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+var arm64OperandTests = []operandTest{
+ {"$0", "$0"},
+ {"$0.5", "$(0.5)"},
+ {"0(R26)", "(R26)"},
+ {"0(RSP)", "(RSP)"},
+ {"$1", "$1"},
+ {"$-1", "$-1"},
+ {"$1000", "$1000"},
+ {"$1000000000", "$1000000000"},
+ {"$0x7fff3c000", "$34358935552"},
+ {"$1234", "$1234"},
+ {"$~15", "$-16"},
+ {"$16", "$16"},
+ {"-16(RSP)", "-16(RSP)"},
+ {"16(RSP)", "16(RSP)"},
+ {"1(R1)", "1(R1)"},
+ {"-1(R4)", "-1(R4)"},
+ {"18740(R5)", "18740(R5)"},
+ {"$2", "$2"},
+ {"$-24(R4)", "$-24(R4)"},
+ {"-24(RSP)", "-24(RSP)"},
+ {"$24(RSP)", "$24(RSP)"},
+ {"-32(RSP)", "-32(RSP)"},
+ {"$48", "$48"},
+ {"$(-64*1024)(R7)", "$-65536(R7)"},
+ {"$(8-1)", "$7"},
+ {"a+0(FP)", "a(FP)"},
+ {"a1+8(FP)", "a1+8(FP)"},
+ {"·AddInt32(SB)", `"".AddInt32(SB)`},
+ {"runtime·divWVW(SB)", "runtime.divWVW(SB)"},
+ {"$argframe+0(FP)", "$argframe(FP)"},
+ {"$asmcgocall<>(SB)", "$asmcgocall<>(SB)"},
+ {"EQ", "EQ"},
+ {"F29", "F29"},
+ {"F3", "F3"},
+ {"F30", "F30"},
+ {"g", "g"},
+ {"LR", "R30"},
+ {"(LR)", "(R30)"},
+ {"R0", "R0"},
+ {"R10", "R10"},
+ {"R11", "R11"},
+ {"R18_PLATFORM", "R18"},
+ {"$4503601774854144.0", "$(4503601774854144.0)"},
+ {"$runtime·badsystemstack(SB)", "$runtime.badsystemstack(SB)"},
+ {"ZR", "ZR"},
+ {"(ZR)", "(ZR)"},
+ {"(R29, RSP)", "(R29, RSP)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+var mips64OperandTests = []operandTest{
+ {"$((1<<63)-1)", "$9223372036854775807"},
+ {"$(-64*1024)", "$-65536"},
+ {"$(1024 * 8)", "$8192"},
+ {"$-1", "$-1"},
+ {"$-24(R4)", "$-24(R4)"},
+ {"$0", "$0"},
+ {"$0(R1)", "$(R1)"},
+ {"$0.5", "$(0.5)"},
+ {"$0x7000", "$28672"},
+ {"$0x88888eef", "$2290650863"},
+ {"$1", "$1"},
+ {"$_main<>(SB)", "$_main<>(SB)"},
+ {"$argframe(FP)", "$argframe(FP)"},
+ {"$~3", "$-4"},
+ {"(-288-3*8)(R1)", "-312(R1)"},
+ {"(16)(R7)", "16(R7)"},
+ {"(8)(g)", "8(g)"},
+ {"(R0)", "(R0)"},
+ {"(R3)", "(R3)"},
+ {"(R4)", "(R4)"},
+ {"(R5)", "(R5)"},
+ {"-1(R4)", "-1(R4)"},
+ {"-1(R5)", "-1(R5)"},
+ {"6(PC)", "6(PC)"},
+ {"F14", "F14"},
+ {"F15", "F15"},
+ {"F16", "F16"},
+ {"F17", "F17"},
+ {"F18", "F18"},
+ {"F19", "F19"},
+ {"F20", "F20"},
+ {"F21", "F21"},
+ {"F22", "F22"},
+ {"F23", "F23"},
+ {"F24", "F24"},
+ {"F25", "F25"},
+ {"F26", "F26"},
+ {"F27", "F27"},
+ {"F28", "F28"},
+ {"F29", "F29"},
+ {"F30", "F30"},
+ {"F31", "F31"},
+ {"R0", "R0"},
+ {"R1", "R1"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ {"R13", "R13"},
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"R16", "R16"},
+ {"R17", "R17"},
+ {"R18", "R18"},
+ {"R19", "R19"},
+ {"R2", "R2"},
+ {"R20", "R20"},
+ {"R21", "R21"},
+ {"R22", "R22"},
+ {"R23", "R23"},
+ {"R24", "R24"},
+ {"R25", "R25"},
+ {"R26", "R26"},
+ {"R27", "R27"},
+ {"R29", "R29"},
+ {"R3", "R3"},
+ {"R31", "R31"},
+ {"R4", "R4"},
+ {"R5", "R5"},
+ {"R6", "R6"},
+ {"R7", "R7"},
+ {"R8", "R8"},
+ {"R9", "R9"},
+ {"LO", "LO"},
+ {"a(FP)", "a(FP)"},
+ {"g", "g"},
+ {"RSB", "R28"},
+ {"ret+8(FP)", "ret+8(FP)"},
+ {"runtime·abort(SB)", "runtime.abort(SB)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·trunc(SB)", "\"\".trunc(SB)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+var mipsOperandTests = []operandTest{
+ {"$((1<<63)-1)", "$9223372036854775807"},
+ {"$(-64*1024)", "$-65536"},
+ {"$(1024 * 8)", "$8192"},
+ {"$-1", "$-1"},
+ {"$-24(R4)", "$-24(R4)"},
+ {"$0", "$0"},
+ {"$0(R1)", "$(R1)"},
+ {"$0.5", "$(0.5)"},
+ {"$0x7000", "$28672"},
+ {"$0x88888eef", "$2290650863"},
+ {"$1", "$1"},
+ {"$_main<>(SB)", "$_main<>(SB)"},
+ {"$argframe(FP)", "$argframe(FP)"},
+ {"$~3", "$-4"},
+ {"(-288-3*8)(R1)", "-312(R1)"},
+ {"(16)(R7)", "16(R7)"},
+ {"(8)(g)", "8(g)"},
+ {"(R0)", "(R0)"},
+ {"(R3)", "(R3)"},
+ {"(R4)", "(R4)"},
+ {"(R5)", "(R5)"},
+ {"-1(R4)", "-1(R4)"},
+ {"-1(R5)", "-1(R5)"},
+ {"6(PC)", "6(PC)"},
+ {"F14", "F14"},
+ {"F15", "F15"},
+ {"F16", "F16"},
+ {"F17", "F17"},
+ {"F18", "F18"},
+ {"F19", "F19"},
+ {"F20", "F20"},
+ {"F21", "F21"},
+ {"F22", "F22"},
+ {"F23", "F23"},
+ {"F24", "F24"},
+ {"F25", "F25"},
+ {"F26", "F26"},
+ {"F27", "F27"},
+ {"F28", "F28"},
+ {"F29", "F29"},
+ {"F30", "F30"},
+ {"F31", "F31"},
+ {"R0", "R0"},
+ {"R1", "R1"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ {"R13", "R13"},
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"R16", "R16"},
+ {"R17", "R17"},
+ {"R18", "R18"},
+ {"R19", "R19"},
+ {"R2", "R2"},
+ {"R20", "R20"},
+ {"R21", "R21"},
+ {"R22", "R22"},
+ {"R23", "R23"},
+ {"R24", "R24"},
+ {"R25", "R25"},
+ {"R26", "R26"},
+ {"R27", "R27"},
+ {"R28", "R28"},
+ {"R29", "R29"},
+ {"R3", "R3"},
+ {"R31", "R31"},
+ {"R4", "R4"},
+ {"R5", "R5"},
+ {"R6", "R6"},
+ {"R7", "R7"},
+ {"R8", "R8"},
+ {"R9", "R9"},
+ {"LO", "LO"},
+ {"a(FP)", "a(FP)"},
+ {"g", "g"},
+ {"ret+8(FP)", "ret+8(FP)"},
+ {"runtime·abort(SB)", "runtime.abort(SB)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·trunc(SB)", "\"\".trunc(SB)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
+
+var s390xOperandTests = []operandTest{
+ {"$((1<<63)-1)", "$9223372036854775807"},
+ {"$(-64*1024)", "$-65536"},
+ {"$(1024 * 8)", "$8192"},
+ {"$-1", "$-1"},
+ {"$-24(R4)", "$-24(R4)"},
+ {"$0", "$0"},
+ {"$0(R1)", "$(R1)"},
+ {"$0.5", "$(0.5)"},
+ {"$0x7000", "$28672"},
+ {"$0x88888eef", "$2290650863"},
+ {"$1", "$1"},
+ {"$_main<>(SB)", "$_main<>(SB)"},
+ {"$argframe(FP)", "$argframe(FP)"},
+ {"$~3", "$-4"},
+ {"(-288-3*8)(R1)", "-312(R1)"},
+ {"(16)(R7)", "16(R7)"},
+ {"(8)(g)", "8(g)"},
+ {"(R0)", "(R0)"},
+ {"(R3)", "(R3)"},
+ {"(R4)", "(R4)"},
+ {"(R5)", "(R5)"},
+ {"-1(R4)", "-1(R4)"},
+ {"-1(R5)", "-1(R5)"},
+ {"6(PC)", "6(PC)"},
+ {"R0", "R0"},
+ {"R1", "R1"},
+ {"R2", "R2"},
+ {"R3", "R3"},
+ {"R4", "R4"},
+ {"R5", "R5"},
+ {"R6", "R6"},
+ {"R7", "R7"},
+ {"R8", "R8"},
+ {"R9", "R9"},
+ {"R10", "R10"},
+ {"R11", "R11"},
+ {"R12", "R12"},
+ // {"R13", "R13"}, R13 is g
+ {"R14", "R14"},
+ {"R15", "R15"},
+ {"F0", "F0"},
+ {"F1", "F1"},
+ {"F2", "F2"},
+ {"F3", "F3"},
+ {"F4", "F4"},
+ {"F5", "F5"},
+ {"F6", "F6"},
+ {"F7", "F7"},
+ {"F8", "F8"},
+ {"F9", "F9"},
+ {"F10", "F10"},
+ {"F11", "F11"},
+ {"F12", "F12"},
+ {"F13", "F13"},
+ {"F14", "F14"},
+ {"F15", "F15"},
+ {"V0", "V0"},
+ {"V1", "V1"},
+ {"V2", "V2"},
+ {"V3", "V3"},
+ {"V4", "V4"},
+ {"V5", "V5"},
+ {"V6", "V6"},
+ {"V7", "V7"},
+ {"V8", "V8"},
+ {"V9", "V9"},
+ {"V10", "V10"},
+ {"V11", "V11"},
+ {"V12", "V12"},
+ {"V13", "V13"},
+ {"V14", "V14"},
+ {"V15", "V15"},
+ {"V16", "V16"},
+ {"V17", "V17"},
+ {"V18", "V18"},
+ {"V19", "V19"},
+ {"V20", "V20"},
+ {"V21", "V21"},
+ {"V22", "V22"},
+ {"V23", "V23"},
+ {"V24", "V24"},
+ {"V25", "V25"},
+ {"V26", "V26"},
+ {"V27", "V27"},
+ {"V28", "V28"},
+ {"V29", "V29"},
+ {"V30", "V30"},
+ {"V31", "V31"},
+ {"a(FP)", "a(FP)"},
+ {"g", "g"},
+ {"ret+8(FP)", "ret+8(FP)"},
+ {"runtime·abort(SB)", "runtime.abort(SB)"},
+ {"·AddUint32(SB)", "\"\".AddUint32(SB)"},
+ {"·trunc(SB)", "\"\".trunc(SB)"},
+ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms.
+}
diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go
new file mode 100644
index 0000000..4cddcf4
--- /dev/null
+++ b/src/cmd/asm/internal/asm/parse.go
@@ -0,0 +1,1437 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package asm implements the parser and instruction generator for the assembler.
+// TODO: Split apart?
+package asm
+
+import (
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strconv"
+ "text/scanner"
+ "unicode/utf8"
+
+ "cmd/asm/internal/arch"
+ "cmd/asm/internal/flags"
+ "cmd/asm/internal/lex"
+ "cmd/internal/obj"
+ "cmd/internal/obj/x86"
+ "cmd/internal/src"
+ "cmd/internal/sys"
+)
+
+// Parser holds the state for parsing one assembly source: the token reader,
+// error bookkeeping, the operand currently being scanned, and the labels and
+// Progs accumulated so far.
+type Parser struct {
+ lex lex.TokenReader // Source of tokens, and of file/line information for errors.
+ lineNum int // Line number in source file.
+ errorLine int // Line number of last error.
+ errorCount int // Number of errors.
+ sawCode bool // saw code in this file (as opposed to comments and blank lines)
+ pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
+ input []lex.Token // Tokens of the operand being parsed; set by start.
+ inputPos int // Index of the next token in input; reset to 0 by start.
+ pendingLabels []string // Labels to attach to next instruction.
+ labels map[string]*obj.Prog // Keyed by label name; filled in outside this view.
+ toPatch []Patch // Presumably consumed by p.patch, which Parse calls - confirm.
+ addr []obj.Addr // Operand addresses of the current instruction; storage is reused per instruction.
+ arch *arch.Arch // Target architecture: instruction and register tables.
+ ctxt *obj.Link // Link context; used here to build source positions.
+ firstProg *obj.Prog // Returned by Parse on success.
+ lastProg *obj.Prog
+ dataAddr map[string]int64 // Most recent address for DATA for this symbol.
+ isJump bool // Instruction being assembled is a jump.
+ compilingRuntime bool // Copied from the NewParser argument of the same name.
+ errorWriter io.Writer // Destination for error messages; os.Stderr when built by NewParser.
+}
+
+// Patch pairs a Prog with the name of a label it refers to.
+// NOTE(review): presumably a branch whose target is filled in once all
+// labels are known (see Parser.toPatch and the p.patch call in Parse);
+// the code that resolves it is outside this view.
+type Patch struct {
+ prog *obj.Prog
+ label string
+}
+
+// NewParser returns a Parser that reads tokens from lexer and assembles for
+// architecture ar within link context ctxt. The label and DATA-address maps
+// start out empty, errors are written to os.Stderr, and compilingRuntime is
+// recorded as given.
+func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader, compilingRuntime bool) *Parser {
+	p := new(Parser)
+	p.ctxt = ctxt
+	p.arch = ar
+	p.lex = lexer
+	p.labels = map[string]*obj.Prog{}
+	p.dataAddr = map[string]int64{}
+	p.errorWriter = os.Stderr
+	p.compilingRuntime = compilingRuntime
+	return p
+}
+
+// panicOnError is enabled when testing: the first reported error aborts
+// execution by panicking with the formatted error, which the test can
+// recover.
+var panicOnError bool
+
+// errorf reports a formatted error.
+//
+// When panicOnError is set it panics with the error instead. Otherwise at
+// most one error is reported per source line. If the parser has a lexer,
+// the message is prefixed with "file:line: " and terminated with a newline;
+// without one the format is written as given. After more than 10 errors the
+// process exits via log.Fatal unless the AllErrors flag is set.
+func (p *Parser) errorf(format string, args ...interface{}) {
+	switch {
+	case panicOnError:
+		panic(fmt.Errorf(format, args...))
+	case p.errorLine == p.lineNum:
+		// Only one error per line.
+		return
+	}
+	p.errorLine = p.lineNum
+	if p.lex != nil {
+		// Put file and line information on head of message.
+		located := make([]interface{}, 0, len(args)+2)
+		located = append(located, p.lex.File(), p.lineNum)
+		located = append(located, args...)
+		args = located
+		format = "%s:%d: " + format + "\n"
+	}
+	fmt.Fprintf(p.errorWriter, format, args...)
+	p.errorCount++
+	if p.errorCount <= 10 || *flags.AllErrors {
+		return
+	}
+	log.Fatal("too many errors")
+}
+
+// pos returns the source position of the current line (column 0) in the
+// lexer's current position base, registered in the link context's position
+// table.
+func (p *Parser) pos() src.XPos {
+	linePos := src.MakePos(p.lex.Base(), uint(p.lineNum), 0)
+	return p.ctxt.PosTable.XPos(linePos)
+}
+
+// Parse reads lines until p.line reports that there is no more input. Each
+// line is first offered to the pseudo-op handler and otherwise looked up in
+// the architecture's instruction table; an unknown word is reported as an
+// error. If any error was counted it returns (nil, false); otherwise it
+// runs p.patch and returns the first Prog and true.
+func (p *Parser) Parse() (*obj.Prog, bool) {
+	buf := make([][]lex.Token, 0, 3)
+	for {
+		word, cond, operands, more := p.line(buf)
+		if !more {
+			break
+		}
+		// Hand the operand slice back to line so its storage can be reused.
+		buf = operands
+
+		if p.pseudo(word, operands) {
+			continue
+		}
+		if as, known := p.arch.Instructions[word]; known {
+			p.instruction(as, word, cond, operands)
+		} else {
+			p.errorf("unrecognized instruction %q", word)
+		}
+	}
+	if p.errorCount > 0 {
+		return nil, false
+	}
+	p.patch()
+	return p.firstProg, true
+}
+
+// ParseSymABIs scans p's assembly source line by line, passing each line to
+// symDefRef so that text symbol definitions and references are written to w
+// in symabis form. It reports whether no errors were counted.
+func (p *Parser) ParseSymABIs(w io.Writer) bool {
+	buf := make([][]lex.Token, 0, 3)
+	for {
+		word, _, ops, more := p.line(buf)
+		if !more {
+			return p.errorCount == 0
+		}
+		// Keep the returned slice so the next call to line can reuse it.
+		buf = ops
+
+		p.symDefRef(w, word, ops)
+	}
+}
+
+// nextToken returns the next token from the lexer, skipping build comments
+// and '#' tokens. A //go:build comment seen after code is reported as
+// misplaced; either way it is discarded. Any token other than a build
+// comment or a newline marks the file as containing code.
+func (p *Parser) nextToken() lex.ScanToken {
+	for {
+		switch tok := p.lex.Next(); tok {
+		case lex.BuildComment:
+			if p.sawCode {
+				p.errorf("misplaced //go:build comment")
+			}
+		case '\n':
+			// Newlines are returned but do not count as code.
+			return tok
+		case '#':
+			// A leftover wisp of a #include/#define/etc,
+			// to let us know that p.sawCode should be true now.
+			// Otherwise ignored.
+			p.sawCode = true
+		default:
+			p.sawCode = true
+			return tok
+		}
+	}
+}
+
+// line consumes a single assembly line from p.lex of the form
+//
+// {label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
+//
+// It adds any labels to p.pendingLabels and returns the word, cond,
+// operand list, and true. At EOF, or if the line does not start with an
+// identifier, it returns ok=false. Other problems (a second ':' separator,
+// a missing operand, a bad suffix) are reported through p.errorf but the
+// line is still returned with ok=true.
+//
+// Each operand is returned as the slice of tokens that make it up. The old
+// x86 "register pair" form A:B is returned as the two operands B, A.
+//
+// line may reuse the memory from scratch.
+func (p *Parser) line(scratch [][]lex.Token) (word, cond string, operands [][]lex.Token, ok bool) {
+next:
+ // Skip newlines.
+ var tok lex.ScanToken
+ for {
+ tok = p.nextToken()
+ // We save the line number here so error messages from this instruction
+ // are labeled with this line. Otherwise we complain after we've absorbed
+ // the terminating newline and the line numbers are off by one in errors.
+ p.lineNum = p.lex.Line()
+ switch tok {
+ case '\n', ';':
+ continue
+ case scanner.EOF:
+ return "", "", nil, false
+ }
+ break
+ }
+ // First item must be an identifier.
+ if tok != scanner.Ident {
+ p.errorf("expected identifier, found %q", p.lex.Text())
+ return "", "", nil, false // Might as well stop now.
+ }
+ word, cond = p.lex.Text(), ""
+ operands = scratch[:0]
+ // Zero or more comma-separated operands, one per loop.
+ // nesting counts unclosed '(' and '['; separators only split operands at depth 0.
+ nesting := 0
+ // colon is the operand index at which a ':' separator was seen, or -1.
+ colon := -1
+ for tok != '\n' && tok != ';' {
+ // Process one operand.
+ var items []lex.Token
+ if cap(operands) > len(operands) {
+ // Reuse scratch items slice.
+ items = operands[:cap(operands)][len(operands)][:0]
+ } else {
+ items = make([]lex.Token, 0, 3)
+ }
+ for {
+ tok = p.nextToken()
+ // Suffixes and labels are only recognized directly after the word,
+ // before any operand token has been collected.
+ if len(operands) == 0 && len(items) == 0 {
+ if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386) && tok == '.' {
+ // Suffixes: ARM conditionals or x86 modifiers.
+ tok = p.nextToken()
+ str := p.lex.Text()
+ if tok != scanner.Ident {
+ p.errorf("instruction suffix expected identifier, found %s", str)
+ }
+ cond = cond + "." + str
+ continue
+ }
+ if tok == ':' {
+ // Labels.
+ // The word was a label, not an instruction: record it and
+ // start over with whatever follows the colon.
+ p.pendingLabels = append(p.pendingLabels, word)
+ goto next
+ }
+ }
+ if tok == scanner.EOF {
+ p.errorf("unexpected EOF")
+ return "", "", nil, false
+ }
+ // Split operands on comma. Also, the old syntax on x86 for a "register pair"
+ // was AX:DX, for which the new syntax is DX, AX. Note the reordering.
+ if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) {
+ if tok == ':' {
+ // Remember this location so we can swap the operands below.
+ if colon >= 0 {
+ p.errorf("invalid ':' in operand")
+ return word, cond, operands, true
+ }
+ colon = len(operands)
+ }
+ break
+ }
+ if tok == '(' || tok == '[' {
+ nesting++
+ }
+ if tok == ')' || tok == ']' {
+ nesting--
+ }
+ items = append(items, lex.Make(tok, p.lex.Text()))
+ }
+ if len(items) > 0 {
+ operands = append(operands, items)
+ if colon >= 0 && len(operands) == colon+2 {
+ // AX:DX becomes DX, AX.
+ operands[colon], operands[colon+1] = operands[colon+1], operands[colon]
+ colon = -1
+ }
+ } else if len(operands) > 0 || tok == ',' || colon >= 0 {
+ // Had a separator with nothing after.
+ p.errorf("missing operand")
+ }
+ }
+ return word, cond, operands, true
+}
+
+// instruction assembles one machine instruction. It parses every operand
+// into an address, collecting them in p.addr (whose storage is reused from
+// the previous instruction), and then hands the result to asmJump when the
+// architecture classifies word as a jump, or to asmInstruction otherwise.
+// A negative register in a non-jump operand is reported as an illegal use
+// of a pseudo-register.
+func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {
+	p.addr = p.addr[:0]
+	p.isJump = p.arch.IsJump(word)
+	for _, operand := range operands {
+		a := p.address(operand)
+		// Jumps refer to PC, a pseudo.
+		if a.Reg < 0 && !p.isJump {
+			p.errorf("illegal use of pseudo-register in %s", word)
+		}
+		p.addr = append(p.addr, a)
+	}
+	if !p.isJump {
+		p.asmInstruction(op, cond, p.addr)
+		return
+	}
+	p.asmJump(op, cond, p.addr)
+}
+
+// pseudo handles the pseudo-operations DATA, FUNCDATA, GLOBL, PCDATA,
+// PCALIGN and TEXT by passing operands to the matching asm* method.
+// It reports whether word was one of them.
+func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
+	handled := true
+	switch word {
+	case "DATA":
+		p.asmData(operands)
+	case "FUNCDATA":
+		p.asmFuncData(operands)
+	case "GLOBL":
+		p.asmGlobl(operands)
+	case "PCDATA":
+		p.asmPCData(operands)
+	case "PCALIGN":
+		p.asmPCAlign(operands)
+	case "TEXT":
+		p.asmText(operands)
+	default:
+		handled = false
+	}
+	return handled
+}
+
+// symDefRef scans a line for potential text symbol definitions and
+// references and writes symabis information to w.
+//
+// For TEXT, only operands[0] is examined and, if it parses as a function
+// address, a "def" line is written. For DATA and FUNCDATA the first operand
+// is skipped (and nothing is written if there are fewer than two). For
+// every other word, including GLOBL and PCDATA, all operands are examined.
+// Each examined operand that parses as a function address produces a "ref"
+// line.
+//
+// The symabis format is documented at
+// cmd/compile/internal/ssagen.ReadSymABIs.
+func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) {
+	if word == "TEXT" {
+		// Defines text symbol in operands[0].
+		if len(operands) == 0 {
+			return
+		}
+		p.start(operands[0])
+		name, abi, ok := p.funcAddress()
+		if ok {
+			fmt.Fprintf(w, "def %s %s\n", name, abi)
+		}
+		return
+	}
+	if word == "DATA" || word == "FUNCDATA" {
+		// For DATA, operands[0] is defined symbol.
+		// For FUNCDATA, operands[0] is an immediate constant.
+		// Remaining operands may have references.
+		if len(operands) < 2 {
+			return
+		}
+		operands = operands[1:]
+	}
+	// Search for symbol references.
+	for i := range operands {
+		p.start(operands[i])
+		if name, abi, ok := p.funcAddress(); ok {
+			fmt.Fprintf(w, "ref %s %s\n", name, abi)
+		}
+	}
+}
+
+// start makes operand the token sequence to be scanned and rewinds the
+// scan position to its first token.
+func (p *Parser) start(operand []lex.Token) {
+	p.input, p.inputPos = operand, 0
+}
+
+// address parses the operand into a link address structure.
+func (p *Parser) address(operand []lex.Token) obj.Addr {
+ p.start(operand)
+ addr := obj.Addr{}
+ p.operand(&addr)
+ return addr
+}
+
+// parseScale converts the decimal string s into a scale factor. Only "1",
+// "2", "4" and "8" are valid; anything else is reported as an error and
+// yields 0.
+func (p *Parser) parseScale(s string) int8 {
+	switch s {
+	case "1":
+		return 1
+	case "2":
+		return 2
+	case "4":
+		return 4
+	case "8":
+		return 8
+	default:
+		p.errorf("bad scale: %s", s)
+		return 0
+	}
+}
+
+// operand parses a general operand and stores the result in *a.
+// The tokens to parse are those most recently installed by p.start.
+// The operand is classified by its leading tokens, tried in this order:
+// symbol reference, register list, register (plain, shifted or extended),
+// constant, and finally register indirection. Problems are reported
+// through p.errorf; some of them abandon the operand, leaving *a only
+// partly filled in.
+func (p *Parser) operand(a *obj.Addr) {
+ //fmt.Printf("Operand: %v\n", p.input)
+ if len(p.input) == 0 {
+ p.errorf("empty operand: cannot happen")
+ return
+ }
+ // General address (with a few exceptions) looks like
+ // $sym±offset(SB)(reg)(index*scale)
+ // Exceptions are:
+ //
+ // R1
+ // offset
+ // $offset
+ // Every piece is optional, so we scan left to right and what
+ // we discover tells us where we are.
+
+ // Prefix: $ or *. It stays 0 when the operand has neither.
+ var prefix rune
+ switch tok := p.peek(); tok {
+ case '$', '*':
+ prefix = rune(tok)
+ p.next()
+ }
+
+ // Symbol: sym±offset(SB)
+ tok := p.next()
+ name := tok.String()
+ if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) {
+ // We have a symbol. Parse $sym±offset(symkind)
+ p.symbolReference(a, name, prefix)
+ // fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a))
+ if p.peek() == scanner.EOF {
+ return
+ }
+ // Otherwise more tokens follow the symbol reference; fall through
+ // to the checks below.
+ }
+
+ // Special register list syntax for arm: [R1,R3-R7]
+ if tok.ScanToken == '[' {
+ if prefix != 0 {
+ p.errorf("illegal use of register list")
+ }
+ p.registerList(a)
+ p.expectOperandEnd()
+ return
+ }
+
+ // Register: R1
+ if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) {
+ if p.atRegisterShift() {
+ // ARM shifted register such as R1<<R2 or R1>>2.
+ a.Type = obj.TYPE_SHIFT
+ a.Offset = p.registerShift(tok.String(), prefix)
+ if p.peek() == '(' {
+ // Can only be a literal register here.
+ p.next()
+ tok := p.next()
+ name := tok.String()
+ if !p.atStartOfRegister(name) {
+ p.errorf("expected register; found %s", name)
+ }
+ a.Reg, _ = p.registerReference(name)
+ p.get(')')
+ }
+ } else if p.atRegisterExtension() {
+ a.Type = obj.TYPE_REG
+ p.registerExtension(a, tok.String(), prefix)
+ p.expectOperandEnd()
+ return
+ } else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok {
+ if scale != 0 {
+ p.errorf("expected simple register reference")
+ }
+ a.Type = obj.TYPE_REG
+ a.Reg = r1
+ if r2 != 0 {
+ // Form is R1:R2. It is on RHS and the second register
+ // needs to go into the LHS.
+ panic("cannot happen (Addr.Reg2)")
+ }
+ }
+ // fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a))
+ p.expectOperandEnd()
+ return
+ }
+
+ // Constant.
+ haveConstant := false
+ switch tok.ScanToken {
+ case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~':
+ haveConstant = true
+ case '(':
+ // Could be parenthesized expression or (R). Must be something, though.
+ // Look at the token after the '(' to decide, then step back over it.
+ tok := p.next()
+ if tok.ScanToken == scanner.EOF {
+ p.errorf("missing right parenthesis")
+ return
+ }
+ rname := tok.String()
+ p.back()
+ haveConstant = !p.atStartOfRegister(rname)
+ if !haveConstant {
+ p.back() // Put back the '('.
+ }
+ }
+ if haveConstant {
+ // Step back over the constant's first token so it is parsed below.
+ p.back()
+ if p.have(scanner.Float) {
+ if prefix != '$' {
+ p.errorf("floating-point constant must be an immediate")
+ }
+ a.Type = obj.TYPE_FCONST
+ a.Val = p.floatExpr()
+ // fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a))
+ p.expectOperandEnd()
+ return
+ }
+ if p.have(scanner.String) {
+ if prefix != '$' {
+ p.errorf("string constant must be an immediate")
+ return
+ }
+ str, err := strconv.Unquote(p.get(scanner.String).String())
+ if err != nil {
+ p.errorf("string parse error: %s", err)
+ }
+ a.Type = obj.TYPE_SCONST
+ a.Val = str
+ // fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a))
+ p.expectOperandEnd()
+ return
+ }
+ a.Offset = int64(p.expr())
+ if p.peek() != '(' {
+ // A bare integer expression: the prefix decides what kind of operand it is.
+ switch prefix {
+ case '$':
+ a.Type = obj.TYPE_CONST
+ case '*':
+ a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker.
+ default:
+ a.Type = obj.TYPE_MEM
+ }
+ // fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a))
+ p.expectOperandEnd()
+ return
+ }
+ // The expression is an offset followed by a parenthesized part.
+ // fmt.Printf("offset %d \n", a.Offset)
+ }
+
+ // Register indirection: (reg) or (index*scale). We are on the opening paren.
+ p.registerIndirect(a, prefix)
+ // fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a))
+
+ p.expectOperandEnd()
+ return
+}
+
+// atStartOfRegister reports whether name begins a register operand.
+// That is so when name is a register known to the architecture (R10),
+// or when name is a register prefix and the next token is '(' (R(10)).
+func (p *Parser) atStartOfRegister(name string) bool {
+	if _, isRegister := p.arch.Register[name]; isRegister {
+		// Simple register: R10.
+		return true
+	}
+	// Parenthesized register: R(10).
+	isPrefix := p.arch.RegisterPrefix[name]
+	return isPrefix && p.peek() == '('
+}
+
+// atRegisterShift reports whether we are at the start of an ARM or ARM64
+// shifted register. The register name, or the R prefix of a parenthesized
+// register, has already been consumed.
+func (p *Parser) atRegisterShift() bool {
+	switch {
+	case !p.arch.InFamily(sys.ARM, sys.ARM64):
+		// Shifted registers exist on ARM and ARM64 only.
+		return false
+	case lex.IsRegisterShift(p.peek()):
+		// R1<<...
+		return true
+	case p.peek() != '(', len(p.input)-p.inputPos < 4:
+		// Not R(1)<<...: no parenthesis, or too few tokens left for "( n ) op".
+		return false
+	}
+	// R(1)<<... Ugly check. TODO: Rethink how we handle ARM register shifts to be
+	// less special.
+	if !p.at('(', scanner.Int, ')') {
+		return false
+	}
+	afterParen := p.input[p.inputPos+3]
+	return lex.IsRegisterShift(afterParen.ScanToken)
+}
+
+// atRegisterExtension reports whether we are at the start of an ARM64 extended register.
+// The register name, or the R prefix, has already been consumed, so an
+// extension shows up as a '.' in the next token (R1.xxx).
+func (p *Parser) atRegisterExtension() bool {
+	// ARM64 only; every other architecture answers false.
+	return p.arch.Family == sys.ARM64 && p.peek() == '.'
+}
+
+// registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10).
+// name is the token already consumed by the caller. For the parenthesized
+// form the "(number)" part is consumed here. The result is the register
+// number and true, or 0 and false after an error has been reported.
+func (p *Parser) registerReference(name string) (int16, bool) {
+	if r, present := p.arch.Register[name]; present {
+		return r, true
+	}
+	if !p.arch.RegisterPrefix[name] {
+		p.errorf("expected register; found %s", name)
+		return 0, false
+	}
+	p.get('(')
+	tok := p.get(scanner.Int)
+	num, err := strconv.ParseInt(tok.String(), 10, 16)
+	// Consume the closing parenthesis before looking at err so the input
+	// position is the same whether or not the number was valid.
+	p.get(')')
+	if err != nil {
+		p.errorf("parsing register list: %s", err)
+		return 0, false
+	}
+	r, ok := p.arch.RegisterNumber(name, int16(num))
+	if !ok {
+		// Report the number the user wrote; r is only the value that
+		// accompanies ok == false and says nothing about the input.
+		p.errorf("illegal register %s(%d)", name, num)
+		return 0, false
+	}
+	return r, true
+}
+
+// register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB))
+// including forms involving multiple registers such as R1:R2.
+// It returns the first register, the optional second register (0 if absent),
+// the optional scale (0 if absent) and whether parsing succeeded.
+func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) {
+	// R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale.
+	if r1, ok = p.registerReference(name); !ok {
+		return
+	}
+	// A register takes no prefix, except that *AX is OK.
+	if prefix != 0 && prefix != '*' {
+		p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name)
+	}
+	switch p.peek() {
+	case ':', ',', '+':
+		// 2nd register; syntax (R1+R2) etc. No two architectures agree.
+		// Check the architectures match the syntax.
+		separator := p.next().ScanToken
+		if separator == ',' && !p.arch.InFamily(sys.ARM, sys.ARM64) {
+			p.errorf("(register,register) not supported on this architecture")
+			// ok still holds the result of the first lookup here.
+			return
+		}
+		if separator == '+' && p.arch.Family != sys.PPC64 {
+			p.errorf("(register+register) not supported on this architecture")
+			// ok still holds the result of the first lookup here.
+			return
+		}
+		second := p.next().String()
+		if r2, ok = p.registerReference(second); !ok {
+			return
+		}
+	}
+	if p.peek() == '*' {
+		// Scale
+		p.next() // skip the '*'
+		scale = p.parseScale(p.next().String())
+	}
+	return r1, r2, scale, true
+}
+
+// registerShift parses an ARM/ARM64 shifted register reference and returns the encoded representation.
+// There is known to be a register (current token) and a shift operator (peeked token).
+// name is that register token; prefix must be 0 because a shifted register
+// takes no operand prefix. 0 is returned when the register cannot be parsed.
+func (p *Parser) registerShift(name string, prefix rune) int64 {
+	if prefix != 0 {
+		// Echo the prefix that was actually seen, as register does, rather than a fixed '$'.
+		p.errorf("prefix %c not allowed for shifted register: %c%s", prefix, prefix, name)
+	}
+	// R1 op R2 or r1 op constant.
+	// op is:
+	//	"<<" == 0
+	//	">>" == 1
+	//	"->" == 2
+	//	"@>" == 3
+	r1, ok := p.registerReference(name)
+	if !ok {
+		return 0
+	}
+	var op int16
+	switch p.next().ScanToken {
+	case lex.LSH:
+		op = 0
+	case lex.RSH:
+		op = 1
+	case lex.ARR:
+		op = 2
+	case lex.ROT:
+		// following instructions on ARM64 support rotate right
+		// AND, ANDS, TST, BIC, BICS, EON, EOR, ORR, MVN, ORN
+		op = 3
+	}
+	tok := p.next()
+	str := tok.String()
+	var count int16
+	switch tok.ScanToken {
+	case scanner.Ident:
+		if p.arch.Family == sys.ARM64 {
+			p.errorf("rhs of shift must be integer: %s", str)
+		} else {
+			r2, ok := p.registerReference(str)
+			if !ok {
+				p.errorf("rhs of shift must be register or integer: %s", str)
+			}
+			count = (r2&15)<<8 | 1<<4
+		}
+	case scanner.Int, '(':
+		p.back()
+		// expr evaluates in uint64, so a negative count such as (0-1)
+		// arrives here as a negative int64. Reject it instead of letting
+		// the mask below turn it into a large valid-looking count.
+		x := int64(p.expr())
+		if p.arch.Family == sys.ARM64 {
+			if x < 0 || x >= 64 {
+				p.errorf("register shift count too large: %s", str)
+			}
+			count = int16((x & 63) << 10)
+		} else {
+			if x < 0 || x >= 32 {
+				p.errorf("register shift count too large: %s", str)
+			}
+			count = int16((x & 31) << 7)
+		}
+	default:
+		p.errorf("unexpected %s in register shift", tok.String())
+	}
+	if p.arch.Family == sys.ARM64 {
+		off, err := arch.ARM64RegisterShift(r1, op, count)
+		if err != nil {
+			// Pass the error as an argument: its text is not a format string.
+			p.errorf("%s", err)
+		}
+		return off
+	}
+	return int64((r1 & 15) | op<<5 | count)
+}
+
+// registerExtension parses a register with extension or arrangement.
+// There is known to be a register (current token) and an extension operator (peeked token).
+// name is that register token and prefix must be 0. The parsed pieces are
+// handed to arch.ARM64RegisterExtension, which fills in a; on any other
+// architecture an error is reported.
+func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) {
+	if prefix != 0 {
+		// Echo the prefix that was actually seen rather than a fixed '$'.
+		p.errorf("prefix %c not allowed for shifted register: %c%s", prefix, prefix, name)
+	}
+
+	reg, ok := p.registerReference(name)
+	if !ok {
+		p.errorf("unexpected %s in register extension", name)
+		return
+	}
+
+	isIndex := false
+	num := int16(0)
+	isAmount := true // Amount is zero by default
+	ext := ""
+	if p.peek() == lex.LSH {
+		// (Rn)(Rm<<2), the shifted offset register.
+		ext = "LSL"
+	} else {
+		// (Rn)(Rm.UXTW<<1), the extended offset register.
+		// Rm.UXTW<<3, the extended register.
+		p.get('.')
+		tok := p.next()
+		ext = tok.String()
+	}
+	if p.peek() == lex.LSH {
+		// parses left shift amount applied after extension: <<Amount
+		p.get(lex.LSH)
+		tok := p.get(scanner.Int)
+		amount, err := strconv.ParseInt(tok.String(), 10, 16)
+		if err != nil {
+			p.errorf("parsing left shift amount: %s", err)
+		}
+		num = int16(amount)
+	} else if p.peek() == '[' {
+		// parses an element: [Index]
+		p.get('[')
+		tok := p.get(scanner.Int)
+		index, err := strconv.ParseInt(tok.String(), 10, 16)
+		p.get(']')
+		if err != nil {
+			p.errorf("parsing element index: %s", err)
+		}
+		isIndex = true
+		isAmount = false
+		num = int16(index)
+	}
+
+	switch p.arch.Family {
+	case sys.ARM64:
+		err := arch.ARM64RegisterExtension(a, ext, reg, num, isAmount, isIndex)
+		if err != nil {
+			// Pass the error as an argument: its text is not a format string.
+			p.errorf("%s", err)
+		}
+	default:
+		p.errorf("register extension not supported on this architecture")
+	}
+}
+
+// symbolReference parses a symbol that is known not to be a register.
+// name is the identifier already consumed and prefix the operand prefix
+// seen before it (0 if none). The result is stored in a.
+func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) {
+	// The prefix selects the operand type; any other prefix leaves a.Type alone.
+	if prefix == 0 {
+		a.Type = obj.TYPE_MEM
+	} else if prefix == '$' {
+		a.Type = obj.TYPE_ADDR
+	} else if prefix == '*' {
+		a.Type = obj.TYPE_INDIR
+	}
+
+	// Parse optional <> (indicates a static symbol) or
+	// <ABIxxx> (selecting text symbol with specific ABI).
+	isStatic, abi := p.symRefAttrs(name, true)
+
+	// An optional signed offset follows the name: sym+4, sym-8.
+	if sign := p.peek(); sign == '+' || sign == '-' {
+		a.Offset = int64(p.expr())
+	}
+	if isStatic {
+		a.Sym = p.ctxt.LookupStatic(name)
+	} else {
+		a.Sym = p.ctxt.LookupABI(name, abi)
+	}
+	if p.peek() != scanner.EOF {
+		// Expect (SB), (FP), (PC), or (SP)
+		p.get('(')
+		reg := p.get(scanner.Ident).String()
+		p.get(')')
+		p.setPseudoRegister(a, reg, isStatic, prefix)
+		return
+	}
+	// Nothing follows the symbol.
+	if prefix == 0 && p.isJump {
+		// Symbols without prefix or suffix are jump labels.
+		return
+	}
+	p.errorf("illegal or missing addressing mode for symbol %s", name)
+}
+
+// setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB).
+// reg is the pseudo-register name, isStatic tells whether the symbol was
+// marked static, and prefix is the operand prefix (0 if none).
+func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) {
+	if addr.Reg != 0 {
+		p.errorf("internal error: reg %s already set in pseudo", reg)
+	}
+	switch reg {
+	case "SB":
+		if isStatic {
+			addr.Name = obj.NAME_STATIC
+		} else {
+			addr.Name = obj.NAME_EXTERN
+		}
+	case "FP":
+		addr.Name = obj.NAME_PARAM
+	case "SP":
+		addr.Name = obj.NAME_AUTO // The pseudo-stack.
+	case "PC":
+		if prefix != 0 {
+			p.errorf("illegal addressing mode for PC")
+		}
+		addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump.
+	default:
+		p.errorf("expected pseudo-register; found %s", reg)
+	}
+	// A '$' prefix overrides whatever type was chosen above.
+	if prefix == '$' {
+		addr.Type = obj.TYPE_ADDR
+	}
+}
+
+// symRefAttrs parses the optional attribute clause that may follow the
+// function symbol 'name'. A malformed clause is reported only when
+// 'issueError' is true. The result is a (static, ABI) pair.
+//
+// The expected form of the attribute clause is:
+//
+//	empty,           yielding (false, obj.ABI0)
+//	"<>",            yielding (true,  obj.ABI0)
+//	"<ABI0>"         yielding (false, obj.ABI0)
+//	"<ABIInternal>"  yielding (false, obj.ABIInternal)
+//
+// Anything else beginning with "<" logs an error if issueError is
+// true, otherwise returns (false, obj.ABI0). An ABI selector is only
+// honored when compiling the runtime.
+func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) {
+	if p.peek() != '<' {
+		// No attribute clause at all.
+		return false, obj.ABI0
+	}
+	p.next() // consume the '<'
+	static, selected := false, obj.ABI0
+	switch p.peek() {
+	case '>':
+		static = true
+	case scanner.Ident:
+		abistr := p.get(scanner.Ident).String()
+		if !p.compilingRuntime {
+			if issueError {
+				p.errorf("ABI selector only permitted when compiling runtime, reference was to %q", name)
+			}
+		} else if theabi, valid := obj.ParseABI(abistr); valid {
+			selected = theabi
+		} else if issueError {
+			p.errorf("malformed ABI selector %q in reference to %q",
+				abistr, name)
+		}
+	}
+	p.get('>')
+	return static, selected
+}
+
+// funcAddress parses an external function address. This is a
+// constrained form of the operand syntax that's always SB-based,
+// non-static, and has at most a simple integer offset:
+//
+//	[$|*]sym[<abi>][+Int](SB)
+//
+// It returns the symbol name, its ABI and true on a match. Any
+// deviation yields ("", obj.ABI0, false) without an error message.
+func (p *Parser) funcAddress() (string, obj.ABI, bool) {
+	// Skip prefix.
+	if lead := p.peek(); lead == '$' || lead == '*' {
+		p.next()
+	}
+
+	symTok := p.next()
+	name := symTok.String()
+	if symTok.ScanToken != scanner.Ident || p.atStartOfRegister(name) {
+		return "", obj.ABI0, false
+	}
+	// Parse optional <> (indicates a static symbol) or
+	// <ABIxxx> (selecting text symbol with specific ABI).
+	// Errors in the clause are deliberately not reported here.
+	isStatic, abi := p.symRefAttrs(name, false)
+	if isStatic {
+		return "", obj.ABI0, false // This function rejects static symbols.
+	}
+	cur := p.next()
+	if cur.ScanToken == '+' {
+		// Optional "+Int" offset.
+		if offset := p.next(); offset.ScanToken != scanner.Int {
+			return "", obj.ABI0, false
+		}
+		cur = p.next()
+	}
+	if cur.ScanToken != '(' {
+		return "", obj.ABI0, false
+	}
+	if base := p.next(); base.ScanToken != scanner.Ident || base.String() != "SB" {
+		return "", obj.ABI0, false
+	}
+	// The closing parenthesis must be the last token of the operand.
+	if p.next().ScanToken != ')' || p.peek() != scanner.EOF {
+		return "", obj.ABI0, false
+	}
+	return name, abi, true
+}
+
+// registerIndirect parses the general form of a register indirection and stores the result in a; prefix is the operand prefix seen before it (0 if none).
+// It can be (R1), (R2*scale), (R1)(R2*scale), (R1)(R2.SXTX<<3) or (R1)(R2<<3)
+// where R1 may be a simple register or register pair R:R or (R, R) or (R+R).
+// Or it might be a pseudo-indirection like (FP), which is handed to setPseudoRegister.
+// We are sitting on the opening parenthesis.
+func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) {
+ p.get('(')
+ tok := p.next()
+ name := tok.String()
+ r1, r2, scale, ok := p.register(name, 0)
+ if !ok {
+ p.errorf("indirect through non-register %s", tok)
+ }
+ p.get(')')
+ a.Type = obj.TYPE_MEM
+ if r1 < 0 {
+ // Pseudo-register reference: a negative register number is treated as a pseudo-register here.
+ if r2 != 0 {
+ p.errorf("cannot use pseudo-register in pair")
+ return
+ }
+ // For SB, SP, and FP, there must be a name here. 0(FP) is not legal.
+ if name != "PC" && a.Name == obj.NAME_NONE {
+ p.errorf("cannot reference %s without a symbol", name)
+ }
+ p.setPseudoRegister(a, name, false, prefix)
+ return
+ }
+ a.Reg = r1
+ if r2 != 0 {
+ // TODO: Consistency in the encoding would be nice here.
+ if p.arch.InFamily(sys.ARM, sys.ARM64) {
+ // Special form
+ // ARM: destination register pair (R1, R2).
+ // ARM64: register pair (R1, R2) for LDP/STP.
+ if prefix != 0 || scale != 0 {
+ p.errorf("illegal address mode for register pair")
+ return
+ }
+ a.Type = obj.TYPE_REGREG
+ a.Offset = int64(r2) // The second register of the pair is carried in Offset.
+ // Nothing may follow
+ return
+ }
+ if p.arch.Family == sys.PPC64 {
+ // Special form for PPC64: (R1+R2); alias for (R1)(R2*1).
+ if prefix != 0 || scale != 0 {
+ p.errorf("illegal address mode for register+register")
+ return
+ }
+ a.Type = obj.TYPE_MEM
+ a.Scale = 1
+ a.Index = r2
+ // Nothing may follow.
+ return
+ }
+ }
+ if r2 != 0 { // Reached only on architectures other than ARM, ARM64 and PPC64, which returned above.
+ p.errorf("indirect through register pair")
+ }
+ if prefix == '$' {
+ a.Type = obj.TYPE_ADDR
+ }
+ if r1 == arch.RPC && prefix != 0 {
+ p.errorf("illegal addressing mode for PC")
+ }
+ if scale == 0 && p.peek() == '(' {
+ // General form (R)(R*scale): a second parenthesized group supplies the index register.
+ p.next()
+ tok := p.next()
+ if p.atRegisterExtension() {
+ p.registerExtension(a, tok.String(), prefix)
+ } else if p.atRegisterShift() {
+ // (R1)(R2<<3); the shifted index is parsed by registerExtension as well.
+ p.registerExtension(a, tok.String(), prefix)
+ } else {
+ r1, r2, scale, ok = p.register(tok.String(), 0)
+ if !ok {
+ p.errorf("indirect through non-register %s", tok)
+ }
+ if r2 != 0 {
+ p.errorf("unimplemented two-register form")
+ }
+ a.Index = r1
+ if scale != 0 && scale != 1 && p.arch.Family == sys.ARM64 {
+ // Support (R1)(R2) (no scaling) and (R1)(R2*1).
+ p.errorf("arm64 doesn't support scaled register format")
+ } else {
+ a.Scale = int16(scale)
+ }
+ }
+ p.get(')')
+ } else if scale != 0 {
+ if p.arch.Family == sys.ARM64 {
+ p.errorf("arm64 doesn't support scaled register format")
+ }
+ // First (R) was missing, all we have is (R*scale), so r1 is the index register and there is no base.
+ a.Reg = 0
+ a.Index = r1
+ a.Scale = int16(scale)
+ }
+}
+
+// registerList parses a register list expression, a list of registers
+// in []. The opening bracket has been consumed.
+//
+// On ARM and ARM64 the list may hold comma-separated ranges or individual
+// registers, as in [R1,R3-R5] or [V1.S4, V2.S4, V3.S4, V4.S4].
+// For ARM, only R0 through R15 may appear.
+// For ARM64, V0 through V31 with arrangement may appear.
+//
+// For 386/AMD64 register list specifies 4VNNIW-style multi-source operand.
+// For range of 4 elements, Intel manual uses "+3" notation, for example:
+//	VP4DPWSSDS zmm1{k1}{z}, zmm2+3, m128
+// Given asm line:
+//	VP4DPWSSDS Z5, [Z10-Z13], (AX)
+// zmm2 is Z10, and Z13 is the only valid value for it (Z10+3).
+// Only simple ranges are accepted, like [Z0-Z3].
+func (p *Parser) registerList(a *obj.Addr) {
+	// x86 has its own list syntax; everything else goes through the ARM parser.
+	if !p.arch.InFamily(sys.I386, sys.AMD64) {
+		p.registerListARM(a)
+		return
+	}
+	p.registerListX86(a)
+}
+
+// registerListARM parses the body of an ARM or ARM64 register list. The
+// opening bracket has already been consumed and parsing stops after the
+// closing one. On completion a is a TYPE_REGLIST operand whose Offset is,
+// for ARM, a bit mask with one bit per listed register and, for ARM64, the
+// value arch.ARM64RegisterListOffset computes from the first register, the
+// register count and the arrangement shared by all elements.
+func (p *Parser) registerListARM(a *obj.Addr) {
+	// One range per loop.
+	var maxReg int
+	var bits uint16
+	var arrangement int64
+	switch p.arch.Family {
+	case sys.ARM:
+		maxReg = 16
+	case sys.ARM64:
+		maxReg = 32
+	default:
+		p.errorf("unexpected register list")
+	}
+	firstReg := -1
+	nextReg := -1
+	regCnt := 0
+ListLoop:
+	for {
+		tok := p.next()
+		switch tok.ScanToken {
+		case ']':
+			break ListLoop
+		case scanner.EOF:
+			p.errorf("missing ']' in register list")
+			return
+		}
+		switch p.arch.Family {
+		case sys.ARM64:
+			// Vn.T
+			name := tok.String()
+			r, ok := p.registerReference(name)
+			if !ok {
+				p.errorf("invalid register: %s", name)
+			}
+			reg := r - p.arch.Register["V0"]
+			p.get('.')
+			// A distinct name keeps the list element token tok unshadowed.
+			extTok := p.next()
+			ext := extTok.String()
+			curArrangement, err := arch.ARM64RegisterArrangement(reg, name, ext)
+			if err != nil {
+				// Pass the error as an argument: its text is not a format string.
+				p.errorf("%s", err)
+			}
+			if firstReg == -1 {
+				// only record the first register and arrangement
+				firstReg = int(reg)
+				nextReg = firstReg
+				arrangement = curArrangement
+			} else if curArrangement != arrangement {
+				p.errorf("inconsistent arrangement in ARM64 register list")
+			} else if nextReg != int(reg) {
+				p.errorf("incontiguous register in ARM64 register list: %s", name)
+			}
+			regCnt++
+			// Registers must be consecutive, wrapping around after V31.
+			nextReg = (nextReg + 1) % 32
+		case sys.ARM:
+			// Parse the upper and lower bounds.
+			lo := p.registerNumber(tok.String())
+			hi := lo
+			if p.peek() == '-' {
+				p.next()
+				hi = p.registerNumber(p.next().String())
+			}
+			if hi < lo {
+				lo, hi = hi, lo
+			}
+			// Check there are no duplicates in the register list.
+			for i := 0; lo <= hi && i < maxReg; i++ {
+				if bits&(1<<lo) != 0 {
+					p.errorf("register R%d already in list", lo)
+				}
+				bits |= 1 << lo
+				lo++
+			}
+		default:
+			p.errorf("unexpected register list")
+		}
+		if p.peek() != ']' {
+			p.get(',')
+		}
+	}
+	a.Type = obj.TYPE_REGLIST
+	switch p.arch.Family {
+	case sys.ARM:
+		a.Offset = int64(bits)
+	case sys.ARM64:
+		offset, err := arch.ARM64RegisterListOffset(firstReg, regCnt, arrangement)
+		if err != nil {
+			p.errorf("%s", err)
+		}
+		a.Offset = offset
+	default:
+		p.errorf("register list not supported on this architecture")
+	}
+}
+
+// registerListX86 parses a 386/AMD64 register list, which must have the
+// form [RegA-RegB]; the opening bracket has already been consumed. On
+// success a is a TYPE_REGLIST operand with Reg set to the low register and
+// Offset set to the range encoded by x86.EncodeRegisterRange.
+func (p *Parser) registerListX86(a *obj.Addr) {
+	// p.get() is avoided on purpose: explicit checks give better error messages.
+
+	loName := p.next().String()
+	lo, known := p.arch.Register[loName]
+	switch {
+	case known:
+		// Carry on with the rest of the range.
+	case loName == "EOF":
+		p.errorf("register list: expected ']', found EOF")
+		return
+	default:
+		p.errorf("register list: bad low register in `[%s`", loName)
+		return
+	}
+	if dash := p.next().ScanToken; dash != '-' {
+		p.errorf("register list: expected '-' after `[%s`, found %s", loName, dash)
+		return
+	}
+	hiName := p.next().String()
+	hi, known := p.arch.Register[hiName]
+	if !known {
+		p.errorf("register list: bad high register in `[%s-%s`", loName, hiName)
+		return
+	}
+	if closing := p.next().ScanToken; closing != ']' {
+		// Reported, but the operand is still filled in below.
+		p.errorf("register list: expected ']' after `[%s-%s`, found %s", loName, hiName, closing)
+	}
+
+	a.Type = obj.TYPE_REGLIST
+	a.Reg = lo
+	a.Offset = x86.EncodeRegisterRange(lo, hi)
+}
+
+// registerNumber is ARM-specific. It returns the number of the specified register,
+// counted from R0, with "g" standing for register 10 on ARM. After reporting
+// an error it returns 0.
+func (p *Parser) registerNumber(name string) uint16 {
+	if p.arch.Family == sys.ARM && name == "g" {
+		return 10
+	}
+	// Check the length first so an empty name is reported as an error
+	// instead of panicking on name[0].
+	if len(name) == 0 || name[0] != 'R' {
+		p.errorf("expected g or R0 through R15; found %s", name)
+		return 0
+	}
+	r, ok := p.registerReference(name)
+	if !ok {
+		return 0
+	}
+	reg := r - p.arch.Register["R0"]
+	if reg < 0 {
+		// Could happen for an architecture having other registers prefixed by R
+		p.errorf("expected g or R0 through R15; found %s", name)
+		return 0
+	}
+	return uint16(reg)
+}
+
+// Note: There are two changes in the expression handling here
+// compared to the old yacc/C implementations. Neither has
+// much practical consequence because the expressions we
+// see in assembly code are simple, but for the record:
+//
+// 1) Evaluation uses uint64; the old one used int64.
+// 2) Precedence uses Go rules not C rules.
+
+// expr = term | term ('+' | '-' | '|' | '^') term.
+// Operators of this level are applied left to right to a running
+// uint64 value.
+func (p *Parser) expr() uint64 {
+	acc := p.term()
+	for {
+		op := p.peek()
+		switch op {
+		case '+', '-', '|', '^':
+			p.next() // consume the operator
+		default:
+			// Not an operator of this level: the expression ends here.
+			return acc
+		}
+		rhs := p.term()
+		switch op {
+		case '+':
+			acc += rhs
+		case '-':
+			acc -= rhs
+		case '|':
+			acc |= rhs
+		case '^':
+			acc ^= rhs
+		}
+	}
+}
+
+// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
+// Any other leading token is reported as an error and yields 0.
+func (p *Parser) floatExpr() float64 {
+	switch tok := p.next(); tok.ScanToken {
+	case scanner.Float:
+		return p.atof(tok.String())
+	case '+':
+		return +p.floatExpr()
+	case '-':
+		return -p.floatExpr()
+	case '(':
+		inner := p.floatExpr()
+		if closing := p.next(); closing.ScanToken != ')' {
+			p.errorf("missing closing paren")
+		}
+		return inner
+	default:
+		p.errorf("unexpected %s evaluating float expression", tok)
+		return 0
+	}
+}
+
+// term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
+// All operators of this level have the same precedence and are applied
+// left to right to a running uint64 value, each taking a single factor
+// as its right operand. Division, modulo and right shift report an
+// error when the left value has its high bit set; shifts report an error
+// when the count has its high bit set.
+func (p *Parser) term() uint64 {
+	value := p.factor()
+	for {
+		switch p.peek() {
+		case '*':
+			p.next()
+			value *= p.factor()
+		case '/':
+			p.next()
+			if int64(value) < 0 {
+				p.errorf("divide of value with high bit set")
+			}
+			divisor := p.factor()
+			if divisor == 0 {
+				p.errorf("division by zero")
+			} else {
+				value /= divisor
+			}
+		case '%':
+			p.next()
+			divisor := p.factor()
+			if int64(value) < 0 {
+				p.errorf("modulo of value with high bit set")
+			}
+			if divisor == 0 {
+				p.errorf("modulo by zero")
+			} else {
+				value %= divisor
+			}
+		case lex.LSH:
+			p.next()
+			shift := p.factor()
+			if int64(shift) < 0 {
+				p.errorf("negative left shift count")
+			}
+			// Keep looping so operators after the shift, as in 1<<2*3,
+			// are still parsed as part of this term.
+			value <<= shift
+		case lex.RSH:
+			p.next()
+			// The count is a single factor, like every other right
+			// operand here, so 16>>2>>1 groups as (16>>2)>>1.
+			shift := p.factor()
+			if int64(shift) < 0 {
+				p.errorf("negative right shift count")
+			}
+			if int64(value) < 0 {
+				p.errorf("right shift of value with high bit set")
+			}
+			value >>= shift
+		case '&':
+			p.next()
+			value &= p.factor()
+		default:
+			return value
+		}
+	}
+}
+
+// factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
+// A const is an integer or a character constant. Any other leading
+// token is reported as an error and yields 0.
+func (p *Parser) factor() uint64 {
+	switch tok := p.next(); tok.ScanToken {
+	case scanner.Int:
+		return p.atoi(tok.String())
+	case scanner.Char:
+		text, err := strconv.Unquote(tok.String())
+		if err != nil {
+			p.errorf("%s", err)
+		}
+		ch, size := utf8.DecodeRuneInString(text)
+		// RuneError with width 1 is how the decoder flags invalid UTF-8.
+		if ch == utf8.RuneError && size == 1 {
+			p.errorf("illegal UTF-8 encoding for character constant")
+		}
+		return uint64(ch)
+	case '+':
+		return +p.factor()
+	case '-':
+		return -p.factor()
+	case '~':
+		return ^p.factor()
+	case '(':
+		inner := p.expr()
+		if p.next().ScanToken != ')' {
+			p.errorf("missing closing paren")
+		}
+		return inner
+	default:
+		p.errorf("unexpected %s evaluating expression", tok)
+		return 0
+	}
+}
+
+// positiveAtoi parses str as an int64 that must be >= 0. The base is
+// taken from the string's prefix. A parse failure or a negative result
+// is reported through errorf, and the parsed value is returned either way.
+func (p *Parser) positiveAtoi(str string) int64 {
+	n, err := strconv.ParseInt(str, 0, 64)
+	if err != nil {
+		p.errorf("%s", err)
+	}
+	if n < 0 {
+		p.errorf("%s overflows int64", str)
+	}
+	return n
+}
+
+// atoi parses str as a uint64, taking the base from the string's prefix.
+// A parse failure is reported through errorf; the value ParseUint
+// produced is returned either way.
+func (p *Parser) atoi(str string) uint64 {
+	n, err := strconv.ParseUint(str, 0, 64)
+	if err != nil {
+		p.errorf("%s", err)
+	}
+	return n
+}
+
+// atof parses str as a float64. A parse failure is reported through
+// errorf; the value ParseFloat produced is returned either way.
+func (p *Parser) atof(str string) float64 {
+	f, err := strconv.ParseFloat(str, 64)
+	if err != nil {
+		p.errorf("%s", err)
+	}
+	return f
+}
+
+// EOF represents the end of input: it is the token next returns once no tokens remain.
+var EOF = lex.Make(scanner.EOF, "EOF")
+
+// next returns the current token and advances past it. Once the input
+// is exhausted it returns EOF and leaves the position alone.
+func (p *Parser) next() lex.Token {
+	if p.more() {
+		tok := p.input[p.inputPos]
+		p.inputPos++
+		return tok
+	}
+	return EOF
+}
+
+// back steps the input position back by one token. Backing up past the
+// first token is reported as an internal error and leaves the position
+// unchanged.
+func (p *Parser) back() {
+	if p.inputPos != 0 {
+		p.inputPos--
+		return
+	}
+	p.errorf("internal error: backing up before BOL")
+}
+
+// peek returns the type of the current token without consuming it, or
+// scanner.EOF when the input is exhausted.
+func (p *Parser) peek() lex.ScanToken {
+	if !p.more() {
+		return scanner.EOF
+	}
+	return p.input[p.inputPos].ScanToken
+}
+
+// more reports whether any tokens remain at or after the current position.
+func (p *Parser) more() bool {
+	return len(p.input) > p.inputPos
+}
+
+// get checks that the next item has the expected type, reporting an
+// error through expect if it does not, and then returns the next token.
+func (p *Parser) get(expected lex.ScanToken) lex.Token {
+	description := expected.String()
+	p.expect(expected, description)
+	tok := p.next()
+	return tok
+}
+
+// expectOperandEnd verifies that the parsing state is properly at the end of an operand.
+func (p *Parser) expectOperandEnd() {
+ p.expect(scanner.EOF, "end of operand")
+}
+
+// expect verifies that the next item has the expected type. A matching
+// item is not consumed. On a mismatch the offending token is consumed
+// and named in the error message, with expectedMessage describing what
+// was wanted.
+func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) {
+	if p.peek() == expectedToken {
+		return
+	}
+	found := p.next()
+	p.errorf("expected %s, found %s", expectedMessage, found)
+}
+
+// have reports whether token occurs anywhere in the remaining input,
+// the current token included. Nothing is consumed.
+func (p *Parser) have(token lex.ScanToken) bool {
+	end := len(p.input)
+	for pos := p.inputPos; pos < end; pos++ {
+		if candidate := p.input[pos]; candidate.ScanToken == token {
+			return true
+		}
+	}
+	return false
+}
+
+// at reports whether the upcoming tokens have exactly the requested
+// types, in order, starting at the current one. Nothing is consumed.
+func (p *Parser) at(next ...lex.ScanToken) bool {
+	if remaining := len(p.input) - p.inputPos; remaining < len(next) {
+		// Not enough tokens left to match the whole sequence.
+		return false
+	}
+	for offset, want := range next {
+		if got := p.input[p.inputPos+offset].ScanToken; got != want {
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/cmd/asm/internal/asm/pseudo_test.go b/src/cmd/asm/internal/asm/pseudo_test.go
new file mode 100644
index 0000000..fe6ffa6
--- /dev/null
+++ b/src/cmd/asm/internal/asm/pseudo_test.go
@@ -0,0 +1,103 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+
+ "cmd/asm/internal/lex"
+)
+
+// tokenize splits s at commas and tokenizes each piece with
+// lex.Tokenize, giving one token slice per operand. An empty string
+// yields an empty, non-nil result.
+func tokenize(s string) [][]lex.Token {
+	operands := [][]lex.Token{}
+	if s != "" {
+		for _, field := range strings.Split(s, ",") {
+			operands = append(operands, lex.Tokenize(field))
+		}
+	}
+	return operands
+}
+
+func TestErroneous(t *testing.T) {
+ // TestErroneous feeds operand lists to the parser's pseudo-instruction handler and compares everything written to the error writer with the expected text.
+ type errtest struct {
+ pseudo string // Pseudo-instruction name passed to parser.pseudo.
+ operands string // Comma-separated operand text, split and tokenized by tokenize.
+ expected string // Exact error output expected; "" means no error output at all.
+ }
+ // Cases run with compilingRuntime set to false.
+ nonRuntimeTests := []errtest{
+ {"TEXT", "", "expect two or three operands for TEXT"},
+ {"TEXT", "%", "expect two or three operands for TEXT"},
+ {"TEXT", "1, 1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
+ {"TEXT", "$\"foo\", 0, $1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
+ {"TEXT", "$0É:0, 0, $1", "expected end of operand, found É"}, // Issue #12467.
+ {"TEXT", "$:0:(SB, 0, $1", "expected '(', found 0"}, // Issue 12468.
+ {"TEXT", "@B(SB),0,$0", "expected '(', found B"}, // Issue 23580.
+ {"TEXT", "foo<ABIInternal>(SB),0", "ABI selector only permitted when compiling runtime, reference was to \"foo\""},
+ {"FUNCDATA", "", "expect two operands for FUNCDATA"},
+ {"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"},
+ {"DATA", "", "expect two operands for DATA"},
+ {"DATA", "0", "expect two operands for DATA"},
+ {"DATA", "(0), 1", "expect /size for DATA argument"},
+ {"DATA", "@B(SB)/4,0", "expected '(', found B"}, // Issue 23580.
+ {"DATA", "·A(SB)/4,0", "DATA value must be an immediate constant or address"},
+ {"DATA", "·B(SB)/4,$0", ""},
+ {"DATA", "·C(SB)/5,$0", "bad int size for DATA argument: 5"},
+ {"DATA", "·D(SB)/5,$0.0", "bad float size for DATA argument: 5"},
+ {"DATA", "·E(SB)/4,$·A(SB)", "bad addr size for DATA argument: 4"},
+ {"DATA", "·F(SB)/8,$·A(SB)", ""},
+ {"DATA", "·G(SB)/5,$\"abcde\"", ""},
+ {"GLOBL", "", "expect two or three operands for GLOBL"},
+ {"GLOBL", "0,1", "GLOBL symbol \"<erroneous symbol>\" must be a symbol(SB)"},
+ {"GLOBL", "@B(SB), 0", "expected '(', found B"}, // Issue 23580.
+ {"PCDATA", "", "expect two operands for PCDATA"},
+ {"PCDATA", "1", "expect two operands for PCDATA"},
+ }
+ // Cases run with compilingRuntime set to true; the same TEXT line as above now gets past the ABI selector check.
+ runtimeTests := []errtest{
+ {"TEXT", "foo<ABIInternal>(SB),0", "TEXT \"foo\": ABIInternal requires NOSPLIT"},
+ }
+ // testcats pairs each table with the compilingRuntime setting it is run under.
+ testcats := []struct {
+ compilingRuntime bool
+ tests []errtest
+ }{
+ {
+ compilingRuntime: false,
+ tests: nonRuntimeTests,
+ },
+ {
+ compilingRuntime: true,
+ tests: runtimeTests,
+ },
+ }
+
+ // Note these errors should be independent of the architecture.
+ // Just run the test with amd64. Parser errors are captured in buf through errorWriter.
+ parser := newParser("amd64")
+ var buf bytes.Buffer
+ parser.errorWriter = &buf
+
+ for _, cat := range testcats {
+ for _, test := range cat.tests {
+ parser.compilingRuntime = cat.compilingRuntime
+ parser.errorCount = 0 // Start every case with a fresh error count.
+ parser.lineNum++ // Give every case its own line number.
+ if !parser.pseudo(test.pseudo, tokenize(test.operands)) {
+ t.Fatalf("Wrong pseudo-instruction: %s", test.pseudo)
+ }
+ errorLine := buf.String()
+ if test.expected != errorLine {
+ t.Errorf("Unexpected error %q; expected %q", errorLine, test.expected)
+ }
+ buf.Reset() // Discard this case's output before the next one.
+ }
+ }
+
+}
diff --git a/src/cmd/asm/internal/asm/testdata/386.s b/src/cmd/asm/internal/asm/testdata/386.s
new file mode 100644
index 0000000..e0855f5
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/386.s
@@ -0,0 +1,98 @@
+// This input was created by taking the instruction productions in
+// the old assembler's (8a's) grammar and hand-writing complete
+// instructions for each rule, to guarantee we cover the same space. Each section below is headed by the production it was written for.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB), DUPOK|NOSPLIT, $0
+
+// LTYPE1 nonrem { outcode(int($1), &$2); }
+ SETCC AX
+ SETCC foo+4(SB)
+
+// LTYPE2 rimnon { outcode(int($1), &$2); }
+ DIVB AX
+ DIVB foo+4(SB)
+ PUSHL $foo+4(SB)
+ POPL AX
+
+// LTYPE3 rimrem { outcode(int($1), &$2); }
+ SUBB $1, AX
+ SUBB $1, foo+4(SB)
+ SUBB BX, AX
+ SUBB BX, foo+4(SB)
+
+// LTYPE4 remrim { outcode(int($1), &$2); }
+ CMPB AX, $1
+ CMPB foo+4(SB), $4
+ CMPB BX, AX
+ CMPB foo+4(SB), BX
+
+// LTYPER nonrel { outcode(int($1), &$2); }
+label:
+ JC label // JCS
+ JC -1(PC) // JCS -1(PC)
+
+// LTYPEC spec3 { outcode(int($1), &$2); }
+ CALL AX
+ JCS 2(PC)
+ JMP *AX // JMP AX
+ CALL *foo(SB)
+ JCS 2(PC)
+ JMP $4
+ JCS 2(PC)
+ JMP label // JMP 16
+ CALL foo(SB)
+// CALL (AX*4) // TODO: This line is silently dropped on the floor!
+ CALL foo+4(SB)(AX*4)
+ CALL *4(SP) // CALL 4(SP)
+ CALL *(AX) // CALL (AX)
+ CALL *(SP) // CALL (SP)
+// CALL *(AX*4) // TODO: This line is silently dropped on the floor!
+ CALL *(AX)(AX*4) // CALL (AX)(AX*4)
+ CALL 4(SP)
+ CALL (AX)
+ CALL (SP)
+// CALL (AX*4) // TODO: This line is silently dropped on the floor!
+ JCS 2(PC)
+ JMP (AX)(AX*4)
+
+// LTYPEN spec4 { outcode(int($1), &$2); }
+ NOP
+ NOP AX
+ NOP foo+4(SB)
+
+// LTYPES spec5 { outcode(int($1), &$2); }
+ SHLL $4, BX
+ SHLL $4, foo+4(SB)
+ SHLL $4, foo+4(SB):AX // SHLL $4, AX, foo+4(SB)
+
+// LTYPEM spec6 { outcode(int($1), &$2); }
+ MOVL AX, BX
+ MOVL $4, BX
+
+// LTYPEI spec7 { outcode(int($1), &$2); }
+ IMULL AX
+ IMULL $4, CX
+ IMULL AX, BX
+
+// LTYPEXC spec9 { outcode(int($1), &$2); }
+ CMPPD X0, X1, 4
+ CMPPD foo+4(SB), X1, 4
+
+// LTYPEX spec10 { outcode(int($1), &$2); }
+ PINSRD $1, (AX), X0
+ PINSRD $2, foo+4(FP), X0
+
+// This was once a bug: LOOP is a branch instruction.
+ JCS 2(PC)
+loop:
+ LOOP loop // LOOP
+
+// Tests for TLS reference, with and without an offset.
+ MOVL (TLS), AX
+ MOVL 8(TLS), DX
+
+// LTYPE0 nonnon { outcode(int($1), &$2); }
+ RET
+ RET foo(SB)
diff --git a/src/cmd/asm/internal/asm/testdata/386enc.s b/src/cmd/asm/internal/asm/testdata/386enc.s
new file mode 100644
index 0000000..4af6de3
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/386enc.s
@@ -0,0 +1,37 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ // Instructions that were previously encoded with BYTE sequences.
+ // Included to simplify validation of the CL (change list) that fixed that.
+ MOVQ (AX), M0 // 0f6f00
+ MOVQ M0, 8(SP) // 0f7f442408
+ MOVQ 8(SP), M0 // 0f6f442408
+ MOVQ M0, (AX) // 0f7f00
+ MOVQ M0, (BX) // 0f7f03
+ // On non-64bit arch, Go asm allowed uint32 offsets instead of int32.
+ // These tests check that property for backwards-compatibility: each pair below lists the same encoding for both spellings of the offset.
+ MOVL 2147483648(AX), AX // 8b8000000080
+ MOVL -2147483648(AX), AX // 8b8000000080
+ ADDL 2147483648(AX), AX // 038000000080
+ ADDL -2147483648(AX), AX // 038000000080
+ // Make sure MOV CR/DR continues to work after changing its movtabs.
+ MOVL CR0, AX // 0f20c0
+ MOVL CR0, DX // 0f20c2
+ MOVL CR4, DI // 0f20e7
+ MOVL AX, CR0 // 0f22c0
+ MOVL DX, CR0 // 0f22c2
+ MOVL DI, CR4 // 0f22e7
+ MOVL DR0, AX // 0f21c0
+ MOVL DR6, DX // 0f21f2
+ MOVL DR7, SI // 0f21fe
+ // Test other movtab entries (segment register push and pop).
+ PUSHL SS // 16
+ PUSHL FS // 0fa0
+ POPL FS // 0fa1
+ POPL SS // 17
+ // End of tests.
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/amd64.s b/src/cmd/asm/internal/asm/testdata/amd64.s
new file mode 100644
index 0000000..1dec7f4
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/amd64.s
@@ -0,0 +1,152 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input was created by taking the instruction productions in
+// the old assembler's (6a's) grammar and hand-writing complete
+// instructions for each rule, to guarantee we cover the same space.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB), DUPOK|NOSPLIT, $0
+
+// LTYPE1 nonrem { outcode($1, &$2); }
+ NEGQ R11
+ NEGQ 4(R11)
+ NEGQ foo+4(SB)
+
+// LTYPE2 rimnon { outcode($1, &$2); }
+ INT $4
+ DIVB R11
+ DIVB 4(R11)
+ DIVB foo+4(SB)
+
+// LTYPE3 rimrem { outcode($1, &$2); }
+ SUBQ $4, DI
+ SUBQ R11, DI
+ SUBQ 4(R11), DI
+ SUBQ foo+4(SB), DI
+ SUBQ $4, 8(R12)
+ SUBQ R11, 8(R12)
+ SUBQ R11, foo+4(SB)
+
+// LTYPE4 remrim { outcode($1, &$2); }
+ CMPB CX, $4
+
+// LTYPER nonrel { outcode($1, &$2); }
+label:
+ JB -4(PC) // JCS -4(PC)
+ JB label // JCS 17
+
+// LTYPEC spec3 { outcode($1, &$2); }
+ JCS 2(PC)
+ JMP -4(PC)
+ JCS 2(PC)
+ JMP label // JMP 17
+ JCS 2(PC)
+ JMP foo+4(SB)
+ JCS 2(PC)
+ JMP bar<>+4(SB)
+ JCS 2(PC)
+ JMP bar<>+4(SB)(R11*4)
+ JCS 2(PC)
+ JMP *4(SP) // JMP 4(SP)
+ JCS 2(PC)
+ JMP *(R12) // JMP (R12)
+ JCS 2(PC)
+// JMP *(R12*4) // TODO: This line is silently dropped on the floor!
+ JCS 2(PC)
+ JMP *(R12)(R13*4) // JMP (R12)(R13*4)
+ JCS 2(PC)
+ JMP *(AX) // JMP (AX)
+ JCS 2(PC)
+ JMP *(SP) // JMP (SP)
+ JCS 2(PC)
+// JMP *(AX*4) // TODO: This line is silently dropped on the floor!
+ JCS 2(PC)
+ JMP *(AX)(AX*4) // JMP (AX)(AX*4)
+ JCS 2(PC)
+ JMP 4(SP)
+ JCS 2(PC)
+ JMP (R12)
+ JCS 2(PC)
+// JMP (R12*4) // TODO: This line is silently dropped on the floor!
+ JCS 2(PC)
+ JMP (R12)(R13*4)
+ JCS 2(PC)
+ JMP (AX)
+ JCS 2(PC)
+ JMP (SP)
+ JCS 2(PC)
+// JMP (AX*4) // TODO: This line is silently dropped on the floor!
+ JCS 2(PC)
+ JMP (AX)(AX*4)
+ JCS 2(PC)
+ JMP R13
+
+// LTYPEN spec4 { outcode($1, &$2); }
+ NOP
+ NOP AX
+ NOP foo+4(SB)
+
+// LTYPES spec5 { outcode($1, &$2); }
+ SHLL CX, R12
+ SHLL CX, foo+4(SB)
+ // Old syntax, still accepted:
+ SHLL CX, R11:AX // SHLL CX, AX, R11
+
+// LTYPEM spec6 { outcode($1, &$2); }
+ MOVL AX, R11
+ MOVL $4, R11
+// MOVL AX, 0(AX):DS // no longer works - did it ever?
+
+// LTYPEI spec7 { outcode($1, &$2); }
+ IMULB DX
+ IMULW DX, BX
+ IMULL R11, R12
+ IMULQ foo+4(SB), R11
+
+// LTYPEXC spec8 { outcode($1, &$2); }
+ CMPPD X1, X2, 4
+ CMPPD foo+4(SB), X2, 4
+
+// LTYPEX spec9 { outcode($1, &$2); }
+ PINSRW $4, AX, X2
+ PINSRW $4, foo+4(SB), X2
+
+// LTYPERT spec10 { outcode($1, &$2); }
+ JCS 2(PC)
+ RETFL $4
+
+// Was bug: LOOP is a branch instruction.
+ JCS 2(PC)
+loop:
+ LOOP loop // LOOP
+
+ // Intel pseudonyms for our own renamings.
+ PADDD M2, M1 // PADDL M2, M1
+ MOVDQ2Q X1, M1 // MOVQ X1, M1
+ MOVNTDQ X1, (AX) // MOVNTO X1, (AX)
+ MOVOA (AX), X1 // MOVO (AX), X1
+
+// Tests for SP indexed addresses.
+ MOVQ foo(SP)(AX*1), BX // 488b1c04
+ MOVQ foo+32(SP)(CX*2), DX // 488b544c20
+ MOVQ foo+32323(SP)(R8*4), R9 // 4e8b8c84437e0000
+ MOVL foo(SP)(SI*8), DI // 8b3cf4
+ MOVL foo+32(SP)(R10*1), R11 // 468b5c1420
+ MOVL foo+32323(SP)(R12*2), R13 // 468bac64437e0000
+ MOVW foo(SP)(AX*4), R8 // 66448b0484
+ MOVW foo+32(SP)(R9*8), CX // 66428b4ccc20
+ MOVW foo+32323(SP)(AX*1), DX // 668b9404437e0000
+ MOVB foo(SP)(AX*2), AL // 8a0444
+ MOVB foo+32(SP)(CX*4), AH // 8a648c20
+ MOVB foo+32323(SP)(CX*8), R9 // 448a8ccc437e0000
+
+// Tests for TLS reference.
+ MOVQ (TLS), AX
+ MOVQ 8(TLS), DX
+
+// LTYPE0 nonnon { outcode($1, &$2); }
+ RET // c3
+ RET foo(SB)
diff --git a/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s b/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s
new file mode 100644
index 0000000..1eee1a1
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/amd64dynlinkerror.s
@@ -0,0 +1,68 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test to make sure that if we use R15 after it is clobbered by
+// a global variable access while dynamic linking, we get an error.
+// See issue 43661.
+
+TEXT ·a1(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ MOVL $0, R15
+ RET
+TEXT ·a2(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ MOVQ $0, R15
+ RET
+TEXT ·a3(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ XORL R15, R15
+ RET
+TEXT ·a4(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ XORQ R15, R15
+ RET
+TEXT ·a5(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ XORL R15, R15
+ RET
+TEXT ·a6(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ POPQ R15
+ PUSHQ R15
+ RET
+TEXT ·a7(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ MOVQ R15, AX // ERROR "when dynamic linking, R15 is clobbered by a global variable access and is used here"
+ RET
+TEXT ·a8(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ ADDQ AX, R15 // ERROR "when dynamic linking, R15 is clobbered by a global variable access and is used here"
+ RET
+TEXT ·a9(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ ORQ R15, R15 // ERROR "when dynamic linking, R15 is clobbered by a global variable access and is used here"
+ RET
+TEXT ·a10(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ JEQ one
+ ORQ R15, R15 // ERROR "when dynamic linking, R15 is clobbered by a global variable access and is used here"
+one:
+ RET
+TEXT ·a11(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ JEQ one
+ JMP two
+one:
+ ORQ R15, R15 // ERROR "when dynamic linking, R15 is clobbered by a global variable access and is used here"
+two:
+ RET
+TEXT ·a12(SB), 0, $0-0
+ CMPL runtime·writeBarrier(SB), $0
+ JMP one
+two:
+ ORQ R15, R15
+ RET
+one:
+ MOVL $0, R15
+ JMP two
diff --git a/src/cmd/asm/internal/asm/testdata/amd64enc.s b/src/cmd/asm/internal/asm/testdata/amd64enc.s
new file mode 100644
index 0000000..5bba292
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/amd64enc.s
@@ -0,0 +1,10700 @@
+// generated by x86test -amd64
+// DO NOT EDIT
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ ADCB $7, AL // 1407
+ ADCW $61731, AX // 661523f1
+ ADCL $4045620583, AX // 15674523f1
+ ADCQ $-249346713, AX // 4815674523f1
+ ADCW $61731, (BX) // 66811323f1
+ ADCW $61731, (R11) // 6641811323f1
+ ADCW $61731, DX // 6681d223f1
+ ADCW $61731, R11 // 664181d323f1
+ ADCW $7, (BX) // 66831307
+ ADCW $7, (R11) // 6641831307
+ ADCW $7, DX // 6683d207
+ ADCW $7, R11 // 664183d307
+ ADCW DX, (BX) // 661113
+ ADCW R11, (BX) // 6644111b
+ ADCW DX, (R11) // 66411113
+ ADCW R11, (R11) // 6645111b
+ ADCW DX, DX // 6611d2 or 6613d2
+ ADCW R11, DX // 664411da or 664113d3
+ ADCW DX, R11 // 664111d3 or 664413da
+ ADCW R11, R11 // 664511db or 664513db
+ ADCL $4045620583, (BX) // 8113674523f1
+ ADCL $4045620583, (R11) // 418113674523f1
+ ADCL $4045620583, DX // 81d2674523f1
+ ADCL $4045620583, R11 // 4181d3674523f1
+ ADCL $7, (BX) // 831307
+ ADCL $7, (R11) // 41831307
+ ADCL $7, DX // 83d207
+ ADCL $7, R11 // 4183d307
+ ADCL DX, (BX) // 1113
+ ADCL R11, (BX) // 44111b
+ ADCL DX, (R11) // 411113
+ ADCL R11, (R11) // 45111b
+ ADCL DX, DX // 11d2 or 13d2
+ ADCL R11, DX // 4411da or 4113d3
+ ADCL DX, R11 // 4111d3 or 4413da
+ ADCL R11, R11 // 4511db or 4513db
+ ADCQ $-249346713, (BX) // 488113674523f1
+ ADCQ $-249346713, (R11) // 498113674523f1
+ ADCQ $-249346713, DX // 4881d2674523f1
+ ADCQ $-249346713, R11 // 4981d3674523f1
+ ADCQ $7, (BX) // 48831307
+ ADCQ $7, (R11) // 49831307
+ ADCQ $7, DX // 4883d207
+ ADCQ $7, R11 // 4983d307
+ ADCQ DX, (BX) // 481113
+ ADCQ R11, (BX) // 4c111b
+ ADCQ DX, (R11) // 491113
+ ADCQ R11, (R11) // 4d111b
+ ADCQ DX, DX // 4811d2 or 4813d2
+ ADCQ R11, DX // 4c11da or 4913d3
+ ADCQ DX, R11 // 4911d3 or 4c13da
+ ADCQ R11, R11 // 4d11db or 4d13db
+ ADCB $7, (BX) // 801307
+ ADCB $7, (R11) // 41801307
+ ADCB $7, DL // 80d207
+ ADCB $7, R11 // 4180d307
+ ADCB DL, (BX) // 1013
+ ADCB R11, (BX) // 44101b
+ ADCB DL, (R11) // 411013
+ ADCB R11, (R11) // 45101b
+ ADCB DL, DL // 10d2 or 12d2
+ ADCB R11, DL // 4410da or 4112d3
+ ADCB DL, R11 // 4110d3 or 4412da
+ ADCB R11, R11 // 4510db or 4512db
+ ADCW (BX), DX // 661313
+ ADCW (R11), DX // 66411313
+ ADCW (BX), R11 // 6644131b
+ ADCW (R11), R11 // 6645131b
+ ADCL (BX), DX // 1313
+ ADCL (R11), DX // 411313
+ ADCL (BX), R11 // 44131b
+ ADCL (R11), R11 // 45131b
+ ADCQ (BX), DX // 481313
+ ADCQ (R11), DX // 491313
+ ADCQ (BX), R11 // 4c131b
+ ADCQ (R11), R11 // 4d131b
+ ADCB (BX), DL // 1213
+ ADCB (R11), DL // 411213
+ ADCB (BX), R11 // 44121b
+ ADCB (R11), R11 // 45121b
+ ADCXL (BX), DX // 660f38f613
+ ADCXL (R11), DX // 66410f38f613
+ ADCXL DX, DX // 660f38f6d2
+ ADCXL R11, DX // 66410f38f6d3
+ ADCXL (BX), R11 // 66440f38f61b
+ ADCXL (R11), R11 // 66450f38f61b
+ ADCXL DX, R11 // 66440f38f6da
+ ADCXL R11, R11 // 66450f38f6db
+ ADCXQ (BX), DX // 66480f38f613
+ ADCXQ (R11), DX // 66490f38f613
+ ADCXQ DX, DX // 66480f38f6d2
+ ADCXQ R11, DX // 66490f38f6d3
+ ADCXQ (BX), R11 // 664c0f38f61b
+ ADCXQ (R11), R11 // 664d0f38f61b
+ ADCXQ DX, R11 // 664c0f38f6da
+ ADCXQ R11, R11 // 664d0f38f6db
+ ADDB $7, AL // 0407
+ ADDW $61731, AX // 660523f1
+ ADDL $4045620583, AX // 05674523f1
+ ADDQ $-249346713, AX // 4805674523f1
+ ADDW $61731, (BX) // 66810323f1
+ ADDW $61731, (R11) // 6641810323f1
+ ADDW $61731, DX // 6681c223f1
+ ADDW $61731, R11 // 664181c323f1
+ ADDW $7, (BX) // 66830307
+ ADDW $7, (R11) // 6641830307
+ ADDW $7, DX // 6683c207
+ ADDW $7, R11 // 664183c307
+ ADDW DX, (BX) // 660113
+ ADDW R11, (BX) // 6644011b
+ ADDW DX, (R11) // 66410113
+ ADDW R11, (R11) // 6645011b
+ ADDW DX, DX // 6601d2 or 6603d2
+ ADDW R11, DX // 664401da or 664103d3
+ ADDW DX, R11 // 664101d3 or 664403da
+ ADDW R11, R11 // 664501db or 664503db
+ ADDL $4045620583, (BX) // 8103674523f1
+ ADDL $4045620583, (R11) // 418103674523f1
+ ADDL $4045620583, DX // 81c2674523f1
+ ADDL $4045620583, R11 // 4181c3674523f1
+ ADDL $7, (BX) // 830307
+ ADDL $7, (R11) // 41830307
+ ADDL $7, DX // 83c207
+ ADDL $7, R11 // 4183c307
+ ADDL DX, (BX) // 0113
+ ADDL R11, (BX) // 44011b
+ ADDL DX, (R11) // 410113
+ ADDL R11, (R11) // 45011b
+ ADDL DX, DX // 01d2 or 03d2
+ ADDL R11, DX // 4401da or 4103d3
+ ADDL DX, R11 // 4101d3 or 4403da
+ ADDL R11, R11 // 4501db or 4503db
+ ADDQ $-249346713, (BX) // 488103674523f1
+ ADDQ $-249346713, (R11) // 498103674523f1
+ ADDQ $-249346713, DX // 4881c2674523f1
+ ADDQ $-249346713, R11 // 4981c3674523f1
+ ADDQ $7, (BX) // 48830307
+ ADDQ $7, (R11) // 49830307
+ ADDQ $7, DX // 4883c207
+ ADDQ $7, R11 // 4983c307
+ ADDQ DX, (BX) // 480113
+ ADDQ R11, (BX) // 4c011b
+ ADDQ DX, (R11) // 490113
+ ADDQ R11, (R11) // 4d011b
+ ADDQ DX, DX // 4801d2 or 4803d2
+ ADDQ R11, DX // 4c01da or 4903d3
+ ADDQ DX, R11 // 4901d3 or 4c03da
+ ADDQ R11, R11 // 4d01db or 4d03db
+ ADDB $7, (BX) // 800307
+ ADDB $7, (R11) // 41800307
+ ADDB $7, DL // 80c207
+ ADDB $7, R11 // 4180c307
+ ADDB DL, (BX) // 0013
+ ADDB R11, (BX) // 44001b
+ ADDB DL, (R11) // 410013
+ ADDB R11, (R11) // 45001b
+ ADDB DL, DL // 00d2 or 02d2
+ ADDB R11, DL // 4400da or 4102d3
+ ADDB DL, R11 // 4100d3 or 4402da
+ ADDB R11, R11 // 4500db or 4502db
+ ADDW (BX), DX // 660313
+ ADDW (R11), DX // 66410313
+ ADDW (BX), R11 // 6644031b
+ ADDW (R11), R11 // 6645031b
+ ADDL (BX), DX // 0313
+ ADDL (R11), DX // 410313
+ ADDL (BX), R11 // 44031b
+ ADDL (R11), R11 // 45031b
+ ADDQ (BX), DX // 480313
+ ADDQ (R11), DX // 490313
+ ADDQ (BX), R11 // 4c031b
+ ADDQ (R11), R11 // 4d031b
+ ADDB (BX), DL // 0213
+ ADDB (R11), DL // 410213
+ ADDB (BX), R11 // 44021b
+ ADDB (R11), R11 // 45021b
+ ADDPD (BX), X2 // 660f5813
+ ADDPD (R11), X2 // 66410f5813
+ ADDPD X2, X2 // 660f58d2
+ ADDPD X11, X2 // 66410f58d3
+ ADDPD (BX), X11 // 66440f581b
+ ADDPD (R11), X11 // 66450f581b
+ ADDPD X2, X11 // 66440f58da
+ ADDPD X11, X11 // 66450f58db
+ ADDPS (BX), X2 // 0f5813
+ ADDPS (R11), X2 // 410f5813
+ ADDPS X2, X2 // 0f58d2
+ ADDPS X11, X2 // 410f58d3
+ ADDPS (BX), X11 // 440f581b
+ ADDPS (R11), X11 // 450f581b
+ ADDPS X2, X11 // 440f58da
+ ADDPS X11, X11 // 450f58db
+ ADDSD (BX), X2 // f20f5813
+ ADDSD (R11), X2 // f2410f5813
+ ADDSD X2, X2 // f20f58d2
+ ADDSD X11, X2 // f2410f58d3
+ ADDSD (BX), X11 // f2440f581b
+ ADDSD (R11), X11 // f2450f581b
+ ADDSD X2, X11 // f2440f58da
+ ADDSD X11, X11 // f2450f58db
+ ADDSS (BX), X2 // f30f5813
+ ADDSS (R11), X2 // f3410f5813
+ ADDSS X2, X2 // f30f58d2
+ ADDSS X11, X2 // f3410f58d3
+ ADDSS (BX), X11 // f3440f581b
+ ADDSS (R11), X11 // f3450f581b
+ ADDSS X2, X11 // f3440f58da
+ ADDSS X11, X11 // f3450f58db
+ ADDSUBPD (BX), X2 // 660fd013
+ ADDSUBPD (R11), X2 // 66410fd013
+ ADDSUBPD X2, X2 // 660fd0d2
+ ADDSUBPD X11, X2 // 66410fd0d3
+ ADDSUBPD (BX), X11 // 66440fd01b
+ ADDSUBPD (R11), X11 // 66450fd01b
+ ADDSUBPD X2, X11 // 66440fd0da
+ ADDSUBPD X11, X11 // 66450fd0db
+ ADDSUBPS (BX), X2 // f20fd013
+ ADDSUBPS (R11), X2 // f2410fd013
+ ADDSUBPS X2, X2 // f20fd0d2
+ ADDSUBPS X11, X2 // f2410fd0d3
+ ADDSUBPS (BX), X11 // f2440fd01b
+ ADDSUBPS (R11), X11 // f2450fd01b
+ ADDSUBPS X2, X11 // f2440fd0da
+ ADDSUBPS X11, X11 // f2450fd0db
+ ADOXL (BX), DX // f30f38f613
+ ADOXL (R11), DX // f3410f38f613
+ ADOXL DX, DX // f30f38f6d2
+ ADOXL R11, DX // f3410f38f6d3
+ ADOXL (BX), R11 // f3440f38f61b
+ ADOXL (R11), R11 // f3450f38f61b
+ ADOXL DX, R11 // f3440f38f6da
+ ADOXL R11, R11 // f3450f38f6db
+ ADOXQ (BX), DX // f3480f38f613
+ ADOXQ (R11), DX // f3490f38f613
+ ADOXQ DX, DX // f3480f38f6d2
+ ADOXQ R11, DX // f3490f38f6d3
+ ADOXQ (BX), R11 // f34c0f38f61b
+ ADOXQ (R11), R11 // f34d0f38f61b
+ ADOXQ DX, R11 // f34c0f38f6da
+ ADOXQ R11, R11 // f34d0f38f6db
+ AESDEC (BX), X2 // 660f38de13
+ AESDEC (R11), X2 // 66410f38de13
+ AESDEC X2, X2 // 660f38ded2
+ AESDEC X11, X2 // 66410f38ded3
+ AESDEC (BX), X11 // 66440f38de1b
+ AESDEC (R11), X11 // 66450f38de1b
+ AESDEC X2, X11 // 66440f38deda
+ AESDEC X11, X11 // 66450f38dedb
+ AESDECLAST (BX), X2 // 660f38df13
+ AESDECLAST (R11), X2 // 66410f38df13
+ AESDECLAST X2, X2 // 660f38dfd2
+ AESDECLAST X11, X2 // 66410f38dfd3
+ AESDECLAST (BX), X11 // 66440f38df1b
+ AESDECLAST (R11), X11 // 66450f38df1b
+ AESDECLAST X2, X11 // 66440f38dfda
+ AESDECLAST X11, X11 // 66450f38dfdb
+ AESENC (BX), X2 // 660f38dc13
+ AESENC (R11), X2 // 66410f38dc13
+ AESENC X2, X2 // 660f38dcd2
+ AESENC X11, X2 // 66410f38dcd3
+ AESENC (BX), X11 // 66440f38dc1b
+ AESENC (R11), X11 // 66450f38dc1b
+ AESENC X2, X11 // 66440f38dcda
+ AESENC X11, X11 // 66450f38dcdb
+ AESENCLAST (BX), X2 // 660f38dd13
+ AESENCLAST (R11), X2 // 66410f38dd13
+ AESENCLAST X2, X2 // 660f38ddd2
+ AESENCLAST X11, X2 // 66410f38ddd3
+ AESENCLAST (BX), X11 // 66440f38dd1b
+ AESENCLAST (R11), X11 // 66450f38dd1b
+ AESENCLAST X2, X11 // 66440f38ddda
+ AESENCLAST X11, X11 // 66450f38dddb
+ AESIMC (BX), X2 // 660f38db13
+ AESIMC (R11), X2 // 66410f38db13
+ AESIMC X2, X2 // 660f38dbd2
+ AESIMC X11, X2 // 66410f38dbd3
+ AESIMC (BX), X11 // 66440f38db1b
+ AESIMC (R11), X11 // 66450f38db1b
+ AESIMC X2, X11 // 66440f38dbda
+ AESIMC X11, X11 // 66450f38dbdb
+ AESKEYGENASSIST $7, (BX), X2 // 660f3adf1307
+ AESKEYGENASSIST $7, (R11), X2 // 66410f3adf1307
+ AESKEYGENASSIST $7, X2, X2 // 660f3adfd207
+ AESKEYGENASSIST $7, X11, X2 // 66410f3adfd307
+ AESKEYGENASSIST $7, (BX), X11 // 66440f3adf1b07
+ AESKEYGENASSIST $7, (R11), X11 // 66450f3adf1b07
+ AESKEYGENASSIST $7, X2, X11 // 66440f3adfda07
+ AESKEYGENASSIST $7, X11, X11 // 66450f3adfdb07
+ ANDB $7, AL // 2407
+ ANDW $61731, AX // 662523f1
+ ANDL $4045620583, AX // 25674523f1
+ ANDQ $-249346713, AX // 4825674523f1
+ ANDW $61731, (BX) // 66812323f1
+ ANDW $61731, (R11) // 6641812323f1
+ ANDW $61731, DX // 6681e223f1
+ ANDW $61731, R11 // 664181e323f1
+ ANDW $7, (BX) // 66832307
+ ANDW $7, (R11) // 6641832307
+ ANDW $7, DX // 6683e207
+ ANDW $7, R11 // 664183e307
+ ANDW DX, (BX) // 662113
+ ANDW R11, (BX) // 6644211b
+ ANDW DX, (R11) // 66412113
+ ANDW R11, (R11) // 6645211b
+ ANDW DX, DX // 6621d2 or 6623d2
+ ANDW R11, DX // 664421da or 664123d3
+ ANDW DX, R11 // 664121d3 or 664423da
+ ANDW R11, R11 // 664521db or 664523db
+ ANDL $4045620583, (BX) // 8123674523f1
+ ANDL $4045620583, (R11) // 418123674523f1
+ ANDL $4045620583, DX // 81e2674523f1
+ ANDL $4045620583, R11 // 4181e3674523f1
+ ANDL $7, (BX) // 832307
+ ANDL $7, (R11) // 41832307
+ ANDL $7, DX // 83e207
+ ANDL $7, R11 // 4183e307
+ ANDL DX, (BX) // 2113
+ ANDL R11, (BX) // 44211b
+ ANDL DX, (R11) // 412113
+ ANDL R11, (R11) // 45211b
+ ANDL DX, DX // 21d2 or 23d2
+ ANDL R11, DX // 4421da or 4123d3
+ ANDL DX, R11 // 4121d3 or 4423da
+ ANDL R11, R11 // 4521db or 4523db
+ ANDQ $-249346713, (BX) // 488123674523f1
+ ANDQ $-249346713, (R11) // 498123674523f1
+ ANDQ $-249346713, DX // 4881e2674523f1
+ ANDQ $-249346713, R11 // 4981e3674523f1
+ ANDQ $7, (BX) // 48832307
+ ANDQ $7, (R11) // 49832307
+ ANDQ $7, DX // 4883e207
+ ANDQ $7, R11 // 4983e307
+ ANDQ DX, (BX) // 482113
+ ANDQ R11, (BX) // 4c211b
+ ANDQ DX, (R11) // 492113
+ ANDQ R11, (R11) // 4d211b
+ ANDQ DX, DX // 4821d2 or 4823d2
+ ANDQ R11, DX // 4c21da or 4923d3
+ ANDQ DX, R11 // 4921d3 or 4c23da
+ ANDQ R11, R11 // 4d21db or 4d23db
+ ANDB $7, (BX) // 802307
+ ANDB $7, (R11) // 41802307
+ ANDB $7, DL // 80e207
+ ANDB $7, R11 // 4180e307
+ ANDB DL, (BX) // 2013
+ ANDB R11, (BX) // 44201b
+ ANDB DL, (R11) // 412013
+ ANDB R11, (R11) // 45201b
+ ANDB DL, DL // 20d2 or 22d2
+ ANDB R11, DL // 4420da or 4122d3
+ ANDB DL, R11 // 4120d3 or 4422da
+ ANDB R11, R11 // 4520db or 4522db
+ ANDW (BX), DX // 662313
+ ANDW (R11), DX // 66412313
+ ANDW (BX), R11 // 6644231b
+ ANDW (R11), R11 // 6645231b
+ ANDL (BX), DX // 2313
+ ANDL (R11), DX // 412313
+ ANDL (BX), R11 // 44231b
+ ANDL (R11), R11 // 45231b
+ ANDQ (BX), DX // 482313
+ ANDQ (R11), DX // 492313
+ ANDQ (BX), R11 // 4c231b
+ ANDQ (R11), R11 // 4d231b
+ ANDB (BX), DL // 2213
+ ANDB (R11), DL // 412213
+ ANDB (BX), R11 // 44221b
+ ANDB (R11), R11 // 45221b
+ ANDNL (BX), R9, DX // c4e230f213
+ ANDNL (R11), R9, DX // c4c230f213
+ ANDNL DX, R9, DX // c4e230f2d2
+ ANDNL R11, R9, DX // c4c230f2d3
+ ANDNL (BX), R9, R11 // c46230f21b
+ ANDNL (R11), R9, R11 // c44230f21b
+ ANDNL DX, R9, R11 // c46230f2da
+ ANDNL R11, R9, R11 // c44230f2db
+ ANDNQ (BX), R14, DX // c4e288f213
+ ANDNQ (R11), R14, DX // c4c288f213
+ ANDNQ DX, R14, DX // c4e288f2d2
+ ANDNQ R11, R14, DX // c4c288f2d3
+ ANDNQ (BX), R14, R11 // c46288f21b
+ ANDNQ (R11), R14, R11 // c44288f21b
+ ANDNQ DX, R14, R11 // c46288f2da
+ ANDNQ R11, R14, R11 // c44288f2db
+ ANDNPD (BX), X2 // 660f5513
+ ANDNPD (R11), X2 // 66410f5513
+ ANDNPD X2, X2 // 660f55d2
+ ANDNPD X11, X2 // 66410f55d3
+ ANDNPD (BX), X11 // 66440f551b
+ ANDNPD (R11), X11 // 66450f551b
+ ANDNPD X2, X11 // 66440f55da
+ ANDNPD X11, X11 // 66450f55db
+ ANDNPS (BX), X2 // 0f5513
+ ANDNPS (R11), X2 // 410f5513
+ ANDNPS X2, X2 // 0f55d2
+ ANDNPS X11, X2 // 410f55d3
+ ANDNPS (BX), X11 // 440f551b
+ ANDNPS (R11), X11 // 450f551b
+ ANDNPS X2, X11 // 440f55da
+ ANDNPS X11, X11 // 450f55db
+ ANDPD (BX), X2 // 660f5413
+ ANDPD (R11), X2 // 66410f5413
+ ANDPD X2, X2 // 660f54d2
+ ANDPD X11, X2 // 66410f54d3
+ ANDPD (BX), X11 // 66440f541b
+ ANDPD (R11), X11 // 66450f541b
+ ANDPD X2, X11 // 66440f54da
+ ANDPD X11, X11 // 66450f54db
+ ANDPS (BX), X2 // 0f5413
+ ANDPS (R11), X2 // 410f5413
+ ANDPS X2, X2 // 0f54d2
+ ANDPS X11, X2 // 410f54d3
+ ANDPS (BX), X11 // 440f541b
+ ANDPS (R11), X11 // 450f541b
+ ANDPS X2, X11 // 440f54da
+ ANDPS X11, X11 // 450f54db
+ BEXTRL R9, (BX), DX // c4e230f713
+ BEXTRL R9, (R11), DX // c4c230f713
+ BEXTRL R9, DX, DX // c4e230f7d2
+ BEXTRL R9, R11, DX // c4c230f7d3
+ BEXTRL R9, (BX), R11 // c46230f71b
+ BEXTRL R9, (R11), R11 // c44230f71b
+ BEXTRL R9, DX, R11 // c46230f7da
+ BEXTRL R9, R11, R11 // c44230f7db
+ BEXTRQ R14, (BX), DX // c4e288f713
+ BEXTRQ R14, (R11), DX // c4c288f713
+ BEXTRQ R14, DX, DX // c4e288f7d2
+ BEXTRQ R14, R11, DX // c4c288f7d3
+ BEXTRQ R14, (BX), R11 // c46288f71b
+ BEXTRQ R14, (R11), R11 // c44288f71b
+ BEXTRQ R14, DX, R11 // c46288f7da
+ BEXTRQ R14, R11, R11 // c44288f7db
+ BLENDPD $7, (BX), X2 // 660f3a0d1307
+ BLENDPD $7, (R11), X2 // 66410f3a0d1307
+ BLENDPD $7, X2, X2 // 660f3a0dd207
+ BLENDPD $7, X11, X2 // 66410f3a0dd307
+ BLENDPD $7, (BX), X11 // 66440f3a0d1b07
+ BLENDPD $7, (R11), X11 // 66450f3a0d1b07
+ BLENDPD $7, X2, X11 // 66440f3a0dda07
+ BLENDPD $7, X11, X11 // 66450f3a0ddb07
+ BLENDPS $7, (BX), X2 // 660f3a0c1307
+ BLENDPS $7, (R11), X2 // 66410f3a0c1307
+ BLENDPS $7, X2, X2 // 660f3a0cd207
+ BLENDPS $7, X11, X2 // 66410f3a0cd307
+ BLENDPS $7, (BX), X11 // 66440f3a0c1b07
+ BLENDPS $7, (R11), X11 // 66450f3a0c1b07
+ BLENDPS $7, X2, X11 // 66440f3a0cda07
+ BLENDPS $7, X11, X11 // 66450f3a0cdb07
+ BLENDVPD X0, (BX), X2 // 660f381513
+ BLENDVPD X0, (R11), X2 // 66410f381513
+ BLENDVPD X0, X2, X2 // 660f3815d2
+ BLENDVPD X0, X11, X2 // 66410f3815d3
+ BLENDVPD X0, (BX), X11 // 66440f38151b
+ BLENDVPD X0, (R11), X11 // 66450f38151b
+ BLENDVPD X0, X2, X11 // 66440f3815da
+ BLENDVPD X0, X11, X11 // 66450f3815db
+ BLENDVPS X0, (BX), X2 // 660f381413
+ BLENDVPS X0, (R11), X2 // 66410f381413
+ BLENDVPS X0, X2, X2 // 660f3814d2
+ BLENDVPS X0, X11, X2 // 66410f3814d3
+ BLENDVPS X0, (BX), X11 // 66440f38141b
+ BLENDVPS X0, (R11), X11 // 66450f38141b
+ BLENDVPS X0, X2, X11 // 66440f3814da
+ BLENDVPS X0, X11, X11 // 66450f3814db
+ BLSIL (BX), R9 // c4e230f31b
+ BLSIL (R11), R9 // c4c230f31b
+ BLSIL DX, R9 // c4e230f3da
+ BLSIL R11, R9 // c4c230f3db
+ BLSIQ (BX), R14 // c4e288f31b
+ BLSIQ (R11), R14 // c4c288f31b
+ BLSIQ DX, R14 // c4e288f3da
+ BLSIQ R11, R14 // c4c288f3db
+ BLSMSKL (BX), R9 // c4e230f313
+ BLSMSKL (R11), R9 // c4c230f313
+ BLSMSKL DX, R9 // c4e230f3d2
+ BLSMSKL R11, R9 // c4c230f3d3
+ BLSMSKQ (BX), R14 // c4e288f313
+ BLSMSKQ (R11), R14 // c4c288f313
+ BLSMSKQ DX, R14 // c4e288f3d2
+ BLSMSKQ R11, R14 // c4c288f3d3
+ BLSRL (BX), R9 // c4e230f30b
+ BLSRL (R11), R9 // c4c230f30b
+ BLSRL DX, R9 // c4e230f3ca
+ BLSRL R11, R9 // c4c230f3cb
+ BLSRQ (BX), R14 // c4e288f30b
+ BLSRQ (R11), R14 // c4c288f30b
+ BLSRQ DX, R14 // c4e288f3ca
+ BLSRQ R11, R14 // c4c288f3cb
+ //TODO: BNDCL (BX), BND2 // f30f1a13
+ //TODO: BNDCL (R11), BND2 // f3410f1a13
+ //TODO: BNDCL DX, BND2 // f30f1ad2
+ //TODO: BNDCL R11, BND2 // f3410f1ad3
+ //TODO: BNDCL (BX), BND3 // f30f1a1b
+ //TODO: BNDCL (R11), BND3 // f3410f1a1b
+ //TODO: BNDCL DX, BND3 // f30f1ada
+ //TODO: BNDCL R11, BND3 // f3410f1adb
+ //TODO: BNDCN (BX), BND2 // f20f1b13
+ //TODO: BNDCN (R11), BND2 // f2410f1b13
+ //TODO: BNDCN DX, BND2 // f20f1bd2
+ //TODO: BNDCN R11, BND2 // f2410f1bd3
+ //TODO: BNDCN (BX), BND3 // f20f1b1b
+ //TODO: BNDCN (R11), BND3 // f2410f1b1b
+ //TODO: BNDCN DX, BND3 // f20f1bda
+ //TODO: BNDCN R11, BND3 // f2410f1bdb
+ //TODO: BNDCU (BX), BND2 // f20f1a13
+ //TODO: BNDCU (R11), BND2 // f2410f1a13
+ //TODO: BNDCU DX, BND2 // f20f1ad2
+ //TODO: BNDCU R11, BND2 // f2410f1ad3
+ //TODO: BNDCU (BX), BND3 // f20f1a1b
+ //TODO: BNDCU (R11), BND3 // f2410f1a1b
+ //TODO: BNDCU DX, BND3 // f20f1ada
+ //TODO: BNDCU R11, BND3 // f2410f1adb
+ //TODO: BNDLDX (BX), BND2 // 0f1a13
+ //TODO: BNDLDX (R11), BND2 // 410f1a13
+ //TODO: BNDLDX (BX), BND3 // 0f1a1b
+ //TODO: BNDLDX (R11), BND3 // 410f1a1b
+ //TODO: BNDMK (BX), BND2 // f30f1b13
+ //TODO: BNDMK (R11), BND2 // f3410f1b13
+ //TODO: BNDMK (BX), BND3 // f30f1b1b
+ //TODO: BNDMK (R11), BND3 // f3410f1b1b
+ //TODO: BNDMOV (BX), BND2 // 660f1a13
+ //TODO: BNDMOV (R11), BND2 // 66410f1a13
+ //TODO: BNDMOV BND2, BND2 // 660f1ad2 or 660f1bd2
+ //TODO: BNDMOV BND3, BND2 // 660f1ad3 or 660f1bda
+ //TODO: BNDMOV (BX), BND3 // 660f1a1b
+ //TODO: BNDMOV (R11), BND3 // 66410f1a1b
+ //TODO: BNDMOV BND2, BND3 // 660f1ada or 660f1bd3
+ //TODO: BNDMOV BND3, BND3 // 660f1adb or 660f1bdb
+ //TODO: BNDMOV BND2, (BX) // 660f1b13
+ //TODO: BNDMOV BND3, (BX) // 660f1b1b
+ //TODO: BNDMOV BND2, (R11) // 66410f1b13
+ //TODO: BNDMOV BND3, (R11) // 66410f1b1b
+ //TODO: BNDSTX BND2, (BX) // 0f1b13
+ //TODO: BNDSTX BND3, (BX) // 0f1b1b
+ //TODO: BNDSTX BND2, (R11) // 410f1b13
+ //TODO: BNDSTX BND3, (R11) // 410f1b1b
+ BSFW (BX), DX // 660fbc13
+ BSFW (R11), DX // 66410fbc13
+ BSFW DX, DX // 660fbcd2
+ BSFW R11, DX // 66410fbcd3
+ BSFW (BX), R11 // 66440fbc1b
+ BSFW (R11), R11 // 66450fbc1b
+ BSFW DX, R11 // 66440fbcda
+ BSFW R11, R11 // 66450fbcdb
+ BSFL (BX), DX // 0fbc13
+ BSFL (R11), DX // 410fbc13
+ BSFL DX, DX // 0fbcd2
+ BSFL R11, DX // 410fbcd3
+ BSFL (BX), R11 // 440fbc1b
+ BSFL (R11), R11 // 450fbc1b
+ BSFL DX, R11 // 440fbcda
+ BSFL R11, R11 // 450fbcdb
+ BSFQ (BX), DX // 480fbc13
+ BSFQ (R11), DX // 490fbc13
+ BSFQ DX, DX // 480fbcd2
+ BSFQ R11, DX // 490fbcd3
+ BSFQ (BX), R11 // 4c0fbc1b
+ BSFQ (R11), R11 // 4d0fbc1b
+ BSFQ DX, R11 // 4c0fbcda
+ BSFQ R11, R11 // 4d0fbcdb
+ BSRW (BX), DX // 660fbd13
+ BSRW (R11), DX // 66410fbd13
+ BSRW DX, DX // 660fbdd2
+ BSRW R11, DX // 66410fbdd3
+ BSRW (BX), R11 // 66440fbd1b
+ BSRW (R11), R11 // 66450fbd1b
+ BSRW DX, R11 // 66440fbdda
+ BSRW R11, R11 // 66450fbddb
+ BSRL (BX), DX // 0fbd13
+ BSRL (R11), DX // 410fbd13
+ BSRL DX, DX // 0fbdd2
+ BSRL R11, DX // 410fbdd3
+ BSRL (BX), R11 // 440fbd1b
+ BSRL (R11), R11 // 450fbd1b
+ BSRL DX, R11 // 440fbdda
+ BSRL R11, R11 // 450fbddb
+ BSRQ (BX), DX // 480fbd13
+ BSRQ (R11), DX // 490fbd13
+ BSRQ DX, DX // 480fbdd2
+ BSRQ R11, DX // 490fbdd3
+ BSRQ (BX), R11 // 4c0fbd1b
+ BSRQ (R11), R11 // 4d0fbd1b
+ BSRQ DX, R11 // 4c0fbdda
+ BSRQ R11, R11 // 4d0fbddb
+ BSWAPL DX // 0fca
+ BSWAPL R11 // 410fcb
+ BSWAPQ DX // 480fca
+ BSWAPQ R11 // 490fcb
+ BTW $7, (BX) // 660fba2307
+ BTW $7, (R11) // 66410fba2307
+ BTW $7, DX // 660fbae207
+ BTW $7, R11 // 66410fbae307
+ BTW DX, (BX) // 660fa313
+ BTW R11, (BX) // 66440fa31b
+ BTW DX, (R11) // 66410fa313
+ BTW R11, (R11) // 66450fa31b
+ BTW DX, DX // 660fa3d2
+ BTW R11, DX // 66440fa3da
+ BTW DX, R11 // 66410fa3d3
+ BTW R11, R11 // 66450fa3db
+ BTL $7, (BX) // 0fba2307
+ BTL $7, (R11) // 410fba2307
+ BTL $7, DX // 0fbae207
+ BTL $7, R11 // 410fbae307
+ BTL DX, (BX) // 0fa313
+ BTL R11, (BX) // 440fa31b
+ BTL DX, (R11) // 410fa313
+ BTL R11, (R11) // 450fa31b
+ BTL DX, DX // 0fa3d2
+ BTL R11, DX // 440fa3da
+ BTL DX, R11 // 410fa3d3
+ BTL R11, R11 // 450fa3db
+ BTQ $7, (BX) // 480fba2307
+ BTQ $7, (R11) // 490fba2307
+ BTQ $7, DX // 480fbae207
+ BTQ $7, R11 // 490fbae307
+ BTQ DX, (BX) // 480fa313
+ BTQ R11, (BX) // 4c0fa31b
+ BTQ DX, (R11) // 490fa313
+ BTQ R11, (R11) // 4d0fa31b
+ BTQ DX, DX // 480fa3d2
+ BTQ R11, DX // 4c0fa3da
+ BTQ DX, R11 // 490fa3d3
+ BTQ R11, R11 // 4d0fa3db
+ BTCW $7, (BX) // 660fba3b07
+ BTCW $7, (R11) // 66410fba3b07
+ BTCW $7, DX // 660fbafa07
+ BTCW $7, R11 // 66410fbafb07
+ BTCW DX, (BX) // 660fbb13
+ BTCW R11, (BX) // 66440fbb1b
+ BTCW DX, (R11) // 66410fbb13
+ BTCW R11, (R11) // 66450fbb1b
+ BTCW DX, DX // 660fbbd2
+ BTCW R11, DX // 66440fbbda
+ BTCW DX, R11 // 66410fbbd3
+ BTCW R11, R11 // 66450fbbdb
+ BTCL $7, (BX) // 0fba3b07
+ BTCL $7, (R11) // 410fba3b07
+ BTCL $7, DX // 0fbafa07
+ BTCL $7, R11 // 410fbafb07
+ BTCL DX, (BX) // 0fbb13
+ BTCL R11, (BX) // 440fbb1b
+ BTCL DX, (R11) // 410fbb13
+ BTCL R11, (R11) // 450fbb1b
+ BTCL DX, DX // 0fbbd2
+ BTCL R11, DX // 440fbbda
+ BTCL DX, R11 // 410fbbd3
+ BTCL R11, R11 // 450fbbdb
+ BTCQ $7, (BX) // 480fba3b07
+ BTCQ $7, (R11) // 490fba3b07
+ BTCQ $7, DX // 480fbafa07
+ BTCQ $7, R11 // 490fbafb07
+ BTCQ DX, (BX) // 480fbb13
+ BTCQ R11, (BX) // 4c0fbb1b
+ BTCQ DX, (R11) // 490fbb13
+ BTCQ R11, (R11) // 4d0fbb1b
+ BTCQ DX, DX // 480fbbd2
+ BTCQ R11, DX // 4c0fbbda
+ BTCQ DX, R11 // 490fbbd3
+ BTCQ R11, R11 // 4d0fbbdb
+ BTRW $7, (BX) // 660fba3307
+ BTRW $7, (R11) // 66410fba3307
+ BTRW $7, DX // 660fbaf207
+ BTRW $7, R11 // 66410fbaf307
+ BTRW DX, (BX) // 660fb313
+ BTRW R11, (BX) // 66440fb31b
+ BTRW DX, (R11) // 66410fb313
+ BTRW R11, (R11) // 66450fb31b
+ BTRW DX, DX // 660fb3d2
+ BTRW R11, DX // 66440fb3da
+ BTRW DX, R11 // 66410fb3d3
+ BTRW R11, R11 // 66450fb3db
+ BTRL $7, (BX) // 0fba3307
+ BTRL $7, (R11) // 410fba3307
+ BTRL $7, DX // 0fbaf207
+ BTRL $7, R11 // 410fbaf307
+ BTRL DX, (BX) // 0fb313
+ BTRL R11, (BX) // 440fb31b
+ BTRL DX, (R11) // 410fb313
+ BTRL R11, (R11) // 450fb31b
+ BTRL DX, DX // 0fb3d2
+ BTRL R11, DX // 440fb3da
+ BTRL DX, R11 // 410fb3d3
+ BTRL R11, R11 // 450fb3db
+ BTRQ $7, (BX) // 480fba3307
+ BTRQ $7, (R11) // 490fba3307
+ BTRQ $7, DX // 480fbaf207
+ BTRQ $7, R11 // 490fbaf307
+ BTRQ DX, (BX) // 480fb313
+ BTRQ R11, (BX) // 4c0fb31b
+ BTRQ DX, (R11) // 490fb313
+ BTRQ R11, (R11) // 4d0fb31b
+ BTRQ DX, DX // 480fb3d2
+ BTRQ R11, DX // 4c0fb3da
+ BTRQ DX, R11 // 490fb3d3
+ BTRQ R11, R11 // 4d0fb3db
+ BTSW $7, (BX) // 660fba2b07
+ BTSW $7, (R11) // 66410fba2b07
+ BTSW $7, DX // 660fbaea07
+ BTSW $7, R11 // 66410fbaeb07
+ BTSW DX, (BX) // 660fab13
+ BTSW R11, (BX) // 66440fab1b
+ BTSW DX, (R11) // 66410fab13
+ BTSW R11, (R11) // 66450fab1b
+ BTSW DX, DX // 660fabd2
+ BTSW R11, DX // 66440fabda
+ BTSW DX, R11 // 66410fabd3
+ BTSW R11, R11 // 66450fabdb
+ BTSL $7, (BX) // 0fba2b07
+ BTSL $7, (R11) // 410fba2b07
+ BTSL $7, DX // 0fbaea07
+ BTSL $7, R11 // 410fbaeb07
+ BTSL DX, (BX) // 0fab13
+ BTSL R11, (BX) // 440fab1b
+ BTSL DX, (R11) // 410fab13
+ BTSL R11, (R11) // 450fab1b
+ BTSL DX, DX // 0fabd2
+ BTSL R11, DX // 440fabda
+ BTSL DX, R11 // 410fabd3
+ BTSL R11, R11 // 450fabdb
+ BTSQ $7, (BX) // 480fba2b07
+ BTSQ $7, (R11) // 490fba2b07
+ BTSQ $7, DX // 480fbaea07
+ BTSQ $7, R11 // 490fbaeb07
+ BTSQ DX, (BX) // 480fab13
+ BTSQ R11, (BX) // 4c0fab1b
+ BTSQ DX, (R11) // 490fab13
+ BTSQ R11, (R11) // 4d0fab1b
+ BTSQ DX, DX // 480fabd2
+ BTSQ R11, DX // 4c0fabda
+ BTSQ DX, R11 // 490fabd3
+ BTSQ R11, R11 // 4d0fabdb
+ BZHIL R9, (BX), DX // c4e230f513
+ BZHIL R9, (R11), DX // c4c230f513
+ BZHIL R9, DX, DX // c4e230f5d2
+ BZHIL R9, R11, DX // c4c230f5d3
+ BZHIL R9, (BX), R11 // c46230f51b
+ BZHIL R9, (R11), R11 // c44230f51b
+ BZHIL R9, DX, R11 // c46230f5da
+ BZHIL R9, R11, R11 // c44230f5db
+ BZHIQ R14, (BX), DX // c4e288f513
+ BZHIQ R14, (R11), DX // c4c288f513
+ BZHIQ R14, DX, DX // c4e288f5d2
+ BZHIQ R14, R11, DX // c4c288f5d3
+ BZHIQ R14, (BX), R11 // c46288f51b
+ BZHIQ R14, (R11), R11 // c44288f51b
+ BZHIQ R14, DX, R11 // c46288f5da
+ BZHIQ R14, R11, R11 // c44288f5db
+ //TODO: CALLQ* (BX) // ff13
+ //TODO: CALLQ* (R11) // 41ff13
+ //TODO: CALLQ* DX // ffd2
+ //TODO: CALLQ* R11 // 41ffd3
+ //TODO: CALL .+$0x11223344 // e844332211 or 48e844332211
+ //TODO: LCALLW* (BX) // 66ff1b
+ //TODO: LCALLW* (R11) // 6641ff1b
+ //TODO: LCALLL* (BX) // ff1b
+ //TODO: LCALLL* (R11) // 41ff1b
+ //TODO: LCALLQ* (BX) // 48ff1b
+ //TODO: LCALLQ* (R11) // 49ff1b
+ CBW // 6698
+ CDQ // 99
+ CDQE // 4898
+ CLAC // 0f01ca
+ CLC // f8
+ CLD // fc
+ CLFLUSH (BX) // 0fae3b
+ CLFLUSH (R11) // 410fae3b
+ CLFLUSHOPT (BX) // 660fae3b
+ CLFLUSHOPT (R11) // 66410fae3b
+ CLI // fa
+ CLTS // 0f06
+ CMC // f5
+ CMOVWHI (BX), DX // 660f4713
+ CMOVWHI (R11), DX // 66410f4713
+ CMOVWHI DX, DX // 660f47d2
+ CMOVWHI R11, DX // 66410f47d3
+ CMOVWHI (BX), R11 // 66440f471b
+ CMOVWHI (R11), R11 // 66450f471b
+ CMOVWHI DX, R11 // 66440f47da
+ CMOVWHI R11, R11 // 66450f47db
+ CMOVLHI (BX), DX // 0f4713
+ CMOVLHI (R11), DX // 410f4713
+ CMOVLHI DX, DX // 0f47d2
+ CMOVLHI R11, DX // 410f47d3
+ CMOVLHI (BX), R11 // 440f471b
+ CMOVLHI (R11), R11 // 450f471b
+ CMOVLHI DX, R11 // 440f47da
+ CMOVLHI R11, R11 // 450f47db
+ CMOVQHI (BX), DX // 480f4713
+ CMOVQHI (R11), DX // 490f4713
+ CMOVQHI DX, DX // 480f47d2
+ CMOVQHI R11, DX // 490f47d3
+ CMOVQHI (BX), R11 // 4c0f471b
+ CMOVQHI (R11), R11 // 4d0f471b
+ CMOVQHI DX, R11 // 4c0f47da
+ CMOVQHI R11, R11 // 4d0f47db
+ CMOVWCC (BX), DX // 660f4313
+ CMOVWCC (R11), DX // 66410f4313
+ CMOVWCC DX, DX // 660f43d2
+ CMOVWCC R11, DX // 66410f43d3
+ CMOVWCC (BX), R11 // 66440f431b
+ CMOVWCC (R11), R11 // 66450f431b
+ CMOVWCC DX, R11 // 66440f43da
+ CMOVWCC R11, R11 // 66450f43db
+ CMOVLCC (BX), DX // 0f4313
+ CMOVLCC (R11), DX // 410f4313
+ CMOVLCC DX, DX // 0f43d2
+ CMOVLCC R11, DX // 410f43d3
+ CMOVLCC (BX), R11 // 440f431b
+ CMOVLCC (R11), R11 // 450f431b
+ CMOVLCC DX, R11 // 440f43da
+ CMOVLCC R11, R11 // 450f43db
+ CMOVQCC (BX), DX // 480f4313
+ CMOVQCC (R11), DX // 490f4313
+ CMOVQCC DX, DX // 480f43d2
+ CMOVQCC R11, DX // 490f43d3
+ CMOVQCC (BX), R11 // 4c0f431b
+ CMOVQCC (R11), R11 // 4d0f431b
+ CMOVQCC DX, R11 // 4c0f43da
+ CMOVQCC R11, R11 // 4d0f43db
+ CMOVWCS (BX), DX // 660f4213
+ CMOVWCS (R11), DX // 66410f4213
+ CMOVWCS DX, DX // 660f42d2
+ CMOVWCS R11, DX // 66410f42d3
+ CMOVWCS (BX), R11 // 66440f421b
+ CMOVWCS (R11), R11 // 66450f421b
+ CMOVWCS DX, R11 // 66440f42da
+ CMOVWCS R11, R11 // 66450f42db
+ CMOVLCS (BX), DX // 0f4213
+ CMOVLCS (R11), DX // 410f4213
+ CMOVLCS DX, DX // 0f42d2
+ CMOVLCS R11, DX // 410f42d3
+ CMOVLCS (BX), R11 // 440f421b
+ CMOVLCS (R11), R11 // 450f421b
+ CMOVLCS DX, R11 // 440f42da
+ CMOVLCS R11, R11 // 450f42db
+ CMOVQCS (BX), DX // 480f4213
+ CMOVQCS (R11), DX // 490f4213
+ CMOVQCS DX, DX // 480f42d2
+ CMOVQCS R11, DX // 490f42d3
+ CMOVQCS (BX), R11 // 4c0f421b
+ CMOVQCS (R11), R11 // 4d0f421b
+ CMOVQCS DX, R11 // 4c0f42da
+ CMOVQCS R11, R11 // 4d0f42db
+ CMOVWLS (BX), DX // 660f4613
+ CMOVWLS (R11), DX // 66410f4613
+ CMOVWLS DX, DX // 660f46d2
+ CMOVWLS R11, DX // 66410f46d3
+ CMOVWLS (BX), R11 // 66440f461b
+ CMOVWLS (R11), R11 // 66450f461b
+ CMOVWLS DX, R11 // 66440f46da
+ CMOVWLS R11, R11 // 66450f46db
+ CMOVLLS (BX), DX // 0f4613
+ CMOVLLS (R11), DX // 410f4613
+ CMOVLLS DX, DX // 0f46d2
+ CMOVLLS R11, DX // 410f46d3
+ CMOVLLS (BX), R11 // 440f461b
+ CMOVLLS (R11), R11 // 450f461b
+ CMOVLLS DX, R11 // 440f46da
+ CMOVLLS R11, R11 // 450f46db
+ CMOVQLS (BX), DX // 480f4613
+ CMOVQLS (R11), DX // 490f4613
+ CMOVQLS DX, DX // 480f46d2
+ CMOVQLS R11, DX // 490f46d3
+ CMOVQLS (BX), R11 // 4c0f461b
+ CMOVQLS (R11), R11 // 4d0f461b
+ CMOVQLS DX, R11 // 4c0f46da
+ CMOVQLS R11, R11 // 4d0f46db
+ CMOVWEQ (BX), DX // 660f4413
+ CMOVWEQ (R11), DX // 66410f4413
+ CMOVWEQ DX, DX // 660f44d2
+ CMOVWEQ R11, DX // 66410f44d3
+ CMOVWEQ (BX), R11 // 66440f441b
+ CMOVWEQ (R11), R11 // 66450f441b
+ CMOVWEQ DX, R11 // 66440f44da
+ CMOVWEQ R11, R11 // 66450f44db
+ CMOVLEQ (BX), DX // 0f4413
+ CMOVLEQ (R11), DX // 410f4413
+ CMOVLEQ DX, DX // 0f44d2
+ CMOVLEQ R11, DX // 410f44d3
+ CMOVLEQ (BX), R11 // 440f441b
+ CMOVLEQ (R11), R11 // 450f441b
+ CMOVLEQ DX, R11 // 440f44da
+ CMOVLEQ R11, R11 // 450f44db
+ CMOVQEQ (BX), DX // 480f4413
+ CMOVQEQ (R11), DX // 490f4413
+ CMOVQEQ DX, DX // 480f44d2
+ CMOVQEQ R11, DX // 490f44d3
+ CMOVQEQ (BX), R11 // 4c0f441b
+ CMOVQEQ (R11), R11 // 4d0f441b
+ CMOVQEQ DX, R11 // 4c0f44da
+ CMOVQEQ R11, R11 // 4d0f44db
+ CMOVWGT (BX), DX // 660f4f13
+ CMOVWGT (R11), DX // 66410f4f13
+ CMOVWGT DX, DX // 660f4fd2
+ CMOVWGT R11, DX // 66410f4fd3
+ CMOVWGT (BX), R11 // 66440f4f1b
+ CMOVWGT (R11), R11 // 66450f4f1b
+ CMOVWGT DX, R11 // 66440f4fda
+ CMOVWGT R11, R11 // 66450f4fdb
+ CMOVLGT (BX), DX // 0f4f13
+ CMOVLGT (R11), DX // 410f4f13
+ CMOVLGT DX, DX // 0f4fd2
+ CMOVLGT R11, DX // 410f4fd3
+ CMOVLGT (BX), R11 // 440f4f1b
+ CMOVLGT (R11), R11 // 450f4f1b
+ CMOVLGT DX, R11 // 440f4fda
+ CMOVLGT R11, R11 // 450f4fdb
+ CMOVQGT (BX), DX // 480f4f13
+ CMOVQGT (R11), DX // 490f4f13
+ CMOVQGT DX, DX // 480f4fd2
+ CMOVQGT R11, DX // 490f4fd3
+ CMOVQGT (BX), R11 // 4c0f4f1b
+ CMOVQGT (R11), R11 // 4d0f4f1b
+ CMOVQGT DX, R11 // 4c0f4fda
+ CMOVQGT R11, R11 // 4d0f4fdb
+ CMOVWGE (BX), DX // 660f4d13
+ CMOVWGE (R11), DX // 66410f4d13
+ CMOVWGE DX, DX // 660f4dd2
+ CMOVWGE R11, DX // 66410f4dd3
+ CMOVWGE (BX), R11 // 66440f4d1b
+ CMOVWGE (R11), R11 // 66450f4d1b
+ CMOVWGE DX, R11 // 66440f4dda
+ CMOVWGE R11, R11 // 66450f4ddb
+ CMOVLGE (BX), DX // 0f4d13
+ CMOVLGE (R11), DX // 410f4d13
+ CMOVLGE DX, DX // 0f4dd2
+ CMOVLGE R11, DX // 410f4dd3
+ CMOVLGE (BX), R11 // 440f4d1b
+ CMOVLGE (R11), R11 // 450f4d1b
+ CMOVLGE DX, R11 // 440f4dda
+ CMOVLGE R11, R11 // 450f4ddb
+ CMOVQGE (BX), DX // 480f4d13
+ CMOVQGE (R11), DX // 490f4d13
+ CMOVQGE DX, DX // 480f4dd2
+ CMOVQGE R11, DX // 490f4dd3
+ CMOVQGE (BX), R11 // 4c0f4d1b
+ CMOVQGE (R11), R11 // 4d0f4d1b
+ CMOVQGE DX, R11 // 4c0f4dda
+ CMOVQGE R11, R11 // 4d0f4ddb
+ CMOVWLT (BX), DX // 660f4c13
+ CMOVWLT (R11), DX // 66410f4c13
+ CMOVWLT DX, DX // 660f4cd2
+ CMOVWLT R11, DX // 66410f4cd3
+ CMOVWLT (BX), R11 // 66440f4c1b
+ CMOVWLT (R11), R11 // 66450f4c1b
+ CMOVWLT DX, R11 // 66440f4cda
+ CMOVWLT R11, R11 // 66450f4cdb
+ CMOVLLT (BX), DX // 0f4c13
+ CMOVLLT (R11), DX // 410f4c13
+ CMOVLLT DX, DX // 0f4cd2
+ CMOVLLT R11, DX // 410f4cd3
+ CMOVLLT (BX), R11 // 440f4c1b
+ CMOVLLT (R11), R11 // 450f4c1b
+ CMOVLLT DX, R11 // 440f4cda
+ CMOVLLT R11, R11 // 450f4cdb
+ CMOVQLT (BX), DX // 480f4c13
+ CMOVQLT (R11), DX // 490f4c13
+ CMOVQLT DX, DX // 480f4cd2
+ CMOVQLT R11, DX // 490f4cd3
+ CMOVQLT (BX), R11 // 4c0f4c1b
+ CMOVQLT (R11), R11 // 4d0f4c1b
+ CMOVQLT DX, R11 // 4c0f4cda
+ CMOVQLT R11, R11 // 4d0f4cdb
+ CMOVWLE (BX), DX // 660f4e13
+ CMOVWLE (R11), DX // 66410f4e13
+ CMOVWLE DX, DX // 660f4ed2
+ CMOVWLE R11, DX // 66410f4ed3
+ CMOVWLE (BX), R11 // 66440f4e1b
+ CMOVWLE (R11), R11 // 66450f4e1b
+ CMOVWLE DX, R11 // 66440f4eda
+ CMOVWLE R11, R11 // 66450f4edb
+ CMOVLLE (BX), DX // 0f4e13
+ CMOVLLE (R11), DX // 410f4e13
+ CMOVLLE DX, DX // 0f4ed2
+ CMOVLLE R11, DX // 410f4ed3
+ CMOVLLE (BX), R11 // 440f4e1b
+ CMOVLLE (R11), R11 // 450f4e1b
+ CMOVLLE DX, R11 // 440f4eda
+ CMOVLLE R11, R11 // 450f4edb
+ CMOVQLE (BX), DX // 480f4e13
+ CMOVQLE (R11), DX // 490f4e13
+ CMOVQLE DX, DX // 480f4ed2
+ CMOVQLE R11, DX // 490f4ed3
+ CMOVQLE (BX), R11 // 4c0f4e1b
+ CMOVQLE (R11), R11 // 4d0f4e1b
+ CMOVQLE DX, R11 // 4c0f4eda
+ CMOVQLE R11, R11 // 4d0f4edb
+ CMOVWNE (BX), DX // 660f4513
+ CMOVWNE (R11), DX // 66410f4513
+ CMOVWNE DX, DX // 660f45d2
+ CMOVWNE R11, DX // 66410f45d3
+ CMOVWNE (BX), R11 // 66440f451b
+ CMOVWNE (R11), R11 // 66450f451b
+ CMOVWNE DX, R11 // 66440f45da
+ CMOVWNE R11, R11 // 66450f45db
+ CMOVLNE (BX), DX // 0f4513
+ CMOVLNE (R11), DX // 410f4513
+ CMOVLNE DX, DX // 0f45d2
+ CMOVLNE R11, DX // 410f45d3
+ CMOVLNE (BX), R11 // 440f451b
+ CMOVLNE (R11), R11 // 450f451b
+ CMOVLNE DX, R11 // 440f45da
+ CMOVLNE R11, R11 // 450f45db
+ CMOVQNE (BX), DX // 480f4513
+ CMOVQNE (R11), DX // 490f4513
+ CMOVQNE DX, DX // 480f45d2
+ CMOVQNE R11, DX // 490f45d3
+ CMOVQNE (BX), R11 // 4c0f451b
+ CMOVQNE (R11), R11 // 4d0f451b
+ CMOVQNE DX, R11 // 4c0f45da
+ CMOVQNE R11, R11 // 4d0f45db
+ CMOVWOC (BX), DX // 660f4113
+ CMOVWOC (R11), DX // 66410f4113
+ CMOVWOC DX, DX // 660f41d2
+ CMOVWOC R11, DX // 66410f41d3
+ CMOVWOC (BX), R11 // 66440f411b
+ CMOVWOC (R11), R11 // 66450f411b
+ CMOVWOC DX, R11 // 66440f41da
+ CMOVWOC R11, R11 // 66450f41db
+ CMOVLOC (BX), DX // 0f4113
+ CMOVLOC (R11), DX // 410f4113
+ CMOVLOC DX, DX // 0f41d2
+ CMOVLOC R11, DX // 410f41d3
+ CMOVLOC (BX), R11 // 440f411b
+ CMOVLOC (R11), R11 // 450f411b
+ CMOVLOC DX, R11 // 440f41da
+ CMOVLOC R11, R11 // 450f41db
+ CMOVQOC (BX), DX // 480f4113
+ CMOVQOC (R11), DX // 490f4113
+ CMOVQOC DX, DX // 480f41d2
+ CMOVQOC R11, DX // 490f41d3
+ CMOVQOC (BX), R11 // 4c0f411b
+ CMOVQOC (R11), R11 // 4d0f411b
+ CMOVQOC DX, R11 // 4c0f41da
+ CMOVQOC R11, R11 // 4d0f41db
+ CMOVWPC (BX), DX // 660f4b13
+ CMOVWPC (R11), DX // 66410f4b13
+ CMOVWPC DX, DX // 660f4bd2
+ CMOVWPC R11, DX // 66410f4bd3
+ CMOVWPC (BX), R11 // 66440f4b1b
+ CMOVWPC (R11), R11 // 66450f4b1b
+ CMOVWPC DX, R11 // 66440f4bda
+ CMOVWPC R11, R11 // 66450f4bdb
+ CMOVLPC (BX), DX // 0f4b13
+ CMOVLPC (R11), DX // 410f4b13
+ CMOVLPC DX, DX // 0f4bd2
+ CMOVLPC R11, DX // 410f4bd3
+ CMOVLPC (BX), R11 // 440f4b1b
+ CMOVLPC (R11), R11 // 450f4b1b
+ CMOVLPC DX, R11 // 440f4bda
+ CMOVLPC R11, R11 // 450f4bdb
+ CMOVQPC (BX), DX // 480f4b13
+ CMOVQPC (R11), DX // 490f4b13
+ CMOVQPC DX, DX // 480f4bd2
+ CMOVQPC R11, DX // 490f4bd3
+ CMOVQPC (BX), R11 // 4c0f4b1b
+ CMOVQPC (R11), R11 // 4d0f4b1b
+ CMOVQPC DX, R11 // 4c0f4bda
+ CMOVQPC R11, R11 // 4d0f4bdb
+ CMOVWPL (BX), DX // 660f4913
+ CMOVWPL (R11), DX // 66410f4913
+ CMOVWPL DX, DX // 660f49d2
+ CMOVWPL R11, DX // 66410f49d3
+ CMOVWPL (BX), R11 // 66440f491b
+ CMOVWPL (R11), R11 // 66450f491b
+ CMOVWPL DX, R11 // 66440f49da
+ CMOVWPL R11, R11 // 66450f49db
+ CMOVLPL (BX), DX // 0f4913
+ CMOVLPL (R11), DX // 410f4913
+ CMOVLPL DX, DX // 0f49d2
+ CMOVLPL R11, DX // 410f49d3
+ CMOVLPL (BX), R11 // 440f491b
+ CMOVLPL (R11), R11 // 450f491b
+ CMOVLPL DX, R11 // 440f49da
+ CMOVLPL R11, R11 // 450f49db
+ CMOVQPL (BX), DX // 480f4913
+ CMOVQPL (R11), DX // 490f4913
+ CMOVQPL DX, DX // 480f49d2
+ CMOVQPL R11, DX // 490f49d3
+ CMOVQPL (BX), R11 // 4c0f491b
+ CMOVQPL (R11), R11 // 4d0f491b
+ CMOVQPL DX, R11 // 4c0f49da
+ CMOVQPL R11, R11 // 4d0f49db
+ CMOVWOS (BX), DX // 660f4013
+ CMOVWOS (R11), DX // 66410f4013
+ CMOVWOS DX, DX // 660f40d2
+ CMOVWOS R11, DX // 66410f40d3
+ CMOVWOS (BX), R11 // 66440f401b
+ CMOVWOS (R11), R11 // 66450f401b
+ CMOVWOS DX, R11 // 66440f40da
+ CMOVWOS R11, R11 // 66450f40db
+ CMOVLOS (BX), DX // 0f4013
+ CMOVLOS (R11), DX // 410f4013
+ CMOVLOS DX, DX // 0f40d2
+ CMOVLOS R11, DX // 410f40d3
+ CMOVLOS (BX), R11 // 440f401b
+ CMOVLOS (R11), R11 // 450f401b
+ CMOVLOS DX, R11 // 440f40da
+ CMOVLOS R11, R11 // 450f40db
+ CMOVQOS (BX), DX // 480f4013
+ CMOVQOS (R11), DX // 490f4013
+ CMOVQOS DX, DX // 480f40d2
+ CMOVQOS R11, DX // 490f40d3
+ CMOVQOS (BX), R11 // 4c0f401b
+ CMOVQOS (R11), R11 // 4d0f401b
+ CMOVQOS DX, R11 // 4c0f40da
+ CMOVQOS R11, R11 // 4d0f40db
+ CMOVWPS (BX), DX // 660f4a13
+ CMOVWPS (R11), DX // 66410f4a13
+ CMOVWPS DX, DX // 660f4ad2
+ CMOVWPS R11, DX // 66410f4ad3
+ CMOVWPS (BX), R11 // 66440f4a1b
+ CMOVWPS (R11), R11 // 66450f4a1b
+ CMOVWPS DX, R11 // 66440f4ada
+ CMOVWPS R11, R11 // 66450f4adb
+ CMOVLPS (BX), DX // 0f4a13
+ CMOVLPS (R11), DX // 410f4a13
+ CMOVLPS DX, DX // 0f4ad2
+ CMOVLPS R11, DX // 410f4ad3
+ CMOVLPS (BX), R11 // 440f4a1b
+ CMOVLPS (R11), R11 // 450f4a1b
+ CMOVLPS DX, R11 // 440f4ada
+ CMOVLPS R11, R11 // 450f4adb
+ CMOVQPS (BX), DX // 480f4a13
+ CMOVQPS (R11), DX // 490f4a13
+ CMOVQPS DX, DX // 480f4ad2
+ CMOVQPS R11, DX // 490f4ad3
+ CMOVQPS (BX), R11 // 4c0f4a1b
+ CMOVQPS (R11), R11 // 4d0f4a1b
+ CMOVQPS DX, R11 // 4c0f4ada
+ CMOVQPS R11, R11 // 4d0f4adb
+ CMOVWMI (BX), DX // 660f4813
+ CMOVWMI (R11), DX // 66410f4813
+ CMOVWMI DX, DX // 660f48d2
+ CMOVWMI R11, DX // 66410f48d3
+ CMOVWMI (BX), R11 // 66440f481b
+ CMOVWMI (R11), R11 // 66450f481b
+ CMOVWMI DX, R11 // 66440f48da
+ CMOVWMI R11, R11 // 66450f48db
+ CMOVLMI (BX), DX // 0f4813
+ CMOVLMI (R11), DX // 410f4813
+ CMOVLMI DX, DX // 0f48d2
+ CMOVLMI R11, DX // 410f48d3
+ CMOVLMI (BX), R11 // 440f481b
+ CMOVLMI (R11), R11 // 450f481b
+ CMOVLMI DX, R11 // 440f48da
+ CMOVLMI R11, R11 // 450f48db
+ CMOVQMI (BX), DX // 480f4813
+ CMOVQMI (R11), DX // 490f4813
+ CMOVQMI DX, DX // 480f48d2
+ CMOVQMI R11, DX // 490f48d3
+ CMOVQMI (BX), R11 // 4c0f481b
+ CMOVQMI (R11), R11 // 4d0f481b
+ CMOVQMI DX, R11 // 4c0f48da
+ CMOVQMI R11, R11 // 4d0f48db
+ CMPB AL, $7 // 3c07
+ CMPW AX, $61731 // 663d23f1
+ CMPL AX, $4045620583 // 3d674523f1
+ CMPQ AX, $-249346713 // 483d674523f1
+ CMPW (BX), $61731 // 66813b23f1
+ CMPW (R11), $61731 // 6641813b23f1
+ CMPW DX, $61731 // 6681fa23f1
+ CMPW R11, $61731 // 664181fb23f1
+ CMPW (BX), $7 // 66833b07
+ CMPW (R11), $7 // 6641833b07
+ CMPW DX, $7 // 6683fa07
+ CMPW R11, $7 // 664183fb07
+ CMPW (BX), DX // 663913
+ CMPW (BX), R11 // 6644391b
+ CMPW (R11), DX // 66413913
+ CMPW (R11), R11 // 6645391b
+ CMPW DX, DX // 6639d2 or 663bd2
+ CMPW DX, R11 // 664439da or 66413bd3
+ CMPW R11, DX // 664139d3 or 66443bda
+ CMPW R11, R11 // 664539db or 66453bdb
+ CMPL (BX), $4045620583 // 813b674523f1
+ CMPL (R11), $4045620583 // 41813b674523f1
+ CMPL DX, $4045620583 // 81fa674523f1
+ CMPL R11, $4045620583 // 4181fb674523f1
+ CMPL (BX), $7 // 833b07
+ CMPL (R11), $7 // 41833b07
+ CMPL DX, $7 // 83fa07
+ CMPL R11, $7 // 4183fb07
+ CMPL (BX), DX // 3913
+ CMPL (BX), R11 // 44391b
+ CMPL (R11), DX // 413913
+ CMPL (R11), R11 // 45391b
+ CMPL DX, DX // 39d2 or 3bd2
+ CMPL DX, R11 // 4439da or 413bd3
+ CMPL R11, DX // 4139d3 or 443bda
+ CMPL R11, R11 // 4539db or 453bdb
+ CMPQ (BX), $-249346713 // 48813b674523f1
+ CMPQ (R11), $-249346713 // 49813b674523f1
+ CMPQ DX, $-249346713 // 4881fa674523f1
+ CMPQ R11, $-249346713 // 4981fb674523f1
+ CMPQ (BX), $7 // 48833b07
+ CMPQ (R11), $7 // 49833b07
+ CMPQ DX, $7 // 4883fa07
+ CMPQ R11, $7 // 4983fb07
+ CMPQ (BX), DX // 483913
+ CMPQ (BX), R11 // 4c391b
+ CMPQ (R11), DX // 493913
+ CMPQ (R11), R11 // 4d391b
+ CMPQ DX, DX // 4839d2 or 483bd2
+ CMPQ DX, R11 // 4c39da or 493bd3
+ CMPQ R11, DX // 4939d3 or 4c3bda
+ CMPQ R11, R11 // 4d39db or 4d3bdb
+ CMPB (BX), $7 // 803b07
+ CMPB (R11), $7 // 41803b07
+ CMPB DL, $7 // 80fa07
+ CMPB R11, $7 // 4180fb07
+ CMPB (BX), DL // 3813
+ CMPB (BX), R11 // 44381b
+ CMPB (R11), DL // 413813
+ CMPB (R11), R11 // 45381b
+ CMPB DL, DL // 38d2 or 3ad2
+ CMPB DL, R11 // 4438da or 413ad3
+ CMPB R11, DL // 4138d3 or 443ada
+ CMPB R11, R11 // 4538db or 453adb
+ CMPW DX, (BX) // 663b13
+ CMPW DX, (R11) // 66413b13
+ CMPW R11, (BX) // 66443b1b
+ CMPW R11, (R11) // 66453b1b
+ CMPL DX, (BX) // 3b13
+ CMPL DX, (R11) // 413b13
+ CMPL R11, (BX) // 443b1b
+ CMPL R11, (R11) // 453b1b
+ CMPQ DX, (BX) // 483b13
+ CMPQ DX, (R11) // 493b13
+ CMPQ R11, (BX) // 4c3b1b
+ CMPQ R11, (R11) // 4d3b1b
+ CMPB DL, (BX) // 3a13
+ CMPB DL, (R11) // 413a13
+ CMPB R11, (BX) // 443a1b
+ CMPB R11, (R11) // 453a1b
+ CMPPD (BX), X2, $7 // 660fc21307
+ CMPPD (R11), X2, $7 // 66410fc21307
+ CMPPD X2, X2, $7 // 660fc2d207
+ CMPPD X11, X2, $7 // 66410fc2d307
+ CMPPD (BX), X11, $7 // 66440fc21b07
+ CMPPD (R11), X11, $7 // 66450fc21b07
+ CMPPD X2, X11, $7 // 66440fc2da07
+ CMPPD X11, X11, $7 // 66450fc2db07
+ CMPPS (BX), X2, $7 // 0fc21307
+ CMPPS (R11), X2, $7 // 410fc21307
+ CMPPS X2, X2, $7 // 0fc2d207
+ CMPPS X11, X2, $7 // 410fc2d307
+ CMPPS (BX), X11, $7 // 440fc21b07
+ CMPPS (R11), X11, $7 // 450fc21b07
+ CMPPS X2, X11, $7 // 440fc2da07
+ CMPPS X11, X11, $7 // 450fc2db07
+ CMPSB // a6
+ CMPSL // a7
+ CMPSD (BX), X2, $7 // f20fc21307
+ CMPSD (R11), X2, $7 // f2410fc21307
+ CMPSD X2, X2, $7 // f20fc2d207
+ CMPSD X11, X2, $7 // f2410fc2d307
+ CMPSD (BX), X11, $7 // f2440fc21b07
+ CMPSD (R11), X11, $7 // f2450fc21b07
+ CMPSD X2, X11, $7 // f2440fc2da07
+ CMPSD X11, X11, $7 // f2450fc2db07
+ CMPSQ // 48a7
+ CMPSS (BX), X2, $7 // f30fc21307
+ CMPSS (R11), X2, $7 // f3410fc21307
+ CMPSS X2, X2, $7 // f30fc2d207
+ CMPSS X11, X2, $7 // f3410fc2d307
+ CMPSS (BX), X11, $7 // f3440fc21b07
+ CMPSS (R11), X11, $7 // f3450fc21b07
+ CMPSS X2, X11, $7 // f3440fc2da07
+ CMPSS X11, X11, $7 // f3450fc2db07
+ CMPSW // 66a7
+ CMPXCHGW DX, (BX) // 660fb113
+ CMPXCHGW R11, (BX) // 66440fb11b
+ CMPXCHGW DX, (R11) // 66410fb113
+ CMPXCHGW R11, (R11) // 66450fb11b
+ CMPXCHGW DX, DX // 660fb1d2
+ CMPXCHGW R11, DX // 66440fb1da
+ CMPXCHGW DX, R11 // 66410fb1d3
+ CMPXCHGW R11, R11 // 66450fb1db
+ CMPXCHGL DX, (BX) // 0fb113
+ CMPXCHGL R11, (BX) // 440fb11b
+ CMPXCHGL DX, (R11) // 410fb113
+ CMPXCHGL R11, (R11) // 450fb11b
+ CMPXCHGL DX, DX // 0fb1d2
+ CMPXCHGL R11, DX // 440fb1da
+ CMPXCHGL DX, R11 // 410fb1d3
+ CMPXCHGL R11, R11 // 450fb1db
+ CMPXCHGQ DX, (BX) // 480fb113
+ CMPXCHGQ R11, (BX) // 4c0fb11b
+ CMPXCHGQ DX, (R11) // 490fb113
+ CMPXCHGQ R11, (R11) // 4d0fb11b
+ CMPXCHGQ DX, DX // 480fb1d2
+ CMPXCHGQ R11, DX // 4c0fb1da
+ CMPXCHGQ DX, R11 // 490fb1d3
+ CMPXCHGQ R11, R11 // 4d0fb1db
+ CMPXCHGB DL, (BX) // 0fb013
+ CMPXCHGB R11, (BX) // 440fb01b
+ CMPXCHGB DL, (R11) // 410fb013
+ CMPXCHGB R11, (R11) // 450fb01b
+ CMPXCHGB DL, DL // 0fb0d2
+ CMPXCHGB R11, DL // 440fb0da
+ CMPXCHGB DL, R11 // 410fb0d3
+ CMPXCHGB R11, R11 // 450fb0db
+ CMPXCHG16B (BX) // 480fc70b
+ CMPXCHG16B (R11) // 490fc70b
+ CMPXCHG8B (BX) // 0fc70b
+ CMPXCHG8B (R11) // 410fc70b
+ COMISD (BX), X2 // 660f2f13
+ COMISD (R11), X2 // 66410f2f13
+ COMISD X2, X2 // 660f2fd2
+ COMISD X11, X2 // 66410f2fd3
+ COMISD (BX), X11 // 66440f2f1b
+ COMISD (R11), X11 // 66450f2f1b
+ COMISD X2, X11 // 66440f2fda
+ COMISD X11, X11 // 66450f2fdb
+ COMISS (BX), X2 // 0f2f13
+ COMISS (R11), X2 // 410f2f13
+ COMISS X2, X2 // 0f2fd2
+ COMISS X11, X2 // 410f2fd3
+ COMISS (BX), X11 // 440f2f1b
+ COMISS (R11), X11 // 450f2f1b
+ COMISS X2, X11 // 440f2fda
+ COMISS X11, X11 // 450f2fdb
+ CPUID // 0fa2
+ CQO // 4899
+ CRC32W (BX), DX // 66f20f38f113
+ CRC32W (R11), DX // 66f2410f38f113
+ CRC32W DX, DX // 66f20f38f1d2
+ CRC32W R11, DX // 66f2410f38f1d3
+ CRC32W (BX), R11 // 66f2440f38f11b
+ CRC32W (R11), R11 // 66f2450f38f11b
+ CRC32W DX, R11 // 66f2440f38f1da
+ CRC32W R11, R11 // 66f2450f38f1db
+ CRC32L (BX), DX // f20f38f113
+ CRC32L (R11), DX // f2410f38f113
+ CRC32L DX, DX // f20f38f1d2
+ CRC32L R11, DX // f2410f38f1d3
+ CRC32L (BX), R11 // f2440f38f11b
+ CRC32L (R11), R11 // f2450f38f11b
+ CRC32L DX, R11 // f2440f38f1da
+ CRC32L R11, R11 // f2450f38f1db
+ CRC32B (BX), DX // f20f38f013 or f2480f38f013
+ CRC32B (R11), DX // f2410f38f013 or f2490f38f013
+ CRC32B DL, DX // f20f38f0d2 or f2480f38f0d2
+ CRC32B R11, DX // f2410f38f0d3 or f2490f38f0d3
+ CRC32B (BX), R11 // f2440f38f01b or f24c0f38f01b
+ CRC32B (R11), R11 // f2450f38f01b or f24d0f38f01b
+ CRC32B DL, R11 // f2440f38f0da or f24c0f38f0da
+ CRC32B R11, R11 // f2450f38f0db or f24d0f38f0db
+ CRC32Q (BX), DX // f2480f38f113
+ CRC32Q (R11), DX // f2490f38f113
+ CRC32Q DX, DX // f2480f38f1d2
+ CRC32Q R11, DX // f2490f38f1d3
+ CRC32Q (BX), R11 // f24c0f38f11b
+ CRC32Q (R11), R11 // f24d0f38f11b
+ CRC32Q DX, R11 // f24c0f38f1da
+ CRC32Q R11, R11 // f24d0f38f1db
+ CVTPL2PD (BX), X2 // f30fe613
+ CVTPL2PD (R11), X2 // f3410fe613
+ CVTPL2PD X2, X2 // f30fe6d2
+ CVTPL2PD X11, X2 // f3410fe6d3
+ CVTPL2PD (BX), X11 // f3440fe61b
+ CVTPL2PD (R11), X11 // f3450fe61b
+ CVTPL2PD X2, X11 // f3440fe6da
+ CVTPL2PD X11, X11 // f3450fe6db
+ CVTPL2PS (BX), X2 // 0f5b13
+ CVTPL2PS (R11), X2 // 410f5b13
+ CVTPL2PS X2, X2 // 0f5bd2
+ CVTPL2PS X11, X2 // 410f5bd3
+ CVTPL2PS (BX), X11 // 440f5b1b
+ CVTPL2PS (R11), X11 // 450f5b1b
+ CVTPL2PS X2, X11 // 440f5bda
+ CVTPL2PS X11, X11 // 450f5bdb
+ CVTPD2PL (BX), X2 // f20fe613
+ CVTPD2PL (R11), X2 // f2410fe613
+ CVTPD2PL X2, X2 // f20fe6d2
+ CVTPD2PL X11, X2 // f2410fe6d3
+ CVTPD2PL (BX), X11 // f2440fe61b
+ CVTPD2PL (R11), X11 // f2450fe61b
+ CVTPD2PL X2, X11 // f2440fe6da
+ CVTPD2PL X11, X11 // f2450fe6db
+ //TODO: CVTPD2PI (BX), M2 // 660f2d13
+ //TODO: CVTPD2PI (R11), M2 // 66410f2d13
+ //TODO: CVTPD2PI X2, M2 // 660f2dd2
+ //TODO: CVTPD2PI X11, M2 // 66410f2dd3
+ //TODO: CVTPD2PI (BX), M3 // 660f2d1b
+ //TODO: CVTPD2PI (R11), M3 // 66410f2d1b
+ //TODO: CVTPD2PI X2, M3 // 660f2dda
+ //TODO: CVTPD2PI X11, M3 // 66410f2ddb
+ CVTPD2PS (BX), X2 // 660f5a13
+ CVTPD2PS (R11), X2 // 66410f5a13
+ CVTPD2PS X2, X2 // 660f5ad2
+ CVTPD2PS X11, X2 // 66410f5ad3
+ CVTPD2PS (BX), X11 // 66440f5a1b
+ CVTPD2PS (R11), X11 // 66450f5a1b
+ CVTPD2PS X2, X11 // 66440f5ada
+ CVTPD2PS X11, X11 // 66450f5adb
+ //TODO: CVTPI2PD (BX), X2 // 660f2a13
+ //TODO: CVTPI2PD (R11), X2 // 66410f2a13
+ //TODO: CVTPI2PD M2, X2 // 660f2ad2
+ //TODO: CVTPI2PD M3, X2 // 660f2ad3
+ //TODO: CVTPI2PD (BX), X11 // 66440f2a1b
+ //TODO: CVTPI2PD (R11), X11 // 66450f2a1b
+ //TODO: CVTPI2PD M2, X11 // 66440f2ada
+ //TODO: CVTPI2PD M3, X11 // 66440f2adb
+ //TODO: CVTPI2PS (BX), X2 // 0f2a13
+ //TODO: CVTPI2PS (R11), X2 // 410f2a13
+ //TODO: CVTPI2PS M2, X2 // 0f2ad2
+ //TODO: CVTPI2PS M3, X2 // 0f2ad3
+ //TODO: CVTPI2PS (BX), X11 // 440f2a1b
+ //TODO: CVTPI2PS (R11), X11 // 450f2a1b
+ //TODO: CVTPI2PS M2, X11 // 440f2ada
+ //TODO: CVTPI2PS M3, X11 // 440f2adb
+ CVTPS2PL (BX), X2 // 660f5b13
+ CVTPS2PL (R11), X2 // 66410f5b13
+ CVTPS2PL X2, X2 // 660f5bd2
+ CVTPS2PL X11, X2 // 66410f5bd3
+ CVTPS2PL (BX), X11 // 66440f5b1b
+ CVTPS2PL (R11), X11 // 66450f5b1b
+ CVTPS2PL X2, X11 // 66440f5bda
+ CVTPS2PL X11, X11 // 66450f5bdb
+ CVTPS2PD (BX), X2 // 0f5a13
+ CVTPS2PD (R11), X2 // 410f5a13
+ CVTPS2PD X2, X2 // 0f5ad2
+ CVTPS2PD X11, X2 // 410f5ad3
+ CVTPS2PD (BX), X11 // 440f5a1b
+ CVTPS2PD (R11), X11 // 450f5a1b
+ CVTPS2PD X2, X11 // 440f5ada
+ CVTPS2PD X11, X11 // 450f5adb
+ //TODO: CVTPS2PI (BX), M2 // 0f2d13
+ //TODO: CVTPS2PI (R11), M2 // 410f2d13
+ //TODO: CVTPS2PI X2, M2 // 0f2dd2
+ //TODO: CVTPS2PI X11, M2 // 410f2dd3
+ //TODO: CVTPS2PI (BX), M3 // 0f2d1b
+ //TODO: CVTPS2PI (R11), M3 // 410f2d1b
+ //TODO: CVTPS2PI X2, M3 // 0f2dda
+ //TODO: CVTPS2PI X11, M3 // 410f2ddb
+ CVTSD2SL (BX), DX // f20f2d13 or f2480f2d13
+ CVTSD2SL (R11), DX // f2410f2d13 or f2490f2d13
+ CVTSD2SL X2, DX // f20f2dd2 or f2480f2dd2
+ CVTSD2SL X11, DX // f2410f2dd3 or f2490f2dd3
+ CVTSD2SL (BX), R11 // f2440f2d1b or f24c0f2d1b
+ CVTSD2SL (R11), R11 // f2450f2d1b or f24d0f2d1b
+ CVTSD2SL X2, R11 // f2440f2dda or f24c0f2dda
+ CVTSD2SL X11, R11 // f2450f2ddb or f24d0f2ddb
+ CVTSD2SS (BX), X2 // f20f5a13
+ CVTSD2SS (R11), X2 // f2410f5a13
+ CVTSD2SS X2, X2 // f20f5ad2
+ CVTSD2SS X11, X2 // f2410f5ad3
+ CVTSD2SS (BX), X11 // f2440f5a1b
+ CVTSD2SS (R11), X11 // f2450f5a1b
+ CVTSD2SS X2, X11 // f2440f5ada
+ CVTSD2SS X11, X11 // f2450f5adb
+ CVTSL2SD (BX), X2 // f20f2a13
+ CVTSL2SD (R11), X2 // f2410f2a13
+ CVTSL2SD DX, X2 // f20f2ad2
+ CVTSL2SD R11, X2 // f2410f2ad3
+ CVTSL2SD (BX), X11 // f2440f2a1b
+ CVTSL2SD (R11), X11 // f2450f2a1b
+ CVTSL2SD DX, X11 // f2440f2ada
+ CVTSL2SD R11, X11 // f2450f2adb
+ CVTSQ2SD (BX), X2 // f2480f2a13
+ CVTSQ2SD (R11), X2 // f2490f2a13
+ CVTSQ2SD DX, X2 // f2480f2ad2
+ CVTSQ2SD R11, X2 // f2490f2ad3
+ CVTSQ2SD (BX), X11 // f24c0f2a1b
+ CVTSQ2SD (R11), X11 // f24d0f2a1b
+ CVTSQ2SD DX, X11 // f24c0f2ada
+ CVTSQ2SD R11, X11 // f24d0f2adb
+ CVTSL2SS (BX), X2 // f30f2a13
+ CVTSL2SS (R11), X2 // f3410f2a13
+ CVTSL2SS DX, X2 // f30f2ad2
+ CVTSL2SS R11, X2 // f3410f2ad3
+ CVTSL2SS (BX), X11 // f3440f2a1b
+ CVTSL2SS (R11), X11 // f3450f2a1b
+ CVTSL2SS DX, X11 // f3440f2ada
+ CVTSL2SS R11, X11 // f3450f2adb
+ CVTSQ2SS (BX), X2 // f3480f2a13
+ CVTSQ2SS (R11), X2 // f3490f2a13
+ CVTSQ2SS DX, X2 // f3480f2ad2
+ CVTSQ2SS R11, X2 // f3490f2ad3
+ CVTSQ2SS (BX), X11 // f34c0f2a1b
+ CVTSQ2SS (R11), X11 // f34d0f2a1b
+ CVTSQ2SS DX, X11 // f34c0f2ada
+ CVTSQ2SS R11, X11 // f34d0f2adb
+ CVTSS2SD (BX), X2 // f30f5a13
+ CVTSS2SD (R11), X2 // f3410f5a13
+ CVTSS2SD X2, X2 // f30f5ad2
+ CVTSS2SD X11, X2 // f3410f5ad3
+ CVTSS2SD (BX), X11 // f3440f5a1b
+ CVTSS2SD (R11), X11 // f3450f5a1b
+ CVTSS2SD X2, X11 // f3440f5ada
+ CVTSS2SD X11, X11 // f3450f5adb
+ CVTSS2SL (BX), DX // f30f2d13 or f3480f2d13
+ CVTSS2SL (R11), DX // f3410f2d13 or f3490f2d13
+ CVTSS2SL X2, DX // f30f2dd2 or f3480f2dd2
+ CVTSS2SL X11, DX // f3410f2dd3 or f3490f2dd3
+ CVTSS2SL (BX), R11 // f3440f2d1b or f34c0f2d1b
+ CVTSS2SL (R11), R11 // f3450f2d1b or f34d0f2d1b
+ CVTSS2SL X2, R11 // f3440f2dda or f34c0f2dda
+ CVTSS2SL X11, R11 // f3450f2ddb or f34d0f2ddb
+ CVTTPD2PL (BX), X2 // 660fe613
+ CVTTPD2PL (R11), X2 // 66410fe613
+ CVTTPD2PL X2, X2 // 660fe6d2
+ CVTTPD2PL X11, X2 // 66410fe6d3
+ CVTTPD2PL (BX), X11 // 66440fe61b
+ CVTTPD2PL (R11), X11 // 66450fe61b
+ CVTTPD2PL X2, X11 // 66440fe6da
+ CVTTPD2PL X11, X11 // 66450fe6db
+ //TODO: CVTTPD2PI (BX), M2 // 660f2c13
+ //TODO: CVTTPD2PI (R11), M2 // 66410f2c13
+ //TODO: CVTTPD2PI X2, M2 // 660f2cd2
+ //TODO: CVTTPD2PI X11, M2 // 66410f2cd3
+ //TODO: CVTTPD2PI (BX), M3 // 660f2c1b
+ //TODO: CVTTPD2PI (R11), M3 // 66410f2c1b
+ //TODO: CVTTPD2PI X2, M3 // 660f2cda
+ //TODO: CVTTPD2PI X11, M3 // 66410f2cdb
+ CVTTPS2PL (BX), X2 // f30f5b13
+ CVTTPS2PL (R11), X2 // f3410f5b13
+ CVTTPS2PL X2, X2 // f30f5bd2
+ CVTTPS2PL X11, X2 // f3410f5bd3
+ CVTTPS2PL (BX), X11 // f3440f5b1b
+ CVTTPS2PL (R11), X11 // f3450f5b1b
+ CVTTPS2PL X2, X11 // f3440f5bda
+ CVTTPS2PL X11, X11 // f3450f5bdb
+ //TODO: CVTTPS2PI (BX), M2 // 0f2c13
+ //TODO: CVTTPS2PI (R11), M2 // 410f2c13
+ //TODO: CVTTPS2PI X2, M2 // 0f2cd2
+ //TODO: CVTTPS2PI X11, M2 // 410f2cd3
+ //TODO: CVTTPS2PI (BX), M3 // 0f2c1b
+ //TODO: CVTTPS2PI (R11), M3 // 410f2c1b
+ //TODO: CVTTPS2PI X2, M3 // 0f2cda
+ //TODO: CVTTPS2PI X11, M3 // 410f2cdb
+ CVTTSD2SL (BX), DX // f20f2c13 or f2480f2c13
+ CVTTSD2SL (R11), DX // f2410f2c13 or f2490f2c13
+ CVTTSD2SL X2, DX // f20f2cd2 or f2480f2cd2
+ CVTTSD2SL X11, DX // f2410f2cd3 or f2490f2cd3
+ CVTTSD2SL (BX), R11 // f2440f2c1b or f24c0f2c1b
+ CVTTSD2SL (R11), R11 // f2450f2c1b or f24d0f2c1b
+ CVTTSD2SL X2, R11 // f2440f2cda or f24c0f2cda
+ CVTTSD2SL X11, R11 // f2450f2cdb or f24d0f2cdb
+ CVTTSS2SL (BX), DX // f30f2c13 or f3480f2c13
+ CVTTSS2SL (R11), DX // f3410f2c13 or f3490f2c13
+ CVTTSS2SL X2, DX // f30f2cd2 or f3480f2cd2
+ CVTTSS2SL X11, DX // f3410f2cd3 or f3490f2cd3
+ CVTTSS2SL (BX), R11 // f3440f2c1b or f34c0f2c1b
+ CVTTSS2SL (R11), R11 // f3450f2c1b or f34d0f2c1b
+ CVTTSS2SL X2, R11 // f3440f2cda or f34c0f2cda
+ CVTTSS2SL X11, R11 // f3450f2cdb or f34d0f2cdb
+ CWD // 6699
+ CWDE // 98
+ DECW (BX) // 66ff0b
+ DECW (R11) // 6641ff0b
+ DECW DX // 66ffca
+ DECW R11 // 6641ffcb
+ DECL (BX) // ff0b
+ DECL (R11) // 41ff0b
+ DECL DX // ffca
+ DECL R11 // 41ffcb
+ DECQ (BX) // 48ff0b
+ DECQ (R11) // 49ff0b
+ DECQ DX // 48ffca
+ DECQ R11 // 49ffcb
+ DECB (BX) // fe0b
+ DECB (R11) // 41fe0b
+ DECB DL // feca
+ DECB R11 // 41fecb
+ DIVW (BX) // 66f733
+ DIVW (R11) // 6641f733
+ DIVW DX // 66f7f2
+ DIVW R11 // 6641f7f3
+ DIVL (BX) // f733
+ DIVL (R11) // 41f733
+ DIVL DX // f7f2
+ DIVL R11 // 41f7f3
+ DIVQ (BX) // 48f733
+ DIVQ (R11) // 49f733
+ DIVQ DX // 48f7f2
+ DIVQ R11 // 49f7f3
+ DIVB (BX) // f633
+ DIVB (R11) // 41f633
+ DIVB DL // f6f2
+ DIVB R11 // 41f6f3
+ DIVPD (BX), X2 // 660f5e13
+ DIVPD (R11), X2 // 66410f5e13
+ DIVPD X2, X2 // 660f5ed2
+ DIVPD X11, X2 // 66410f5ed3
+ DIVPD (BX), X11 // 66440f5e1b
+ DIVPD (R11), X11 // 66450f5e1b
+ DIVPD X2, X11 // 66440f5eda
+ DIVPD X11, X11 // 66450f5edb
+ DIVPS (BX), X2 // 0f5e13
+ DIVPS (R11), X2 // 410f5e13
+ DIVPS X2, X2 // 0f5ed2
+ DIVPS X11, X2 // 410f5ed3
+ DIVPS (BX), X11 // 440f5e1b
+ DIVPS (R11), X11 // 450f5e1b
+ DIVPS X2, X11 // 440f5eda
+ DIVPS X11, X11 // 450f5edb
+ DIVSD (BX), X2 // f20f5e13
+ DIVSD (R11), X2 // f2410f5e13
+ DIVSD X2, X2 // f20f5ed2
+ DIVSD X11, X2 // f2410f5ed3
+ DIVSD (BX), X11 // f2440f5e1b
+ DIVSD (R11), X11 // f2450f5e1b
+ DIVSD X2, X11 // f2440f5eda
+ DIVSD X11, X11 // f2450f5edb
+ DIVSS (BX), X2 // f30f5e13
+ DIVSS (R11), X2 // f3410f5e13
+ DIVSS X2, X2 // f30f5ed2
+ DIVSS X11, X2 // f3410f5ed3
+ DIVSS (BX), X11 // f3440f5e1b
+ DIVSS (R11), X11 // f3450f5e1b
+ DIVSS X2, X11 // f3440f5eda
+ DIVSS X11, X11 // f3450f5edb
+ DPPD $7, (BX), X2 // 660f3a411307
+ DPPD $7, (R11), X2 // 66410f3a411307
+ DPPD $7, X2, X2 // 660f3a41d207
+ DPPD $7, X11, X2 // 66410f3a41d307
+ DPPD $7, (BX), X11 // 66440f3a411b07
+ DPPD $7, (R11), X11 // 66450f3a411b07
+ DPPD $7, X2, X11 // 66440f3a41da07
+ DPPD $7, X11, X11 // 66450f3a41db07
+ DPPS $7, (BX), X2 // 660f3a401307
+ DPPS $7, (R11), X2 // 66410f3a401307
+ DPPS $7, X2, X2 // 660f3a40d207
+ DPPS $7, X11, X2 // 66410f3a40d307
+ DPPS $7, (BX), X11 // 66440f3a401b07
+ DPPS $7, (R11), X11 // 66450f3a401b07
+ DPPS $7, X2, X11 // 66440f3a40da07
+ DPPS $7, X11, X11 // 66450f3a40db07
+ EMMS // 0f77
+ //TODO: ENTERQ $0x12, $0xf123 // c823f112
+ EXTRACTPS $0, X2, (BX) // 660f3a171300
+ EXTRACTPS $1, X11, (BX) // 66440f3a171b01
+ EXTRACTPS $2, X2, (R11) // 66410f3a171302
+ EXTRACTPS $3, X11, (R11) // 66450f3a171b03
+ EXTRACTPS $3, X2, DX // 660f3a17d203
+ EXTRACTPS $2, X11, DX // 66440f3a17da02
+ EXTRACTPS $1, X2, R11 // 66410f3a17d301
+ EXTRACTPS $0, X11, R11 // 66450f3a17db00
+ F2XM1 // d9f0
+ FABS // d9e1
+ FADDD F2, F0 // d8c2
+ FADDD F3, F0 // d8c3
+ FADDD F0, F2 // dcc2
+ FADDD F0, F3 // dcc3
+ FADDD (BX), F0 // d803 or dc03
+ FADDD (R11), F0 // 41d803 or 41dc03
+ FADDDP F0, F2 // dec2
+ FADDDP F0, F3 // dec3
+ FBLD (BX) // df23
+ FBLD (R11) // 41df23
+ FBSTP (BX) // df33
+ FBSTP (R11) // 41df33
+ FCHS // d9e0
+ FCMOVB F2, F0 // dac2
+ FCMOVB F3, F0 // dac3
+ FCMOVBE F2, F0 // dad2
+ FCMOVBE F3, F0 // dad3
+ FCMOVE F2, F0 // daca
+ FCMOVE F3, F0 // dacb
+ FCMOVNB F2, F0 // dbc2
+ FCMOVNB F3, F0 // dbc3
+ FCMOVNBE F2, F0 // dbd2
+ FCMOVNBE F3, F0 // dbd3
+ FCMOVNE F2, F0 // dbca
+ FCMOVNE F3, F0 // dbcb
+ FCMOVNU F2, F0 // dbda
+ FCMOVNU F3, F0 // dbdb
+ FCMOVU F2, F0 // dada
+ FCMOVU F3, F0 // dadb
+ FCOMD F2, F0 // d8d2
+ FCOMD F3, F0 // d8d3
+ FCOMD (BX), F0 // d813 or dc13
+ FCOMD (R11), F0 // 41d813 or 41dc13
+ FCOMI F2, F0 // dbf2
+ FCOMI F3, F0 // dbf3
+ FCOMIP F2, F0 // dff2
+ FCOMIP F3, F0 // dff3
+ //TODO: FCOMP F2 // d8da
+ //TODO: FCOMP F3 // d8db
+ //TODO: FCOMFP (BX) // d81b
+ //TODO: FCOMFP (R11) // 41d81b
+ //TODO: FCOMPL (BX) // dc1b
+ //TODO: FCOMPL (R11) // 41dc1b
+ //TODO: FCOMPP // ded9
+ FCOS // d9ff
+ FDECSTP // d9f6
+ FDIVD F2, F0 // d8f2
+ FDIVD F3, F0 // d8f3
+ FDIVD F0, F2 // dcfa or dcf2
+ FDIVD F0, F3 // dcfb or dcf3
+ FDIVD (BX), F0 // d833 or dc33
+ FDIVD (R11), F0 // 41d833 or 41dc33
+ //TODO: FDIVRP F0, F2 // defa
+ //TODO: FDIVRP F0, F3 // defb
+ //TODO: FDIVR F2, F0 // d8fa
+ //TODO: FDIVR F3, F0 // d8fb
+ //TODO: FDIVFR (BX) // d83b
+ //TODO: FDIVFR (R11) // 41d83b
+ //TODO: FDIVRL (BX) // dc3b
+ //TODO: FDIVRL (R11) // 41dc3b
+ //TODO: FDIVP F0, F2 // def2
+ //TODO: FDIVP F0, F3 // def3
+ //TODO: FFREE F2 // ddc2
+ //TODO: FFREE F3 // ddc3
+ //TODO: FFREEP F2 // dfc2
+ //TODO: FFREEP F3 // dfc3
+ //TODO: FIADD (BX) // de03
+ //TODO: FIADD (R11) // 41de03
+ //TODO: FIADDL (BX) // da03
+ //TODO: FIADDL (R11) // 41da03
+ //TODO: FICOM (BX) // de13
+ //TODO: FICOM (R11) // 41de13
+ //TODO: FICOML (BX) // da13
+ //TODO: FICOML (R11) // 41da13
+ //TODO: FICOMP (BX) // de1b
+ //TODO: FICOMP (R11) // 41de1b
+ //TODO: FICOMPL (BX) // da1b
+ //TODO: FICOMPL (R11) // 41da1b
+ //TODO: FIDIV (BX) // de33
+ //TODO: FIDIV (R11) // 41de33
+ //TODO: FIDIVL (BX) // da33
+ //TODO: FIDIVL (R11) // 41da33
+ //TODO: FIDIVR (BX) // de3b
+ //TODO: FIDIVR (R11) // 41de3b
+ //TODO: FIDIVRL (BX) // da3b
+ //TODO: FIDIVRL (R11) // 41da3b
+ //TODO: FILD (BX) // df03
+ //TODO: FILD (R11) // 41df03
+ //TODO: FILDL (BX) // db03
+ //TODO: FILDL (R11) // 41db03
+ //TODO: FILDLL (BX) // df2b
+ //TODO: FILDLL (R11) // 41df2b
+ //TODO: FIMUL (BX) // de0b
+ //TODO: FIMUL (R11) // 41de0b
+ //TODO: FIMULL (BX) // da0b
+ //TODO: FIMULL (R11) // 41da0b
+ FINCSTP // d9f7
+ //TODO: FIST (BX) // df13
+ //TODO: FIST (R11) // 41df13
+ //TODO: FISTL (BX) // db13
+ //TODO: FISTL (R11) // 41db13
+ //TODO: FISTP (BX) // df1b
+ //TODO: FISTP (R11) // 41df1b
+ //TODO: FISTPL (BX) // db1b
+ //TODO: FISTPL (R11) // 41db1b
+ //TODO: FISTPLL (BX) // df3b
+ //TODO: FISTPLL (R11) // 41df3b
+ //TODO: FISTTP (BX) // df0b
+ //TODO: FISTTP (R11) // 41df0b
+ //TODO: FISTTPL (BX) // db0b
+ //TODO: FISTTPL (R11) // 41db0b
+ //TODO: FISTTPLL (BX) // dd0b
+ //TODO: FISTTPLL (R11) // 41dd0b
+ //TODO: FISUB (BX) // de23
+ //TODO: FISUB (R11) // 41de23
+ //TODO: FISUBL (BX) // da23
+ //TODO: FISUBL (R11) // 41da23
+ //TODO: FISUBR (BX) // de2b
+ //TODO: FISUBR (R11) // 41de2b
+ //TODO: FISUBRL (BX) // da2b
+ //TODO: FISUBRL (R11) // 41da2b
+ //TODO: FLD F2 // d9c2
+ //TODO: FLD F3 // d9c3
+ //TODO: FLDS (BX) // d903
+ //TODO: FLDS (R11) // 41d903
+ //TODO: FLDL (BX) // dd03
+ //TODO: FLDL (R11) // 41dd03
+ //TODO: FLDT (BX) // db2b
+ //TODO: FLDT (R11) // 41db2b
+ FLD1 // d9e8
+ FLDCW (BX) // d92b
+ FLDCW (R11) // 41d92b
+ //TODO: FLDENVL (BX) // d923
+ //TODO: FLDENVL (R11) // 41d923
+ FLDL2E // d9ea
+ FLDL2T // d9e9
+ FLDLG2 // d9ec
+ FLDPI // d9eb
+ //TODO: FMUL F2, F0 // d8ca
+ //TODO: FMUL F3, F0 // d8cb
+ //TODO: FMUL F0, F2 // dcca
+ //TODO: FMUL F0, F3 // dccb
+ //TODO: FMULS (BX) // d80b
+ //TODO: FMULS (R11) // 41d80b
+ //TODO: FMULL (BX) // dc0b
+ //TODO: FMULL (R11) // 41dc0b
+ //TODO: FMULP F0, F2 // deca
+ //TODO: FMULP F0, F3 // decb
+ //TODO: FNCLEX // dbe2
+ //TODO: FNINIT // dbe3
+ FNOP // d9d0
+ //TODO: FNSAVEL (BX) // dd33
+ //TODO: FNSAVEL (R11) // 41dd33
+ //TODO: FNSTCW (BX) // d93b
+ //TODO: FNSTCW (R11) // 41d93b
+ //TODO: FNSTENVL (BX) // d933
+ //TODO: FNSTENVL (R11) // 41d933
+ //TODO: FNSTSW AX // dfe0
+ //TODO: FNSTSW (BX) // dd3b
+ //TODO: FNSTSW (R11) // 41dd3b
+ FPATAN // d9f3
+ FPREM // d9f8
+ FPREM1 // d9f5
+ FPTAN // d9f2
+ FRNDINT // d9fc
+ //TODO: FRSTORL (BX) // dd23
+ //TODO: FRSTORL (R11) // 41dd23
+ FSCALE // d9fd
+ FSIN // d9fe
+ FSINCOS // d9fb
+ FSQRT // d9fa
+ //TODO: FST F2 // ddd2
+ //TODO: FST F3 // ddd3
+ //TODO: FSTS (BX) // d913
+ //TODO: FSTS (R11) // 41d913
+ //TODO: FSTL (BX) // dd13
+ //TODO: FSTL (R11) // 41dd13
+ //TODO: FSTP F2 // ddda
+ //TODO: FSTP F3 // dddb
+ //TODO: FSTPS (BX) // d91b
+ //TODO: FSTPS (R11) // 41d91b
+ //TODO: FSTPL (BX) // dd1b
+ //TODO: FSTPL (R11) // 41dd1b
+ //TODO: FSTPT (BX) // db3b
+ //TODO: FSTPT (R11) // 41db3b
+ //TODO: FSUB F2, F0 // d8e2
+ //TODO: FSUB F3, F0 // d8e3
+ //TODO: FSUBR F0, F2 // dcea
+ //TODO: FSUBR F0, F3 // dceb
+ //TODO: FSUBS (BX) // d823
+ //TODO: FSUBS (R11) // 41d823
+ //TODO: FSUBL (BX) // dc23
+ //TODO: FSUBL (R11) // 41dc23
+ //TODO: FSUBRP F0, F2 // deea
+ //TODO: FSUBRP F0, F3 // deeb
+ //TODO: FSUBR F2, F0 // d8ea
+ //TODO: FSUBR F3, F0 // d8eb
+ //TODO: FSUB F0, F2 // dce2
+ //TODO: FSUB F0, F3 // dce3
+ //TODO: FSUBRS (BX) // d82b
+ //TODO: FSUBRS (R11) // 41d82b
+ //TODO: FSUBRL (BX) // dc2b
+ //TODO: FSUBRL (R11) // 41dc2b
+ //TODO: FSUBP F0, F2 // dee2
+ //TODO: FSUBP F0, F3 // dee3
+ FTST // d9e4
+ //TODO: FUCOM F2 // dde2
+ //TODO: FUCOM F3 // dde3
+ //TODO: FUCOMI F2, F0 // dbea
+ //TODO: FUCOMI F3, F0 // dbeb
+ //TODO: FUCOMIP F2, F0 // dfea
+ //TODO: FUCOMIP F3, F0 // dfeb
+ //TODO: FUCOMP F2 // ddea
+ //TODO: FUCOMP F3 // ddeb
+ //TODO: FUCOMPP // dae9
+ //TODO: FWAIT // 9b
+ FXAM // d9e5
+ //TODO: FXCH F2 // d9ca
+ //TODO: FXCH F3 // d9cb
+ FXRSTOR (BX) // 0fae0b
+ FXRSTOR (R11) // 410fae0b
+ FXRSTOR64 (BX) // 480fae0b
+ FXRSTOR64 (R11) // 490fae0b
+ FXSAVE (BX) // 0fae03
+ FXSAVE (R11) // 410fae03
+ FXSAVE64 (BX) // 480fae03
+ FXSAVE64 (R11) // 490fae03
+ FXTRACT // d9f4
+ FYL2X // d9f1
+ FYL2XP1 // d9f9
+ HADDPD (BX), X2 // 660f7c13
+ HADDPD (R11), X2 // 66410f7c13
+ HADDPD X2, X2 // 660f7cd2
+ HADDPD X11, X2 // 66410f7cd3
+ HADDPD (BX), X11 // 66440f7c1b
+ HADDPD (R11), X11 // 66450f7c1b
+ HADDPD X2, X11 // 66440f7cda
+ HADDPD X11, X11 // 66450f7cdb
+ HADDPS (BX), X2 // f20f7c13
+ HADDPS (R11), X2 // f2410f7c13
+ HADDPS X2, X2 // f20f7cd2
+ HADDPS X11, X2 // f2410f7cd3
+ HADDPS (BX), X11 // f2440f7c1b
+ HADDPS (R11), X11 // f2450f7c1b
+ HADDPS X2, X11 // f2440f7cda
+ HADDPS X11, X11 // f2450f7cdb
+ HLT // f4
+ HSUBPD (BX), X2 // 660f7d13
+ HSUBPD (R11), X2 // 66410f7d13
+ HSUBPD X2, X2 // 660f7dd2
+ HSUBPD X11, X2 // 66410f7dd3
+ HSUBPD (BX), X11 // 66440f7d1b
+ HSUBPD (R11), X11 // 66450f7d1b
+ HSUBPD X2, X11 // 66440f7dda
+ HSUBPD X11, X11 // 66450f7ddb
+ HSUBPS (BX), X2 // f20f7d13
+ HSUBPS (R11), X2 // f2410f7d13
+ HSUBPS X2, X2 // f20f7dd2
+ HSUBPS X11, X2 // f2410f7dd3
+ HSUBPS (BX), X11 // f2440f7d1b
+ HSUBPS (R11), X11 // f2450f7d1b
+ HSUBPS X2, X11 // f2440f7dda
+ HSUBPS X11, X11 // f2450f7ddb
+ ICEBP // f1
+ IDIVW (BX) // 66f73b
+ IDIVW (R11) // 6641f73b
+ IDIVW DX // 66f7fa
+ IDIVW R11 // 6641f7fb
+ IDIVL (BX) // f73b
+ IDIVL (R11) // 41f73b
+ IDIVL DX // f7fa
+ IDIVL R11 // 41f7fb
+ IDIVQ (BX) // 48f73b
+ IDIVQ (R11) // 49f73b
+ IDIVQ DX // 48f7fa
+ IDIVQ R11 // 49f7fb
+ IDIVB (BX) // f63b
+ IDIVB (R11) // 41f63b
+ IDIVB DL // f6fa
+ IDIVB R11 // 41f6fb
+ IMULW (BX) // 66f72b
+ IMULW (R11) // 6641f72b
+ IMULW DX // 66f7ea
+ IMULW R11 // 6641f7eb
+ IMULL (BX) // f72b
+ IMULL (R11) // 41f72b
+ IMULL DX // f7ea
+ IMULL R11 // 41f7eb
+ IMULQ (BX) // 48f72b
+ IMULQ (R11) // 49f72b
+ IMULQ DX // 48f7ea
+ IMULQ R11 // 49f7eb
+ IMULB (BX) // f62b
+ IMULB (R11) // 41f62b
+ IMULB DL // f6ea
+ IMULB R11 // 41f6eb
+ IMULW (BX), DX // 660faf13
+ IMULW (R11), DX // 66410faf13
+ IMULW DX, DX // 660fafd2
+ IMULW R11, DX // 66410fafd3
+ IMULW (BX), R11 // 66440faf1b
+ IMULW (R11), R11 // 66450faf1b
+ IMULW DX, R11 // 66440fafda
+ IMULW R11, R11 // 66450fafdb
+ IMUL3W $61731, (BX), DX // 66691323f1
+ IMUL3W $61731, (R11), DX // 6641691323f1
+ IMUL3W $61731, DX, DX // 6669d223f1
+ IMUL3W $61731, R11, DX // 664169d323f1
+ IMUL3W $61731, (BX), R11 // 6644691b23f1
+ IMUL3W $61731, (R11), R11 // 6645691b23f1
+ IMUL3W $61731, DX, R11 // 664469da23f1
+ IMUL3W $61731, R11, R11 // 664569db23f1
+ IMUL3W $7, (BX), DX // 666b1307
+ IMUL3W $7, (R11), DX // 66416b1307
+ IMUL3W $7, DX, DX // 666bd207
+ IMUL3W $7, R11, DX // 66416bd307
+ IMUL3W $7, (BX), R11 // 66446b1b07
+ IMUL3W $7, (R11), R11 // 66456b1b07
+ IMUL3W $7, DX, R11 // 66446bda07
+ IMUL3W $7, R11, R11 // 66456bdb07
+ IMULL (BX), DX // 0faf13
+ IMULL (R11), DX // 410faf13
+ IMULL DX, DX // 0fafd2
+ IMULL R11, DX // 410fafd3
+ IMULL (BX), R11 // 440faf1b
+ IMULL (R11), R11 // 450faf1b
+ IMULL DX, R11 // 440fafda
+ IMULL R11, R11 // 450fafdb
+ IMUL3L $4045620583, (BX), DX // 6913674523f1
+ IMUL3L $4045620583, (R11), DX // 416913674523f1
+ IMUL3L $4045620583, DX, DX // 69d2674523f1
+ IMUL3L $4045620583, R11, DX // 4169d3674523f1
+ IMUL3L $4045620583, (BX), R11 // 44691b674523f1
+ IMUL3L $4045620583, (R11), R11 // 45691b674523f1
+ IMUL3L $4045620583, DX, R11 // 4469da674523f1
+ IMUL3L $4045620583, R11, R11 // 4569db674523f1
+ IMUL3L $7, (BX), DX // 6b1307
+ IMUL3L $7, (R11), DX // 416b1307
+ IMUL3L $7, DX, DX // 6bd207
+ IMUL3L $7, R11, DX // 416bd307
+ IMUL3L $7, (BX), R11 // 446b1b07
+ IMUL3L $7, (R11), R11 // 456b1b07
+ IMUL3L $7, DX, R11 // 446bda07
+ IMUL3L $7, R11, R11 // 456bdb07
+ IMULQ (BX), DX // 480faf13
+ IMULQ (R11), DX // 490faf13
+ IMULQ DX, DX // 480fafd2
+ IMULQ R11, DX // 490fafd3
+ IMULQ (BX), R11 // 4c0faf1b
+ IMULQ (R11), R11 // 4d0faf1b
+ IMULQ DX, R11 // 4c0fafda
+ IMULQ R11, R11 // 4d0fafdb
+ IMUL3Q $-249346713, (BX), DX // 486913674523f1
+ IMUL3Q $-249346713, (R11), DX // 496913674523f1
+ IMUL3Q $-249346713, DX, DX // 4869d2674523f1
+ IMUL3Q $-249346713, R11, DX // 4969d3674523f1
+ IMUL3Q $-249346713, (BX), R11 // 4c691b674523f1
+ IMUL3Q $-249346713, (R11), R11 // 4d691b674523f1
+ IMUL3Q $-249346713, DX, R11 // 4c69da674523f1
+ IMUL3Q $-249346713, R11, R11 // 4d69db674523f1
+ IMUL3Q $7, (BX), DX // 486b1307
+ IMUL3Q $7, (R11), DX // 496b1307
+ IMUL3Q $7, DX, DX // 486bd207
+ IMUL3Q $7, R11, DX // 496bd307
+ IMUL3Q $7, (BX), R11 // 4c6b1b07
+ IMUL3Q $7, (R11), R11 // 4d6b1b07
+ IMUL3Q $7, DX, R11 // 4c6bda07
+ IMUL3Q $7, R11, R11 // 4d6bdb07
+ //TODO: INB DX, AL // ec
+ //TODO: INB $7, AL // e407
+ //TODO: INW DX, AX // 66ed
+ //TODO: INW $7, AX // 66e507
+ //TODO: INL DX, AX // ed
+ //TODO: INL $7, AX // e507
+ INCW (BX) // 66ff03
+ INCW (R11) // 6641ff03
+ INCW DX // 66ffc2
+ INCW R11 // 6641ffc3
+ INCL (BX) // ff03
+ INCL (R11) // 41ff03
+ INCL DX // ffc2
+ INCL R11 // 41ffc3
+ INCQ (BX) // 48ff03
+ INCQ (R11) // 49ff03
+ INCQ DX // 48ffc2
+ INCQ R11 // 49ffc3
+ INCB (BX) // fe03
+ INCB (R11) // 41fe03
+ INCB DL // fec2
+ INCB R11 // 41fec3
+ INSB // 6c
+ INSL // 6d
+ INSERTPS $7, (BX), X2 // 660f3a211307
+ INSERTPS $7, (R11), X2 // 66410f3a211307
+ INSERTPS $7, X2, X2 // 660f3a21d207
+ INSERTPS $7, X11, X2 // 66410f3a21d307
+ INSERTPS $7, (BX), X11 // 66440f3a211b07
+ INSERTPS $7, (R11), X11 // 66450f3a211b07
+ INSERTPS $7, X2, X11 // 66440f3a21da07
+ INSERTPS $7, X11, X11 // 66450f3a21db07
+ INSW // 666d
+ //TODO: INT $3 // cc
+ INT $7 // cd07
+ INVD // 0f08
+ INVLPG (BX) // 0f013b
+ INVLPG (R11) // 410f013b
+ INVPCID (BX), DX // 660f388213
+ INVPCID (R11), DX // 66410f388213
+ INVPCID (BX), R11 // 66440f38821b
+ INVPCID (R11), R11 // 66450f38821b
+ JCS 2(PC)
+ IRETW // 66cf
+ JCS 2(PC)
+ IRETL // cf
+ JCS 2(PC)
+ IRETQ // 48cf
+ //TODO: JA .+$0x11223344 // 480f8744332211 or 0f8744332211
+ //TODO: JA .+$0x11 // 7711
+ //TODO: JAE .+$0x11223344 // 0f8344332211 or 480f8344332211
+ //TODO: JAE .+$0x11 // 7311
+ //TODO: JB .+$0x11223344 // 480f8244332211 or 0f8244332211
+ //TODO: JB .+$0x11 // 7211
+ //TODO: JBE .+$0x11223344 // 0f8644332211 or 480f8644332211
+ //TODO: JBE .+$0x11 // 7611
+ //TODO: JE .+$0x11223344 // 480f8444332211 or 0f8444332211
+ //TODO: JE .+$0x11 // 7411
+ //TODO: JECXZ .+$0x11 // e311
+ //TODO: JG .+$0x11223344 // 0f8f44332211 or 480f8f44332211
+ //TODO: JG .+$0x11 // 7f11
+ //TODO: JGE .+$0x11223344 // 480f8d44332211 or 0f8d44332211
+ //TODO: JGE .+$0x11 // 7d11
+ //TODO: JL .+$0x11223344 // 0f8c44332211 or 480f8c44332211
+ //TODO: JL .+$0x11 // 7c11
+ //TODO: JLE .+$0x11223344 // 0f8e44332211 or 480f8e44332211
+ //TODO: JLE .+$0x11 // 7e11
+ JCS 2(PC)
+ //TODO: JMPQ* (BX) // ff23
+ JCS 2(PC)
+ //TODO: JMPQ* (R11) // 41ff23
+ JCS 2(PC)
+ //TODO: JMPQ* DX // ffe2
+ JCS 2(PC)
+ //TODO: JMPQ* R11 // 41ffe3
+ JCS 2(PC)
+ //TODO: JMP .+$0x11223344 // 48e944332211 or e944332211
+ JCS 2(PC)
+ JCS 2(PC)
+ //TODO: JMP .+$0x11 // eb11
+ JCS 2(PC)
+ //TODO: LJMPW* (BX) // 66ff2b
+ JCS 2(PC)
+ //TODO: LJMPW* (R11) // 6641ff2b
+ JCS 2(PC)
+ //TODO: LJMPL* (BX) // ff2b
+ JCS 2(PC)
+ //TODO: LJMPL* (R11) // 41ff2b
+ JCS 2(PC)
+ //TODO: LJMPQ* (BX) // 48ff2b
+ JCS 2(PC)
+ //TODO: LJMPQ* (R11) // 49ff2b
+ //TODO: JNE .+$0x11223344 // 480f8544332211 or 0f8544332211
+ //TODO: JNE .+$0x11 // 7511
+ //TODO: JNO .+$0x11223344 // 480f8144332211 or 0f8144332211
+ //TODO: JNO .+$0x11 // 7111
+ //TODO: JNP .+$0x11223344 // 480f8b44332211 or 0f8b44332211
+ //TODO: JNP .+$0x11 // 7b11
+ //TODO: JNS .+$0x11223344 // 0f8944332211 or 480f8944332211
+ //TODO: JNS .+$0x11 // 7911
+ //TODO: JO .+$0x11223344 // 0f8044332211 or 480f8044332211
+ //TODO: JO .+$0x11 // 7011
+ //TODO: JP .+$0x11223344 // 480f8a44332211 or 0f8a44332211
+ //TODO: JP .+$0x11 // 7a11
+ //TODO: JRCXZ .+$0x11 // e311
+ //TODO: JS .+$0x11223344 // 480f8844332211 or 0f8844332211
+ //TODO: JS .+$0x11 // 7811
+ LAHF // 9f
+ LARW (BX), DX // 660f0213
+ LARW (R11), DX // 66410f0213
+ LARW DX, DX // 660f02d2
+ LARW R11, DX // 66410f02d3
+ LARW (BX), R11 // 66440f021b
+ LARW (R11), R11 // 66450f021b
+ LARW DX, R11 // 66440f02da
+ LARW R11, R11 // 66450f02db
+ LARL (BX), DX // 0f0213
+ LARL (R11), DX // 410f0213
+ LARL DX, DX // 0f02d2
+ LARL R11, DX // 410f02d3
+ LARL (BX), R11 // 440f021b
+ LARL (R11), R11 // 450f021b
+ LARL DX, R11 // 440f02da
+ LARL R11, R11 // 450f02db
+ LARQ (BX), DX // 480f0213
+ LARQ (R11), DX // 490f0213
+ LARQ DX, DX // 480f02d2
+ LARQ R11, DX // 490f02d3
+ LARQ (BX), R11 // 4c0f021b
+ LARQ (R11), R11 // 4d0f021b
+ LARQ DX, R11 // 4c0f02da
+ LARQ R11, R11 // 4d0f02db
+ LDDQU (BX), X2 // f20ff013
+ LDDQU (R11), X2 // f2410ff013
+ LDDQU (BX), X11 // f2440ff01b
+ LDDQU (R11), X11 // f2450ff01b
+ LDMXCSR (BX) // 0fae13
+ LDMXCSR (R11) // 410fae13
+ LEAW (BX), DX // 668d13
+ LEAW (R11), DX // 66418d13
+ LEAW (BX), R11 // 66448d1b
+ LEAW (R11), R11 // 66458d1b
+ LEAL (BX), DX // 8d13
+ LEAL (R11), DX // 418d13
+ LEAL (BX), R11 // 448d1b
+ LEAL (R11), R11 // 458d1b
+ LEAQ (BX), DX // 488d13
+ LEAQ (R11), DX // 498d13
+ LEAQ (BX), R11 // 4c8d1b
+ LEAQ (R11), R11 // 4d8d1b
+ LEAVEQ // 66c9 or c9
+ LFENCE // 0faee8
+ LFSW (BX), DX // 660fb413
+ LFSW (R11), DX // 66410fb413
+ LFSW (BX), R11 // 66440fb41b
+ LFSW (R11), R11 // 66450fb41b
+ LFSL (BX), DX // 0fb413
+ LFSL (R11), DX // 410fb413
+ LFSL (BX), R11 // 440fb41b
+ LFSL (R11), R11 // 450fb41b
+ LFSQ (BX), DX // 480fb413
+ LFSQ (R11), DX // 490fb413
+ LFSQ (BX), R11 // 4c0fb41b
+ LFSQ (R11), R11 // 4d0fb41b
+ LGDT (BX) // 0f0113
+ LGDT (R11) // 410f0113
+ LGSW (BX), DX // 660fb513
+ LGSW (R11), DX // 66410fb513
+ LGSW (BX), R11 // 66440fb51b
+ LGSW (R11), R11 // 66450fb51b
+ LGSL (BX), DX // 0fb513
+ LGSL (R11), DX // 410fb513
+ LGSL (BX), R11 // 440fb51b
+ LGSL (R11), R11 // 450fb51b
+ LGSQ (BX), DX // 480fb513
+ LGSQ (R11), DX // 490fb513
+ LGSQ (BX), R11 // 4c0fb51b
+ LGSQ (R11), R11 // 4d0fb51b
+ LIDT (BX) // 0f011b
+ LIDT (R11) // 410f011b
+ LLDT (BX) // 0f0013
+ LLDT (R11) // 410f0013
+ LLDT DX // 0f00d2
+ LLDT R11 // 410f00d3
+ LMSW (BX) // 0f0133
+ LMSW (R11) // 410f0133
+ LMSW DX // 0f01f2
+ LMSW R11 // 410f01f3
+ LODSB // ac
+ LODSL // ad
+ LODSQ // 48ad
+ LODSW // 66ad
+ //TODO: LOOP .+$0x11 // e211
+ //TODO: LOOPEQ .+$0x11 // e111
+ //TODO: LOOPNE .+$0x11 // e011
+ LSLW (BX), DX // 660f0313
+ LSLW (R11), DX // 66410f0313
+ LSLW DX, DX // 660f03d2
+ LSLW R11, DX // 66410f03d3
+ LSLW (BX), R11 // 66440f031b
+ LSLW (R11), R11 // 66450f031b
+ LSLW DX, R11 // 66440f03da
+ LSLW R11, R11 // 66450f03db
+ LSLL (BX), DX // 0f0313
+ LSLL (R11), DX // 410f0313
+ LSLL DX, DX // 0f03d2
+ LSLL R11, DX // 410f03d3
+ LSLL (BX), R11 // 440f031b
+ LSLL (R11), R11 // 450f031b
+ LSLL DX, R11 // 440f03da
+ LSLL R11, R11 // 450f03db
+ LSLQ (BX), DX // 480f0313
+ LSLQ (R11), DX // 490f0313
+ LSLQ DX, DX // 480f03d2
+ LSLQ R11, DX // 490f03d3
+ LSLQ (BX), R11 // 4c0f031b
+ LSLQ (R11), R11 // 4d0f031b
+ LSLQ DX, R11 // 4c0f03da
+ LSLQ R11, R11 // 4d0f03db
+ LSSW (BX), DX // 660fb213
+ LSSW (R11), DX // 66410fb213
+ LSSW (BX), R11 // 66440fb21b
+ LSSW (R11), R11 // 66450fb21b
+ LSSL (BX), DX // 0fb213
+ LSSL (R11), DX // 410fb213
+ LSSL (BX), R11 // 440fb21b
+ LSSL (R11), R11 // 450fb21b
+ LSSQ (BX), DX // 480fb213
+ LSSQ (R11), DX // 490fb213
+ LSSQ (BX), R11 // 4c0fb21b
+ LSSQ (R11), R11 // 4d0fb21b
+ LTR (BX) // 0f001b
+ LTR (R11) // 410f001b
+ LTR DX // 0f00da
+ LTR R11 // 410f00db
+ LZCNTW (BX), DX // 66f30fbd13
+ LZCNTW (R11), DX // 66f3410fbd13
+ LZCNTW DX, DX // 66f30fbdd2
+ LZCNTW R11, DX // 66f3410fbdd3
+ LZCNTW (BX), R11 // 66f3440fbd1b
+ LZCNTW (R11), R11 // 66f3450fbd1b
+ LZCNTW DX, R11 // 66f3440fbdda
+ LZCNTW R11, R11 // 66f3450fbddb
+ LZCNTL (BX), DX // f30fbd13
+ LZCNTL (R11), DX // f3410fbd13
+ LZCNTL DX, DX // f30fbdd2
+ LZCNTL R11, DX // f3410fbdd3
+ LZCNTL (BX), R11 // f3440fbd1b
+ LZCNTL (R11), R11 // f3450fbd1b
+ LZCNTL DX, R11 // f3440fbdda
+ LZCNTL R11, R11 // f3450fbddb
+ LZCNTQ (BX), DX // f3480fbd13
+ LZCNTQ (R11), DX // f3490fbd13
+ LZCNTQ DX, DX // f3480fbdd2
+ LZCNTQ R11, DX // f3490fbdd3
+ LZCNTQ (BX), R11 // f34c0fbd1b
+ LZCNTQ (R11), R11 // f34d0fbd1b
+ LZCNTQ DX, R11 // f34c0fbdda
+ LZCNTQ R11, R11 // f34d0fbddb
+ MASKMOVOU X2, X2 // 660ff7d2
+ MASKMOVOU X11, X2 // 66410ff7d3
+ MASKMOVOU X2, X11 // 66440ff7da
+ MASKMOVOU X11, X11 // 66450ff7db
+ MASKMOVQ M2, M2 // 0ff7d2
+ MASKMOVQ M3, M2 // 0ff7d3
+ MASKMOVQ M2, M3 // 0ff7da
+ MASKMOVQ M3, M3 // 0ff7db
+ MAXPD (BX), X2 // 660f5f13
+ MAXPD (R11), X2 // 66410f5f13
+ MAXPD X2, X2 // 660f5fd2
+ MAXPD X11, X2 // 66410f5fd3
+ MAXPD (BX), X11 // 66440f5f1b
+ MAXPD (R11), X11 // 66450f5f1b
+ MAXPD X2, X11 // 66440f5fda
+ MAXPD X11, X11 // 66450f5fdb
+ MAXPS (BX), X2 // 0f5f13
+ MAXPS (R11), X2 // 410f5f13
+ MAXPS X2, X2 // 0f5fd2
+ MAXPS X11, X2 // 410f5fd3
+ MAXPS (BX), X11 // 440f5f1b
+ MAXPS (R11), X11 // 450f5f1b
+ MAXPS X2, X11 // 440f5fda
+ MAXPS X11, X11 // 450f5fdb
+ MAXSD (BX), X2 // f20f5f13
+ MAXSD (R11), X2 // f2410f5f13
+ MAXSD X2, X2 // f20f5fd2
+ MAXSD X11, X2 // f2410f5fd3
+ MAXSD (BX), X11 // f2440f5f1b
+ MAXSD (R11), X11 // f2450f5f1b
+ MAXSD X2, X11 // f2440f5fda
+ MAXSD X11, X11 // f2450f5fdb
+ MAXSS (BX), X2 // f30f5f13
+ MAXSS (R11), X2 // f3410f5f13
+ MAXSS X2, X2 // f30f5fd2
+ MAXSS X11, X2 // f3410f5fd3
+ MAXSS (BX), X11 // f3440f5f1b
+ MAXSS (R11), X11 // f3450f5f1b
+ MAXSS X2, X11 // f3440f5fda
+ MAXSS X11, X11 // f3450f5fdb
+ MFENCE // 0faef0
+ MINPD (BX), X2 // 660f5d13
+ MINPD (R11), X2 // 66410f5d13
+ MINPD X2, X2 // 660f5dd2
+ MINPD X11, X2 // 66410f5dd3
+ MINPD (BX), X11 // 66440f5d1b
+ MINPD (R11), X11 // 66450f5d1b
+ MINPD X2, X11 // 66440f5dda
+ MINPD X11, X11 // 66450f5ddb
+ MINPS (BX), X2 // 0f5d13
+ MINPS (R11), X2 // 410f5d13
+ MINPS X2, X2 // 0f5dd2
+ MINPS X11, X2 // 410f5dd3
+ MINPS (BX), X11 // 440f5d1b
+ MINPS (R11), X11 // 450f5d1b
+ MINPS X2, X11 // 440f5dda
+ MINPS X11, X11 // 450f5ddb
+ MINSD (BX), X2 // f20f5d13
+ MINSD (R11), X2 // f2410f5d13
+ MINSD X2, X2 // f20f5dd2
+ MINSD X11, X2 // f2410f5dd3
+ MINSD (BX), X11 // f2440f5d1b
+ MINSD (R11), X11 // f2450f5d1b
+ MINSD X2, X11 // f2440f5dda
+ MINSD X11, X11 // f2450f5ddb
+ MINSS (BX), X2 // f30f5d13
+ MINSS (R11), X2 // f3410f5d13
+ MINSS X2, X2 // f30f5dd2
+ MINSS X11, X2 // f3410f5dd3
+ MINSS (BX), X11 // f3440f5d1b
+ MINSS (R11), X11 // f3450f5d1b
+ MINSS X2, X11 // f3440f5dda
+ MINSS X11, X11 // f3450f5ddb
+ MONITOR // 0f01c8
+ //TODO: MOVABSB 0x123456789abcdef1, AL // a0f1debc9a78563412
+ //TODO: MOVW 0x123456789abcdef1, AX // 66a1f1debc9a78563412
+ MOVQ DX, CR2 // 0f22d2
+ MOVQ R11, CR2 // 410f22d3
+ MOVQ DX, CR3 // 0f22da
+ MOVQ R11, CR3 // 410f22db
+ MOVQ DX, DR2 // 0f23d2
+ MOVQ R11, DR2 // 410f23d3
+ MOVQ DX, DR3 // 0f23da
+ MOVQ R11, DR3 // 410f23db
+ //TODO: MOVL 0x123456789abcdef1, AX // a1f1debc9a78563412
+ //TODO: MOVQ 0x123456789abcdef1, AX // 48a1f1debc9a78563412
+ //TODO: MOVW (BX), SS // 668e13 or 488e13
+ //TODO: MOVW (R11), SS // 66418e13 or 498e13
+ //TODO: MOVW DX, SS // 668ed2 or 488ed2
+ //TODO: MOVW R11, SS // 66418ed3 or 498ed3
+ //TODO: MOVW (BX), DS // 668e1b or 488e1b
+ //TODO: MOVW (R11), DS // 66418e1b or 498e1b
+ //TODO: MOVW DX, DS // 668eda or 488eda
+ //TODO: MOVW R11, DS // 66418edb or 498edb
+ //TODO: MOVL (BX), SS // 8e13
+ //TODO: MOVL (R11), SS // 418e13
+ //TODO: MOVL DX, SS // 8ed2
+ //TODO: MOVL R11, SS // 418ed3
+ //TODO: MOVL (BX), DS // 8e1b
+ //TODO: MOVL (R11), DS // 418e1b
+ //TODO: MOVL DX, DS // 8eda
+ //TODO: MOVL R11, DS // 418edb
+ //TODO: MOVW AX, 0x123456789abcdef1 // 66a3f1debc9a78563412
+ //TODO: MOVL AX, 0x123456789abcdef1 // a3f1debc9a78563412
+ //TODO: MOVQ AX, 0x123456789abcdef1 // 48a3f1debc9a78563412
+ //TODO: MOVABSB AL, 0x123456789abcdef1 // a2f1debc9a78563412
+ //TODO: MOVW SS, (BX) // 668c13 or 488c13
+ //TODO: MOVW DS, (BX) // 668c1b or 488c1b
+ //TODO: MOVW SS, (R11) // 66418c13 or 498c13
+ //TODO: MOVW DS, (R11) // 66418c1b or 498c1b
+ //TODO: MOVW SS, DX // 668cd2 or 488cd2
+ //TODO: MOVW DS, DX // 668cda or 488cda
+ //TODO: MOVW SS, R11 // 66418cd3 or 498cd3
+ //TODO: MOVW DS, R11 // 66418cdb or 498cdb
+ MOVW $61731, (BX) // 66c70323f1
+ MOVW $61731, (R11) // 6641c70323f1
+ MOVW $61731, DX // 66c7c223f1 or 66ba23f1
+ MOVW $61731, R11 // 6641c7c323f1 or 6641bb23f1
+ MOVW DX, (BX) // 668913
+ MOVW R11, (BX) // 6644891b
+ MOVW DX, (R11) // 66418913
+ MOVW R11, (R11) // 6645891b
+ MOVW DX, DX // 6689d2 or 668bd2
+ MOVW R11, DX // 664489da or 66418bd3
+ MOVW DX, R11 // 664189d3 or 66448bda
+ MOVW R11, R11 // 664589db or 66458bdb
+ //TODO: MOVL SS, (BX) // 8c13
+ //TODO: MOVL DS, (BX) // 8c1b
+ //TODO: MOVL SS, (R11) // 418c13
+ //TODO: MOVL DS, (R11) // 418c1b
+ //TODO: MOVL SS, DX // 8cd2
+ //TODO: MOVL DS, DX // 8cda
+ //TODO: MOVL SS, R11 // 418cd3
+ //TODO: MOVL DS, R11 // 418cdb
+ MOVL $4045620583, (BX) // c703674523f1
+ MOVL $4045620583, (R11) // 41c703674523f1
+ MOVL $4045620583, DX // c7c2674523f1 or ba674523f1
+ MOVL $4045620583, R11 // 41c7c3674523f1 or 41bb674523f1
+ MOVL DX, (BX) // 8913
+ MOVL R11, (BX) // 44891b
+ MOVL DX, (R11) // 418913
+ MOVL R11, (R11) // 45891b
+ MOVL DX, DX // 89d2 or 8bd2
+ MOVL R11, DX // 4489da or 418bd3
+ MOVL DX, R11 // 4189d3 or 448bda
+ MOVL R11, R11 // 4589db or 458bdb
+ MOVQ $-249346713, (BX) // 48c703674523f1
+ MOVQ $-249346713, (R11) // 49c703674523f1
+ MOVQ $-249346713, DX // 48c7c2674523f1
+ MOVQ $-249346713, R11 // 49c7c3674523f1
+ MOVQ DX, (BX) // 488913
+ MOVQ R11, (BX) // 4c891b
+ MOVQ DX, (R11) // 498913
+ MOVQ R11, (R11) // 4d891b
+ MOVQ DX, DX // 4889d2 or 488bd2
+ MOVQ R11, DX // 4c89da or 498bd3
+ MOVQ DX, R11 // 4989d3 or 4c8bda
+ MOVQ R11, R11 // 4d89db or 4d8bdb
+ MOVB $7, (BX) // c60307
+ MOVB $7, (R11) // 41c60307
+ MOVB $7, DL // c6c207 or b207
+ MOVB $7, R11 // 41c6c307 or 41b307
+ MOVB DL, (BX) // 8813
+ MOVB R11, (BX) // 44881b
+ MOVB DL, (R11) // 418813
+ MOVB R11, (R11) // 45881b
+ MOVB DL, DL // 88d2 or 8ad2
+ MOVB R11, DL // 4488da or 418ad3
+ MOVB DL, R11 // 4188d3 or 448ada
+ MOVB R11, R11 // 4588db or 458adb
+ MOVW (BX), DX // 668b13
+ MOVW (R11), DX // 66418b13
+ MOVW (BX), R11 // 66448b1b
+ MOVW (R11), R11 // 66458b1b
+ MOVL (BX), DX // 8b13
+ MOVL (R11), DX // 418b13
+ MOVL (BX), R11 // 448b1b
+ MOVL (R11), R11 // 458b1b
+ MOVQ (BX), DX // 488b13
+ MOVQ (R11), DX // 498b13
+ MOVQ (BX), R11 // 4c8b1b
+ MOVQ (R11), R11 // 4d8b1b
+ MOVQ $-1070935975390360081, DX // 48baefcdab89674523f1
+ MOVQ $-1070935975390360081, R11 // 49bbefcdab89674523f1
+ MOVB (BX), DL // 8a13
+ MOVB (R11), DL // 418a13
+ MOVB (BX), R11 // 448a1b
+ MOVB (R11), R11 // 458a1b
+ MOVQ CR2, DX // 0f20d2
+ MOVQ CR3, DX // 0f20da
+ MOVQ CR2, R11 // 410f20d3
+ MOVQ CR3, R11 // 410f20db
+ MOVQ DR2, DX // 0f21d2
+ MOVQ DR3, DX // 0f21da
+ MOVQ DR2, R11 // 410f21d3
+ MOVQ DR3, R11 // 410f21db
+ MOVAPD (BX), X2 // 660f2813
+ MOVAPD (R11), X2 // 66410f2813
+ MOVAPD X2, X2 // 660f28d2 or 660f29d2
+ MOVAPD X11, X2 // 66410f28d3 or 66440f29da
+ MOVAPD (BX), X11 // 66440f281b
+ MOVAPD (R11), X11 // 66450f281b
+ MOVAPD X2, X11 // 66440f28da or 66410f29d3
+ MOVAPD X11, X11 // 66450f28db or 66450f29db
+ MOVAPD X2, (BX) // 660f2913
+ MOVAPD X11, (BX) // 66440f291b
+ MOVAPD X2, (R11) // 66410f2913
+ MOVAPD X11, (R11) // 66450f291b
+ MOVAPS (BX), X2 // 0f2813
+ MOVAPS (R11), X2 // 410f2813
+ MOVAPS X2, X2 // 0f28d2 or 0f29d2
+ MOVAPS X11, X2 // 410f28d3 or 440f29da
+ MOVAPS (BX), X11 // 440f281b
+ MOVAPS (R11), X11 // 450f281b
+ MOVAPS X2, X11 // 440f28da or 410f29d3
+ MOVAPS X11, X11 // 450f28db or 450f29db
+ MOVAPS X2, (BX) // 0f2913
+ MOVAPS X11, (BX) // 440f291b
+ MOVAPS X2, (R11) // 410f2913
+ MOVAPS X11, (R11) // 450f291b
+ MOVBEW DX, (BX) // 660f38f113
+ MOVBEW R11, (BX) // 66440f38f11b
+ MOVBEW DX, (R11) // 66410f38f113
+ MOVBEW R11, (R11) // 66450f38f11b
+ MOVBEW (BX), DX // 660f38f013
+ MOVBEW (R11), DX // 66410f38f013
+ MOVBEW (BX), R11 // 66440f38f01b
+ MOVBEW (R11), R11 // 66450f38f01b
+ MOVBEL DX, (BX) // 0f38f113
+ MOVBEL R11, (BX) // 440f38f11b
+ MOVBEL DX, (R11) // 410f38f113
+ MOVBEL R11, (R11) // 450f38f11b
+ MOVBEL (BX), DX // 0f38f013
+ MOVBEL (R11), DX // 410f38f013
+ MOVBEL (BX), R11 // 440f38f01b
+ MOVBEL (R11), R11 // 450f38f01b
+ MOVBEQ DX, (BX) // 480f38f113
+ MOVBEQ R11, (BX) // 4c0f38f11b
+ MOVBEQ DX, (R11) // 490f38f113
+ MOVBEQ R11, (R11) // 4d0f38f11b
+ MOVBEQ (BX), DX // 480f38f013
+ MOVBEQ (R11), DX // 490f38f013
+ MOVBEQ (BX), R11 // 4c0f38f01b
+ MOVBEQ (R11), R11 // 4d0f38f01b
+ MOVQ (BX), M2 // 0f6e13 or 0f6f13 or 480f6e13
+ MOVQ (R11), M2 // 410f6e13 or 410f6f13 or 490f6e13
+ MOVQ DX, M2 // 0f6ed2 or 480f6ed2
+ MOVQ R11, M2 // 410f6ed3 or 490f6ed3
+ MOVQ (BX), M3 // 0f6e1b or 0f6f1b or 480f6e1b
+ MOVQ (R11), M3 // 410f6e1b or 410f6f1b or 490f6e1b
+ MOVQ DX, M3 // 0f6eda or 480f6eda
+ MOVQ R11, M3 // 410f6edb or 490f6edb
+ MOVQ M2, (BX) // 0f7e13 or 0f7f13 or 480f7e13
+ MOVQ M3, (BX) // 0f7e1b or 0f7f1b or 480f7e1b
+ MOVQ M2, (R11) // 410f7e13 or 410f7f13 or 490f7e13
+ MOVQ M3, (R11) // 410f7e1b or 410f7f1b or 490f7e1b
+ MOVQ M2, DX // 0f7ed2 or 480f7ed2
+ MOVQ M3, DX // 0f7eda or 480f7eda
+ MOVQ M2, R11 // 410f7ed3 or 490f7ed3
+ MOVQ M3, R11 // 410f7edb or 490f7edb
+ MOVQ X2, (BX) // 660f7e13 or 66480f7e13 or 660fd613
+ MOVQ X11, (BX) // 66440f7e1b or 664c0f7e1b or 66440fd61b
+ MOVQ X2, (R11) // 66410f7e13 or 66490f7e13 or 66410fd613
+ MOVQ X11, (R11) // 66450f7e1b or 664d0f7e1b or 66450fd61b
+ MOVQ X2, DX // 660f7ed2 or 66480f7ed2
+ MOVQ X11, DX // 66440f7eda or 664c0f7eda
+ MOVQ X2, R11 // 66410f7ed3 or 66490f7ed3
+ MOVQ X11, R11 // 66450f7edb or 664d0f7edb
+ MOVQ (BX), X2 // 660f6e13 or 66480f6e13 or f30f7e13
+ MOVQ (R11), X2 // 66410f6e13 or 66490f6e13 or f3410f7e13
+ MOVQ DX, X2 // 660f6ed2 or 66480f6ed2
+ MOVQ R11, X2 // 66410f6ed3 or 66490f6ed3
+ MOVQ (BX), X11 // 66440f6e1b or 664c0f6e1b or f3440f7e1b
+ MOVQ (R11), X11 // 66450f6e1b or 664d0f6e1b or f3450f7e1b
+ MOVQ DX, X11 // 66440f6eda or 664c0f6eda
+ MOVQ R11, X11 // 66450f6edb or 664d0f6edb
+ MOVDDUP (BX), X2 // f20f1213
+ MOVDDUP (R11), X2 // f2410f1213
+ MOVDDUP X2, X2 // f20f12d2
+ MOVDDUP X11, X2 // f2410f12d3
+ MOVDDUP (BX), X11 // f2440f121b
+ MOVDDUP (R11), X11 // f2450f121b
+ MOVDDUP X2, X11 // f2440f12da
+ MOVDDUP X11, X11 // f2450f12db
+ MOVQ X2, M2 // f20fd6d2
+ MOVQ X11, M2 // f2410fd6d3
+ MOVQ X2, M3 // f20fd6da
+ MOVQ X11, M3 // f2410fd6db
+ MOVO (BX), X2 // 660f6f13
+ MOVO (R11), X2 // 66410f6f13
+ MOVO X2, X2 // 660f6fd2 or 660f7fd2
+ MOVO X11, X2 // 66410f6fd3 or 66440f7fda
+ MOVO (BX), X11 // 66440f6f1b
+ MOVO (R11), X11 // 66450f6f1b
+ MOVO X2, X11 // 66440f6fda or 66410f7fd3
+ MOVO X11, X11 // 66450f6fdb or 66450f7fdb
+ MOVO X2, (BX) // 660f7f13
+ MOVO X11, (BX) // 66440f7f1b
+ MOVO X2, (R11) // 66410f7f13
+ MOVO X11, (R11) // 66450f7f1b
+ MOVOU (BX), X2 // f30f6f13
+ MOVOU (R11), X2 // f3410f6f13
+ MOVOU X2, X2 // f30f6fd2 or f30f7fd2
+ MOVOU X11, X2 // f3410f6fd3 or f3440f7fda
+ MOVOU (BX), X11 // f3440f6f1b
+ MOVOU (R11), X11 // f3450f6f1b
+ MOVOU X2, X11 // f3440f6fda or f3410f7fd3
+ MOVOU X11, X11 // f3450f6fdb or f3450f7fdb
+ MOVOU X2, (BX) // f30f7f13
+ MOVOU X11, (BX) // f3440f7f1b
+ MOVOU X2, (R11) // f3410f7f13
+ MOVOU X11, (R11) // f3450f7f1b
+ MOVHLPS X2, X2 // 0f12d2
+ MOVHLPS X11, X2 // 410f12d3
+ MOVHLPS X2, X11 // 440f12da
+ MOVHLPS X11, X11 // 450f12db
+ MOVHPD X2, (BX) // 660f1713
+ MOVHPD X11, (BX) // 66440f171b
+ MOVHPD X2, (R11) // 66410f1713
+ MOVHPD X11, (R11) // 66450f171b
+ MOVHPD (BX), X2 // 660f1613
+ MOVHPD (R11), X2 // 66410f1613
+ MOVHPD (BX), X11 // 66440f161b
+ MOVHPD (R11), X11 // 66450f161b
+ MOVHPS X2, (BX) // 0f1713
+ MOVHPS X11, (BX) // 440f171b
+ MOVHPS X2, (R11) // 410f1713
+ MOVHPS X11, (R11) // 450f171b
+ MOVHPS (BX), X2 // 0f1613
+ MOVHPS (R11), X2 // 410f1613
+ MOVHPS (BX), X11 // 440f161b
+ MOVHPS (R11), X11 // 450f161b
+ MOVLHPS X2, X2 // 0f16d2
+ MOVLHPS X11, X2 // 410f16d3
+ MOVLHPS X2, X11 // 440f16da
+ MOVLHPS X11, X11 // 450f16db
+ MOVLPD X2, (BX) // 660f1313
+ MOVLPD X11, (BX) // 66440f131b
+ MOVLPD X2, (R11) // 66410f1313
+ MOVLPD X11, (R11) // 66450f131b
+ MOVLPD (BX), X2 // 660f1213
+ MOVLPD (R11), X2 // 66410f1213
+ MOVLPD (BX), X11 // 66440f121b
+ MOVLPD (R11), X11 // 66450f121b
+ MOVLPS X2, (BX) // 0f1313
+ MOVLPS X11, (BX) // 440f131b
+ MOVLPS X2, (R11) // 410f1313
+ MOVLPS X11, (R11) // 450f131b
+ MOVLPS (BX), X2 // 0f1213
+ MOVLPS (R11), X2 // 410f1213
+ MOVLPS (BX), X11 // 440f121b
+ MOVLPS (R11), X11 // 450f121b
+ MOVMSKPD X2, DX // 660f50d2
+ MOVMSKPD X11, DX // 66410f50d3
+ MOVMSKPD X2, R11 // 66440f50da
+ MOVMSKPD X11, R11 // 66450f50db
+ MOVMSKPS X2, DX // 0f50d2
+ MOVMSKPS X11, DX // 410f50d3
+ MOVMSKPS X2, R11 // 440f50da
+ MOVMSKPS X11, R11 // 450f50db
+ MOVNTO X2, (BX) // 660fe713
+ MOVNTO X11, (BX) // 66440fe71b
+ MOVNTO X2, (R11) // 66410fe713
+ MOVNTO X11, (R11) // 66450fe71b
+ MOVNTDQA (BX), X2 // 660f382a13
+ MOVNTDQA (R11), X2 // 66410f382a13
+ MOVNTDQA (BX), X11 // 66440f382a1b
+ MOVNTDQA (R11), X11 // 66450f382a1b
+ MOVNTIL DX, (BX) // 0fc313
+ MOVNTIL R11, (BX) // 440fc31b
+ MOVNTIL DX, (R11) // 410fc313
+ MOVNTIL R11, (R11) // 450fc31b
+ MOVNTIQ DX, (BX) // 480fc313
+ MOVNTIQ R11, (BX) // 4c0fc31b
+ MOVNTIQ DX, (R11) // 490fc313
+ MOVNTIQ R11, (R11) // 4d0fc31b
+ MOVNTPD X2, (BX) // 660f2b13
+ MOVNTPD X11, (BX) // 66440f2b1b
+ MOVNTPD X2, (R11) // 66410f2b13
+ MOVNTPD X11, (R11) // 66450f2b1b
+ MOVNTPS X2, (BX) // 0f2b13
+ MOVNTPS X11, (BX) // 440f2b1b
+ MOVNTPS X2, (R11) // 410f2b13
+ MOVNTPS X11, (R11) // 450f2b1b
+ MOVNTQ M2, (BX) // 0fe713
+ MOVNTQ M3, (BX) // 0fe71b
+ MOVNTQ M2, (R11) // 410fe713
+ MOVNTQ M3, (R11) // 410fe71b
+ //TODO: MOVNTSD X2, (BX) // f20f2b13
+ //TODO: MOVNTSD X11, (BX) // f2440f2b1b
+ //TODO: MOVNTSD X2, (R11) // f2410f2b13
+ //TODO: MOVNTSD X11, (R11) // f2450f2b1b
+ //TODO: MOVNTSS X2, (BX) // f30f2b13
+ //TODO: MOVNTSS X11, (BX) // f3440f2b1b
+ //TODO: MOVNTSS X2, (R11) // f3410f2b13
+ //TODO: MOVNTSS X11, (R11) // f3450f2b1b
+ //TODO: MOVQ M2, M2 // 0f6fd2 or 0f7fd2
+ //TODO: MOVQ M3, M2 // 0f6fd3 or 0f7fda
+ //TODO: MOVQ M2, M3 // 0f6fda or 0f7fd3
+ //TODO: MOVQ M3, M3 // 0f6fdb or 0f7fdb
+ MOVQ X2, X2 // f30f7ed2 or 660fd6d2
+ MOVQ X11, X2 // f3410f7ed3 or 66440fd6da
+ MOVQ X2, X11 // f3440f7eda or 66410fd6d3
+ MOVQ X11, X11 // f3450f7edb or 66450fd6db
+ MOVQOZX M2, X2 // f30fd6d2
+ MOVQOZX M3, X2 // f30fd6d3
+ MOVQOZX M2, X11 // f3440fd6da
+ MOVQOZX M3, X11 // f3440fd6db
+ MOVSB // a4
+ MOVSL // a5
+ MOVSD (BX), X2 // f20f1013
+ MOVSD (R11), X2 // f2410f1013
+ MOVSD X2, X2 // f20f10d2 or f20f11d2
+ MOVSD X11, X2 // f2410f10d3 or f2440f11da
+ MOVSD (BX), X11 // f2440f101b
+ MOVSD (R11), X11 // f2450f101b
+ MOVSD X2, X11 // f2440f10da or f2410f11d3
+ MOVSD X11, X11 // f2450f10db or f2450f11db
+ MOVSD X2, (BX) // f20f1113
+ MOVSD X11, (BX) // f2440f111b
+ MOVSD X2, (R11) // f2410f1113
+ MOVSD X11, (R11) // f2450f111b
+ MOVSHDUP (BX), X2 // f30f1613
+ MOVSHDUP (R11), X2 // f3410f1613
+ MOVSHDUP X2, X2 // f30f16d2
+ MOVSHDUP X11, X2 // f3410f16d3
+ MOVSHDUP (BX), X11 // f3440f161b
+ MOVSHDUP (R11), X11 // f3450f161b
+ MOVSHDUP X2, X11 // f3440f16da
+ MOVSHDUP X11, X11 // f3450f16db
+ MOVSLDUP (BX), X2 // f30f1213
+ MOVSLDUP (R11), X2 // f3410f1213
+ MOVSLDUP X2, X2 // f30f12d2
+ MOVSLDUP X11, X2 // f3410f12d3
+ MOVSLDUP (BX), X11 // f3440f121b
+ MOVSLDUP (R11), X11 // f3450f121b
+ MOVSLDUP X2, X11 // f3440f12da
+ MOVSLDUP X11, X11 // f3450f12db
+ MOVSQ // 48a5
+ MOVSS (BX), X2 // f30f1013
+ MOVSS (R11), X2 // f3410f1013
+ MOVSS X2, X2 // f30f10d2 or f30f11d2
+ MOVSS X11, X2 // f3410f10d3 or f3440f11da
+ MOVSS (BX), X11 // f3440f101b
+ MOVSS (R11), X11 // f3450f101b
+ MOVSS X2, X11 // f3440f10da or f3410f11d3
+ MOVSS X11, X11 // f3450f10db or f3450f11db
+ MOVSS X2, (BX) // f30f1113
+ MOVSS X11, (BX) // f3440f111b
+ MOVSS X2, (R11) // f3410f1113
+ MOVSS X11, (R11) // f3450f111b
+ MOVSW // 66a5
+ MOVSWW (BX), DX // 660fbf13
+ MOVSWW (R11), DX // 66410fbf13
+ MOVSWW DX, DX // 660fbfd2
+ MOVSWW R11, DX // 66410fbfd3
+ MOVSWW (BX), R11 // 66440fbf1b
+ MOVSWW (R11), R11 // 66450fbf1b
+ MOVSWW DX, R11 // 66440fbfda
+ MOVSWW R11, R11 // 66450fbfdb
+ MOVBWSX (BX), DX // 660fbe13
+ MOVBWSX (R11), DX // 66410fbe13
+ MOVBWSX DL, DX // 660fbed2
+ MOVBWSX R11, DX // 66410fbed3
+ MOVBWSX (BX), R11 // 66440fbe1b
+ MOVBWSX (R11), R11 // 66450fbe1b
+ MOVBWSX DL, R11 // 66440fbeda
+ MOVBWSX R11, R11 // 66450fbedb
+ MOVWLSX (BX), DX // 0fbf13
+ MOVWLSX (R11), DX // 410fbf13
+ MOVWLSX DX, DX // 0fbfd2
+ MOVWLSX R11, DX // 410fbfd3
+ MOVWLSX (BX), R11 // 440fbf1b
+ MOVWLSX (R11), R11 // 450fbf1b
+ MOVWLSX DX, R11 // 440fbfda
+ MOVWLSX R11, R11 // 450fbfdb
+ MOVBLSX (BX), DX // 0fbe13
+ MOVBLSX (R11), DX // 410fbe13
+ MOVBLSX DL, DX // 0fbed2
+ MOVBLSX R11, DX // 410fbed3
+ MOVBLSX (BX), R11 // 440fbe1b
+ MOVBLSX (R11), R11 // 450fbe1b
+ MOVBLSX DL, R11 // 440fbeda
+ MOVBLSX R11, R11 // 450fbedb
+ MOVWQSX (BX), DX // 480fbf13 or 666313
+ MOVWQSX (R11), DX // 490fbf13 or 66416313
+ MOVWQSX DX, DX // 480fbfd2 or 6663d2
+ MOVWQSX R11, DX // 490fbfd3 or 664163d3
+ MOVWQSX (BX), R11 // 4c0fbf1b or 6644631b
+ MOVWQSX (R11), R11 // 4d0fbf1b or 6645631b
+ MOVWQSX DX, R11 // 4c0fbfda or 664463da
+ MOVWQSX R11, R11 // 4d0fbfdb or 664563db
+ MOVBQSX (BX), DX // 480fbe13
+ MOVBQSX (R11), DX // 490fbe13
+ MOVBQSX DL, DX // 480fbed2
+ MOVBQSX R11, DX // 490fbed3
+ MOVBQSX (BX), R11 // 4c0fbe1b
+ MOVBQSX (R11), R11 // 4d0fbe1b
+ MOVBQSX DL, R11 // 4c0fbeda
+ MOVBQSX R11, R11 // 4d0fbedb
+ MOVLQSX (BX), DX // 6313 or 486313
+ MOVLQSX (R11), DX // 416313 or 496313
+ MOVLQSX DX, DX // 63d2 or 4863d2
+ MOVLQSX R11, DX // 4163d3 or 4963d3
+ MOVLQSX (BX), R11 // 44631b or 4c631b
+ MOVLQSX (R11), R11 // 45631b or 4d631b
+ MOVLQSX DX, R11 // 4463da or 4c63da
+ MOVLQSX R11, R11 // 4563db or 4d63db
+ MOVUPD (BX), X2 // 660f1013
+ MOVUPD (R11), X2 // 66410f1013
+ MOVUPD X2, X2 // 660f10d2 or 660f11d2
+ MOVUPD X11, X2 // 66410f10d3 or 66440f11da
+ MOVUPD (BX), X11 // 66440f101b
+ MOVUPD (R11), X11 // 66450f101b
+ MOVUPD X2, X11 // 66440f10da or 66410f11d3
+ MOVUPD X11, X11 // 66450f10db or 66450f11db
+ MOVUPD X2, (BX) // 660f1113
+ MOVUPD X11, (BX) // 66440f111b
+ MOVUPD X2, (R11) // 66410f1113
+ MOVUPD X11, (R11) // 66450f111b
+ MOVUPS (BX), X2 // 0f1013
+ MOVUPS (R11), X2 // 410f1013
+ MOVUPS X2, X2 // 0f10d2 or 0f11d2
+ MOVUPS X11, X2 // 410f10d3 or 440f11da
+ MOVUPS (BX), X11 // 440f101b
+ MOVUPS (R11), X11 // 450f101b
+ MOVUPS X2, X11 // 440f10da or 410f11d3
+ MOVUPS X11, X11 // 450f10db or 450f11db
+ MOVUPS X2, (BX) // 0f1113
+ MOVUPS X11, (BX) // 440f111b
+ MOVUPS X2, (R11) // 410f1113
+ MOVUPS X11, (R11) // 450f111b
+ MOVZWW (BX), DX // 660fb713
+ MOVZWW (R11), DX // 66410fb713
+ MOVZWW DX, DX // 660fb7d2
+ MOVZWW R11, DX // 66410fb7d3
+ MOVZWW (BX), R11 // 66440fb71b
+ MOVZWW (R11), R11 // 66450fb71b
+ MOVZWW DX, R11 // 66440fb7da
+ MOVZWW R11, R11 // 66450fb7db
+ MOVBWZX (BX), DX // 660fb613
+ MOVBWZX (R11), DX // 66410fb613
+ MOVBWZX DL, DX // 660fb6d2
+ MOVBWZX R11, DX // 66410fb6d3
+ MOVBWZX (BX), R11 // 66440fb61b
+ MOVBWZX (R11), R11 // 66450fb61b
+ MOVBWZX DL, R11 // 66440fb6da
+ MOVBWZX R11, R11 // 66450fb6db
+ MOVWLZX (BX), DX // 0fb713
+ MOVWLZX (R11), DX // 410fb713
+ MOVWLZX DX, DX // 0fb7d2
+ MOVWLZX R11, DX // 410fb7d3
+ MOVWLZX (BX), R11 // 440fb71b
+ MOVWLZX (R11), R11 // 450fb71b
+ MOVWLZX DX, R11 // 440fb7da
+ MOVWLZX R11, R11 // 450fb7db
+ MOVBLZX (BX), DX // 0fb613
+ MOVBLZX (R11), DX // 410fb613
+ MOVBLZX DL, DX // 0fb6d2
+ MOVBLZX R11, DX // 410fb6d3
+ MOVBLZX (BX), R11 // 440fb61b
+ MOVBLZX (R11), R11 // 450fb61b
+ MOVBLZX DL, R11 // 440fb6da
+ MOVBLZX R11, R11 // 450fb6db
+ MOVWQZX (BX), DX // 480fb713
+ MOVWQZX (R11), DX // 490fb713
+ MOVWQZX DX, DX // 480fb7d2
+ MOVWQZX R11, DX // 490fb7d3
+ MOVWQZX (BX), R11 // 4c0fb71b
+ MOVWQZX (R11), R11 // 4d0fb71b
+ MOVWQZX DX, R11 // 4c0fb7da
+ MOVWQZX R11, R11 // 4d0fb7db
+ MOVBQZX (BX), DX // 480fb613
+ MOVBQZX (R11), DX // 490fb613
+ MOVBQZX DL, DX // 480fb6d2
+ MOVBQZX R11, DX // 490fb6d3
+ MOVBQZX (BX), R11 // 4c0fb61b
+ MOVBQZX (R11), R11 // 4d0fb61b
+ MOVBQZX DL, R11 // 4c0fb6da
+ MOVBQZX R11, R11 // 4d0fb6db
+ MPSADBW $7, (BX), X2 // 660f3a421307
+ MPSADBW $7, (R11), X2 // 66410f3a421307
+ MPSADBW $7, X2, X2 // 660f3a42d207
+ MPSADBW $7, X11, X2 // 66410f3a42d307
+ MPSADBW $7, (BX), X11 // 66440f3a421b07
+ MPSADBW $7, (R11), X11 // 66450f3a421b07
+ MPSADBW $7, X2, X11 // 66440f3a42da07
+ MPSADBW $7, X11, X11 // 66450f3a42db07
+ MULW (BX) // 66f723
+ MULW (R11) // 6641f723
+ MULW DX // 66f7e2
+ MULW R11 // 6641f7e3
+ MULL (BX) // f723
+ MULL (R11) // 41f723
+ MULL DX // f7e2
+ MULL R11 // 41f7e3
+ MULQ (BX) // 48f723
+ MULQ (R11) // 49f723
+ MULQ DX // 48f7e2
+ MULQ R11 // 49f7e3
+ MULB (BX) // f623
+ MULB (R11) // 41f623
+ MULB DL // f6e2
+ MULB R11 // 41f6e3
+ MULPD (BX), X2 // 660f5913
+ MULPD (R11), X2 // 66410f5913
+ MULPD X2, X2 // 660f59d2
+ MULPD X11, X2 // 66410f59d3
+ MULPD (BX), X11 // 66440f591b
+ MULPD (R11), X11 // 66450f591b
+ MULPD X2, X11 // 66440f59da
+ MULPD X11, X11 // 66450f59db
+ MULPS (BX), X2 // 0f5913
+ MULPS (R11), X2 // 410f5913
+ MULPS X2, X2 // 0f59d2
+ MULPS X11, X2 // 410f59d3
+ MULPS (BX), X11 // 440f591b
+ MULPS (R11), X11 // 450f591b
+ MULPS X2, X11 // 440f59da
+ MULPS X11, X11 // 450f59db
+ MULSD (BX), X2 // f20f5913
+ MULSD (R11), X2 // f2410f5913
+ MULSD X2, X2 // f20f59d2
+ MULSD X11, X2 // f2410f59d3
+ MULSD (BX), X11 // f2440f591b
+ MULSD (R11), X11 // f2450f591b
+ MULSD X2, X11 // f2440f59da
+ MULSD X11, X11 // f2450f59db
+ MULSS (BX), X2 // f30f5913
+ MULSS (R11), X2 // f3410f5913
+ MULSS X2, X2 // f30f59d2
+ MULSS X11, X2 // f3410f59d3
+ MULSS (BX), X11 // f3440f591b
+ MULSS (R11), X11 // f3450f591b
+ MULSS X2, X11 // f3440f59da
+ MULSS X11, X11 // f3450f59db
+ MULXL (BX), R9, DX // c4e233f613
+ MULXL (R11), R9, DX // c4c233f613
+ MULXL DX, R9, DX // c4e233f6d2
+ MULXL R11, R9, DX // c4c233f6d3
+ MULXL (BX), R9, R11 // c46233f61b
+ MULXL (R11), R9, R11 // c44233f61b
+ MULXL DX, R9, R11 // c46233f6da
+ MULXL R11, R9, R11 // c44233f6db
+ MULXQ (BX), R14, DX // c4e28bf613
+ MULXQ (R11), R14, DX // c4c28bf613
+ MULXQ DX, R14, DX // c4e28bf6d2
+ MULXQ R11, R14, DX // c4c28bf6d3
+ MULXQ (BX), R14, R11 // c4628bf61b
+ MULXQ (R11), R14, R11 // c4428bf61b
+ MULXQ DX, R14, R11 // c4628bf6da
+ MULXQ R11, R14, R11 // c4428bf6db
+ MWAIT // 0f01c9
+ NEGW (BX) // 66f71b
+ NEGW (R11) // 6641f71b
+ NEGW DX // 66f7da
+ NEGW R11 // 6641f7db
+ NEGL (BX) // f71b
+ NEGL (R11) // 41f71b
+ NEGL DX // f7da
+ NEGL R11 // 41f7db
+ NEGQ (BX) // 48f71b
+ NEGQ (R11) // 49f71b
+ NEGQ DX // 48f7da
+ NEGQ R11 // 49f7db
+ NEGB (BX) // f61b
+ NEGB (R11) // 41f61b
+ NEGB DL // f6da
+ NEGB R11 // 41f6db
+ NOPW (BX) // 660f1f03
+ NOPW (R11) // 66410f1f03
+ NOPW DX // 660f1fc2
+ NOPW R11 // 66410f1fc3
+ NOPL (BX) // 0f1f03
+ NOPL (R11) // 410f1f03
+ NOPL DX // 0f1fc2
+ NOPL R11 // 410f1fc3
+ NOTW (BX) // 66f713
+ NOTW (R11) // 6641f713
+ NOTW DX // 66f7d2
+ NOTW R11 // 6641f7d3
+ NOTL (BX) // f713
+ NOTL (R11) // 41f713
+ NOTL DX // f7d2
+ NOTL R11 // 41f7d3
+ NOTQ (BX) // 48f713
+ NOTQ (R11) // 49f713
+ NOTQ DX // 48f7d2
+ NOTQ R11 // 49f7d3
+ NOTB (BX) // f613
+ NOTB (R11) // 41f613
+ NOTB DL // f6d2
+ NOTB R11 // 41f6d3
+ ORB $7, AL // 0c07
+ ORW $61731, AX // 660d23f1
+ ORL $4045620583, AX // 0d674523f1
+ ORQ $-249346713, AX // 480d674523f1
+ ORW $61731, (BX) // 66810b23f1
+ ORW $61731, (R11) // 6641810b23f1
+ ORW $61731, DX // 6681ca23f1
+ ORW $61731, R11 // 664181cb23f1
+ ORW $7, (BX) // 66830b07
+ ORW $7, (R11) // 6641830b07
+ ORW $7, DX // 6683ca07
+ ORW $7, R11 // 664183cb07
+ ORW DX, (BX) // 660913
+ ORW R11, (BX) // 6644091b
+ ORW DX, (R11) // 66410913
+ ORW R11, (R11) // 6645091b
+ ORW DX, DX // 6609d2 or 660bd2
+ ORW R11, DX // 664409da or 66410bd3
+ ORW DX, R11 // 664109d3 or 66440bda
+ ORW R11, R11 // 664509db or 66450bdb
+ ORL $4045620583, (BX) // 810b674523f1
+ ORL $4045620583, (R11) // 41810b674523f1
+ ORL $4045620583, DX // 81ca674523f1
+ ORL $4045620583, R11 // 4181cb674523f1
+ ORL $7, (BX) // 830b07
+ ORL $7, (R11) // 41830b07
+ ORL $7, DX // 83ca07
+ ORL $7, R11 // 4183cb07
+ ORL DX, (BX) // 0913
+ ORL R11, (BX) // 44091b
+ ORL DX, (R11) // 410913
+ ORL R11, (R11) // 45091b
+ ORL DX, DX // 09d2 or 0bd2
+ ORL R11, DX // 4409da or 410bd3
+ ORL DX, R11 // 4109d3 or 440bda
+ ORL R11, R11 // 4509db or 450bdb
+ ORQ $-249346713, (BX) // 48810b674523f1
+ ORQ $-249346713, (R11) // 49810b674523f1
+ ORQ $-249346713, DX // 4881ca674523f1
+ ORQ $-249346713, R11 // 4981cb674523f1
+ ORQ $7, (BX) // 48830b07
+ ORQ $7, (R11) // 49830b07
+ ORQ $7, DX // 4883ca07
+ ORQ $7, R11 // 4983cb07
+ ORQ DX, (BX) // 480913
+ ORQ R11, (BX) // 4c091b
+ ORQ DX, (R11) // 490913
+ ORQ R11, (R11) // 4d091b
+ ORQ DX, DX // 4809d2 or 480bd2
+ ORQ R11, DX // 4c09da or 490bd3
+ ORQ DX, R11 // 4909d3 or 4c0bda
+ ORQ R11, R11 // 4d09db or 4d0bdb
+ ORB $7, (BX) // 800b07
+ ORB $7, (R11) // 41800b07
+ ORB $7, DL // 80ca07
+ ORB $7, R11 // 4180cb07
+ ORB DL, (BX) // 0813
+ ORB R11, (BX) // 44081b
+ ORB DL, (R11) // 410813
+ ORB R11, (R11) // 45081b
+ ORB DL, DL // 08d2 or 0ad2
+ ORB R11, DL // 4408da or 410ad3
+ ORB DL, R11 // 4108d3 or 440ada
+ ORB R11, R11 // 4508db or 450adb
+ ORW (BX), DX // 660b13
+ ORW (R11), DX // 66410b13
+ ORW (BX), R11 // 66440b1b
+ ORW (R11), R11 // 66450b1b
+ ORL (BX), DX // 0b13
+ ORL (R11), DX // 410b13
+ ORL (BX), R11 // 440b1b
+ ORL (R11), R11 // 450b1b
+ ORQ (BX), DX // 480b13
+ ORQ (R11), DX // 490b13
+ ORQ (BX), R11 // 4c0b1b
+ ORQ (R11), R11 // 4d0b1b
+ ORB (BX), DL // 0a13
+ ORB (R11), DL // 410a13
+ ORB (BX), R11 // 440a1b
+ ORB (R11), R11 // 450a1b
+ ORPD (BX), X2 // 660f5613
+ ORPD (R11), X2 // 66410f5613
+ ORPD X2, X2 // 660f56d2
+ ORPD X11, X2 // 66410f56d3
+ ORPD (BX), X11 // 66440f561b
+ ORPD (R11), X11 // 66450f561b
+ ORPD X2, X11 // 66440f56da
+ ORPD X11, X11 // 66450f56db
+ ORPS (BX), X2 // 0f5613
+ ORPS (R11), X2 // 410f5613
+ ORPS X2, X2 // 0f56d2
+ ORPS X11, X2 // 410f56d3
+ ORPS (BX), X11 // 440f561b
+ ORPS (R11), X11 // 450f561b
+ ORPS X2, X11 // 440f56da
+ ORPS X11, X11 // 450f56db
+ //TODO: OUTB AL, DX // ee
+ //TODO: OUTW AX, DX // 66ef
+ //TODO: OUTL AX, DX // ef
+ //TODO: OUTB AL, $7 // e607
+ //TODO: OUTW AX, $7 // 66e707
+ //TODO: OUTL AX, $7 // e707
+ OUTSB // 6e
+ OUTSL // 6f
+ OUTSW // 666f
+ //TODO: PABSB (BX), M2 // 0f381c13
+ //TODO: PABSB (R11), M2 // 410f381c13
+ //TODO: PABSB M2, M2 // 0f381cd2
+ //TODO: PABSB M3, M2 // 0f381cd3
+ //TODO: PABSB (BX), M3 // 0f381c1b
+ //TODO: PABSB (R11), M3 // 410f381c1b
+ //TODO: PABSB M2, M3 // 0f381cda
+ //TODO: PABSB M3, M3 // 0f381cdb
+ PABSB (BX), X2 // 660f381c13
+ PABSB (R11), X2 // 66410f381c13
+ PABSB X2, X2 // 660f381cd2
+ PABSB X11, X2 // 66410f381cd3
+ PABSB (BX), X11 // 66440f381c1b
+ PABSB (R11), X11 // 66450f381c1b
+ PABSB X2, X11 // 66440f381cda
+ PABSB X11, X11 // 66450f381cdb
+ //TODO: PABSD (BX), M2 // 0f381e13
+ //TODO: PABSD (R11), M2 // 410f381e13
+ //TODO: PABSD M2, M2 // 0f381ed2
+ //TODO: PABSD M3, M2 // 0f381ed3
+ //TODO: PABSD (BX), M3 // 0f381e1b
+ //TODO: PABSD (R11), M3 // 410f381e1b
+ //TODO: PABSD M2, M3 // 0f381eda
+ //TODO: PABSD M3, M3 // 0f381edb
+ PABSD (BX), X2 // 660f381e13
+ PABSD (R11), X2 // 66410f381e13
+ PABSD X2, X2 // 660f381ed2
+ PABSD X11, X2 // 66410f381ed3
+ PABSD (BX), X11 // 66440f381e1b
+ PABSD (R11), X11 // 66450f381e1b
+ PABSD X2, X11 // 66440f381eda
+ PABSD X11, X11 // 66450f381edb
+ //TODO: PABSW (BX), M2 // 0f381d13
+ //TODO: PABSW (R11), M2 // 410f381d13
+ //TODO: PABSW M2, M2 // 0f381dd2
+ //TODO: PABSW M3, M2 // 0f381dd3
+ //TODO: PABSW (BX), M3 // 0f381d1b
+ //TODO: PABSW (R11), M3 // 410f381d1b
+ //TODO: PABSW M2, M3 // 0f381dda
+ //TODO: PABSW M3, M3 // 0f381ddb
+ PABSW (BX), X2 // 660f381d13
+ PABSW (R11), X2 // 66410f381d13
+ PABSW X2, X2 // 660f381dd2
+ PABSW X11, X2 // 66410f381dd3
+ PABSW (BX), X11 // 66440f381d1b
+ PABSW (R11), X11 // 66450f381d1b
+ PABSW X2, X11 // 66440f381dda
+ PABSW X11, X11 // 66450f381ddb
+ PACKSSLW (BX), M2 // 0f6b13
+ PACKSSLW (R11), M2 // 410f6b13
+ PACKSSLW M2, M2 // 0f6bd2
+ PACKSSLW M3, M2 // 0f6bd3
+ PACKSSLW (BX), M3 // 0f6b1b
+ PACKSSLW (R11), M3 // 410f6b1b
+ PACKSSLW M2, M3 // 0f6bda
+ PACKSSLW M3, M3 // 0f6bdb
+ PACKSSLW (BX), X2 // 660f6b13
+ PACKSSLW (R11), X2 // 66410f6b13
+ PACKSSLW X2, X2 // 660f6bd2
+ PACKSSLW X11, X2 // 66410f6bd3
+ PACKSSLW (BX), X11 // 66440f6b1b
+ PACKSSLW (R11), X11 // 66450f6b1b
+ PACKSSLW X2, X11 // 66440f6bda
+ PACKSSLW X11, X11 // 66450f6bdb
+ PACKSSWB (BX), M2 // 0f6313
+ PACKSSWB (R11), M2 // 410f6313
+ PACKSSWB M2, M2 // 0f63d2
+ PACKSSWB M3, M2 // 0f63d3
+ PACKSSWB (BX), M3 // 0f631b
+ PACKSSWB (R11), M3 // 410f631b
+ PACKSSWB M2, M3 // 0f63da
+ PACKSSWB M3, M3 // 0f63db
+ PACKSSWB (BX), X2 // 660f6313
+ PACKSSWB (R11), X2 // 66410f6313
+ PACKSSWB X2, X2 // 660f63d2
+ PACKSSWB X11, X2 // 66410f63d3
+ PACKSSWB (BX), X11 // 66440f631b
+ PACKSSWB (R11), X11 // 66450f631b
+ PACKSSWB X2, X11 // 66440f63da
+ PACKSSWB X11, X11 // 66450f63db
+ PACKUSDW (BX), X2 // 660f382b13
+ PACKUSDW (R11), X2 // 66410f382b13
+ PACKUSDW X2, X2 // 660f382bd2
+ PACKUSDW X11, X2 // 66410f382bd3
+ PACKUSDW (BX), X11 // 66440f382b1b
+ PACKUSDW (R11), X11 // 66450f382b1b
+ PACKUSDW X2, X11 // 66440f382bda
+ PACKUSDW X11, X11 // 66450f382bdb
+ PACKUSWB (BX), M2 // 0f6713
+ PACKUSWB (R11), M2 // 410f6713
+ PACKUSWB M2, M2 // 0f67d2
+ PACKUSWB M3, M2 // 0f67d3
+ PACKUSWB (BX), M3 // 0f671b
+ PACKUSWB (R11), M3 // 410f671b
+ PACKUSWB M2, M3 // 0f67da
+ PACKUSWB M3, M3 // 0f67db
+ PACKUSWB (BX), X2 // 660f6713
+ PACKUSWB (R11), X2 // 66410f6713
+ PACKUSWB X2, X2 // 660f67d2
+ PACKUSWB X11, X2 // 66410f67d3
+ PACKUSWB (BX), X11 // 66440f671b
+ PACKUSWB (R11), X11 // 66450f671b
+ PACKUSWB X2, X11 // 66440f67da
+ PACKUSWB X11, X11 // 66450f67db
+ PADDB (BX), M2 // 0ffc13
+ PADDB (R11), M2 // 410ffc13
+ PADDB M2, M2 // 0ffcd2
+ PADDB M3, M2 // 0ffcd3
+ PADDB (BX), M3 // 0ffc1b
+ PADDB (R11), M3 // 410ffc1b
+ PADDB M2, M3 // 0ffcda
+ PADDB M3, M3 // 0ffcdb
+ PADDB (BX), X2 // 660ffc13
+ PADDB (R11), X2 // 66410ffc13
+ PADDB X2, X2 // 660ffcd2
+ PADDB X11, X2 // 66410ffcd3
+ PADDB (BX), X11 // 66440ffc1b
+ PADDB (R11), X11 // 66450ffc1b
+ PADDB X2, X11 // 66440ffcda
+ PADDB X11, X11 // 66450ffcdb
+ PADDL (BX), M2 // 0ffe13
+ PADDL (R11), M2 // 410ffe13
+ PADDL M2, M2 // 0ffed2
+ PADDL M3, M2 // 0ffed3
+ PADDL (BX), M3 // 0ffe1b
+ PADDL (R11), M3 // 410ffe1b
+ PADDL M2, M3 // 0ffeda
+ PADDL M3, M3 // 0ffedb
+ PADDL (BX), X2 // 660ffe13
+ PADDL (R11), X2 // 66410ffe13
+ PADDL X2, X2 // 660ffed2
+ PADDL X11, X2 // 66410ffed3
+ PADDL (BX), X11 // 66440ffe1b
+ PADDL (R11), X11 // 66450ffe1b
+ PADDL X2, X11 // 66440ffeda
+ PADDL X11, X11 // 66450ffedb
+ //TODO: PADDQ (BX), M2 // 0fd413
+ //TODO: PADDQ (R11), M2 // 410fd413
+ //TODO: PADDQ M2, M2 // 0fd4d2
+ //TODO: PADDQ M3, M2 // 0fd4d3
+ //TODO: PADDQ (BX), M3 // 0fd41b
+ //TODO: PADDQ (R11), M3 // 410fd41b
+ //TODO: PADDQ M2, M3 // 0fd4da
+ //TODO: PADDQ M3, M3 // 0fd4db
+ PADDQ (BX), X2 // 660fd413
+ PADDQ (R11), X2 // 66410fd413
+ PADDQ X2, X2 // 660fd4d2
+ PADDQ X11, X2 // 66410fd4d3
+ PADDQ (BX), X11 // 66440fd41b
+ PADDQ (R11), X11 // 66450fd41b
+ PADDQ X2, X11 // 66440fd4da
+ PADDQ X11, X11 // 66450fd4db
+ PADDSB (BX), M2 // 0fec13
+ PADDSB (R11), M2 // 410fec13
+ PADDSB M2, M2 // 0fecd2
+ PADDSB M3, M2 // 0fecd3
+ PADDSB (BX), M3 // 0fec1b
+ PADDSB (R11), M3 // 410fec1b
+ PADDSB M2, M3 // 0fecda
+ PADDSB M3, M3 // 0fecdb
+ PADDSB (BX), X2 // 660fec13
+ PADDSB (R11), X2 // 66410fec13
+ PADDSB X2, X2 // 660fecd2
+ PADDSB X11, X2 // 66410fecd3
+ PADDSB (BX), X11 // 66440fec1b
+ PADDSB (R11), X11 // 66450fec1b
+ PADDSB X2, X11 // 66440fecda
+ PADDSB X11, X11 // 66450fecdb
+ PADDSW (BX), M2 // 0fed13
+ PADDSW (R11), M2 // 410fed13
+ PADDSW M2, M2 // 0fedd2
+ PADDSW M3, M2 // 0fedd3
+ PADDSW (BX), M3 // 0fed1b
+ PADDSW (R11), M3 // 410fed1b
+ PADDSW M2, M3 // 0fedda
+ PADDSW M3, M3 // 0feddb
+ PADDSW (BX), X2 // 660fed13
+ PADDSW (R11), X2 // 66410fed13
+ PADDSW X2, X2 // 660fedd2
+ PADDSW X11, X2 // 66410fedd3
+ PADDSW (BX), X11 // 66440fed1b
+ PADDSW (R11), X11 // 66450fed1b
+ PADDSW X2, X11 // 66440fedda
+ PADDSW X11, X11 // 66450feddb
+ PADDUSB (BX), M2 // 0fdc13
+ PADDUSB (R11), M2 // 410fdc13
+ PADDUSB M2, M2 // 0fdcd2
+ PADDUSB M3, M2 // 0fdcd3
+ PADDUSB (BX), M3 // 0fdc1b
+ PADDUSB (R11), M3 // 410fdc1b
+ PADDUSB M2, M3 // 0fdcda
+ PADDUSB M3, M3 // 0fdcdb
+ PADDUSB (BX), X2 // 660fdc13
+ PADDUSB (R11), X2 // 66410fdc13
+ PADDUSB X2, X2 // 660fdcd2
+ PADDUSB X11, X2 // 66410fdcd3
+ PADDUSB (BX), X11 // 66440fdc1b
+ PADDUSB (R11), X11 // 66450fdc1b
+ PADDUSB X2, X11 // 66440fdcda
+ PADDUSB X11, X11 // 66450fdcdb
+ PADDUSW (BX), M2 // 0fdd13
+ PADDUSW (R11), M2 // 410fdd13
+ PADDUSW M2, M2 // 0fddd2
+ PADDUSW M3, M2 // 0fddd3
+ PADDUSW (BX), M3 // 0fdd1b
+ PADDUSW (R11), M3 // 410fdd1b
+ PADDUSW M2, M3 // 0fddda
+ PADDUSW M3, M3 // 0fdddb
+ PADDUSW (BX), X2 // 660fdd13
+ PADDUSW (R11), X2 // 66410fdd13
+ PADDUSW X2, X2 // 660fddd2
+ PADDUSW X11, X2 // 66410fddd3
+ PADDUSW (BX), X11 // 66440fdd1b
+ PADDUSW (R11), X11 // 66450fdd1b
+ PADDUSW X2, X11 // 66440fddda
+ PADDUSW X11, X11 // 66450fdddb
+ PADDW (BX), M2 // 0ffd13
+ PADDW (R11), M2 // 410ffd13
+ PADDW M2, M2 // 0ffdd2
+ PADDW M3, M2 // 0ffdd3
+ PADDW (BX), M3 // 0ffd1b
+ PADDW (R11), M3 // 410ffd1b
+ PADDW M2, M3 // 0ffdda
+ PADDW M3, M3 // 0ffddb
+ PADDW (BX), X2 // 660ffd13
+ PADDW (R11), X2 // 66410ffd13
+ PADDW X2, X2 // 660ffdd2
+ PADDW X11, X2 // 66410ffdd3
+ PADDW (BX), X11 // 66440ffd1b
+ PADDW (R11), X11 // 66450ffd1b
+ PADDW X2, X11 // 66440ffdda
+ PADDW X11, X11 // 66450ffddb
+ //TODO: PALIGNR $7, (BX), M2 // 0f3a0f1307
+ //TODO: PALIGNR $7, (R11), M2 // 410f3a0f1307
+ //TODO: PALIGNR $7, M2, M2 // 0f3a0fd207
+ //TODO: PALIGNR $7, M3, M2 // 0f3a0fd307
+ //TODO: PALIGNR $7, (BX), M3 // 0f3a0f1b07
+ //TODO: PALIGNR $7, (R11), M3 // 410f3a0f1b07
+ //TODO: PALIGNR $7, M2, M3 // 0f3a0fda07
+ //TODO: PALIGNR $7, M3, M3 // 0f3a0fdb07
+ PALIGNR $7, (BX), X2 // 660f3a0f1307
+ PALIGNR $7, (R11), X2 // 66410f3a0f1307
+ PALIGNR $7, X2, X2 // 660f3a0fd207
+ PALIGNR $7, X11, X2 // 66410f3a0fd307
+ PALIGNR $7, (BX), X11 // 66440f3a0f1b07
+ PALIGNR $7, (R11), X11 // 66450f3a0f1b07
+ PALIGNR $7, X2, X11 // 66440f3a0fda07
+ PALIGNR $7, X11, X11 // 66450f3a0fdb07
+ PAND (BX), M2 // 0fdb13
+ PAND (R11), M2 // 410fdb13
+ PAND M2, M2 // 0fdbd2
+ PAND M3, M2 // 0fdbd3
+ PAND (BX), M3 // 0fdb1b
+ PAND (R11), M3 // 410fdb1b
+ PAND M2, M3 // 0fdbda
+ PAND M3, M3 // 0fdbdb
+ PAND (BX), X2 // 660fdb13
+ PAND (R11), X2 // 66410fdb13
+ PAND X2, X2 // 660fdbd2
+ PAND X11, X2 // 66410fdbd3
+ PAND (BX), X11 // 66440fdb1b
+ PAND (R11), X11 // 66450fdb1b
+ PAND X2, X11 // 66440fdbda
+ PAND X11, X11 // 66450fdbdb
+ PANDN (BX), M2 // 0fdf13
+ PANDN (R11), M2 // 410fdf13
+ PANDN M2, M2 // 0fdfd2
+ PANDN M3, M2 // 0fdfd3
+ PANDN (BX), M3 // 0fdf1b
+ PANDN (R11), M3 // 410fdf1b
+ PANDN M2, M3 // 0fdfda
+ PANDN M3, M3 // 0fdfdb
+ PANDN (BX), X2 // 660fdf13
+ PANDN (R11), X2 // 66410fdf13
+ PANDN X2, X2 // 660fdfd2
+ PANDN X11, X2 // 66410fdfd3
+ PANDN (BX), X11 // 66440fdf1b
+ PANDN (R11), X11 // 66450fdf1b
+ PANDN X2, X11 // 66440fdfda
+ PANDN X11, X11 // 66450fdfdb
+ PAVGB (BX), M2 // 0fe013
+ PAVGB (R11), M2 // 410fe013
+ PAVGB M2, M2 // 0fe0d2
+ PAVGB M3, M2 // 0fe0d3
+ PAVGB (BX), M3 // 0fe01b
+ PAVGB (R11), M3 // 410fe01b
+ PAVGB M2, M3 // 0fe0da
+ PAVGB M3, M3 // 0fe0db
+ PAVGB (BX), X2 // 660fe013
+ PAVGB (R11), X2 // 66410fe013
+ PAVGB X2, X2 // 660fe0d2
+ PAVGB X11, X2 // 66410fe0d3
+ PAVGB (BX), X11 // 66440fe01b
+ PAVGB (R11), X11 // 66450fe01b
+ PAVGB X2, X11 // 66440fe0da
+ PAVGB X11, X11 // 66450fe0db
+ PAVGW (BX), M2 // 0fe313
+ PAVGW (R11), M2 // 410fe313
+ PAVGW M2, M2 // 0fe3d2
+ PAVGW M3, M2 // 0fe3d3
+ PAVGW (BX), M3 // 0fe31b
+ PAVGW (R11), M3 // 410fe31b
+ PAVGW M2, M3 // 0fe3da
+ PAVGW M3, M3 // 0fe3db
+ PAVGW (BX), X2 // 660fe313
+ PAVGW (R11), X2 // 66410fe313
+ PAVGW X2, X2 // 660fe3d2
+ PAVGW X11, X2 // 66410fe3d3
+ PAVGW (BX), X11 // 66440fe31b
+ PAVGW (R11), X11 // 66450fe31b
+ PAVGW X2, X11 // 66440fe3da
+ PAVGW X11, X11 // 66450fe3db
+ PBLENDVB X0, (BX), X2 // 660f381013
+ PBLENDVB X0, (R11), X2 // 66410f381013
+ PBLENDVB X0, X2, X2 // 660f3810d2
+ PBLENDVB X0, X11, X2 // 66410f3810d3
+ PBLENDVB X0, (BX), X11 // 66440f38101b
+ PBLENDVB X0, (R11), X11 // 66450f38101b
+ PBLENDVB X0, X2, X11 // 66440f3810da
+ PBLENDVB X0, X11, X11 // 66450f3810db
+ PBLENDW $7, (BX), X2 // 660f3a0e1307
+ PBLENDW $7, (R11), X2 // 66410f3a0e1307
+ PBLENDW $7, X2, X2 // 660f3a0ed207
+ PBLENDW $7, X11, X2 // 66410f3a0ed307
+ PBLENDW $7, (BX), X11 // 66440f3a0e1b07
+ PBLENDW $7, (R11), X11 // 66450f3a0e1b07
+ PBLENDW $7, X2, X11 // 66440f3a0eda07
+ PBLENDW $7, X11, X11 // 66450f3a0edb07
+ PCLMULQDQ $7, (BX), X2 // 660f3a441307
+ PCLMULQDQ $7, (R11), X2 // 66410f3a441307
+ PCLMULQDQ $7, X2, X2 // 660f3a44d207
+ PCLMULQDQ $7, X11, X2 // 66410f3a44d307
+ PCLMULQDQ $7, (BX), X11 // 66440f3a441b07
+ PCLMULQDQ $7, (R11), X11 // 66450f3a441b07
+ PCLMULQDQ $7, X2, X11 // 66440f3a44da07
+ PCLMULQDQ $7, X11, X11 // 66450f3a44db07
+ PCMPEQB (BX), M2 // 0f7413
+ PCMPEQB (R11), M2 // 410f7413
+ PCMPEQB M2, M2 // 0f74d2
+ PCMPEQB M3, M2 // 0f74d3
+ PCMPEQB (BX), M3 // 0f741b
+ PCMPEQB (R11), M3 // 410f741b
+ PCMPEQB M2, M3 // 0f74da
+ PCMPEQB M3, M3 // 0f74db
+ PCMPEQB (BX), X2 // 660f7413
+ PCMPEQB (R11), X2 // 66410f7413
+ PCMPEQB X2, X2 // 660f74d2
+ PCMPEQB X11, X2 // 66410f74d3
+ PCMPEQB (BX), X11 // 66440f741b
+ PCMPEQB (R11), X11 // 66450f741b
+ PCMPEQB X2, X11 // 66440f74da
+ PCMPEQB X11, X11 // 66450f74db
+ PCMPEQL (BX), M2 // 0f7613
+ PCMPEQL (R11), M2 // 410f7613
+ PCMPEQL M2, M2 // 0f76d2
+ PCMPEQL M3, M2 // 0f76d3
+ PCMPEQL (BX), M3 // 0f761b
+ PCMPEQL (R11), M3 // 410f761b
+ PCMPEQL M2, M3 // 0f76da
+ PCMPEQL M3, M3 // 0f76db
+ PCMPEQL (BX), X2 // 660f7613
+ PCMPEQL (R11), X2 // 66410f7613
+ PCMPEQL X2, X2 // 660f76d2
+ PCMPEQL X11, X2 // 66410f76d3
+ PCMPEQL (BX), X11 // 66440f761b
+ PCMPEQL (R11), X11 // 66450f761b
+ PCMPEQL X2, X11 // 66440f76da
+ PCMPEQL X11, X11 // 66450f76db
+ PCMPEQQ (BX), X2 // 660f382913
+ PCMPEQQ (R11), X2 // 66410f382913
+ PCMPEQQ X2, X2 // 660f3829d2
+ PCMPEQQ X11, X2 // 66410f3829d3
+ PCMPEQQ (BX), X11 // 66440f38291b
+ PCMPEQQ (R11), X11 // 66450f38291b
+ PCMPEQQ X2, X11 // 66440f3829da
+ PCMPEQQ X11, X11 // 66450f3829db
+ PCMPEQW (BX), M2 // 0f7513
+ PCMPEQW (R11), M2 // 410f7513
+ PCMPEQW M2, M2 // 0f75d2
+ PCMPEQW M3, M2 // 0f75d3
+ PCMPEQW (BX), M3 // 0f751b
+ PCMPEQW (R11), M3 // 410f751b
+ PCMPEQW M2, M3 // 0f75da
+ PCMPEQW M3, M3 // 0f75db
+ PCMPEQW (BX), X2 // 660f7513
+ PCMPEQW (R11), X2 // 66410f7513
+ PCMPEQW X2, X2 // 660f75d2
+ PCMPEQW X11, X2 // 66410f75d3
+ PCMPEQW (BX), X11 // 66440f751b
+ PCMPEQW (R11), X11 // 66450f751b
+ PCMPEQW X2, X11 // 66440f75da
+ PCMPEQW X11, X11 // 66450f75db
+ PCMPESTRI $7, (BX), X2 // 660f3a611307
+ PCMPESTRI $7, (R11), X2 // 66410f3a611307
+ PCMPESTRI $7, X2, X2 // 660f3a61d207
+ PCMPESTRI $7, X11, X2 // 66410f3a61d307
+ PCMPESTRI $7, (BX), X11 // 66440f3a611b07
+ PCMPESTRI $7, (R11), X11 // 66450f3a611b07
+ PCMPESTRI $7, X2, X11 // 66440f3a61da07
+ PCMPESTRI $7, X11, X11 // 66450f3a61db07
+ PCMPESTRM $7, (BX), X2 // 660f3a601307
+ PCMPESTRM $7, (R11), X2 // 66410f3a601307
+ PCMPESTRM $7, X2, X2 // 660f3a60d207
+ PCMPESTRM $7, X11, X2 // 66410f3a60d307
+ PCMPESTRM $7, (BX), X11 // 66440f3a601b07
+ PCMPESTRM $7, (R11), X11 // 66450f3a601b07
+ PCMPESTRM $7, X2, X11 // 66440f3a60da07
+ PCMPESTRM $7, X11, X11 // 66450f3a60db07
+ PCMPGTB (BX), M2 // 0f6413
+ PCMPGTB (R11), M2 // 410f6413
+ PCMPGTB M2, M2 // 0f64d2
+ PCMPGTB M3, M2 // 0f64d3
+ PCMPGTB (BX), M3 // 0f641b
+ PCMPGTB (R11), M3 // 410f641b
+ PCMPGTB M2, M3 // 0f64da
+ PCMPGTB M3, M3 // 0f64db
+ PCMPGTB (BX), X2 // 660f6413
+ PCMPGTB (R11), X2 // 66410f6413
+ PCMPGTB X2, X2 // 660f64d2
+ PCMPGTB X11, X2 // 66410f64d3
+ PCMPGTB (BX), X11 // 66440f641b
+ PCMPGTB (R11), X11 // 66450f641b
+ PCMPGTB X2, X11 // 66440f64da
+ PCMPGTB X11, X11 // 66450f64db
+ PCMPGTL (BX), M2 // 0f6613
+ PCMPGTL (R11), M2 // 410f6613
+ PCMPGTL M2, M2 // 0f66d2
+ PCMPGTL M3, M2 // 0f66d3
+ PCMPGTL (BX), M3 // 0f661b
+ PCMPGTL (R11), M3 // 410f661b
+ PCMPGTL M2, M3 // 0f66da
+ PCMPGTL M3, M3 // 0f66db
+ PCMPGTL (BX), X2 // 660f6613
+ PCMPGTL (R11), X2 // 66410f6613
+ PCMPGTL X2, X2 // 660f66d2
+ PCMPGTL X11, X2 // 66410f66d3
+ PCMPGTL (BX), X11 // 66440f661b
+ PCMPGTL (R11), X11 // 66450f661b
+ PCMPGTL X2, X11 // 66440f66da
+ PCMPGTL X11, X11 // 66450f66db
+ PCMPGTQ (BX), X2 // 660f383713
+ PCMPGTQ (R11), X2 // 66410f383713
+ PCMPGTQ X2, X2 // 660f3837d2
+ PCMPGTQ X11, X2 // 66410f3837d3
+ PCMPGTQ (BX), X11 // 66440f38371b
+ PCMPGTQ (R11), X11 // 66450f38371b
+ PCMPGTQ X2, X11 // 66440f3837da
+ PCMPGTQ X11, X11 // 66450f3837db
+ PCMPGTW (BX), M2 // 0f6513
+ PCMPGTW (R11), M2 // 410f6513
+ PCMPGTW M2, M2 // 0f65d2
+ PCMPGTW M3, M2 // 0f65d3
+ PCMPGTW (BX), M3 // 0f651b
+ PCMPGTW (R11), M3 // 410f651b
+ PCMPGTW M2, M3 // 0f65da
+ PCMPGTW M3, M3 // 0f65db
+ PCMPGTW (BX), X2 // 660f6513
+ PCMPGTW (R11), X2 // 66410f6513
+ PCMPGTW X2, X2 // 660f65d2
+ PCMPGTW X11, X2 // 66410f65d3
+ PCMPGTW (BX), X11 // 66440f651b
+ PCMPGTW (R11), X11 // 66450f651b
+ PCMPGTW X2, X11 // 66440f65da
+ PCMPGTW X11, X11 // 66450f65db
+ PCMPISTRI $7, (BX), X2 // 660f3a631307
+ PCMPISTRI $7, (R11), X2 // 66410f3a631307
+ PCMPISTRI $7, X2, X2 // 660f3a63d207
+ PCMPISTRI $7, X11, X2 // 66410f3a63d307
+ PCMPISTRI $7, (BX), X11 // 66440f3a631b07
+ PCMPISTRI $7, (R11), X11 // 66450f3a631b07
+ PCMPISTRI $7, X2, X11 // 66440f3a63da07
+ PCMPISTRI $7, X11, X11 // 66450f3a63db07
+ PCMPISTRM $7, (BX), X2 // 660f3a621307
+ PCMPISTRM $7, (R11), X2 // 66410f3a621307
+ PCMPISTRM $7, X2, X2 // 660f3a62d207
+ PCMPISTRM $7, X11, X2 // 66410f3a62d307
+ PCMPISTRM $7, (BX), X11 // 66440f3a621b07
+ PCMPISTRM $7, (R11), X11 // 66450f3a621b07
+ PCMPISTRM $7, X2, X11 // 66440f3a62da07
+ PCMPISTRM $7, X11, X11 // 66450f3a62db07
+ PDEPL (BX), R9, DX // c4e233f513
+ PDEPL (R11), R9, DX // c4c233f513
+ PDEPL DX, R9, DX // c4e233f5d2
+ PDEPL R11, R9, DX // c4c233f5d3
+ PDEPL (BX), R9, R11 // c46233f51b
+ PDEPL (R11), R9, R11 // c44233f51b
+ PDEPL DX, R9, R11 // c46233f5da
+ PDEPL R11, R9, R11 // c44233f5db
+ PDEPQ (BX), R14, DX // c4e28bf513
+ PDEPQ (R11), R14, DX // c4c28bf513
+ PDEPQ DX, R14, DX // c4e28bf5d2
+ PDEPQ R11, R14, DX // c4c28bf5d3
+ PDEPQ (BX), R14, R11 // c4628bf51b
+ PDEPQ (R11), R14, R11 // c4428bf51b
+ PDEPQ DX, R14, R11 // c4628bf5da
+ PDEPQ R11, R14, R11 // c4428bf5db
+ PEXTL (BX), R9, DX // c4e232f513
+ PEXTL (R11), R9, DX // c4c232f513
+ PEXTL DX, R9, DX // c4e232f5d2
+ PEXTL R11, R9, DX // c4c232f5d3
+ PEXTL (BX), R9, R11 // c46232f51b
+ PEXTL (R11), R9, R11 // c44232f51b
+ PEXTL DX, R9, R11 // c46232f5da
+ PEXTL R11, R9, R11 // c44232f5db
+ PEXTQ (BX), R14, DX // c4e28af513
+ PEXTQ (R11), R14, DX // c4c28af513
+ PEXTQ DX, R14, DX // c4e28af5d2
+ PEXTQ R11, R14, DX // c4c28af5d3
+ PEXTQ (BX), R14, R11 // c4628af51b
+ PEXTQ (R11), R14, R11 // c4428af51b
+ PEXTQ DX, R14, R11 // c4628af5da
+ PEXTQ R11, R14, R11 // c4428af5db
+ PEXTRB $7, X2, (BX) // 660f3a141307
+ PEXTRB $7, X11, (BX) // 66440f3a141b07
+ PEXTRB $7, X2, (R11) // 66410f3a141307
+ PEXTRB $7, X11, (R11) // 66450f3a141b07
+ PEXTRB $7, X2, DX // 660f3a14d207
+ PEXTRB $7, X11, DX // 66440f3a14da07
+ PEXTRB $7, X2, R11 // 66410f3a14d307
+ PEXTRB $7, X11, R11 // 66450f3a14db07
+ PEXTRD $7, X2, (BX) // 660f3a161307
+ PEXTRD $7, X11, (BX) // 66440f3a161b07
+ PEXTRD $7, X2, (R11) // 66410f3a161307
+ PEXTRD $7, X11, (R11) // 66450f3a161b07
+ PEXTRD $7, X2, DX // 660f3a16d207
+ PEXTRD $7, X11, DX // 66440f3a16da07
+ PEXTRD $7, X2, R11 // 66410f3a16d307
+ PEXTRD $7, X11, R11 // 66450f3a16db07
+ PEXTRQ $7, X2, (BX) // 66480f3a161307
+ PEXTRQ $7, X11, (BX) // 664c0f3a161b07
+ PEXTRQ $7, X2, (R11) // 66490f3a161307
+ PEXTRQ $7, X11, (R11) // 664d0f3a161b07
+ PEXTRQ $7, X2, DX // 66480f3a16d207
+ PEXTRQ $7, X11, DX // 664c0f3a16da07
+ PEXTRQ $7, X2, R11 // 66490f3a16d307
+ PEXTRQ $7, X11, R11 // 664d0f3a16db07
+ //TODO: PEXTRW $7, M2, DX // 0fc5d207
+ //TODO: PEXTRW $7, M3, DX // 0fc5d307
+ //TODO: PEXTRW $7, M2, R11 // 440fc5da07
+ //TODO: PEXTRW $7, M3, R11 // 440fc5db07
+ PEXTRW $7, X2, DX // 660fc5d207 or 660f3a15d207
+ PEXTRW $7, X11, DX // 66410fc5d307 or 66440f3a15da07
+ PEXTRW $7, X2, R11 // 66440fc5da07 or 66410f3a15d307
+ PEXTRW $7, X11, R11 // 66450fc5db07 or 66450f3a15db07
+ PEXTRW $7, X2, (BX) // 660f3a151307
+ PEXTRW $7, X11, (BX) // 66440f3a151b07
+ PEXTRW $7, X2, (R11) // 66410f3a151307
+ PEXTRW $7, X11, (R11) // 66450f3a151b07
+ PHADDD (BX), M2 // 0f380213
+ PHADDD (R11), M2 // 410f380213
+ PHADDD M2, M2 // 0f3802d2
+ PHADDD M3, M2 // 0f3802d3
+ PHADDD (BX), M3 // 0f38021b
+ PHADDD (R11), M3 // 410f38021b
+ PHADDD M2, M3 // 0f3802da
+ PHADDD M3, M3 // 0f3802db
+ PHADDD (BX), X2 // 660f380213
+ PHADDD (R11), X2 // 66410f380213
+ PHADDD X2, X2 // 660f3802d2
+ PHADDD X11, X2 // 66410f3802d3
+ PHADDD (BX), X11 // 66440f38021b
+ PHADDD (R11), X11 // 66450f38021b
+ PHADDD X2, X11 // 66440f3802da
+ PHADDD X11, X11 // 66450f3802db
+ //TODO: PHADDSW (BX), M2 // 0f380313
+ //TODO: PHADDSW (R11), M2 // 410f380313
+ //TODO: PHADDSW M2, M2 // 0f3803d2
+ //TODO: PHADDSW M3, M2 // 0f3803d3
+ //TODO: PHADDSW (BX), M3 // 0f38031b
+ //TODO: PHADDSW (R11), M3 // 410f38031b
+ //TODO: PHADDSW M2, M3 // 0f3803da
+ //TODO: PHADDSW M3, M3 // 0f3803db
+ PHADDSW (BX), X2 // 660f380313
+ PHADDSW (R11), X2 // 66410f380313
+ PHADDSW X2, X2 // 660f3803d2
+ PHADDSW X11, X2 // 66410f3803d3
+ PHADDSW (BX), X11 // 66440f38031b
+ PHADDSW (R11), X11 // 66450f38031b
+ PHADDSW X2, X11 // 66440f3803da
+ PHADDSW X11, X11 // 66450f3803db
+ //TODO: PHADDW (BX), M2 // 0f380113
+ //TODO: PHADDW (R11), M2 // 410f380113
+ //TODO: PHADDW M2, M2 // 0f3801d2
+ //TODO: PHADDW M3, M2 // 0f3801d3
+ //TODO: PHADDW (BX), M3 // 0f38011b
+ //TODO: PHADDW (R11), M3 // 410f38011b
+ //TODO: PHADDW M2, M3 // 0f3801da
+ //TODO: PHADDW M3, M3 // 0f3801db
+ PHADDW (BX), X2 // 660f380113
+ PHADDW (R11), X2 // 66410f380113
+ PHADDW X2, X2 // 660f3801d2
+ PHADDW X11, X2 // 66410f3801d3
+ PHADDW (BX), X11 // 66440f38011b
+ PHADDW (R11), X11 // 66450f38011b
+ PHADDW X2, X11 // 66440f3801da
+ PHADDW X11, X11 // 66450f3801db
+ PHMINPOSUW (BX), X2 // 660f384113
+ PHMINPOSUW (R11), X2 // 66410f384113
+ PHMINPOSUW X2, X2 // 660f3841d2
+ PHMINPOSUW X11, X2 // 66410f3841d3
+ PHMINPOSUW (BX), X11 // 66440f38411b
+ PHMINPOSUW (R11), X11 // 66450f38411b
+ PHMINPOSUW X2, X11 // 66440f3841da
+ PHMINPOSUW X11, X11 // 66450f3841db
+ //TODO: PHSUBD (BX), M2 // 0f380613
+ //TODO: PHSUBD (R11), M2 // 410f380613
+ //TODO: PHSUBD M2, M2 // 0f3806d2
+ //TODO: PHSUBD M3, M2 // 0f3806d3
+ //TODO: PHSUBD (BX), M3 // 0f38061b
+ //TODO: PHSUBD (R11), M3 // 410f38061b
+ //TODO: PHSUBD M2, M3 // 0f3806da
+ //TODO: PHSUBD M3, M3 // 0f3806db
+ PHSUBD (BX), X2 // 660f380613
+ PHSUBD (R11), X2 // 66410f380613
+ PHSUBD X2, X2 // 660f3806d2
+ PHSUBD X11, X2 // 66410f3806d3
+ PHSUBD (BX), X11 // 66440f38061b
+ PHSUBD (R11), X11 // 66450f38061b
+ PHSUBD X2, X11 // 66440f3806da
+ PHSUBD X11, X11 // 66450f3806db
+ //TODO: PHSUBSW (BX), M2 // 0f380713
+ //TODO: PHSUBSW (R11), M2 // 410f380713
+ //TODO: PHSUBSW M2, M2 // 0f3807d2
+ //TODO: PHSUBSW M3, M2 // 0f3807d3
+ //TODO: PHSUBSW (BX), M3 // 0f38071b
+ //TODO: PHSUBSW (R11), M3 // 410f38071b
+ //TODO: PHSUBSW M2, M3 // 0f3807da
+ //TODO: PHSUBSW M3, M3 // 0f3807db
+ PHSUBSW (BX), X2 // 660f380713
+ PHSUBSW (R11), X2 // 66410f380713
+ PHSUBSW X2, X2 // 660f3807d2
+ PHSUBSW X11, X2 // 66410f3807d3
+ PHSUBSW (BX), X11 // 66440f38071b
+ PHSUBSW (R11), X11 // 66450f38071b
+ PHSUBSW X2, X11 // 66440f3807da
+ PHSUBSW X11, X11 // 66450f3807db
+ //TODO: PHSUBW (BX), M2 // 0f380513
+ //TODO: PHSUBW (R11), M2 // 410f380513
+ //TODO: PHSUBW M2, M2 // 0f3805d2
+ //TODO: PHSUBW M3, M2 // 0f3805d3
+ //TODO: PHSUBW (BX), M3 // 0f38051b
+ //TODO: PHSUBW (R11), M3 // 410f38051b
+ //TODO: PHSUBW M2, M3 // 0f3805da
+ //TODO: PHSUBW M3, M3 // 0f3805db
+ PHSUBW (BX), X2 // 660f380513
+ PHSUBW (R11), X2 // 66410f380513
+ PHSUBW X2, X2 // 660f3805d2
+ PHSUBW X11, X2 // 66410f3805d3
+ PHSUBW (BX), X11 // 66440f38051b
+ PHSUBW (R11), X11 // 66450f38051b
+ PHSUBW X2, X11 // 66440f3805da
+ PHSUBW X11, X11 // 66450f3805db
+ PINSRB $7, (BX), X2 // 660f3a201307
+ PINSRB $7, (R11), X2 // 66410f3a201307
+ PINSRB $7, DX, X2 // 660f3a20d207
+ PINSRB $7, R11, X2 // 66410f3a20d307
+ PINSRB $7, (BX), X11 // 66440f3a201b07
+ PINSRB $7, (R11), X11 // 66450f3a201b07
+ PINSRB $7, DX, X11 // 66440f3a20da07
+ PINSRB $7, R11, X11 // 66450f3a20db07
+ PINSRD $7, (BX), X2 // 660f3a221307
+ PINSRD $7, (R11), X2 // 66410f3a221307
+ PINSRD $7, DX, X2 // 660f3a22d207
+ PINSRD $7, R11, X2 // 66410f3a22d307
+ PINSRD $7, (BX), X11 // 66440f3a221b07
+ PINSRD $7, (R11), X11 // 66450f3a221b07
+ PINSRD $7, DX, X11 // 66440f3a22da07
+ PINSRD $7, R11, X11 // 66450f3a22db07
+ PINSRQ $7, (BX), X2 // 66480f3a221307
+ PINSRQ $7, (R11), X2 // 66490f3a221307
+ PINSRQ $7, DX, X2 // 66480f3a22d207
+ PINSRQ $7, R11, X2 // 66490f3a22d307
+ PINSRQ $7, (BX), X11 // 664c0f3a221b07
+ PINSRQ $7, (R11), X11 // 664d0f3a221b07
+ PINSRQ $7, DX, X11 // 664c0f3a22da07
+ PINSRQ $7, R11, X11 // 664d0f3a22db07
+ //TODO: PINSRW $7, (BX), M2 // 0fc41307
+ //TODO: PINSRW $7, (R11), M2 // 410fc41307
+ //TODO: PINSRW $7, DX, M2 // 0fc4d207
+ //TODO: PINSRW $7, R11, M2 // 410fc4d307
+ //TODO: PINSRW $7, (BX), M3 // 0fc41b07
+ //TODO: PINSRW $7, (R11), M3 // 410fc41b07
+ //TODO: PINSRW $7, DX, M3 // 0fc4da07
+ //TODO: PINSRW $7, R11, M3 // 410fc4db07
+ PINSRW $7, (BX), X2 // 660fc41307
+ PINSRW $7, (R11), X2 // 66410fc41307
+ PINSRW $7, DX, X2 // 660fc4d207
+ PINSRW $7, R11, X2 // 66410fc4d307
+ PINSRW $7, (BX), X11 // 66440fc41b07
+ PINSRW $7, (R11), X11 // 66450fc41b07
+ PINSRW $7, DX, X11 // 66440fc4da07
+ PINSRW $7, R11, X11 // 66450fc4db07
+ //TODO: PMADDUBSW (BX), M2 // 0f380413
+ //TODO: PMADDUBSW (R11), M2 // 410f380413
+ //TODO: PMADDUBSW M2, M2 // 0f3804d2
+ //TODO: PMADDUBSW M3, M2 // 0f3804d3
+ //TODO: PMADDUBSW (BX), M3 // 0f38041b
+ //TODO: PMADDUBSW (R11), M3 // 410f38041b
+ //TODO: PMADDUBSW M2, M3 // 0f3804da
+ //TODO: PMADDUBSW M3, M3 // 0f3804db
+ PMADDUBSW (BX), X2 // 660f380413
+ PMADDUBSW (R11), X2 // 66410f380413
+ PMADDUBSW X2, X2 // 660f3804d2
+ PMADDUBSW X11, X2 // 66410f3804d3
+ PMADDUBSW (BX), X11 // 66440f38041b
+ PMADDUBSW (R11), X11 // 66450f38041b
+ PMADDUBSW X2, X11 // 66440f3804da
+ PMADDUBSW X11, X11 // 66450f3804db
+ PMADDWL (BX), M2 // 0ff513
+ PMADDWL (R11), M2 // 410ff513
+ PMADDWL M2, M2 // 0ff5d2
+ PMADDWL M3, M2 // 0ff5d3
+ PMADDWL (BX), M3 // 0ff51b
+ PMADDWL (R11), M3 // 410ff51b
+ PMADDWL M2, M3 // 0ff5da
+ PMADDWL M3, M3 // 0ff5db
+ PMADDWL (BX), X2 // 660ff513
+ PMADDWL (R11), X2 // 66410ff513
+ PMADDWL X2, X2 // 660ff5d2
+ PMADDWL X11, X2 // 66410ff5d3
+ PMADDWL (BX), X11 // 66440ff51b
+ PMADDWL (R11), X11 // 66450ff51b
+ PMADDWL X2, X11 // 66440ff5da
+ PMADDWL X11, X11 // 66450ff5db
+ PMAXSB (BX), X2 // 660f383c13
+ PMAXSB (R11), X2 // 66410f383c13
+ PMAXSB X2, X2 // 660f383cd2
+ PMAXSB X11, X2 // 66410f383cd3
+ PMAXSB (BX), X11 // 66440f383c1b
+ PMAXSB (R11), X11 // 66450f383c1b
+ PMAXSB X2, X11 // 66440f383cda
+ PMAXSB X11, X11 // 66450f383cdb
+ PMAXSD (BX), X2 // 660f383d13
+ PMAXSD (R11), X2 // 66410f383d13
+ PMAXSD X2, X2 // 660f383dd2
+ PMAXSD X11, X2 // 66410f383dd3
+ PMAXSD (BX), X11 // 66440f383d1b
+ PMAXSD (R11), X11 // 66450f383d1b
+ PMAXSD X2, X11 // 66440f383dda
+ PMAXSD X11, X11 // 66450f383ddb
+ //TODO: PMAXSW (BX), M2 // 0fee13
+ //TODO: PMAXSW (R11), M2 // 410fee13
+ //TODO: PMAXSW M2, M2 // 0feed2
+ //TODO: PMAXSW M3, M2 // 0feed3
+ //TODO: PMAXSW (BX), M3 // 0fee1b
+ //TODO: PMAXSW (R11), M3 // 410fee1b
+ //TODO: PMAXSW M2, M3 // 0feeda
+ //TODO: PMAXSW M3, M3 // 0feedb
+ PMAXSW (BX), X2 // 660fee13
+ PMAXSW (R11), X2 // 66410fee13
+ PMAXSW X2, X2 // 660feed2
+ PMAXSW X11, X2 // 66410feed3
+ PMAXSW (BX), X11 // 66440fee1b
+ PMAXSW (R11), X11 // 66450fee1b
+ PMAXSW X2, X11 // 66440feeda
+ PMAXSW X11, X11 // 66450feedb
+ //TODO: PMAXUB (BX), M2 // 0fde13
+ //TODO: PMAXUB (R11), M2 // 410fde13
+ //TODO: PMAXUB M2, M2 // 0fded2
+ //TODO: PMAXUB M3, M2 // 0fded3
+ //TODO: PMAXUB (BX), M3 // 0fde1b
+ //TODO: PMAXUB (R11), M3 // 410fde1b
+ //TODO: PMAXUB M2, M3 // 0fdeda
+ //TODO: PMAXUB M3, M3 // 0fdedb
+ PMAXUB (BX), X2 // 660fde13
+ PMAXUB (R11), X2 // 66410fde13
+ PMAXUB X2, X2 // 660fded2
+ PMAXUB X11, X2 // 66410fded3
+ PMAXUB (BX), X11 // 66440fde1b
+ PMAXUB (R11), X11 // 66450fde1b
+ PMAXUB X2, X11 // 66440fdeda
+ PMAXUB X11, X11 // 66450fdedb
+ PMAXUD (BX), X2 // 660f383f13
+ PMAXUD (R11), X2 // 66410f383f13
+ PMAXUD X2, X2 // 660f383fd2
+ PMAXUD X11, X2 // 66410f383fd3
+ PMAXUD (BX), X11 // 66440f383f1b
+ PMAXUD (R11), X11 // 66450f383f1b
+ PMAXUD X2, X11 // 66440f383fda
+ PMAXUD X11, X11 // 66450f383fdb
+ PMAXUW (BX), X2 // 660f383e13
+ PMAXUW (R11), X2 // 66410f383e13
+ PMAXUW X2, X2 // 660f383ed2
+ PMAXUW X11, X2 // 66410f383ed3
+ PMAXUW (BX), X11 // 66440f383e1b
+ PMAXUW (R11), X11 // 66450f383e1b
+ PMAXUW X2, X11 // 66440f383eda
+ PMAXUW X11, X11 // 66450f383edb
+ PMINSB (BX), X2 // 660f383813
+ PMINSB (R11), X2 // 66410f383813
+ PMINSB X2, X2 // 660f3838d2
+ PMINSB X11, X2 // 66410f3838d3
+ PMINSB (BX), X11 // 66440f38381b
+ PMINSB (R11), X11 // 66450f38381b
+ PMINSB X2, X11 // 66440f3838da
+ PMINSB X11, X11 // 66450f3838db
+ PMINSD (BX), X2 // 660f383913
+ PMINSD (R11), X2 // 66410f383913
+ PMINSD X2, X2 // 660f3839d2
+ PMINSD X11, X2 // 66410f3839d3
+ PMINSD (BX), X11 // 66440f38391b
+ PMINSD (R11), X11 // 66450f38391b
+ PMINSD X2, X11 // 66440f3839da
+ PMINSD X11, X11 // 66450f3839db
+ //TODO: PMINSW (BX), M2 // 0fea13
+ //TODO: PMINSW (R11), M2 // 410fea13
+ //TODO: PMINSW M2, M2 // 0fead2
+ //TODO: PMINSW M3, M2 // 0fead3
+ //TODO: PMINSW (BX), M3 // 0fea1b
+ //TODO: PMINSW (R11), M3 // 410fea1b
+ //TODO: PMINSW M2, M3 // 0feada
+ //TODO: PMINSW M3, M3 // 0feadb
+ PMINSW (BX), X2 // 660fea13
+ PMINSW (R11), X2 // 66410fea13
+ PMINSW X2, X2 // 660fead2
+ PMINSW X11, X2 // 66410fead3
+ PMINSW (BX), X11 // 66440fea1b
+ PMINSW (R11), X11 // 66450fea1b
+ PMINSW X2, X11 // 66440feada
+ PMINSW X11, X11 // 66450feadb
+ //TODO: PMINUB (BX), M2 // 0fda13
+ //TODO: PMINUB (R11), M2 // 410fda13
+ //TODO: PMINUB M2, M2 // 0fdad2
+ //TODO: PMINUB M3, M2 // 0fdad3
+ //TODO: PMINUB (BX), M3 // 0fda1b
+ //TODO: PMINUB (R11), M3 // 410fda1b
+ //TODO: PMINUB M2, M3 // 0fdada
+ //TODO: PMINUB M3, M3 // 0fdadb
+ PMINUB (BX), X2 // 660fda13
+ PMINUB (R11), X2 // 66410fda13
+ PMINUB X2, X2 // 660fdad2
+ PMINUB X11, X2 // 66410fdad3
+ PMINUB (BX), X11 // 66440fda1b
+ PMINUB (R11), X11 // 66450fda1b
+ PMINUB X2, X11 // 66440fdada
+ PMINUB X11, X11 // 66450fdadb
+ PMINUD (BX), X2 // 660f383b13
+ PMINUD (R11), X2 // 66410f383b13
+ PMINUD X2, X2 // 660f383bd2
+ PMINUD X11, X2 // 66410f383bd3
+ PMINUD (BX), X11 // 66440f383b1b
+ PMINUD (R11), X11 // 66450f383b1b
+ PMINUD X2, X11 // 66440f383bda
+ PMINUD X11, X11 // 66450f383bdb
+ PMINUW (BX), X2 // 660f383a13
+ PMINUW (R11), X2 // 66410f383a13
+ PMINUW X2, X2 // 660f383ad2
+ PMINUW X11, X2 // 66410f383ad3
+ PMINUW (BX), X11 // 66440f383a1b
+ PMINUW (R11), X11 // 66450f383a1b
+ PMINUW X2, X11 // 66440f383ada
+ PMINUW X11, X11 // 66450f383adb
+ PMOVMSKB M2, DX // 0fd7d2
+ PMOVMSKB M3, DX // 0fd7d3
+ PMOVMSKB M2, R11 // 440fd7da
+ PMOVMSKB M3, R11 // 440fd7db
+ PMOVMSKB X2, DX // 660fd7d2
+ PMOVMSKB X11, DX // 66410fd7d3
+ PMOVMSKB X2, R11 // 66440fd7da
+ PMOVMSKB X11, R11 // 66450fd7db
+ PMOVSXBD (BX), X2 // 660f382113
+ PMOVSXBD (R11), X2 // 66410f382113
+ PMOVSXBD X2, X2 // 660f3821d2
+ PMOVSXBD X11, X2 // 66410f3821d3
+ PMOVSXBD (BX), X11 // 66440f38211b
+ PMOVSXBD (R11), X11 // 66450f38211b
+ PMOVSXBD X2, X11 // 66440f3821da
+ PMOVSXBD X11, X11 // 66450f3821db
+ PMOVSXBQ (BX), X2 // 660f382213
+ PMOVSXBQ (R11), X2 // 66410f382213
+ PMOVSXBQ X2, X2 // 660f3822d2
+ PMOVSXBQ X11, X2 // 66410f3822d3
+ PMOVSXBQ (BX), X11 // 66440f38221b
+ PMOVSXBQ (R11), X11 // 66450f38221b
+ PMOVSXBQ X2, X11 // 66440f3822da
+ PMOVSXBQ X11, X11 // 66450f3822db
+ PMOVSXBW (BX), X2 // 660f382013
+ PMOVSXBW (R11), X2 // 66410f382013
+ PMOVSXBW X2, X2 // 660f3820d2
+ PMOVSXBW X11, X2 // 66410f3820d3
+ PMOVSXBW (BX), X11 // 66440f38201b
+ PMOVSXBW (R11), X11 // 66450f38201b
+ PMOVSXBW X2, X11 // 66440f3820da
+ PMOVSXBW X11, X11 // 66450f3820db
+ PMOVSXDQ (BX), X2 // 660f382513
+ PMOVSXDQ (R11), X2 // 66410f382513
+ PMOVSXDQ X2, X2 // 660f3825d2
+ PMOVSXDQ X11, X2 // 66410f3825d3
+ PMOVSXDQ (BX), X11 // 66440f38251b
+ PMOVSXDQ (R11), X11 // 66450f38251b
+ PMOVSXDQ X2, X11 // 66440f3825da
+ PMOVSXDQ X11, X11 // 66450f3825db
+ PMOVSXWD (BX), X2 // 660f382313
+ PMOVSXWD (R11), X2 // 66410f382313
+ PMOVSXWD X2, X2 // 660f3823d2
+ PMOVSXWD X11, X2 // 66410f3823d3
+ PMOVSXWD (BX), X11 // 66440f38231b
+ PMOVSXWD (R11), X11 // 66450f38231b
+ PMOVSXWD X2, X11 // 66440f3823da
+ PMOVSXWD X11, X11 // 66450f3823db
+ PMOVSXWQ (BX), X2 // 660f382413
+ PMOVSXWQ (R11), X2 // 66410f382413
+ PMOVSXWQ X2, X2 // 660f3824d2
+ PMOVSXWQ X11, X2 // 66410f3824d3
+ PMOVSXWQ (BX), X11 // 66440f38241b
+ PMOVSXWQ (R11), X11 // 66450f38241b
+ PMOVSXWQ X2, X11 // 66440f3824da
+ PMOVSXWQ X11, X11 // 66450f3824db
+ PMOVZXBD (BX), X2 // 660f383113
+ PMOVZXBD (R11), X2 // 66410f383113
+ PMOVZXBD X2, X2 // 660f3831d2
+ PMOVZXBD X11, X2 // 66410f3831d3
+ PMOVZXBD (BX), X11 // 66440f38311b
+ PMOVZXBD (R11), X11 // 66450f38311b
+ PMOVZXBD X2, X11 // 66440f3831da
+ PMOVZXBD X11, X11 // 66450f3831db
+ PMOVZXBQ (BX), X2 // 660f383213
+ PMOVZXBQ (R11), X2 // 66410f383213
+ PMOVZXBQ X2, X2 // 660f3832d2
+ PMOVZXBQ X11, X2 // 66410f3832d3
+ PMOVZXBQ (BX), X11 // 66440f38321b
+ PMOVZXBQ (R11), X11 // 66450f38321b
+ PMOVZXBQ X2, X11 // 66440f3832da
+ PMOVZXBQ X11, X11 // 66450f3832db
+ PMOVZXBW (BX), X2 // 660f383013
+ PMOVZXBW (R11), X2 // 66410f383013
+ PMOVZXBW X2, X2 // 660f3830d2
+ PMOVZXBW X11, X2 // 66410f3830d3
+ PMOVZXBW (BX), X11 // 66440f38301b
+ PMOVZXBW (R11), X11 // 66450f38301b
+ PMOVZXBW X2, X11 // 66440f3830da
+ PMOVZXBW X11, X11 // 66450f3830db
+ PMOVZXDQ (BX), X2 // 660f383513
+ PMOVZXDQ (R11), X2 // 66410f383513
+ PMOVZXDQ X2, X2 // 660f3835d2
+ PMOVZXDQ X11, X2 // 66410f3835d3
+ PMOVZXDQ (BX), X11 // 66440f38351b
+ PMOVZXDQ (R11), X11 // 66450f38351b
+ PMOVZXDQ X2, X11 // 66440f3835da
+ PMOVZXDQ X11, X11 // 66450f3835db
+ PMOVZXWD (BX), X2 // 660f383313
+ PMOVZXWD (R11), X2 // 66410f383313
+ PMOVZXWD X2, X2 // 660f3833d2
+ PMOVZXWD X11, X2 // 66410f3833d3
+ PMOVZXWD (BX), X11 // 66440f38331b
+ PMOVZXWD (R11), X11 // 66450f38331b
+ PMOVZXWD X2, X11 // 66440f3833da
+ PMOVZXWD X11, X11 // 66450f3833db
+ PMOVZXWQ (BX), X2 // 660f383413
+ PMOVZXWQ (R11), X2 // 66410f383413
+ PMOVZXWQ X2, X2 // 660f3834d2
+ PMOVZXWQ X11, X2 // 66410f3834d3
+ PMOVZXWQ (BX), X11 // 66440f38341b
+ PMOVZXWQ (R11), X11 // 66450f38341b
+ PMOVZXWQ X2, X11 // 66440f3834da
+ PMOVZXWQ X11, X11 // 66450f3834db
+ PMULDQ (BX), X2 // 660f382813
+ PMULDQ (R11), X2 // 66410f382813
+ PMULDQ X2, X2 // 660f3828d2
+ PMULDQ X11, X2 // 66410f3828d3
+ PMULDQ (BX), X11 // 66440f38281b
+ PMULDQ (R11), X11 // 66450f38281b
+ PMULDQ X2, X11 // 66440f3828da
+ PMULDQ X11, X11 // 66450f3828db
+ //TODO: PMULHRSW (BX), M2 // 0f380b13
+ //TODO: PMULHRSW (R11), M2 // 410f380b13
+ //TODO: PMULHRSW M2, M2 // 0f380bd2
+ //TODO: PMULHRSW M3, M2 // 0f380bd3
+ //TODO: PMULHRSW (BX), M3 // 0f380b1b
+ //TODO: PMULHRSW (R11), M3 // 410f380b1b
+ //TODO: PMULHRSW M2, M3 // 0f380bda
+ //TODO: PMULHRSW M3, M3 // 0f380bdb
+ PMULHRSW (BX), X2 // 660f380b13
+ PMULHRSW (R11), X2 // 66410f380b13
+ PMULHRSW X2, X2 // 660f380bd2
+ PMULHRSW X11, X2 // 66410f380bd3
+ PMULHRSW (BX), X11 // 66440f380b1b
+ PMULHRSW (R11), X11 // 66450f380b1b
+ PMULHRSW X2, X11 // 66440f380bda
+ PMULHRSW X11, X11 // 66450f380bdb
+ PMULHUW (BX), M2 // 0fe413
+ PMULHUW (R11), M2 // 410fe413
+ PMULHUW M2, M2 // 0fe4d2
+ PMULHUW M3, M2 // 0fe4d3
+ PMULHUW (BX), M3 // 0fe41b
+ PMULHUW (R11), M3 // 410fe41b
+ PMULHUW M2, M3 // 0fe4da
+ PMULHUW M3, M3 // 0fe4db
+ PMULHUW (BX), X2 // 660fe413
+ PMULHUW (R11), X2 // 66410fe413
+ PMULHUW X2, X2 // 660fe4d2
+ PMULHUW X11, X2 // 66410fe4d3
+ PMULHUW (BX), X11 // 66440fe41b
+ PMULHUW (R11), X11 // 66450fe41b
+ PMULHUW X2, X11 // 66440fe4da
+ PMULHUW X11, X11 // 66450fe4db
+ PMULHW (BX), M2 // 0fe513
+ PMULHW (R11), M2 // 410fe513
+ PMULHW M2, M2 // 0fe5d2
+ PMULHW M3, M2 // 0fe5d3
+ PMULHW (BX), M3 // 0fe51b
+ PMULHW (R11), M3 // 410fe51b
+ PMULHW M2, M3 // 0fe5da
+ PMULHW M3, M3 // 0fe5db
+ PMULHW (BX), X2 // 660fe513
+ PMULHW (R11), X2 // 66410fe513
+ PMULHW X2, X2 // 660fe5d2
+ PMULHW X11, X2 // 66410fe5d3
+ PMULHW (BX), X11 // 66440fe51b
+ PMULHW (R11), X11 // 66450fe51b
+ PMULHW X2, X11 // 66440fe5da
+ PMULHW X11, X11 // 66450fe5db
+ PMULLD (BX), X2 // 660f384013
+ PMULLD (R11), X2 // 66410f384013
+ PMULLD X2, X2 // 660f3840d2
+ PMULLD X11, X2 // 66410f3840d3
+ PMULLD (BX), X11 // 66440f38401b
+ PMULLD (R11), X11 // 66450f38401b
+ PMULLD X2, X11 // 66440f3840da
+ PMULLD X11, X11 // 66450f3840db
+ PMULLW (BX), M2 // 0fd513
+ PMULLW (R11), M2 // 410fd513
+ PMULLW M2, M2 // 0fd5d2
+ PMULLW M3, M2 // 0fd5d3
+ PMULLW (BX), M3 // 0fd51b
+ PMULLW (R11), M3 // 410fd51b
+ PMULLW M2, M3 // 0fd5da
+ PMULLW M3, M3 // 0fd5db
+ PMULLW (BX), X2 // 660fd513
+ PMULLW (R11), X2 // 66410fd513
+ PMULLW X2, X2 // 660fd5d2
+ PMULLW X11, X2 // 66410fd5d3
+ PMULLW (BX), X11 // 66440fd51b
+ PMULLW (R11), X11 // 66450fd51b
+ PMULLW X2, X11 // 66440fd5da
+ PMULLW X11, X11 // 66450fd5db
+ PMULULQ (BX), M2 // 0ff413
+ PMULULQ (R11), M2 // 410ff413
+ PMULULQ M2, M2 // 0ff4d2
+ PMULULQ M3, M2 // 0ff4d3
+ PMULULQ (BX), M3 // 0ff41b
+ PMULULQ (R11), M3 // 410ff41b
+ PMULULQ M2, M3 // 0ff4da
+ PMULULQ M3, M3 // 0ff4db
+ PMULULQ (BX), X2 // 660ff413
+ PMULULQ (R11), X2 // 66410ff413
+ PMULULQ X2, X2 // 660ff4d2
+ PMULULQ X11, X2 // 66410ff4d3
+ PMULULQ (BX), X11 // 66440ff41b
+ PMULULQ (R11), X11 // 66450ff41b
+ PMULULQ X2, X11 // 66440ff4da
+ PMULULQ X11, X11 // 66450ff4db
+ PUSHQ AX
+ POPQ FS // 660fa1 or 0fa1
+ PUSHQ AX
+ POPQ GS // 660fa9 or 0fa9
+ PUSHW AX
+ POPW (BX) // 668f03
+ PUSHW AX
+ POPW (R11) // 66418f03
+ PUSHW AX
+ POPW DX // 668fc2 or 665a
+ PUSHW AX
+ POPW R11 // 66418fc3 or 66415b
+ PUSHQ AX
+ POPQ (BX) // 8f03
+ PUSHQ AX
+ POPQ (R11) // 418f03
+ PUSHQ AX
+ POPQ DX // 8fc2 or 5a
+ PUSHQ AX
+ POPQ R11 // 418fc3 or 415b
+ POPCNTW (BX), DX // 66f30fb813
+ POPCNTW (R11), DX // 66f3410fb813
+ POPCNTW DX, DX // 66f30fb8d2
+ POPCNTW R11, DX // 66f3410fb8d3
+ POPCNTW (BX), R11 // 66f3440fb81b
+ POPCNTW (R11), R11 // 66f3450fb81b
+ POPCNTW DX, R11 // 66f3440fb8da
+ POPCNTW R11, R11 // 66f3450fb8db
+ POPCNTL (BX), DX // f30fb813
+ POPCNTL (R11), DX // f3410fb813
+ POPCNTL DX, DX // f30fb8d2
+ POPCNTL R11, DX // f3410fb8d3
+ POPCNTL (BX), R11 // f3440fb81b
+ POPCNTL (R11), R11 // f3450fb81b
+ POPCNTL DX, R11 // f3440fb8da
+ POPCNTL R11, R11 // f3450fb8db
+ POPCNTQ (BX), DX // f3480fb813
+ POPCNTQ (R11), DX // f3490fb813
+ POPCNTQ DX, DX // f3480fb8d2
+ POPCNTQ R11, DX // f3490fb8d3
+ POPCNTQ (BX), R11 // f34c0fb81b
+ POPCNTQ (R11), R11 // f34d0fb81b
+ POPCNTQ DX, R11 // f34c0fb8da
+ POPCNTQ R11, R11 // f34d0fb8db
+ PUSHFW
+ POPFW // 669d
+ PUSHFQ
+ POPFQ // 9d
+ POR (BX), M2 // 0feb13
+ POR (R11), M2 // 410feb13
+ POR M2, M2 // 0febd2
+ POR M3, M2 // 0febd3
+ POR (BX), M3 // 0feb1b
+ POR (R11), M3 // 410feb1b
+ POR M2, M3 // 0febda
+ POR M3, M3 // 0febdb
+ POR (BX), X2 // 660feb13
+ POR (R11), X2 // 66410feb13
+ POR X2, X2 // 660febd2
+ POR X11, X2 // 66410febd3
+ POR (BX), X11 // 66440feb1b
+ POR (R11), X11 // 66450feb1b
+ POR X2, X11 // 66440febda
+ POR X11, X11 // 66450febdb
+ PREFETCHNTA (BX) // 0f1803
+ PREFETCHNTA (R11) // 410f1803
+ PREFETCHT0 (BX) // 0f180b
+ PREFETCHT0 (R11) // 410f180b
+ PREFETCHT1 (BX) // 0f1813
+ PREFETCHT1 (R11) // 410f1813
+ PREFETCHT2 (BX) // 0f181b
+ PREFETCHT2 (R11) // 410f181b
+ //TODO: PREFETCHW (BX) // 0f0d0b
+ //TODO: PREFETCHW (R11) // 410f0d0b
+ //TODO: PREFETCHWT1 (BX) // 0f0d13
+ //TODO: PREFETCHWT1 (R11) // 410f0d13
+ //TODO: PSADBW (BX), M2 // 0ff613
+ //TODO: PSADBW (R11), M2 // 410ff613
+ //TODO: PSADBW M2, M2 // 0ff6d2
+ //TODO: PSADBW M3, M2 // 0ff6d3
+ //TODO: PSADBW (BX), M3 // 0ff61b
+ //TODO: PSADBW (R11), M3 // 410ff61b
+ //TODO: PSADBW M2, M3 // 0ff6da
+ //TODO: PSADBW M3, M3 // 0ff6db
+ PSADBW (BX), X2 // 660ff613
+ PSADBW (R11), X2 // 66410ff613
+ PSADBW X2, X2 // 660ff6d2
+ PSADBW X11, X2 // 66410ff6d3
+ PSADBW (BX), X11 // 66440ff61b
+ PSADBW (R11), X11 // 66450ff61b
+ PSADBW X2, X11 // 66440ff6da
+ PSADBW X11, X11 // 66450ff6db
+ //TODO: PSHUFB (BX), M2 // 0f380013
+ //TODO: PSHUFB (R11), M2 // 410f380013
+ //TODO: PSHUFB M2, M2 // 0f3800d2
+ //TODO: PSHUFB M3, M2 // 0f3800d3
+ //TODO: PSHUFB (BX), M3 // 0f38001b
+ //TODO: PSHUFB (R11), M3 // 410f38001b
+ //TODO: PSHUFB M2, M3 // 0f3800da
+ //TODO: PSHUFB M3, M3 // 0f3800db
+ PSHUFB (BX), X2 // 660f380013
+ PSHUFB (R11), X2 // 66410f380013
+ PSHUFB X2, X2 // 660f3800d2
+ PSHUFB X11, X2 // 66410f3800d3
+ PSHUFB (BX), X11 // 66440f38001b
+ PSHUFB (R11), X11 // 66450f38001b
+ PSHUFB X2, X11 // 66440f3800da
+ PSHUFB X11, X11 // 66450f3800db
+ PSHUFD $7, (BX), X2 // 660f701307
+ PSHUFL $7, (BX), X2 // 660f701307
+ PSHUFD $7, (R11), X2 // 66410f701307
+ PSHUFL $7, (R11), X2 // 66410f701307
+ PSHUFD $7, X2, X2 // 660f70d207
+ PSHUFL $7, X2, X2 // 660f70d207
+ PSHUFD $7, X11, X2 // 66410f70d307
+ PSHUFL $7, X11, X2 // 66410f70d307
+ PSHUFD $7, (BX), X11 // 66440f701b07
+ PSHUFL $7, (BX), X11 // 66440f701b07
+ PSHUFD $7, (R11), X11 // 66450f701b07
+ PSHUFL $7, (R11), X11 // 66450f701b07
+ PSHUFD $7, X2, X11 // 66440f70da07
+ PSHUFL $7, X2, X11 // 66440f70da07
+ PSHUFD $7, X11, X11 // 66450f70db07
+ PSHUFL $7, X11, X11 // 66450f70db07
+ PSHUFHW $7, (BX), X2 // f30f701307
+ PSHUFHW $7, (R11), X2 // f3410f701307
+ PSHUFHW $7, X2, X2 // f30f70d207
+ PSHUFHW $7, X11, X2 // f3410f70d307
+ PSHUFHW $7, (BX), X11 // f3440f701b07
+ PSHUFHW $7, (R11), X11 // f3450f701b07
+ PSHUFHW $7, X2, X11 // f3440f70da07
+ PSHUFHW $7, X11, X11 // f3450f70db07
+ PSHUFLW $7, (BX), X2 // f20f701307
+ PSHUFLW $7, (R11), X2 // f2410f701307
+ PSHUFLW $7, X2, X2 // f20f70d207
+ PSHUFLW $7, X11, X2 // f2410f70d307
+ PSHUFLW $7, (BX), X11 // f2440f701b07
+ PSHUFLW $7, (R11), X11 // f2450f701b07
+ PSHUFLW $7, X2, X11 // f2440f70da07
+ PSHUFLW $7, X11, X11 // f2450f70db07
+ PSHUFW $7, (BX), M2 // 0f701307
+ PSHUFW $7, (R11), M2 // 410f701307
+ PSHUFW $7, M2, M2 // 0f70d207
+ PSHUFW $7, M3, M2 // 0f70d307
+ PSHUFW $7, (BX), M3 // 0f701b07
+ PSHUFW $7, (R11), M3 // 410f701b07
+ PSHUFW $7, M2, M3 // 0f70da07
+ PSHUFW $7, M3, M3 // 0f70db07
+ //TODO: PSIGNB (BX), M2 // 0f380813
+ //TODO: PSIGNB (R11), M2 // 410f380813
+ //TODO: PSIGNB M2, M2 // 0f3808d2
+ //TODO: PSIGNB M3, M2 // 0f3808d3
+ //TODO: PSIGNB (BX), M3 // 0f38081b
+ //TODO: PSIGNB (R11), M3 // 410f38081b
+ //TODO: PSIGNB M2, M3 // 0f3808da
+ //TODO: PSIGNB M3, M3 // 0f3808db
+ PSIGNB (BX), X2 // 660f380813
+ PSIGNB (R11), X2 // 66410f380813
+ PSIGNB X2, X2 // 660f3808d2
+ PSIGNB X11, X2 // 66410f3808d3
+ PSIGNB (BX), X11 // 66440f38081b
+ PSIGNB (R11), X11 // 66450f38081b
+ PSIGNB X2, X11 // 66440f3808da
+ PSIGNB X11, X11 // 66450f3808db
+ //TODO: PSIGND (BX), M2 // 0f380a13
+ //TODO: PSIGND (R11), M2 // 410f380a13
+ //TODO: PSIGND M2, M2 // 0f380ad2
+ //TODO: PSIGND M3, M2 // 0f380ad3
+ //TODO: PSIGND (BX), M3 // 0f380a1b
+ //TODO: PSIGND (R11), M3 // 410f380a1b
+ //TODO: PSIGND M2, M3 // 0f380ada
+ //TODO: PSIGND M3, M3 // 0f380adb
+ PSIGND (BX), X2 // 660f380a13
+ PSIGND (R11), X2 // 66410f380a13
+ PSIGND X2, X2 // 660f380ad2
+ PSIGND X11, X2 // 66410f380ad3
+ PSIGND (BX), X11 // 66440f380a1b
+ PSIGND (R11), X11 // 66450f380a1b
+ PSIGND X2, X11 // 66440f380ada
+ PSIGND X11, X11 // 66450f380adb
+ //TODO: PSIGNW (BX), M2 // 0f380913
+ //TODO: PSIGNW (R11), M2 // 410f380913
+ //TODO: PSIGNW M2, M2 // 0f3809d2
+ //TODO: PSIGNW M3, M2 // 0f3809d3
+ //TODO: PSIGNW (BX), M3 // 0f38091b
+ //TODO: PSIGNW (R11), M3 // 410f38091b
+ //TODO: PSIGNW M2, M3 // 0f3809da
+ //TODO: PSIGNW M3, M3 // 0f3809db
+ PSIGNW (BX), X2 // 660f380913
+ PSIGNW (R11), X2 // 66410f380913
+ PSIGNW X2, X2 // 660f3809d2
+ PSIGNW X11, X2 // 66410f3809d3
+ PSIGNW (BX), X11 // 66440f38091b
+ PSIGNW (R11), X11 // 66450f38091b
+ PSIGNW X2, X11 // 66440f3809da
+ PSIGNW X11, X11 // 66450f3809db
+ PSLLL (BX), M2 // 0ff213
+ PSLLL (R11), M2 // 410ff213
+ PSLLL M2, M2 // 0ff2d2
+ PSLLL M3, M2 // 0ff2d3
+ PSLLL (BX), M3 // 0ff21b
+ PSLLL (R11), M3 // 410ff21b
+ PSLLL M2, M3 // 0ff2da
+ PSLLL M3, M3 // 0ff2db
+ PSLLL $7, M2 // 0f72f207
+ PSLLL $7, M3 // 0f72f307
+ PSLLL (BX), X2 // 660ff213
+ PSLLL (R11), X2 // 66410ff213
+ PSLLL X2, X2 // 660ff2d2
+ PSLLL X11, X2 // 66410ff2d3
+ PSLLL (BX), X11 // 66440ff21b
+ PSLLL (R11), X11 // 66450ff21b
+ PSLLL X2, X11 // 66440ff2da
+ PSLLL X11, X11 // 66450ff2db
+ PSLLL $7, X2 // 660f72f207
+ PSLLL $7, X11 // 66410f72f307
+ PSLLO $7, X2 // 660f73fa07
+ PSLLO $7, X11 // 66410f73fb07
+ PSLLQ (BX), M2 // 0ff313
+ PSLLQ (R11), M2 // 410ff313
+ PSLLQ M2, M2 // 0ff3d2
+ PSLLQ M3, M2 // 0ff3d3
+ PSLLQ (BX), M3 // 0ff31b
+ PSLLQ (R11), M3 // 410ff31b
+ PSLLQ M2, M3 // 0ff3da
+ PSLLQ M3, M3 // 0ff3db
+ PSLLQ $7, M2 // 0f73f207
+ PSLLQ $7, M3 // 0f73f307
+ PSLLQ (BX), X2 // 660ff313
+ PSLLQ (R11), X2 // 66410ff313
+ PSLLQ X2, X2 // 660ff3d2
+ PSLLQ X11, X2 // 66410ff3d3
+ PSLLQ (BX), X11 // 66440ff31b
+ PSLLQ (R11), X11 // 66450ff31b
+ PSLLQ X2, X11 // 66440ff3da
+ PSLLQ X11, X11 // 66450ff3db
+ PSLLQ $7, X2 // 660f73f207
+ PSLLQ $7, X11 // 66410f73f307
+ PSLLW (BX), M2 // 0ff113
+ PSLLW (R11), M2 // 410ff113
+ PSLLW M2, M2 // 0ff1d2
+ PSLLW M3, M2 // 0ff1d3
+ PSLLW (BX), M3 // 0ff11b
+ PSLLW (R11), M3 // 410ff11b
+ PSLLW M2, M3 // 0ff1da
+ PSLLW M3, M3 // 0ff1db
+ PSLLW $7, M2 // 0f71f207
+ PSLLW $7, M3 // 0f71f307
+ PSLLW (BX), X2 // 660ff113
+ PSLLW (R11), X2 // 66410ff113
+ PSLLW X2, X2 // 660ff1d2
+ PSLLW X11, X2 // 66410ff1d3
+ PSLLW (BX), X11 // 66440ff11b
+ PSLLW (R11), X11 // 66450ff11b
+ PSLLW X2, X11 // 66440ff1da
+ PSLLW X11, X11 // 66450ff1db
+ PSLLW $7, X2 // 660f71f207
+ PSLLW $7, X11 // 66410f71f307
+ PSRAL (BX), M2 // 0fe213
+ PSRAL (R11), M2 // 410fe213
+ PSRAL M2, M2 // 0fe2d2
+ PSRAL M3, M2 // 0fe2d3
+ PSRAL (BX), M3 // 0fe21b
+ PSRAL (R11), M3 // 410fe21b
+ PSRAL M2, M3 // 0fe2da
+ PSRAL M3, M3 // 0fe2db
+ PSRAL $7, M2 // 0f72e207
+ PSRAL $7, M3 // 0f72e307
+ PSRAL (BX), X2 // 660fe213
+ PSRAL (R11), X2 // 66410fe213
+ PSRAL X2, X2 // 660fe2d2
+ PSRAL X11, X2 // 66410fe2d3
+ PSRAL (BX), X11 // 66440fe21b
+ PSRAL (R11), X11 // 66450fe21b
+ PSRAL X2, X11 // 66440fe2da
+ PSRAL X11, X11 // 66450fe2db
+ PSRAL $7, X2 // 660f72e207
+ PSRAL $7, X11 // 66410f72e307
+ PSRAW (BX), M2 // 0fe113
+ PSRAW (R11), M2 // 410fe113
+ PSRAW M2, M2 // 0fe1d2
+ PSRAW M3, M2 // 0fe1d3
+ PSRAW (BX), M3 // 0fe11b
+ PSRAW (R11), M3 // 410fe11b
+ PSRAW M2, M3 // 0fe1da
+ PSRAW M3, M3 // 0fe1db
+ PSRAW $7, M2 // 0f71e207
+ PSRAW $7, M3 // 0f71e307
+ PSRAW (BX), X2 // 660fe113
+ PSRAW (R11), X2 // 66410fe113
+ PSRAW X2, X2 // 660fe1d2
+ PSRAW X11, X2 // 66410fe1d3
+ PSRAW (BX), X11 // 66440fe11b
+ PSRAW (R11), X11 // 66450fe11b
+ PSRAW X2, X11 // 66440fe1da
+ PSRAW X11, X11 // 66450fe1db
+ PSRAW $7, X2 // 660f71e207
+ PSRAW $7, X11 // 66410f71e307
+ PSRLL (BX), M2 // 0fd213
+ PSRLL (R11), M2 // 410fd213
+ PSRLL M2, M2 // 0fd2d2
+ PSRLL M3, M2 // 0fd2d3
+ PSRLL (BX), M3 // 0fd21b
+ PSRLL (R11), M3 // 410fd21b
+ PSRLL M2, M3 // 0fd2da
+ PSRLL M3, M3 // 0fd2db
+ PSRLL $7, M2 // 0f72d207
+ PSRLL $7, M3 // 0f72d307
+ PSRLL (BX), X2 // 660fd213
+ PSRLL (R11), X2 // 66410fd213
+ PSRLL X2, X2 // 660fd2d2
+ PSRLL X11, X2 // 66410fd2d3
+ PSRLL (BX), X11 // 66440fd21b
+ PSRLL (R11), X11 // 66450fd21b
+ PSRLL X2, X11 // 66440fd2da
+ PSRLL X11, X11 // 66450fd2db
+ PSRLL $7, X2 // 660f72d207
+ PSRLL $7, X11 // 66410f72d307
+ PSRLO $7, X2 // 660f73da07
+ PSRLO $7, X11 // 66410f73db07
+ PSRLQ (BX), M2 // 0fd313
+ PSRLQ (R11), M2 // 410fd313
+ PSRLQ M2, M2 // 0fd3d2
+ PSRLQ M3, M2 // 0fd3d3
+ PSRLQ (BX), M3 // 0fd31b
+ PSRLQ (R11), M3 // 410fd31b
+ PSRLQ M2, M3 // 0fd3da
+ PSRLQ M3, M3 // 0fd3db
+ PSRLQ $7, M2 // 0f73d207
+ PSRLQ $7, M3 // 0f73d307
+ PSRLQ (BX), X2 // 660fd313
+ PSRLQ (R11), X2 // 66410fd313
+ PSRLQ X2, X2 // 660fd3d2
+ PSRLQ X11, X2 // 66410fd3d3
+ PSRLQ (BX), X11 // 66440fd31b
+ PSRLQ (R11), X11 // 66450fd31b
+ PSRLQ X2, X11 // 66440fd3da
+ PSRLQ X11, X11 // 66450fd3db
+ PSRLQ $7, X2 // 660f73d207
+ PSRLQ $7, X11 // 66410f73d307
+ PSRLW (BX), M2 // 0fd113
+ PSRLW (R11), M2 // 410fd113
+ PSRLW M2, M2 // 0fd1d2
+ PSRLW M3, M2 // 0fd1d3
+ PSRLW (BX), M3 // 0fd11b
+ PSRLW (R11), M3 // 410fd11b
+ PSRLW M2, M3 // 0fd1da
+ PSRLW M3, M3 // 0fd1db
+ PSRLW $7, M2 // 0f71d207
+ PSRLW $7, M3 // 0f71d307
+ PSRLW (BX), X2 // 660fd113
+ PSRLW (R11), X2 // 66410fd113
+ PSRLW X2, X2 // 660fd1d2
+ PSRLW X11, X2 // 66410fd1d3
+ PSRLW (BX), X11 // 66440fd11b
+ PSRLW (R11), X11 // 66450fd11b
+ PSRLW X2, X11 // 66440fd1da
+ PSRLW X11, X11 // 66450fd1db
+ PSRLW $7, X2 // 660f71d207
+ PSRLW $7, X11 // 66410f71d307
+ //TODO: PSUBB (BX), M2 // 0ff813
+ //TODO: PSUBB (R11), M2 // 410ff813
+ //TODO: PSUBB M2, M2 // 0ff8d2
+ //TODO: PSUBB M3, M2 // 0ff8d3
+ //TODO: PSUBB (BX), M3 // 0ff81b
+ //TODO: PSUBB (R11), M3 // 410ff81b
+ //TODO: PSUBB M2, M3 // 0ff8da
+ //TODO: PSUBB M3, M3 // 0ff8db
+ PSUBB (BX), X2 // 660ff813
+ PSUBB (R11), X2 // 66410ff813
+ PSUBB X2, X2 // 660ff8d2
+ PSUBB X11, X2 // 66410ff8d3
+ PSUBB (BX), X11 // 66440ff81b
+ PSUBB (R11), X11 // 66450ff81b
+ PSUBB X2, X11 // 66440ff8da
+ PSUBB X11, X11 // 66450ff8db
+ //TODO: PSUBL (BX), M2 // 0ffa13
+ //TODO: PSUBL (R11), M2 // 410ffa13
+ //TODO: PSUBL M2, M2 // 0ffad2
+ //TODO: PSUBL M3, M2 // 0ffad3
+ //TODO: PSUBL (BX), M3 // 0ffa1b
+ //TODO: PSUBL (R11), M3 // 410ffa1b
+ //TODO: PSUBL M2, M3 // 0ffada
+ //TODO: PSUBL M3, M3 // 0ffadb
+ PSUBL (BX), X2 // 660ffa13
+ PSUBL (R11), X2 // 66410ffa13
+ PSUBL X2, X2 // 660ffad2
+ PSUBL X11, X2 // 66410ffad3
+ PSUBL (BX), X11 // 66440ffa1b
+ PSUBL (R11), X11 // 66450ffa1b
+ PSUBL X2, X11 // 66440ffada
+ PSUBL X11, X11 // 66450ffadb
+ //TODO: PSUBQ (BX), M2 // 0ffb13
+ //TODO: PSUBQ (R11), M2 // 410ffb13
+ //TODO: PSUBQ M2, M2 // 0ffbd2
+ //TODO: PSUBQ M3, M2 // 0ffbd3
+ //TODO: PSUBQ (BX), M3 // 0ffb1b
+ //TODO: PSUBQ (R11), M3 // 410ffb1b
+ //TODO: PSUBQ M2, M3 // 0ffbda
+ //TODO: PSUBQ M3, M3 // 0ffbdb
+ PSUBQ (BX), X2 // 660ffb13
+ PSUBQ (R11), X2 // 66410ffb13
+ PSUBQ X2, X2 // 660ffbd2
+ PSUBQ X11, X2 // 66410ffbd3
+ PSUBQ (BX), X11 // 66440ffb1b
+ PSUBQ (R11), X11 // 66450ffb1b
+ PSUBQ X2, X11 // 66440ffbda
+ PSUBQ X11, X11 // 66450ffbdb
+ //TODO: PSUBSB (BX), M2 // 0fe813
+ //TODO: PSUBSB (R11), M2 // 410fe813
+ //TODO: PSUBSB M2, M2 // 0fe8d2
+ //TODO: PSUBSB M3, M2 // 0fe8d3
+ //TODO: PSUBSB (BX), M3 // 0fe81b
+ //TODO: PSUBSB (R11), M3 // 410fe81b
+ //TODO: PSUBSB M2, M3 // 0fe8da
+ //TODO: PSUBSB M3, M3 // 0fe8db
+ PSUBSB (BX), X2 // 660fe813
+ PSUBSB (R11), X2 // 66410fe813
+ PSUBSB X2, X2 // 660fe8d2
+ PSUBSB X11, X2 // 66410fe8d3
+ PSUBSB (BX), X11 // 66440fe81b
+ PSUBSB (R11), X11 // 66450fe81b
+ PSUBSB X2, X11 // 66440fe8da
+ PSUBSB X11, X11 // 66450fe8db
+ //TODO: PSUBSW (BX), M2 // 0fe913
+ //TODO: PSUBSW (R11), M2 // 410fe913
+ //TODO: PSUBSW M2, M2 // 0fe9d2
+ //TODO: PSUBSW M3, M2 // 0fe9d3
+ //TODO: PSUBSW (BX), M3 // 0fe91b
+ //TODO: PSUBSW (R11), M3 // 410fe91b
+ //TODO: PSUBSW M2, M3 // 0fe9da
+ //TODO: PSUBSW M3, M3 // 0fe9db
+ PSUBSW (BX), X2 // 660fe913
+ PSUBSW (R11), X2 // 66410fe913
+ PSUBSW X2, X2 // 660fe9d2
+ PSUBSW X11, X2 // 66410fe9d3
+ PSUBSW (BX), X11 // 66440fe91b
+ PSUBSW (R11), X11 // 66450fe91b
+ PSUBSW X2, X11 // 66440fe9da
+ PSUBSW X11, X11 // 66450fe9db
+ //TODO: PSUBUSB (BX), M2 // 0fd813
+ //TODO: PSUBUSB (R11), M2 // 410fd813
+ //TODO: PSUBUSB M2, M2 // 0fd8d2
+ //TODO: PSUBUSB M3, M2 // 0fd8d3
+ //TODO: PSUBUSB (BX), M3 // 0fd81b
+ //TODO: PSUBUSB (R11), M3 // 410fd81b
+ //TODO: PSUBUSB M2, M3 // 0fd8da
+ //TODO: PSUBUSB M3, M3 // 0fd8db
+ PSUBUSB (BX), X2 // 660fd813
+ PSUBUSB (R11), X2 // 66410fd813
+ PSUBUSB X2, X2 // 660fd8d2
+ PSUBUSB X11, X2 // 66410fd8d3
+ PSUBUSB (BX), X11 // 66440fd81b
+ PSUBUSB (R11), X11 // 66450fd81b
+ PSUBUSB X2, X11 // 66440fd8da
+ PSUBUSB X11, X11 // 66450fd8db
+ //TODO: PSUBUSW (BX), M2 // 0fd913
+ //TODO: PSUBUSW (R11), M2 // 410fd913
+ //TODO: PSUBUSW M2, M2 // 0fd9d2
+ //TODO: PSUBUSW M3, M2 // 0fd9d3
+ //TODO: PSUBUSW (BX), M3 // 0fd91b
+ //TODO: PSUBUSW (R11), M3 // 410fd91b
+ //TODO: PSUBUSW M2, M3 // 0fd9da
+ //TODO: PSUBUSW M3, M3 // 0fd9db
+ PSUBUSW (BX), X2 // 660fd913
+ PSUBUSW (R11), X2 // 66410fd913
+ PSUBUSW X2, X2 // 660fd9d2
+ PSUBUSW X11, X2 // 66410fd9d3
+ PSUBUSW (BX), X11 // 66440fd91b
+ PSUBUSW (R11), X11 // 66450fd91b
+ PSUBUSW X2, X11 // 66440fd9da
+ PSUBUSW X11, X11 // 66450fd9db
+ //TODO: PSUBW (BX), M2 // 0ff913
+ //TODO: PSUBW (R11), M2 // 410ff913
+ //TODO: PSUBW M2, M2 // 0ff9d2
+ //TODO: PSUBW M3, M2 // 0ff9d3
+ //TODO: PSUBW (BX), M3 // 0ff91b
+ //TODO: PSUBW (R11), M3 // 410ff91b
+ //TODO: PSUBW M2, M3 // 0ff9da
+ //TODO: PSUBW M3, M3 // 0ff9db
+ PSUBW (BX), X2 // 660ff913
+ PSUBW (R11), X2 // 66410ff913
+ PSUBW X2, X2 // 660ff9d2
+ PSUBW X11, X2 // 66410ff9d3
+ PSUBW (BX), X11 // 66440ff91b
+ PSUBW (R11), X11 // 66450ff91b
+ PSUBW X2, X11 // 66440ff9da
+ PSUBW X11, X11 // 66450ff9db
+ PTEST (BX), X2 // 660f381713
+ PTEST (R11), X2 // 66410f381713
+ PTEST X2, X2 // 660f3817d2
+ PTEST X11, X2 // 66410f3817d3
+ PTEST (BX), X11 // 66440f38171b
+ PTEST (R11), X11 // 66450f38171b
+ PTEST X2, X11 // 66440f3817da
+ PTEST X11, X11 // 66450f3817db
+ PUNPCKHBW (BX), M2 // 0f6813
+ PUNPCKHBW (R11), M2 // 410f6813
+ PUNPCKHBW M2, M2 // 0f68d2
+ PUNPCKHBW M3, M2 // 0f68d3
+ PUNPCKHBW (BX), M3 // 0f681b
+ PUNPCKHBW (R11), M3 // 410f681b
+ PUNPCKHBW M2, M3 // 0f68da
+ PUNPCKHBW M3, M3 // 0f68db
+ PUNPCKHBW (BX), X2 // 660f6813
+ PUNPCKHBW (R11), X2 // 66410f6813
+ PUNPCKHBW X2, X2 // 660f68d2
+ PUNPCKHBW X11, X2 // 66410f68d3
+ PUNPCKHBW (BX), X11 // 66440f681b
+ PUNPCKHBW (R11), X11 // 66450f681b
+ PUNPCKHBW X2, X11 // 66440f68da
+ PUNPCKHBW X11, X11 // 66450f68db
+ PUNPCKHLQ (BX), M2 // 0f6a13
+ PUNPCKHLQ (R11), M2 // 410f6a13
+ PUNPCKHLQ M2, M2 // 0f6ad2
+ PUNPCKHLQ M3, M2 // 0f6ad3
+ PUNPCKHLQ (BX), M3 // 0f6a1b
+ PUNPCKHLQ (R11), M3 // 410f6a1b
+ PUNPCKHLQ M2, M3 // 0f6ada
+ PUNPCKHLQ M3, M3 // 0f6adb
+ PUNPCKHLQ (BX), X2 // 660f6a13
+ PUNPCKHLQ (R11), X2 // 66410f6a13
+ PUNPCKHLQ X2, X2 // 660f6ad2
+ PUNPCKHLQ X11, X2 // 66410f6ad3
+ PUNPCKHLQ (BX), X11 // 66440f6a1b
+ PUNPCKHLQ (R11), X11 // 66450f6a1b
+ PUNPCKHLQ X2, X11 // 66440f6ada
+ PUNPCKHLQ X11, X11 // 66450f6adb
+ PUNPCKHQDQ (BX), X2 // 660f6d13
+ PUNPCKHQDQ (R11), X2 // 66410f6d13
+ PUNPCKHQDQ X2, X2 // 660f6dd2
+ PUNPCKHQDQ X11, X2 // 66410f6dd3
+ PUNPCKHQDQ (BX), X11 // 66440f6d1b
+ PUNPCKHQDQ (R11), X11 // 66450f6d1b
+ PUNPCKHQDQ X2, X11 // 66440f6dda
+ PUNPCKHQDQ X11, X11 // 66450f6ddb
+ PUNPCKHWL (BX), M2 // 0f6913
+ PUNPCKHWL (R11), M2 // 410f6913
+ PUNPCKHWL M2, M2 // 0f69d2
+ PUNPCKHWL M3, M2 // 0f69d3
+ PUNPCKHWL (BX), M3 // 0f691b
+ PUNPCKHWL (R11), M3 // 410f691b
+ PUNPCKHWL M2, M3 // 0f69da
+ PUNPCKHWL M3, M3 // 0f69db
+ PUNPCKHWL (BX), X2 // 660f6913
+ PUNPCKHWL (R11), X2 // 66410f6913
+ PUNPCKHWL X2, X2 // 660f69d2
+ PUNPCKHWL X11, X2 // 66410f69d3
+ PUNPCKHWL (BX), X11 // 66440f691b
+ PUNPCKHWL (R11), X11 // 66450f691b
+ PUNPCKHWL X2, X11 // 66440f69da
+ PUNPCKHWL X11, X11 // 66450f69db
+ PUNPCKLBW (BX), M2 // 0f6013
+ PUNPCKLBW (R11), M2 // 410f6013
+ PUNPCKLBW M2, M2 // 0f60d2
+ PUNPCKLBW M3, M2 // 0f60d3
+ PUNPCKLBW (BX), M3 // 0f601b
+ PUNPCKLBW (R11), M3 // 410f601b
+ PUNPCKLBW M2, M3 // 0f60da
+ PUNPCKLBW M3, M3 // 0f60db
+ PUNPCKLBW (BX), X2 // 660f6013
+ PUNPCKLBW (R11), X2 // 66410f6013
+ PUNPCKLBW X2, X2 // 660f60d2
+ PUNPCKLBW X11, X2 // 66410f60d3
+ PUNPCKLBW (BX), X11 // 66440f601b
+ PUNPCKLBW (R11), X11 // 66450f601b
+ PUNPCKLBW X2, X11 // 66440f60da
+ PUNPCKLBW X11, X11 // 66450f60db
+ PUNPCKLLQ (BX), M2 // 0f6213
+ PUNPCKLLQ (R11), M2 // 410f6213
+ PUNPCKLLQ M2, M2 // 0f62d2
+ PUNPCKLLQ M3, M2 // 0f62d3
+ PUNPCKLLQ (BX), M3 // 0f621b
+ PUNPCKLLQ (R11), M3 // 410f621b
+ PUNPCKLLQ M2, M3 // 0f62da
+ PUNPCKLLQ M3, M3 // 0f62db
+ PUNPCKLLQ (BX), X2 // 660f6213
+ PUNPCKLLQ (R11), X2 // 66410f6213
+ PUNPCKLLQ X2, X2 // 660f62d2
+ PUNPCKLLQ X11, X2 // 66410f62d3
+ PUNPCKLLQ (BX), X11 // 66440f621b
+ PUNPCKLLQ (R11), X11 // 66450f621b
+ PUNPCKLLQ X2, X11 // 66440f62da
+ PUNPCKLLQ X11, X11 // 66450f62db
+ PUNPCKLQDQ (BX), X2 // 660f6c13
+ PUNPCKLQDQ (R11), X2 // 66410f6c13
+ PUNPCKLQDQ X2, X2 // 660f6cd2
+ PUNPCKLQDQ X11, X2 // 66410f6cd3
+ PUNPCKLQDQ (BX), X11 // 66440f6c1b
+ PUNPCKLQDQ (R11), X11 // 66450f6c1b
+ PUNPCKLQDQ X2, X11 // 66440f6cda
+ PUNPCKLQDQ X11, X11 // 66450f6cdb
+ PUNPCKLWL (BX), M2 // 0f6113
+ PUNPCKLWL (R11), M2 // 410f6113
+ PUNPCKLWL M2, M2 // 0f61d2
+ PUNPCKLWL M3, M2 // 0f61d3
+ PUNPCKLWL (BX), M3 // 0f611b
+ PUNPCKLWL (R11), M3 // 410f611b
+ PUNPCKLWL M2, M3 // 0f61da
+ PUNPCKLWL M3, M3 // 0f61db
+ PUNPCKLWL (BX), X2 // 660f6113
+ PUNPCKLWL (R11), X2 // 66410f6113
+ PUNPCKLWL X2, X2 // 660f61d2
+ PUNPCKLWL X11, X2 // 66410f61d3
+ PUNPCKLWL (BX), X11 // 66440f611b
+ PUNPCKLWL (R11), X11 // 66450f611b
+ PUNPCKLWL X2, X11 // 66440f61da
+ PUNPCKLWL X11, X11 // 66450f61db
+ PUSHQ FS // 0fa0
+ POPQ AX
+ PUSHQ GS // 0fa8
+ POPQ AX
+ PUSHW $61731 // 666823f1
+ POPW AX
+ PUSHQ $4045620583 // 68674523f1
+ POPQ AX
+ PUSHQ $7 // 6a07
+ POPQ AX
+ PUSHW (BX) // 66ff33
+ POPW AX
+ PUSHW (R11) // 6641ff33
+ POPW AX
+ PUSHW DX // 66fff2 or 6652
+ POPW AX
+ PUSHW R11 // 6641fff3 or 664153
+ POPW AX
+ PUSHQ (BX) // ff33
+ POPQ AX
+ PUSHQ (R11) // 41ff33
+ POPQ AX
+ PUSHQ DX // fff2 or 52
+ POPQ AX
+ PUSHQ R11 // 41fff3 or 4153
+ POPQ AX
+ PUSHFW // 669c
+ POPFW
+ PUSHFQ // 9c
+ POPFQ
+ PXOR (BX), M2 // 0fef13
+ PXOR (R11), M2 // 410fef13
+ PXOR M2, M2 // 0fefd2
+ PXOR M3, M2 // 0fefd3
+ PXOR (BX), M3 // 0fef1b
+ PXOR (R11), M3 // 410fef1b
+ PXOR M2, M3 // 0fefda
+ PXOR M3, M3 // 0fefdb
+ PXOR (BX), X2 // 660fef13
+ PXOR (R11), X2 // 66410fef13
+ PXOR X2, X2 // 660fefd2
+ PXOR X11, X2 // 66410fefd3
+ PXOR (BX), X11 // 66440fef1b
+ PXOR (R11), X11 // 66450fef1b
+ PXOR X2, X11 // 66440fefda
+ PXOR X11, X11 // 66450fefdb
+ RCLW $1, (BX) // 66d113
+ RCLW $1, (R11) // 6641d113
+ RCLW $1, DX // 66d1d2
+ RCLW $1, R11 // 6641d1d3
+ RCLW CL, (BX) // 66d313
+ RCLW CL, (R11) // 6641d313
+ RCLW CL, DX // 66d3d2
+ RCLW CL, R11 // 6641d3d3
+ RCLW $7, (BX) // 66c11307
+ RCLW $7, (R11) // 6641c11307
+ RCLW $7, DX // 66c1d207
+ RCLW $7, R11 // 6641c1d307
+ RCLL $1, (BX) // d113
+ RCLL $1, (R11) // 41d113
+ RCLL $1, DX // d1d2
+ RCLL $1, R11 // 41d1d3
+ RCLL CL, (BX) // d313
+ RCLL CL, (R11) // 41d313
+ RCLL CL, DX // d3d2
+ RCLL CL, R11 // 41d3d3
+ RCLL $7, (BX) // c11307
+ RCLL $7, (R11) // 41c11307
+ RCLL $7, DX // c1d207
+ RCLL $7, R11 // 41c1d307
+ RCLQ $1, (BX) // 48d113
+ RCLQ $1, (R11) // 49d113
+ RCLQ $1, DX // 48d1d2
+ RCLQ $1, R11 // 49d1d3
+ RCLQ CL, (BX) // 48d313
+ RCLQ CL, (R11) // 49d313
+ RCLQ CL, DX // 48d3d2
+ RCLQ CL, R11 // 49d3d3
+ RCLQ $7, (BX) // 48c11307
+ RCLQ $7, (R11) // 49c11307
+ RCLQ $7, DX // 48c1d207
+ RCLQ $7, R11 // 49c1d307
+ RCLB $1, (BX) // d013
+ RCLB $1, (R11) // 41d013
+ RCLB $1, DL // d0d2
+ RCLB $1, R11 // 41d0d3
+ RCLB CL, (BX) // d213
+ RCLB CL, (R11) // 41d213
+ RCLB CL, DL // d2d2
+ RCLB CL, R11 // 41d2d3
+ RCLB $7, (BX) // c01307
+ RCLB $7, (R11) // 41c01307
+ RCLB $7, DL // c0d207
+ RCLB $7, R11 // 41c0d307
+ RCPPS (BX), X2 // 0f5313
+ RCPPS (R11), X2 // 410f5313
+ RCPPS X2, X2 // 0f53d2
+ RCPPS X11, X2 // 410f53d3
+ RCPPS (BX), X11 // 440f531b
+ RCPPS (R11), X11 // 450f531b
+ RCPPS X2, X11 // 440f53da
+ RCPPS X11, X11 // 450f53db
+ RCPSS (BX), X2 // f30f5313
+ RCPSS (R11), X2 // f3410f5313
+ RCPSS X2, X2 // f30f53d2
+ RCPSS X11, X2 // f3410f53d3
+ RCPSS (BX), X11 // f3440f531b
+ RCPSS (R11), X11 // f3450f531b
+ RCPSS X2, X11 // f3440f53da
+ RCPSS X11, X11 // f3450f53db
+ RCRW $1, (BX) // 66d11b
+ RCRW $1, (R11) // 6641d11b
+ RCRW $1, DX // 66d1da
+ RCRW $1, R11 // 6641d1db
+ RCRW CL, (BX) // 66d31b
+ RCRW CL, (R11) // 6641d31b
+ RCRW CL, DX // 66d3da
+ RCRW CL, R11 // 6641d3db
+ RCRW $7, (BX) // 66c11b07
+ RCRW $7, (R11) // 6641c11b07
+ RCRW $7, DX // 66c1da07
+ RCRW $7, R11 // 6641c1db07
+ RCRL $1, (BX) // d11b
+ RCRL $1, (R11) // 41d11b
+ RCRL $1, DX // d1da
+ RCRL $1, R11 // 41d1db
+ RCRL CL, (BX) // d31b
+ RCRL CL, (R11) // 41d31b
+ RCRL CL, DX // d3da
+ RCRL CL, R11 // 41d3db
+ RCRL $7, (BX) // c11b07
+ RCRL $7, (R11) // 41c11b07
+ RCRL $7, DX // c1da07
+ RCRL $7, R11 // 41c1db07
+ RCRQ $1, (BX) // 48d11b
+ RCRQ $1, (R11) // 49d11b
+ RCRQ $1, DX // 48d1da
+ RCRQ $1, R11 // 49d1db
+ RCRQ CL, (BX) // 48d31b
+ RCRQ CL, (R11) // 49d31b
+ RCRQ CL, DX // 48d3da
+ RCRQ CL, R11 // 49d3db
+ RCRQ $7, (BX) // 48c11b07
+ RCRQ $7, (R11) // 49c11b07
+ RCRQ $7, DX // 48c1da07
+ RCRQ $7, R11 // 49c1db07
+ RCRB $1, (BX) // d01b
+ RCRB $1, (R11) // 41d01b
+ RCRB $1, DL // d0da
+ RCRB $1, R11 // 41d0db
+ RCRB CL, (BX) // d21b
+ RCRB CL, (R11) // 41d21b
+ RCRB CL, DL // d2da
+ RCRB CL, R11 // 41d2db
+ RCRB $7, (BX) // c01b07
+ RCRB $7, (R11) // 41c01b07
+ RCRB $7, DL // c0da07
+ RCRB $7, R11 // 41c0db07
+ RDFSBASEL DX // f30faec2
+ RDFSBASEL R11 // f3410faec3
+ RDGSBASEL DX // f30faeca
+ RDGSBASEL R11 // f3410faecb
+ RDFSBASEQ DX // f3480faec2
+ RDFSBASEQ R11 // f3490faec3
+ RDGSBASEQ DX // f3480faeca
+ RDGSBASEQ R11 // f3490faecb
+ RDMSR // 0f32
+ RDPKRU // 0f01ee
+ RDPMC // 0f33
+ RDRANDW DX // 660fc7f2
+ RDRANDW R11 // 66410fc7f3
+ RDRANDL DX // 0fc7f2
+ RDRANDL R11 // 410fc7f3
+ RDRANDQ DX // 480fc7f2
+ RDRANDQ R11 // 490fc7f3
+ RDSEEDW DX // 660fc7fa
+ RDSEEDW R11 // 66410fc7fb
+ RDSEEDL DX // 0fc7fa
+ RDSEEDL R11 // 410fc7fb
+ RDSEEDQ DX // 480fc7fa
+ RDSEEDQ R11 // 490fc7fb
+ RDTSC // 0f31
+ RDTSCP // 0f01f9
+ JCS 2(PC)
+ //TODO: RETQ // c3
+ JCS 2(PC)
+ //TODO: RETQ $0xf123 // c223f1
+ JCS 2(PC)
+ //TODO: RETFQ // cb
+ JCS 2(PC)
+ //TODO: RETFQ $0xf123 // ca23f1
+ ROLW $1, (BX) // 66d103
+ ROLW $1, (R11) // 6641d103
+ ROLW $1, DX // 66d1c2
+ ROLW $1, R11 // 6641d1c3
+ ROLW CL, (BX) // 66d303
+ ROLW CL, (R11) // 6641d303
+ ROLW CL, DX // 66d3c2
+ ROLW CL, R11 // 6641d3c3
+ ROLW $7, (BX) // 66c10307
+ ROLW $7, (R11) // 6641c10307
+ ROLW $7, DX // 66c1c207
+ ROLW $7, R11 // 6641c1c307
+ ROLL $1, (BX) // d103
+ ROLL $1, (R11) // 41d103
+ ROLL $1, DX // d1c2
+ ROLL $1, R11 // 41d1c3
+ ROLL CL, (BX) // d303
+ ROLL CL, (R11) // 41d303
+ ROLL CL, DX // d3c2
+ ROLL CL, R11 // 41d3c3
+ ROLL $7, (BX) // c10307
+ ROLL $7, (R11) // 41c10307
+ ROLL $7, DX // c1c207
+ ROLL $7, R11 // 41c1c307
+ ROLQ $1, (BX) // 48d103
+ ROLQ $1, (R11) // 49d103
+ ROLQ $1, DX // 48d1c2
+ ROLQ $1, R11 // 49d1c3
+ ROLQ CL, (BX) // 48d303
+ ROLQ CL, (R11) // 49d303
+ ROLQ CL, DX // 48d3c2
+ ROLQ CL, R11 // 49d3c3
+ ROLQ $7, (BX) // 48c10307
+ ROLQ $7, (R11) // 49c10307
+ ROLQ $7, DX // 48c1c207
+ ROLQ $7, R11 // 49c1c307
+ ROLB $1, (BX) // d003
+ ROLB $1, (R11) // 41d003
+ ROLB $1, DL // d0c2
+ ROLB $1, R11 // 41d0c3
+ ROLB CL, (BX) // d203
+ ROLB CL, (R11) // 41d203
+ ROLB CL, DL // d2c2
+ ROLB CL, R11 // 41d2c3
+ ROLB $7, (BX) // c00307
+ ROLB $7, (R11) // 41c00307
+ ROLB $7, DL // c0c207
+ ROLB $7, R11 // 41c0c307
+ RORW $1, (BX) // 66d10b
+ RORW $1, (R11) // 6641d10b
+ RORW $1, DX // 66d1ca
+ RORW $1, R11 // 6641d1cb
+ RORW CL, (BX) // 66d30b
+ RORW CL, (R11) // 6641d30b
+ RORW CL, DX // 66d3ca
+ RORW CL, R11 // 6641d3cb
+ RORW $7, (BX) // 66c10b07
+ RORW $7, (R11) // 6641c10b07
+ RORW $7, DX // 66c1ca07
+ RORW $7, R11 // 6641c1cb07
+ RORL $1, (BX) // d10b
+ RORL $1, (R11) // 41d10b
+ RORL $1, DX // d1ca
+ RORL $1, R11 // 41d1cb
+ RORL CL, (BX) // d30b
+ RORL CL, (R11) // 41d30b
+ RORL CL, DX // d3ca
+ RORL CL, R11 // 41d3cb
+ RORL $7, (BX) // c10b07
+ RORL $7, (R11) // 41c10b07
+ RORL $7, DX // c1ca07
+ RORL $7, R11 // 41c1cb07
+ RORQ $1, (BX) // 48d10b
+ RORQ $1, (R11) // 49d10b
+ RORQ $1, DX // 48d1ca
+ RORQ $1, R11 // 49d1cb
+ RORQ CL, (BX) // 48d30b
+ RORQ CL, (R11) // 49d30b
+ RORQ CL, DX // 48d3ca
+ RORQ CL, R11 // 49d3cb
+ RORQ $7, (BX) // 48c10b07
+ RORQ $7, (R11) // 49c10b07
+ RORQ $7, DX // 48c1ca07
+ RORQ $7, R11 // 49c1cb07
+ RORB $1, (BX) // d00b
+ RORB $1, (R11) // 41d00b
+ RORB $1, DL // d0ca
+ RORB $1, R11 // 41d0cb
+ RORB CL, (BX) // d20b
+ RORB CL, (R11) // 41d20b
+ RORB CL, DL // d2ca
+ RORB CL, R11 // 41d2cb
+ RORB $7, (BX) // c00b07
+ RORB $7, (R11) // 41c00b07
+ RORB $7, DL // c0ca07
+ RORB $7, R11 // 41c0cb07
+ RORXL $7, (BX), DX // c4e37bf01307
+ RORXL $7, (R11), DX // c4c37bf01307
+ RORXL $7, DX, DX // c4e37bf0d207
+ RORXL $7, R11, DX // c4c37bf0d307
+ RORXL $7, (BX), R11 // c4637bf01b07
+ RORXL $7, (R11), R11 // c4437bf01b07
+ RORXL $7, DX, R11 // c4637bf0da07
+ RORXL $7, R11, R11 // c4437bf0db07
+ RORXQ $7, (BX), DX // c4e3fbf01307
+ RORXQ $7, (R11), DX // c4c3fbf01307
+ RORXQ $7, DX, DX // c4e3fbf0d207
+ RORXQ $7, R11, DX // c4c3fbf0d307
+ RORXQ $7, (BX), R11 // c463fbf01b07
+ RORXQ $7, (R11), R11 // c443fbf01b07
+ RORXQ $7, DX, R11 // c463fbf0da07
+ RORXQ $7, R11, R11 // c443fbf0db07
+ ROUNDPD $7, (BX), X2 // 660f3a091307
+ ROUNDPD $7, (R11), X2 // 66410f3a091307
+ ROUNDPD $7, X2, X2 // 660f3a09d207
+ ROUNDPD $7, X11, X2 // 66410f3a09d307
+ ROUNDPD $7, (BX), X11 // 66440f3a091b07
+ ROUNDPD $7, (R11), X11 // 66450f3a091b07
+ ROUNDPD $7, X2, X11 // 66440f3a09da07
+ ROUNDPD $7, X11, X11 // 66450f3a09db07
+ ROUNDPS $7, (BX), X2 // 660f3a081307
+ ROUNDPS $7, (R11), X2 // 66410f3a081307
+ ROUNDPS $7, X2, X2 // 660f3a08d207
+ ROUNDPS $7, X11, X2 // 66410f3a08d307
+ ROUNDPS $7, (BX), X11 // 66440f3a081b07
+ ROUNDPS $7, (R11), X11 // 66450f3a081b07
+ ROUNDPS $7, X2, X11 // 66440f3a08da07
+ ROUNDPS $7, X11, X11 // 66450f3a08db07
+ ROUNDSD $7, (BX), X2 // 660f3a0b1307
+ ROUNDSD $7, (R11), X2 // 66410f3a0b1307
+ ROUNDSD $7, X2, X2 // 660f3a0bd207
+ ROUNDSD $7, X11, X2 // 66410f3a0bd307
+ ROUNDSD $7, (BX), X11 // 66440f3a0b1b07
+ ROUNDSD $7, (R11), X11 // 66450f3a0b1b07
+ ROUNDSD $7, X2, X11 // 66440f3a0bda07
+ ROUNDSD $7, X11, X11 // 66450f3a0bdb07
+ ROUNDSS $7, (BX), X2 // 660f3a0a1307
+ ROUNDSS $7, (R11), X2 // 66410f3a0a1307
+ ROUNDSS $7, X2, X2 // 660f3a0ad207
+ ROUNDSS $7, X11, X2 // 66410f3a0ad307
+ ROUNDSS $7, (BX), X11 // 66440f3a0a1b07
+ ROUNDSS $7, (R11), X11 // 66450f3a0a1b07
+ ROUNDSS $7, X2, X11 // 66440f3a0ada07
+ ROUNDSS $7, X11, X11 // 66450f3a0adb07
+ RSM // 0faa
+ RSQRTPS (BX), X2 // 0f5213
+ RSQRTPS (R11), X2 // 410f5213
+ RSQRTPS X2, X2 // 0f52d2
+ RSQRTPS X11, X2 // 410f52d3
+ RSQRTPS (BX), X11 // 440f521b
+ RSQRTPS (R11), X11 // 450f521b
+ RSQRTPS X2, X11 // 440f52da
+ RSQRTPS X11, X11 // 450f52db
+ RSQRTSS (BX), X2 // f30f5213
+ RSQRTSS (R11), X2 // f3410f5213
+ RSQRTSS X2, X2 // f30f52d2
+ RSQRTSS X11, X2 // f3410f52d3
+ RSQRTSS (BX), X11 // f3440f521b
+ RSQRTSS (R11), X11 // f3450f521b
+ RSQRTSS X2, X11 // f3440f52da
+ RSQRTSS X11, X11 // f3450f52db
+ SAHF // 9e
+ SARW $1, (BX) // 66d13b
+ SARW $1, (R11) // 6641d13b
+ SARW $1, DX // 66d1fa
+ SARW $1, R11 // 6641d1fb
+ SARW CL, (BX) // 66d33b
+ SARW CL, (R11) // 6641d33b
+ SARW CL, DX // 66d3fa
+ SARW CL, R11 // 6641d3fb
+ SARW $7, (BX) // 66c13b07
+ SARW $7, (R11) // 6641c13b07
+ SARW $7, DX // 66c1fa07
+ SARW $7, R11 // 6641c1fb07
+ SARL $1, (BX) // d13b
+ SARL $1, (R11) // 41d13b
+ SARL $1, DX // d1fa
+ SARL $1, R11 // 41d1fb
+ SARL CL, (BX) // d33b
+ SARL CL, (R11) // 41d33b
+ SARL CL, DX // d3fa
+ SARL CL, R11 // 41d3fb
+ SARL $7, (BX) // c13b07
+ SARL $7, (R11) // 41c13b07
+ SARL $7, DX // c1fa07
+ SARL $7, R11 // 41c1fb07
+ SARQ $1, (BX) // 48d13b
+ SARQ $1, (R11) // 49d13b
+ SARQ $1, DX // 48d1fa
+ SARQ $1, R11 // 49d1fb
+ SARQ CL, (BX) // 48d33b
+ SARQ CL, (R11) // 49d33b
+ SARQ CL, DX // 48d3fa
+ SARQ CL, R11 // 49d3fb
+ SARQ $7, (BX) // 48c13b07
+ SARQ $7, (R11) // 49c13b07
+ SARQ $7, DX // 48c1fa07
+ SARQ $7, R11 // 49c1fb07
+ SARB $1, (BX) // d03b
+ SARB $1, (R11) // 41d03b
+ SARB $1, DL // d0fa
+ SARB $1, R11 // 41d0fb
+ SARB CL, (BX) // d23b
+ SARB CL, (R11) // 41d23b
+ SARB CL, DL // d2fa
+ SARB CL, R11 // 41d2fb
+ SARB $7, (BX) // c03b07
+ SARB $7, (R11) // 41c03b07
+ SARB $7, DL // c0fa07
+ SARB $7, R11 // 41c0fb07
+ SARXL R9, (BX), DX // c4e232f713
+ SARXL R9, (R11), DX // c4c232f713
+ SARXL R9, DX, DX // c4e232f7d2
+ SARXL R9, R11, DX // c4c232f7d3
+ SARXL R9, (BX), R11 // c46232f71b
+ SARXL R9, (R11), R11 // c44232f71b
+ SARXL R9, DX, R11 // c46232f7da
+ SARXL R9, R11, R11 // c44232f7db
+ SARXQ R14, (BX), DX // c4e28af713
+ SARXQ R14, (R11), DX // c4c28af713
+ SARXQ R14, DX, DX // c4e28af7d2
+ SARXQ R14, R11, DX // c4c28af7d3
+ SARXQ R14, (BX), R11 // c4628af71b
+ SARXQ R14, (R11), R11 // c4428af71b
+ SARXQ R14, DX, R11 // c4628af7da
+ SARXQ R14, R11, R11 // c4428af7db
+ SBBB $7, AL // 1c07
+ SBBW $61731, AX // 661d23f1
+ SBBL $4045620583, AX // 1d674523f1
+ SBBQ $-249346713, AX // 481d674523f1
+ SBBW $61731, (BX) // 66811b23f1
+ SBBW $61731, (R11) // 6641811b23f1
+ SBBW $61731, DX // 6681da23f1
+ SBBW $61731, R11 // 664181db23f1
+ SBBW $7, (BX) // 66831b07
+ SBBW $7, (R11) // 6641831b07
+ SBBW $7, DX // 6683da07
+ SBBW $7, R11 // 664183db07
+ SBBW DX, (BX) // 661913
+ SBBW R11, (BX) // 6644191b
+ SBBW DX, (R11) // 66411913
+ SBBW R11, (R11) // 6645191b
+ SBBW DX, DX // 6619d2 or 661bd2
+ SBBW R11, DX // 664419da or 66411bd3
+ SBBW DX, R11 // 664119d3 or 66441bda
+ SBBW R11, R11 // 664519db or 66451bdb
+ SBBL $4045620583, (BX) // 811b674523f1
+ SBBL $4045620583, (R11) // 41811b674523f1
+ SBBL $4045620583, DX // 81da674523f1
+ SBBL $4045620583, R11 // 4181db674523f1
+ SBBL $7, (BX) // 831b07
+ SBBL $7, (R11) // 41831b07
+ SBBL $7, DX // 83da07
+ SBBL $7, R11 // 4183db07
+ SBBL DX, (BX) // 1913
+ SBBL R11, (BX) // 44191b
+ SBBL DX, (R11) // 411913
+ SBBL R11, (R11) // 45191b
+ SBBL DX, DX // 19d2 or 1bd2
+ SBBL R11, DX // 4419da or 411bd3
+ SBBL DX, R11 // 4119d3 or 441bda
+ SBBL R11, R11 // 4519db or 451bdb
+ SBBQ $-249346713, (BX) // 48811b674523f1
+ SBBQ $-249346713, (R11) // 49811b674523f1
+ SBBQ $-249346713, DX // 4881da674523f1
+ SBBQ $-249346713, R11 // 4981db674523f1
+ SBBQ $7, (BX) // 48831b07
+ SBBQ $7, (R11) // 49831b07
+ SBBQ $7, DX // 4883da07
+ SBBQ $7, R11 // 4983db07
+ SBBQ DX, (BX) // 481913
+ SBBQ R11, (BX) // 4c191b
+ SBBQ DX, (R11) // 491913
+ SBBQ R11, (R11) // 4d191b
+ SBBQ DX, DX // 4819d2 or 481bd2
+ SBBQ R11, DX // 4c19da or 491bd3
+ SBBQ DX, R11 // 4919d3 or 4c1bda
+ SBBQ R11, R11 // 4d19db or 4d1bdb
+ SBBB $7, (BX) // 801b07
+ SBBB $7, (R11) // 41801b07
+ SBBB $7, DL // 80da07
+ SBBB $7, R11 // 4180db07
+ SBBB DL, (BX) // 1813
+ SBBB R11, (BX) // 44181b
+ SBBB DL, (R11) // 411813
+ SBBB R11, (R11) // 45181b
+ SBBB DL, DL // 18d2 or 1ad2
+ SBBB R11, DL // 4418da or 411ad3
+ SBBB DL, R11 // 4118d3 or 441ada
+ SBBB R11, R11 // 4518db or 451adb
+ SBBW (BX), DX // 661b13
+ SBBW (R11), DX // 66411b13
+ SBBW (BX), R11 // 66441b1b
+ SBBW (R11), R11 // 66451b1b
+ SBBL (BX), DX // 1b13
+ SBBL (R11), DX // 411b13
+ SBBL (BX), R11 // 441b1b
+ SBBL (R11), R11 // 451b1b
+ SBBQ (BX), DX // 481b13
+ SBBQ (R11), DX // 491b13
+ SBBQ (BX), R11 // 4c1b1b
+ SBBQ (R11), R11 // 4d1b1b
+ SBBB (BX), DL // 1a13
+ SBBB (R11), DL // 411a13
+ SBBB (BX), R11 // 441a1b
+ SBBB (R11), R11 // 451a1b
+ SCASB // ae
+ SCASL // af
+ SCASQ // 48af
+ SCASW // 66af
+ SETHI (BX) // 0f9703
+ SETHI (R11) // 410f9703
+ SETHI DL // 0f97c2
+ SETHI R11 // 410f97c3
+ SETCC (BX) // 0f9303
+ SETCC (R11) // 410f9303
+ SETCC DL // 0f93c2
+ SETCC R11 // 410f93c3
+ SETCS (BX) // 0f9203
+ SETCS (R11) // 410f9203
+ SETCS DL // 0f92c2
+ SETCS R11 // 410f92c3
+ SETLS (BX) // 0f9603
+ SETLS (R11) // 410f9603
+ SETLS DL // 0f96c2
+ SETLS R11 // 410f96c3
+ SETEQ (BX) // 0f9403
+ SETEQ (R11) // 410f9403
+ SETEQ DL // 0f94c2
+ SETEQ R11 // 410f94c3
+ SETGT (BX) // 0f9f03
+ SETGT (R11) // 410f9f03
+ SETGT DL // 0f9fc2
+ SETGT R11 // 410f9fc3
+ SETGE (BX) // 0f9d03
+ SETGE (R11) // 410f9d03
+ SETGE DL // 0f9dc2
+ SETGE R11 // 410f9dc3
+ SETLT (BX) // 0f9c03
+ SETLT (R11) // 410f9c03
+ SETLT DL // 0f9cc2
+ SETLT R11 // 410f9cc3
+ SETLE (BX) // 0f9e03
+ SETLE (R11) // 410f9e03
+ SETLE DL // 0f9ec2
+ SETLE R11 // 410f9ec3
+ SETNE (BX) // 0f9503
+ SETNE (R11) // 410f9503
+ SETNE DL // 0f95c2
+ SETNE R11 // 410f95c3
+ SETOC (BX) // 0f9103
+ SETOC (R11) // 410f9103
+ SETOC DL // 0f91c2
+ SETOC R11 // 410f91c3
+ SETPC (BX) // 0f9b03
+ SETPC (R11) // 410f9b03
+ SETPC DL // 0f9bc2
+ SETPC R11 // 410f9bc3
+ SETPL (BX) // 0f9903
+ SETPL (R11) // 410f9903
+ SETPL DL // 0f99c2
+ SETPL R11 // 410f99c3
+ SETOS (BX) // 0f9003
+ SETOS (R11) // 410f9003
+ SETOS DL // 0f90c2
+ SETOS R11 // 410f90c3
+ SETPS (BX) // 0f9a03
+ SETPS (R11) // 410f9a03
+ SETPS DL // 0f9ac2
+ SETPS R11 // 410f9ac3
+ SETMI (BX) // 0f9803
+ SETMI (R11) // 410f9803
+ SETMI DL // 0f98c2
+ SETMI R11 // 410f98c3
+ SFENCE // 0faef8
+ SGDT (BX) // 0f0103
+ SGDT (R11) // 410f0103
+ SHLW $1, (BX) // 66d123
+ SHLW $1, (R11) // 6641d123
+ SHLW $1, DX // 66d1e2
+ SHLW $1, R11 // 6641d1e3
+ SHLW CL, (BX) // 66d323
+ SHLW CL, (R11) // 6641d323
+ SHLW CL, DX // 66d3e2
+ SHLW CL, R11 // 6641d3e3
+ SHLW $7, (BX) // 66c12307
+ SHLW $7, (R11) // 6641c12307
+ SHLW $7, DX // 66c1e207
+ SHLW $7, R11 // 6641c1e307
+ SHLL $1, (BX) // d123
+ SHLL $1, (R11) // 41d123
+ SHLL $1, DX // d1e2
+ SHLL $1, R11 // 41d1e3
+ SHLL CL, (BX) // d323
+ SHLL CL, (R11) // 41d323
+ SHLL CL, DX // d3e2
+ SHLL CL, R11 // 41d3e3
+ SHLL $7, (BX) // c12307
+ SHLL $7, (R11) // 41c12307
+ SHLL $7, DX // c1e207
+ SHLL $7, R11 // 41c1e307
+ SHLQ $1, (BX) // 48d123
+ SHLQ $1, (R11) // 49d123
+ SHLQ $1, DX // 48d1e2
+ SHLQ $1, R11 // 49d1e3
+ SHLQ CL, (BX) // 48d323
+ SHLQ CL, (R11) // 49d323
+ SHLQ CL, DX // 48d3e2
+ SHLQ CL, R11 // 49d3e3
+ SHLQ $7, (BX) // 48c12307
+ SHLQ $7, (R11) // 49c12307
+ SHLQ $7, DX // 48c1e207
+ SHLQ $7, R11 // 49c1e307
+ SHLB $1, (BX) // d023
+ SHLB $1, (R11) // 41d023
+ SHLB $1, DL // d0e2
+ SHLB $1, R11 // 41d0e3
+ SHLB CL, (BX) // d223
+ SHLB CL, (R11) // 41d223
+ SHLB CL, DL // d2e2
+ SHLB CL, R11 // 41d2e3
+ SHLB $7, (BX) // c02307
+ SHLB $7, (R11) // 41c02307
+ SHLB $7, DL // c0e207
+ SHLB $7, R11 // 41c0e307
+ SHLW CL, DX, (BX) // 660fa513
+ SHLW CL, R11, (BX) // 66440fa51b
+ SHLW CL, DX, (R11) // 66410fa513
+ SHLW CL, R11, (R11) // 66450fa51b
+ SHLW CL, DX, DX // 660fa5d2
+ SHLW CL, R11, DX // 66440fa5da
+ SHLW CL, DX, R11 // 66410fa5d3
+ SHLW CL, R11, R11 // 66450fa5db
+ SHLW $7, DX, (BX) // 660fa41307
+ SHLW $7, R11, (BX) // 66440fa41b07
+ SHLW $7, DX, (R11) // 66410fa41307
+ SHLW $7, R11, (R11) // 66450fa41b07
+ SHLW $7, DX, DX // 660fa4d207
+ SHLW $7, R11, DX // 66440fa4da07
+ SHLW $7, DX, R11 // 66410fa4d307
+ SHLW $7, R11, R11 // 66450fa4db07
+ SHLL CL, DX, (BX) // 0fa513
+ SHLL CL, R11, (BX) // 440fa51b
+ SHLL CL, DX, (R11) // 410fa513
+ SHLL CL, R11, (R11) // 450fa51b
+ SHLL CL, DX, DX // 0fa5d2
+ SHLL CL, R11, DX // 440fa5da
+ SHLL CL, DX, R11 // 410fa5d3
+ SHLL CL, R11, R11 // 450fa5db
+ SHLL $7, DX, (BX) // 0fa41307
+ SHLL $7, R11, (BX) // 440fa41b07
+ SHLL $7, DX, (R11) // 410fa41307
+ SHLL $7, R11, (R11) // 450fa41b07
+ SHLL $7, DX, DX // 0fa4d207
+ SHLL $7, R11, DX // 440fa4da07
+ SHLL $7, DX, R11 // 410fa4d307
+ SHLL $7, R11, R11 // 450fa4db07
+ SHLQ CL, DX, (BX) // 480fa513
+ SHLQ CL, R11, (BX) // 4c0fa51b
+ SHLQ CL, DX, (R11) // 490fa513
+ SHLQ CL, R11, (R11) // 4d0fa51b
+ SHLQ CL, DX, DX // 480fa5d2
+ SHLQ CL, R11, DX // 4c0fa5da
+ SHLQ CL, DX, R11 // 490fa5d3
+ SHLQ CL, R11, R11 // 4d0fa5db
+ SHLQ $7, DX, (BX) // 480fa41307
+ SHLQ $7, R11, (BX) // 4c0fa41b07
+ SHLQ $7, DX, (R11) // 490fa41307
+ SHLQ $7, R11, (R11) // 4d0fa41b07
+ SHLQ $7, DX, DX // 480fa4d207
+ SHLQ $7, R11, DX // 4c0fa4da07
+ SHLQ $7, DX, R11 // 490fa4d307
+ SHLQ $7, R11, R11 // 4d0fa4db07
+ SHLXL R9, (BX), DX // c4e231f713
+ SHLXL R9, (R11), DX // c4c231f713
+ SHLXL R9, DX, DX // c4e231f7d2
+ SHLXL R9, R11, DX // c4c231f7d3
+ SHLXL R9, (BX), R11 // c46231f71b
+ SHLXL R9, (R11), R11 // c44231f71b
+ SHLXL R9, DX, R11 // c46231f7da
+ SHLXL R9, R11, R11 // c44231f7db
+ SHLXQ R14, (BX), DX // c4e289f713
+ SHLXQ R14, (R11), DX // c4c289f713
+ SHLXQ R14, DX, DX // c4e289f7d2
+ SHLXQ R14, R11, DX // c4c289f7d3
+ SHLXQ R14, (BX), R11 // c46289f71b
+ SHLXQ R14, (R11), R11 // c44289f71b
+ SHLXQ R14, DX, R11 // c46289f7da
+ SHLXQ R14, R11, R11 // c44289f7db
+ SHRW $1, (BX) // 66d12b
+ SHRW $1, (R11) // 6641d12b
+ SHRW $1, DX // 66d1ea
+ SHRW $1, R11 // 6641d1eb
+ SHRW CL, (BX) // 66d32b
+ SHRW CL, (R11) // 6641d32b
+ SHRW CL, DX // 66d3ea
+ SHRW CL, R11 // 6641d3eb
+ SHRW $7, (BX) // 66c12b07
+ SHRW $7, (R11) // 6641c12b07
+ SHRW $7, DX // 66c1ea07
+ SHRW $7, R11 // 6641c1eb07
+ SHRL $1, (BX) // d12b
+ SHRL $1, (R11) // 41d12b
+ SHRL $1, DX // d1ea
+ SHRL $1, R11 // 41d1eb
+ SHRL CL, (BX) // d32b
+ SHRL CL, (R11) // 41d32b
+ SHRL CL, DX // d3ea
+ SHRL CL, R11 // 41d3eb
+ SHRL $7, (BX) // c12b07
+ SHRL $7, (R11) // 41c12b07
+ SHRL $7, DX // c1ea07
+ SHRL $7, R11 // 41c1eb07
+ SHRQ $1, (BX) // 48d12b
+ SHRQ $1, (R11) // 49d12b
+ SHRQ $1, DX // 48d1ea
+ SHRQ $1, R11 // 49d1eb
+ SHRQ CL, (BX) // 48d32b
+ SHRQ CL, (R11) // 49d32b
+ SHRQ CL, DX // 48d3ea
+ SHRQ CL, R11 // 49d3eb
+ SHRQ $7, (BX) // 48c12b07
+ SHRQ $7, (R11) // 49c12b07
+ SHRQ $7, DX // 48c1ea07
+ SHRQ $7, R11 // 49c1eb07
+ SHRB $1, (BX) // d02b
+ SHRB $1, (R11) // 41d02b
+ SHRB $1, DL // d0ea
+ SHRB $1, R11 // 41d0eb
+ SHRB CL, (BX) // d22b
+ SHRB CL, (R11) // 41d22b
+ SHRB CL, DL // d2ea
+ SHRB CL, R11 // 41d2eb
+ SHRB $7, (BX) // c02b07
+ SHRB $7, (R11) // 41c02b07
+ SHRB $7, DL // c0ea07
+ SHRB $7, R11 // 41c0eb07
+ SHRW CL, DX, (BX) // 660fad13
+ SHRW CL, R11, (BX) // 66440fad1b
+ SHRW CL, DX, (R11) // 66410fad13
+ SHRW CL, R11, (R11) // 66450fad1b
+ SHRW CL, DX, DX // 660fadd2
+ SHRW CL, R11, DX // 66440fadda
+ SHRW CL, DX, R11 // 66410fadd3
+ SHRW CL, R11, R11 // 66450faddb
+ SHRW $7, DX, (BX) // 660fac1307
+ SHRW $7, R11, (BX) // 66440fac1b07
+ SHRW $7, DX, (R11) // 66410fac1307
+ SHRW $7, R11, (R11) // 66450fac1b07
+ SHRW $7, DX, DX // 660facd207
+ SHRW $7, R11, DX // 66440facda07
+ SHRW $7, DX, R11 // 66410facd307
+ SHRW $7, R11, R11 // 66450facdb07
+ SHRL CL, DX, (BX) // 0fad13
+ SHRL CL, R11, (BX) // 440fad1b
+ SHRL CL, DX, (R11) // 410fad13
+ SHRL CL, R11, (R11) // 450fad1b
+ SHRL CL, DX, DX // 0fadd2
+ SHRL CL, R11, DX // 440fadda
+ SHRL CL, DX, R11 // 410fadd3
+ SHRL CL, R11, R11 // 450faddb
+ SHRL $7, DX, (BX) // 0fac1307
+ SHRL $7, R11, (BX) // 440fac1b07
+ SHRL $7, DX, (R11) // 410fac1307
+ SHRL $7, R11, (R11) // 450fac1b07
+ SHRL $7, DX, DX // 0facd207
+ SHRL $7, R11, DX // 440facda07
+ SHRL $7, DX, R11 // 410facd307
+ SHRL $7, R11, R11 // 450facdb07
+ SHRQ CL, DX, (BX) // 480fad13
+ SHRQ CL, R11, (BX) // 4c0fad1b
+ SHRQ CL, DX, (R11) // 490fad13
+ SHRQ CL, R11, (R11) // 4d0fad1b
+ SHRQ CL, DX, DX // 480fadd2
+ SHRQ CL, R11, DX // 4c0fadda
+ SHRQ CL, DX, R11 // 490fadd3
+ SHRQ CL, R11, R11 // 4d0faddb
+ SHRQ $7, DX, (BX) // 480fac1307
+ SHRQ $7, R11, (BX) // 4c0fac1b07
+ SHRQ $7, DX, (R11) // 490fac1307
+ SHRQ $7, R11, (R11) // 4d0fac1b07
+ SHRQ $7, DX, DX // 480facd207
+ SHRQ $7, R11, DX // 4c0facda07
+ SHRQ $7, DX, R11 // 490facd307
+ SHRQ $7, R11, R11 // 4d0facdb07
+ SHRXL R9, (BX), DX // c4e233f713
+ SHRXL R9, (R11), DX // c4c233f713
+ SHRXL R9, DX, DX // c4e233f7d2
+ SHRXL R9, R11, DX // c4c233f7d3
+ SHRXL R9, (BX), R11 // c46233f71b
+ SHRXL R9, (R11), R11 // c44233f71b
+ SHRXL R9, DX, R11 // c46233f7da
+ SHRXL R9, R11, R11 // c44233f7db
+ SHRXQ R14, (BX), DX // c4e28bf713
+ SHRXQ R14, (R11), DX // c4c28bf713
+ SHRXQ R14, DX, DX // c4e28bf7d2
+ SHRXQ R14, R11, DX // c4c28bf7d3
+ SHRXQ R14, (BX), R11 // c4628bf71b
+ SHRXQ R14, (R11), R11 // c4428bf71b
+ SHRXQ R14, DX, R11 // c4628bf7da
+ SHRXQ R14, R11, R11 // c4428bf7db
+ SHUFPD $7, (BX), X2 // 660fc61307
+ SHUFPD $7, (R11), X2 // 66410fc61307
+ SHUFPD $7, X2, X2 // 660fc6d207
+ SHUFPD $7, X11, X2 // 66410fc6d307
+ SHUFPD $7, (BX), X11 // 66440fc61b07
+ SHUFPD $7, (R11), X11 // 66450fc61b07
+ SHUFPD $7, X2, X11 // 66440fc6da07
+ SHUFPD $7, X11, X11 // 66450fc6db07
+ SHUFPS $7, (BX), X2 // 0fc61307
+ SHUFPS $7, (R11), X2 // 410fc61307
+ SHUFPS $7, X2, X2 // 0fc6d207
+ SHUFPS $7, X11, X2 // 410fc6d307
+ SHUFPS $7, (BX), X11 // 440fc61b07
+ SHUFPS $7, (R11), X11 // 450fc61b07
+ SHUFPS $7, X2, X11 // 440fc6da07
+ SHUFPS $7, X11, X11 // 450fc6db07
+ SIDT (BX) // 0f010b
+ SIDT (R11) // 410f010b
+ SLDTW (BX) // 660f0003
+ SLDTW (R11) // 66410f0003
+ SLDTW DX // 660f00c2
+ SLDTW R11 // 66410f00c3
+ SLDTL (BX) // 0f0003
+ SLDTL (R11) // 410f0003
+ SLDTL DX // 0f00c2
+ SLDTL R11 // 410f00c3
+ SLDTQ (BX) // 480f0003
+ SLDTQ (R11) // 490f0003
+ SLDTQ DX // 480f00c2
+ SLDTQ R11 // 490f00c3
+ SMSWW (BX) // 660f0123
+ SMSWW (R11) // 66410f0123
+ SMSWW DX // 660f01e2
+ SMSWW R11 // 66410f01e3
+ SMSWL (BX) // 0f0123
+ SMSWL (R11) // 410f0123
+ SMSWL DX // 0f01e2
+ SMSWL R11 // 410f01e3
+ SMSWQ (BX) // 480f0123
+ SMSWQ (R11) // 490f0123
+ SMSWQ DX // 480f01e2
+ SMSWQ R11 // 490f01e3
+ SQRTPD (BX), X2 // 660f5113
+ SQRTPD (R11), X2 // 66410f5113
+ SQRTPD X2, X2 // 660f51d2
+ SQRTPD X11, X2 // 66410f51d3
+ SQRTPD (BX), X11 // 66440f511b
+ SQRTPD (R11), X11 // 66450f511b
+ SQRTPD X2, X11 // 66440f51da
+ SQRTPD X11, X11 // 66450f51db
+ SQRTPS (BX), X2 // 0f5113
+ SQRTPS (R11), X2 // 410f5113
+ SQRTPS X2, X2 // 0f51d2
+ SQRTPS X11, X2 // 410f51d3
+ SQRTPS (BX), X11 // 440f511b
+ SQRTPS (R11), X11 // 450f511b
+ SQRTPS X2, X11 // 440f51da
+ SQRTPS X11, X11 // 450f51db
+ SQRTSD (BX), X2 // f20f5113
+ SQRTSD (R11), X2 // f2410f5113
+ SQRTSD X2, X2 // f20f51d2
+ SQRTSD X11, X2 // f2410f51d3
+ SQRTSD (BX), X11 // f2440f511b
+ SQRTSD (R11), X11 // f2450f511b
+ SQRTSD X2, X11 // f2440f51da
+ SQRTSD X11, X11 // f2450f51db
+ SQRTSS (BX), X2 // f30f5113
+ SQRTSS (R11), X2 // f3410f5113
+ SQRTSS X2, X2 // f30f51d2
+ SQRTSS X11, X2 // f3410f51d3
+ SQRTSS (BX), X11 // f3440f511b
+ SQRTSS (R11), X11 // f3450f511b
+ SQRTSS X2, X11 // f3440f51da
+ SQRTSS X11, X11 // f3450f51db
+ STAC // 0f01cb
+ STC // f9
+ STD // fd
+ STI // fb
+ STMXCSR (BX) // 0fae1b
+ STMXCSR (R11) // 410fae1b
+ STOSB // aa
+ STOSL // ab
+ STOSQ // 48ab
+ STOSW // 66ab
+ STRW (BX) // 660f000b
+ STRW (R11) // 66410f000b
+ STRW DX // 660f00ca
+ STRW R11 // 66410f00cb
+ STRL (BX) // 0f000b
+ STRL (R11) // 410f000b
+ STRL DX // 0f00ca
+ STRL R11 // 410f00cb
+ STRQ (BX) // 480f000b
+ STRQ (R11) // 490f000b
+ STRQ DX // 480f00ca
+ STRQ R11 // 490f00cb
+ SUBB $7, AL // 2c07
+ SUBW $61731, AX // 662d23f1
+ SUBL $4045620583, AX // 2d674523f1
+ SUBQ $-249346713, AX // 482d674523f1
+ SUBW $61731, (BX) // 66812b23f1
+ SUBW $61731, (R11) // 6641812b23f1
+ SUBW $61731, DX // 6681ea23f1
+ SUBW $61731, R11 // 664181eb23f1
+ SUBW $7, (BX) // 66832b07
+ SUBW $7, (R11) // 6641832b07
+ SUBW $7, DX // 6683ea07
+ SUBW $7, R11 // 664183eb07
+ SUBW DX, (BX) // 662913
+ SUBW R11, (BX) // 6644291b
+ SUBW DX, (R11) // 66412913
+ SUBW R11, (R11) // 6645291b
+ SUBW DX, DX // 6629d2 or 662bd2
+ SUBW R11, DX // 664429da or 66412bd3
+ SUBW DX, R11 // 664129d3 or 66442bda
+ SUBW R11, R11 // 664529db or 66452bdb
+ SUBL $4045620583, (BX) // 812b674523f1
+ SUBL $4045620583, (R11) // 41812b674523f1
+ SUBL $4045620583, DX // 81ea674523f1
+ SUBL $4045620583, R11 // 4181eb674523f1
+ SUBL $7, (BX) // 832b07
+ SUBL $7, (R11) // 41832b07
+ SUBL $7, DX // 83ea07
+ SUBL $7, R11 // 4183eb07
+ SUBL DX, (BX) // 2913
+ SUBL R11, (BX) // 44291b
+ SUBL DX, (R11) // 412913
+ SUBL R11, (R11) // 45291b
+ SUBL DX, DX // 29d2 or 2bd2
+ SUBL R11, DX // 4429da or 412bd3
+ SUBL DX, R11 // 4129d3 or 442bda
+ SUBL R11, R11 // 4529db or 452bdb
+ SUBQ $-249346713, (BX) // 48812b674523f1
+ SUBQ $-249346713, (R11) // 49812b674523f1
+ SUBQ $-249346713, DX // 4881ea674523f1
+ SUBQ $-249346713, R11 // 4981eb674523f1
+ SUBQ $7, (BX) // 48832b07
+ SUBQ $7, (R11) // 49832b07
+ SUBQ $7, DX // 4883ea07
+ SUBQ $7, R11 // 4983eb07
+ SUBQ DX, (BX) // 482913
+ SUBQ R11, (BX) // 4c291b
+ SUBQ DX, (R11) // 492913
+ SUBQ R11, (R11) // 4d291b
+ SUBQ DX, DX // 4829d2 or 482bd2
+ SUBQ R11, DX // 4c29da or 492bd3
+ SUBQ DX, R11 // 4929d3 or 4c2bda
+ SUBQ R11, R11 // 4d29db or 4d2bdb
+ SUBB $7, (BX) // 802b07
+ SUBB $7, (R11) // 41802b07
+ SUBB $7, DL // 80ea07
+ SUBB $7, R11 // 4180eb07
+ SUBB DL, (BX) // 2813
+ SUBB R11, (BX) // 44281b
+ SUBB DL, (R11) // 412813
+ SUBB R11, (R11) // 45281b
+ SUBB DL, DL // 28d2 or 2ad2
+ SUBB R11, DL // 4428da or 412ad3
+ SUBB DL, R11 // 4128d3 or 442ada
+ SUBB R11, R11 // 4528db or 452adb
+ SUBW (BX), DX // 662b13
+ SUBW (R11), DX // 66412b13
+ SUBW (BX), R11 // 66442b1b
+ SUBW (R11), R11 // 66452b1b
+ SUBL (BX), DX // 2b13
+ SUBL (R11), DX // 412b13
+ SUBL (BX), R11 // 442b1b
+ SUBL (R11), R11 // 452b1b
+ SUBQ (BX), DX // 482b13
+ SUBQ (R11), DX // 492b13
+ SUBQ (BX), R11 // 4c2b1b
+ SUBQ (R11), R11 // 4d2b1b
+ SUBB (BX), DL // 2a13
+ SUBB (R11), DL // 412a13
+ SUBB (BX), R11 // 442a1b
+ SUBB (R11), R11 // 452a1b
+ SUBPD (BX), X2 // 660f5c13
+ SUBPD (R11), X2 // 66410f5c13
+ SUBPD X2, X2 // 660f5cd2
+ SUBPD X11, X2 // 66410f5cd3
+ SUBPD (BX), X11 // 66440f5c1b
+ SUBPD (R11), X11 // 66450f5c1b
+ SUBPD X2, X11 // 66440f5cda
+ SUBPD X11, X11 // 66450f5cdb
+ SUBPS (BX), X2 // 0f5c13
+ SUBPS (R11), X2 // 410f5c13
+ SUBPS X2, X2 // 0f5cd2
+ SUBPS X11, X2 // 410f5cd3
+ SUBPS (BX), X11 // 440f5c1b
+ SUBPS (R11), X11 // 450f5c1b
+ SUBPS X2, X11 // 440f5cda
+ SUBPS X11, X11 // 450f5cdb
+ SUBSD (BX), X2 // f20f5c13
+ SUBSD (R11), X2 // f2410f5c13
+ SUBSD X2, X2 // f20f5cd2
+ SUBSD X11, X2 // f2410f5cd3
+ SUBSD (BX), X11 // f2440f5c1b
+ SUBSD (R11), X11 // f2450f5c1b
+ SUBSD X2, X11 // f2440f5cda
+ SUBSD X11, X11 // f2450f5cdb
+ SUBSS (BX), X2 // f30f5c13
+ SUBSS (R11), X2 // f3410f5c13
+ SUBSS X2, X2 // f30f5cd2
+ SUBSS X11, X2 // f3410f5cd3
+ SUBSS (BX), X11 // f3440f5c1b
+ SUBSS (R11), X11 // f3450f5c1b
+ SUBSS X2, X11 // f3440f5cda
+ SUBSS X11, X11 // f3450f5cdb
+ SWAPGS // 0f01f8
+ SYSCALL // 0f05
+ SYSENTER // 0f34
+ SYSENTER64 // 480f34
+ SYSEXIT // 0f35
+ SYSEXIT64 // 480f35
+ SYSRET // 0f07
+ TESTB $7, AL // a807
+ TESTW $61731, AX // 66a923f1
+ TESTL $4045620583, AX // a9674523f1
+ TESTQ $-249346713, AX // 48a9674523f1
+ TESTW $61731, (BX) // 66f70323f1
+ TESTW $61731, (R11) // 6641f70323f1
+ TESTW $61731, DX // 66f7c223f1
+ TESTW $61731, R11 // 6641f7c323f1
+ TESTW DX, (BX) // 668513
+ TESTW R11, (BX) // 6644851b
+ TESTW DX, (R11) // 66418513
+ TESTW R11, (R11) // 6645851b
+ TESTW DX, DX // 6685d2
+ TESTW R11, DX // 664485da
+ TESTW DX, R11 // 664185d3
+ TESTW R11, R11 // 664585db
+ TESTL $4045620583, (BX) // f703674523f1
+ TESTL $4045620583, (R11) // 41f703674523f1
+ TESTL $4045620583, DX // f7c2674523f1
+ TESTL $4045620583, R11 // 41f7c3674523f1
+ TESTL DX, (BX) // 8513
+ TESTL R11, (BX) // 44851b
+ TESTL DX, (R11) // 418513
+ TESTL R11, (R11) // 45851b
+ TESTL DX, DX // 85d2
+ TESTL R11, DX // 4485da
+ TESTL DX, R11 // 4185d3
+ TESTL R11, R11 // 4585db
+ TESTQ $-249346713, (BX) // 48f703674523f1
+ TESTQ $-249346713, (R11) // 49f703674523f1
+ TESTQ $-249346713, DX // 48f7c2674523f1
+ TESTQ $-249346713, R11 // 49f7c3674523f1
+ TESTQ DX, (BX) // 488513
+ TESTQ R11, (BX) // 4c851b
+ TESTQ DX, (R11) // 498513
+ TESTQ R11, (R11) // 4d851b
+ TESTQ DX, DX // 4885d2
+ TESTQ R11, DX // 4c85da
+ TESTQ DX, R11 // 4985d3
+ TESTQ R11, R11 // 4d85db
+ TESTB $7, (BX) // f60307
+ TESTB $7, (R11) // 41f60307
+ TESTB $7, DL // f6c207
+ TESTB $7, R11 // 41f6c307
+ TESTB DL, (BX) // 8413
+ TESTB R11, (BX) // 44841b
+ TESTB DL, (R11) // 418413
+ TESTB R11, (R11) // 45841b
+ TESTB DL, DL // 84d2
+ TESTB R11, DL // 4484da
+ TESTB DL, R11 // 4184d3
+ TESTB R11, R11 // 4584db
+ TZCNTW (BX), DX // 66f30fbc13
+ TZCNTW (R11), DX // 66f3410fbc13
+ TZCNTW DX, DX // 66f30fbcd2
+ TZCNTW R11, DX // 66f3410fbcd3
+ TZCNTW (BX), R11 // 66f3440fbc1b
+ TZCNTW (R11), R11 // 66f3450fbc1b
+ TZCNTW DX, R11 // 66f3440fbcda
+ TZCNTW R11, R11 // 66f3450fbcdb
+ TZCNTL (BX), DX // f30fbc13
+ TZCNTL (R11), DX // f3410fbc13
+ TZCNTL DX, DX // f30fbcd2
+ TZCNTL R11, DX // f3410fbcd3
+ TZCNTL (BX), R11 // f3440fbc1b
+ TZCNTL (R11), R11 // f3450fbc1b
+ TZCNTL DX, R11 // f3440fbcda
+ TZCNTL R11, R11 // f3450fbcdb
+ TZCNTQ (BX), DX // f3480fbc13
+ TZCNTQ (R11), DX // f3490fbc13
+ TZCNTQ DX, DX // f3480fbcd2
+ TZCNTQ R11, DX // f3490fbcd3
+ TZCNTQ (BX), R11 // f34c0fbc1b
+ TZCNTQ (R11), R11 // f34d0fbc1b
+ TZCNTQ DX, R11 // f34c0fbcda
+ TZCNTQ R11, R11 // f34d0fbcdb
+ UCOMISD (BX), X2 // 660f2e13
+ UCOMISD (R11), X2 // 66410f2e13
+ UCOMISD X2, X2 // 660f2ed2
+ UCOMISD X11, X2 // 66410f2ed3
+ UCOMISD (BX), X11 // 66440f2e1b
+ UCOMISD (R11), X11 // 66450f2e1b
+ UCOMISD X2, X11 // 66440f2eda
+ UCOMISD X11, X11 // 66450f2edb
+ UCOMISS (BX), X2 // 0f2e13
+ UCOMISS (R11), X2 // 410f2e13
+ UCOMISS X2, X2 // 0f2ed2
+ UCOMISS X11, X2 // 410f2ed3
+ UCOMISS (BX), X11 // 440f2e1b
+ UCOMISS (R11), X11 // 450f2e1b
+ UCOMISS X2, X11 // 440f2eda
+ UCOMISS X11, X11 // 450f2edb
+ UD1 // 0fb9
+ UD2 // 0f0b
+ UNPCKHPD (BX), X2 // 660f1513
+ UNPCKHPD (R11), X2 // 66410f1513
+ UNPCKHPD X2, X2 // 660f15d2
+ UNPCKHPD X11, X2 // 66410f15d3
+ UNPCKHPD (BX), X11 // 66440f151b
+ UNPCKHPD (R11), X11 // 66450f151b
+ UNPCKHPD X2, X11 // 66440f15da
+ UNPCKHPD X11, X11 // 66450f15db
+ UNPCKHPS (BX), X2 // 0f1513
+ UNPCKHPS (R11), X2 // 410f1513
+ UNPCKHPS X2, X2 // 0f15d2
+ UNPCKHPS X11, X2 // 410f15d3
+ UNPCKHPS (BX), X11 // 440f151b
+ UNPCKHPS (R11), X11 // 450f151b
+ UNPCKHPS X2, X11 // 440f15da
+ UNPCKHPS X11, X11 // 450f15db
+ UNPCKLPD (BX), X2 // 660f1413
+ UNPCKLPD (R11), X2 // 66410f1413
+ UNPCKLPD X2, X2 // 660f14d2
+ UNPCKLPD X11, X2 // 66410f14d3
+ UNPCKLPD (BX), X11 // 66440f141b
+ UNPCKLPD (R11), X11 // 66450f141b
+ UNPCKLPD X2, X11 // 66440f14da
+ UNPCKLPD X11, X11 // 66450f14db
+ UNPCKLPS (BX), X2 // 0f1413
+ UNPCKLPS (R11), X2 // 410f1413
+ UNPCKLPS X2, X2 // 0f14d2
+ UNPCKLPS X11, X2 // 410f14d3
+ UNPCKLPS (BX), X11 // 440f141b
+ UNPCKLPS (R11), X11 // 450f141b
+ UNPCKLPS X2, X11 // 440f14da
+ UNPCKLPS X11, X11 // 450f14db
+ VADDPD (BX), X9, X2 // c4e1315813 or c5b15813
+ VADDPD (R11), X9, X2 // c4c1315813
+ VADDPD X2, X9, X2 // c4e13158d2 or c5b158d2
+ VADDPD X11, X9, X2 // c4c13158d3
+ VADDPD (BX), X9, X11 // c46131581b or c531581b
+ VADDPD (R11), X9, X11 // c44131581b
+ VADDPD X2, X9, X11 // c4613158da or c53158da
+ VADDPD X11, X9, X11 // c4413158db
+ VADDPD (BX), Y15, Y2 // c4e1055813 or c5855813
+ VADDPD (R11), Y15, Y2 // c4c1055813
+ VADDPD Y2, Y15, Y2 // c4e10558d2 or c58558d2
+ VADDPD Y11, Y15, Y2 // c4c10558d3
+ VADDPD (BX), Y15, Y11 // c46105581b or c505581b
+ VADDPD (R11), Y15, Y11 // c44105581b
+ VADDPD Y2, Y15, Y11 // c4610558da or c50558da
+ VADDPD Y11, Y15, Y11 // c4410558db
+ VADDPS (BX), X9, X2 // c4e1305813 or c5b05813
+ VADDPS (R11), X9, X2 // c4c1305813
+ VADDPS X2, X9, X2 // c4e13058d2 or c5b058d2
+ VADDPS X11, X9, X2 // c4c13058d3
+ VADDPS (BX), X9, X11 // c46130581b or c530581b
+ VADDPS (R11), X9, X11 // c44130581b
+ VADDPS X2, X9, X11 // c4613058da or c53058da
+ VADDPS X11, X9, X11 // c4413058db
+ VADDPS (BX), Y15, Y2 // c4e1045813 or c5845813
+ VADDPS (R11), Y15, Y2 // c4c1045813
+ VADDPS Y2, Y15, Y2 // c4e10458d2 or c58458d2
+ VADDPS Y11, Y15, Y2 // c4c10458d3
+ VADDPS (BX), Y15, Y11 // c46104581b or c504581b
+ VADDPS (R11), Y15, Y11 // c44104581b
+ VADDPS Y2, Y15, Y11 // c4610458da or c50458da
+ VADDPS Y11, Y15, Y11 // c4410458db
+ VADDSD (BX), X9, X2 // c4e1335813 or c5b35813
+ VADDSD (R11), X9, X2 // c4c1335813
+ VADDSD X2, X9, X2 // c4e13358d2 or c5b358d2
+ VADDSD X11, X9, X2 // c4c13358d3
+ VADDSD (BX), X9, X11 // c46133581b or c533581b
+ VADDSD (R11), X9, X11 // c44133581b
+ VADDSD X2, X9, X11 // c4613358da or c53358da
+ VADDSD X11, X9, X11 // c4413358db
+ VADDSS (BX), X9, X2 // c4e1325813 or c5b25813
+ VADDSS (R11), X9, X2 // c4c1325813
+ VADDSS X2, X9, X2 // c4e13258d2 or c5b258d2
+ VADDSS X11, X9, X2 // c4c13258d3
+ VADDSS (BX), X9, X11 // c46132581b or c532581b
+ VADDSS (R11), X9, X11 // c44132581b
+ VADDSS X2, X9, X11 // c4613258da or c53258da
+ VADDSS X11, X9, X11 // c4413258db
+ VADDSUBPD (BX), X9, X2 // c4e131d013 or c5b1d013
+ VADDSUBPD (R11), X9, X2 // c4c131d013
+ VADDSUBPD X2, X9, X2 // c4e131d0d2 or c5b1d0d2
+ VADDSUBPD X11, X9, X2 // c4c131d0d3
+ VADDSUBPD (BX), X9, X11 // c46131d01b or c531d01b
+ VADDSUBPD (R11), X9, X11 // c44131d01b
+ VADDSUBPD X2, X9, X11 // c46131d0da or c531d0da
+ VADDSUBPD X11, X9, X11 // c44131d0db
+ VADDSUBPD (BX), Y15, Y2 // c4e105d013 or c585d013
+ VADDSUBPD (R11), Y15, Y2 // c4c105d013
+ VADDSUBPD Y2, Y15, Y2 // c4e105d0d2 or c585d0d2
+ VADDSUBPD Y11, Y15, Y2 // c4c105d0d3
+ VADDSUBPD (BX), Y15, Y11 // c46105d01b or c505d01b
+ VADDSUBPD (R11), Y15, Y11 // c44105d01b
+ VADDSUBPD Y2, Y15, Y11 // c46105d0da or c505d0da
+ VADDSUBPD Y11, Y15, Y11 // c44105d0db
+ VADDSUBPS (BX), X9, X2 // c4e133d013 or c5b3d013
+ VADDSUBPS (R11), X9, X2 // c4c133d013
+ VADDSUBPS X2, X9, X2 // c4e133d0d2 or c5b3d0d2
+ VADDSUBPS X11, X9, X2 // c4c133d0d3
+ VADDSUBPS (BX), X9, X11 // c46133d01b or c533d01b
+ VADDSUBPS (R11), X9, X11 // c44133d01b
+ VADDSUBPS X2, X9, X11 // c46133d0da or c533d0da
+ VADDSUBPS X11, X9, X11 // c44133d0db
+ VADDSUBPS (BX), Y15, Y2 // c4e107d013 or c587d013
+ VADDSUBPS (R11), Y15, Y2 // c4c107d013
+ VADDSUBPS Y2, Y15, Y2 // c4e107d0d2 or c587d0d2
+ VADDSUBPS Y11, Y15, Y2 // c4c107d0d3
+ VADDSUBPS (BX), Y15, Y11 // c46107d01b or c507d01b
+ VADDSUBPS (R11), Y15, Y11 // c44107d01b
+ VADDSUBPS Y2, Y15, Y11 // c46107d0da or c507d0da
+ VADDSUBPS Y11, Y15, Y11 // c44107d0db
+ VAESDEC (BX), X9, X2 // c4e231de13
+ VAESDEC (R11), X9, X2 // c4c231de13
+ VAESDEC X2, X9, X2 // c4e231ded2
+ VAESDEC X11, X9, X2 // c4c231ded3
+ VAESDEC (BX), X9, X11 // c46231de1b
+ VAESDEC (R11), X9, X11 // c44231de1b
+ VAESDEC X2, X9, X11 // c46231deda
+ VAESDEC X11, X9, X11 // c44231dedb
+ VAESDECLAST (BX), X9, X2 // c4e231df13
+ VAESDECLAST (R11), X9, X2 // c4c231df13
+ VAESDECLAST X2, X9, X2 // c4e231dfd2
+ VAESDECLAST X11, X9, X2 // c4c231dfd3
+ VAESDECLAST (BX), X9, X11 // c46231df1b
+ VAESDECLAST (R11), X9, X11 // c44231df1b
+ VAESDECLAST X2, X9, X11 // c46231dfda
+ VAESDECLAST X11, X9, X11 // c44231dfdb
+ VAESENC (BX), X9, X2 // c4e231dc13
+ VAESENC (R11), X9, X2 // c4c231dc13
+ VAESENC X2, X9, X2 // c4e231dcd2
+ VAESENC X11, X9, X2 // c4c231dcd3
+ VAESENC (BX), X9, X11 // c46231dc1b
+ VAESENC (R11), X9, X11 // c44231dc1b
+ VAESENC X2, X9, X11 // c46231dcda
+ VAESENC X11, X9, X11 // c44231dcdb
+ VAESENCLAST (BX), X9, X2 // c4e231dd13
+ VAESENCLAST (R11), X9, X2 // c4c231dd13
+ VAESENCLAST X2, X9, X2 // c4e231ddd2
+ VAESENCLAST X11, X9, X2 // c4c231ddd3
+ VAESENCLAST (BX), X9, X11 // c46231dd1b
+ VAESENCLAST (R11), X9, X11 // c44231dd1b
+ VAESENCLAST X2, X9, X11 // c46231ddda
+ VAESENCLAST X11, X9, X11 // c44231dddb
+ VAESIMC (BX), X2 // c4e279db13
+ VAESIMC (R11), X2 // c4c279db13
+ VAESIMC X2, X2 // c4e279dbd2
+ VAESIMC X11, X2 // c4c279dbd3
+ VAESIMC (BX), X11 // c46279db1b
+ VAESIMC (R11), X11 // c44279db1b
+ VAESIMC X2, X11 // c46279dbda
+ VAESIMC X11, X11 // c44279dbdb
+ VAESKEYGENASSIST $7, (BX), X2 // c4e379df1307
+ VAESKEYGENASSIST $7, (R11), X2 // c4c379df1307
+ VAESKEYGENASSIST $7, X2, X2 // c4e379dfd207
+ VAESKEYGENASSIST $7, X11, X2 // c4c379dfd307
+ VAESKEYGENASSIST $7, (BX), X11 // c46379df1b07
+ VAESKEYGENASSIST $7, (R11), X11 // c44379df1b07
+ VAESKEYGENASSIST $7, X2, X11 // c46379dfda07
+ VAESKEYGENASSIST $7, X11, X11 // c44379dfdb07
+ VANDNPD (BX), X9, X2 // c4e1315513 or c5b15513
+ VANDNPD (R11), X9, X2 // c4c1315513
+ VANDNPD X2, X9, X2 // c4e13155d2 or c5b155d2
+ VANDNPD X11, X9, X2 // c4c13155d3
+ VANDNPD (BX), X9, X11 // c46131551b or c531551b
+ VANDNPD (R11), X9, X11 // c44131551b
+ VANDNPD X2, X9, X11 // c4613155da or c53155da
+ VANDNPD X11, X9, X11 // c4413155db
+ VANDNPD (BX), Y15, Y2 // c4e1055513 or c5855513
+ VANDNPD (R11), Y15, Y2 // c4c1055513
+ VANDNPD Y2, Y15, Y2 // c4e10555d2 or c58555d2
+ VANDNPD Y11, Y15, Y2 // c4c10555d3
+ VANDNPD (BX), Y15, Y11 // c46105551b or c505551b
+ VANDNPD (R11), Y15, Y11 // c44105551b
+ VANDNPD Y2, Y15, Y11 // c4610555da or c50555da
+ VANDNPD Y11, Y15, Y11 // c4410555db
+ VANDNPS (BX), X9, X2 // c4e1305513 or c5b05513
+ VANDNPS (R11), X9, X2 // c4c1305513
+ VANDNPS X2, X9, X2 // c4e13055d2 or c5b055d2
+ VANDNPS X11, X9, X2 // c4c13055d3
+ VANDNPS (BX), X9, X11 // c46130551b or c530551b
+ VANDNPS (R11), X9, X11 // c44130551b
+ VANDNPS X2, X9, X11 // c4613055da or c53055da
+ VANDNPS X11, X9, X11 // c4413055db
+ VANDNPS (BX), Y15, Y2 // c4e1045513 or c5845513
+ VANDNPS (R11), Y15, Y2 // c4c1045513
+ VANDNPS Y2, Y15, Y2 // c4e10455d2 or c58455d2
+ VANDNPS Y11, Y15, Y2 // c4c10455d3
+ VANDNPS (BX), Y15, Y11 // c46104551b or c504551b
+ VANDNPS (R11), Y15, Y11 // c44104551b
+ VANDNPS Y2, Y15, Y11 // c4610455da or c50455da
+ VANDNPS Y11, Y15, Y11 // c4410455db
+ VANDPD (BX), X9, X2 // c4e1315413 or c5b15413
+ VANDPD (R11), X9, X2 // c4c1315413
+ VANDPD X2, X9, X2 // c4e13154d2 or c5b154d2
+ VANDPD X11, X9, X2 // c4c13154d3
+ VANDPD (BX), X9, X11 // c46131541b or c531541b
+ VANDPD (R11), X9, X11 // c44131541b
+ VANDPD X2, X9, X11 // c4613154da or c53154da
+ VANDPD X11, X9, X11 // c4413154db
+ VANDPD (BX), Y15, Y2 // c4e1055413 or c5855413
+ VANDPD (R11), Y15, Y2 // c4c1055413
+ VANDPD Y2, Y15, Y2 // c4e10554d2 or c58554d2
+ VANDPD Y11, Y15, Y2 // c4c10554d3
+ VANDPD (BX), Y15, Y11 // c46105541b or c505541b
+ VANDPD (R11), Y15, Y11 // c44105541b
+ VANDPD Y2, Y15, Y11 // c4610554da or c50554da
+ VANDPD Y11, Y15, Y11 // c4410554db
+ VANDPS (BX), X9, X2 // c4e1305413 or c5b05413
+ VANDPS (R11), X9, X2 // c4c1305413
+ VANDPS X2, X9, X2 // c4e13054d2 or c5b054d2
+ VANDPS X11, X9, X2 // c4c13054d3
+ VANDPS (BX), X9, X11 // c46130541b or c530541b
+ VANDPS (R11), X9, X11 // c44130541b
+ VANDPS X2, X9, X11 // c4613054da or c53054da
+ VANDPS X11, X9, X11 // c4413054db
+ VANDPS (BX), Y15, Y2 // c4e1045413 or c5845413
+ VANDPS (R11), Y15, Y2 // c4c1045413
+ VANDPS Y2, Y15, Y2 // c4e10454d2 or c58454d2
+ VANDPS Y11, Y15, Y2 // c4c10454d3
+ VANDPS (BX), Y15, Y11 // c46104541b or c504541b
+ VANDPS (R11), Y15, Y11 // c44104541b
+ VANDPS Y2, Y15, Y11 // c4610454da or c50454da
+ VANDPS Y11, Y15, Y11 // c4410454db
+ VBLENDPD $7, (BX), X9, X2 // c4e3310d1307
+ VBLENDPD $7, (R11), X9, X2 // c4c3310d1307
+ VBLENDPD $7, X2, X9, X2 // c4e3310dd207
+ VBLENDPD $7, X11, X9, X2 // c4c3310dd307
+ VBLENDPD $7, (BX), X9, X11 // c463310d1b07
+ VBLENDPD $7, (R11), X9, X11 // c443310d1b07
+ VBLENDPD $7, X2, X9, X11 // c463310dda07
+ VBLENDPD $7, X11, X9, X11 // c443310ddb07
+ VBLENDPD $7, (BX), Y15, Y2 // c4e3050d1307
+ VBLENDPD $7, (R11), Y15, Y2 // c4c3050d1307
+ VBLENDPD $7, Y2, Y15, Y2 // c4e3050dd207
+ VBLENDPD $7, Y11, Y15, Y2 // c4c3050dd307
+ VBLENDPD $7, (BX), Y15, Y11 // c463050d1b07
+ VBLENDPD $7, (R11), Y15, Y11 // c443050d1b07
+ VBLENDPD $7, Y2, Y15, Y11 // c463050dda07
+ VBLENDPD $7, Y11, Y15, Y11 // c443050ddb07
+ VBLENDPS $7, (BX), X9, X2 // c4e3310c1307
+ VBLENDPS $7, (R11), X9, X2 // c4c3310c1307
+ VBLENDPS $7, X2, X9, X2 // c4e3310cd207
+ VBLENDPS $7, X11, X9, X2 // c4c3310cd307
+ VBLENDPS $7, (BX), X9, X11 // c463310c1b07
+ VBLENDPS $7, (R11), X9, X11 // c443310c1b07
+ VBLENDPS $7, X2, X9, X11 // c463310cda07
+ VBLENDPS $7, X11, X9, X11 // c443310cdb07
+ VBLENDPS $7, (BX), Y15, Y2 // c4e3050c1307
+ VBLENDPS $7, (R11), Y15, Y2 // c4c3050c1307
+ VBLENDPS $7, Y2, Y15, Y2 // c4e3050cd207
+ VBLENDPS $7, Y11, Y15, Y2 // c4c3050cd307
+ VBLENDPS $7, (BX), Y15, Y11 // c463050c1b07
+ VBLENDPS $7, (R11), Y15, Y11 // c443050c1b07
+ VBLENDPS $7, Y2, Y15, Y11 // c463050cda07
+ VBLENDPS $7, Y11, Y15, Y11 // c443050cdb07
+ VBLENDVPD X12, (BX), X9, X2 // c4e3314b13c0
+ VBLENDVPD X12, (R11), X9, X2 // c4c3314b13c0
+ VBLENDVPD X12, X2, X9, X2 // c4e3314bd2c0
+ VBLENDVPD X12, X11, X9, X2 // c4c3314bd3c0
+ VBLENDVPD X12, (BX), X9, X11 // c463314b1bc0
+ VBLENDVPD X12, (R11), X9, X11 // c443314b1bc0
+ VBLENDVPD X12, X2, X9, X11 // c463314bdac0
+ VBLENDVPD X12, X11, X9, X11 // c443314bdbc0
+ VBLENDVPD Y13, (BX), Y15, Y2 // c4e3054b13d0
+ VBLENDVPD Y13, (R11), Y15, Y2 // c4c3054b13d0
+ VBLENDVPD Y13, Y2, Y15, Y2 // c4e3054bd2d0
+ VBLENDVPD Y13, Y11, Y15, Y2 // c4c3054bd3d0
+ VBLENDVPD Y13, (BX), Y15, Y11 // c463054b1bd0
+ VBLENDVPD Y13, (R11), Y15, Y11 // c443054b1bd0
+ VBLENDVPD Y13, Y2, Y15, Y11 // c463054bdad0
+ VBLENDVPD Y13, Y11, Y15, Y11 // c443054bdbd0
+ VBLENDVPS X12, (BX), X9, X2 // c4e3314a13c0
+ VBLENDVPS X12, (R11), X9, X2 // c4c3314a13c0
+ VBLENDVPS X12, X2, X9, X2 // c4e3314ad2c0
+ VBLENDVPS X12, X11, X9, X2 // c4c3314ad3c0
+ VBLENDVPS X12, (BX), X9, X11 // c463314a1bc0
+ VBLENDVPS X12, (R11), X9, X11 // c443314a1bc0
+ VBLENDVPS X12, X2, X9, X11 // c463314adac0
+ VBLENDVPS X12, X11, X9, X11 // c443314adbc0
+ VBLENDVPS Y13, (BX), Y15, Y2 // c4e3054a13d0
+ VBLENDVPS Y13, (R11), Y15, Y2 // c4c3054a13d0
+ VBLENDVPS Y13, Y2, Y15, Y2 // c4e3054ad2d0
+ VBLENDVPS Y13, Y11, Y15, Y2 // c4c3054ad3d0
+ VBLENDVPS Y13, (BX), Y15, Y11 // c463054a1bd0
+ VBLENDVPS Y13, (R11), Y15, Y11 // c443054a1bd0
+ VBLENDVPS Y13, Y2, Y15, Y11 // c463054adad0
+ VBLENDVPS Y13, Y11, Y15, Y11 // c443054adbd0
+ VBROADCASTF128 (BX), Y2 // c4e27d1a13
+ VBROADCASTF128 (R11), Y2 // c4c27d1a13
+ VBROADCASTF128 (BX), Y11 // c4627d1a1b
+ VBROADCASTF128 (R11), Y11 // c4427d1a1b
+ VBROADCASTI128 (BX), Y2 // c4e27d5a13
+ VBROADCASTI128 (R11), Y2 // c4c27d5a13
+ VBROADCASTI128 (BX), Y11 // c4627d5a1b
+ VBROADCASTI128 (R11), Y11 // c4427d5a1b
+ VBROADCASTSD (BX), Y2 // c4e27d1913
+ VBROADCASTSD (R11), Y2 // c4c27d1913
+ VBROADCASTSD (BX), Y11 // c4627d191b
+ VBROADCASTSD (R11), Y11 // c4427d191b
+ VBROADCASTSD X2, Y2 // c4e27d19d2
+ VBROADCASTSD X11, Y2 // c4c27d19d3
+ VBROADCASTSD X2, Y11 // c4627d19da
+ VBROADCASTSD X11, Y11 // c4427d19db
+ VBROADCASTSS (BX), X2 // c4e2791813
+ VBROADCASTSS (R11), X2 // c4c2791813
+ VBROADCASTSS (BX), X11 // c46279181b
+ VBROADCASTSS (R11), X11 // c44279181b
+ VBROADCASTSS X2, X2 // c4e27918d2
+ VBROADCASTSS X11, X2 // c4c27918d3
+ VBROADCASTSS X2, X11 // c4627918da
+ VBROADCASTSS X11, X11 // c4427918db
+ VBROADCASTSS (BX), Y2 // c4e27d1813
+ VBROADCASTSS (R11), Y2 // c4c27d1813
+ VBROADCASTSS (BX), Y11 // c4627d181b
+ VBROADCASTSS (R11), Y11 // c4427d181b
+ VBROADCASTSS X2, Y2 // c4e27d18d2
+ VBROADCASTSS X11, Y2 // c4c27d18d3
+ VBROADCASTSS X2, Y11 // c4627d18da
+ VBROADCASTSS X11, Y11 // c4427d18db
+ VCMPPD $7, (BX), X9, X2 // c4e131c21307 or c5b1c21307
+ VCMPPD $7, (R11), X9, X2 // c4c131c21307
+ VCMPPD $7, X2, X9, X2 // c4e131c2d207 or c5b1c2d207
+ VCMPPD $7, X11, X9, X2 // c4c131c2d307
+ VCMPPD $7, (BX), X9, X11 // c46131c21b07 or c531c21b07
+ VCMPPD $7, (R11), X9, X11 // c44131c21b07
+ VCMPPD $7, X2, X9, X11 // c46131c2da07 or c531c2da07
+ VCMPPD $7, X11, X9, X11 // c44131c2db07
+ VCMPPD $7, (BX), Y15, Y2 // c4e105c21307 or c585c21307
+ VCMPPD $7, (R11), Y15, Y2 // c4c105c21307
+ VCMPPD $7, Y2, Y15, Y2 // c4e105c2d207 or c585c2d207
+ VCMPPD $7, Y11, Y15, Y2 // c4c105c2d307
+ VCMPPD $7, (BX), Y15, Y11 // c46105c21b07 or c505c21b07
+ VCMPPD $7, (R11), Y15, Y11 // c44105c21b07
+ VCMPPD $7, Y2, Y15, Y11 // c46105c2da07 or c505c2da07
+ VCMPPD $7, Y11, Y15, Y11 // c44105c2db07
+ VCMPPS $7, (BX), X9, X2 // c4e130c21307 or c5b0c21307
+ VCMPPS $7, (R11), X9, X2 // c4c130c21307
+ VCMPPS $7, X2, X9, X2 // c4e130c2d207 or c5b0c2d207
+ VCMPPS $7, X11, X9, X2 // c4c130c2d307
+ VCMPPS $7, (BX), X9, X11 // c46130c21b07 or c530c21b07
+ VCMPPS $7, (R11), X9, X11 // c44130c21b07
+ VCMPPS $7, X2, X9, X11 // c46130c2da07 or c530c2da07
+ VCMPPS $7, X11, X9, X11 // c44130c2db07
+ VCMPPS $7, (BX), Y15, Y2 // c4e104c21307 or c584c21307
+ VCMPPS $7, (R11), Y15, Y2 // c4c104c21307
+ VCMPPS $7, Y2, Y15, Y2 // c4e104c2d207 or c584c2d207
+ VCMPPS $7, Y11, Y15, Y2 // c4c104c2d307
+ VCMPPS $7, (BX), Y15, Y11 // c46104c21b07 or c504c21b07
+ VCMPPS $7, (R11), Y15, Y11 // c44104c21b07
+ VCMPPS $7, Y2, Y15, Y11 // c46104c2da07 or c504c2da07
+ VCMPPS $7, Y11, Y15, Y11 // c44104c2db07
+ VCMPSD $7, (BX), X9, X2 // c4e133c21307 or c5b3c21307
+ VCMPSD $7, (R11), X9, X2 // c4c133c21307
+ VCMPSD $7, X2, X9, X2 // c4e133c2d207 or c5b3c2d207
+ VCMPSD $7, X11, X9, X2 // c4c133c2d307
+ VCMPSD $7, (BX), X9, X11 // c46133c21b07 or c533c21b07
+ VCMPSD $7, (R11), X9, X11 // c44133c21b07
+ VCMPSD $7, X2, X9, X11 // c46133c2da07 or c533c2da07
+ VCMPSD $7, X11, X9, X11 // c44133c2db07
+ VCMPSS $7, (BX), X9, X2 // c4e132c21307 or c5b2c21307
+ VCMPSS $7, (R11), X9, X2 // c4c132c21307
+ VCMPSS $7, X2, X9, X2 // c4e132c2d207 or c5b2c2d207
+ VCMPSS $7, X11, X9, X2 // c4c132c2d307
+ VCMPSS $7, (BX), X9, X11 // c46132c21b07 or c532c21b07
+ VCMPSS $7, (R11), X9, X11 // c44132c21b07
+ VCMPSS $7, X2, X9, X11 // c46132c2da07 or c532c2da07
+ VCMPSS $7, X11, X9, X11 // c44132c2db07
+ VCOMISD (BX), X2 // c4e1792f13 or c5f92f13
+ VCOMISD (R11), X2 // c4c1792f13
+ VCOMISD X2, X2 // c4e1792fd2 or c5f92fd2
+ VCOMISD X11, X2 // c4c1792fd3
+ VCOMISD (BX), X11 // c461792f1b or c5792f1b
+ VCOMISD (R11), X11 // c441792f1b
+ VCOMISD X2, X11 // c461792fda or c5792fda
+ VCOMISD X11, X11 // c441792fdb
+ VCOMISS (BX), X2 // c4e1782f13 or c5f82f13
+ VCOMISS (R11), X2 // c4c1782f13
+ VCOMISS X2, X2 // c4e1782fd2 or c5f82fd2
+ VCOMISS X11, X2 // c4c1782fd3
+ VCOMISS (BX), X11 // c461782f1b or c5782f1b
+ VCOMISS (R11), X11 // c441782f1b
+ VCOMISS X2, X11 // c461782fda or c5782fda
+ VCOMISS X11, X11 // c441782fdb
+ VCVTDQ2PD (BX), X2 // c4e17ae613 or c5fae613
+ VCVTDQ2PD (R11), X2 // c4c17ae613
+ VCVTDQ2PD X2, X2 // c4e17ae6d2 or c5fae6d2
+ VCVTDQ2PD X11, X2 // c4c17ae6d3
+ VCVTDQ2PD (BX), X11 // c4617ae61b or c57ae61b
+ VCVTDQ2PD (R11), X11 // c4417ae61b
+ VCVTDQ2PD X2, X11 // c4617ae6da or c57ae6da
+ VCVTDQ2PD X11, X11 // c4417ae6db
+ VCVTDQ2PD (BX), Y2 // c4e17ee613 or c5fee613
+ VCVTDQ2PD (R11), Y2 // c4c17ee613
+ VCVTDQ2PD X2, Y2 // c4e17ee6d2 or c5fee6d2
+ VCVTDQ2PD X11, Y2 // c4c17ee6d3
+ VCVTDQ2PD (BX), Y11 // c4617ee61b or c57ee61b
+ VCVTDQ2PD (R11), Y11 // c4417ee61b
+ VCVTDQ2PD X2, Y11 // c4617ee6da or c57ee6da
+ VCVTDQ2PD X11, Y11 // c4417ee6db
+ VCVTDQ2PS (BX), X2 // c4e1785b13 or c5f85b13
+ VCVTDQ2PS (R11), X2 // c4c1785b13
+ VCVTDQ2PS X2, X2 // c4e1785bd2 or c5f85bd2
+ VCVTDQ2PS X11, X2 // c4c1785bd3
+ VCVTDQ2PS (BX), X11 // c461785b1b or c5785b1b
+ VCVTDQ2PS (R11), X11 // c441785b1b
+ VCVTDQ2PS X2, X11 // c461785bda or c5785bda
+ VCVTDQ2PS X11, X11 // c441785bdb
+ VCVTDQ2PS (BX), Y2 // c4e17c5b13 or c5fc5b13
+ VCVTDQ2PS (R11), Y2 // c4c17c5b13
+ VCVTDQ2PS Y2, Y2 // c4e17c5bd2 or c5fc5bd2
+ VCVTDQ2PS Y11, Y2 // c4c17c5bd3
+ VCVTDQ2PS (BX), Y11 // c4617c5b1b or c57c5b1b
+ VCVTDQ2PS (R11), Y11 // c4417c5b1b
+ VCVTDQ2PS Y2, Y11 // c4617c5bda or c57c5bda
+ VCVTDQ2PS Y11, Y11 // c4417c5bdb
+ VCVTPD2DQX (BX), X2 // c4e17be613 or c5fbe613
+ VCVTPD2DQX (R11), X2 // c4c17be613
+ VCVTPD2DQX X2, X2 // c4e17be6d2 or c5fbe6d2
+ VCVTPD2DQX X11, X2 // c4c17be6d3
+ VCVTPD2DQX (BX), X11 // c4617be61b or c57be61b
+ VCVTPD2DQX (R11), X11 // c4417be61b
+ VCVTPD2DQX X2, X11 // c4617be6da or c57be6da
+ VCVTPD2DQX X11, X11 // c4417be6db
+ VCVTPD2DQY (BX), X2 // c4e17fe613 or c5ffe613
+ VCVTPD2DQY (R11), X2 // c4c17fe613
+ VCVTPD2DQY Y2, X2 // c4e17fe6d2 or c5ffe6d2
+ VCVTPD2DQY Y11, X2 // c4c17fe6d3
+ VCVTPD2DQY (BX), X11 // c4617fe61b or c57fe61b
+ VCVTPD2DQY (R11), X11 // c4417fe61b
+ VCVTPD2DQY Y2, X11 // c4617fe6da or c57fe6da
+ VCVTPD2DQY Y11, X11 // c4417fe6db
+ VCVTPD2PSX (BX), X2 // c4e1795a13 or c5f95a13
+ VCVTPD2PSX (R11), X2 // c4c1795a13
+ VCVTPD2PSX X2, X2 // c4e1795ad2 or c5f95ad2
+ VCVTPD2PSX X11, X2 // c4c1795ad3
+ VCVTPD2PSX (BX), X11 // c461795a1b or c5795a1b
+ VCVTPD2PSX (R11), X11 // c441795a1b
+ VCVTPD2PSX X2, X11 // c461795ada or c5795ada
+ VCVTPD2PSX X11, X11 // c441795adb
+ VCVTPD2PSY (BX), X2 // c4e17d5a13 or c5fd5a13
+ VCVTPD2PSY (R11), X2 // c4c17d5a13
+ VCVTPD2PSY Y2, X2 // c4e17d5ad2 or c5fd5ad2
+ VCVTPD2PSY Y11, X2 // c4c17d5ad3
+ VCVTPD2PSY (BX), X11 // c4617d5a1b or c57d5a1b
+ VCVTPD2PSY (R11), X11 // c4417d5a1b
+ VCVTPD2PSY Y2, X11 // c4617d5ada or c57d5ada
+ VCVTPD2PSY Y11, X11 // c4417d5adb
+ VCVTPH2PS (BX), X2 // c4e2791313
+ VCVTPH2PS (R11), X2 // c4c2791313
+ VCVTPH2PS X2, X2 // c4e27913d2
+ VCVTPH2PS X11, X2 // c4c27913d3
+ VCVTPH2PS (BX), X11 // c46279131b
+ VCVTPH2PS (R11), X11 // c44279131b
+ VCVTPH2PS X2, X11 // c4627913da
+ VCVTPH2PS X11, X11 // c4427913db
+ VCVTPH2PS (BX), Y2 // c4e27d1313
+ VCVTPH2PS (R11), Y2 // c4c27d1313
+ VCVTPH2PS X2, Y2 // c4e27d13d2
+ VCVTPH2PS X11, Y2 // c4c27d13d3
+ VCVTPH2PS (BX), Y11 // c4627d131b
+ VCVTPH2PS (R11), Y11 // c4427d131b
+ VCVTPH2PS X2, Y11 // c4627d13da
+ VCVTPH2PS X11, Y11 // c4427d13db
+ VCVTPS2DQ (BX), X2 // c4e1795b13 or c5f95b13
+ VCVTPS2DQ (R11), X2 // c4c1795b13
+ VCVTPS2DQ X2, X2 // c4e1795bd2 or c5f95bd2
+ VCVTPS2DQ X11, X2 // c4c1795bd3
+ VCVTPS2DQ (BX), X11 // c461795b1b or c5795b1b
+ VCVTPS2DQ (R11), X11 // c441795b1b
+ VCVTPS2DQ X2, X11 // c461795bda or c5795bda
+ VCVTPS2DQ X11, X11 // c441795bdb
+ VCVTPS2DQ (BX), Y2 // c4e17d5b13 or c5fd5b13
+ VCVTPS2DQ (R11), Y2 // c4c17d5b13
+ VCVTPS2DQ Y2, Y2 // c4e17d5bd2 or c5fd5bd2
+ VCVTPS2DQ Y11, Y2 // c4c17d5bd3
+ VCVTPS2DQ (BX), Y11 // c4617d5b1b or c57d5b1b
+ VCVTPS2DQ (R11), Y11 // c4417d5b1b
+ VCVTPS2DQ Y2, Y11 // c4617d5bda or c57d5bda
+ VCVTPS2DQ Y11, Y11 // c4417d5bdb
+ VCVTPS2PD (BX), X2 // c4e1785a13 or c5f85a13
+ VCVTPS2PD (R11), X2 // c4c1785a13
+ VCVTPS2PD X2, X2 // c4e1785ad2 or c5f85ad2
+ VCVTPS2PD X11, X2 // c4c1785ad3
+ VCVTPS2PD (BX), X11 // c461785a1b or c5785a1b
+ VCVTPS2PD (R11), X11 // c441785a1b
+ VCVTPS2PD X2, X11 // c461785ada or c5785ada
+ VCVTPS2PD X11, X11 // c441785adb
+ VCVTPS2PD (BX), Y2 // c4e17c5a13 or c5fc5a13
+ VCVTPS2PD (R11), Y2 // c4c17c5a13
+ VCVTPS2PD X2, Y2 // c4e17c5ad2 or c5fc5ad2
+ VCVTPS2PD X11, Y2 // c4c17c5ad3
+ VCVTPS2PD (BX), Y11 // c4617c5a1b or c57c5a1b
+ VCVTPS2PD (R11), Y11 // c4417c5a1b
+ VCVTPS2PD X2, Y11 // c4617c5ada or c57c5ada
+ VCVTPS2PD X11, Y11 // c4417c5adb
+ VCVTPS2PH $7, Y2, (BX) // c4e37d1d1307
+ VCVTPS2PH $7, Y11, (BX) // c4637d1d1b07
+ VCVTPS2PH $7, Y2, (R11) // c4c37d1d1307
+ VCVTPS2PH $7, Y11, (R11) // c4437d1d1b07
+ VCVTPS2PH $7, Y2, X2 // c4e37d1dd207
+ VCVTPS2PH $7, Y11, X2 // c4637d1dda07
+ VCVTPS2PH $7, Y2, X11 // c4c37d1dd307
+ VCVTPS2PH $7, Y11, X11 // c4437d1ddb07
+ VCVTPS2PH $7, X2, (BX) // c4e3791d1307
+ VCVTPS2PH $7, X11, (BX) // c463791d1b07
+ VCVTPS2PH $7, X2, (R11) // c4c3791d1307
+ VCVTPS2PH $7, X11, (R11) // c443791d1b07
+ VCVTPS2PH $7, X2, X2 // c4e3791dd207
+ VCVTPS2PH $7, X11, X2 // c463791dda07
+ VCVTPS2PH $7, X2, X11 // c4c3791dd307
+ VCVTPS2PH $7, X11, X11 // c443791ddb07
+ VCVTSD2SI (BX), DX // c4e17b2d13 or c5fb2d13
+ VCVTSD2SI (R11), DX // c4c17b2d13
+ VCVTSD2SI X2, DX // c4e17b2dd2 or c5fb2dd2
+ VCVTSD2SI X11, DX // c4c17b2dd3
+ VCVTSD2SI (BX), R11 // c4617b2d1b or c57b2d1b
+ VCVTSD2SI (R11), R11 // c4417b2d1b
+ VCVTSD2SI X2, R11 // c4617b2dda or c57b2dda
+ VCVTSD2SI X11, R11 // c4417b2ddb
+ VCVTSD2SIQ (BX), DX // c4e1fb2d13
+ VCVTSD2SIQ (R11), DX // c4c1fb2d13
+ VCVTSD2SIQ X2, DX // c4e1fb2dd2
+ VCVTSD2SIQ X11, DX // c4c1fb2dd3
+ VCVTSD2SIQ (BX), R11 // c461fb2d1b
+ VCVTSD2SIQ (R11), R11 // c441fb2d1b
+ VCVTSD2SIQ X2, R11 // c461fb2dda
+ VCVTSD2SIQ X11, R11 // c441fb2ddb
+ VCVTSD2SS (BX), X9, X2 // c4e1335a13 or c5b35a13
+ VCVTSD2SS (R11), X9, X2 // c4c1335a13
+ VCVTSD2SS X2, X9, X2 // c4e1335ad2 or c5b35ad2
+ VCVTSD2SS X11, X9, X2 // c4c1335ad3
+ VCVTSD2SS (BX), X9, X11 // c461335a1b or c5335a1b
+ VCVTSD2SS (R11), X9, X11 // c441335a1b
+ VCVTSD2SS X2, X9, X11 // c461335ada or c5335ada
+ VCVTSD2SS X11, X9, X11 // c441335adb
+ VCVTSI2SDL (BX), X9, X2 // c4e1332a13 or c5b32a13
+ VCVTSI2SDL (R11), X9, X2 // c4c1332a13
+ VCVTSI2SDL DX, X9, X2 // c4e1332ad2 or c5b32ad2
+ VCVTSI2SDL R11, X9, X2 // c4c1332ad3
+ VCVTSI2SDL (BX), X9, X11 // c461332a1b or c5332a1b
+ VCVTSI2SDL (R11), X9, X11 // c441332a1b
+ VCVTSI2SDL DX, X9, X11 // c461332ada or c5332ada
+ VCVTSI2SDL R11, X9, X11 // c441332adb
+ VCVTSI2SDQ (BX), X9, X2 // c4e1b32a13
+ VCVTSI2SDQ (R11), X9, X2 // c4c1b32a13
+ VCVTSI2SDQ DX, X9, X2 // c4e1b32ad2
+ VCVTSI2SDQ R11, X9, X2 // c4c1b32ad3
+ VCVTSI2SDQ (BX), X9, X11 // c461b32a1b
+ VCVTSI2SDQ (R11), X9, X11 // c441b32a1b
+ VCVTSI2SDQ DX, X9, X11 // c461b32ada
+ VCVTSI2SDQ R11, X9, X11 // c441b32adb
+ VCVTSI2SSL (BX), X9, X2 // c4e1322a13 or c5b22a13
+ VCVTSI2SSL (R11), X9, X2 // c4c1322a13
+ VCVTSI2SSL DX, X9, X2 // c4e1322ad2 or c5b22ad2
+ VCVTSI2SSL R11, X9, X2 // c4c1322ad3
+ VCVTSI2SSL (BX), X9, X11 // c461322a1b or c5322a1b
+ VCVTSI2SSL (R11), X9, X11 // c441322a1b
+ VCVTSI2SSL DX, X9, X11 // c461322ada or c5322ada
+ VCVTSI2SSL R11, X9, X11 // c441322adb
+ VCVTSI2SSQ (BX), X9, X2 // c4e1b22a13
+ VCVTSI2SSQ (R11), X9, X2 // c4c1b22a13
+ VCVTSI2SSQ DX, X9, X2 // c4e1b22ad2
+ VCVTSI2SSQ R11, X9, X2 // c4c1b22ad3
+ VCVTSI2SSQ (BX), X9, X11 // c461b22a1b
+ VCVTSI2SSQ (R11), X9, X11 // c441b22a1b
+ VCVTSI2SSQ DX, X9, X11 // c461b22ada
+ VCVTSI2SSQ R11, X9, X11 // c441b22adb
+ VCVTSS2SD (BX), X9, X2 // c4e1325a13 or c5b25a13
+ VCVTSS2SD (R11), X9, X2 // c4c1325a13
+ VCVTSS2SD X2, X9, X2 // c4e1325ad2 or c5b25ad2
+ VCVTSS2SD X11, X9, X2 // c4c1325ad3
+ VCVTSS2SD (BX), X9, X11 // c461325a1b or c5325a1b
+ VCVTSS2SD (R11), X9, X11 // c441325a1b
+ VCVTSS2SD X2, X9, X11 // c461325ada or c5325ada
+ VCVTSS2SD X11, X9, X11 // c441325adb
+ VCVTSS2SI (BX), DX // c4e17a2d13 or c5fa2d13
+ VCVTSS2SI (R11), DX // c4c17a2d13
+ VCVTSS2SI X2, DX // c4e17a2dd2 or c5fa2dd2
+ VCVTSS2SI X11, DX // c4c17a2dd3
+ VCVTSS2SI (BX), R11 // c4617a2d1b or c57a2d1b
+ VCVTSS2SI (R11), R11 // c4417a2d1b
+ VCVTSS2SI X2, R11 // c4617a2dda or c57a2dda
+ VCVTSS2SI X11, R11 // c4417a2ddb
+ VCVTSS2SIQ (BX), DX // c4e1fa2d13
+ VCVTSS2SIQ (R11), DX // c4c1fa2d13
+ VCVTSS2SIQ X2, DX // c4e1fa2dd2
+ VCVTSS2SIQ X11, DX // c4c1fa2dd3
+ VCVTSS2SIQ (BX), R11 // c461fa2d1b
+ VCVTSS2SIQ (R11), R11 // c441fa2d1b
+ VCVTSS2SIQ X2, R11 // c461fa2dda
+ VCVTSS2SIQ X11, R11 // c441fa2ddb
+ VCVTTPD2DQX (BX), X2 // c4e179e613 or c5f9e613
+ VCVTTPD2DQX (R11), X2 // c4c179e613
+ VCVTTPD2DQX X2, X2 // c4e179e6d2 or c5f9e6d2
+ VCVTTPD2DQX X11, X2 // c4c179e6d3
+ VCVTTPD2DQX (BX), X11 // c46179e61b or c579e61b
+ VCVTTPD2DQX (R11), X11 // c44179e61b
+ VCVTTPD2DQX X2, X11 // c46179e6da or c579e6da
+ VCVTTPD2DQX X11, X11 // c44179e6db
+ VCVTTPD2DQY (BX), X2 // c4e17de613 or c5fde613
+ VCVTTPD2DQY (R11), X2 // c4c17de613
+ VCVTTPD2DQY Y2, X2 // c4e17de6d2 or c5fde6d2
+ VCVTTPD2DQY Y11, X2 // c4c17de6d3
+ VCVTTPD2DQY (BX), X11 // c4617de61b or c57de61b
+ VCVTTPD2DQY (R11), X11 // c4417de61b
+ VCVTTPD2DQY Y2, X11 // c4617de6da or c57de6da
+ VCVTTPD2DQY Y11, X11 // c4417de6db
+ VCVTTPS2DQ (BX), X2 // c4e17a5b13 or c5fa5b13
+ VCVTTPS2DQ (R11), X2 // c4c17a5b13
+ VCVTTPS2DQ X2, X2 // c4e17a5bd2 or c5fa5bd2
+ VCVTTPS2DQ X11, X2 // c4c17a5bd3
+ VCVTTPS2DQ (BX), X11 // c4617a5b1b or c57a5b1b
+ VCVTTPS2DQ (R11), X11 // c4417a5b1b
+ VCVTTPS2DQ X2, X11 // c4617a5bda or c57a5bda
+ VCVTTPS2DQ X11, X11 // c4417a5bdb
+ VCVTTPS2DQ (BX), Y2 // c4e17e5b13 or c5fe5b13
+ VCVTTPS2DQ (R11), Y2 // c4c17e5b13
+ VCVTTPS2DQ Y2, Y2 // c4e17e5bd2 or c5fe5bd2
+ VCVTTPS2DQ Y11, Y2 // c4c17e5bd3
+ VCVTTPS2DQ (BX), Y11 // c4617e5b1b or c57e5b1b
+ VCVTTPS2DQ (R11), Y11 // c4417e5b1b
+ VCVTTPS2DQ Y2, Y11 // c4617e5bda or c57e5bda
+ VCVTTPS2DQ Y11, Y11 // c4417e5bdb
+ VCVTTSD2SI (BX), DX // c4e17b2c13 or c5fb2c13
+ VCVTTSD2SI (R11), DX // c4c17b2c13
+ VCVTTSD2SI X2, DX // c4e17b2cd2 or c5fb2cd2
+ VCVTTSD2SI X11, DX // c4c17b2cd3
+ VCVTTSD2SI (BX), R11 // c4617b2c1b or c57b2c1b
+ VCVTTSD2SI (R11), R11 // c4417b2c1b
+ VCVTTSD2SI X2, R11 // c4617b2cda or c57b2cda
+ VCVTTSD2SI X11, R11 // c4417b2cdb
+ VCVTTSD2SIQ (BX), DX // c4e1fb2c13
+ VCVTTSD2SIQ (R11), DX // c4c1fb2c13
+ VCVTTSD2SIQ X2, DX // c4e1fb2cd2
+ VCVTTSD2SIQ X11, DX // c4c1fb2cd3
+ VCVTTSD2SIQ (BX), R11 // c461fb2c1b
+ VCVTTSD2SIQ (R11), R11 // c441fb2c1b
+ VCVTTSD2SIQ X2, R11 // c461fb2cda
+ VCVTTSD2SIQ X11, R11 // c441fb2cdb
+ VCVTTSS2SI (BX), DX // c4e17a2c13 or c5fa2c13
+ VCVTTSS2SI (R11), DX // c4c17a2c13
+ VCVTTSS2SI X2, DX // c4e17a2cd2 or c5fa2cd2
+ VCVTTSS2SI X11, DX // c4c17a2cd3
+ VCVTTSS2SI (BX), R11 // c4617a2c1b or c57a2c1b
+ VCVTTSS2SI (R11), R11 // c4417a2c1b
+ VCVTTSS2SI X2, R11 // c4617a2cda or c57a2cda
+ VCVTTSS2SI X11, R11 // c4417a2cdb
+ VCVTTSS2SIQ (BX), DX // c4e1fa2c13
+ VCVTTSS2SIQ (R11), DX // c4c1fa2c13
+ VCVTTSS2SIQ X2, DX // c4e1fa2cd2
+ VCVTTSS2SIQ X11, DX // c4c1fa2cd3
+ VCVTTSS2SIQ (BX), R11 // c461fa2c1b
+ VCVTTSS2SIQ (R11), R11 // c441fa2c1b
+ VCVTTSS2SIQ X2, R11 // c461fa2cda
+ VCVTTSS2SIQ X11, R11 // c441fa2cdb
+ VDIVPD (BX), X9, X2 // c4e1315e13 or c5b15e13
+ VDIVPD (R11), X9, X2 // c4c1315e13
+ VDIVPD X2, X9, X2 // c4e1315ed2 or c5b15ed2
+ VDIVPD X11, X9, X2 // c4c1315ed3
+ VDIVPD (BX), X9, X11 // c461315e1b or c5315e1b
+ VDIVPD (R11), X9, X11 // c441315e1b
+ VDIVPD X2, X9, X11 // c461315eda or c5315eda
+ VDIVPD X11, X9, X11 // c441315edb
+ VDIVPD (BX), Y15, Y2 // c4e1055e13 or c5855e13
+ VDIVPD (R11), Y15, Y2 // c4c1055e13
+ VDIVPD Y2, Y15, Y2 // c4e1055ed2 or c5855ed2
+ VDIVPD Y11, Y15, Y2 // c4c1055ed3
+ VDIVPD (BX), Y15, Y11 // c461055e1b or c5055e1b
+ VDIVPD (R11), Y15, Y11 // c441055e1b
+ VDIVPD Y2, Y15, Y11 // c461055eda or c5055eda
+ VDIVPD Y11, Y15, Y11 // c441055edb
+ VDIVPS (BX), X9, X2 // c4e1305e13 or c5b05e13
+ VDIVPS (R11), X9, X2 // c4c1305e13
+ VDIVPS X2, X9, X2 // c4e1305ed2 or c5b05ed2
+ VDIVPS X11, X9, X2 // c4c1305ed3
+ VDIVPS (BX), X9, X11 // c461305e1b or c5305e1b
+ VDIVPS (R11), X9, X11 // c441305e1b
+ VDIVPS X2, X9, X11 // c461305eda or c5305eda
+ VDIVPS X11, X9, X11 // c441305edb
+ VDIVPS (BX), Y15, Y2 // c4e1045e13 or c5845e13
+ VDIVPS (R11), Y15, Y2 // c4c1045e13
+ VDIVPS Y2, Y15, Y2 // c4e1045ed2 or c5845ed2
+ VDIVPS Y11, Y15, Y2 // c4c1045ed3
+ VDIVPS (BX), Y15, Y11 // c461045e1b or c5045e1b
+ VDIVPS (R11), Y15, Y11 // c441045e1b
+ VDIVPS Y2, Y15, Y11 // c461045eda or c5045eda
+ VDIVPS Y11, Y15, Y11 // c441045edb
+ VDIVSD (BX), X9, X2 // c4e1335e13 or c5b35e13
+ VDIVSD (R11), X9, X2 // c4c1335e13
+ VDIVSD X2, X9, X2 // c4e1335ed2 or c5b35ed2
+ VDIVSD X11, X9, X2 // c4c1335ed3
+ VDIVSD (BX), X9, X11 // c461335e1b or c5335e1b
+ VDIVSD (R11), X9, X11 // c441335e1b
+ VDIVSD X2, X9, X11 // c461335eda or c5335eda
+ VDIVSD X11, X9, X11 // c441335edb
+ VDIVSS (BX), X9, X2 // c4e1325e13 or c5b25e13
+ VDIVSS (R11), X9, X2 // c4c1325e13
+ VDIVSS X2, X9, X2 // c4e1325ed2 or c5b25ed2
+ VDIVSS X11, X9, X2 // c4c1325ed3
+ VDIVSS (BX), X9, X11 // c461325e1b or c5325e1b
+ VDIVSS (R11), X9, X11 // c441325e1b
+ VDIVSS X2, X9, X11 // c461325eda or c5325eda
+ VDIVSS X11, X9, X11 // c441325edb
+ VDPPD $7, (BX), X9, X2 // c4e331411307
+ VDPPD $7, (R11), X9, X2 // c4c331411307
+ VDPPD $7, X2, X9, X2 // c4e33141d207
+ VDPPD $7, X11, X9, X2 // c4c33141d307
+ VDPPD $7, (BX), X9, X11 // c46331411b07
+ VDPPD $7, (R11), X9, X11 // c44331411b07
+ VDPPD $7, X2, X9, X11 // c4633141da07
+ VDPPD $7, X11, X9, X11 // c4433141db07
+ VDPPS $7, (BX), X9, X2 // c4e331401307
+ VDPPS $7, (R11), X9, X2 // c4c331401307
+ VDPPS $7, X2, X9, X2 // c4e33140d207
+ VDPPS $7, X11, X9, X2 // c4c33140d307
+ VDPPS $7, (BX), X9, X11 // c46331401b07
+ VDPPS $7, (R11), X9, X11 // c44331401b07
+ VDPPS $7, X2, X9, X11 // c4633140da07
+ VDPPS $7, X11, X9, X11 // c4433140db07
+ VDPPS $7, (BX), Y15, Y2 // c4e305401307
+ VDPPS $7, (R11), Y15, Y2 // c4c305401307
+ VDPPS $7, Y2, Y15, Y2 // c4e30540d207
+ VDPPS $7, Y11, Y15, Y2 // c4c30540d307
+ VDPPS $7, (BX), Y15, Y11 // c46305401b07
+ VDPPS $7, (R11), Y15, Y11 // c44305401b07
+ VDPPS $7, Y2, Y15, Y11 // c4630540da07
+ VDPPS $7, Y11, Y15, Y11 // c4430540db07
+ VERR (BX) // 0f0023
+ VERR (R11) // 410f0023
+ VERR DX // 0f00e2
+ VERR R11 // 410f00e3
+ VERW (BX) // 0f002b
+ VERW (R11) // 410f002b
+ VERW DX // 0f00ea
+ VERW R11 // 410f00eb
+ VEXTRACTF128 $7, Y2, (BX) // c4e37d191307
+ VEXTRACTF128 $7, Y11, (BX) // c4637d191b07
+ VEXTRACTF128 $7, Y2, (R11) // c4c37d191307
+ VEXTRACTF128 $7, Y11, (R11) // c4437d191b07
+ VEXTRACTF128 $7, Y2, X2 // c4e37d19d207
+ VEXTRACTF128 $7, Y11, X2 // c4637d19da07
+ VEXTRACTF128 $7, Y2, X11 // c4c37d19d307
+ VEXTRACTF128 $7, Y11, X11 // c4437d19db07
+ VEXTRACTI128 $7, Y2, (BX) // c4e37d391307
+ VEXTRACTI128 $7, Y11, (BX) // c4637d391b07
+ VEXTRACTI128 $7, Y2, (R11) // c4c37d391307
+ VEXTRACTI128 $7, Y11, (R11) // c4437d391b07
+ VEXTRACTI128 $7, Y2, X2 // c4e37d39d207
+ VEXTRACTI128 $7, Y11, X2 // c4637d39da07
+ VEXTRACTI128 $7, Y2, X11 // c4c37d39d307
+ VEXTRACTI128 $7, Y11, X11 // c4437d39db07
+ VEXTRACTPS $7, X2, (BX) // c4e379171307
+ VEXTRACTPS $7, X11, (BX) // c46379171b07
+ VEXTRACTPS $7, X2, (R11) // c4c379171307
+ VEXTRACTPS $7, X11, (R11) // c44379171b07
+ VEXTRACTPS $7, X2, DX // c4e37917d207
+ VEXTRACTPS $7, X11, DX // c4637917da07
+ VEXTRACTPS $7, X2, R11 // c4c37917d307
+ VEXTRACTPS $7, X11, R11 // c4437917db07
+ VFMADD132PD (BX), X9, X2 // c4e2b19813
+ VFMADD132PD (R11), X9, X2 // c4c2b19813
+ VFMADD132PD X2, X9, X2 // c4e2b198d2
+ VFMADD132PD X11, X9, X2 // c4c2b198d3
+ VFMADD132PD (BX), X9, X11 // c462b1981b
+ VFMADD132PD (R11), X9, X11 // c442b1981b
+ VFMADD132PD X2, X9, X11 // c462b198da
+ VFMADD132PD X11, X9, X11 // c442b198db
+ VFMADD132PD (BX), Y15, Y2 // c4e2859813
+ VFMADD132PD (R11), Y15, Y2 // c4c2859813
+ VFMADD132PD Y2, Y15, Y2 // c4e28598d2
+ VFMADD132PD Y11, Y15, Y2 // c4c28598d3
+ VFMADD132PD (BX), Y15, Y11 // c46285981b
+ VFMADD132PD (R11), Y15, Y11 // c44285981b
+ VFMADD132PD Y2, Y15, Y11 // c4628598da
+ VFMADD132PD Y11, Y15, Y11 // c4428598db
+ VFMADD132PS (BX), X9, X2 // c4e2319813
+ VFMADD132PS (R11), X9, X2 // c4c2319813
+ VFMADD132PS X2, X9, X2 // c4e23198d2
+ VFMADD132PS X11, X9, X2 // c4c23198d3
+ VFMADD132PS (BX), X9, X11 // c46231981b
+ VFMADD132PS (R11), X9, X11 // c44231981b
+ VFMADD132PS X2, X9, X11 // c4623198da
+ VFMADD132PS X11, X9, X11 // c4423198db
+ VFMADD132PS (BX), Y15, Y2 // c4e2059813
+ VFMADD132PS (R11), Y15, Y2 // c4c2059813
+ VFMADD132PS Y2, Y15, Y2 // c4e20598d2
+ VFMADD132PS Y11, Y15, Y2 // c4c20598d3
+ VFMADD132PS (BX), Y15, Y11 // c46205981b
+ VFMADD132PS (R11), Y15, Y11 // c44205981b
+ VFMADD132PS Y2, Y15, Y11 // c4620598da
+ VFMADD132PS Y11, Y15, Y11 // c4420598db
+ VFMADD132SD (BX), X9, X2 // c4e2b19913
+ VFMADD132SD (R11), X9, X2 // c4c2b19913
+ VFMADD132SD X2, X9, X2 // c4e2b199d2
+ VFMADD132SD X11, X9, X2 // c4c2b199d3
+ VFMADD132SD (BX), X9, X11 // c462b1991b
+ VFMADD132SD (R11), X9, X11 // c442b1991b
+ VFMADD132SD X2, X9, X11 // c462b199da
+ VFMADD132SD X11, X9, X11 // c442b199db
+ VFMADD132SS (BX), X9, X2 // c4e2319913
+ VFMADD132SS (R11), X9, X2 // c4c2319913
+ VFMADD132SS X2, X9, X2 // c4e23199d2
+ VFMADD132SS X11, X9, X2 // c4c23199d3
+ VFMADD132SS (BX), X9, X11 // c46231991b
+ VFMADD132SS (R11), X9, X11 // c44231991b
+ VFMADD132SS X2, X9, X11 // c4623199da
+ VFMADD132SS X11, X9, X11 // c4423199db
+ VFMADD213PD (BX), X9, X2 // c4e2b1a813
+ VFMADD213PD (R11), X9, X2 // c4c2b1a813
+ VFMADD213PD X2, X9, X2 // c4e2b1a8d2
+ VFMADD213PD X11, X9, X2 // c4c2b1a8d3
+ VFMADD213PD (BX), X9, X11 // c462b1a81b
+ VFMADD213PD (R11), X9, X11 // c442b1a81b
+ VFMADD213PD X2, X9, X11 // c462b1a8da
+ VFMADD213PD X11, X9, X11 // c442b1a8db
+ VFMADD213PD (BX), Y15, Y2 // c4e285a813
+ VFMADD213PD (R11), Y15, Y2 // c4c285a813
+ VFMADD213PD Y2, Y15, Y2 // c4e285a8d2
+ VFMADD213PD Y11, Y15, Y2 // c4c285a8d3
+ VFMADD213PD (BX), Y15, Y11 // c46285a81b
+ VFMADD213PD (R11), Y15, Y11 // c44285a81b
+ VFMADD213PD Y2, Y15, Y11 // c46285a8da
+ VFMADD213PD Y11, Y15, Y11 // c44285a8db
+ VFMADD213PS (BX), X9, X2 // c4e231a813
+ VFMADD213PS (R11), X9, X2 // c4c231a813
+ VFMADD213PS X2, X9, X2 // c4e231a8d2
+ VFMADD213PS X11, X9, X2 // c4c231a8d3
+ VFMADD213PS (BX), X9, X11 // c46231a81b
+ VFMADD213PS (R11), X9, X11 // c44231a81b
+ VFMADD213PS X2, X9, X11 // c46231a8da
+ VFMADD213PS X11, X9, X11 // c44231a8db
+ VFMADD213PS (BX), Y15, Y2 // c4e205a813
+ VFMADD213PS (R11), Y15, Y2 // c4c205a813
+ VFMADD213PS Y2, Y15, Y2 // c4e205a8d2
+ VFMADD213PS Y11, Y15, Y2 // c4c205a8d3
+ VFMADD213PS (BX), Y15, Y11 // c46205a81b
+ VFMADD213PS (R11), Y15, Y11 // c44205a81b
+ VFMADD213PS Y2, Y15, Y11 // c46205a8da
+ VFMADD213PS Y11, Y15, Y11 // c44205a8db
+ VFMADD213SD (BX), X9, X2 // c4e2b1a913
+ VFMADD213SD (R11), X9, X2 // c4c2b1a913
+ VFMADD213SD X2, X9, X2 // c4e2b1a9d2
+ VFMADD213SD X11, X9, X2 // c4c2b1a9d3
+ VFMADD213SD (BX), X9, X11 // c462b1a91b
+ VFMADD213SD (R11), X9, X11 // c442b1a91b
+ VFMADD213SD X2, X9, X11 // c462b1a9da
+ VFMADD213SD X11, X9, X11 // c442b1a9db
+ VFMADD213SS (BX), X9, X2 // c4e231a913
+ VFMADD213SS (R11), X9, X2 // c4c231a913
+ VFMADD213SS X2, X9, X2 // c4e231a9d2
+ VFMADD213SS X11, X9, X2 // c4c231a9d3
+ VFMADD213SS (BX), X9, X11 // c46231a91b
+ VFMADD213SS (R11), X9, X11 // c44231a91b
+ VFMADD213SS X2, X9, X11 // c46231a9da
+ VFMADD213SS X11, X9, X11 // c44231a9db
+ VFMADD231PD (BX), X9, X2 // c4e2b1b813
+ VFMADD231PD (R11), X9, X2 // c4c2b1b813
+ VFMADD231PD X2, X9, X2 // c4e2b1b8d2
+ VFMADD231PD X11, X9, X2 // c4c2b1b8d3
+ VFMADD231PD (BX), X9, X11 // c462b1b81b
+ VFMADD231PD (R11), X9, X11 // c442b1b81b
+ VFMADD231PD X2, X9, X11 // c462b1b8da
+ VFMADD231PD X11, X9, X11 // c442b1b8db
+ VFMADD231PD (BX), Y15, Y2 // c4e285b813
+ VFMADD231PD (R11), Y15, Y2 // c4c285b813
+ VFMADD231PD Y2, Y15, Y2 // c4e285b8d2
+ VFMADD231PD Y11, Y15, Y2 // c4c285b8d3
+ VFMADD231PD (BX), Y15, Y11 // c46285b81b
+ VFMADD231PD (R11), Y15, Y11 // c44285b81b
+ VFMADD231PD Y2, Y15, Y11 // c46285b8da
+ VFMADD231PD Y11, Y15, Y11 // c44285b8db
+ VFMADD231PS (BX), X9, X2 // c4e231b813
+ VFMADD231PS (R11), X9, X2 // c4c231b813
+ VFMADD231PS X2, X9, X2 // c4e231b8d2
+ VFMADD231PS X11, X9, X2 // c4c231b8d3
+ VFMADD231PS (BX), X9, X11 // c46231b81b
+ VFMADD231PS (R11), X9, X11 // c44231b81b
+ VFMADD231PS X2, X9, X11 // c46231b8da
+ VFMADD231PS X11, X9, X11 // c44231b8db
+ VFMADD231PS (BX), Y15, Y2 // c4e205b813
+ VFMADD231PS (R11), Y15, Y2 // c4c205b813
+ VFMADD231PS Y2, Y15, Y2 // c4e205b8d2
+ VFMADD231PS Y11, Y15, Y2 // c4c205b8d3
+ VFMADD231PS (BX), Y15, Y11 // c46205b81b
+ VFMADD231PS (R11), Y15, Y11 // c44205b81b
+ VFMADD231PS Y2, Y15, Y11 // c46205b8da
+ VFMADD231PS Y11, Y15, Y11 // c44205b8db
+ VFMADD231SD (BX), X9, X2 // c4e2b1b913
+ VFMADD231SD (R11), X9, X2 // c4c2b1b913
+ VFMADD231SD X2, X9, X2 // c4e2b1b9d2
+ VFMADD231SD X11, X9, X2 // c4c2b1b9d3
+ VFMADD231SD (BX), X9, X11 // c462b1b91b
+ VFMADD231SD (R11), X9, X11 // c442b1b91b
+ VFMADD231SD X2, X9, X11 // c462b1b9da
+ VFMADD231SD X11, X9, X11 // c442b1b9db
+ VFMADD231SS (BX), X9, X2 // c4e231b913
+ VFMADD231SS (R11), X9, X2 // c4c231b913
+ VFMADD231SS X2, X9, X2 // c4e231b9d2
+ VFMADD231SS X11, X9, X2 // c4c231b9d3
+ VFMADD231SS (BX), X9, X11 // c46231b91b
+ VFMADD231SS (R11), X9, X11 // c44231b91b
+ VFMADD231SS X2, X9, X11 // c46231b9da
+ VFMADD231SS X11, X9, X11 // c44231b9db
+ VFMADDSUB132PD (BX), X9, X2 // c4e2b19613
+ VFMADDSUB132PD (R11), X9, X2 // c4c2b19613
+ VFMADDSUB132PD X2, X9, X2 // c4e2b196d2
+ VFMADDSUB132PD X11, X9, X2 // c4c2b196d3
+ VFMADDSUB132PD (BX), X9, X11 // c462b1961b
+ VFMADDSUB132PD (R11), X9, X11 // c442b1961b
+ VFMADDSUB132PD X2, X9, X11 // c462b196da
+ VFMADDSUB132PD X11, X9, X11 // c442b196db
+ VFMADDSUB132PD (BX), Y15, Y2 // c4e2859613
+ VFMADDSUB132PD (R11), Y15, Y2 // c4c2859613
+ VFMADDSUB132PD Y2, Y15, Y2 // c4e28596d2
+ VFMADDSUB132PD Y11, Y15, Y2 // c4c28596d3
+ VFMADDSUB132PD (BX), Y15, Y11 // c46285961b
+ VFMADDSUB132PD (R11), Y15, Y11 // c44285961b
+ VFMADDSUB132PD Y2, Y15, Y11 // c4628596da
+ VFMADDSUB132PD Y11, Y15, Y11 // c4428596db
+ VFMADDSUB132PS (BX), X9, X2 // c4e2319613
+ VFMADDSUB132PS (R11), X9, X2 // c4c2319613
+ VFMADDSUB132PS X2, X9, X2 // c4e23196d2
+ VFMADDSUB132PS X11, X9, X2 // c4c23196d3
+ VFMADDSUB132PS (BX), X9, X11 // c46231961b
+ VFMADDSUB132PS (R11), X9, X11 // c44231961b
+ VFMADDSUB132PS X2, X9, X11 // c4623196da
+ VFMADDSUB132PS X11, X9, X11 // c4423196db
+ VFMADDSUB132PS (BX), Y15, Y2 // c4e2059613
+ VFMADDSUB132PS (R11), Y15, Y2 // c4c2059613
+ VFMADDSUB132PS Y2, Y15, Y2 // c4e20596d2
+ VFMADDSUB132PS Y11, Y15, Y2 // c4c20596d3
+ VFMADDSUB132PS (BX), Y15, Y11 // c46205961b
+ VFMADDSUB132PS (R11), Y15, Y11 // c44205961b
+ VFMADDSUB132PS Y2, Y15, Y11 // c4620596da
+ VFMADDSUB132PS Y11, Y15, Y11 // c4420596db
+ VFMADDSUB213PD (BX), X9, X2 // c4e2b1a613
+ VFMADDSUB213PD (R11), X9, X2 // c4c2b1a613
+ VFMADDSUB213PD X2, X9, X2 // c4e2b1a6d2
+ VFMADDSUB213PD X11, X9, X2 // c4c2b1a6d3
+ VFMADDSUB213PD (BX), X9, X11 // c462b1a61b
+ VFMADDSUB213PD (R11), X9, X11 // c442b1a61b
+ VFMADDSUB213PD X2, X9, X11 // c462b1a6da
+ VFMADDSUB213PD X11, X9, X11 // c442b1a6db
+ VFMADDSUB213PD (BX), Y15, Y2 // c4e285a613
+ VFMADDSUB213PD (R11), Y15, Y2 // c4c285a613
+ VFMADDSUB213PD Y2, Y15, Y2 // c4e285a6d2
+ VFMADDSUB213PD Y11, Y15, Y2 // c4c285a6d3
+ VFMADDSUB213PD (BX), Y15, Y11 // c46285a61b
+ VFMADDSUB213PD (R11), Y15, Y11 // c44285a61b
+ VFMADDSUB213PD Y2, Y15, Y11 // c46285a6da
+ VFMADDSUB213PD Y11, Y15, Y11 // c44285a6db
+ VFMADDSUB213PS (BX), X9, X2 // c4e231a613
+ VFMADDSUB213PS (R11), X9, X2 // c4c231a613
+ VFMADDSUB213PS X2, X9, X2 // c4e231a6d2
+ VFMADDSUB213PS X11, X9, X2 // c4c231a6d3
+ VFMADDSUB213PS (BX), X9, X11 // c46231a61b
+ VFMADDSUB213PS (R11), X9, X11 // c44231a61b
+ VFMADDSUB213PS X2, X9, X11 // c46231a6da
+ VFMADDSUB213PS X11, X9, X11 // c44231a6db
+ VFMADDSUB213PS (BX), Y15, Y2 // c4e205a613
+ VFMADDSUB213PS (R11), Y15, Y2 // c4c205a613
+ VFMADDSUB213PS Y2, Y15, Y2 // c4e205a6d2
+ VFMADDSUB213PS Y11, Y15, Y2 // c4c205a6d3
+ VFMADDSUB213PS (BX), Y15, Y11 // c46205a61b
+ VFMADDSUB213PS (R11), Y15, Y11 // c44205a61b
+ VFMADDSUB213PS Y2, Y15, Y11 // c46205a6da
+ VFMADDSUB213PS Y11, Y15, Y11 // c44205a6db
+ VFMADDSUB231PD (BX), X9, X2 // c4e2b1b613
+ VFMADDSUB231PD (R11), X9, X2 // c4c2b1b613
+ VFMADDSUB231PD X2, X9, X2 // c4e2b1b6d2
+ VFMADDSUB231PD X11, X9, X2 // c4c2b1b6d3
+ VFMADDSUB231PD (BX), X9, X11 // c462b1b61b
+ VFMADDSUB231PD (R11), X9, X11 // c442b1b61b
+ VFMADDSUB231PD X2, X9, X11 // c462b1b6da
+ VFMADDSUB231PD X11, X9, X11 // c442b1b6db
+ VFMADDSUB231PD (BX), Y15, Y2 // c4e285b613
+ VFMADDSUB231PD (R11), Y15, Y2 // c4c285b613
+ VFMADDSUB231PD Y2, Y15, Y2 // c4e285b6d2
+ VFMADDSUB231PD Y11, Y15, Y2 // c4c285b6d3
+ VFMADDSUB231PD (BX), Y15, Y11 // c46285b61b
+ VFMADDSUB231PD (R11), Y15, Y11 // c44285b61b
+ VFMADDSUB231PD Y2, Y15, Y11 // c46285b6da
+ VFMADDSUB231PD Y11, Y15, Y11 // c44285b6db
+ VFMADDSUB231PS (BX), X9, X2 // c4e231b613
+ VFMADDSUB231PS (R11), X9, X2 // c4c231b613
+ VFMADDSUB231PS X2, X9, X2 // c4e231b6d2
+ VFMADDSUB231PS X11, X9, X2 // c4c231b6d3
+ VFMADDSUB231PS (BX), X9, X11 // c46231b61b
+ VFMADDSUB231PS (R11), X9, X11 // c44231b61b
+ VFMADDSUB231PS X2, X9, X11 // c46231b6da
+ VFMADDSUB231PS X11, X9, X11 // c44231b6db
+ VFMADDSUB231PS (BX), Y15, Y2 // c4e205b613
+ VFMADDSUB231PS (R11), Y15, Y2 // c4c205b613
+ VFMADDSUB231PS Y2, Y15, Y2 // c4e205b6d2
+ VFMADDSUB231PS Y11, Y15, Y2 // c4c205b6d3
+ VFMADDSUB231PS (BX), Y15, Y11 // c46205b61b
+ VFMADDSUB231PS (R11), Y15, Y11 // c44205b61b
+ VFMADDSUB231PS Y2, Y15, Y11 // c46205b6da
+ VFMADDSUB231PS Y11, Y15, Y11 // c44205b6db
+ VFMSUB132PD (BX), X9, X2 // c4e2b19a13
+ VFMSUB132PD (R11), X9, X2 // c4c2b19a13
+ VFMSUB132PD X2, X9, X2 // c4e2b19ad2
+ VFMSUB132PD X11, X9, X2 // c4c2b19ad3
+ VFMSUB132PD (BX), X9, X11 // c462b19a1b
+ VFMSUB132PD (R11), X9, X11 // c442b19a1b
+ VFMSUB132PD X2, X9, X11 // c462b19ada
+ VFMSUB132PD X11, X9, X11 // c442b19adb
+ VFMSUB132PD (BX), Y15, Y2 // c4e2859a13
+ VFMSUB132PD (R11), Y15, Y2 // c4c2859a13
+ VFMSUB132PD Y2, Y15, Y2 // c4e2859ad2
+ VFMSUB132PD Y11, Y15, Y2 // c4c2859ad3
+ VFMSUB132PD (BX), Y15, Y11 // c462859a1b
+ VFMSUB132PD (R11), Y15, Y11 // c442859a1b
+ VFMSUB132PD Y2, Y15, Y11 // c462859ada
+ VFMSUB132PD Y11, Y15, Y11 // c442859adb
+ VFMSUB132PS (BX), X9, X2 // c4e2319a13
+ VFMSUB132PS (R11), X9, X2 // c4c2319a13
+ VFMSUB132PS X2, X9, X2 // c4e2319ad2
+ VFMSUB132PS X11, X9, X2 // c4c2319ad3
+ VFMSUB132PS (BX), X9, X11 // c462319a1b
+ VFMSUB132PS (R11), X9, X11 // c442319a1b
+ VFMSUB132PS X2, X9, X11 // c462319ada
+ VFMSUB132PS X11, X9, X11 // c442319adb
+ VFMSUB132PS (BX), Y15, Y2 // c4e2059a13
+ VFMSUB132PS (R11), Y15, Y2 // c4c2059a13
+ VFMSUB132PS Y2, Y15, Y2 // c4e2059ad2
+ VFMSUB132PS Y11, Y15, Y2 // c4c2059ad3
+ VFMSUB132PS (BX), Y15, Y11 // c462059a1b
+ VFMSUB132PS (R11), Y15, Y11 // c442059a1b
+ VFMSUB132PS Y2, Y15, Y11 // c462059ada
+ VFMSUB132PS Y11, Y15, Y11 // c442059adb
+ VFMSUB132SD (BX), X9, X2 // c4e2b19b13
+ VFMSUB132SD (R11), X9, X2 // c4c2b19b13
+ VFMSUB132SD X2, X9, X2 // c4e2b19bd2
+ VFMSUB132SD X11, X9, X2 // c4c2b19bd3
+ VFMSUB132SD (BX), X9, X11 // c462b19b1b
+ VFMSUB132SD (R11), X9, X11 // c442b19b1b
+ VFMSUB132SD X2, X9, X11 // c462b19bda
+ VFMSUB132SD X11, X9, X11 // c442b19bdb
+ VFMSUB132SS (BX), X9, X2 // c4e2319b13
+ VFMSUB132SS (R11), X9, X2 // c4c2319b13
+ VFMSUB132SS X2, X9, X2 // c4e2319bd2
+ VFMSUB132SS X11, X9, X2 // c4c2319bd3
+ VFMSUB132SS (BX), X9, X11 // c462319b1b
+ VFMSUB132SS (R11), X9, X11 // c442319b1b
+ VFMSUB132SS X2, X9, X11 // c462319bda
+ VFMSUB132SS X11, X9, X11 // c442319bdb
+ VFMSUB213PD (BX), X9, X2 // c4e2b1aa13
+ VFMSUB213PD (R11), X9, X2 // c4c2b1aa13
+ VFMSUB213PD X2, X9, X2 // c4e2b1aad2
+ VFMSUB213PD X11, X9, X2 // c4c2b1aad3
+ VFMSUB213PD (BX), X9, X11 // c462b1aa1b
+ VFMSUB213PD (R11), X9, X11 // c442b1aa1b
+ VFMSUB213PD X2, X9, X11 // c462b1aada
+ VFMSUB213PD X11, X9, X11 // c442b1aadb
+ VFMSUB213PD (BX), Y15, Y2 // c4e285aa13
+ VFMSUB213PD (R11), Y15, Y2 // c4c285aa13
+ VFMSUB213PD Y2, Y15, Y2 // c4e285aad2
+ VFMSUB213PD Y11, Y15, Y2 // c4c285aad3
+ VFMSUB213PD (BX), Y15, Y11 // c46285aa1b
+ VFMSUB213PD (R11), Y15, Y11 // c44285aa1b
+ VFMSUB213PD Y2, Y15, Y11 // c46285aada
+ VFMSUB213PD Y11, Y15, Y11 // c44285aadb
+ VFMSUB213PS (BX), X9, X2 // c4e231aa13
+ VFMSUB213PS (R11), X9, X2 // c4c231aa13
+ VFMSUB213PS X2, X9, X2 // c4e231aad2
+ VFMSUB213PS X11, X9, X2 // c4c231aad3
+ VFMSUB213PS (BX), X9, X11 // c46231aa1b
+ VFMSUB213PS (R11), X9, X11 // c44231aa1b
+ VFMSUB213PS X2, X9, X11 // c46231aada
+ VFMSUB213PS X11, X9, X11 // c44231aadb
+ VFMSUB213PS (BX), Y15, Y2 // c4e205aa13
+ VFMSUB213PS (R11), Y15, Y2 // c4c205aa13
+ VFMSUB213PS Y2, Y15, Y2 // c4e205aad2
+ VFMSUB213PS Y11, Y15, Y2 // c4c205aad3
+ VFMSUB213PS (BX), Y15, Y11 // c46205aa1b
+ VFMSUB213PS (R11), Y15, Y11 // c44205aa1b
+ VFMSUB213PS Y2, Y15, Y11 // c46205aada
+ VFMSUB213PS Y11, Y15, Y11 // c44205aadb
+ VFMSUB213SD (BX), X9, X2 // c4e2b1ab13
+ VFMSUB213SD (R11), X9, X2 // c4c2b1ab13
+ VFMSUB213SD X2, X9, X2 // c4e2b1abd2
+ VFMSUB213SD X11, X9, X2 // c4c2b1abd3
+ VFMSUB213SD (BX), X9, X11 // c462b1ab1b
+ VFMSUB213SD (R11), X9, X11 // c442b1ab1b
+ VFMSUB213SD X2, X9, X11 // c462b1abda
+ VFMSUB213SD X11, X9, X11 // c442b1abdb
+ VFMSUB213SS (BX), X9, X2 // c4e231ab13
+ VFMSUB213SS (R11), X9, X2 // c4c231ab13
+ VFMSUB213SS X2, X9, X2 // c4e231abd2
+ VFMSUB213SS X11, X9, X2 // c4c231abd3
+ VFMSUB213SS (BX), X9, X11 // c46231ab1b
+ VFMSUB213SS (R11), X9, X11 // c44231ab1b
+ VFMSUB213SS X2, X9, X11 // c46231abda
+ VFMSUB213SS X11, X9, X11 // c44231abdb
+ VFMSUB231PD (BX), X9, X2 // c4e2b1ba13
+ VFMSUB231PD (R11), X9, X2 // c4c2b1ba13
+ VFMSUB231PD X2, X9, X2 // c4e2b1bad2
+ VFMSUB231PD X11, X9, X2 // c4c2b1bad3
+ VFMSUB231PD (BX), X9, X11 // c462b1ba1b
+ VFMSUB231PD (R11), X9, X11 // c442b1ba1b
+ VFMSUB231PD X2, X9, X11 // c462b1bada
+ VFMSUB231PD X11, X9, X11 // c442b1badb
+ VFMSUB231PD (BX), Y15, Y2 // c4e285ba13
+ VFMSUB231PD (R11), Y15, Y2 // c4c285ba13
+ VFMSUB231PD Y2, Y15, Y2 // c4e285bad2
+ VFMSUB231PD Y11, Y15, Y2 // c4c285bad3
+ VFMSUB231PD (BX), Y15, Y11 // c46285ba1b
+ VFMSUB231PD (R11), Y15, Y11 // c44285ba1b
+ VFMSUB231PD Y2, Y15, Y11 // c46285bada
+ VFMSUB231PD Y11, Y15, Y11 // c44285badb
+ VFMSUB231PS (BX), X9, X2 // c4e231ba13
+ VFMSUB231PS (R11), X9, X2 // c4c231ba13
+ VFMSUB231PS X2, X9, X2 // c4e231bad2
+ VFMSUB231PS X11, X9, X2 // c4c231bad3
+ VFMSUB231PS (BX), X9, X11 // c46231ba1b
+ VFMSUB231PS (R11), X9, X11 // c44231ba1b
+ VFMSUB231PS X2, X9, X11 // c46231bada
+ VFMSUB231PS X11, X9, X11 // c44231badb
+ VFMSUB231PS (BX), Y15, Y2 // c4e205ba13
+ VFMSUB231PS (R11), Y15, Y2 // c4c205ba13
+ VFMSUB231PS Y2, Y15, Y2 // c4e205bad2
+ VFMSUB231PS Y11, Y15, Y2 // c4c205bad3
+ VFMSUB231PS (BX), Y15, Y11 // c46205ba1b
+ VFMSUB231PS (R11), Y15, Y11 // c44205ba1b
+ VFMSUB231PS Y2, Y15, Y11 // c46205bada
+ VFMSUB231PS Y11, Y15, Y11 // c44205badb
+ VFMSUB231SD (BX), X9, X2 // c4e2b1bb13
+ VFMSUB231SD (R11), X9, X2 // c4c2b1bb13
+ VFMSUB231SD X2, X9, X2 // c4e2b1bbd2
+ VFMSUB231SD X11, X9, X2 // c4c2b1bbd3
+ VFMSUB231SD (BX), X9, X11 // c462b1bb1b
+ VFMSUB231SD (R11), X9, X11 // c442b1bb1b
+ VFMSUB231SD X2, X9, X11 // c462b1bbda
+ VFMSUB231SD X11, X9, X11 // c442b1bbdb
+ VFMSUB231SS (BX), X9, X2 // c4e231bb13
+ VFMSUB231SS (R11), X9, X2 // c4c231bb13
+ VFMSUB231SS X2, X9, X2 // c4e231bbd2
+ VFMSUB231SS X11, X9, X2 // c4c231bbd3
+ VFMSUB231SS (BX), X9, X11 // c46231bb1b
+ VFMSUB231SS (R11), X9, X11 // c44231bb1b
+ VFMSUB231SS X2, X9, X11 // c46231bbda
+ VFMSUB231SS X11, X9, X11 // c44231bbdb
+ VFMSUBADD132PD (BX), X9, X2 // c4e2b19713
+ VFMSUBADD132PD (R11), X9, X2 // c4c2b19713
+ VFMSUBADD132PD X2, X9, X2 // c4e2b197d2
+ VFMSUBADD132PD X11, X9, X2 // c4c2b197d3
+ VFMSUBADD132PD (BX), X9, X11 // c462b1971b
+ VFMSUBADD132PD (R11), X9, X11 // c442b1971b
+ VFMSUBADD132PD X2, X9, X11 // c462b197da
+ VFMSUBADD132PD X11, X9, X11 // c442b197db
+ VFMSUBADD132PD (BX), Y15, Y2 // c4e2859713
+ VFMSUBADD132PD (R11), Y15, Y2 // c4c2859713
+ VFMSUBADD132PD Y2, Y15, Y2 // c4e28597d2
+ VFMSUBADD132PD Y11, Y15, Y2 // c4c28597d3
+ VFMSUBADD132PD (BX), Y15, Y11 // c46285971b
+ VFMSUBADD132PD (R11), Y15, Y11 // c44285971b
+ VFMSUBADD132PD Y2, Y15, Y11 // c4628597da
+ VFMSUBADD132PD Y11, Y15, Y11 // c4428597db
+ VFMSUBADD132PS (BX), X9, X2 // c4e2319713
+ VFMSUBADD132PS (R11), X9, X2 // c4c2319713
+ VFMSUBADD132PS X2, X9, X2 // c4e23197d2
+ VFMSUBADD132PS X11, X9, X2 // c4c23197d3
+ VFMSUBADD132PS (BX), X9, X11 // c46231971b
+ VFMSUBADD132PS (R11), X9, X11 // c44231971b
+ VFMSUBADD132PS X2, X9, X11 // c4623197da
+ VFMSUBADD132PS X11, X9, X11 // c4423197db
+ VFMSUBADD132PS (BX), Y15, Y2 // c4e2059713
+ VFMSUBADD132PS (R11), Y15, Y2 // c4c2059713
+ VFMSUBADD132PS Y2, Y15, Y2 // c4e20597d2
+ VFMSUBADD132PS Y11, Y15, Y2 // c4c20597d3
+ VFMSUBADD132PS (BX), Y15, Y11 // c46205971b
+ VFMSUBADD132PS (R11), Y15, Y11 // c44205971b
+ VFMSUBADD132PS Y2, Y15, Y11 // c4620597da
+ VFMSUBADD132PS Y11, Y15, Y11 // c4420597db
+ VFMSUBADD213PD (BX), X9, X2 // c4e2b1a713
+ VFMSUBADD213PD (R11), X9, X2 // c4c2b1a713
+ VFMSUBADD213PD X2, X9, X2 // c4e2b1a7d2
+ VFMSUBADD213PD X11, X9, X2 // c4c2b1a7d3
+ VFMSUBADD213PD (BX), X9, X11 // c462b1a71b
+ VFMSUBADD213PD (R11), X9, X11 // c442b1a71b
+ VFMSUBADD213PD X2, X9, X11 // c462b1a7da
+ VFMSUBADD213PD X11, X9, X11 // c442b1a7db
+ VFMSUBADD213PD (BX), Y15, Y2 // c4e285a713
+ VFMSUBADD213PD (R11), Y15, Y2 // c4c285a713
+ VFMSUBADD213PD Y2, Y15, Y2 // c4e285a7d2
+ VFMSUBADD213PD Y11, Y15, Y2 // c4c285a7d3
+ VFMSUBADD213PD (BX), Y15, Y11 // c46285a71b
+ VFMSUBADD213PD (R11), Y15, Y11 // c44285a71b
+ VFMSUBADD213PD Y2, Y15, Y11 // c46285a7da
+ VFMSUBADD213PD Y11, Y15, Y11 // c44285a7db
+ VFMSUBADD213PS (BX), X9, X2 // c4e231a713
+ VFMSUBADD213PS (R11), X9, X2 // c4c231a713
+ VFMSUBADD213PS X2, X9, X2 // c4e231a7d2
+ VFMSUBADD213PS X11, X9, X2 // c4c231a7d3
+ VFMSUBADD213PS (BX), X9, X11 // c46231a71b
+ VFMSUBADD213PS (R11), X9, X11 // c44231a71b
+ VFMSUBADD213PS X2, X9, X11 // c46231a7da
+ VFMSUBADD213PS X11, X9, X11 // c44231a7db
+ VFMSUBADD213PS (BX), Y15, Y2 // c4e205a713
+ VFMSUBADD213PS (R11), Y15, Y2 // c4c205a713
+ VFMSUBADD213PS Y2, Y15, Y2 // c4e205a7d2
+ VFMSUBADD213PS Y11, Y15, Y2 // c4c205a7d3
+ VFMSUBADD213PS (BX), Y15, Y11 // c46205a71b
+ VFMSUBADD213PS (R11), Y15, Y11 // c44205a71b
+ VFMSUBADD213PS Y2, Y15, Y11 // c46205a7da
+ VFMSUBADD213PS Y11, Y15, Y11 // c44205a7db
+ VFMSUBADD231PD (BX), X9, X2 // c4e2b1b713
+ VFMSUBADD231PD (R11), X9, X2 // c4c2b1b713
+ VFMSUBADD231PD X2, X9, X2 // c4e2b1b7d2
+ VFMSUBADD231PD X11, X9, X2 // c4c2b1b7d3
+ VFMSUBADD231PD (BX), X9, X11 // c462b1b71b
+ VFMSUBADD231PD (R11), X9, X11 // c442b1b71b
+ VFMSUBADD231PD X2, X9, X11 // c462b1b7da
+ VFMSUBADD231PD X11, X9, X11 // c442b1b7db
+ VFMSUBADD231PD (BX), Y15, Y2 // c4e285b713
+ VFMSUBADD231PD (R11), Y15, Y2 // c4c285b713
+ VFMSUBADD231PD Y2, Y15, Y2 // c4e285b7d2
+ VFMSUBADD231PD Y11, Y15, Y2 // c4c285b7d3
+ VFMSUBADD231PD (BX), Y15, Y11 // c46285b71b
+ VFMSUBADD231PD (R11), Y15, Y11 // c44285b71b
+ VFMSUBADD231PD Y2, Y15, Y11 // c46285b7da
+ VFMSUBADD231PD Y11, Y15, Y11 // c44285b7db
+ VFMSUBADD231PS (BX), X9, X2 // c4e231b713
+ VFMSUBADD231PS (R11), X9, X2 // c4c231b713
+ VFMSUBADD231PS X2, X9, X2 // c4e231b7d2
+ VFMSUBADD231PS X11, X9, X2 // c4c231b7d3
+ VFMSUBADD231PS (BX), X9, X11 // c46231b71b
+ VFMSUBADD231PS (R11), X9, X11 // c44231b71b
+ VFMSUBADD231PS X2, X9, X11 // c46231b7da
+ VFMSUBADD231PS X11, X9, X11 // c44231b7db
+ VFMSUBADD231PS (BX), Y15, Y2 // c4e205b713
+ VFMSUBADD231PS (R11), Y15, Y2 // c4c205b713
+ VFMSUBADD231PS Y2, Y15, Y2 // c4e205b7d2
+ VFMSUBADD231PS Y11, Y15, Y2 // c4c205b7d3
+ VFMSUBADD231PS (BX), Y15, Y11 // c46205b71b
+ VFMSUBADD231PS (R11), Y15, Y11 // c44205b71b
+ VFMSUBADD231PS Y2, Y15, Y11 // c46205b7da
+ VFMSUBADD231PS Y11, Y15, Y11 // c44205b7db
+ VFNMADD132PD (BX), X9, X2 // c4e2b19c13
+ VFNMADD132PD (R11), X9, X2 // c4c2b19c13
+ VFNMADD132PD X2, X9, X2 // c4e2b19cd2
+ VFNMADD132PD X11, X9, X2 // c4c2b19cd3
+ VFNMADD132PD (BX), X9, X11 // c462b19c1b
+ VFNMADD132PD (R11), X9, X11 // c442b19c1b
+ VFNMADD132PD X2, X9, X11 // c462b19cda
+ VFNMADD132PD X11, X9, X11 // c442b19cdb
+ VFNMADD132PD (BX), Y15, Y2 // c4e2859c13
+ VFNMADD132PD (R11), Y15, Y2 // c4c2859c13
+ VFNMADD132PD Y2, Y15, Y2 // c4e2859cd2
+ VFNMADD132PD Y11, Y15, Y2 // c4c2859cd3
+ VFNMADD132PD (BX), Y15, Y11 // c462859c1b
+ VFNMADD132PD (R11), Y15, Y11 // c442859c1b
+ VFNMADD132PD Y2, Y15, Y11 // c462859cda
+ VFNMADD132PD Y11, Y15, Y11 // c442859cdb
+ VFNMADD132PS (BX), X9, X2 // c4e2319c13
+ VFNMADD132PS (R11), X9, X2 // c4c2319c13
+ VFNMADD132PS X2, X9, X2 // c4e2319cd2
+ VFNMADD132PS X11, X9, X2 // c4c2319cd3
+ VFNMADD132PS (BX), X9, X11 // c462319c1b
+ VFNMADD132PS (R11), X9, X11 // c442319c1b
+ VFNMADD132PS X2, X9, X11 // c462319cda
+ VFNMADD132PS X11, X9, X11 // c442319cdb
+ VFNMADD132PS (BX), Y15, Y2 // c4e2059c13
+ VFNMADD132PS (R11), Y15, Y2 // c4c2059c13
+ VFNMADD132PS Y2, Y15, Y2 // c4e2059cd2
+ VFNMADD132PS Y11, Y15, Y2 // c4c2059cd3
+ VFNMADD132PS (BX), Y15, Y11 // c462059c1b
+ VFNMADD132PS (R11), Y15, Y11 // c442059c1b
+ VFNMADD132PS Y2, Y15, Y11 // c462059cda
+ VFNMADD132PS Y11, Y15, Y11 // c442059cdb
+ VFNMADD132SD (BX), X9, X2 // c4e2b19d13
+ VFNMADD132SD (R11), X9, X2 // c4c2b19d13
+ VFNMADD132SD X2, X9, X2 // c4e2b19dd2
+ VFNMADD132SD X11, X9, X2 // c4c2b19dd3
+ VFNMADD132SD (BX), X9, X11 // c462b19d1b
+ VFNMADD132SD (R11), X9, X11 // c442b19d1b
+ VFNMADD132SD X2, X9, X11 // c462b19dda
+ VFNMADD132SD X11, X9, X11 // c442b19ddb
+ VFNMADD132SS (BX), X9, X2 // c4e2319d13
+ VFNMADD132SS (R11), X9, X2 // c4c2319d13
+ VFNMADD132SS X2, X9, X2 // c4e2319dd2
+ VFNMADD132SS X11, X9, X2 // c4c2319dd3
+ VFNMADD132SS (BX), X9, X11 // c462319d1b
+ VFNMADD132SS (R11), X9, X11 // c442319d1b
+ VFNMADD132SS X2, X9, X11 // c462319dda
+ VFNMADD132SS X11, X9, X11 // c442319ddb
+ VFNMADD213PD (BX), X9, X2 // c4e2b1ac13
+ VFNMADD213PD (R11), X9, X2 // c4c2b1ac13
+ VFNMADD213PD X2, X9, X2 // c4e2b1acd2
+ VFNMADD213PD X11, X9, X2 // c4c2b1acd3
+ VFNMADD213PD (BX), X9, X11 // c462b1ac1b
+ VFNMADD213PD (R11), X9, X11 // c442b1ac1b
+ VFNMADD213PD X2, X9, X11 // c462b1acda
+ VFNMADD213PD X11, X9, X11 // c442b1acdb
+ VFNMADD213PD (BX), Y15, Y2 // c4e285ac13
+ VFNMADD213PD (R11), Y15, Y2 // c4c285ac13
+ VFNMADD213PD Y2, Y15, Y2 // c4e285acd2
+ VFNMADD213PD Y11, Y15, Y2 // c4c285acd3
+ VFNMADD213PD (BX), Y15, Y11 // c46285ac1b
+ VFNMADD213PD (R11), Y15, Y11 // c44285ac1b
+ VFNMADD213PD Y2, Y15, Y11 // c46285acda
+ VFNMADD213PD Y11, Y15, Y11 // c44285acdb
+ VFNMADD213PS (BX), X9, X2 // c4e231ac13
+ VFNMADD213PS (R11), X9, X2 // c4c231ac13
+ VFNMADD213PS X2, X9, X2 // c4e231acd2
+ VFNMADD213PS X11, X9, X2 // c4c231acd3
+ VFNMADD213PS (BX), X9, X11 // c46231ac1b
+ VFNMADD213PS (R11), X9, X11 // c44231ac1b
+ VFNMADD213PS X2, X9, X11 // c46231acda
+ VFNMADD213PS X11, X9, X11 // c44231acdb
+ VFNMADD213PS (BX), Y15, Y2 // c4e205ac13
+ VFNMADD213PS (R11), Y15, Y2 // c4c205ac13
+ VFNMADD213PS Y2, Y15, Y2 // c4e205acd2
+ VFNMADD213PS Y11, Y15, Y2 // c4c205acd3
+ VFNMADD213PS (BX), Y15, Y11 // c46205ac1b
+ VFNMADD213PS (R11), Y15, Y11 // c44205ac1b
+ VFNMADD213PS Y2, Y15, Y11 // c46205acda
+ VFNMADD213PS Y11, Y15, Y11 // c44205acdb
+ VFNMADD213SD (BX), X9, X2 // c4e2b1ad13
+ VFNMADD213SD (R11), X9, X2 // c4c2b1ad13
+ VFNMADD213SD X2, X9, X2 // c4e2b1add2
+ VFNMADD213SD X11, X9, X2 // c4c2b1add3
+ VFNMADD213SD (BX), X9, X11 // c462b1ad1b
+ VFNMADD213SD (R11), X9, X11 // c442b1ad1b
+ VFNMADD213SD X2, X9, X11 // c462b1adda
+ VFNMADD213SD X11, X9, X11 // c442b1addb
+ VFNMADD213SS (BX), X9, X2 // c4e231ad13
+ VFNMADD213SS (R11), X9, X2 // c4c231ad13
+ VFNMADD213SS X2, X9, X2 // c4e231add2
+ VFNMADD213SS X11, X9, X2 // c4c231add3
+ VFNMADD213SS (BX), X9, X11 // c46231ad1b
+ VFNMADD213SS (R11), X9, X11 // c44231ad1b
+ VFNMADD213SS X2, X9, X11 // c46231adda
+ VFNMADD213SS X11, X9, X11 // c44231addb
+ VFNMADD231PD (BX), X9, X2 // c4e2b1bc13
+ VFNMADD231PD (R11), X9, X2 // c4c2b1bc13
+ VFNMADD231PD X2, X9, X2 // c4e2b1bcd2
+ VFNMADD231PD X11, X9, X2 // c4c2b1bcd3
+ VFNMADD231PD (BX), X9, X11 // c462b1bc1b
+ VFNMADD231PD (R11), X9, X11 // c442b1bc1b
+ VFNMADD231PD X2, X9, X11 // c462b1bcda
+ VFNMADD231PD X11, X9, X11 // c442b1bcdb
+ VFNMADD231PD (BX), Y15, Y2 // c4e285bc13
+ VFNMADD231PD (R11), Y15, Y2 // c4c285bc13
+ VFNMADD231PD Y2, Y15, Y2 // c4e285bcd2
+ VFNMADD231PD Y11, Y15, Y2 // c4c285bcd3
+ VFNMADD231PD (BX), Y15, Y11 // c46285bc1b
+ VFNMADD231PD (R11), Y15, Y11 // c44285bc1b
+ VFNMADD231PD Y2, Y15, Y11 // c46285bcda
+ VFNMADD231PD Y11, Y15, Y11 // c44285bcdb
+ VFNMADD231PS (BX), X9, X2 // c4e231bc13
+ VFNMADD231PS (R11), X9, X2 // c4c231bc13
+ VFNMADD231PS X2, X9, X2 // c4e231bcd2
+ VFNMADD231PS X11, X9, X2 // c4c231bcd3
+ VFNMADD231PS (BX), X9, X11 // c46231bc1b
+ VFNMADD231PS (R11), X9, X11 // c44231bc1b
+ VFNMADD231PS X2, X9, X11 // c46231bcda
+ VFNMADD231PS X11, X9, X11 // c44231bcdb
+ VFNMADD231PS (BX), Y15, Y2 // c4e205bc13
+ VFNMADD231PS (R11), Y15, Y2 // c4c205bc13
+ VFNMADD231PS Y2, Y15, Y2 // c4e205bcd2
+ VFNMADD231PS Y11, Y15, Y2 // c4c205bcd3
+ VFNMADD231PS (BX), Y15, Y11 // c46205bc1b
+ VFNMADD231PS (R11), Y15, Y11 // c44205bc1b
+ VFNMADD231PS Y2, Y15, Y11 // c46205bcda
+ VFNMADD231PS Y11, Y15, Y11 // c44205bcdb
+ VFNMADD231SD (BX), X9, X2 // c4e2b1bd13
+ VFNMADD231SD (R11), X9, X2 // c4c2b1bd13
+ VFNMADD231SD X2, X9, X2 // c4e2b1bdd2
+ VFNMADD231SD X11, X9, X2 // c4c2b1bdd3
+ VFNMADD231SD (BX), X9, X11 // c462b1bd1b
+ VFNMADD231SD (R11), X9, X11 // c442b1bd1b
+ VFNMADD231SD X2, X9, X11 // c462b1bdda
+ VFNMADD231SD X11, X9, X11 // c442b1bddb
+ VFNMADD231SS (BX), X9, X2 // c4e231bd13
+ VFNMADD231SS (R11), X9, X2 // c4c231bd13
+ VFNMADD231SS X2, X9, X2 // c4e231bdd2
+ VFNMADD231SS X11, X9, X2 // c4c231bdd3
+ VFNMADD231SS (BX), X9, X11 // c46231bd1b
+ VFNMADD231SS (R11), X9, X11 // c44231bd1b
+ VFNMADD231SS X2, X9, X11 // c46231bdda
+ VFNMADD231SS X11, X9, X11 // c44231bddb
+ VFNMSUB132PD (BX), X9, X2 // c4e2b19e13
+ VFNMSUB132PD (R11), X9, X2 // c4c2b19e13
+ VFNMSUB132PD X2, X9, X2 // c4e2b19ed2
+ VFNMSUB132PD X11, X9, X2 // c4c2b19ed3
+ VFNMSUB132PD (BX), X9, X11 // c462b19e1b
+ VFNMSUB132PD (R11), X9, X11 // c442b19e1b
+ VFNMSUB132PD X2, X9, X11 // c462b19eda
+ VFNMSUB132PD X11, X9, X11 // c442b19edb
+ VFNMSUB132PD (BX), Y15, Y2 // c4e2859e13
+ VFNMSUB132PD (R11), Y15, Y2 // c4c2859e13
+ VFNMSUB132PD Y2, Y15, Y2 // c4e2859ed2
+ VFNMSUB132PD Y11, Y15, Y2 // c4c2859ed3
+ VFNMSUB132PD (BX), Y15, Y11 // c462859e1b
+ VFNMSUB132PD (R11), Y15, Y11 // c442859e1b
+ VFNMSUB132PD Y2, Y15, Y11 // c462859eda
+ VFNMSUB132PD Y11, Y15, Y11 // c442859edb
+ VFNMSUB132PS (BX), X9, X2 // c4e2319e13
+ VFNMSUB132PS (R11), X9, X2 // c4c2319e13
+ VFNMSUB132PS X2, X9, X2 // c4e2319ed2
+ VFNMSUB132PS X11, X9, X2 // c4c2319ed3
+ VFNMSUB132PS (BX), X9, X11 // c462319e1b
+ VFNMSUB132PS (R11), X9, X11 // c442319e1b
+ VFNMSUB132PS X2, X9, X11 // c462319eda
+ VFNMSUB132PS X11, X9, X11 // c442319edb
+ VFNMSUB132PS (BX), Y15, Y2 // c4e2059e13
+ VFNMSUB132PS (R11), Y15, Y2 // c4c2059e13
+ VFNMSUB132PS Y2, Y15, Y2 // c4e2059ed2
+ VFNMSUB132PS Y11, Y15, Y2 // c4c2059ed3
+ VFNMSUB132PS (BX), Y15, Y11 // c462059e1b
+ VFNMSUB132PS (R11), Y15, Y11 // c442059e1b
+ VFNMSUB132PS Y2, Y15, Y11 // c462059eda
+ VFNMSUB132PS Y11, Y15, Y11 // c442059edb
+ VFNMSUB132SD (BX), X9, X2 // c4e2b19f13
+ VFNMSUB132SD (R11), X9, X2 // c4c2b19f13
+ VFNMSUB132SD X2, X9, X2 // c4e2b19fd2
+ VFNMSUB132SD X11, X9, X2 // c4c2b19fd3
+ VFNMSUB132SD (BX), X9, X11 // c462b19f1b
+ VFNMSUB132SD (R11), X9, X11 // c442b19f1b
+ VFNMSUB132SD X2, X9, X11 // c462b19fda
+ VFNMSUB132SD X11, X9, X11 // c442b19fdb
+ VFNMSUB132SS (BX), X9, X2 // c4e2319f13
+ VFNMSUB132SS (R11), X9, X2 // c4c2319f13
+ VFNMSUB132SS X2, X9, X2 // c4e2319fd2
+ VFNMSUB132SS X11, X9, X2 // c4c2319fd3
+ VFNMSUB132SS (BX), X9, X11 // c462319f1b
+ VFNMSUB132SS (R11), X9, X11 // c442319f1b
+ VFNMSUB132SS X2, X9, X11 // c462319fda
+ VFNMSUB132SS X11, X9, X11 // c442319fdb
+ VFNMSUB213PD (BX), X9, X2 // c4e2b1ae13
+ VFNMSUB213PD (R11), X9, X2 // c4c2b1ae13
+ VFNMSUB213PD X2, X9, X2 // c4e2b1aed2
+ VFNMSUB213PD X11, X9, X2 // c4c2b1aed3
+ VFNMSUB213PD (BX), X9, X11 // c462b1ae1b
+ VFNMSUB213PD (R11), X9, X11 // c442b1ae1b
+ VFNMSUB213PD X2, X9, X11 // c462b1aeda
+ VFNMSUB213PD X11, X9, X11 // c442b1aedb
+ VFNMSUB213PD (BX), Y15, Y2 // c4e285ae13
+ VFNMSUB213PD (R11), Y15, Y2 // c4c285ae13
+ VFNMSUB213PD Y2, Y15, Y2 // c4e285aed2
+ VFNMSUB213PD Y11, Y15, Y2 // c4c285aed3
+ VFNMSUB213PD (BX), Y15, Y11 // c46285ae1b
+ VFNMSUB213PD (R11), Y15, Y11 // c44285ae1b
+ VFNMSUB213PD Y2, Y15, Y11 // c46285aeda
+ VFNMSUB213PD Y11, Y15, Y11 // c44285aedb
+ VFNMSUB213PS (BX), X9, X2 // c4e231ae13
+ VFNMSUB213PS (R11), X9, X2 // c4c231ae13
+ VFNMSUB213PS X2, X9, X2 // c4e231aed2
+ VFNMSUB213PS X11, X9, X2 // c4c231aed3
+ VFNMSUB213PS (BX), X9, X11 // c46231ae1b
+ VFNMSUB213PS (R11), X9, X11 // c44231ae1b
+ VFNMSUB213PS X2, X9, X11 // c46231aeda
+ VFNMSUB213PS X11, X9, X11 // c44231aedb
+ VFNMSUB213PS (BX), Y15, Y2 // c4e205ae13
+ VFNMSUB213PS (R11), Y15, Y2 // c4c205ae13
+ VFNMSUB213PS Y2, Y15, Y2 // c4e205aed2
+ VFNMSUB213PS Y11, Y15, Y2 // c4c205aed3
+ VFNMSUB213PS (BX), Y15, Y11 // c46205ae1b
+ VFNMSUB213PS (R11), Y15, Y11 // c44205ae1b
+ VFNMSUB213PS Y2, Y15, Y11 // c46205aeda
+ VFNMSUB213PS Y11, Y15, Y11 // c44205aedb
+ VFNMSUB213SD (BX), X9, X2 // c4e2b1af13
+ VFNMSUB213SD (R11), X9, X2 // c4c2b1af13
+ VFNMSUB213SD X2, X9, X2 // c4e2b1afd2
+ VFNMSUB213SD X11, X9, X2 // c4c2b1afd3
+ VFNMSUB213SD (BX), X9, X11 // c462b1af1b
+ VFNMSUB213SD (R11), X9, X11 // c442b1af1b
+ VFNMSUB213SD X2, X9, X11 // c462b1afda
+ VFNMSUB213SD X11, X9, X11 // c442b1afdb
+ VFNMSUB213SS (BX), X9, X2 // c4e231af13
+ VFNMSUB213SS (R11), X9, X2 // c4c231af13
+ VFNMSUB213SS X2, X9, X2 // c4e231afd2
+ VFNMSUB213SS X11, X9, X2 // c4c231afd3
+ VFNMSUB213SS (BX), X9, X11 // c46231af1b
+ VFNMSUB213SS (R11), X9, X11 // c44231af1b
+ VFNMSUB213SS X2, X9, X11 // c46231afda
+ VFNMSUB213SS X11, X9, X11 // c44231afdb
+ VFNMSUB231PD (BX), X9, X2 // c4e2b1be13
+ VFNMSUB231PD (R11), X9, X2 // c4c2b1be13
+ VFNMSUB231PD X2, X9, X2 // c4e2b1bed2
+ VFNMSUB231PD X11, X9, X2 // c4c2b1bed3
+ VFNMSUB231PD (BX), X9, X11 // c462b1be1b
+ VFNMSUB231PD (R11), X9, X11 // c442b1be1b
+ VFNMSUB231PD X2, X9, X11 // c462b1beda
+ VFNMSUB231PD X11, X9, X11 // c442b1bedb
+ VFNMSUB231PD (BX), Y15, Y2 // c4e285be13
+ VFNMSUB231PD (R11), Y15, Y2 // c4c285be13
+ VFNMSUB231PD Y2, Y15, Y2 // c4e285bed2
+ VFNMSUB231PD Y11, Y15, Y2 // c4c285bed3
+ VFNMSUB231PD (BX), Y15, Y11 // c46285be1b
+ VFNMSUB231PD (R11), Y15, Y11 // c44285be1b
+ VFNMSUB231PD Y2, Y15, Y11 // c46285beda
+ VFNMSUB231PD Y11, Y15, Y11 // c44285bedb
+ VFNMSUB231PS (BX), X9, X2 // c4e231be13
+ VFNMSUB231PS (R11), X9, X2 // c4c231be13
+ VFNMSUB231PS X2, X9, X2 // c4e231bed2
+ VFNMSUB231PS X11, X9, X2 // c4c231bed3
+ VFNMSUB231PS (BX), X9, X11 // c46231be1b
+ VFNMSUB231PS (R11), X9, X11 // c44231be1b
+ VFNMSUB231PS X2, X9, X11 // c46231beda
+ VFNMSUB231PS X11, X9, X11 // c44231bedb
+ VFNMSUB231PS (BX), Y15, Y2 // c4e205be13
+ VFNMSUB231PS (R11), Y15, Y2 // c4c205be13
+ VFNMSUB231PS Y2, Y15, Y2 // c4e205bed2
+ VFNMSUB231PS Y11, Y15, Y2 // c4c205bed3
+ VFNMSUB231PS (BX), Y15, Y11 // c46205be1b
+ VFNMSUB231PS (R11), Y15, Y11 // c44205be1b
+ VFNMSUB231PS Y2, Y15, Y11 // c46205beda
+ VFNMSUB231PS Y11, Y15, Y11 // c44205bedb
+ VFNMSUB231SD (BX), X9, X2 // c4e2b1bf13
+ VFNMSUB231SD (R11), X9, X2 // c4c2b1bf13
+ VFNMSUB231SD X2, X9, X2 // c4e2b1bfd2
+ VFNMSUB231SD X11, X9, X2 // c4c2b1bfd3
+ VFNMSUB231SD (BX), X9, X11 // c462b1bf1b
+ VFNMSUB231SD (R11), X9, X11 // c442b1bf1b
+ VFNMSUB231SD X2, X9, X11 // c462b1bfda
+ VFNMSUB231SD X11, X9, X11 // c442b1bfdb
+ VFNMSUB231SS (BX), X9, X2 // c4e231bf13
+ VFNMSUB231SS (R11), X9, X2 // c4c231bf13
+ VFNMSUB231SS X2, X9, X2 // c4e231bfd2
+ VFNMSUB231SS X11, X9, X2 // c4c231bfd3
+ VFNMSUB231SS (BX), X9, X11 // c46231bf1b
+ VFNMSUB231SS (R11), X9, X11 // c44231bf1b
+ VFNMSUB231SS X2, X9, X11 // c46231bfda
+ VFNMSUB231SS X11, X9, X11 // c44231bfdb
+ VHADDPD (BX), X9, X2 // c4e1317c13 or c5b17c13
+ VHADDPD (R11), X9, X2 // c4c1317c13
+ VHADDPD X2, X9, X2 // c4e1317cd2 or c5b17cd2
+ VHADDPD X11, X9, X2 // c4c1317cd3
+ VHADDPD (BX), X9, X11 // c461317c1b or c5317c1b
+ VHADDPD (R11), X9, X11 // c441317c1b
+ VHADDPD X2, X9, X11 // c461317cda or c5317cda
+ VHADDPD X11, X9, X11 // c441317cdb
+ VHADDPD (BX), Y15, Y2 // c4e1057c13 or c5857c13
+ VHADDPD (R11), Y15, Y2 // c4c1057c13
+ VHADDPD Y2, Y15, Y2 // c4e1057cd2 or c5857cd2
+ VHADDPD Y11, Y15, Y2 // c4c1057cd3
+ VHADDPD (BX), Y15, Y11 // c461057c1b or c5057c1b
+ VHADDPD (R11), Y15, Y11 // c441057c1b
+ VHADDPD Y2, Y15, Y11 // c461057cda or c5057cda
+ VHADDPD Y11, Y15, Y11 // c441057cdb
+ VHADDPS (BX), X9, X2 // c4e1337c13 or c5b37c13
+ VHADDPS (R11), X9, X2 // c4c1337c13
+ VHADDPS X2, X9, X2 // c4e1337cd2 or c5b37cd2
+ VHADDPS X11, X9, X2 // c4c1337cd3
+ VHADDPS (BX), X9, X11 // c461337c1b or c5337c1b
+ VHADDPS (R11), X9, X11 // c441337c1b
+ VHADDPS X2, X9, X11 // c461337cda or c5337cda
+ VHADDPS X11, X9, X11 // c441337cdb
+ VHADDPS (BX), Y15, Y2 // c4e1077c13 or c5877c13
+ VHADDPS (R11), Y15, Y2 // c4c1077c13
+ VHADDPS Y2, Y15, Y2 // c4e1077cd2 or c5877cd2
+ VHADDPS Y11, Y15, Y2 // c4c1077cd3
+ VHADDPS (BX), Y15, Y11 // c461077c1b or c5077c1b
+ VHADDPS (R11), Y15, Y11 // c441077c1b
+ VHADDPS Y2, Y15, Y11 // c461077cda or c5077cda
+ VHADDPS Y11, Y15, Y11 // c441077cdb
+ VHSUBPD (BX), X9, X2 // c4e1317d13 or c5b17d13
+ VHSUBPD (R11), X9, X2 // c4c1317d13
+ VHSUBPD X2, X9, X2 // c4e1317dd2 or c5b17dd2
+ VHSUBPD X11, X9, X2 // c4c1317dd3
+ VHSUBPD (BX), X9, X11 // c461317d1b or c5317d1b
+ VHSUBPD (R11), X9, X11 // c441317d1b
+ VHSUBPD X2, X9, X11 // c461317dda or c5317dda
+ VHSUBPD X11, X9, X11 // c441317ddb
+ VHSUBPD (BX), Y15, Y2 // c4e1057d13 or c5857d13
+ VHSUBPD (R11), Y15, Y2 // c4c1057d13
+ VHSUBPD Y2, Y15, Y2 // c4e1057dd2 or c5857dd2
+ VHSUBPD Y11, Y15, Y2 // c4c1057dd3
+ VHSUBPD (BX), Y15, Y11 // c461057d1b or c5057d1b
+ VHSUBPD (R11), Y15, Y11 // c441057d1b
+ VHSUBPD Y2, Y15, Y11 // c461057dda or c5057dda
+ VHSUBPD Y11, Y15, Y11 // c441057ddb
+ VHSUBPS (BX), X9, X2 // c4e1337d13 or c5b37d13
+ VHSUBPS (R11), X9, X2 // c4c1337d13
+ VHSUBPS X2, X9, X2 // c4e1337dd2 or c5b37dd2
+ VHSUBPS X11, X9, X2 // c4c1337dd3
+ VHSUBPS (BX), X9, X11 // c461337d1b or c5337d1b
+ VHSUBPS (R11), X9, X11 // c441337d1b
+ VHSUBPS X2, X9, X11 // c461337dda or c5337dda
+ VHSUBPS X11, X9, X11 // c441337ddb
+ VHSUBPS (BX), Y15, Y2 // c4e1077d13 or c5877d13
+ VHSUBPS (R11), Y15, Y2 // c4c1077d13
+ VHSUBPS Y2, Y15, Y2 // c4e1077dd2 or c5877dd2
+ VHSUBPS Y11, Y15, Y2 // c4c1077dd3
+ VHSUBPS (BX), Y15, Y11 // c461077d1b or c5077d1b
+ VHSUBPS (R11), Y15, Y11 // c441077d1b
+ VHSUBPS Y2, Y15, Y11 // c461077dda or c5077dda
+ VHSUBPS Y11, Y15, Y11 // c441077ddb
+ VINSERTF128 $7, (BX), Y15, Y2 // c4e305181307
+ VINSERTF128 $7, (R11), Y15, Y2 // c4c305181307
+ VINSERTF128 $7, X2, Y15, Y2 // c4e30518d207
+ VINSERTF128 $7, X11, Y15, Y2 // c4c30518d307
+ VINSERTF128 $7, (BX), Y15, Y11 // c46305181b07
+ VINSERTF128 $7, (R11), Y15, Y11 // c44305181b07
+ VINSERTF128 $7, X2, Y15, Y11 // c4630518da07
+ VINSERTF128 $7, X11, Y15, Y11 // c4430518db07
+ VINSERTI128 $7, (BX), Y15, Y2 // c4e305381307
+ VINSERTI128 $7, (R11), Y15, Y2 // c4c305381307
+ VINSERTI128 $7, X2, Y15, Y2 // c4e30538d207
+ VINSERTI128 $7, X11, Y15, Y2 // c4c30538d307
+ VINSERTI128 $7, (BX), Y15, Y11 // c46305381b07
+ VINSERTI128 $7, (R11), Y15, Y11 // c44305381b07
+ VINSERTI128 $7, X2, Y15, Y11 // c4630538da07
+ VINSERTI128 $7, X11, Y15, Y11 // c4430538db07
+ VINSERTPS $7, (BX), X9, X2 // c4e331211307
+ VINSERTPS $7, (R11), X9, X2 // c4c331211307
+ VINSERTPS $7, X2, X9, X2 // c4e33121d207
+ VINSERTPS $7, X11, X9, X2 // c4c33121d307
+ VINSERTPS $7, (BX), X9, X11 // c46331211b07
+ VINSERTPS $7, (R11), X9, X11 // c44331211b07
+ VINSERTPS $7, X2, X9, X11 // c4633121da07
+ VINSERTPS $7, X11, X9, X11 // c4433121db07
+ VLDDQU (BX), X2 // c4e17bf013 or c5fbf013
+ VLDDQU (R11), X2 // c4c17bf013
+ VLDDQU (BX), X11 // c4617bf01b or c57bf01b
+ VLDDQU (R11), X11 // c4417bf01b
+ VLDDQU (BX), Y2 // c4e17ff013 or c5fff013
+ VLDDQU (R11), Y2 // c4c17ff013
+ VLDDQU (BX), Y11 // c4617ff01b or c57ff01b
+ VLDDQU (R11), Y11 // c4417ff01b
+ VLDMXCSR (BX) // c4e178ae13 or c5f8ae13
+ VLDMXCSR (R11) // c4c178ae13
+ VMASKMOVDQU X2, X2 // c4e179f7d2 or c5f9f7d2
+ VMASKMOVDQU X11, X2 // c4c179f7d3
+ VMASKMOVDQU X2, X11 // c46179f7da or c579f7da
+ VMASKMOVDQU X11, X11 // c44179f7db
+ VMASKMOVPD X2, X9, (BX) // c4e2312f13
+ VMASKMOVPD X11, X9, (BX) // c462312f1b
+ VMASKMOVPD X2, X9, (R11) // c4c2312f13
+ VMASKMOVPD X11, X9, (R11) // c442312f1b
+ VMASKMOVPD Y2, Y15, (BX) // c4e2052f13
+ VMASKMOVPD Y11, Y15, (BX) // c462052f1b
+ VMASKMOVPD Y2, Y15, (R11) // c4c2052f13
+ VMASKMOVPD Y11, Y15, (R11) // c442052f1b
+ VMASKMOVPD (BX), X9, X2 // c4e2312d13
+ VMASKMOVPD (R11), X9, X2 // c4c2312d13
+ VMASKMOVPD (BX), X9, X11 // c462312d1b
+ VMASKMOVPD (R11), X9, X11 // c442312d1b
+ VMASKMOVPD (BX), Y15, Y2 // c4e2052d13
+ VMASKMOVPD (R11), Y15, Y2 // c4c2052d13
+ VMASKMOVPD (BX), Y15, Y11 // c462052d1b
+ VMASKMOVPD (R11), Y15, Y11 // c442052d1b
+ VMASKMOVPS X2, X9, (BX) // c4e2312e13
+ VMASKMOVPS X11, X9, (BX) // c462312e1b
+ VMASKMOVPS X2, X9, (R11) // c4c2312e13
+ VMASKMOVPS X11, X9, (R11) // c442312e1b
+ VMASKMOVPS Y2, Y15, (BX) // c4e2052e13
+ VMASKMOVPS Y11, Y15, (BX) // c462052e1b
+ VMASKMOVPS Y2, Y15, (R11) // c4c2052e13
+ VMASKMOVPS Y11, Y15, (R11) // c442052e1b
+ VMASKMOVPS (BX), X9, X2 // c4e2312c13
+ VMASKMOVPS (R11), X9, X2 // c4c2312c13
+ VMASKMOVPS (BX), X9, X11 // c462312c1b
+ VMASKMOVPS (R11), X9, X11 // c442312c1b
+ VMASKMOVPS (BX), Y15, Y2 // c4e2052c13
+ VMASKMOVPS (R11), Y15, Y2 // c4c2052c13
+ VMASKMOVPS (BX), Y15, Y11 // c462052c1b
+ VMASKMOVPS (R11), Y15, Y11 // c442052c1b
+ VMAXPD (BX), X9, X2 // c4e1315f13 or c5b15f13
+ VMAXPD (R11), X9, X2 // c4c1315f13
+ VMAXPD X2, X9, X2 // c4e1315fd2 or c5b15fd2
+ VMAXPD X11, X9, X2 // c4c1315fd3
+ VMAXPD (BX), X9, X11 // c461315f1b or c5315f1b
+ VMAXPD (R11), X9, X11 // c441315f1b
+ VMAXPD X2, X9, X11 // c461315fda or c5315fda
+ VMAXPD X11, X9, X11 // c441315fdb
+ VMAXPD (BX), Y15, Y2 // c4e1055f13 or c5855f13
+ VMAXPD (R11), Y15, Y2 // c4c1055f13
+ VMAXPD Y2, Y15, Y2 // c4e1055fd2 or c5855fd2
+ VMAXPD Y11, Y15, Y2 // c4c1055fd3
+ VMAXPD (BX), Y15, Y11 // c461055f1b or c5055f1b
+ VMAXPD (R11), Y15, Y11 // c441055f1b
+ VMAXPD Y2, Y15, Y11 // c461055fda or c5055fda
+ VMAXPD Y11, Y15, Y11 // c441055fdb
+ VMAXPS (BX), X9, X2 // c4e1305f13 or c5b05f13
+ VMAXPS (R11), X9, X2 // c4c1305f13
+ VMAXPS X2, X9, X2 // c4e1305fd2 or c5b05fd2
+ VMAXPS X11, X9, X2 // c4c1305fd3
+ VMAXPS (BX), X9, X11 // c461305f1b or c5305f1b
+ VMAXPS (R11), X9, X11 // c441305f1b
+ VMAXPS X2, X9, X11 // c461305fda or c5305fda
+ VMAXPS X11, X9, X11 // c441305fdb
+ VMAXPS (BX), Y15, Y2 // c4e1045f13 or c5845f13
+ VMAXPS (R11), Y15, Y2 // c4c1045f13
+ VMAXPS Y2, Y15, Y2 // c4e1045fd2 or c5845fd2
+ VMAXPS Y11, Y15, Y2 // c4c1045fd3
+ VMAXPS (BX), Y15, Y11 // c461045f1b or c5045f1b
+ VMAXPS (R11), Y15, Y11 // c441045f1b
+ VMAXPS Y2, Y15, Y11 // c461045fda or c5045fda
+ VMAXPS Y11, Y15, Y11 // c441045fdb
+ VMAXSD (BX), X9, X2 // c4e1335f13 or c5b35f13
+ VMAXSD (R11), X9, X2 // c4c1335f13
+ VMAXSD X2, X9, X2 // c4e1335fd2 or c5b35fd2
+ VMAXSD X11, X9, X2 // c4c1335fd3
+ VMAXSD (BX), X9, X11 // c461335f1b or c5335f1b
+ VMAXSD (R11), X9, X11 // c441335f1b
+ VMAXSD X2, X9, X11 // c461335fda or c5335fda
+ VMAXSD X11, X9, X11 // c441335fdb
+ VMAXSS (BX), X9, X2 // c4e1325f13 or c5b25f13
+ VMAXSS (R11), X9, X2 // c4c1325f13
+ VMAXSS X2, X9, X2 // c4e1325fd2 or c5b25fd2
+ VMAXSS X11, X9, X2 // c4c1325fd3
+ VMAXSS (BX), X9, X11 // c461325f1b or c5325f1b
+ VMAXSS (R11), X9, X11 // c441325f1b
+ VMAXSS X2, X9, X11 // c461325fda or c5325fda
+ VMAXSS X11, X9, X11 // c441325fdb
+ VMINPD (BX), X9, X2 // c4e1315d13 or c5b15d13
+ VMINPD (R11), X9, X2 // c4c1315d13
+ VMINPD X2, X9, X2 // c4e1315dd2 or c5b15dd2
+ VMINPD X11, X9, X2 // c4c1315dd3
+ VMINPD (BX), X9, X11 // c461315d1b or c5315d1b
+ VMINPD (R11), X9, X11 // c441315d1b
+ VMINPD X2, X9, X11 // c461315dda or c5315dda
+ VMINPD X11, X9, X11 // c441315ddb
+ VMINPD (BX), Y15, Y2 // c4e1055d13 or c5855d13
+ VMINPD (R11), Y15, Y2 // c4c1055d13
+ VMINPD Y2, Y15, Y2 // c4e1055dd2 or c5855dd2
+ VMINPD Y11, Y15, Y2 // c4c1055dd3
+ VMINPD (BX), Y15, Y11 // c461055d1b or c5055d1b
+ VMINPD (R11), Y15, Y11 // c441055d1b
+ VMINPD Y2, Y15, Y11 // c461055dda or c5055dda
+ VMINPD Y11, Y15, Y11 // c441055ddb
+ VMINPS (BX), X9, X2 // c4e1305d13 or c5b05d13
+ VMINPS (R11), X9, X2 // c4c1305d13
+ VMINPS X2, X9, X2 // c4e1305dd2 or c5b05dd2
+ VMINPS X11, X9, X2 // c4c1305dd3
+ VMINPS (BX), X9, X11 // c461305d1b or c5305d1b
+ VMINPS (R11), X9, X11 // c441305d1b
+ VMINPS X2, X9, X11 // c461305dda or c5305dda
+ VMINPS X11, X9, X11 // c441305ddb
+ VMINPS (BX), Y15, Y2 // c4e1045d13 or c5845d13
+ VMINPS (R11), Y15, Y2 // c4c1045d13
+ VMINPS Y2, Y15, Y2 // c4e1045dd2 or c5845dd2
+ VMINPS Y11, Y15, Y2 // c4c1045dd3
+ VMINPS (BX), Y15, Y11 // c461045d1b or c5045d1b
+ VMINPS (R11), Y15, Y11 // c441045d1b
+ VMINPS Y2, Y15, Y11 // c461045dda or c5045dda
+ VMINPS Y11, Y15, Y11 // c441045ddb
+ VMINSD (BX), X9, X2 // c4e1335d13 or c5b35d13
+ VMINSD (R11), X9, X2 // c4c1335d13
+ VMINSD X2, X9, X2 // c4e1335dd2 or c5b35dd2
+ VMINSD X11, X9, X2 // c4c1335dd3
+ VMINSD (BX), X9, X11 // c461335d1b or c5335d1b
+ VMINSD (R11), X9, X11 // c441335d1b
+ VMINSD X2, X9, X11 // c461335dda or c5335dda
+ VMINSD X11, X9, X11 // c441335ddb
+ VMINSS (BX), X9, X2 // c4e1325d13 or c5b25d13
+ VMINSS (R11), X9, X2 // c4c1325d13
+ VMINSS X2, X9, X2 // c4e1325dd2 or c5b25dd2
+ VMINSS X11, X9, X2 // c4c1325dd3
+ VMINSS (BX), X9, X11 // c461325d1b or c5325d1b
+ VMINSS (R11), X9, X11 // c441325d1b
+ VMINSS X2, X9, X11 // c461325dda or c5325dda
+ VMINSS X11, X9, X11 // c441325ddb
+ VMOVAPD (BX), X2 // c4e1792813 or c5f92813
+ VMOVAPD (R11), X2 // c4c1792813
+ VMOVAPD X2, X2 // c4e17928d2 or c5f928d2 or c4e17929d2 or c5f929d2
+ VMOVAPD X11, X2 // c4c17928d3 or c4617929da or c57929da
+ VMOVAPD (BX), X11 // c46179281b or c579281b
+ VMOVAPD (R11), X11 // c44179281b
+ VMOVAPD X2, X11 // c4617928da or c57928da or c4c17929d3
+ VMOVAPD X11, X11 // c4417928db or c4417929db
+ VMOVAPD X2, (BX) // c4e1792913 or c5f92913
+ VMOVAPD X11, (BX) // c46179291b or c579291b
+ VMOVAPD X2, (R11) // c4c1792913
+ VMOVAPD X11, (R11) // c44179291b
+ VMOVAPD (BX), Y2 // c4e17d2813 or c5fd2813
+ VMOVAPD (R11), Y2 // c4c17d2813
+ VMOVAPD Y2, Y2 // c4e17d28d2 or c5fd28d2 or c4e17d29d2 or c5fd29d2
+ VMOVAPD Y11, Y2 // c4c17d28d3 or c4617d29da or c57d29da
+ VMOVAPD (BX), Y11 // c4617d281b or c57d281b
+ VMOVAPD (R11), Y11 // c4417d281b
+ VMOVAPD Y2, Y11 // c4617d28da or c57d28da or c4c17d29d3
+ VMOVAPD Y11, Y11 // c4417d28db or c4417d29db
+ VMOVAPD Y2, (BX) // c4e17d2913 or c5fd2913
+ VMOVAPD Y11, (BX) // c4617d291b or c57d291b
+ VMOVAPD Y2, (R11) // c4c17d2913
+ VMOVAPD Y11, (R11) // c4417d291b
+ VMOVAPS (BX), X2 // c4e1782813 or c5f82813
+ VMOVAPS (R11), X2 // c4c1782813
+ VMOVAPS X2, X2 // c4e17828d2 or c5f828d2 or c4e17829d2 or c5f829d2
+ VMOVAPS X11, X2 // c4c17828d3 or c4617829da or c57829da
+ VMOVAPS (BX), X11 // c46178281b or c578281b
+ VMOVAPS (R11), X11 // c44178281b
+ VMOVAPS X2, X11 // c4617828da or c57828da or c4c17829d3
+ VMOVAPS X11, X11 // c4417828db or c4417829db
+ VMOVAPS X2, (BX) // c4e1782913 or c5f82913
+ VMOVAPS X11, (BX) // c46178291b or c578291b
+ VMOVAPS X2, (R11) // c4c1782913
+ VMOVAPS X11, (R11) // c44178291b
+ VMOVAPS (BX), Y2 // c4e17c2813 or c5fc2813
+ VMOVAPS (R11), Y2 // c4c17c2813
+ VMOVAPS Y2, Y2 // c4e17c28d2 or c5fc28d2 or c4e17c29d2 or c5fc29d2
+ VMOVAPS Y11, Y2 // c4c17c28d3 or c4617c29da or c57c29da
+ VMOVAPS (BX), Y11 // c4617c281b or c57c281b
+ VMOVAPS (R11), Y11 // c4417c281b
+ VMOVAPS Y2, Y11 // c4617c28da or c57c28da or c4c17c29d3
+ VMOVAPS Y11, Y11 // c4417c28db or c4417c29db
+ VMOVAPS Y2, (BX) // c4e17c2913 or c5fc2913
+ VMOVAPS Y11, (BX) // c4617c291b or c57c291b
+ VMOVAPS Y2, (R11) // c4c17c2913
+ VMOVAPS Y11, (R11) // c4417c291b
+ VMOVD X2, (BX) // c4e1797e13 or c5f97e13
+ VMOVD X11, (BX) // c461797e1b or c5797e1b
+ VMOVD X2, (R11) // c4c1797e13
+ VMOVD X11, (R11) // c441797e1b
+ VMOVD X2, DX // c4e1797ed2 or c5f97ed2
+ VMOVD X11, DX // c461797eda or c5797eda
+ VMOVD X2, R11 // c4c1797ed3
+ VMOVD X11, R11 // c441797edb
+ VMOVD (BX), X2 // c4e1796e13 or c5f96e13
+ VMOVD (R11), X2 // c4c1796e13
+ VMOVD DX, X2 // c4e1796ed2 or c5f96ed2
+ VMOVD R11, X2 // c4c1796ed3
+ VMOVD (BX), X11 // c461796e1b or c5796e1b
+ VMOVD (R11), X11 // c441796e1b
+ VMOVD DX, X11 // c461796eda or c5796eda
+ VMOVD R11, X11 // c441796edb
+ VMOVDDUP (BX), X2 // c4e17b1213 or c5fb1213
+ VMOVDDUP (R11), X2 // c4c17b1213
+ VMOVDDUP X2, X2 // c4e17b12d2 or c5fb12d2
+ VMOVDDUP X11, X2 // c4c17b12d3
+ VMOVDDUP (BX), X11 // c4617b121b or c57b121b
+ VMOVDDUP (R11), X11 // c4417b121b
+ VMOVDDUP X2, X11 // c4617b12da or c57b12da
+ VMOVDDUP X11, X11 // c4417b12db
+ VMOVDDUP (BX), Y2 // c4e17f1213 or c5ff1213
+ VMOVDDUP (R11), Y2 // c4c17f1213
+ VMOVDDUP Y2, Y2 // c4e17f12d2 or c5ff12d2
+ VMOVDDUP Y11, Y2 // c4c17f12d3
+ VMOVDDUP (BX), Y11 // c4617f121b or c57f121b
+ VMOVDDUP (R11), Y11 // c4417f121b
+ VMOVDDUP Y2, Y11 // c4617f12da or c57f12da
+ VMOVDDUP Y11, Y11 // c4417f12db
+ VMOVDQA (BX), X2 // c4e1796f13 or c5f96f13
+ VMOVDQA (R11), X2 // c4c1796f13
+ VMOVDQA X2, X2 // c4e1796fd2 or c5f96fd2 or c4e1797fd2 or c5f97fd2
+ VMOVDQA X11, X2 // c4c1796fd3 or c461797fda or c5797fda
+ VMOVDQA (BX), X11 // c461796f1b or c5796f1b
+ VMOVDQA (R11), X11 // c441796f1b
+ VMOVDQA X2, X11 // c461796fda or c5796fda or c4c1797fd3
+ VMOVDQA X11, X11 // c441796fdb or c441797fdb
+ VMOVDQA X2, (BX) // c4e1797f13 or c5f97f13
+ VMOVDQA X11, (BX) // c461797f1b or c5797f1b
+ VMOVDQA X2, (R11) // c4c1797f13
+ VMOVDQA X11, (R11) // c441797f1b
+ VMOVDQA (BX), Y2 // c4e17d6f13 or c5fd6f13
+ VMOVDQA (R11), Y2 // c4c17d6f13
+ VMOVDQA Y2, Y2 // c4e17d6fd2 or c5fd6fd2 or c4e17d7fd2 or c5fd7fd2
+ VMOVDQA Y11, Y2 // c4c17d6fd3 or c4617d7fda or c57d7fda
+ VMOVDQA (BX), Y11 // c4617d6f1b or c57d6f1b
+ VMOVDQA (R11), Y11 // c4417d6f1b
+ VMOVDQA Y2, Y11 // c4617d6fda or c57d6fda or c4c17d7fd3
+ VMOVDQA Y11, Y11 // c4417d6fdb or c4417d7fdb
+ VMOVDQA Y2, (BX) // c4e17d7f13 or c5fd7f13
+ VMOVDQA Y11, (BX) // c4617d7f1b or c57d7f1b
+ VMOVDQA Y2, (R11) // c4c17d7f13
+ VMOVDQA Y11, (R11) // c4417d7f1b
+ VMOVDQU (BX), X2 // c4e17a6f13 or c5fa6f13
+ VMOVDQU (R11), X2 // c4c17a6f13
+ VMOVDQU X2, X2 // c4e17a6fd2 or c5fa6fd2 or c4e17a7fd2 or c5fa7fd2
+ VMOVDQU X11, X2 // c4c17a6fd3 or c4617a7fda or c57a7fda
+ VMOVDQU (BX), X11 // c4617a6f1b or c57a6f1b
+ VMOVDQU (R11), X11 // c4417a6f1b
+ VMOVDQU X2, X11 // c4617a6fda or c57a6fda or c4c17a7fd3
+ VMOVDQU X11, X11 // c4417a6fdb or c4417a7fdb
+ VMOVDQU X2, (BX) // c4e17a7f13 or c5fa7f13
+ VMOVDQU X11, (BX) // c4617a7f1b or c57a7f1b
+ VMOVDQU X2, (R11) // c4c17a7f13
+ VMOVDQU X11, (R11) // c4417a7f1b
+ VMOVDQU (BX), Y2 // c4e17e6f13 or c5fe6f13
+ VMOVDQU (R11), Y2 // c4c17e6f13
+ VMOVDQU Y2, Y2 // c4e17e6fd2 or c5fe6fd2 or c4e17e7fd2 or c5fe7fd2
+ VMOVDQU Y11, Y2 // c4c17e6fd3 or c4617e7fda or c57e7fda
+ VMOVDQU (BX), Y11 // c4617e6f1b or c57e6f1b
+ VMOVDQU (R11), Y11 // c4417e6f1b
+ VMOVDQU Y2, Y11 // c4617e6fda or c57e6fda or c4c17e7fd3
+ VMOVDQU Y11, Y11 // c4417e6fdb or c4417e7fdb
+ VMOVDQU Y2, (BX) // c4e17e7f13 or c5fe7f13
+ VMOVDQU Y11, (BX) // c4617e7f1b or c57e7f1b
+ VMOVDQU Y2, (R11) // c4c17e7f13
+ VMOVDQU Y11, (R11) // c4417e7f1b
+ VMOVHLPS X2, X9, X2 // c4e13012d2 or c5b012d2
+ VMOVHLPS X11, X9, X2 // c4c13012d3
+ VMOVHLPS X2, X9, X11 // c4613012da or c53012da
+ VMOVHLPS X11, X9, X11 // c4413012db
+ VMOVHPD X2, (BX) // c4e1791713 or c5f91713
+ VMOVHPD X11, (BX) // c46179171b or c579171b
+ VMOVHPD X2, (R11) // c4c1791713
+ VMOVHPD X11, (R11) // c44179171b
+ VMOVHPD (BX), X9, X2 // c4e1311613 or c5b11613
+ VMOVHPD (R11), X9, X2 // c4c1311613
+ VMOVHPD (BX), X9, X11 // c46131161b or c531161b
+ VMOVHPD (R11), X9, X11 // c44131161b
+ VMOVHPS X2, (BX) // c4e1781713 or c5f81713
+ VMOVHPS X11, (BX) // c46178171b or c578171b
+ VMOVHPS X2, (R11) // c4c1781713
+ VMOVHPS X11, (R11) // c44178171b
+ VMOVHPS (BX), X9, X2 // c4e1301613 or c5b01613
+ VMOVHPS (R11), X9, X2 // c4c1301613
+ VMOVHPS (BX), X9, X11 // c46130161b or c530161b
+ VMOVHPS (R11), X9, X11 // c44130161b
+ VMOVLHPS X2, X9, X2 // c4e13016d2 or c5b016d2
+ VMOVLHPS X11, X9, X2 // c4c13016d3
+ VMOVLHPS X2, X9, X11 // c4613016da or c53016da
+ VMOVLHPS X11, X9, X11 // c4413016db
+ VMOVLPD X2, (BX) // c4e1791313 or c5f91313
+ VMOVLPD X11, (BX) // c46179131b or c579131b
+ VMOVLPD X2, (R11) // c4c1791313
+ VMOVLPD X11, (R11) // c44179131b
+ VMOVLPD (BX), X9, X2 // c4e1311213 or c5b11213
+ VMOVLPD (R11), X9, X2 // c4c1311213
+ VMOVLPD (BX), X9, X11 // c46131121b or c531121b
+ VMOVLPD (R11), X9, X11 // c44131121b
+ VMOVLPS X2, (BX) // c4e1781313 or c5f81313
+ VMOVLPS X11, (BX) // c46178131b or c578131b
+ VMOVLPS X2, (R11) // c4c1781313
+ VMOVLPS X11, (R11) // c44178131b
+ VMOVLPS (BX), X9, X2 // c4e1301213 or c5b01213
+ VMOVLPS (R11), X9, X2 // c4c1301213
+ VMOVLPS (BX), X9, X11 // c46130121b or c530121b
+ VMOVLPS (R11), X9, X11 // c44130121b
+ VMOVMSKPD X2, DX // c4e17950d2 or c5f950d2
+ VMOVMSKPD X11, DX // c4c17950d3
+ VMOVMSKPD X2, R11 // c4617950da or c57950da
+ VMOVMSKPD X11, R11 // c4417950db
+ VMOVMSKPD Y2, DX // c4e17d50d2 or c5fd50d2
+ VMOVMSKPD Y11, DX // c4c17d50d3
+ VMOVMSKPD Y2, R11 // c4617d50da or c57d50da
+ VMOVMSKPD Y11, R11 // c4417d50db
+ VMOVMSKPS X2, DX // c4e17850d2 or c5f850d2
+ VMOVMSKPS X11, DX // c4c17850d3
+ VMOVMSKPS X2, R11 // c4617850da or c57850da
+ VMOVMSKPS X11, R11 // c4417850db
+ VMOVMSKPS Y2, DX // c4e17c50d2 or c5fc50d2
+ VMOVMSKPS Y11, DX // c4c17c50d3
+ VMOVMSKPS Y2, R11 // c4617c50da or c57c50da
+ VMOVMSKPS Y11, R11 // c4417c50db
+ VMOVNTDQ X2, (BX) // c4e179e713 or c5f9e713
+ VMOVNTDQ X11, (BX) // c46179e71b or c579e71b
+ VMOVNTDQ X2, (R11) // c4c179e713
+ VMOVNTDQ X11, (R11) // c44179e71b
+ VMOVNTDQ Y2, (BX) // c4e17de713 or c5fde713
+ VMOVNTDQ Y11, (BX) // c4617de71b or c57de71b
+ VMOVNTDQ Y2, (R11) // c4c17de713
+ VMOVNTDQ Y11, (R11) // c4417de71b
+ VMOVNTDQA (BX), X2 // c4e2792a13
+ VMOVNTDQA (R11), X2 // c4c2792a13
+ VMOVNTDQA (BX), X11 // c462792a1b
+ VMOVNTDQA (R11), X11 // c442792a1b
+ VMOVNTDQA (BX), Y2 // c4e27d2a13
+ VMOVNTDQA (R11), Y2 // c4c27d2a13
+ VMOVNTDQA (BX), Y11 // c4627d2a1b
+ VMOVNTDQA (R11), Y11 // c4427d2a1b
+ VMOVNTPD X2, (BX) // c4e1792b13 or c5f92b13
+ VMOVNTPD X11, (BX) // c461792b1b or c5792b1b
+ VMOVNTPD X2, (R11) // c4c1792b13
+ VMOVNTPD X11, (R11) // c441792b1b
+ VMOVNTPD Y2, (BX) // c4e17d2b13 or c5fd2b13
+ VMOVNTPD Y11, (BX) // c4617d2b1b or c57d2b1b
+ VMOVNTPD Y2, (R11) // c4c17d2b13
+ VMOVNTPD Y11, (R11) // c4417d2b1b
+ VMOVNTPS X2, (BX) // c4e1782b13 or c5f82b13
+ VMOVNTPS X11, (BX) // c461782b1b or c5782b1b
+ VMOVNTPS X2, (R11) // c4c1782b13
+ VMOVNTPS X11, (R11) // c441782b1b
+ VMOVNTPS Y2, (BX) // c4e17c2b13 or c5fc2b13
+ VMOVNTPS Y11, (BX) // c4617c2b1b or c57c2b1b
+ VMOVNTPS Y2, (R11) // c4c17c2b13
+ VMOVNTPS Y11, (R11) // c4417c2b1b
+ VMOVQ X2, (BX) // c4e1f97e13 or c4e179d613 or c5f9d613
+ VMOVQ X11, (BX) // c461f97e1b or c46179d61b or c579d61b
+ VMOVQ X2, (R11) // c4c1f97e13 or c4c179d613
+ VMOVQ X11, (R11) // c441f97e1b or c44179d61b
+ VMOVQ X2, DX // c4e1f97ed2
+ VMOVQ X11, DX // c461f97eda
+ VMOVQ X2, R11 // c4c1f97ed3
+ VMOVQ X11, R11 // c441f97edb
+ VMOVQ (BX), X2 // c4e17a7e13 or c5fa7e13 or c4e1f96e13
+ VMOVQ (R11), X2 // c4c17a7e13 or c4c1f96e13
+ VMOVQ (BX), X11 // c4617a7e1b or c57a7e1b or c461f96e1b
+ VMOVQ (R11), X11 // c4417a7e1b or c441f96e1b
+ VMOVQ DX, X2 // c4e1f96ed2
+ VMOVQ R11, X2 // c4c1f96ed3
+ VMOVQ DX, X11 // c461f96eda
+ VMOVQ R11, X11 // c441f96edb
+ VMOVQ X2, X2 // c4e17a7ed2 or c5fa7ed2 or c4e179d6d2 or c5f9d6d2
+ VMOVQ X11, X2 // c4c17a7ed3 or c46179d6da or c579d6da
+ VMOVQ X2, X11 // c4617a7eda or c57a7eda or c4c179d6d3
+ VMOVQ X11, X11 // c4417a7edb or c44179d6db
+ VMOVSD X2, (BX) // c4e17b1113 or c5fb1113
+ VMOVSD X11, (BX) // c4617b111b or c57b111b
+ VMOVSD X2, (R11) // c4c17b1113
+ VMOVSD X11, (R11) // c4417b111b
+ VMOVSD (BX), X2 // c4e17b1013 or c5fb1013
+ VMOVSD (R11), X2 // c4c17b1013
+ VMOVSD (BX), X11 // c4617b101b or c57b101b
+ VMOVSD (R11), X11 // c4417b101b
+ VMOVSD X2, X9, X2 // c4e13310d2 or c5b310d2 or c4e13311d2 or c5b311d2
+ VMOVSD X11, X9, X2 // c4c13310d3 or c4613311da or c53311da
+ VMOVSD X2, X9, X11 // c4613310da or c53310da or c4c13311d3
+ VMOVSD X11, X9, X11 // c4413310db or c4413311db
+ VMOVSHDUP (BX), X2 // c4e17a1613 or c5fa1613
+ VMOVSHDUP (R11), X2 // c4c17a1613
+ VMOVSHDUP X2, X2 // c4e17a16d2 or c5fa16d2
+ VMOVSHDUP X11, X2 // c4c17a16d3
+ VMOVSHDUP (BX), X11 // c4617a161b or c57a161b
+ VMOVSHDUP (R11), X11 // c4417a161b
+ VMOVSHDUP X2, X11 // c4617a16da or c57a16da
+ VMOVSHDUP X11, X11 // c4417a16db
+ VMOVSHDUP (BX), Y2 // c4e17e1613 or c5fe1613
+ VMOVSHDUP (R11), Y2 // c4c17e1613
+ VMOVSHDUP Y2, Y2 // c4e17e16d2 or c5fe16d2
+ VMOVSHDUP Y11, Y2 // c4c17e16d3
+ VMOVSHDUP (BX), Y11 // c4617e161b or c57e161b
+ VMOVSHDUP (R11), Y11 // c4417e161b
+ VMOVSHDUP Y2, Y11 // c4617e16da or c57e16da
+ VMOVSHDUP Y11, Y11 // c4417e16db
+ VMOVSLDUP (BX), X2 // c4e17a1213 or c5fa1213
+ VMOVSLDUP (R11), X2 // c4c17a1213
+ VMOVSLDUP X2, X2 // c4e17a12d2 or c5fa12d2
+ VMOVSLDUP X11, X2 // c4c17a12d3
+ VMOVSLDUP (BX), X11 // c4617a121b or c57a121b
+ VMOVSLDUP (R11), X11 // c4417a121b
+ VMOVSLDUP X2, X11 // c4617a12da or c57a12da
+ VMOVSLDUP X11, X11 // c4417a12db
+ VMOVSLDUP (BX), Y2 // c4e17e1213 or c5fe1213
+ VMOVSLDUP (R11), Y2 // c4c17e1213
+ VMOVSLDUP Y2, Y2 // c4e17e12d2 or c5fe12d2
+ VMOVSLDUP Y11, Y2 // c4c17e12d3
+ VMOVSLDUP (BX), Y11 // c4617e121b or c57e121b
+ VMOVSLDUP (R11), Y11 // c4417e121b
+ VMOVSLDUP Y2, Y11 // c4617e12da or c57e12da
+ VMOVSLDUP Y11, Y11 // c4417e12db
+ VMOVSS X2, (BX) // c4e17a1113 or c5fa1113
+ VMOVSS X11, (BX) // c4617a111b or c57a111b
+ VMOVSS X2, (R11) // c4c17a1113
+ VMOVSS X11, (R11) // c4417a111b
+ VMOVSS (BX), X2 // c4e17a1013 or c5fa1013
+ VMOVSS (R11), X2 // c4c17a1013
+ VMOVSS (BX), X11 // c4617a101b or c57a101b
+ VMOVSS (R11), X11 // c4417a101b
+ VMOVSS X2, X9, X2 // c4e13210d2 or c5b210d2 or c4e13211d2 or c5b211d2
+ VMOVSS X11, X9, X2 // c4c13210d3 or c4613211da or c53211da
+ VMOVSS X2, X9, X11 // c4613210da or c53210da or c4c13211d3
+ VMOVSS X11, X9, X11 // c4413210db or c4413211db
+ VMOVUPD (BX), X2 // c4e1791013 or c5f91013
+ VMOVUPD (R11), X2 // c4c1791013
+ VMOVUPD X2, X2 // c4e17910d2 or c5f910d2 or c4e17911d2 or c5f911d2
+ VMOVUPD X11, X2 // c4c17910d3 or c4617911da or c57911da
+ VMOVUPD (BX), X11 // c46179101b or c579101b
+ VMOVUPD (R11), X11 // c44179101b
+ VMOVUPD X2, X11 // c4617910da or c57910da or c4c17911d3
+ VMOVUPD X11, X11 // c4417910db or c4417911db
+ VMOVUPD X2, (BX) // c4e1791113 or c5f91113
+ VMOVUPD X11, (BX) // c46179111b or c579111b
+ VMOVUPD X2, (R11) // c4c1791113
+ VMOVUPD X11, (R11) // c44179111b
+ VMOVUPD (BX), Y2 // c4e17d1013 or c5fd1013
+ VMOVUPD (R11), Y2 // c4c17d1013
+ VMOVUPD Y2, Y2 // c4e17d10d2 or c5fd10d2 or c4e17d11d2 or c5fd11d2
+ VMOVUPD Y11, Y2 // c4c17d10d3 or c4617d11da or c57d11da
+ VMOVUPD (BX), Y11 // c4617d101b or c57d101b
+ VMOVUPD (R11), Y11 // c4417d101b
+ VMOVUPD Y2, Y11 // c4617d10da or c57d10da or c4c17d11d3
+ VMOVUPD Y11, Y11 // c4417d10db or c4417d11db
+ VMOVUPD Y2, (BX) // c4e17d1113 or c5fd1113
+ VMOVUPD Y11, (BX) // c4617d111b or c57d111b
+ VMOVUPD Y2, (R11) // c4c17d1113
+ VMOVUPD Y11, (R11) // c4417d111b
+ VMOVUPS (BX), X2 // c4e1781013 or c5f81013
+ VMOVUPS (R11), X2 // c4c1781013
+ VMOVUPS X2, X2 // c4e17810d2 or c5f810d2 or c4e17811d2 or c5f811d2
+ VMOVUPS X11, X2 // c4c17810d3 or c4617811da or c57811da
+ VMOVUPS (BX), X11 // c46178101b or c578101b
+ VMOVUPS (R11), X11 // c44178101b
+ VMOVUPS X2, X11 // c4617810da or c57810da or c4c17811d3
+ VMOVUPS X11, X11 // c4417810db or c4417811db
+ VMOVUPS X2, (BX) // c4e1781113 or c5f81113
+ VMOVUPS X11, (BX) // c46178111b or c578111b
+ VMOVUPS X2, (R11) // c4c1781113
+ VMOVUPS X11, (R11) // c44178111b
+ VMOVUPS (BX), Y2 // c4e17c1013 or c5fc1013
+ VMOVUPS (R11), Y2 // c4c17c1013
+ VMOVUPS Y2, Y2 // c4e17c10d2 or c5fc10d2 or c4e17c11d2 or c5fc11d2
+ VMOVUPS Y11, Y2 // c4c17c10d3 or c4617c11da or c57c11da
+ VMOVUPS (BX), Y11 // c4617c101b or c57c101b
+ VMOVUPS (R11), Y11 // c4417c101b
+ VMOVUPS Y2, Y11 // c4617c10da or c57c10da or c4c17c11d3
+ VMOVUPS Y11, Y11 // c4417c10db or c4417c11db
+ VMOVUPS Y2, (BX) // c4e17c1113 or c5fc1113
+ VMOVUPS Y11, (BX) // c4617c111b or c57c111b
+ VMOVUPS Y2, (R11) // c4c17c1113
+ VMOVUPS Y11, (R11) // c4417c111b
+ VMPSADBW $7, (BX), X9, X2 // c4e331421307
+ VMPSADBW $7, (R11), X9, X2 // c4c331421307
+ VMPSADBW $7, X2, X9, X2 // c4e33142d207
+ VMPSADBW $7, X11, X9, X2 // c4c33142d307
+ VMPSADBW $7, (BX), X9, X11 // c46331421b07
+ VMPSADBW $7, (R11), X9, X11 // c44331421b07
+ VMPSADBW $7, X2, X9, X11 // c4633142da07
+ VMPSADBW $7, X11, X9, X11 // c4433142db07
+ VMPSADBW $7, (BX), Y15, Y2 // c4e305421307
+ VMPSADBW $7, (R11), Y15, Y2 // c4c305421307
+ VMPSADBW $7, Y2, Y15, Y2 // c4e30542d207
+ VMPSADBW $7, Y11, Y15, Y2 // c4c30542d307
+ VMPSADBW $7, (BX), Y15, Y11 // c46305421b07
+ VMPSADBW $7, (R11), Y15, Y11 // c44305421b07
+ VMPSADBW $7, Y2, Y15, Y11 // c4630542da07
+ VMPSADBW $7, Y11, Y15, Y11 // c4430542db07
+ VMULPD (BX), X9, X2 // c4e1315913 or c5b15913
+ VMULPD (R11), X9, X2 // c4c1315913
+ VMULPD X2, X9, X2 // c4e13159d2 or c5b159d2
+ VMULPD X11, X9, X2 // c4c13159d3
+ VMULPD (BX), X9, X11 // c46131591b or c531591b
+ VMULPD (R11), X9, X11 // c44131591b
+ VMULPD X2, X9, X11 // c4613159da or c53159da
+ VMULPD X11, X9, X11 // c4413159db
+ VMULPD (BX), Y15, Y2 // c4e1055913 or c5855913
+ VMULPD (R11), Y15, Y2 // c4c1055913
+ VMULPD Y2, Y15, Y2 // c4e10559d2 or c58559d2
+ VMULPD Y11, Y15, Y2 // c4c10559d3
+ VMULPD (BX), Y15, Y11 // c46105591b or c505591b
+ VMULPD (R11), Y15, Y11 // c44105591b
+ VMULPD Y2, Y15, Y11 // c4610559da or c50559da
+ VMULPD Y11, Y15, Y11 // c4410559db
+ VMULPS (BX), X9, X2 // c4e1305913 or c5b05913
+ VMULPS (R11), X9, X2 // c4c1305913
+ VMULPS X2, X9, X2 // c4e13059d2 or c5b059d2
+ VMULPS X11, X9, X2 // c4c13059d3
+ VMULPS (BX), X9, X11 // c46130591b or c530591b
+ VMULPS (R11), X9, X11 // c44130591b
+ VMULPS X2, X9, X11 // c4613059da or c53059da
+ VMULPS X11, X9, X11 // c4413059db
+ VMULPS (BX), Y15, Y2 // c4e1045913 or c5845913
+ VMULPS (R11), Y15, Y2 // c4c1045913
+ VMULPS Y2, Y15, Y2 // c4e10459d2 or c58459d2
+ VMULPS Y11, Y15, Y2 // c4c10459d3
+ VMULPS (BX), Y15, Y11 // c46104591b or c504591b
+ VMULPS (R11), Y15, Y11 // c44104591b
+ VMULPS Y2, Y15, Y11 // c4610459da or c50459da
+ VMULPS Y11, Y15, Y11 // c4410459db
+ VMULSD (BX), X9, X2 // c4e1335913 or c5b35913
+ VMULSD (R11), X9, X2 // c4c1335913
+ VMULSD X2, X9, X2 // c4e13359d2 or c5b359d2
+ VMULSD X11, X9, X2 // c4c13359d3
+ VMULSD (BX), X9, X11 // c46133591b or c533591b
+ VMULSD (R11), X9, X11 // c44133591b
+ VMULSD X2, X9, X11 // c4613359da or c53359da
+ VMULSD X11, X9, X11 // c4413359db
+ VMULSS (BX), X9, X2 // c4e1325913 or c5b25913
+ VMULSS (R11), X9, X2 // c4c1325913
+ VMULSS X2, X9, X2 // c4e13259d2 or c5b259d2
+ VMULSS X11, X9, X2 // c4c13259d3
+ VMULSS (BX), X9, X11 // c46132591b or c532591b
+ VMULSS (R11), X9, X11 // c44132591b
+ VMULSS X2, X9, X11 // c4613259da or c53259da
+ VMULSS X11, X9, X11 // c4413259db
+ VORPD (BX), X9, X2 // c4e1315613 or c5b15613
+ VORPD (R11), X9, X2 // c4c1315613
+ VORPD X2, X9, X2 // c4e13156d2 or c5b156d2
+ VORPD X11, X9, X2 // c4c13156d3
+ VORPD (BX), X9, X11 // c46131561b or c531561b
+ VORPD (R11), X9, X11 // c44131561b
+ VORPD X2, X9, X11 // c4613156da or c53156da
+ VORPD X11, X9, X11 // c4413156db
+ VORPD (BX), Y15, Y2 // c4e1055613 or c5855613
+ VORPD (R11), Y15, Y2 // c4c1055613
+ VORPD Y2, Y15, Y2 // c4e10556d2 or c58556d2
+ VORPD Y11, Y15, Y2 // c4c10556d3
+ VORPD (BX), Y15, Y11 // c46105561b or c505561b
+ VORPD (R11), Y15, Y11 // c44105561b
+ VORPD Y2, Y15, Y11 // c4610556da or c50556da
+ VORPD Y11, Y15, Y11 // c4410556db
+ VORPS (BX), X9, X2 // c4e1305613 or c5b05613
+ VORPS (R11), X9, X2 // c4c1305613
+ VORPS X2, X9, X2 // c4e13056d2 or c5b056d2
+ VORPS X11, X9, X2 // c4c13056d3
+ VORPS (BX), X9, X11 // c46130561b or c530561b
+ VORPS (R11), X9, X11 // c44130561b
+ VORPS X2, X9, X11 // c4613056da or c53056da
+ VORPS X11, X9, X11 // c4413056db
+ VORPS (BX), Y15, Y2 // c4e1045613 or c5845613
+ VORPS (R11), Y15, Y2 // c4c1045613
+ VORPS Y2, Y15, Y2 // c4e10456d2 or c58456d2
+ VORPS Y11, Y15, Y2 // c4c10456d3
+ VORPS (BX), Y15, Y11 // c46104561b or c504561b
+ VORPS (R11), Y15, Y11 // c44104561b
+ VORPS Y2, Y15, Y11 // c4610456da or c50456da
+ VORPS Y11, Y15, Y11 // c4410456db
+ VPABSB (BX), X2 // c4e2791c13
+ VPABSB (R11), X2 // c4c2791c13
+ VPABSB X2, X2 // c4e2791cd2
+ VPABSB X11, X2 // c4c2791cd3
+ VPABSB (BX), X11 // c462791c1b
+ VPABSB (R11), X11 // c442791c1b
+ VPABSB X2, X11 // c462791cda
+ VPABSB X11, X11 // c442791cdb
+ VPABSB (BX), Y2 // c4e27d1c13
+ VPABSB (R11), Y2 // c4c27d1c13
+ VPABSB Y2, Y2 // c4e27d1cd2
+ VPABSB Y11, Y2 // c4c27d1cd3
+ VPABSB (BX), Y11 // c4627d1c1b
+ VPABSB (R11), Y11 // c4427d1c1b
+ VPABSB Y2, Y11 // c4627d1cda
+ VPABSB Y11, Y11 // c4427d1cdb
+ VPABSD (BX), X2 // c4e2791e13
+ VPABSD (R11), X2 // c4c2791e13
+ VPABSD X2, X2 // c4e2791ed2
+ VPABSD X11, X2 // c4c2791ed3
+ VPABSD (BX), X11 // c462791e1b
+ VPABSD (R11), X11 // c442791e1b
+ VPABSD X2, X11 // c462791eda
+ VPABSD X11, X11 // c442791edb
+ VPABSD (BX), Y2 // c4e27d1e13
+ VPABSD (R11), Y2 // c4c27d1e13
+ VPABSD Y2, Y2 // c4e27d1ed2
+ VPABSD Y11, Y2 // c4c27d1ed3
+ VPABSD (BX), Y11 // c4627d1e1b
+ VPABSD (R11), Y11 // c4427d1e1b
+ VPABSD Y2, Y11 // c4627d1eda
+ VPABSD Y11, Y11 // c4427d1edb
+ VPABSW (BX), X2 // c4e2791d13
+ VPABSW (R11), X2 // c4c2791d13
+ VPABSW X2, X2 // c4e2791dd2
+ VPABSW X11, X2 // c4c2791dd3
+ VPABSW (BX), X11 // c462791d1b
+ VPABSW (R11), X11 // c442791d1b
+ VPABSW X2, X11 // c462791dda
+ VPABSW X11, X11 // c442791ddb
+ VPABSW (BX), Y2 // c4e27d1d13
+ VPABSW (R11), Y2 // c4c27d1d13
+ VPABSW Y2, Y2 // c4e27d1dd2
+ VPABSW Y11, Y2 // c4c27d1dd3
+ VPABSW (BX), Y11 // c4627d1d1b
+ VPABSW (R11), Y11 // c4427d1d1b
+ VPABSW Y2, Y11 // c4627d1dda
+ VPABSW Y11, Y11 // c4427d1ddb
+ VPACKSSDW (BX), X9, X2 // c4e1316b13 or c5b16b13
+ VPACKSSDW (R11), X9, X2 // c4c1316b13
+ VPACKSSDW X2, X9, X2 // c4e1316bd2 or c5b16bd2
+ VPACKSSDW X11, X9, X2 // c4c1316bd3
+ VPACKSSDW (BX), X9, X11 // c461316b1b or c5316b1b
+ VPACKSSDW (R11), X9, X11 // c441316b1b
+ VPACKSSDW X2, X9, X11 // c461316bda or c5316bda
+ VPACKSSDW X11, X9, X11 // c441316bdb
+ VPACKSSDW (BX), Y15, Y2 // c4e1056b13 or c5856b13
+ VPACKSSDW (R11), Y15, Y2 // c4c1056b13
+ VPACKSSDW Y2, Y15, Y2 // c4e1056bd2 or c5856bd2
+ VPACKSSDW Y11, Y15, Y2 // c4c1056bd3
+ VPACKSSDW (BX), Y15, Y11 // c461056b1b or c5056b1b
+ VPACKSSDW (R11), Y15, Y11 // c441056b1b
+ VPACKSSDW Y2, Y15, Y11 // c461056bda or c5056bda
+ VPACKSSDW Y11, Y15, Y11 // c441056bdb
+ VPACKSSWB (BX), X9, X2 // c4e1316313 or c5b16313
+ VPACKSSWB (R11), X9, X2 // c4c1316313
+ VPACKSSWB X2, X9, X2 // c4e13163d2 or c5b163d2
+ VPACKSSWB X11, X9, X2 // c4c13163d3
+ VPACKSSWB (BX), X9, X11 // c46131631b or c531631b
+ VPACKSSWB (R11), X9, X11 // c44131631b
+ VPACKSSWB X2, X9, X11 // c4613163da or c53163da
+ VPACKSSWB X11, X9, X11 // c4413163db
+ VPACKSSWB (BX), Y15, Y2 // c4e1056313 or c5856313
+ VPACKSSWB (R11), Y15, Y2 // c4c1056313
+ VPACKSSWB Y2, Y15, Y2 // c4e10563d2 or c58563d2
+ VPACKSSWB Y11, Y15, Y2 // c4c10563d3
+ VPACKSSWB (BX), Y15, Y11 // c46105631b or c505631b
+ VPACKSSWB (R11), Y15, Y11 // c44105631b
+ VPACKSSWB Y2, Y15, Y11 // c4610563da or c50563da
+ VPACKSSWB Y11, Y15, Y11 // c4410563db
+ VPACKUSDW (BX), X9, X2 // c4e2312b13
+ VPACKUSDW (R11), X9, X2 // c4c2312b13
+ VPACKUSDW X2, X9, X2 // c4e2312bd2
+ VPACKUSDW X11, X9, X2 // c4c2312bd3
+ VPACKUSDW (BX), X9, X11 // c462312b1b
+ VPACKUSDW (R11), X9, X11 // c442312b1b
+ VPACKUSDW X2, X9, X11 // c462312bda
+ VPACKUSDW X11, X9, X11 // c442312bdb
+ VPACKUSDW (BX), Y15, Y2 // c4e2052b13
+ VPACKUSDW (R11), Y15, Y2 // c4c2052b13
+ VPACKUSDW Y2, Y15, Y2 // c4e2052bd2
+ VPACKUSDW Y11, Y15, Y2 // c4c2052bd3
+ VPACKUSDW (BX), Y15, Y11 // c462052b1b
+ VPACKUSDW (R11), Y15, Y11 // c442052b1b
+ VPACKUSDW Y2, Y15, Y11 // c462052bda
+ VPACKUSDW Y11, Y15, Y11 // c442052bdb
+ VPACKUSWB (BX), X9, X2 // c4e1316713 or c5b16713
+ VPACKUSWB (R11), X9, X2 // c4c1316713
+ VPACKUSWB X2, X9, X2 // c4e13167d2 or c5b167d2
+ VPACKUSWB X11, X9, X2 // c4c13167d3
+ VPACKUSWB (BX), X9, X11 // c46131671b or c531671b
+ VPACKUSWB (R11), X9, X11 // c44131671b
+ VPACKUSWB X2, X9, X11 // c4613167da or c53167da
+ VPACKUSWB X11, X9, X11 // c4413167db
+ VPACKUSWB (BX), Y15, Y2 // c4e1056713 or c5856713
+ VPACKUSWB (R11), Y15, Y2 // c4c1056713
+ VPACKUSWB Y2, Y15, Y2 // c4e10567d2 or c58567d2
+ VPACKUSWB Y11, Y15, Y2 // c4c10567d3
+ VPACKUSWB (BX), Y15, Y11 // c46105671b or c505671b
+ VPACKUSWB (R11), Y15, Y11 // c44105671b
+ VPACKUSWB Y2, Y15, Y11 // c4610567da or c50567da
+ VPACKUSWB Y11, Y15, Y11 // c4410567db
+ VPADDB (BX), X9, X2 // c4e131fc13 or c5b1fc13
+ VPADDB (R11), X9, X2 // c4c131fc13
+ VPADDB X2, X9, X2 // c4e131fcd2 or c5b1fcd2
+ VPADDB X11, X9, X2 // c4c131fcd3
+ VPADDB (BX), X9, X11 // c46131fc1b or c531fc1b
+ VPADDB (R11), X9, X11 // c44131fc1b
+ VPADDB X2, X9, X11 // c46131fcda or c531fcda
+ VPADDB X11, X9, X11 // c44131fcdb
+ VPADDB (BX), Y15, Y2 // c4e105fc13 or c585fc13
+ VPADDB (R11), Y15, Y2 // c4c105fc13
+ VPADDB Y2, Y15, Y2 // c4e105fcd2 or c585fcd2
+ VPADDB Y11, Y15, Y2 // c4c105fcd3
+ VPADDB (BX), Y15, Y11 // c46105fc1b or c505fc1b
+ VPADDB (R11), Y15, Y11 // c44105fc1b
+ VPADDB Y2, Y15, Y11 // c46105fcda or c505fcda
+ VPADDB Y11, Y15, Y11 // c44105fcdb
+ VPADDD (BX), X9, X2 // c4e131fe13 or c5b1fe13
+ VPADDD (R11), X9, X2 // c4c131fe13
+ VPADDD X2, X9, X2 // c4e131fed2 or c5b1fed2
+ VPADDD X11, X9, X2 // c4c131fed3
+ VPADDD (BX), X9, X11 // c46131fe1b or c531fe1b
+ VPADDD (R11), X9, X11 // c44131fe1b
+ VPADDD X2, X9, X11 // c46131feda or c531feda
+ VPADDD X11, X9, X11 // c44131fedb
+ VPADDD (BX), Y15, Y2 // c4e105fe13 or c585fe13
+ VPADDD (R11), Y15, Y2 // c4c105fe13
+ VPADDD Y2, Y15, Y2 // c4e105fed2 or c585fed2
+ VPADDD Y11, Y15, Y2 // c4c105fed3
+ VPADDD (BX), Y15, Y11 // c46105fe1b or c505fe1b
+ VPADDD (R11), Y15, Y11 // c44105fe1b
+ VPADDD Y2, Y15, Y11 // c46105feda or c505feda
+ VPADDD Y11, Y15, Y11 // c44105fedb
+ VPADDQ (BX), X9, X2 // c4e131d413 or c5b1d413
+ VPADDQ (R11), X9, X2 // c4c131d413
+ VPADDQ X2, X9, X2 // c4e131d4d2 or c5b1d4d2
+ VPADDQ X11, X9, X2 // c4c131d4d3
+ VPADDQ (BX), X9, X11 // c46131d41b or c531d41b
+ VPADDQ (R11), X9, X11 // c44131d41b
+ VPADDQ X2, X9, X11 // c46131d4da or c531d4da
+ VPADDQ X11, X9, X11 // c44131d4db
+ VPADDQ (BX), Y15, Y2 // c4e105d413 or c585d413
+ VPADDQ (R11), Y15, Y2 // c4c105d413
+ VPADDQ Y2, Y15, Y2 // c4e105d4d2 or c585d4d2
+ VPADDQ Y11, Y15, Y2 // c4c105d4d3
+ VPADDQ (BX), Y15, Y11 // c46105d41b or c505d41b
+ VPADDQ (R11), Y15, Y11 // c44105d41b
+ VPADDQ Y2, Y15, Y11 // c46105d4da or c505d4da
+ VPADDQ Y11, Y15, Y11 // c44105d4db
+ VPADDSB (BX), X9, X2 // c4e131ec13 or c5b1ec13
+ VPADDSB (R11), X9, X2 // c4c131ec13
+ VPADDSB X2, X9, X2 // c4e131ecd2 or c5b1ecd2
+ VPADDSB X11, X9, X2 // c4c131ecd3
+ VPADDSB (BX), X9, X11 // c46131ec1b or c531ec1b
+ VPADDSB (R11), X9, X11 // c44131ec1b
+ VPADDSB X2, X9, X11 // c46131ecda or c531ecda
+ VPADDSB X11, X9, X11 // c44131ecdb
+ VPADDSB (BX), Y15, Y2 // c4e105ec13 or c585ec13
+ VPADDSB (R11), Y15, Y2 // c4c105ec13
+ VPADDSB Y2, Y15, Y2 // c4e105ecd2 or c585ecd2
+ VPADDSB Y11, Y15, Y2 // c4c105ecd3
+ VPADDSB (BX), Y15, Y11 // c46105ec1b or c505ec1b
+ VPADDSB (R11), Y15, Y11 // c44105ec1b
+ VPADDSB Y2, Y15, Y11 // c46105ecda or c505ecda
+ VPADDSB Y11, Y15, Y11 // c44105ecdb
+ VPADDSW (BX), X9, X2 // c4e131ed13 or c5b1ed13
+ VPADDSW (R11), X9, X2 // c4c131ed13
+ VPADDSW X2, X9, X2 // c4e131edd2 or c5b1edd2
+ VPADDSW X11, X9, X2 // c4c131edd3
+ VPADDSW (BX), X9, X11 // c46131ed1b or c531ed1b
+ VPADDSW (R11), X9, X11 // c44131ed1b
+ VPADDSW X2, X9, X11 // c46131edda or c531edda
+ VPADDSW X11, X9, X11 // c44131eddb
+ VPADDSW (BX), Y15, Y2 // c4e105ed13 or c585ed13
+ VPADDSW (R11), Y15, Y2 // c4c105ed13
+ VPADDSW Y2, Y15, Y2 // c4e105edd2 or c585edd2
+ VPADDSW Y11, Y15, Y2 // c4c105edd3
+ VPADDSW (BX), Y15, Y11 // c46105ed1b or c505ed1b
+ VPADDSW (R11), Y15, Y11 // c44105ed1b
+ VPADDSW Y2, Y15, Y11 // c46105edda or c505edda
+ VPADDSW Y11, Y15, Y11 // c44105eddb
+ VPADDUSB (BX), X9, X2 // c4e131dc13 or c5b1dc13
+ VPADDUSB (R11), X9, X2 // c4c131dc13
+ VPADDUSB X2, X9, X2 // c4e131dcd2 or c5b1dcd2
+ VPADDUSB X11, X9, X2 // c4c131dcd3
+ VPADDUSB (BX), X9, X11 // c46131dc1b or c531dc1b
+ VPADDUSB (R11), X9, X11 // c44131dc1b
+ VPADDUSB X2, X9, X11 // c46131dcda or c531dcda
+ VPADDUSB X11, X9, X11 // c44131dcdb
+ VPADDUSB (BX), Y15, Y2 // c4e105dc13 or c585dc13
+ VPADDUSB (R11), Y15, Y2 // c4c105dc13
+ VPADDUSB Y2, Y15, Y2 // c4e105dcd2 or c585dcd2
+ VPADDUSB Y11, Y15, Y2 // c4c105dcd3
+ VPADDUSB (BX), Y15, Y11 // c46105dc1b or c505dc1b
+ VPADDUSB (R11), Y15, Y11 // c44105dc1b
+ VPADDUSB Y2, Y15, Y11 // c46105dcda or c505dcda
+ VPADDUSB Y11, Y15, Y11 // c44105dcdb
+ VPADDUSW (BX), X9, X2 // c4e131dd13 or c5b1dd13
+ VPADDUSW (R11), X9, X2 // c4c131dd13
+ VPADDUSW X2, X9, X2 // c4e131ddd2 or c5b1ddd2
+ VPADDUSW X11, X9, X2 // c4c131ddd3
+ VPADDUSW (BX), X9, X11 // c46131dd1b or c531dd1b
+ VPADDUSW (R11), X9, X11 // c44131dd1b
+ VPADDUSW X2, X9, X11 // c46131ddda or c531ddda
+ VPADDUSW X11, X9, X11 // c44131dddb
+ VPADDUSW (BX), Y15, Y2 // c4e105dd13 or c585dd13
+ VPADDUSW (R11), Y15, Y2 // c4c105dd13
+ VPADDUSW Y2, Y15, Y2 // c4e105ddd2 or c585ddd2
+ VPADDUSW Y11, Y15, Y2 // c4c105ddd3
+ VPADDUSW (BX), Y15, Y11 // c46105dd1b or c505dd1b
+ VPADDUSW (R11), Y15, Y11 // c44105dd1b
+ VPADDUSW Y2, Y15, Y11 // c46105ddda or c505ddda
+ VPADDUSW Y11, Y15, Y11 // c44105dddb
+ VPADDW (BX), X9, X2 // c4e131fd13 or c5b1fd13
+ VPADDW (R11), X9, X2 // c4c131fd13
+ VPADDW X2, X9, X2 // c4e131fdd2 or c5b1fdd2
+ VPADDW X11, X9, X2 // c4c131fdd3
+ VPADDW (BX), X9, X11 // c46131fd1b or c531fd1b
+ VPADDW (R11), X9, X11 // c44131fd1b
+ VPADDW X2, X9, X11 // c46131fdda or c531fdda
+ VPADDW X11, X9, X11 // c44131fddb
+ VPADDW (BX), Y15, Y2 // c4e105fd13 or c585fd13
+ VPADDW (R11), Y15, Y2 // c4c105fd13
+ VPADDW Y2, Y15, Y2 // c4e105fdd2 or c585fdd2
+ VPADDW Y11, Y15, Y2 // c4c105fdd3
+ VPADDW (BX), Y15, Y11 // c46105fd1b or c505fd1b
+ VPADDW (R11), Y15, Y11 // c44105fd1b
+ VPADDW Y2, Y15, Y11 // c46105fdda or c505fdda
+ VPADDW Y11, Y15, Y11 // c44105fddb
+ VPALIGNR $7, (BX), X9, X2 // c4e3310f1307
+ VPALIGNR $7, (R11), X9, X2 // c4c3310f1307
+ VPALIGNR $7, X2, X9, X2 // c4e3310fd207
+ VPALIGNR $7, X11, X9, X2 // c4c3310fd307
+ VPALIGNR $7, (BX), X9, X11 // c463310f1b07
+ VPALIGNR $7, (R11), X9, X11 // c443310f1b07
+ VPALIGNR $7, X2, X9, X11 // c463310fda07
+ VPALIGNR $7, X11, X9, X11 // c443310fdb07
+ VPALIGNR $7, (BX), Y15, Y2 // c4e3050f1307
+ VPALIGNR $7, (R11), Y15, Y2 // c4c3050f1307
+ VPALIGNR $7, Y2, Y15, Y2 // c4e3050fd207
+ VPALIGNR $7, Y11, Y15, Y2 // c4c3050fd307
+ VPALIGNR $7, (BX), Y15, Y11 // c463050f1b07
+ VPALIGNR $7, (R11), Y15, Y11 // c443050f1b07
+ VPALIGNR $7, Y2, Y15, Y11 // c463050fda07
+ VPALIGNR $7, Y11, Y15, Y11 // c443050fdb07
+ VPAND (BX), X9, X2 // c4e131db13 or c5b1db13
+ VPAND (R11), X9, X2 // c4c131db13
+ VPAND X2, X9, X2 // c4e131dbd2 or c5b1dbd2
+ VPAND X11, X9, X2 // c4c131dbd3
+ VPAND (BX), X9, X11 // c46131db1b or c531db1b
+ VPAND (R11), X9, X11 // c44131db1b
+ VPAND X2, X9, X11 // c46131dbda or c531dbda
+ VPAND X11, X9, X11 // c44131dbdb
+ VPAND (BX), Y15, Y2 // c4e105db13 or c585db13
+ VPAND (R11), Y15, Y2 // c4c105db13
+ VPAND Y2, Y15, Y2 // c4e105dbd2 or c585dbd2
+ VPAND Y11, Y15, Y2 // c4c105dbd3
+ VPAND (BX), Y15, Y11 // c46105db1b or c505db1b
+ VPAND (R11), Y15, Y11 // c44105db1b
+ VPAND Y2, Y15, Y11 // c46105dbda or c505dbda
+ VPAND Y11, Y15, Y11 // c44105dbdb
+ VPANDN (BX), X9, X2 // c4e131df13 or c5b1df13
+ VPANDN (R11), X9, X2 // c4c131df13
+ VPANDN X2, X9, X2 // c4e131dfd2 or c5b1dfd2
+ VPANDN X11, X9, X2 // c4c131dfd3
+ VPANDN (BX), X9, X11 // c46131df1b or c531df1b
+ VPANDN (R11), X9, X11 // c44131df1b
+ VPANDN X2, X9, X11 // c46131dfda or c531dfda
+ VPANDN X11, X9, X11 // c44131dfdb
+ VPANDN (BX), Y15, Y2 // c4e105df13 or c585df13
+ VPANDN (R11), Y15, Y2 // c4c105df13
+ VPANDN Y2, Y15, Y2 // c4e105dfd2 or c585dfd2
+ VPANDN Y11, Y15, Y2 // c4c105dfd3
+ VPANDN (BX), Y15, Y11 // c46105df1b or c505df1b
+ VPANDN (R11), Y15, Y11 // c44105df1b
+ VPANDN Y2, Y15, Y11 // c46105dfda or c505dfda
+ VPANDN Y11, Y15, Y11 // c44105dfdb
+ VPAVGB (BX), X9, X2 // c4e131e013 or c5b1e013
+ VPAVGB (R11), X9, X2 // c4c131e013
+ VPAVGB X2, X9, X2 // c4e131e0d2 or c5b1e0d2
+ VPAVGB X11, X9, X2 // c4c131e0d3
+ VPAVGB (BX), X9, X11 // c46131e01b or c531e01b
+ VPAVGB (R11), X9, X11 // c44131e01b
+ VPAVGB X2, X9, X11 // c46131e0da or c531e0da
+ VPAVGB X11, X9, X11 // c44131e0db
+ VPAVGB (BX), Y15, Y2 // c4e105e013 or c585e013
+ VPAVGB (R11), Y15, Y2 // c4c105e013
+ VPAVGB Y2, Y15, Y2 // c4e105e0d2 or c585e0d2
+ VPAVGB Y11, Y15, Y2 // c4c105e0d3
+ VPAVGB (BX), Y15, Y11 // c46105e01b or c505e01b
+ VPAVGB (R11), Y15, Y11 // c44105e01b
+ VPAVGB Y2, Y15, Y11 // c46105e0da or c505e0da
+ VPAVGB Y11, Y15, Y11 // c44105e0db
+ VPAVGW (BX), X9, X2 // c4e131e313 or c5b1e313
+ VPAVGW (R11), X9, X2 // c4c131e313
+ VPAVGW X2, X9, X2 // c4e131e3d2 or c5b1e3d2
+ VPAVGW X11, X9, X2 // c4c131e3d3
+ VPAVGW (BX), X9, X11 // c46131e31b or c531e31b
+ VPAVGW (R11), X9, X11 // c44131e31b
+ VPAVGW X2, X9, X11 // c46131e3da or c531e3da
+ VPAVGW X11, X9, X11 // c44131e3db
+ VPAVGW (BX), Y15, Y2 // c4e105e313 or c585e313
+ VPAVGW (R11), Y15, Y2 // c4c105e313
+ VPAVGW Y2, Y15, Y2 // c4e105e3d2 or c585e3d2
+ VPAVGW Y11, Y15, Y2 // c4c105e3d3
+ VPAVGW (BX), Y15, Y11 // c46105e31b or c505e31b
+ VPAVGW (R11), Y15, Y11 // c44105e31b
+ VPAVGW Y2, Y15, Y11 // c46105e3da or c505e3da
+ VPAVGW Y11, Y15, Y11 // c44105e3db
+ VPBLENDD $7, (BX), X9, X2 // c4e331021307
+ VPBLENDD $7, (R11), X9, X2 // c4c331021307
+ VPBLENDD $7, X2, X9, X2 // c4e33102d207
+ VPBLENDD $7, X11, X9, X2 // c4c33102d307
+ VPBLENDD $7, (BX), X9, X11 // c46331021b07
+ VPBLENDD $7, (R11), X9, X11 // c44331021b07
+ VPBLENDD $7, X2, X9, X11 // c4633102da07
+ VPBLENDD $7, X11, X9, X11 // c4433102db07
+ VPBLENDD $7, (BX), Y15, Y2 // c4e305021307
+ VPBLENDD $7, (R11), Y15, Y2 // c4c305021307
+ VPBLENDD $7, Y2, Y15, Y2 // c4e30502d207
+ VPBLENDD $7, Y11, Y15, Y2 // c4c30502d307
+ VPBLENDD $7, (BX), Y15, Y11 // c46305021b07
+ VPBLENDD $7, (R11), Y15, Y11 // c44305021b07
+ VPBLENDD $7, Y2, Y15, Y11 // c4630502da07
+ VPBLENDD $7, Y11, Y15, Y11 // c4430502db07
+ VPBLENDVB X12, (BX), X9, X2 // c4e3314c13c0
+ VPBLENDVB X12, (R11), X9, X2 // c4c3314c13c0
+ VPBLENDVB X12, X2, X9, X2 // c4e3314cd2c0
+ VPBLENDVB X12, X11, X9, X2 // c4c3314cd3c0
+ VPBLENDVB X12, (BX), X9, X11 // c463314c1bc0
+ VPBLENDVB X12, (R11), X9, X11 // c443314c1bc0
+ VPBLENDVB X12, X2, X9, X11 // c463314cdac0
+ VPBLENDVB X12, X11, X9, X11 // c443314cdbc0
+ VPBLENDVB Y13, (BX), Y15, Y2 // c4e3054c13d0
+ VPBLENDVB Y13, (R11), Y15, Y2 // c4c3054c13d0
+ VPBLENDVB Y13, Y2, Y15, Y2 // c4e3054cd2d0
+ VPBLENDVB Y13, Y11, Y15, Y2 // c4c3054cd3d0
+ VPBLENDVB Y13, (BX), Y15, Y11 // c463054c1bd0
+ VPBLENDVB Y13, (R11), Y15, Y11 // c443054c1bd0
+ VPBLENDVB Y13, Y2, Y15, Y11 // c463054cdad0
+ VPBLENDVB Y13, Y11, Y15, Y11 // c443054cdbd0
+ VPBLENDW $7, (BX), X9, X2 // c4e3310e1307
+ VPBLENDW $7, (R11), X9, X2 // c4c3310e1307
+ VPBLENDW $7, X2, X9, X2 // c4e3310ed207
+ VPBLENDW $7, X11, X9, X2 // c4c3310ed307
+ VPBLENDW $7, (BX), X9, X11 // c463310e1b07
+ VPBLENDW $7, (R11), X9, X11 // c443310e1b07
+ VPBLENDW $7, X2, X9, X11 // c463310eda07
+ VPBLENDW $7, X11, X9, X11 // c443310edb07
+ VPBLENDW $7, (BX), Y15, Y2 // c4e3050e1307
+ VPBLENDW $7, (R11), Y15, Y2 // c4c3050e1307
+ VPBLENDW $7, Y2, Y15, Y2 // c4e3050ed207
+ VPBLENDW $7, Y11, Y15, Y2 // c4c3050ed307
+ VPBLENDW $7, (BX), Y15, Y11 // c463050e1b07
+ VPBLENDW $7, (R11), Y15, Y11 // c443050e1b07
+ VPBLENDW $7, Y2, Y15, Y11 // c463050eda07
+ VPBLENDW $7, Y11, Y15, Y11 // c443050edb07
+ VPBROADCASTB (BX), X2 // c4e2797813
+ VPBROADCASTB (R11), X2 // c4c2797813
+ VPBROADCASTB X2, X2 // c4e27978d2
+ VPBROADCASTB X11, X2 // c4c27978d3
+ VPBROADCASTB (BX), X11 // c46279781b
+ VPBROADCASTB (R11), X11 // c44279781b
+ VPBROADCASTB X2, X11 // c4627978da
+ VPBROADCASTB X11, X11 // c4427978db
+ VPBROADCASTB (BX), Y2 // c4e27d7813
+ VPBROADCASTB (R11), Y2 // c4c27d7813
+ VPBROADCASTB X2, Y2 // c4e27d78d2
+ VPBROADCASTB X11, Y2 // c4c27d78d3
+ VPBROADCASTB (BX), Y11 // c4627d781b
+ VPBROADCASTB (R11), Y11 // c4427d781b
+ VPBROADCASTB X2, Y11 // c4627d78da
+ VPBROADCASTB X11, Y11 // c4427d78db
+ VPBROADCASTD (BX), X2 // c4e2795813
+ VPBROADCASTD (R11), X2 // c4c2795813
+ VPBROADCASTD X2, X2 // c4e27958d2
+ VPBROADCASTD X11, X2 // c4c27958d3
+ VPBROADCASTD (BX), X11 // c46279581b
+ VPBROADCASTD (R11), X11 // c44279581b
+ VPBROADCASTD X2, X11 // c4627958da
+ VPBROADCASTD X11, X11 // c4427958db
+ VPBROADCASTD (BX), Y2 // c4e27d5813
+ VPBROADCASTD (R11), Y2 // c4c27d5813
+ VPBROADCASTD X2, Y2 // c4e27d58d2
+ VPBROADCASTD X11, Y2 // c4c27d58d3
+ VPBROADCASTD (BX), Y11 // c4627d581b
+ VPBROADCASTD (R11), Y11 // c4427d581b
+ VPBROADCASTD X2, Y11 // c4627d58da
+ VPBROADCASTD X11, Y11 // c4427d58db
+ VPBROADCASTQ (BX), X2 // c4e2795913
+ VPBROADCASTQ (R11), X2 // c4c2795913
+ VPBROADCASTQ X2, X2 // c4e27959d2
+ VPBROADCASTQ X11, X2 // c4c27959d3
+ VPBROADCASTQ (BX), X11 // c46279591b
+ VPBROADCASTQ (R11), X11 // c44279591b
+ VPBROADCASTQ X2, X11 // c4627959da
+ VPBROADCASTQ X11, X11 // c4427959db
+ VPBROADCASTQ (BX), Y2 // c4e27d5913
+ VPBROADCASTQ (R11), Y2 // c4c27d5913
+ VPBROADCASTQ X2, Y2 // c4e27d59d2
+ VPBROADCASTQ X11, Y2 // c4c27d59d3
+ VPBROADCASTQ (BX), Y11 // c4627d591b
+ VPBROADCASTQ (R11), Y11 // c4427d591b
+ VPBROADCASTQ X2, Y11 // c4627d59da
+ VPBROADCASTQ X11, Y11 // c4427d59db
+ VPBROADCASTW (BX), X2 // c4e2797913
+ VPBROADCASTW (R11), X2 // c4c2797913
+ VPBROADCASTW X2, X2 // c4e27979d2
+ VPBROADCASTW X11, X2 // c4c27979d3
+ VPBROADCASTW (BX), X11 // c46279791b
+ VPBROADCASTW (R11), X11 // c44279791b
+ VPBROADCASTW X2, X11 // c4627979da
+ VPBROADCASTW X11, X11 // c4427979db
+ VPBROADCASTW (BX), Y2 // c4e27d7913
+ VPBROADCASTW (R11), Y2 // c4c27d7913
+ VPBROADCASTW X2, Y2 // c4e27d79d2
+ VPBROADCASTW X11, Y2 // c4c27d79d3
+ VPBROADCASTW (BX), Y11 // c4627d791b
+ VPBROADCASTW (R11), Y11 // c4427d791b
+ VPBROADCASTW X2, Y11 // c4627d79da
+ VPBROADCASTW X11, Y11 // c4427d79db
+ VPCLMULQDQ $7, (BX), X9, X2 // c4e331441307
+ VPCLMULQDQ $7, (R11), X9, X2 // c4c331441307
+ VPCLMULQDQ $7, X2, X9, X2 // c4e33144d207
+ VPCLMULQDQ $7, X11, X9, X2 // c4c33144d307
+ VPCLMULQDQ $7, (BX), X9, X11 // c46331441b07
+ VPCLMULQDQ $7, (R11), X9, X11 // c44331441b07
+ VPCLMULQDQ $7, X2, X9, X11 // c4633144da07
+ VPCLMULQDQ $7, X11, X9, X11 // c4433144db07
+ VPCMPEQB (BX), X9, X2 // c4e1317413 or c5b17413
+ VPCMPEQB (R11), X9, X2 // c4c1317413
+ VPCMPEQB X2, X9, X2 // c4e13174d2 or c5b174d2
+ VPCMPEQB X11, X9, X2 // c4c13174d3
+ VPCMPEQB (BX), X9, X11 // c46131741b or c531741b
+ VPCMPEQB (R11), X9, X11 // c44131741b
+ VPCMPEQB X2, X9, X11 // c4613174da or c53174da
+ VPCMPEQB X11, X9, X11 // c4413174db
+ VPCMPEQB (BX), Y15, Y2 // c4e1057413 or c5857413
+ VPCMPEQB (R11), Y15, Y2 // c4c1057413
+ VPCMPEQB Y2, Y15, Y2 // c4e10574d2 or c58574d2
+ VPCMPEQB Y11, Y15, Y2 // c4c10574d3
+ VPCMPEQB (BX), Y15, Y11 // c46105741b or c505741b
+ VPCMPEQB (R11), Y15, Y11 // c44105741b
+ VPCMPEQB Y2, Y15, Y11 // c4610574da or c50574da
+ VPCMPEQB Y11, Y15, Y11 // c4410574db
+ VPCMPEQD (BX), X9, X2 // c4e1317613 or c5b17613
+ VPCMPEQD (R11), X9, X2 // c4c1317613
+ VPCMPEQD X2, X9, X2 // c4e13176d2 or c5b176d2
+ VPCMPEQD X11, X9, X2 // c4c13176d3
+ VPCMPEQD (BX), X9, X11 // c46131761b or c531761b
+ VPCMPEQD (R11), X9, X11 // c44131761b
+ VPCMPEQD X2, X9, X11 // c4613176da or c53176da
+ VPCMPEQD X11, X9, X11 // c4413176db
+ VPCMPEQD (BX), Y15, Y2 // c4e1057613 or c5857613
+ VPCMPEQD (R11), Y15, Y2 // c4c1057613
+ VPCMPEQD Y2, Y15, Y2 // c4e10576d2 or c58576d2
+ VPCMPEQD Y11, Y15, Y2 // c4c10576d3
+ VPCMPEQD (BX), Y15, Y11 // c46105761b or c505761b
+ VPCMPEQD (R11), Y15, Y11 // c44105761b
+ VPCMPEQD Y2, Y15, Y11 // c4610576da or c50576da
+ VPCMPEQD Y11, Y15, Y11 // c4410576db
+ VPCMPEQQ (BX), X9, X2 // c4e2312913
+ VPCMPEQQ (R11), X9, X2 // c4c2312913
+ VPCMPEQQ X2, X9, X2 // c4e23129d2
+ VPCMPEQQ X11, X9, X2 // c4c23129d3
+ VPCMPEQQ (BX), X9, X11 // c46231291b
+ VPCMPEQQ (R11), X9, X11 // c44231291b
+ VPCMPEQQ X2, X9, X11 // c4623129da
+ VPCMPEQQ X11, X9, X11 // c4423129db
+ VPCMPEQQ (BX), Y15, Y2 // c4e2052913
+ VPCMPEQQ (R11), Y15, Y2 // c4c2052913
+ VPCMPEQQ Y2, Y15, Y2 // c4e20529d2
+ VPCMPEQQ Y11, Y15, Y2 // c4c20529d3
+ VPCMPEQQ (BX), Y15, Y11 // c46205291b
+ VPCMPEQQ (R11), Y15, Y11 // c44205291b
+ VPCMPEQQ Y2, Y15, Y11 // c4620529da
+ VPCMPEQQ Y11, Y15, Y11 // c4420529db
+ VPCMPEQW (BX), X9, X2 // c4e1317513 or c5b17513
+ VPCMPEQW (R11), X9, X2 // c4c1317513
+ VPCMPEQW X2, X9, X2 // c4e13175d2 or c5b175d2
+ VPCMPEQW X11, X9, X2 // c4c13175d3
+ VPCMPEQW (BX), X9, X11 // c46131751b or c531751b
+ VPCMPEQW (R11), X9, X11 // c44131751b
+ VPCMPEQW X2, X9, X11 // c4613175da or c53175da
+ VPCMPEQW X11, X9, X11 // c4413175db
+ VPCMPEQW (BX), Y15, Y2 // c4e1057513 or c5857513
+ VPCMPEQW (R11), Y15, Y2 // c4c1057513
+ VPCMPEQW Y2, Y15, Y2 // c4e10575d2 or c58575d2
+ VPCMPEQW Y11, Y15, Y2 // c4c10575d3
+ VPCMPEQW (BX), Y15, Y11 // c46105751b or c505751b
+ VPCMPEQW (R11), Y15, Y11 // c44105751b
+ VPCMPEQW Y2, Y15, Y11 // c4610575da or c50575da
+ VPCMPEQW Y11, Y15, Y11 // c4410575db
+ VPCMPESTRI $7, (BX), X2 // c4e379611307
+ VPCMPESTRI $7, (R11), X2 // c4c379611307
+ VPCMPESTRI $7, X2, X2 // c4e37961d207
+ VPCMPESTRI $7, X11, X2 // c4c37961d307
+ VPCMPESTRI $7, (BX), X11 // c46379611b07
+ VPCMPESTRI $7, (R11), X11 // c44379611b07
+ VPCMPESTRI $7, X2, X11 // c4637961da07
+ VPCMPESTRI $7, X11, X11 // c4437961db07
+ VPCMPESTRM $7, (BX), X2 // c4e379601307
+ VPCMPESTRM $7, (R11), X2 // c4c379601307
+ VPCMPESTRM $7, X2, X2 // c4e37960d207
+ VPCMPESTRM $7, X11, X2 // c4c37960d307
+ VPCMPESTRM $7, (BX), X11 // c46379601b07
+ VPCMPESTRM $7, (R11), X11 // c44379601b07
+ VPCMPESTRM $7, X2, X11 // c4637960da07
+ VPCMPESTRM $7, X11, X11 // c4437960db07
+ VPCMPGTB (BX), X9, X2 // c4e1316413 or c5b16413
+ VPCMPGTB (R11), X9, X2 // c4c1316413
+ VPCMPGTB X2, X9, X2 // c4e13164d2 or c5b164d2
+ VPCMPGTB X11, X9, X2 // c4c13164d3
+ VPCMPGTB (BX), X9, X11 // c46131641b or c531641b
+ VPCMPGTB (R11), X9, X11 // c44131641b
+ VPCMPGTB X2, X9, X11 // c4613164da or c53164da
+ VPCMPGTB X11, X9, X11 // c4413164db
+ VPCMPGTB (BX), Y15, Y2 // c4e1056413 or c5856413
+ VPCMPGTB (R11), Y15, Y2 // c4c1056413
+ VPCMPGTB Y2, Y15, Y2 // c4e10564d2 or c58564d2
+ VPCMPGTB Y11, Y15, Y2 // c4c10564d3
+ VPCMPGTB (BX), Y15, Y11 // c46105641b or c505641b
+ VPCMPGTB (R11), Y15, Y11 // c44105641b
+ VPCMPGTB Y2, Y15, Y11 // c4610564da or c50564da
+ VPCMPGTB Y11, Y15, Y11 // c4410564db
+ VPCMPGTD (BX), X9, X2 // c4e1316613 or c5b16613
+ VPCMPGTD (R11), X9, X2 // c4c1316613
+ VPCMPGTD X2, X9, X2 // c4e13166d2 or c5b166d2
+ VPCMPGTD X11, X9, X2 // c4c13166d3
+ VPCMPGTD (BX), X9, X11 // c46131661b or c531661b
+ VPCMPGTD (R11), X9, X11 // c44131661b
+ VPCMPGTD X2, X9, X11 // c4613166da or c53166da
+ VPCMPGTD X11, X9, X11 // c4413166db
+ VPCMPGTD (BX), Y15, Y2 // c4e1056613 or c5856613
+ VPCMPGTD (R11), Y15, Y2 // c4c1056613
+ VPCMPGTD Y2, Y15, Y2 // c4e10566d2 or c58566d2
+ VPCMPGTD Y11, Y15, Y2 // c4c10566d3
+ VPCMPGTD (BX), Y15, Y11 // c46105661b or c505661b
+ VPCMPGTD (R11), Y15, Y11 // c44105661b
+ VPCMPGTD Y2, Y15, Y11 // c4610566da or c50566da
+ VPCMPGTD Y11, Y15, Y11 // c4410566db
+ VPCMPGTQ (BX), X9, X2 // c4e2313713
+ VPCMPGTQ (R11), X9, X2 // c4c2313713
+ VPCMPGTQ X2, X9, X2 // c4e23137d2
+ VPCMPGTQ X11, X9, X2 // c4c23137d3
+ VPCMPGTQ (BX), X9, X11 // c46231371b
+ VPCMPGTQ (R11), X9, X11 // c44231371b
+ VPCMPGTQ X2, X9, X11 // c4623137da
+ VPCMPGTQ X11, X9, X11 // c4423137db
+ VPCMPGTQ (BX), Y15, Y2 // c4e2053713
+ VPCMPGTQ (R11), Y15, Y2 // c4c2053713
+ VPCMPGTQ Y2, Y15, Y2 // c4e20537d2
+ VPCMPGTQ Y11, Y15, Y2 // c4c20537d3
+ VPCMPGTQ (BX), Y15, Y11 // c46205371b
+ VPCMPGTQ (R11), Y15, Y11 // c44205371b
+ VPCMPGTQ Y2, Y15, Y11 // c4620537da
+ VPCMPGTQ Y11, Y15, Y11 // c4420537db
+ VPCMPGTW (BX), X9, X2 // c4e1316513 or c5b16513
+ VPCMPGTW (R11), X9, X2 // c4c1316513
+ VPCMPGTW X2, X9, X2 // c4e13165d2 or c5b165d2
+ VPCMPGTW X11, X9, X2 // c4c13165d3
+ VPCMPGTW (BX), X9, X11 // c46131651b or c531651b
+ VPCMPGTW (R11), X9, X11 // c44131651b
+ VPCMPGTW X2, X9, X11 // c4613165da or c53165da
+ VPCMPGTW X11, X9, X11 // c4413165db
+ VPCMPGTW (BX), Y15, Y2 // c4e1056513 or c5856513
+ VPCMPGTW (R11), Y15, Y2 // c4c1056513
+ VPCMPGTW Y2, Y15, Y2 // c4e10565d2 or c58565d2
+ VPCMPGTW Y11, Y15, Y2 // c4c10565d3
+ VPCMPGTW (BX), Y15, Y11 // c46105651b or c505651b
+ VPCMPGTW (R11), Y15, Y11 // c44105651b
+ VPCMPGTW Y2, Y15, Y11 // c4610565da or c50565da
+ VPCMPGTW Y11, Y15, Y11 // c4410565db
+ VPCMPISTRI $7, (BX), X2 // c4e379631307
+ VPCMPISTRI $7, (R11), X2 // c4c379631307
+ VPCMPISTRI $7, X2, X2 // c4e37963d207
+ VPCMPISTRI $7, X11, X2 // c4c37963d307
+ VPCMPISTRI $7, (BX), X11 // c46379631b07
+ VPCMPISTRI $7, (R11), X11 // c44379631b07
+ VPCMPISTRI $7, X2, X11 // c4637963da07
+ VPCMPISTRI $7, X11, X11 // c4437963db07
+ VPCMPISTRM $7, (BX), X2 // c4e379621307
+ VPCMPISTRM $7, (R11), X2 // c4c379621307
+ VPCMPISTRM $7, X2, X2 // c4e37962d207
+ VPCMPISTRM $7, X11, X2 // c4c37962d307
+ VPCMPISTRM $7, (BX), X11 // c46379621b07
+ VPCMPISTRM $7, (R11), X11 // c44379621b07
+ VPCMPISTRM $7, X2, X11 // c4637962da07
+ VPCMPISTRM $7, X11, X11 // c4437962db07
+ VPERM2F128 $7, (BX), Y15, Y2 // c4e305061307
+ VPERM2F128 $7, (R11), Y15, Y2 // c4c305061307
+ VPERM2F128 $7, Y2, Y15, Y2 // c4e30506d207
+ VPERM2F128 $7, Y11, Y15, Y2 // c4c30506d307
+ VPERM2F128 $7, (BX), Y15, Y11 // c46305061b07
+ VPERM2F128 $7, (R11), Y15, Y11 // c44305061b07
+ VPERM2F128 $7, Y2, Y15, Y11 // c4630506da07
+ VPERM2F128 $7, Y11, Y15, Y11 // c4430506db07
+ VPERM2I128 $7, (BX), Y15, Y2 // c4e305461307
+ VPERM2I128 $7, (R11), Y15, Y2 // c4c305461307
+ VPERM2I128 $7, Y2, Y15, Y2 // c4e30546d207
+ VPERM2I128 $7, Y11, Y15, Y2 // c4c30546d307
+ VPERM2I128 $7, (BX), Y15, Y11 // c46305461b07
+ VPERM2I128 $7, (R11), Y15, Y11 // c44305461b07
+ VPERM2I128 $7, Y2, Y15, Y11 // c4630546da07
+ VPERM2I128 $7, Y11, Y15, Y11 // c4430546db07
+ VPERMD (BX), Y15, Y2 // c4e2053613
+ VPERMD (R11), Y15, Y2 // c4c2053613
+ VPERMD Y2, Y15, Y2 // c4e20536d2
+ VPERMD Y11, Y15, Y2 // c4c20536d3
+ VPERMD (BX), Y15, Y11 // c46205361b
+ VPERMD (R11), Y15, Y11 // c44205361b
+ VPERMD Y2, Y15, Y11 // c4620536da
+ VPERMD Y11, Y15, Y11 // c4420536db
+ VPERMILPD $7, (BX), X2 // c4e379051307
+ VPERMILPD $7, (R11), X2 // c4c379051307
+ VPERMILPD $7, X2, X2 // c4e37905d207
+ VPERMILPD $7, X11, X2 // c4c37905d307
+ VPERMILPD $7, (BX), X11 // c46379051b07
+ VPERMILPD $7, (R11), X11 // c44379051b07
+ VPERMILPD $7, X2, X11 // c4637905da07
+ VPERMILPD $7, X11, X11 // c4437905db07
+ VPERMILPD (BX), X9, X2 // c4e2310d13
+ VPERMILPD (R11), X9, X2 // c4c2310d13
+ VPERMILPD X2, X9, X2 // c4e2310dd2
+ VPERMILPD X11, X9, X2 // c4c2310dd3
+ VPERMILPD (BX), X9, X11 // c462310d1b
+ VPERMILPD (R11), X9, X11 // c442310d1b
+ VPERMILPD X2, X9, X11 // c462310dda
+ VPERMILPD X11, X9, X11 // c442310ddb
+ VPERMILPD $7, (BX), Y2 // c4e37d051307
+ VPERMILPD $7, (R11), Y2 // c4c37d051307
+ VPERMILPD $7, Y2, Y2 // c4e37d05d207
+ VPERMILPD $7, Y11, Y2 // c4c37d05d307
+ VPERMILPD $7, (BX), Y11 // c4637d051b07
+ VPERMILPD $7, (R11), Y11 // c4437d051b07
+ VPERMILPD $7, Y2, Y11 // c4637d05da07
+ VPERMILPD $7, Y11, Y11 // c4437d05db07
+ VPERMILPD (BX), Y15, Y2 // c4e2050d13
+ VPERMILPD (R11), Y15, Y2 // c4c2050d13
+ VPERMILPD Y2, Y15, Y2 // c4e2050dd2
+ VPERMILPD Y11, Y15, Y2 // c4c2050dd3
+ VPERMILPD (BX), Y15, Y11 // c462050d1b
+ VPERMILPD (R11), Y15, Y11 // c442050d1b
+ VPERMILPD Y2, Y15, Y11 // c462050dda
+ VPERMILPD Y11, Y15, Y11 // c442050ddb
+ VPERMILPS $7, (BX), X2 // c4e379041307
+ VPERMILPS $7, (R11), X2 // c4c379041307
+ VPERMILPS $7, X2, X2 // c4e37904d207
+ VPERMILPS $7, X11, X2 // c4c37904d307
+ VPERMILPS $7, (BX), X11 // c46379041b07
+ VPERMILPS $7, (R11), X11 // c44379041b07
+ VPERMILPS $7, X2, X11 // c4637904da07
+ VPERMILPS $7, X11, X11 // c4437904db07
+ VPERMILPS (BX), X9, X2 // c4e2310c13
+ VPERMILPS (R11), X9, X2 // c4c2310c13
+ VPERMILPS X2, X9, X2 // c4e2310cd2
+ VPERMILPS X11, X9, X2 // c4c2310cd3
+ VPERMILPS (BX), X9, X11 // c462310c1b
+ VPERMILPS (R11), X9, X11 // c442310c1b
+ VPERMILPS X2, X9, X11 // c462310cda
+ VPERMILPS X11, X9, X11 // c442310cdb
+ VPERMILPS $7, (BX), Y2 // c4e37d041307
+ VPERMILPS $7, (R11), Y2 // c4c37d041307
+ VPERMILPS $7, Y2, Y2 // c4e37d04d207
+ VPERMILPS $7, Y11, Y2 // c4c37d04d307
+ VPERMILPS $7, (BX), Y11 // c4637d041b07
+ VPERMILPS $7, (R11), Y11 // c4437d041b07
+ VPERMILPS $7, Y2, Y11 // c4637d04da07
+ VPERMILPS $7, Y11, Y11 // c4437d04db07
+ VPERMILPS (BX), Y15, Y2 // c4e2050c13
+ VPERMILPS (R11), Y15, Y2 // c4c2050c13
+ VPERMILPS Y2, Y15, Y2 // c4e2050cd2
+ VPERMILPS Y11, Y15, Y2 // c4c2050cd3
+ VPERMILPS (BX), Y15, Y11 // c462050c1b
+ VPERMILPS (R11), Y15, Y11 // c442050c1b
+ VPERMILPS Y2, Y15, Y11 // c462050cda
+ VPERMILPS Y11, Y15, Y11 // c442050cdb
+ VPERMPD $7, (BX), Y2 // c4e3fd011307
+ VPERMPD $7, (R11), Y2 // c4c3fd011307
+ VPERMPD $7, Y2, Y2 // c4e3fd01d207
+ VPERMPD $7, Y11, Y2 // c4c3fd01d307
+ VPERMPD $7, (BX), Y11 // c463fd011b07
+ VPERMPD $7, (R11), Y11 // c443fd011b07
+ VPERMPD $7, Y2, Y11 // c463fd01da07
+ VPERMPD $7, Y11, Y11 // c443fd01db07
+ VPERMPS (BX), Y15, Y2 // c4e2051613
+ VPERMPS (R11), Y15, Y2 // c4c2051613
+ VPERMPS Y2, Y15, Y2 // c4e20516d2
+ VPERMPS Y11, Y15, Y2 // c4c20516d3
+ VPERMPS (BX), Y15, Y11 // c46205161b
+ VPERMPS (R11), Y15, Y11 // c44205161b
+ VPERMPS Y2, Y15, Y11 // c4620516da
+ VPERMPS Y11, Y15, Y11 // c4420516db
+ VPERMQ $7, (BX), Y2 // c4e3fd001307
+ VPERMQ $7, (R11), Y2 // c4c3fd001307
+ VPERMQ $7, Y2, Y2 // c4e3fd00d207
+ VPERMQ $7, Y11, Y2 // c4c3fd00d307
+ VPERMQ $7, (BX), Y11 // c463fd001b07
+ VPERMQ $7, (R11), Y11 // c443fd001b07
+ VPERMQ $7, Y2, Y11 // c463fd00da07
+ VPERMQ $7, Y11, Y11 // c443fd00db07
+ VPEXTRB $7, X2, (BX) // c4e379141307
+ VPEXTRB $7, X11, (BX) // c46379141b07
+ VPEXTRB $7, X2, (R11) // c4c379141307
+ VPEXTRB $7, X11, (R11) // c44379141b07
+ VPEXTRB $7, X2, DX // c4e37914d207
+ VPEXTRB $7, X11, DX // c4637914da07
+ VPEXTRB $7, X2, R11 // c4c37914d307
+ VPEXTRB $7, X11, R11 // c4437914db07
+ VPEXTRD $7, X2, (BX) // c4e379161307
+ VPEXTRD $7, X11, (BX) // c46379161b07
+ VPEXTRD $7, X2, (R11) // c4c379161307
+ VPEXTRD $7, X11, (R11) // c44379161b07
+ VPEXTRD $7, X2, DX // c4e37916d207
+ VPEXTRD $7, X11, DX // c4637916da07
+ VPEXTRD $7, X2, R11 // c4c37916d307
+ VPEXTRD $7, X11, R11 // c4437916db07
+ VPEXTRQ $7, X2, (BX) // c4e3f9161307
+ VPEXTRQ $7, X11, (BX) // c463f9161b07
+ VPEXTRQ $7, X2, (R11) // c4c3f9161307
+ VPEXTRQ $7, X11, (R11) // c443f9161b07
+ VPEXTRQ $7, X2, DX // c4e3f916d207
+ VPEXTRQ $7, X11, DX // c463f916da07
+ VPEXTRQ $7, X2, R11 // c4c3f916d307
+ VPEXTRQ $7, X11, R11 // c443f916db07
+ VPEXTRW $7, X2, DX // c4e179c5d207 or c5f9c5d207 or c4e37915d207
+ VPEXTRW $7, X11, DX // c4c179c5d307 or c4637915da07
+ VPEXTRW $7, X2, R11 // c46179c5da07 or c579c5da07 or c4c37915d307
+ VPEXTRW $7, X11, R11 // c44179c5db07 or c4437915db07
+ VPEXTRW $7, X2, (BX) // c4e379151307
+ VPEXTRW $7, X11, (BX) // c46379151b07
+ VPEXTRW $7, X2, (R11) // c4c379151307
+ VPEXTRW $7, X11, (R11) // c44379151b07
+ VPHADDD (BX), X9, X2 // c4e2310213
+ VPHADDD (R11), X9, X2 // c4c2310213
+ VPHADDD X2, X9, X2 // c4e23102d2
+ VPHADDD X11, X9, X2 // c4c23102d3
+ VPHADDD (BX), X9, X11 // c46231021b
+ VPHADDD (R11), X9, X11 // c44231021b
+ VPHADDD X2, X9, X11 // c4623102da
+ VPHADDD X11, X9, X11 // c4423102db
+ VPHADDD (BX), Y15, Y2 // c4e2050213
+ VPHADDD (R11), Y15, Y2 // c4c2050213
+ VPHADDD Y2, Y15, Y2 // c4e20502d2
+ VPHADDD Y11, Y15, Y2 // c4c20502d3
+ VPHADDD (BX), Y15, Y11 // c46205021b
+ VPHADDD (R11), Y15, Y11 // c44205021b
+ VPHADDD Y2, Y15, Y11 // c4620502da
+ VPHADDD Y11, Y15, Y11 // c4420502db
+ VPHADDSW (BX), X9, X2 // c4e2310313
+ VPHADDSW (R11), X9, X2 // c4c2310313
+ VPHADDSW X2, X9, X2 // c4e23103d2
+ VPHADDSW X11, X9, X2 // c4c23103d3
+ VPHADDSW (BX), X9, X11 // c46231031b
+ VPHADDSW (R11), X9, X11 // c44231031b
+ VPHADDSW X2, X9, X11 // c4623103da
+ VPHADDSW X11, X9, X11 // c4423103db
+ VPHADDSW (BX), Y15, Y2 // c4e2050313
+ VPHADDSW (R11), Y15, Y2 // c4c2050313
+ VPHADDSW Y2, Y15, Y2 // c4e20503d2
+ VPHADDSW Y11, Y15, Y2 // c4c20503d3
+ VPHADDSW (BX), Y15, Y11 // c46205031b
+ VPHADDSW (R11), Y15, Y11 // c44205031b
+ VPHADDSW Y2, Y15, Y11 // c4620503da
+ VPHADDSW Y11, Y15, Y11 // c4420503db
+ VPHADDW (BX), X9, X2 // c4e2310113
+ VPHADDW (R11), X9, X2 // c4c2310113
+ VPHADDW X2, X9, X2 // c4e23101d2
+ VPHADDW X11, X9, X2 // c4c23101d3
+ VPHADDW (BX), X9, X11 // c46231011b
+ VPHADDW (R11), X9, X11 // c44231011b
+ VPHADDW X2, X9, X11 // c4623101da
+ VPHADDW X11, X9, X11 // c4423101db
+ VPHADDW (BX), Y15, Y2 // c4e2050113
+ VPHADDW (R11), Y15, Y2 // c4c2050113
+ VPHADDW Y2, Y15, Y2 // c4e20501d2
+ VPHADDW Y11, Y15, Y2 // c4c20501d3
+ VPHADDW (BX), Y15, Y11 // c46205011b
+ VPHADDW (R11), Y15, Y11 // c44205011b
+ VPHADDW Y2, Y15, Y11 // c4620501da
+ VPHADDW Y11, Y15, Y11 // c4420501db
+ VPHMINPOSUW (BX), X2 // c4e2794113
+ VPHMINPOSUW (R11), X2 // c4c2794113
+ VPHMINPOSUW X2, X2 // c4e27941d2
+ VPHMINPOSUW X11, X2 // c4c27941d3
+ VPHMINPOSUW (BX), X11 // c46279411b
+ VPHMINPOSUW (R11), X11 // c44279411b
+ VPHMINPOSUW X2, X11 // c4627941da
+ VPHMINPOSUW X11, X11 // c4427941db
+ VPHSUBD (BX), X9, X2 // c4e2310613
+ VPHSUBD (R11), X9, X2 // c4c2310613
+ VPHSUBD X2, X9, X2 // c4e23106d2
+ VPHSUBD X11, X9, X2 // c4c23106d3
+ VPHSUBD (BX), X9, X11 // c46231061b
+ VPHSUBD (R11), X9, X11 // c44231061b
+ VPHSUBD X2, X9, X11 // c4623106da
+ VPHSUBD X11, X9, X11 // c4423106db
+ VPHSUBD (BX), Y15, Y2 // c4e2050613
+ VPHSUBD (R11), Y15, Y2 // c4c2050613
+ VPHSUBD Y2, Y15, Y2 // c4e20506d2
+ VPHSUBD Y11, Y15, Y2 // c4c20506d3
+ VPHSUBD (BX), Y15, Y11 // c46205061b
+ VPHSUBD (R11), Y15, Y11 // c44205061b
+ VPHSUBD Y2, Y15, Y11 // c4620506da
+ VPHSUBD Y11, Y15, Y11 // c4420506db
+ VPHSUBSW (BX), X9, X2 // c4e2310713
+ VPHSUBSW (R11), X9, X2 // c4c2310713
+ VPHSUBSW X2, X9, X2 // c4e23107d2
+ VPHSUBSW X11, X9, X2 // c4c23107d3
+ VPHSUBSW (BX), X9, X11 // c46231071b
+ VPHSUBSW (R11), X9, X11 // c44231071b
+ VPHSUBSW X2, X9, X11 // c4623107da
+ VPHSUBSW X11, X9, X11 // c4423107db
+ VPHSUBSW (BX), Y15, Y2 // c4e2050713
+ VPHSUBSW (R11), Y15, Y2 // c4c2050713
+ VPHSUBSW Y2, Y15, Y2 // c4e20507d2
+ VPHSUBSW Y11, Y15, Y2 // c4c20507d3
+ VPHSUBSW (BX), Y15, Y11 // c46205071b
+ VPHSUBSW (R11), Y15, Y11 // c44205071b
+ VPHSUBSW Y2, Y15, Y11 // c4620507da
+ VPHSUBSW Y11, Y15, Y11 // c4420507db
+ VPHSUBW (BX), X9, X2 // c4e2310513
+ VPHSUBW (R11), X9, X2 // c4c2310513
+ VPHSUBW X2, X9, X2 // c4e23105d2
+ VPHSUBW X11, X9, X2 // c4c23105d3
+ VPHSUBW (BX), X9, X11 // c46231051b
+ VPHSUBW (R11), X9, X11 // c44231051b
+ VPHSUBW X2, X9, X11 // c4623105da
+ VPHSUBW X11, X9, X11 // c4423105db
+ VPHSUBW (BX), Y15, Y2 // c4e2050513
+ VPHSUBW (R11), Y15, Y2 // c4c2050513
+ VPHSUBW Y2, Y15, Y2 // c4e20505d2
+ VPHSUBW Y11, Y15, Y2 // c4c20505d3
+ VPHSUBW (BX), Y15, Y11 // c46205051b
+ VPHSUBW (R11), Y15, Y11 // c44205051b
+ VPHSUBW Y2, Y15, Y11 // c4620505da
+ VPHSUBW Y11, Y15, Y11 // c4420505db
+ VPINSRB $7, (BX), X9, X2 // c4e331201307
+ VPINSRB $7, (R11), X9, X2 // c4c331201307
+ VPINSRB $7, DX, X9, X2 // c4e33120d207
+ VPINSRB $7, R11, X9, X2 // c4c33120d307
+ VPINSRB $7, (BX), X9, X11 // c46331201b07
+ VPINSRB $7, (R11), X9, X11 // c44331201b07
+ VPINSRB $7, DX, X9, X11 // c4633120da07
+ VPINSRB $7, R11, X9, X11 // c4433120db07
+ VPINSRD $7, (BX), X9, X2 // c4e331221307
+ VPINSRD $7, (R11), X9, X2 // c4c331221307
+ VPINSRD $7, DX, X9, X2 // c4e33122d207
+ VPINSRD $7, R11, X9, X2 // c4c33122d307
+ VPINSRD $7, (BX), X9, X11 // c46331221b07
+ VPINSRD $7, (R11), X9, X11 // c44331221b07
+ VPINSRD $7, DX, X9, X11 // c4633122da07
+ VPINSRD $7, R11, X9, X11 // c4433122db07
+ VPINSRQ $7, (BX), X9, X2 // c4e3b1221307
+ VPINSRQ $7, (R11), X9, X2 // c4c3b1221307
+ VPINSRQ $7, DX, X9, X2 // c4e3b122d207
+ VPINSRQ $7, R11, X9, X2 // c4c3b122d307
+ VPINSRQ $7, (BX), X9, X11 // c463b1221b07
+ VPINSRQ $7, (R11), X9, X11 // c443b1221b07
+ VPINSRQ $7, DX, X9, X11 // c463b122da07
+ VPINSRQ $7, R11, X9, X11 // c443b122db07
+ VPINSRW $7, (BX), X9, X2 // c4e131c41307 or c5b1c41307
+ VPINSRW $7, (R11), X9, X2 // c4c131c41307
+ VPINSRW $7, DX, X9, X2 // c4e131c4d207 or c5b1c4d207
+ VPINSRW $7, R11, X9, X2 // c4c131c4d307
+ VPINSRW $7, (BX), X9, X11 // c46131c41b07 or c531c41b07
+ VPINSRW $7, (R11), X9, X11 // c44131c41b07
+ VPINSRW $7, DX, X9, X11 // c46131c4da07 or c531c4da07
+ VPINSRW $7, R11, X9, X11 // c44131c4db07
+ VPMADDUBSW (BX), X9, X2 // c4e2310413
+ VPMADDUBSW (R11), X9, X2 // c4c2310413
+ VPMADDUBSW X2, X9, X2 // c4e23104d2
+ VPMADDUBSW X11, X9, X2 // c4c23104d3
+ VPMADDUBSW (BX), X9, X11 // c46231041b
+ VPMADDUBSW (R11), X9, X11 // c44231041b
+ VPMADDUBSW X2, X9, X11 // c4623104da
+ VPMADDUBSW X11, X9, X11 // c4423104db
+ VPMADDUBSW (BX), Y15, Y2 // c4e2050413
+ VPMADDUBSW (R11), Y15, Y2 // c4c2050413
+ VPMADDUBSW Y2, Y15, Y2 // c4e20504d2
+ VPMADDUBSW Y11, Y15, Y2 // c4c20504d3
+ VPMADDUBSW (BX), Y15, Y11 // c46205041b
+ VPMADDUBSW (R11), Y15, Y11 // c44205041b
+ VPMADDUBSW Y2, Y15, Y11 // c4620504da
+ VPMADDUBSW Y11, Y15, Y11 // c4420504db
+ VPMADDWD (BX), X9, X2 // c4e131f513 or c5b1f513
+ VPMADDWD (R11), X9, X2 // c4c131f513
+ VPMADDWD X2, X9, X2 // c4e131f5d2 or c5b1f5d2
+ VPMADDWD X11, X9, X2 // c4c131f5d3
+ VPMADDWD (BX), X9, X11 // c46131f51b or c531f51b
+ VPMADDWD (R11), X9, X11 // c44131f51b
+ VPMADDWD X2, X9, X11 // c46131f5da or c531f5da
+ VPMADDWD X11, X9, X11 // c44131f5db
+ VPMADDWD (BX), Y15, Y2 // c4e105f513 or c585f513
+ VPMADDWD (R11), Y15, Y2 // c4c105f513
+ VPMADDWD Y2, Y15, Y2 // c4e105f5d2 or c585f5d2
+ VPMADDWD Y11, Y15, Y2 // c4c105f5d3
+ VPMADDWD (BX), Y15, Y11 // c46105f51b or c505f51b
+ VPMADDWD (R11), Y15, Y11 // c44105f51b
+ VPMADDWD Y2, Y15, Y11 // c46105f5da or c505f5da
+ VPMADDWD Y11, Y15, Y11 // c44105f5db
+ VPMASKMOVD X2, X9, (BX) // c4e2318e13
+ VPMASKMOVD X11, X9, (BX) // c462318e1b
+ VPMASKMOVD X2, X9, (R11) // c4c2318e13
+ VPMASKMOVD X11, X9, (R11) // c442318e1b
+ VPMASKMOVD Y2, Y15, (BX) // c4e2058e13
+ VPMASKMOVD Y11, Y15, (BX) // c462058e1b
+ VPMASKMOVD Y2, Y15, (R11) // c4c2058e13
+ VPMASKMOVD Y11, Y15, (R11) // c442058e1b
+ VPMASKMOVD (BX), X9, X2 // c4e2318c13
+ VPMASKMOVD (R11), X9, X2 // c4c2318c13
+ VPMASKMOVD (BX), X9, X11 // c462318c1b
+ VPMASKMOVD (R11), X9, X11 // c442318c1b
+ VPMASKMOVD (BX), Y15, Y2 // c4e2058c13
+ VPMASKMOVD (R11), Y15, Y2 // c4c2058c13
+ VPMASKMOVD (BX), Y15, Y11 // c462058c1b
+ VPMASKMOVD (R11), Y15, Y11 // c442058c1b
+ VPMASKMOVQ X2, X9, (BX) // c4e2b18e13
+ VPMASKMOVQ X11, X9, (BX) // c462b18e1b
+ VPMASKMOVQ X2, X9, (R11) // c4c2b18e13
+ VPMASKMOVQ X11, X9, (R11) // c442b18e1b
+ VPMASKMOVQ Y2, Y15, (BX) // c4e2858e13
+ VPMASKMOVQ Y11, Y15, (BX) // c462858e1b
+ VPMASKMOVQ Y2, Y15, (R11) // c4c2858e13
+ VPMASKMOVQ Y11, Y15, (R11) // c442858e1b
+ VPMASKMOVQ (BX), X9, X2 // c4e2b18c13
+ VPMASKMOVQ (R11), X9, X2 // c4c2b18c13
+ VPMASKMOVQ (BX), X9, X11 // c462b18c1b
+ VPMASKMOVQ (R11), X9, X11 // c442b18c1b
+ VPMASKMOVQ (BX), Y15, Y2 // c4e2858c13
+ VPMASKMOVQ (R11), Y15, Y2 // c4c2858c13
+ VPMASKMOVQ (BX), Y15, Y11 // c462858c1b
+ VPMASKMOVQ (R11), Y15, Y11 // c442858c1b
+ VPMAXSB (BX), X9, X2 // c4e2313c13
+ VPMAXSB (R11), X9, X2 // c4c2313c13
+ VPMAXSB X2, X9, X2 // c4e2313cd2
+ VPMAXSB X11, X9, X2 // c4c2313cd3
+ VPMAXSB (BX), X9, X11 // c462313c1b
+ VPMAXSB (R11), X9, X11 // c442313c1b
+ VPMAXSB X2, X9, X11 // c462313cda
+ VPMAXSB X11, X9, X11 // c442313cdb
+ VPMAXSB (BX), Y15, Y2 // c4e2053c13
+ VPMAXSB (R11), Y15, Y2 // c4c2053c13
+ VPMAXSB Y2, Y15, Y2 // c4e2053cd2
+ VPMAXSB Y11, Y15, Y2 // c4c2053cd3
+ VPMAXSB (BX), Y15, Y11 // c462053c1b
+ VPMAXSB (R11), Y15, Y11 // c442053c1b
+ VPMAXSB Y2, Y15, Y11 // c462053cda
+ VPMAXSB Y11, Y15, Y11 // c442053cdb
+ VPMAXSD (BX), X9, X2 // c4e2313d13
+ VPMAXSD (R11), X9, X2 // c4c2313d13
+ VPMAXSD X2, X9, X2 // c4e2313dd2
+ VPMAXSD X11, X9, X2 // c4c2313dd3
+ VPMAXSD (BX), X9, X11 // c462313d1b
+ VPMAXSD (R11), X9, X11 // c442313d1b
+ VPMAXSD X2, X9, X11 // c462313dda
+ VPMAXSD X11, X9, X11 // c442313ddb
+ VPMAXSD (BX), Y15, Y2 // c4e2053d13
+ VPMAXSD (R11), Y15, Y2 // c4c2053d13
+ VPMAXSD Y2, Y15, Y2 // c4e2053dd2
+ VPMAXSD Y11, Y15, Y2 // c4c2053dd3
+ VPMAXSD (BX), Y15, Y11 // c462053d1b
+ VPMAXSD (R11), Y15, Y11 // c442053d1b
+ VPMAXSD Y2, Y15, Y11 // c462053dda
+ VPMAXSD Y11, Y15, Y11 // c442053ddb
+ VPMAXSW (BX), X9, X2 // c4e131ee13 or c5b1ee13
+ VPMAXSW (R11), X9, X2 // c4c131ee13
+ VPMAXSW X2, X9, X2 // c4e131eed2 or c5b1eed2
+ VPMAXSW X11, X9, X2 // c4c131eed3
+ VPMAXSW (BX), X9, X11 // c46131ee1b or c531ee1b
+ VPMAXSW (R11), X9, X11 // c44131ee1b
+ VPMAXSW X2, X9, X11 // c46131eeda or c531eeda
+ VPMAXSW X11, X9, X11 // c44131eedb
+ VPMAXSW (BX), Y15, Y2 // c4e105ee13 or c585ee13
+ VPMAXSW (R11), Y15, Y2 // c4c105ee13
+ VPMAXSW Y2, Y15, Y2 // c4e105eed2 or c585eed2
+ VPMAXSW Y11, Y15, Y2 // c4c105eed3
+ VPMAXSW (BX), Y15, Y11 // c46105ee1b or c505ee1b
+ VPMAXSW (R11), Y15, Y11 // c44105ee1b
+ VPMAXSW Y2, Y15, Y11 // c46105eeda or c505eeda
+ VPMAXSW Y11, Y15, Y11 // c44105eedb
+ VPMAXUB (BX), X9, X2 // c4e131de13 or c5b1de13
+ VPMAXUB (R11), X9, X2 // c4c131de13
+ VPMAXUB X2, X9, X2 // c4e131ded2 or c5b1ded2
+ VPMAXUB X11, X9, X2 // c4c131ded3
+ VPMAXUB (BX), X9, X11 // c46131de1b or c531de1b
+ VPMAXUB (R11), X9, X11 // c44131de1b
+ VPMAXUB X2, X9, X11 // c46131deda or c531deda
+ VPMAXUB X11, X9, X11 // c44131dedb
+ VPMAXUB (BX), Y15, Y2 // c4e105de13 or c585de13
+ VPMAXUB (R11), Y15, Y2 // c4c105de13
+ VPMAXUB Y2, Y15, Y2 // c4e105ded2 or c585ded2
+ VPMAXUB Y11, Y15, Y2 // c4c105ded3
+ VPMAXUB (BX), Y15, Y11 // c46105de1b or c505de1b
+ VPMAXUB (R11), Y15, Y11 // c44105de1b
+ VPMAXUB Y2, Y15, Y11 // c46105deda or c505deda
+ VPMAXUB Y11, Y15, Y11 // c44105dedb
+ VPMAXUD (BX), X9, X2 // c4e2313f13
+ VPMAXUD (R11), X9, X2 // c4c2313f13
+ VPMAXUD X2, X9, X2 // c4e2313fd2
+ VPMAXUD X11, X9, X2 // c4c2313fd3
+ VPMAXUD (BX), X9, X11 // c462313f1b
+ VPMAXUD (R11), X9, X11 // c442313f1b
+ VPMAXUD X2, X9, X11 // c462313fda
+ VPMAXUD X11, X9, X11 // c442313fdb
+ VPMAXUD (BX), Y15, Y2 // c4e2053f13
+ VPMAXUD (R11), Y15, Y2 // c4c2053f13
+ VPMAXUD Y2, Y15, Y2 // c4e2053fd2
+ VPMAXUD Y11, Y15, Y2 // c4c2053fd3
+ VPMAXUD (BX), Y15, Y11 // c462053f1b
+ VPMAXUD (R11), Y15, Y11 // c442053f1b
+ VPMAXUD Y2, Y15, Y11 // c462053fda
+ VPMAXUD Y11, Y15, Y11 // c442053fdb
+ VPMAXUW (BX), X9, X2 // c4e2313e13
+ VPMAXUW (R11), X9, X2 // c4c2313e13
+ VPMAXUW X2, X9, X2 // c4e2313ed2
+ VPMAXUW X11, X9, X2 // c4c2313ed3
+ VPMAXUW (BX), X9, X11 // c462313e1b
+ VPMAXUW (R11), X9, X11 // c442313e1b
+ VPMAXUW X2, X9, X11 // c462313eda
+ VPMAXUW X11, X9, X11 // c442313edb
+ VPMAXUW (BX), Y15, Y2 // c4e2053e13
+ VPMAXUW (R11), Y15, Y2 // c4c2053e13
+ VPMAXUW Y2, Y15, Y2 // c4e2053ed2
+ VPMAXUW Y11, Y15, Y2 // c4c2053ed3
+ VPMAXUW (BX), Y15, Y11 // c462053e1b
+ VPMAXUW (R11), Y15, Y11 // c442053e1b
+ VPMAXUW Y2, Y15, Y11 // c462053eda
+ VPMAXUW Y11, Y15, Y11 // c442053edb
+ VPMINSB (BX), X9, X2 // c4e2313813
+ VPMINSB (R11), X9, X2 // c4c2313813
+ VPMINSB X2, X9, X2 // c4e23138d2
+ VPMINSB X11, X9, X2 // c4c23138d3
+ VPMINSB (BX), X9, X11 // c46231381b
+ VPMINSB (R11), X9, X11 // c44231381b
+ VPMINSB X2, X9, X11 // c4623138da
+ VPMINSB X11, X9, X11 // c4423138db
+ VPMINSB (BX), Y15, Y2 // c4e2053813
+ VPMINSB (R11), Y15, Y2 // c4c2053813
+ VPMINSB Y2, Y15, Y2 // c4e20538d2
+ VPMINSB Y11, Y15, Y2 // c4c20538d3
+ VPMINSB (BX), Y15, Y11 // c46205381b
+ VPMINSB (R11), Y15, Y11 // c44205381b
+ VPMINSB Y2, Y15, Y11 // c4620538da
+ VPMINSB Y11, Y15, Y11 // c4420538db
+ VPMINSD (BX), X9, X2 // c4e2313913
+ VPMINSD (R11), X9, X2 // c4c2313913
+ VPMINSD X2, X9, X2 // c4e23139d2
+ VPMINSD X11, X9, X2 // c4c23139d3
+ VPMINSD (BX), X9, X11 // c46231391b
+ VPMINSD (R11), X9, X11 // c44231391b
+ VPMINSD X2, X9, X11 // c4623139da
+ VPMINSD X11, X9, X11 // c4423139db
+ VPMINSD (BX), Y15, Y2 // c4e2053913
+ VPMINSD (R11), Y15, Y2 // c4c2053913
+ VPMINSD Y2, Y15, Y2 // c4e20539d2
+ VPMINSD Y11, Y15, Y2 // c4c20539d3
+ VPMINSD (BX), Y15, Y11 // c46205391b
+ VPMINSD (R11), Y15, Y11 // c44205391b
+ VPMINSD Y2, Y15, Y11 // c4620539da
+ VPMINSD Y11, Y15, Y11 // c4420539db
+ VPMINSW (BX), X9, X2 // c4e131ea13 or c5b1ea13
+ VPMINSW (R11), X9, X2 // c4c131ea13
+ VPMINSW X2, X9, X2 // c4e131ead2 or c5b1ead2
+ VPMINSW X11, X9, X2 // c4c131ead3
+ VPMINSW (BX), X9, X11 // c46131ea1b or c531ea1b
+ VPMINSW (R11), X9, X11 // c44131ea1b
+ VPMINSW X2, X9, X11 // c46131eada or c531eada
+ VPMINSW X11, X9, X11 // c44131eadb
+ VPMINSW (BX), Y15, Y2 // c4e105ea13 or c585ea13
+ VPMINSW (R11), Y15, Y2 // c4c105ea13
+ VPMINSW Y2, Y15, Y2 // c4e105ead2 or c585ead2
+ VPMINSW Y11, Y15, Y2 // c4c105ead3
+ VPMINSW (BX), Y15, Y11 // c46105ea1b or c505ea1b
+ VPMINSW (R11), Y15, Y11 // c44105ea1b
+ VPMINSW Y2, Y15, Y11 // c46105eada or c505eada
+ VPMINSW Y11, Y15, Y11 // c44105eadb
+ VPMINUB (BX), X9, X2 // c4e131da13 or c5b1da13
+ VPMINUB (R11), X9, X2 // c4c131da13
+ VPMINUB X2, X9, X2 // c4e131dad2 or c5b1dad2
+ VPMINUB X11, X9, X2 // c4c131dad3
+ VPMINUB (BX), X9, X11 // c46131da1b or c531da1b
+ VPMINUB (R11), X9, X11 // c44131da1b
+ VPMINUB X2, X9, X11 // c46131dada or c531dada
+ VPMINUB X11, X9, X11 // c44131dadb
+ VPMINUB (BX), Y15, Y2 // c4e105da13 or c585da13
+ VPMINUB (R11), Y15, Y2 // c4c105da13
+ VPMINUB Y2, Y15, Y2 // c4e105dad2 or c585dad2
+ VPMINUB Y11, Y15, Y2 // c4c105dad3
+ VPMINUB (BX), Y15, Y11 // c46105da1b or c505da1b
+ VPMINUB (R11), Y15, Y11 // c44105da1b
+ VPMINUB Y2, Y15, Y11 // c46105dada or c505dada
+ VPMINUB Y11, Y15, Y11 // c44105dadb
+ VPMINUD (BX), X9, X2 // c4e2313b13
+ VPMINUD (R11), X9, X2 // c4c2313b13
+ VPMINUD X2, X9, X2 // c4e2313bd2
+ VPMINUD X11, X9, X2 // c4c2313bd3
+ VPMINUD (BX), X9, X11 // c462313b1b
+ VPMINUD (R11), X9, X11 // c442313b1b
+ VPMINUD X2, X9, X11 // c462313bda
+ VPMINUD X11, X9, X11 // c442313bdb
+ VPMINUD (BX), Y15, Y2 // c4e2053b13
+ VPMINUD (R11), Y15, Y2 // c4c2053b13
+ VPMINUD Y2, Y15, Y2 // c4e2053bd2
+ VPMINUD Y11, Y15, Y2 // c4c2053bd3
+ VPMINUD (BX), Y15, Y11 // c462053b1b
+ VPMINUD (R11), Y15, Y11 // c442053b1b
+ VPMINUD Y2, Y15, Y11 // c462053bda
+ VPMINUD Y11, Y15, Y11 // c442053bdb
+ VPMINUW (BX), X9, X2 // c4e2313a13
+ VPMINUW (R11), X9, X2 // c4c2313a13
+ VPMINUW X2, X9, X2 // c4e2313ad2
+ VPMINUW X11, X9, X2 // c4c2313ad3
+ VPMINUW (BX), X9, X11 // c462313a1b
+ VPMINUW (R11), X9, X11 // c442313a1b
+ VPMINUW X2, X9, X11 // c462313ada
+ VPMINUW X11, X9, X11 // c442313adb
+ VPMINUW (BX), Y15, Y2 // c4e2053a13
+ VPMINUW (R11), Y15, Y2 // c4c2053a13
+ VPMINUW Y2, Y15, Y2 // c4e2053ad2
+ VPMINUW Y11, Y15, Y2 // c4c2053ad3
+ VPMINUW (BX), Y15, Y11 // c462053a1b
+ VPMINUW (R11), Y15, Y11 // c442053a1b
+ VPMINUW Y2, Y15, Y11 // c462053ada
+ VPMINUW Y11, Y15, Y11 // c442053adb
+ VPMOVMSKB X2, DX // c4e179d7d2 or c5f9d7d2
+ VPMOVMSKB X11, DX // c4c179d7d3
+ VPMOVMSKB X2, R11 // c46179d7da or c579d7da
+ VPMOVMSKB X11, R11 // c44179d7db
+ VPMOVMSKB Y2, DX // c4e17dd7d2 or c5fdd7d2
+ VPMOVMSKB Y11, DX // c4c17dd7d3
+ VPMOVMSKB Y2, R11 // c4617dd7da or c57dd7da
+ VPMOVMSKB Y11, R11 // c4417dd7db
+ VPMOVSXBD (BX), X2 // c4e2792113
+ VPMOVSXBD (R11), X2 // c4c2792113
+ VPMOVSXBD X2, X2 // c4e27921d2
+ VPMOVSXBD X11, X2 // c4c27921d3
+ VPMOVSXBD (BX), X11 // c46279211b
+ VPMOVSXBD (R11), X11 // c44279211b
+ VPMOVSXBD X2, X11 // c4627921da
+ VPMOVSXBD X11, X11 // c4427921db
+ VPMOVSXBD (BX), Y2 // c4e27d2113
+ VPMOVSXBD (R11), Y2 // c4c27d2113
+ VPMOVSXBD X2, Y2 // c4e27d21d2
+ VPMOVSXBD X11, Y2 // c4c27d21d3
+ VPMOVSXBD (BX), Y11 // c4627d211b
+ VPMOVSXBD (R11), Y11 // c4427d211b
+ VPMOVSXBD X2, Y11 // c4627d21da
+ VPMOVSXBD X11, Y11 // c4427d21db
+ VPMOVSXBQ (BX), X2 // c4e2792213
+ VPMOVSXBQ (R11), X2 // c4c2792213
+ VPMOVSXBQ X2, X2 // c4e27922d2
+ VPMOVSXBQ X11, X2 // c4c27922d3
+ VPMOVSXBQ (BX), X11 // c46279221b
+ VPMOVSXBQ (R11), X11 // c44279221b
+ VPMOVSXBQ X2, X11 // c4627922da
+ VPMOVSXBQ X11, X11 // c4427922db
+ VPMOVSXBQ (BX), Y2 // c4e27d2213
+ VPMOVSXBQ (R11), Y2 // c4c27d2213
+ VPMOVSXBQ X2, Y2 // c4e27d22d2
+ VPMOVSXBQ X11, Y2 // c4c27d22d3
+ VPMOVSXBQ (BX), Y11 // c4627d221b
+ VPMOVSXBQ (R11), Y11 // c4427d221b
+ VPMOVSXBQ X2, Y11 // c4627d22da
+ VPMOVSXBQ X11, Y11 // c4427d22db
+ VPMOVSXBW (BX), X2 // c4e2792013
+ VPMOVSXBW (R11), X2 // c4c2792013
+ VPMOVSXBW X2, X2 // c4e27920d2
+ VPMOVSXBW X11, X2 // c4c27920d3
+ VPMOVSXBW (BX), X11 // c46279201b
+ VPMOVSXBW (R11), X11 // c44279201b
+ VPMOVSXBW X2, X11 // c4627920da
+ VPMOVSXBW X11, X11 // c4427920db
+ VPMOVSXBW (BX), Y2 // c4e27d2013
+ VPMOVSXBW (R11), Y2 // c4c27d2013
+ VPMOVSXBW X2, Y2 // c4e27d20d2
+ VPMOVSXBW X11, Y2 // c4c27d20d3
+ VPMOVSXBW (BX), Y11 // c4627d201b
+ VPMOVSXBW (R11), Y11 // c4427d201b
+ VPMOVSXBW X2, Y11 // c4627d20da
+ VPMOVSXBW X11, Y11 // c4427d20db
+ VPMOVSXDQ (BX), X2 // c4e2792513
+ VPMOVSXDQ (R11), X2 // c4c2792513
+ VPMOVSXDQ X2, X2 // c4e27925d2
+ VPMOVSXDQ X11, X2 // c4c27925d3
+ VPMOVSXDQ (BX), X11 // c46279251b
+ VPMOVSXDQ (R11), X11 // c44279251b
+ VPMOVSXDQ X2, X11 // c4627925da
+ VPMOVSXDQ X11, X11 // c4427925db
+ VPMOVSXDQ (BX), Y2 // c4e27d2513
+ VPMOVSXDQ (R11), Y2 // c4c27d2513
+ VPMOVSXDQ X2, Y2 // c4e27d25d2
+ VPMOVSXDQ X11, Y2 // c4c27d25d3
+ VPMOVSXDQ (BX), Y11 // c4627d251b
+ VPMOVSXDQ (R11), Y11 // c4427d251b
+ VPMOVSXDQ X2, Y11 // c4627d25da
+ VPMOVSXDQ X11, Y11 // c4427d25db
+ VPMOVSXWD (BX), X2 // c4e2792313
+ VPMOVSXWD (R11), X2 // c4c2792313
+ VPMOVSXWD X2, X2 // c4e27923d2
+ VPMOVSXWD X11, X2 // c4c27923d3
+ VPMOVSXWD (BX), X11 // c46279231b
+ VPMOVSXWD (R11), X11 // c44279231b
+ VPMOVSXWD X2, X11 // c4627923da
+ VPMOVSXWD X11, X11 // c4427923db
+ VPMOVSXWD (BX), Y2 // c4e27d2313
+ VPMOVSXWD (R11), Y2 // c4c27d2313
+ VPMOVSXWD X2, Y2 // c4e27d23d2
+ VPMOVSXWD X11, Y2 // c4c27d23d3
+ VPMOVSXWD (BX), Y11 // c4627d231b
+ VPMOVSXWD (R11), Y11 // c4427d231b
+ VPMOVSXWD X2, Y11 // c4627d23da
+ VPMOVSXWD X11, Y11 // c4427d23db
+ VPMOVSXWQ (BX), X2 // c4e2792413
+ VPMOVSXWQ (R11), X2 // c4c2792413
+ VPMOVSXWQ X2, X2 // c4e27924d2
+ VPMOVSXWQ X11, X2 // c4c27924d3
+ VPMOVSXWQ (BX), X11 // c46279241b
+ VPMOVSXWQ (R11), X11 // c44279241b
+ VPMOVSXWQ X2, X11 // c4627924da
+ VPMOVSXWQ X11, X11 // c4427924db
+ VPMOVSXWQ (BX), Y2 // c4e27d2413
+ VPMOVSXWQ (R11), Y2 // c4c27d2413
+ VPMOVSXWQ X2, Y2 // c4e27d24d2
+ VPMOVSXWQ X11, Y2 // c4c27d24d3
+ VPMOVSXWQ (BX), Y11 // c4627d241b
+ VPMOVSXWQ (R11), Y11 // c4427d241b
+ VPMOVSXWQ X2, Y11 // c4627d24da
+ VPMOVSXWQ X11, Y11 // c4427d24db
+ VPMOVZXBD (BX), X2 // c4e2793113
+ VPMOVZXBD (R11), X2 // c4c2793113
+ VPMOVZXBD X2, X2 // c4e27931d2
+ VPMOVZXBD X11, X2 // c4c27931d3
+ VPMOVZXBD (BX), X11 // c46279311b
+ VPMOVZXBD (R11), X11 // c44279311b
+ VPMOVZXBD X2, X11 // c4627931da
+ VPMOVZXBD X11, X11 // c4427931db
+ VPMOVZXBD (BX), Y2 // c4e27d3113
+ VPMOVZXBD (R11), Y2 // c4c27d3113
+ VPMOVZXBD X2, Y2 // c4e27d31d2
+ VPMOVZXBD X11, Y2 // c4c27d31d3
+ VPMOVZXBD (BX), Y11 // c4627d311b
+ VPMOVZXBD (R11), Y11 // c4427d311b
+ VPMOVZXBD X2, Y11 // c4627d31da
+ VPMOVZXBD X11, Y11 // c4427d31db
+ VPMOVZXBQ (BX), X2 // c4e2793213
+ VPMOVZXBQ (R11), X2 // c4c2793213
+ VPMOVZXBQ X2, X2 // c4e27932d2
+ VPMOVZXBQ X11, X2 // c4c27932d3
+ VPMOVZXBQ (BX), X11 // c46279321b
+ VPMOVZXBQ (R11), X11 // c44279321b
+ VPMOVZXBQ X2, X11 // c4627932da
+ VPMOVZXBQ X11, X11 // c4427932db
+ VPMOVZXBQ (BX), Y2 // c4e27d3213
+ VPMOVZXBQ (R11), Y2 // c4c27d3213
+ VPMOVZXBQ X2, Y2 // c4e27d32d2
+ VPMOVZXBQ X11, Y2 // c4c27d32d3
+ VPMOVZXBQ (BX), Y11 // c4627d321b
+ VPMOVZXBQ (R11), Y11 // c4427d321b
+ VPMOVZXBQ X2, Y11 // c4627d32da
+ VPMOVZXBQ X11, Y11 // c4427d32db
+ VPMOVZXBW (BX), X2 // c4e2793013
+ VPMOVZXBW (R11), X2 // c4c2793013
+ VPMOVZXBW X2, X2 // c4e27930d2
+ VPMOVZXBW X11, X2 // c4c27930d3
+ VPMOVZXBW (BX), X11 // c46279301b
+ VPMOVZXBW (R11), X11 // c44279301b
+ VPMOVZXBW X2, X11 // c4627930da
+ VPMOVZXBW X11, X11 // c4427930db
+ VPMOVZXBW (BX), Y2 // c4e27d3013
+ VPMOVZXBW (R11), Y2 // c4c27d3013
+ VPMOVZXBW X2, Y2 // c4e27d30d2
+ VPMOVZXBW X11, Y2 // c4c27d30d3
+ VPMOVZXBW (BX), Y11 // c4627d301b
+ VPMOVZXBW (R11), Y11 // c4427d301b
+ VPMOVZXBW X2, Y11 // c4627d30da
+ VPMOVZXBW X11, Y11 // c4427d30db
+ VPMOVZXDQ (BX), X2 // c4e2793513
+ VPMOVZXDQ (R11), X2 // c4c2793513
+ VPMOVZXDQ X2, X2 // c4e27935d2
+ VPMOVZXDQ X11, X2 // c4c27935d3
+ VPMOVZXDQ (BX), X11 // c46279351b
+ VPMOVZXDQ (R11), X11 // c44279351b
+ VPMOVZXDQ X2, X11 // c4627935da
+ VPMOVZXDQ X11, X11 // c4427935db
+ VPMOVZXDQ (BX), Y2 // c4e27d3513
+ VPMOVZXDQ (R11), Y2 // c4c27d3513
+ VPMOVZXDQ X2, Y2 // c4e27d35d2
+ VPMOVZXDQ X11, Y2 // c4c27d35d3
+ VPMOVZXDQ (BX), Y11 // c4627d351b
+ VPMOVZXDQ (R11), Y11 // c4427d351b
+ VPMOVZXDQ X2, Y11 // c4627d35da
+ VPMOVZXDQ X11, Y11 // c4427d35db
+ VPMOVZXWD (BX), X2 // c4e2793313
+ VPMOVZXWD (R11), X2 // c4c2793313
+ VPMOVZXWD X2, X2 // c4e27933d2
+ VPMOVZXWD X11, X2 // c4c27933d3
+ VPMOVZXWD (BX), X11 // c46279331b
+ VPMOVZXWD (R11), X11 // c44279331b
+ VPMOVZXWD X2, X11 // c4627933da
+ VPMOVZXWD X11, X11 // c4427933db
+ VPMOVZXWD (BX), Y2 // c4e27d3313
+ VPMOVZXWD (R11), Y2 // c4c27d3313
+ VPMOVZXWD X2, Y2 // c4e27d33d2
+ VPMOVZXWD X11, Y2 // c4c27d33d3
+ VPMOVZXWD (BX), Y11 // c4627d331b
+ VPMOVZXWD (R11), Y11 // c4427d331b
+ VPMOVZXWD X2, Y11 // c4627d33da
+ VPMOVZXWD X11, Y11 // c4427d33db
+ VPMOVZXWQ (BX), X2 // c4e2793413
+ VPMOVZXWQ (R11), X2 // c4c2793413
+ VPMOVZXWQ X2, X2 // c4e27934d2
+ VPMOVZXWQ X11, X2 // c4c27934d3
+ VPMOVZXWQ (BX), X11 // c46279341b
+ VPMOVZXWQ (R11), X11 // c44279341b
+ VPMOVZXWQ X2, X11 // c4627934da
+ VPMOVZXWQ X11, X11 // c4427934db
+ VPMOVZXWQ (BX), Y2 // c4e27d3413
+ VPMOVZXWQ (R11), Y2 // c4c27d3413
+ VPMOVZXWQ X2, Y2 // c4e27d34d2
+ VPMOVZXWQ X11, Y2 // c4c27d34d3
+ VPMOVZXWQ (BX), Y11 // c4627d341b
+ VPMOVZXWQ (R11), Y11 // c4427d341b
+ VPMOVZXWQ X2, Y11 // c4627d34da
+ VPMOVZXWQ X11, Y11 // c4427d34db
+ VPMULDQ (BX), X9, X2 // c4e2312813
+ VPMULDQ (R11), X9, X2 // c4c2312813
+ VPMULDQ X2, X9, X2 // c4e23128d2
+ VPMULDQ X11, X9, X2 // c4c23128d3
+ VPMULDQ (BX), X9, X11 // c46231281b
+ VPMULDQ (R11), X9, X11 // c44231281b
+ VPMULDQ X2, X9, X11 // c4623128da
+ VPMULDQ X11, X9, X11 // c4423128db
+ VPMULDQ (BX), Y15, Y2 // c4e2052813
+ VPMULDQ (R11), Y15, Y2 // c4c2052813
+ VPMULDQ Y2, Y15, Y2 // c4e20528d2
+ VPMULDQ Y11, Y15, Y2 // c4c20528d3
+ VPMULDQ (BX), Y15, Y11 // c46205281b
+ VPMULDQ (R11), Y15, Y11 // c44205281b
+ VPMULDQ Y2, Y15, Y11 // c4620528da
+ VPMULDQ Y11, Y15, Y11 // c4420528db
+ VPMULHRSW (BX), X9, X2 // c4e2310b13
+ VPMULHRSW (R11), X9, X2 // c4c2310b13
+ VPMULHRSW X2, X9, X2 // c4e2310bd2
+ VPMULHRSW X11, X9, X2 // c4c2310bd3
+ VPMULHRSW (BX), X9, X11 // c462310b1b
+ VPMULHRSW (R11), X9, X11 // c442310b1b
+ VPMULHRSW X2, X9, X11 // c462310bda
+ VPMULHRSW X11, X9, X11 // c442310bdb
+ VPMULHRSW (BX), Y15, Y2 // c4e2050b13
+ VPMULHRSW (R11), Y15, Y2 // c4c2050b13
+ VPMULHRSW Y2, Y15, Y2 // c4e2050bd2
+ VPMULHRSW Y11, Y15, Y2 // c4c2050bd3
+ VPMULHRSW (BX), Y15, Y11 // c462050b1b
+ VPMULHRSW (R11), Y15, Y11 // c442050b1b
+ VPMULHRSW Y2, Y15, Y11 // c462050bda
+ VPMULHRSW Y11, Y15, Y11 // c442050bdb
+ VPMULHUW (BX), X9, X2 // c4e131e413 or c5b1e413
+ VPMULHUW (R11), X9, X2 // c4c131e413
+ VPMULHUW X2, X9, X2 // c4e131e4d2 or c5b1e4d2
+ VPMULHUW X11, X9, X2 // c4c131e4d3
+ VPMULHUW (BX), X9, X11 // c46131e41b or c531e41b
+ VPMULHUW (R11), X9, X11 // c44131e41b
+ VPMULHUW X2, X9, X11 // c46131e4da or c531e4da
+ VPMULHUW X11, X9, X11 // c44131e4db
+ VPMULHUW (BX), Y15, Y2 // c4e105e413 or c585e413
+ VPMULHUW (R11), Y15, Y2 // c4c105e413
+ VPMULHUW Y2, Y15, Y2 // c4e105e4d2 or c585e4d2
+ VPMULHUW Y11, Y15, Y2 // c4c105e4d3
+ VPMULHUW (BX), Y15, Y11 // c46105e41b or c505e41b
+ VPMULHUW (R11), Y15, Y11 // c44105e41b
+ VPMULHUW Y2, Y15, Y11 // c46105e4da or c505e4da
+ VPMULHUW Y11, Y15, Y11 // c44105e4db
+ VPMULHW (BX), X9, X2 // c4e131e513 or c5b1e513
+ VPMULHW (R11), X9, X2 // c4c131e513
+ VPMULHW X2, X9, X2 // c4e131e5d2 or c5b1e5d2
+ VPMULHW X11, X9, X2 // c4c131e5d3
+ VPMULHW (BX), X9, X11 // c46131e51b or c531e51b
+ VPMULHW (R11), X9, X11 // c44131e51b
+ VPMULHW X2, X9, X11 // c46131e5da or c531e5da
+ VPMULHW X11, X9, X11 // c44131e5db
+ VPMULHW (BX), Y15, Y2 // c4e105e513 or c585e513
+ VPMULHW (R11), Y15, Y2 // c4c105e513
+ VPMULHW Y2, Y15, Y2 // c4e105e5d2 or c585e5d2
+ VPMULHW Y11, Y15, Y2 // c4c105e5d3
+ VPMULHW (BX), Y15, Y11 // c46105e51b or c505e51b
+ VPMULHW (R11), Y15, Y11 // c44105e51b
+ VPMULHW Y2, Y15, Y11 // c46105e5da or c505e5da
+ VPMULHW Y11, Y15, Y11 // c44105e5db
+ VPMULLD (BX), X9, X2 // c4e2314013
+ VPMULLD (R11), X9, X2 // c4c2314013
+ VPMULLD X2, X9, X2 // c4e23140d2
+ VPMULLD X11, X9, X2 // c4c23140d3
+ VPMULLD (BX), X9, X11 // c46231401b
+ VPMULLD (R11), X9, X11 // c44231401b
+ VPMULLD X2, X9, X11 // c4623140da
+ VPMULLD X11, X9, X11 // c4423140db
+ VPMULLD (BX), Y15, Y2 // c4e2054013
+ VPMULLD (R11), Y15, Y2 // c4c2054013
+ VPMULLD Y2, Y15, Y2 // c4e20540d2
+ VPMULLD Y11, Y15, Y2 // c4c20540d3
+ VPMULLD (BX), Y15, Y11 // c46205401b
+ VPMULLD (R11), Y15, Y11 // c44205401b
+ VPMULLD Y2, Y15, Y11 // c4620540da
+ VPMULLD Y11, Y15, Y11 // c4420540db
+ VPMULLW (BX), X9, X2 // c4e131d513 or c5b1d513
+ VPMULLW (R11), X9, X2 // c4c131d513
+ VPMULLW X2, X9, X2 // c4e131d5d2 or c5b1d5d2
+ VPMULLW X11, X9, X2 // c4c131d5d3
+ VPMULLW (BX), X9, X11 // c46131d51b or c531d51b
+ VPMULLW (R11), X9, X11 // c44131d51b
+ VPMULLW X2, X9, X11 // c46131d5da or c531d5da
+ VPMULLW X11, X9, X11 // c44131d5db
+ VPMULLW (BX), Y15, Y2 // c4e105d513 or c585d513
+ VPMULLW (R11), Y15, Y2 // c4c105d513
+ VPMULLW Y2, Y15, Y2 // c4e105d5d2 or c585d5d2
+ VPMULLW Y11, Y15, Y2 // c4c105d5d3
+ VPMULLW (BX), Y15, Y11 // c46105d51b or c505d51b
+ VPMULLW (R11), Y15, Y11 // c44105d51b
+ VPMULLW Y2, Y15, Y11 // c46105d5da or c505d5da
+ VPMULLW Y11, Y15, Y11 // c44105d5db
+ VPMULUDQ (BX), X9, X2 // c4e131f413 or c5b1f413
+ VPMULUDQ (R11), X9, X2 // c4c131f413
+ VPMULUDQ X2, X9, X2 // c4e131f4d2 or c5b1f4d2
+ VPMULUDQ X11, X9, X2 // c4c131f4d3
+ VPMULUDQ (BX), X9, X11 // c46131f41b or c531f41b
+ VPMULUDQ (R11), X9, X11 // c44131f41b
+ VPMULUDQ X2, X9, X11 // c46131f4da or c531f4da
+ VPMULUDQ X11, X9, X11 // c44131f4db
+ VPMULUDQ (BX), Y15, Y2 // c4e105f413 or c585f413
+ VPMULUDQ (R11), Y15, Y2 // c4c105f413
+ VPMULUDQ Y2, Y15, Y2 // c4e105f4d2 or c585f4d2
+ VPMULUDQ Y11, Y15, Y2 // c4c105f4d3
+ VPMULUDQ (BX), Y15, Y11 // c46105f41b or c505f41b
+ VPMULUDQ (R11), Y15, Y11 // c44105f41b
+ VPMULUDQ Y2, Y15, Y11 // c46105f4da or c505f4da
+ VPMULUDQ Y11, Y15, Y11 // c44105f4db
+ VPOR (BX), X9, X2 // c4e131eb13 or c5b1eb13
+ VPOR (R11), X9, X2 // c4c131eb13
+ VPOR X2, X9, X2 // c4e131ebd2 or c5b1ebd2
+ VPOR X11, X9, X2 // c4c131ebd3
+ VPOR (BX), X9, X11 // c46131eb1b or c531eb1b
+ VPOR (R11), X9, X11 // c44131eb1b
+ VPOR X2, X9, X11 // c46131ebda or c531ebda
+ VPOR X11, X9, X11 // c44131ebdb
+ VPOR (BX), Y15, Y2 // c4e105eb13 or c585eb13
+ VPOR (R11), Y15, Y2 // c4c105eb13
+ VPOR Y2, Y15, Y2 // c4e105ebd2 or c585ebd2
+ VPOR Y11, Y15, Y2 // c4c105ebd3
+ VPOR (BX), Y15, Y11 // c46105eb1b or c505eb1b
+ VPOR (R11), Y15, Y11 // c44105eb1b
+ VPOR Y2, Y15, Y11 // c46105ebda or c505ebda
+ VPOR Y11, Y15, Y11 // c44105ebdb
+ VPSADBW (BX), X9, X2 // c4e131f613 or c5b1f613
+ VPSADBW (R11), X9, X2 // c4c131f613
+ VPSADBW X2, X9, X2 // c4e131f6d2 or c5b1f6d2
+ VPSADBW X11, X9, X2 // c4c131f6d3
+ VPSADBW (BX), X9, X11 // c46131f61b or c531f61b
+ VPSADBW (R11), X9, X11 // c44131f61b
+ VPSADBW X2, X9, X11 // c46131f6da or c531f6da
+ VPSADBW X11, X9, X11 // c44131f6db
+ VPSADBW (BX), Y15, Y2 // c4e105f613 or c585f613
+ VPSADBW (R11), Y15, Y2 // c4c105f613
+ VPSADBW Y2, Y15, Y2 // c4e105f6d2 or c585f6d2
+ VPSADBW Y11, Y15, Y2 // c4c105f6d3
+ VPSADBW (BX), Y15, Y11 // c46105f61b or c505f61b
+ VPSADBW (R11), Y15, Y11 // c44105f61b
+ VPSADBW Y2, Y15, Y11 // c46105f6da or c505f6da
+ VPSADBW Y11, Y15, Y11 // c44105f6db
+ VPSHUFB (BX), X9, X2 // c4e2310013
+ VPSHUFB (R11), X9, X2 // c4c2310013
+ VPSHUFB X2, X9, X2 // c4e23100d2
+ VPSHUFB X11, X9, X2 // c4c23100d3
+ VPSHUFB (BX), X9, X11 // c46231001b
+ VPSHUFB (R11), X9, X11 // c44231001b
+ VPSHUFB X2, X9, X11 // c4623100da
+ VPSHUFB X11, X9, X11 // c4423100db
+ VPSHUFB (BX), Y15, Y2 // c4e2050013
+ VPSHUFB (R11), Y15, Y2 // c4c2050013
+ VPSHUFB Y2, Y15, Y2 // c4e20500d2
+ VPSHUFB Y11, Y15, Y2 // c4c20500d3
+ VPSHUFB (BX), Y15, Y11 // c46205001b
+ VPSHUFB (R11), Y15, Y11 // c44205001b
+ VPSHUFB Y2, Y15, Y11 // c4620500da
+ VPSHUFB Y11, Y15, Y11 // c4420500db
+ VPSHUFD $7, (BX), X2 // c4e179701307 or c5f9701307
+ VPSHUFD $7, (R11), X2 // c4c179701307
+ VPSHUFD $7, X2, X2 // c4e17970d207 or c5f970d207
+ VPSHUFD $7, X11, X2 // c4c17970d307
+ VPSHUFD $7, (BX), X11 // c46179701b07 or c579701b07
+ VPSHUFD $7, (R11), X11 // c44179701b07
+ VPSHUFD $7, X2, X11 // c4617970da07 or c57970da07
+ VPSHUFD $7, X11, X11 // c4417970db07
+ VPSHUFD $7, (BX), Y2 // c4e17d701307 or c5fd701307
+ VPSHUFD $7, (R11), Y2 // c4c17d701307
+ VPSHUFD $7, Y2, Y2 // c4e17d70d207 or c5fd70d207
+ VPSHUFD $7, Y11, Y2 // c4c17d70d307
+ VPSHUFD $7, (BX), Y11 // c4617d701b07 or c57d701b07
+ VPSHUFD $7, (R11), Y11 // c4417d701b07
+ VPSHUFD $7, Y2, Y11 // c4617d70da07 or c57d70da07
+ VPSHUFD $7, Y11, Y11 // c4417d70db07
+ VPSHUFHW $7, (BX), X2 // c4e17a701307 or c5fa701307
+ VPSHUFHW $7, (R11), X2 // c4c17a701307
+ VPSHUFHW $7, X2, X2 // c4e17a70d207 or c5fa70d207
+ VPSHUFHW $7, X11, X2 // c4c17a70d307
+ VPSHUFHW $7, (BX), X11 // c4617a701b07 or c57a701b07
+ VPSHUFHW $7, (R11), X11 // c4417a701b07
+ VPSHUFHW $7, X2, X11 // c4617a70da07 or c57a70da07
+ VPSHUFHW $7, X11, X11 // c4417a70db07
+ VPSHUFHW $7, (BX), Y2 // c4e17e701307 or c5fe701307
+ VPSHUFHW $7, (R11), Y2 // c4c17e701307
+ VPSHUFHW $7, Y2, Y2 // c4e17e70d207 or c5fe70d207
+ VPSHUFHW $7, Y11, Y2 // c4c17e70d307
+ VPSHUFHW $7, (BX), Y11 // c4617e701b07 or c57e701b07
+ VPSHUFHW $7, (R11), Y11 // c4417e701b07
+ VPSHUFHW $7, Y2, Y11 // c4617e70da07 or c57e70da07
+ VPSHUFHW $7, Y11, Y11 // c4417e70db07
+ VPSHUFLW $7, (BX), X2 // c4e17b701307 or c5fb701307
+ VPSHUFLW $7, (R11), X2 // c4c17b701307
+ VPSHUFLW $7, X2, X2 // c4e17b70d207 or c5fb70d207
+ VPSHUFLW $7, X11, X2 // c4c17b70d307
+ VPSHUFLW $7, (BX), X11 // c4617b701b07 or c57b701b07
+ VPSHUFLW $7, (R11), X11 // c4417b701b07
+ VPSHUFLW $7, X2, X11 // c4617b70da07 or c57b70da07
+ VPSHUFLW $7, X11, X11 // c4417b70db07
+ VPSHUFLW $7, (BX), Y2 // c4e17f701307 or c5ff701307
+ VPSHUFLW $7, (R11), Y2 // c4c17f701307
+ VPSHUFLW $7, Y2, Y2 // c4e17f70d207 or c5ff70d207
+ VPSHUFLW $7, Y11, Y2 // c4c17f70d307
+ VPSHUFLW $7, (BX), Y11 // c4617f701b07 or c57f701b07
+ VPSHUFLW $7, (R11), Y11 // c4417f701b07
+ VPSHUFLW $7, Y2, Y11 // c4617f70da07 or c57f70da07
+ VPSHUFLW $7, Y11, Y11 // c4417f70db07
+ VPSIGNB (BX), X9, X2 // c4e2310813
+ VPSIGNB (R11), X9, X2 // c4c2310813
+ VPSIGNB X2, X9, X2 // c4e23108d2
+ VPSIGNB X11, X9, X2 // c4c23108d3
+ VPSIGNB (BX), X9, X11 // c46231081b
+ VPSIGNB (R11), X9, X11 // c44231081b
+ VPSIGNB X2, X9, X11 // c4623108da
+ VPSIGNB X11, X9, X11 // c4423108db
+ VPSIGNB (BX), Y15, Y2 // c4e2050813
+ VPSIGNB (R11), Y15, Y2 // c4c2050813
+ VPSIGNB Y2, Y15, Y2 // c4e20508d2
+ VPSIGNB Y11, Y15, Y2 // c4c20508d3
+ VPSIGNB (BX), Y15, Y11 // c46205081b
+ VPSIGNB (R11), Y15, Y11 // c44205081b
+ VPSIGNB Y2, Y15, Y11 // c4620508da
+ VPSIGNB Y11, Y15, Y11 // c4420508db
+ VPSIGND (BX), X9, X2 // c4e2310a13
+ VPSIGND (R11), X9, X2 // c4c2310a13
+ VPSIGND X2, X9, X2 // c4e2310ad2
+ VPSIGND X11, X9, X2 // c4c2310ad3
+ VPSIGND (BX), X9, X11 // c462310a1b
+ VPSIGND (R11), X9, X11 // c442310a1b
+ VPSIGND X2, X9, X11 // c462310ada
+ VPSIGND X11, X9, X11 // c442310adb
+ VPSIGND (BX), Y15, Y2 // c4e2050a13
+ VPSIGND (R11), Y15, Y2 // c4c2050a13
+ VPSIGND Y2, Y15, Y2 // c4e2050ad2
+ VPSIGND Y11, Y15, Y2 // c4c2050ad3
+ VPSIGND (BX), Y15, Y11 // c462050a1b
+ VPSIGND (R11), Y15, Y11 // c442050a1b
+ VPSIGND Y2, Y15, Y11 // c462050ada
+ VPSIGND Y11, Y15, Y11 // c442050adb
+ VPSIGNW (BX), X9, X2 // c4e2310913
+ VPSIGNW (R11), X9, X2 // c4c2310913
+ VPSIGNW X2, X9, X2 // c4e23109d2
+ VPSIGNW X11, X9, X2 // c4c23109d3
+ VPSIGNW (BX), X9, X11 // c46231091b
+ VPSIGNW (R11), X9, X11 // c44231091b
+ VPSIGNW X2, X9, X11 // c4623109da
+ VPSIGNW X11, X9, X11 // c4423109db
+ VPSIGNW (BX), Y15, Y2 // c4e2050913
+ VPSIGNW (R11), Y15, Y2 // c4c2050913
+ VPSIGNW Y2, Y15, Y2 // c4e20509d2
+ VPSIGNW Y11, Y15, Y2 // c4c20509d3
+ VPSIGNW (BX), Y15, Y11 // c46205091b
+ VPSIGNW (R11), Y15, Y11 // c44205091b
+ VPSIGNW Y2, Y15, Y11 // c4620509da
+ VPSIGNW Y11, Y15, Y11 // c4420509db
+ VPSLLD (BX), X9, X2 // c4e131f213 or c5b1f213
+ VPSLLD (R11), X9, X2 // c4c131f213
+ VPSLLD X2, X9, X2 // c4e131f2d2 or c5b1f2d2
+ VPSLLD X11, X9, X2 // c4c131f2d3
+ VPSLLD (BX), X9, X11 // c46131f21b or c531f21b
+ VPSLLD (R11), X9, X11 // c44131f21b
+ VPSLLD X2, X9, X11 // c46131f2da or c531f2da
+ VPSLLD X11, X9, X11 // c44131f2db
+ VPSLLD $7, X2, X9 // c4e13172f207 or c5b172f207
+ VPSLLD $7, X11, X9 // c4c13172f307
+ VPSLLDQ $7, X2, X9 // c4e13173fa07 or c5b173fa07
+ VPSLLDQ $7, X11, X9 // c4c13173fb07
+ VPSLLDQ $7, Y2, Y15 // c4e10573fa07 or c58573fa07
+ VPSLLDQ $7, Y11, Y15 // c4c10573fb07
+ VPSLLQ (BX), X9, X2 // c4e131f313 or c5b1f313
+ VPSLLQ (R11), X9, X2 // c4c131f313
+ VPSLLQ X2, X9, X2 // c4e131f3d2 or c5b1f3d2
+ VPSLLQ X11, X9, X2 // c4c131f3d3
+ VPSLLQ (BX), X9, X11 // c46131f31b or c531f31b
+ VPSLLQ (R11), X9, X11 // c44131f31b
+ VPSLLQ X2, X9, X11 // c46131f3da or c531f3da
+ VPSLLQ X11, X9, X11 // c44131f3db
+ VPSLLQ $7, X2, X9 // c4e13173f207 or c5b173f207
+ VPSLLQ $7, X11, X9 // c4c13173f307
+ VPSLLVD (BX), X9, X2 // c4e2314713
+ VPSLLVD (R11), X9, X2 // c4c2314713
+ VPSLLVD X2, X9, X2 // c4e23147d2
+ VPSLLVD X11, X9, X2 // c4c23147d3
+ VPSLLVD (BX), X9, X11 // c46231471b
+ VPSLLVD (R11), X9, X11 // c44231471b
+ VPSLLVD X2, X9, X11 // c4623147da
+ VPSLLVD X11, X9, X11 // c4423147db
+ VPSLLVD (BX), Y15, Y2 // c4e2054713
+ VPSLLVD (R11), Y15, Y2 // c4c2054713
+ VPSLLVD Y2, Y15, Y2 // c4e20547d2
+ VPSLLVD Y11, Y15, Y2 // c4c20547d3
+ VPSLLVD (BX), Y15, Y11 // c46205471b
+ VPSLLVD (R11), Y15, Y11 // c44205471b
+ VPSLLVD Y2, Y15, Y11 // c4620547da
+ VPSLLVD Y11, Y15, Y11 // c4420547db
+ VPSLLVQ (BX), X9, X2 // c4e2b14713
+ VPSLLVQ (R11), X9, X2 // c4c2b14713
+ VPSLLVQ X2, X9, X2 // c4e2b147d2
+ VPSLLVQ X11, X9, X2 // c4c2b147d3
+ VPSLLVQ (BX), X9, X11 // c462b1471b
+ VPSLLVQ (R11), X9, X11 // c442b1471b
+ VPSLLVQ X2, X9, X11 // c462b147da
+ VPSLLVQ X11, X9, X11 // c442b147db
+ VPSLLVQ (BX), Y15, Y2 // c4e2854713
+ VPSLLVQ (R11), Y15, Y2 // c4c2854713
+ VPSLLVQ Y2, Y15, Y2 // c4e28547d2
+ VPSLLVQ Y11, Y15, Y2 // c4c28547d3
+ VPSLLVQ (BX), Y15, Y11 // c46285471b
+ VPSLLVQ (R11), Y15, Y11 // c44285471b
+ VPSLLVQ Y2, Y15, Y11 // c4628547da
+ VPSLLVQ Y11, Y15, Y11 // c4428547db
+ VPSLLW (BX), X9, X2 // c4e131f113 or c5b1f113
+ VPSLLW (R11), X9, X2 // c4c131f113
+ VPSLLW X2, X9, X2 // c4e131f1d2 or c5b1f1d2
+ VPSLLW X11, X9, X2 // c4c131f1d3
+ VPSLLW (BX), X9, X11 // c46131f11b or c531f11b
+ VPSLLW (R11), X9, X11 // c44131f11b
+ VPSLLW X2, X9, X11 // c46131f1da or c531f1da
+ VPSLLW X11, X9, X11 // c44131f1db
+ VPSLLW $7, X2, X9 // c4e13171f207 or c5b171f207
+ VPSLLW $7, X11, X9 // c4c13171f307
+ VPSLLW (BX), Y15, Y2 // c4e105f113 or c585f113
+ VPSLLW (R11), Y15, Y2 // c4c105f113
+ VPSLLW X2, Y15, Y2 // c4e105f1d2 or c585f1d2
+ VPSLLW X11, Y15, Y2 // c4c105f1d3
+ VPSLLW (BX), Y15, Y11 // c46105f11b or c505f11b
+ VPSLLW (R11), Y15, Y11 // c44105f11b
+ VPSLLW X2, Y15, Y11 // c46105f1da or c505f1da
+ VPSLLW X11, Y15, Y11 // c44105f1db
+ VPSLLW $7, Y2, Y15 // c4e10571f207 or c58571f207
+ VPSLLW $7, Y11, Y15 // c4c10571f307
+ VPSRAD (BX), X9, X2 // c4e131e213 or c5b1e213
+ VPSRAD (R11), X9, X2 // c4c131e213
+ VPSRAD X2, X9, X2 // c4e131e2d2 or c5b1e2d2
+ VPSRAD X11, X9, X2 // c4c131e2d3
+ VPSRAD (BX), X9, X11 // c46131e21b or c531e21b
+ VPSRAD (R11), X9, X11 // c44131e21b
+ VPSRAD X2, X9, X11 // c46131e2da or c531e2da
+ VPSRAD X11, X9, X11 // c44131e2db
+ VPSRAD $7, X2, X9 // c4e13172e207 or c5b172e207
+ VPSRAD $7, X11, X9 // c4c13172e307
+ VPSRAD (BX), Y15, Y2 // c4e105e213 or c585e213
+ VPSRAD (R11), Y15, Y2 // c4c105e213
+ VPSRAD X2, Y15, Y2 // c4e105e2d2 or c585e2d2
+ VPSRAD X11, Y15, Y2 // c4c105e2d3
+ VPSRAD (BX), Y15, Y11 // c46105e21b or c505e21b
+ VPSRAD (R11), Y15, Y11 // c44105e21b
+ VPSRAD X2, Y15, Y11 // c46105e2da or c505e2da
+ VPSRAD X11, Y15, Y11 // c44105e2db
+ VPSRAD $7, Y2, Y15 // c4e10572e207 or c58572e207
+ VPSRAD $7, Y11, Y15 // c4c10572e307
+ VPSRAVD (BX), X9, X2 // c4e2314613
+ VPSRAVD (R11), X9, X2 // c4c2314613
+ VPSRAVD X2, X9, X2 // c4e23146d2
+ VPSRAVD X11, X9, X2 // c4c23146d3
+ VPSRAVD (BX), X9, X11 // c46231461b
+ VPSRAVD (R11), X9, X11 // c44231461b
+ VPSRAVD X2, X9, X11 // c4623146da
+ VPSRAVD X11, X9, X11 // c4423146db
+ VPSRAVD (BX), Y15, Y2 // c4e2054613
+ VPSRAVD (R11), Y15, Y2 // c4c2054613
+ VPSRAVD Y2, Y15, Y2 // c4e20546d2
+ VPSRAVD Y11, Y15, Y2 // c4c20546d3
+ VPSRAVD (BX), Y15, Y11 // c46205461b
+ VPSRAVD (R11), Y15, Y11 // c44205461b
+ VPSRAVD Y2, Y15, Y11 // c4620546da
+ VPSRAVD Y11, Y15, Y11 // c4420546db
+ VPSRAW (BX), X9, X2 // c4e131e113 or c5b1e113
+ VPSRAW (R11), X9, X2 // c4c131e113
+ VPSRAW X2, X9, X2 // c4e131e1d2 or c5b1e1d2
+ VPSRAW X11, X9, X2 // c4c131e1d3
+ VPSRAW (BX), X9, X11 // c46131e11b or c531e11b
+ VPSRAW (R11), X9, X11 // c44131e11b
+ VPSRAW X2, X9, X11 // c46131e1da or c531e1da
+ VPSRAW X11, X9, X11 // c44131e1db
+ VPSRAW $7, X2, X9 // c4e13171e207 or c5b171e207
+ VPSRAW $7, X11, X9 // c4c13171e307
+ VPSRAW (BX), Y15, Y2 // c4e105e113 or c585e113
+ VPSRAW (R11), Y15, Y2 // c4c105e113
+ VPSRAW X2, Y15, Y2 // c4e105e1d2 or c585e1d2
+ VPSRAW X11, Y15, Y2 // c4c105e1d3
+ VPSRAW (BX), Y15, Y11 // c46105e11b or c505e11b
+ VPSRAW (R11), Y15, Y11 // c44105e11b
+ VPSRAW X2, Y15, Y11 // c46105e1da or c505e1da
+ VPSRAW X11, Y15, Y11 // c44105e1db
+ VPSRAW $7, Y2, Y15 // c4e10571e207 or c58571e207
+ VPSRAW $7, Y11, Y15 // c4c10571e307
+ VPSRLD (BX), X9, X2 // c4e131d213 or c5b1d213
+ VPSRLD (R11), X9, X2 // c4c131d213
+ VPSRLD X2, X9, X2 // c4e131d2d2 or c5b1d2d2
+ VPSRLD X11, X9, X2 // c4c131d2d3
+ VPSRLD (BX), X9, X11 // c46131d21b or c531d21b
+ VPSRLD (R11), X9, X11 // c44131d21b
+ VPSRLD X2, X9, X11 // c46131d2da or c531d2da
+ VPSRLD X11, X9, X11 // c44131d2db
+ VPSRLD $7, X2, X9 // c4e13172d207 or c5b172d207
+ VPSRLD $7, X11, X9 // c4c13172d307
+ VPSRLDQ $7, X2, X9 // c4e13173da07 or c5b173da07
+ VPSRLDQ $7, X11, X9 // c4c13173db07
+ VPSRLDQ $7, Y2, Y15 // c4e10573da07 or c58573da07
+ VPSRLDQ $7, Y11, Y15 // c4c10573db07
+ VPSRLQ (BX), X9, X2 // c4e131d313 or c5b1d313
+ VPSRLQ (R11), X9, X2 // c4c131d313
+ VPSRLQ X2, X9, X2 // c4e131d3d2 or c5b1d3d2
+ VPSRLQ X11, X9, X2 // c4c131d3d3
+ VPSRLQ (BX), X9, X11 // c46131d31b or c531d31b
+ VPSRLQ (R11), X9, X11 // c44131d31b
+ VPSRLQ X2, X9, X11 // c46131d3da or c531d3da
+ VPSRLQ X11, X9, X11 // c44131d3db
+ VPSRLQ $7, X2, X9 // c4e13173d207 or c5b173d207
+ VPSRLQ $7, X11, X9 // c4c13173d307
+ VPSRLVD (BX), X9, X2 // c4e2314513
+ VPSRLVD (R11), X9, X2 // c4c2314513
+ VPSRLVD X2, X9, X2 // c4e23145d2
+ VPSRLVD X11, X9, X2 // c4c23145d3
+ VPSRLVD (BX), X9, X11 // c46231451b
+ VPSRLVD (R11), X9, X11 // c44231451b
+ VPSRLVD X2, X9, X11 // c4623145da
+ VPSRLVD X11, X9, X11 // c4423145db
+ VPSRLVD (BX), Y15, Y2 // c4e2054513
+ VPSRLVD (R11), Y15, Y2 // c4c2054513
+ VPSRLVD Y2, Y15, Y2 // c4e20545d2
+ VPSRLVD Y11, Y15, Y2 // c4c20545d3
+ VPSRLVD (BX), Y15, Y11 // c46205451b
+ VPSRLVD (R11), Y15, Y11 // c44205451b
+ VPSRLVD Y2, Y15, Y11 // c4620545da
+ VPSRLVD Y11, Y15, Y11 // c4420545db
+ VPSRLVQ (BX), X9, X2 // c4e2b14513
+ VPSRLVQ (R11), X9, X2 // c4c2b14513
+ VPSRLVQ X2, X9, X2 // c4e2b145d2
+ VPSRLVQ X11, X9, X2 // c4c2b145d3
+ VPSRLVQ (BX), X9, X11 // c462b1451b
+ VPSRLVQ (R11), X9, X11 // c442b1451b
+ VPSRLVQ X2, X9, X11 // c462b145da
+ VPSRLVQ X11, X9, X11 // c442b145db
+ VPSRLVQ (BX), Y15, Y2 // c4e2854513
+ VPSRLVQ (R11), Y15, Y2 // c4c2854513
+ VPSRLVQ Y2, Y15, Y2 // c4e28545d2
+ VPSRLVQ Y11, Y15, Y2 // c4c28545d3
+ VPSRLVQ (BX), Y15, Y11 // c46285451b
+ VPSRLVQ (R11), Y15, Y11 // c44285451b
+ VPSRLVQ Y2, Y15, Y11 // c4628545da
+ VPSRLVQ Y11, Y15, Y11 // c4428545db
+ VPSRLW (BX), X9, X2 // c4e131d113 or c5b1d113
+ VPSRLW (R11), X9, X2 // c4c131d113
+ VPSRLW X2, X9, X2 // c4e131d1d2 or c5b1d1d2
+ VPSRLW X11, X9, X2 // c4c131d1d3
+ VPSRLW (BX), X9, X11 // c46131d11b or c531d11b
+ VPSRLW (R11), X9, X11 // c44131d11b
+ VPSRLW X2, X9, X11 // c46131d1da or c531d1da
+ VPSRLW X11, X9, X11 // c44131d1db
+ VPSRLW $7, X2, X9 // c4e13171d207 or c5b171d207
+ VPSRLW $7, X11, X9 // c4c13171d307
+ VPSRLW (BX), Y15, Y2 // c4e105d113 or c585d113
+ VPSRLW (R11), Y15, Y2 // c4c105d113
+ VPSRLW X2, Y15, Y2 // c4e105d1d2 or c585d1d2
+ VPSRLW X11, Y15, Y2 // c4c105d1d3
+ VPSRLW (BX), Y15, Y11 // c46105d11b or c505d11b
+ VPSRLW (R11), Y15, Y11 // c44105d11b
+ VPSRLW X2, Y15, Y11 // c46105d1da or c505d1da
+ VPSRLW X11, Y15, Y11 // c44105d1db
+ VPSRLW $7, Y2, Y15 // c4e10571d207 or c58571d207
+ VPSRLW $7, Y11, Y15 // c4c10571d307
+ VPSUBB (BX), X9, X2 // c4e131f813 or c5b1f813
+ VPSUBB (R11), X9, X2 // c4c131f813
+ VPSUBB X2, X9, X2 // c4e131f8d2 or c5b1f8d2
+ VPSUBB X11, X9, X2 // c4c131f8d3
+ VPSUBB (BX), X9, X11 // c46131f81b or c531f81b
+ VPSUBB (R11), X9, X11 // c44131f81b
+ VPSUBB X2, X9, X11 // c46131f8da or c531f8da
+ VPSUBB X11, X9, X11 // c44131f8db
+ VPSUBB (BX), Y15, Y2 // c4e105f813 or c585f813
+ VPSUBB (R11), Y15, Y2 // c4c105f813
+ VPSUBB Y2, Y15, Y2 // c4e105f8d2 or c585f8d2
+ VPSUBB Y11, Y15, Y2 // c4c105f8d3
+ VPSUBB (BX), Y15, Y11 // c46105f81b or c505f81b
+ VPSUBB (R11), Y15, Y11 // c44105f81b
+ VPSUBB Y2, Y15, Y11 // c46105f8da or c505f8da
+ VPSUBB Y11, Y15, Y11 // c44105f8db
+ VPSUBD (BX), X9, X2 // c4e131fa13 or c5b1fa13
+ VPSUBD (R11), X9, X2 // c4c131fa13
+ VPSUBD X2, X9, X2 // c4e131fad2 or c5b1fad2
+ VPSUBD X11, X9, X2 // c4c131fad3
+ VPSUBD (BX), X9, X11 // c46131fa1b or c531fa1b
+ VPSUBD (R11), X9, X11 // c44131fa1b
+ VPSUBD X2, X9, X11 // c46131fada or c531fada
+ VPSUBD X11, X9, X11 // c44131fadb
+ VPSUBD (BX), Y15, Y2 // c4e105fa13 or c585fa13
+ VPSUBD (R11), Y15, Y2 // c4c105fa13
+ VPSUBD Y2, Y15, Y2 // c4e105fad2 or c585fad2
+ VPSUBD Y11, Y15, Y2 // c4c105fad3
+ VPSUBD (BX), Y15, Y11 // c46105fa1b or c505fa1b
+ VPSUBD (R11), Y15, Y11 // c44105fa1b
+ VPSUBD Y2, Y15, Y11 // c46105fada or c505fada
+ VPSUBD Y11, Y15, Y11 // c44105fadb
+ VPSUBQ (BX), X9, X2 // c4e131fb13 or c5b1fb13
+ VPSUBQ (R11), X9, X2 // c4c131fb13
+ VPSUBQ X2, X9, X2 // c4e131fbd2 or c5b1fbd2
+ VPSUBQ X11, X9, X2 // c4c131fbd3
+ VPSUBQ (BX), X9, X11 // c46131fb1b or c531fb1b
+ VPSUBQ (R11), X9, X11 // c44131fb1b
+ VPSUBQ X2, X9, X11 // c46131fbda or c531fbda
+ VPSUBQ X11, X9, X11 // c44131fbdb
+ VPSUBQ (BX), Y15, Y2 // c4e105fb13 or c585fb13
+ VPSUBQ (R11), Y15, Y2 // c4c105fb13
+ VPSUBQ Y2, Y15, Y2 // c4e105fbd2 or c585fbd2
+ VPSUBQ Y11, Y15, Y2 // c4c105fbd3
+ VPSUBQ (BX), Y15, Y11 // c46105fb1b or c505fb1b
+ VPSUBQ (R11), Y15, Y11 // c44105fb1b
+ VPSUBQ Y2, Y15, Y11 // c46105fbda or c505fbda
+ VPSUBQ Y11, Y15, Y11 // c44105fbdb
+ VPSUBSB (BX), X9, X2 // c4e131e813 or c5b1e813
+ VPSUBSB (R11), X9, X2 // c4c131e813
+ VPSUBSB X2, X9, X2 // c4e131e8d2 or c5b1e8d2
+ VPSUBSB X11, X9, X2 // c4c131e8d3
+ VPSUBSB (BX), X9, X11 // c46131e81b or c531e81b
+ VPSUBSB (R11), X9, X11 // c44131e81b
+ VPSUBSB X2, X9, X11 // c46131e8da or c531e8da
+ VPSUBSB X11, X9, X11 // c44131e8db
+ VPSUBSB (BX), Y15, Y2 // c4e105e813 or c585e813
+ VPSUBSB (R11), Y15, Y2 // c4c105e813
+ VPSUBSB Y2, Y15, Y2 // c4e105e8d2 or c585e8d2
+ VPSUBSB Y11, Y15, Y2 // c4c105e8d3
+ VPSUBSB (BX), Y15, Y11 // c46105e81b or c505e81b
+ VPSUBSB (R11), Y15, Y11 // c44105e81b
+ VPSUBSB Y2, Y15, Y11 // c46105e8da or c505e8da
+ VPSUBSB Y11, Y15, Y11 // c44105e8db
+ VPSUBSW (BX), X9, X2 // c4e131e913 or c5b1e913
+ VPSUBSW (R11), X9, X2 // c4c131e913
+ VPSUBSW X2, X9, X2 // c4e131e9d2 or c5b1e9d2
+ VPSUBSW X11, X9, X2 // c4c131e9d3
+ VPSUBSW (BX), X9, X11 // c46131e91b or c531e91b
+ VPSUBSW (R11), X9, X11 // c44131e91b
+ VPSUBSW X2, X9, X11 // c46131e9da or c531e9da
+ VPSUBSW X11, X9, X11 // c44131e9db
+ VPSUBSW (BX), Y15, Y2 // c4e105e913 or c585e913
+ VPSUBSW (R11), Y15, Y2 // c4c105e913
+ VPSUBSW Y2, Y15, Y2 // c4e105e9d2 or c585e9d2
+ VPSUBSW Y11, Y15, Y2 // c4c105e9d3
+ VPSUBSW (BX), Y15, Y11 // c46105e91b or c505e91b
+ VPSUBSW (R11), Y15, Y11 // c44105e91b
+ VPSUBSW Y2, Y15, Y11 // c46105e9da or c505e9da
+ VPSUBSW Y11, Y15, Y11 // c44105e9db
+ VPSUBUSB (BX), X9, X2 // c4e131d813 or c5b1d813
+ VPSUBUSB (R11), X9, X2 // c4c131d813
+ VPSUBUSB X2, X9, X2 // c4e131d8d2 or c5b1d8d2
+ VPSUBUSB X11, X9, X2 // c4c131d8d3
+ VPSUBUSB (BX), X9, X11 // c46131d81b or c531d81b
+ VPSUBUSB (R11), X9, X11 // c44131d81b
+ VPSUBUSB X2, X9, X11 // c46131d8da or c531d8da
+ VPSUBUSB X11, X9, X11 // c44131d8db
+ VPSUBUSB (BX), Y15, Y2 // c4e105d813 or c585d813
+ VPSUBUSB (R11), Y15, Y2 // c4c105d813
+ VPSUBUSB Y2, Y15, Y2 // c4e105d8d2 or c585d8d2
+ VPSUBUSB Y11, Y15, Y2 // c4c105d8d3
+ VPSUBUSB (BX), Y15, Y11 // c46105d81b or c505d81b
+ VPSUBUSB (R11), Y15, Y11 // c44105d81b
+ VPSUBUSB Y2, Y15, Y11 // c46105d8da or c505d8da
+ VPSUBUSB Y11, Y15, Y11 // c44105d8db
+ VPSUBUSW (BX), X9, X2 // c4e131d913 or c5b1d913
+ VPSUBUSW (R11), X9, X2 // c4c131d913
+ VPSUBUSW X2, X9, X2 // c4e131d9d2 or c5b1d9d2
+ VPSUBUSW X11, X9, X2 // c4c131d9d3
+ VPSUBUSW (BX), X9, X11 // c46131d91b or c531d91b
+ VPSUBUSW (R11), X9, X11 // c44131d91b
+ VPSUBUSW X2, X9, X11 // c46131d9da or c531d9da
+ VPSUBUSW X11, X9, X11 // c44131d9db
+ VPSUBUSW (BX), Y15, Y2 // c4e105d913 or c585d913
+ VPSUBUSW (R11), Y15, Y2 // c4c105d913
+ VPSUBUSW Y2, Y15, Y2 // c4e105d9d2 or c585d9d2
+ VPSUBUSW Y11, Y15, Y2 // c4c105d9d3
+ VPSUBUSW (BX), Y15, Y11 // c46105d91b or c505d91b
+ VPSUBUSW (R11), Y15, Y11 // c44105d91b
+ VPSUBUSW Y2, Y15, Y11 // c46105d9da or c505d9da
+ VPSUBUSW Y11, Y15, Y11 // c44105d9db
+ VPSUBW (BX), X9, X2 // c4e131f913 or c5b1f913
+ VPSUBW (R11), X9, X2 // c4c131f913
+ VPSUBW X2, X9, X2 // c4e131f9d2 or c5b1f9d2
+ VPSUBW X11, X9, X2 // c4c131f9d3
+ VPSUBW (BX), X9, X11 // c46131f91b or c531f91b
+ VPSUBW (R11), X9, X11 // c44131f91b
+ VPSUBW X2, X9, X11 // c46131f9da or c531f9da
+ VPSUBW X11, X9, X11 // c44131f9db
+ VPSUBW (BX), Y15, Y2 // c4e105f913 or c585f913
+ VPSUBW (R11), Y15, Y2 // c4c105f913
+ VPSUBW Y2, Y15, Y2 // c4e105f9d2 or c585f9d2
+ VPSUBW Y11, Y15, Y2 // c4c105f9d3
+ VPSUBW (BX), Y15, Y11 // c46105f91b or c505f91b
+ VPSUBW (R11), Y15, Y11 // c44105f91b
+ VPSUBW Y2, Y15, Y11 // c46105f9da or c505f9da
+ VPSUBW Y11, Y15, Y11 // c44105f9db
+ VPTEST (BX), X2 // c4e2791713
+ VPTEST (R11), X2 // c4c2791713
+ VPTEST X2, X2 // c4e27917d2
+ VPTEST X11, X2 // c4c27917d3
+ VPTEST (BX), X11 // c46279171b
+ VPTEST (R11), X11 // c44279171b
+ VPTEST X2, X11 // c4627917da
+ VPTEST X11, X11 // c4427917db
+ VPTEST (BX), Y2 // c4e27d1713
+ VPTEST (R11), Y2 // c4c27d1713
+ VPTEST Y2, Y2 // c4e27d17d2
+ VPTEST Y11, Y2 // c4c27d17d3
+ VPTEST (BX), Y11 // c4627d171b
+ VPTEST (R11), Y11 // c4427d171b
+ VPTEST Y2, Y11 // c4627d17da
+ VPTEST Y11, Y11 // c4427d17db
+ VPUNPCKHBW (BX), X9, X2 // c4e1316813 or c5b16813
+ VPUNPCKHBW (R11), X9, X2 // c4c1316813
+ VPUNPCKHBW X2, X9, X2 // c4e13168d2 or c5b168d2
+ VPUNPCKHBW X11, X9, X2 // c4c13168d3
+ VPUNPCKHBW (BX), X9, X11 // c46131681b or c531681b
+ VPUNPCKHBW (R11), X9, X11 // c44131681b
+ VPUNPCKHBW X2, X9, X11 // c4613168da or c53168da
+ VPUNPCKHBW X11, X9, X11 // c4413168db
+ VPUNPCKHBW (BX), Y15, Y2 // c4e1056813 or c5856813
+ VPUNPCKHBW (R11), Y15, Y2 // c4c1056813
+ VPUNPCKHBW Y2, Y15, Y2 // c4e10568d2 or c58568d2
+ VPUNPCKHBW Y11, Y15, Y2 // c4c10568d3
+ VPUNPCKHBW (BX), Y15, Y11 // c46105681b or c505681b
+ VPUNPCKHBW (R11), Y15, Y11 // c44105681b
+ VPUNPCKHBW Y2, Y15, Y11 // c4610568da or c50568da
+ VPUNPCKHBW Y11, Y15, Y11 // c4410568db
+ VPUNPCKHDQ (BX), X9, X2 // c4e1316a13 or c5b16a13
+ VPUNPCKHDQ (R11), X9, X2 // c4c1316a13
+ VPUNPCKHDQ X2, X9, X2 // c4e1316ad2 or c5b16ad2
+ VPUNPCKHDQ X11, X9, X2 // c4c1316ad3
+ VPUNPCKHDQ (BX), X9, X11 // c461316a1b or c5316a1b
+ VPUNPCKHDQ (R11), X9, X11 // c441316a1b
+ VPUNPCKHDQ X2, X9, X11 // c461316ada or c5316ada
+ VPUNPCKHDQ X11, X9, X11 // c441316adb
+ VPUNPCKHDQ (BX), Y15, Y2 // c4e1056a13 or c5856a13
+ VPUNPCKHDQ (R11), Y15, Y2 // c4c1056a13
+ VPUNPCKHDQ Y2, Y15, Y2 // c4e1056ad2 or c5856ad2
+ VPUNPCKHDQ Y11, Y15, Y2 // c4c1056ad3
+ VPUNPCKHDQ (BX), Y15, Y11 // c461056a1b or c5056a1b
+ VPUNPCKHDQ (R11), Y15, Y11 // c441056a1b
+ VPUNPCKHDQ Y2, Y15, Y11 // c461056ada or c5056ada
+ VPUNPCKHDQ Y11, Y15, Y11 // c441056adb
+ VPUNPCKHQDQ (BX), X9, X2 // c4e1316d13 or c5b16d13
+ VPUNPCKHQDQ (R11), X9, X2 // c4c1316d13
+ VPUNPCKHQDQ X2, X9, X2 // c4e1316dd2 or c5b16dd2
+ VPUNPCKHQDQ X11, X9, X2 // c4c1316dd3
+ VPUNPCKHQDQ (BX), X9, X11 // c461316d1b or c5316d1b
+ VPUNPCKHQDQ (R11), X9, X11 // c441316d1b
+ VPUNPCKHQDQ X2, X9, X11 // c461316dda or c5316dda
+ VPUNPCKHQDQ X11, X9, X11 // c441316ddb
+ VPUNPCKHQDQ (BX), Y15, Y2 // c4e1056d13 or c5856d13
+ VPUNPCKHQDQ (R11), Y15, Y2 // c4c1056d13
+ VPUNPCKHQDQ Y2, Y15, Y2 // c4e1056dd2 or c5856dd2
+ VPUNPCKHQDQ Y11, Y15, Y2 // c4c1056dd3
+ VPUNPCKHQDQ (BX), Y15, Y11 // c461056d1b or c5056d1b
+ VPUNPCKHQDQ (R11), Y15, Y11 // c441056d1b
+ VPUNPCKHQDQ Y2, Y15, Y11 // c461056dda or c5056dda
+ VPUNPCKHQDQ Y11, Y15, Y11 // c441056ddb
+ VPUNPCKHWD (BX), X9, X2 // c4e1316913 or c5b16913
+ VPUNPCKHWD (R11), X9, X2 // c4c1316913
+ VPUNPCKHWD X2, X9, X2 // c4e13169d2 or c5b169d2
+ VPUNPCKHWD X11, X9, X2 // c4c13169d3
+ VPUNPCKHWD (BX), X9, X11 // c46131691b or c531691b
+ VPUNPCKHWD (R11), X9, X11 // c44131691b
+ VPUNPCKHWD X2, X9, X11 // c4613169da or c53169da
+ VPUNPCKHWD X11, X9, X11 // c4413169db
+ VPUNPCKHWD (BX), Y15, Y2 // c4e1056913 or c5856913
+ VPUNPCKHWD (R11), Y15, Y2 // c4c1056913
+ VPUNPCKHWD Y2, Y15, Y2 // c4e10569d2 or c58569d2
+ VPUNPCKHWD Y11, Y15, Y2 // c4c10569d3
+ VPUNPCKHWD (BX), Y15, Y11 // c46105691b or c505691b
+ VPUNPCKHWD (R11), Y15, Y11 // c44105691b
+ VPUNPCKHWD Y2, Y15, Y11 // c4610569da or c50569da
+ VPUNPCKHWD Y11, Y15, Y11 // c4410569db
+ VPUNPCKLBW (BX), X9, X2 // c4e1316013 or c5b16013
+ VPUNPCKLBW (R11), X9, X2 // c4c1316013
+ VPUNPCKLBW X2, X9, X2 // c4e13160d2 or c5b160d2
+ VPUNPCKLBW X11, X9, X2 // c4c13160d3
+ VPUNPCKLBW (BX), X9, X11 // c46131601b or c531601b
+ VPUNPCKLBW (R11), X9, X11 // c44131601b
+ VPUNPCKLBW X2, X9, X11 // c4613160da or c53160da
+ VPUNPCKLBW X11, X9, X11 // c4413160db
+ VPUNPCKLBW (BX), Y15, Y2 // c4e1056013 or c5856013
+ VPUNPCKLBW (R11), Y15, Y2 // c4c1056013
+ VPUNPCKLBW Y2, Y15, Y2 // c4e10560d2 or c58560d2
+ VPUNPCKLBW Y11, Y15, Y2 // c4c10560d3
+ VPUNPCKLBW (BX), Y15, Y11 // c46105601b or c505601b
+ VPUNPCKLBW (R11), Y15, Y11 // c44105601b
+ VPUNPCKLBW Y2, Y15, Y11 // c4610560da or c50560da
+ VPUNPCKLBW Y11, Y15, Y11 // c4410560db
+ VPUNPCKLDQ (BX), X9, X2 // c4e1316213 or c5b16213
+ VPUNPCKLDQ (R11), X9, X2 // c4c1316213
+ VPUNPCKLDQ X2, X9, X2 // c4e13162d2 or c5b162d2
+ VPUNPCKLDQ X11, X9, X2 // c4c13162d3
+ VPUNPCKLDQ (BX), X9, X11 // c46131621b or c531621b
+ VPUNPCKLDQ (R11), X9, X11 // c44131621b
+ VPUNPCKLDQ X2, X9, X11 // c4613162da or c53162da
+ VPUNPCKLDQ X11, X9, X11 // c4413162db
+ VPUNPCKLDQ (BX), Y15, Y2 // c4e1056213 or c5856213
+ VPUNPCKLDQ (R11), Y15, Y2 // c4c1056213
+ VPUNPCKLDQ Y2, Y15, Y2 // c4e10562d2 or c58562d2
+ VPUNPCKLDQ Y11, Y15, Y2 // c4c10562d3
+ VPUNPCKLDQ (BX), Y15, Y11 // c46105621b or c505621b
+ VPUNPCKLDQ (R11), Y15, Y11 // c44105621b
+ VPUNPCKLDQ Y2, Y15, Y11 // c4610562da or c50562da
+ VPUNPCKLDQ Y11, Y15, Y11 // c4410562db
+ VPUNPCKLQDQ (BX), X9, X2 // c4e1316c13 or c5b16c13
+ VPUNPCKLQDQ (R11), X9, X2 // c4c1316c13
+ VPUNPCKLQDQ X2, X9, X2 // c4e1316cd2 or c5b16cd2
+ VPUNPCKLQDQ X11, X9, X2 // c4c1316cd3
+ VPUNPCKLQDQ (BX), X9, X11 // c461316c1b or c5316c1b
+ VPUNPCKLQDQ (R11), X9, X11 // c441316c1b
+ VPUNPCKLQDQ X2, X9, X11 // c461316cda or c5316cda
+ VPUNPCKLQDQ X11, X9, X11 // c441316cdb
+ VPUNPCKLQDQ (BX), Y15, Y2 // c4e1056c13 or c5856c13
+ VPUNPCKLQDQ (R11), Y15, Y2 // c4c1056c13
+ VPUNPCKLQDQ Y2, Y15, Y2 // c4e1056cd2 or c5856cd2
+ VPUNPCKLQDQ Y11, Y15, Y2 // c4c1056cd3
+ VPUNPCKLQDQ (BX), Y15, Y11 // c461056c1b or c5056c1b
+ VPUNPCKLQDQ (R11), Y15, Y11 // c441056c1b
+ VPUNPCKLQDQ Y2, Y15, Y11 // c461056cda or c5056cda
+ VPUNPCKLQDQ Y11, Y15, Y11 // c441056cdb
+ VPUNPCKLWD (BX), X9, X2 // c4e1316113 or c5b16113
+ VPUNPCKLWD (R11), X9, X2 // c4c1316113
+ VPUNPCKLWD X2, X9, X2 // c4e13161d2 or c5b161d2
+ VPUNPCKLWD X11, X9, X2 // c4c13161d3
+ VPUNPCKLWD (BX), X9, X11 // c46131611b or c531611b
+ VPUNPCKLWD (R11), X9, X11 // c44131611b
+ VPUNPCKLWD X2, X9, X11 // c4613161da or c53161da
+ VPUNPCKLWD X11, X9, X11 // c4413161db
+ VPUNPCKLWD (BX), Y15, Y2 // c4e1056113 or c5856113
+ VPUNPCKLWD (R11), Y15, Y2 // c4c1056113
+ VPUNPCKLWD Y2, Y15, Y2 // c4e10561d2 or c58561d2
+ VPUNPCKLWD Y11, Y15, Y2 // c4c10561d3
+ VPUNPCKLWD (BX), Y15, Y11 // c46105611b or c505611b
+ VPUNPCKLWD (R11), Y15, Y11 // c44105611b
+ VPUNPCKLWD Y2, Y15, Y11 // c4610561da or c50561da
+ VPUNPCKLWD Y11, Y15, Y11 // c4410561db
+ VPXOR (BX), X9, X2 // c4e131ef13 or c5b1ef13
+ VPXOR (R11), X9, X2 // c4c131ef13
+ VPXOR X2, X9, X2 // c4e131efd2 or c5b1efd2
+ VPXOR X11, X9, X2 // c4c131efd3
+ VPXOR (BX), X9, X11 // c46131ef1b or c531ef1b
+ VPXOR (R11), X9, X11 // c44131ef1b
+ VPXOR X2, X9, X11 // c46131efda or c531efda
+ VPXOR X11, X9, X11 // c44131efdb
+ VPXOR (BX), Y15, Y2 // c4e105ef13 or c585ef13
+ VPXOR (R11), Y15, Y2 // c4c105ef13
+ VPXOR Y2, Y15, Y2 // c4e105efd2 or c585efd2
+ VPXOR Y11, Y15, Y2 // c4c105efd3
+ VPXOR (BX), Y15, Y11 // c46105ef1b or c505ef1b
+ VPXOR (R11), Y15, Y11 // c44105ef1b
+ VPXOR Y2, Y15, Y11 // c46105efda or c505efda
+ VPXOR Y11, Y15, Y11 // c44105efdb
+ VRCPPS (BX), X2 // c4e1785313 or c5f85313
+ VRCPPS (R11), X2 // c4c1785313
+ VRCPPS X2, X2 // c4e17853d2 or c5f853d2
+ VRCPPS X11, X2 // c4c17853d3
+ VRCPPS (BX), X11 // c46178531b or c578531b
+ VRCPPS (R11), X11 // c44178531b
+ VRCPPS X2, X11 // c4617853da or c57853da
+ VRCPPS X11, X11 // c4417853db
+ VRCPPS (BX), Y2 // c4e17c5313 or c5fc5313
+ VRCPPS (R11), Y2 // c4c17c5313
+ VRCPPS Y2, Y2 // c4e17c53d2 or c5fc53d2
+ VRCPPS Y11, Y2 // c4c17c53d3
+ VRCPPS (BX), Y11 // c4617c531b or c57c531b
+ VRCPPS (R11), Y11 // c4417c531b
+ VRCPPS Y2, Y11 // c4617c53da or c57c53da
+ VRCPPS Y11, Y11 // c4417c53db
+ VRCPSS (BX), X9, X2 // c4e1325313 or c5b25313
+ VRCPSS (R11), X9, X2 // c4c1325313
+ VRCPSS X2, X9, X2 // c4e13253d2 or c5b253d2
+ VRCPSS X11, X9, X2 // c4c13253d3
+ VRCPSS (BX), X9, X11 // c46132531b or c532531b
+ VRCPSS (R11), X9, X11 // c44132531b
+ VRCPSS X2, X9, X11 // c4613253da or c53253da
+ VRCPSS X11, X9, X11 // c4413253db
+ VROUNDPD $7, (BX), X2 // c4e379091307
+ VROUNDPD $7, (R11), X2 // c4c379091307
+ VROUNDPD $7, X2, X2 // c4e37909d207
+ VROUNDPD $7, X11, X2 // c4c37909d307
+ VROUNDPD $7, (BX), X11 // c46379091b07
+ VROUNDPD $7, (R11), X11 // c44379091b07
+ VROUNDPD $7, X2, X11 // c4637909da07
+ VROUNDPD $7, X11, X11 // c4437909db07
+ VROUNDPD $7, (BX), Y2 // c4e37d091307
+ VROUNDPD $7, (R11), Y2 // c4c37d091307
+ VROUNDPD $7, Y2, Y2 // c4e37d09d207
+ VROUNDPD $7, Y11, Y2 // c4c37d09d307
+ VROUNDPD $7, (BX), Y11 // c4637d091b07
+ VROUNDPD $7, (R11), Y11 // c4437d091b07
+ VROUNDPD $7, Y2, Y11 // c4637d09da07
+ VROUNDPD $7, Y11, Y11 // c4437d09db07
+ VROUNDPS $7, (BX), X2 // c4e379081307
+ VROUNDPS $7, (R11), X2 // c4c379081307
+ VROUNDPS $7, X2, X2 // c4e37908d207
+ VROUNDPS $7, X11, X2 // c4c37908d307
+ VROUNDPS $7, (BX), X11 // c46379081b07
+ VROUNDPS $7, (R11), X11 // c44379081b07
+ VROUNDPS $7, X2, X11 // c4637908da07
+ VROUNDPS $7, X11, X11 // c4437908db07
+ VROUNDPS $7, (BX), Y2 // c4e37d081307
+ VROUNDPS $7, (R11), Y2 // c4c37d081307
+ VROUNDPS $7, Y2, Y2 // c4e37d08d207
+ VROUNDPS $7, Y11, Y2 // c4c37d08d307
+ VROUNDPS $7, (BX), Y11 // c4637d081b07
+ VROUNDPS $7, (R11), Y11 // c4437d081b07
+ VROUNDPS $7, Y2, Y11 // c4637d08da07
+ VROUNDPS $7, Y11, Y11 // c4437d08db07
+ VROUNDSD $7, (BX), X9, X2 // c4e3310b1307
+ VROUNDSD $7, (R11), X9, X2 // c4c3310b1307
+ VROUNDSD $7, X2, X9, X2 // c4e3310bd207
+ VROUNDSD $7, X11, X9, X2 // c4c3310bd307
+ VROUNDSD $7, (BX), X9, X11 // c463310b1b07
+ VROUNDSD $7, (R11), X9, X11 // c443310b1b07
+ VROUNDSD $7, X2, X9, X11 // c463310bda07
+ VROUNDSD $7, X11, X9, X11 // c443310bdb07
+ VROUNDSS $7, (BX), X9, X2 // c4e3310a1307
+ VROUNDSS $7, (R11), X9, X2 // c4c3310a1307
+ VROUNDSS $7, X2, X9, X2 // c4e3310ad207
+ VROUNDSS $7, X11, X9, X2 // c4c3310ad307
+ VROUNDSS $7, (BX), X9, X11 // c463310a1b07
+ VROUNDSS $7, (R11), X9, X11 // c443310a1b07
+ VROUNDSS $7, X2, X9, X11 // c463310ada07
+ VROUNDSS $7, X11, X9, X11 // c443310adb07
+ VRSQRTPS (BX), X2 // c4e1785213 or c5f85213
+ VRSQRTPS (R11), X2 // c4c1785213
+ VRSQRTPS X2, X2 // c4e17852d2 or c5f852d2
+ VRSQRTPS X11, X2 // c4c17852d3
+ VRSQRTPS (BX), X11 // c46178521b or c578521b
+ VRSQRTPS (R11), X11 // c44178521b
+ VRSQRTPS X2, X11 // c4617852da or c57852da
+ VRSQRTPS X11, X11 // c4417852db
+ VRSQRTPS (BX), Y2 // c4e17c5213 or c5fc5213
+ VRSQRTPS (R11), Y2 // c4c17c5213
+ VRSQRTPS Y2, Y2 // c4e17c52d2 or c5fc52d2
+ VRSQRTPS Y11, Y2 // c4c17c52d3
+ VRSQRTPS (BX), Y11 // c4617c521b or c57c521b
+ VRSQRTPS (R11), Y11 // c4417c521b
+ VRSQRTPS Y2, Y11 // c4617c52da or c57c52da
+ VRSQRTPS Y11, Y11 // c4417c52db
+ VRSQRTSS (BX), X9, X2 // c4e1325213 or c5b25213
+ VRSQRTSS (R11), X9, X2 // c4c1325213
+ VRSQRTSS X2, X9, X2 // c4e13252d2 or c5b252d2
+ VRSQRTSS X11, X9, X2 // c4c13252d3
+ VRSQRTSS (BX), X9, X11 // c46132521b or c532521b
+ VRSQRTSS (R11), X9, X11 // c44132521b
+ VRSQRTSS X2, X9, X11 // c4613252da or c53252da
+ VRSQRTSS X11, X9, X11 // c4413252db
+ VSHUFPD $7, (BX), X9, X2 // c4e131c61307 or c5b1c61307
+ VSHUFPD $7, (R11), X9, X2 // c4c131c61307
+ VSHUFPD $7, X2, X9, X2 // c4e131c6d207 or c5b1c6d207
+ VSHUFPD $7, X11, X9, X2 // c4c131c6d307
+ VSHUFPD $7, (BX), X9, X11 // c46131c61b07 or c531c61b07
+ VSHUFPD $7, (R11), X9, X11 // c44131c61b07
+ VSHUFPD $7, X2, X9, X11 // c46131c6da07 or c531c6da07
+ VSHUFPD $7, X11, X9, X11 // c44131c6db07
+ VSHUFPD $7, (BX), Y15, Y2 // c4e105c61307 or c585c61307
+ VSHUFPD $7, (R11), Y15, Y2 // c4c105c61307
+ VSHUFPD $7, Y2, Y15, Y2 // c4e105c6d207 or c585c6d207
+ VSHUFPD $7, Y11, Y15, Y2 // c4c105c6d307
+ VSHUFPD $7, (BX), Y15, Y11 // c46105c61b07 or c505c61b07
+ VSHUFPD $7, (R11), Y15, Y11 // c44105c61b07
+ VSHUFPD $7, Y2, Y15, Y11 // c46105c6da07 or c505c6da07
+ VSHUFPD $7, Y11, Y15, Y11 // c44105c6db07
+ VSHUFPS $7, (BX), X9, X2 // c4e130c61307 or c5b0c61307
+ VSHUFPS $7, (R11), X9, X2 // c4c130c61307
+ VSHUFPS $7, X2, X9, X2 // c4e130c6d207 or c5b0c6d207
+ VSHUFPS $7, X11, X9, X2 // c4c130c6d307
+ VSHUFPS $7, (BX), X9, X11 // c46130c61b07 or c530c61b07
+ VSHUFPS $7, (R11), X9, X11 // c44130c61b07
+ VSHUFPS $7, X2, X9, X11 // c46130c6da07 or c530c6da07
+ VSHUFPS $7, X11, X9, X11 // c44130c6db07
+ VSHUFPS $7, (BX), Y15, Y2 // c4e104c61307 or c584c61307
+ VSHUFPS $7, (R11), Y15, Y2 // c4c104c61307
+ VSHUFPS $7, Y2, Y15, Y2 // c4e104c6d207 or c584c6d207
+ VSHUFPS $7, Y11, Y15, Y2 // c4c104c6d307
+ VSHUFPS $7, (BX), Y15, Y11 // c46104c61b07 or c504c61b07
+ VSHUFPS $7, (R11), Y15, Y11 // c44104c61b07
+ VSHUFPS $7, Y2, Y15, Y11 // c46104c6da07 or c504c6da07
+ VSHUFPS $7, Y11, Y15, Y11 // c44104c6db07
+ VSQRTPD (BX), X2 // c4e1795113 or c5f95113
+ VSQRTPD (R11), X2 // c4c1795113
+ VSQRTPD X2, X2 // c4e17951d2 or c5f951d2
+ VSQRTPD X11, X2 // c4c17951d3
+ VSQRTPD (BX), X11 // c46179511b or c579511b
+ VSQRTPD (R11), X11 // c44179511b
+ VSQRTPD X2, X11 // c4617951da or c57951da
+ VSQRTPD X11, X11 // c4417951db
+ VSQRTPD (BX), Y2 // c4e17d5113 or c5fd5113
+ VSQRTPD (R11), Y2 // c4c17d5113
+ VSQRTPD Y2, Y2 // c4e17d51d2 or c5fd51d2
+ VSQRTPD Y11, Y2 // c4c17d51d3
+ VSQRTPD (BX), Y11 // c4617d511b or c57d511b
+ VSQRTPD (R11), Y11 // c4417d511b
+ VSQRTPD Y2, Y11 // c4617d51da or c57d51da
+ VSQRTPD Y11, Y11 // c4417d51db
+ VSQRTPS (BX), X2 // c4e1785113 or c5f85113
+ VSQRTPS (R11), X2 // c4c1785113
+ VSQRTPS X2, X2 // c4e17851d2 or c5f851d2
+ VSQRTPS X11, X2 // c4c17851d3
+ VSQRTPS (BX), X11 // c46178511b or c578511b
+ VSQRTPS (R11), X11 // c44178511b
+ VSQRTPS X2, X11 // c4617851da or c57851da
+ VSQRTPS X11, X11 // c4417851db
+ VSQRTPS (BX), Y2 // c4e17c5113 or c5fc5113
+ VSQRTPS (R11), Y2 // c4c17c5113
+ VSQRTPS Y2, Y2 // c4e17c51d2 or c5fc51d2
+ VSQRTPS Y11, Y2 // c4c17c51d3
+ VSQRTPS (BX), Y11 // c4617c511b or c57c511b
+ VSQRTPS (R11), Y11 // c4417c511b
+ VSQRTPS Y2, Y11 // c4617c51da or c57c51da
+ VSQRTPS Y11, Y11 // c4417c51db
+ VSQRTSD (BX), X9, X2 // c4e1335113 or c5b35113
+ VSQRTSD (R11), X9, X2 // c4c1335113
+ VSQRTSD X2, X9, X2 // c4e13351d2 or c5b351d2
+ VSQRTSD X11, X9, X2 // c4c13351d3
+ VSQRTSD (BX), X9, X11 // c46133511b or c533511b
+ VSQRTSD (R11), X9, X11 // c44133511b
+ VSQRTSD X2, X9, X11 // c4613351da or c53351da
+ VSQRTSD X11, X9, X11 // c4413351db
+ VSQRTSS (BX), X9, X2 // c4e1325113 or c5b25113
+ VSQRTSS (R11), X9, X2 // c4c1325113
+ VSQRTSS X2, X9, X2 // c4e13251d2 or c5b251d2
+ VSQRTSS X11, X9, X2 // c4c13251d3
+ VSQRTSS (BX), X9, X11 // c46132511b or c532511b
+ VSQRTSS (R11), X9, X11 // c44132511b
+ VSQRTSS X2, X9, X11 // c4613251da or c53251da
+ VSQRTSS X11, X9, X11 // c4413251db
+ VSTMXCSR (BX) // c4e178ae1b or c5f8ae1b
+ VSTMXCSR (R11) // c4c178ae1b
+ VSUBPD (BX), X9, X2 // c4e1315c13 or c5b15c13
+ VSUBPD (R11), X9, X2 // c4c1315c13
+ VSUBPD X2, X9, X2 // c4e1315cd2 or c5b15cd2
+ VSUBPD X11, X9, X2 // c4c1315cd3
+ VSUBPD (BX), X9, X11 // c461315c1b or c5315c1b
+ VSUBPD (R11), X9, X11 // c441315c1b
+ VSUBPD X2, X9, X11 // c461315cda or c5315cda
+ VSUBPD X11, X9, X11 // c441315cdb
+ VSUBPD (BX), Y15, Y2 // c4e1055c13 or c5855c13
+ VSUBPD (R11), Y15, Y2 // c4c1055c13
+ VSUBPD Y2, Y15, Y2 // c4e1055cd2 or c5855cd2
+ VSUBPD Y11, Y15, Y2 // c4c1055cd3
+ VSUBPD (BX), Y15, Y11 // c461055c1b or c5055c1b
+ VSUBPD (R11), Y15, Y11 // c441055c1b
+ VSUBPD Y2, Y15, Y11 // c461055cda or c5055cda
+ VSUBPD Y11, Y15, Y11 // c441055cdb
+ VSUBPS (BX), X9, X2 // c4e1305c13 or c5b05c13
+ VSUBPS (R11), X9, X2 // c4c1305c13
+ VSUBPS X2, X9, X2 // c4e1305cd2 or c5b05cd2
+ VSUBPS X11, X9, X2 // c4c1305cd3
+ VSUBPS (BX), X9, X11 // c461305c1b or c5305c1b
+ VSUBPS (R11), X9, X11 // c441305c1b
+ VSUBPS X2, X9, X11 // c461305cda or c5305cda
+ VSUBPS X11, X9, X11 // c441305cdb
+ VSUBPS (BX), Y15, Y2 // c4e1045c13 or c5845c13
+ VSUBPS (R11), Y15, Y2 // c4c1045c13
+ VSUBPS Y2, Y15, Y2 // c4e1045cd2 or c5845cd2
+ VSUBPS Y11, Y15, Y2 // c4c1045cd3
+ VSUBPS (BX), Y15, Y11 // c461045c1b or c5045c1b
+ VSUBPS (R11), Y15, Y11 // c441045c1b
+ VSUBPS Y2, Y15, Y11 // c461045cda or c5045cda
+ VSUBPS Y11, Y15, Y11 // c441045cdb
+ VSUBSD (BX), X9, X2 // c4e1335c13 or c5b35c13
+ VSUBSD (R11), X9, X2 // c4c1335c13
+ VSUBSD X2, X9, X2 // c4e1335cd2 or c5b35cd2
+ VSUBSD X11, X9, X2 // c4c1335cd3
+ VSUBSD (BX), X9, X11 // c461335c1b or c5335c1b
+ VSUBSD (R11), X9, X11 // c441335c1b
+ VSUBSD X2, X9, X11 // c461335cda or c5335cda
+ VSUBSD X11, X9, X11 // c441335cdb
+ VSUBSS (BX), X9, X2 // c4e1325c13 or c5b25c13
+ VSUBSS (R11), X9, X2 // c4c1325c13
+ VSUBSS X2, X9, X2 // c4e1325cd2 or c5b25cd2
+ VSUBSS X11, X9, X2 // c4c1325cd3
+ VSUBSS (BX), X9, X11 // c461325c1b or c5325c1b
+ VSUBSS (R11), X9, X11 // c441325c1b
+ VSUBSS X2, X9, X11 // c461325cda or c5325cda
+ VSUBSS X11, X9, X11 // c441325cdb
+ VTESTPD (BX), X2 // c4e2790f13
+ VTESTPD (R11), X2 // c4c2790f13
+ VTESTPD X2, X2 // c4e2790fd2
+ VTESTPD X11, X2 // c4c2790fd3
+ VTESTPD (BX), X11 // c462790f1b
+ VTESTPD (R11), X11 // c442790f1b
+ VTESTPD X2, X11 // c462790fda
+ VTESTPD X11, X11 // c442790fdb
+ VTESTPD (BX), Y2 // c4e27d0f13
+ VTESTPD (R11), Y2 // c4c27d0f13
+ VTESTPD Y2, Y2 // c4e27d0fd2
+ VTESTPD Y11, Y2 // c4c27d0fd3
+ VTESTPD (BX), Y11 // c4627d0f1b
+ VTESTPD (R11), Y11 // c4427d0f1b
+ VTESTPD Y2, Y11 // c4627d0fda
+ VTESTPD Y11, Y11 // c4427d0fdb
+ VTESTPS (BX), X2 // c4e2790e13
+ VTESTPS (R11), X2 // c4c2790e13
+ VTESTPS X2, X2 // c4e2790ed2
+ VTESTPS X11, X2 // c4c2790ed3
+ VTESTPS (BX), X11 // c462790e1b
+ VTESTPS (R11), X11 // c442790e1b
+ VTESTPS X2, X11 // c462790eda
+ VTESTPS X11, X11 // c442790edb
+ VTESTPS (BX), Y2 // c4e27d0e13
+ VTESTPS (R11), Y2 // c4c27d0e13
+ VTESTPS Y2, Y2 // c4e27d0ed2
+ VTESTPS Y11, Y2 // c4c27d0ed3
+ VTESTPS (BX), Y11 // c4627d0e1b
+ VTESTPS (R11), Y11 // c4427d0e1b
+ VTESTPS Y2, Y11 // c4627d0eda
+ VTESTPS Y11, Y11 // c4427d0edb
+ VUCOMISD (BX), X2 // c4e1792e13 or c5f92e13
+ VUCOMISD (R11), X2 // c4c1792e13
+ VUCOMISD X2, X2 // c4e1792ed2 or c5f92ed2
+ VUCOMISD X11, X2 // c4c1792ed3
+ VUCOMISD (BX), X11 // c461792e1b or c5792e1b
+ VUCOMISD (R11), X11 // c441792e1b
+ VUCOMISD X2, X11 // c461792eda or c5792eda
+ VUCOMISD X11, X11 // c441792edb
+ VUCOMISS (BX), X2 // c4e1782e13 or c5f82e13
+ VUCOMISS (R11), X2 // c4c1782e13
+ VUCOMISS X2, X2 // c4e1782ed2 or c5f82ed2
+ VUCOMISS X11, X2 // c4c1782ed3
+ VUCOMISS (BX), X11 // c461782e1b or c5782e1b
+ VUCOMISS (R11), X11 // c441782e1b
+ VUCOMISS X2, X11 // c461782eda or c5782eda
+ VUCOMISS X11, X11 // c441782edb
+ VUNPCKHPD (BX), X9, X2 // c4e1311513 or c5b11513
+ VUNPCKHPD (R11), X9, X2 // c4c1311513
+ VUNPCKHPD X2, X9, X2 // c4e13115d2 or c5b115d2
+ VUNPCKHPD X11, X9, X2 // c4c13115d3
+ VUNPCKHPD (BX), X9, X11 // c46131151b or c531151b
+ VUNPCKHPD (R11), X9, X11 // c44131151b
+ VUNPCKHPD X2, X9, X11 // c4613115da or c53115da
+ VUNPCKHPD X11, X9, X11 // c4413115db
+ VUNPCKHPD (BX), Y15, Y2 // c4e1051513 or c5851513
+ VUNPCKHPD (R11), Y15, Y2 // c4c1051513
+ VUNPCKHPD Y2, Y15, Y2 // c4e10515d2 or c58515d2
+ VUNPCKHPD Y11, Y15, Y2 // c4c10515d3
+ VUNPCKHPD (BX), Y15, Y11 // c46105151b or c505151b
+ VUNPCKHPD (R11), Y15, Y11 // c44105151b
+ VUNPCKHPD Y2, Y15, Y11 // c4610515da or c50515da
+ VUNPCKHPD Y11, Y15, Y11 // c4410515db
+ VUNPCKHPS (BX), X9, X2 // c4e1301513 or c5b01513
+ VUNPCKHPS (R11), X9, X2 // c4c1301513
+ VUNPCKHPS X2, X9, X2 // c4e13015d2 or c5b015d2
+ VUNPCKHPS X11, X9, X2 // c4c13015d3
+ VUNPCKHPS (BX), X9, X11 // c46130151b or c530151b
+ VUNPCKHPS (R11), X9, X11 // c44130151b
+ VUNPCKHPS X2, X9, X11 // c4613015da or c53015da
+ VUNPCKHPS X11, X9, X11 // c4413015db
+ VUNPCKHPS (BX), Y15, Y2 // c4e1041513 or c5841513
+ VUNPCKHPS (R11), Y15, Y2 // c4c1041513
+ VUNPCKHPS Y2, Y15, Y2 // c4e10415d2 or c58415d2
+ VUNPCKHPS Y11, Y15, Y2 // c4c10415d3
+ VUNPCKHPS (BX), Y15, Y11 // c46104151b or c504151b
+ VUNPCKHPS (R11), Y15, Y11 // c44104151b
+ VUNPCKHPS Y2, Y15, Y11 // c4610415da or c50415da
+ VUNPCKHPS Y11, Y15, Y11 // c4410415db
+ VUNPCKLPD (BX), X9, X2 // c4e1311413 or c5b11413
+ VUNPCKLPD (R11), X9, X2 // c4c1311413
+ VUNPCKLPD X2, X9, X2 // c4e13114d2 or c5b114d2
+ VUNPCKLPD X11, X9, X2 // c4c13114d3
+ VUNPCKLPD (BX), X9, X11 // c46131141b or c531141b
+ VUNPCKLPD (R11), X9, X11 // c44131141b
+ VUNPCKLPD X2, X9, X11 // c4613114da or c53114da
+ VUNPCKLPD X11, X9, X11 // c4413114db
+ VUNPCKLPD (BX), Y15, Y2 // c4e1051413 or c5851413
+ VUNPCKLPD (R11), Y15, Y2 // c4c1051413
+ VUNPCKLPD Y2, Y15, Y2 // c4e10514d2 or c58514d2
+ VUNPCKLPD Y11, Y15, Y2 // c4c10514d3
+ VUNPCKLPD (BX), Y15, Y11 // c46105141b or c505141b
+ VUNPCKLPD (R11), Y15, Y11 // c44105141b
+ VUNPCKLPD Y2, Y15, Y11 // c4610514da or c50514da
+ VUNPCKLPD Y11, Y15, Y11 // c4410514db
+ VUNPCKLPS (BX), X9, X2 // c4e1301413 or c5b01413
+ VUNPCKLPS (R11), X9, X2 // c4c1301413
+ VUNPCKLPS X2, X9, X2 // c4e13014d2 or c5b014d2
+ VUNPCKLPS X11, X9, X2 // c4c13014d3
+ VUNPCKLPS (BX), X9, X11 // c46130141b or c530141b
+ VUNPCKLPS (R11), X9, X11 // c44130141b
+ VUNPCKLPS X2, X9, X11 // c4613014da or c53014da
+ VUNPCKLPS X11, X9, X11 // c4413014db
+ VUNPCKLPS (BX), Y15, Y2 // c4e1041413 or c5841413
+ VUNPCKLPS (R11), Y15, Y2 // c4c1041413
+ VUNPCKLPS Y2, Y15, Y2 // c4e10414d2 or c58414d2
+ VUNPCKLPS Y11, Y15, Y2 // c4c10414d3
+ VUNPCKLPS (BX), Y15, Y11 // c46104141b or c504141b
+ VUNPCKLPS (R11), Y15, Y11 // c44104141b
+ VUNPCKLPS Y2, Y15, Y11 // c4610414da or c50414da
+ VUNPCKLPS Y11, Y15, Y11 // c4410414db
+ VXORPD (BX), X9, X2 // c4e1315713 or c5b15713
+ VXORPD (R11), X9, X2 // c4c1315713
+ VXORPD X2, X9, X2 // c4e13157d2 or c5b157d2
+ VXORPD X11, X9, X2 // c4c13157d3
+ VXORPD (BX), X9, X11 // c46131571b or c531571b
+ VXORPD (R11), X9, X11 // c44131571b
+ VXORPD X2, X9, X11 // c4613157da or c53157da
+ VXORPD X11, X9, X11 // c4413157db
+ VXORPD (BX), Y15, Y2 // c4e1055713 or c5855713
+ VXORPD (R11), Y15, Y2 // c4c1055713
+ VXORPD Y2, Y15, Y2 // c4e10557d2 or c58557d2
+ VXORPD Y11, Y15, Y2 // c4c10557d3
+ VXORPD (BX), Y15, Y11 // c46105571b or c505571b
+ VXORPD (R11), Y15, Y11 // c44105571b
+ VXORPD Y2, Y15, Y11 // c4610557da or c50557da
+ VXORPD Y11, Y15, Y11 // c4410557db
+ VXORPS (BX), X9, X2 // c4e1305713 or c5b05713
+ VXORPS (R11), X9, X2 // c4c1305713
+ VXORPS X2, X9, X2 // c4e13057d2 or c5b057d2
+ VXORPS X11, X9, X2 // c4c13057d3
+ VXORPS (BX), X9, X11 // c46130571b or c530571b
+ VXORPS (R11), X9, X11 // c44130571b
+ VXORPS X2, X9, X11 // c4613057da or c53057da
+ VXORPS X11, X9, X11 // c4413057db
+ VXORPS (BX), Y15, Y2 // c4e1045713 or c5845713
+ VXORPS (R11), Y15, Y2 // c4c1045713
+ VXORPS Y2, Y15, Y2 // c4e10457d2 or c58457d2
+ VXORPS Y11, Y15, Y2 // c4c10457d3
+ VXORPS (BX), Y15, Y11 // c46104571b or c504571b
+ VXORPS (R11), Y15, Y11 // c44104571b
+ VXORPS Y2, Y15, Y11 // c4610457da or c50457da
+ VXORPS Y11, Y15, Y11 // c4410457db
+ VZEROALL // c4e17c77 or c5fc77
+ VZEROUPPER // c4e17877 or c5f877
+ WBINVD // 0f09
+ WRFSBASEL DX // f30faed2
+ WRFSBASEL R11 // f3410faed3
+ WRGSBASEL DX // f30faeda
+ WRGSBASEL R11 // f3410faedb
+ WRFSBASEQ DX // f3480faed2
+ WRFSBASEQ R11 // f3490faed3
+ WRGSBASEQ DX // f3480faeda
+ WRGSBASEQ R11 // f3490faedb
+ WRMSR // 0f30
+ WRPKRU // 0f01ef
+ XABORT $7 // c6f807
+ XADDW DX, (BX) // 660fc113
+ XADDW R11, (BX) // 66440fc11b
+ XADDW DX, (R11) // 66410fc113
+ XADDW R11, (R11) // 66450fc11b
+ XADDW DX, DX // 660fc1d2
+ XADDW R11, DX // 66440fc1da
+ XADDW DX, R11 // 66410fc1d3
+ XADDW R11, R11 // 66450fc1db
+ XADDL DX, (BX) // 0fc113
+ XADDL R11, (BX) // 440fc11b
+ XADDL DX, (R11) // 410fc113
+ XADDL R11, (R11) // 450fc11b
+ XADDL DX, DX // 0fc1d2
+ XADDL R11, DX // 440fc1da
+ XADDL DX, R11 // 410fc1d3
+ XADDL R11, R11 // 450fc1db
+ XADDQ DX, (BX) // 480fc113
+ XADDQ R11, (BX) // 4c0fc11b
+ XADDQ DX, (R11) // 490fc113
+ XADDQ R11, (R11) // 4d0fc11b
+ XADDQ DX, DX // 480fc1d2
+ XADDQ R11, DX // 4c0fc1da
+ XADDQ DX, R11 // 490fc1d3
+ XADDQ R11, R11 // 4d0fc1db
+ XADDB DL, (BX) // 0fc013
+ XADDB R11, (BX) // 440fc01b
+ XADDB DL, (R11) // 410fc013
+ XADDB R11, (R11) // 450fc01b
+ XADDB DL, DL // 0fc0d2
+ XADDB R11, DL // 440fc0da
+ XADDB DL, R11 // 410fc0d3
+ XADDB R11, R11 // 450fc0db
+ //TODO: XBEGIN .+$0x1122 // 66c7f82211
+ //TODO: XBEGIN .+$0x11223344 // c7f844332211
+ XCHGW DX, (BX) // 668713
+ XCHGW R11, (BX) // 6644871b
+ XCHGW DX, (R11) // 66418713
+ XCHGW R11, (R11) // 6645871b
+ XCHGW DX, DX // 6687d2
+ XCHGW R11, DX // 664487da
+ XCHGW DX, R11 // 664187d3
+ XCHGW R11, R11 // 664587db
+ XCHGL DX, (BX) // 8713
+ XCHGL R11, (BX) // 44871b
+ XCHGL DX, (R11) // 418713
+ XCHGL R11, (R11) // 45871b
+ XCHGL DX, DX // 87d2
+ XCHGL R11, DX // 4487da
+ XCHGL DX, R11 // 4187d3
+ XCHGL R11, R11 // 4587db
+ XCHGQ DX, (BX) // 488713
+ XCHGQ R11, (BX) // 4c871b
+ XCHGQ DX, (R11) // 498713
+ XCHGQ R11, (R11) // 4d871b
+ XCHGQ DX, DX // 4887d2
+ XCHGQ R11, DX // 4c87da
+ XCHGQ DX, R11 // 4987d3
+ XCHGQ R11, R11 // 4d87db
+ XCHGB DL, (BX) // 8613
+ XCHGB R11, (BX) // 44861b
+ XCHGB DL, (R11) // 418613
+ XCHGB R11, (R11) // 45861b
+ XCHGB DL, DL // 86d2
+ XCHGB R11, DL // 4486da
+ XCHGB DL, R11 // 4186d3
+ XCHGB R11, R11 // 4586db
+ XCHGW AX, DX // 6692
+ XCHGW AX, R11 // 664193
+ XCHGL AX, DX // 92
+ XCHGL AX, R11 // 4193
+ XCHGQ AX, DX // 4892
+ XCHGQ AX, R11 // 4993
+ XEND // 0f01d5
+ XGETBV // 0f01d0
+ XLAT // d7
+ XORB $7, AL // 3407
+ XORW $61731, AX // 663523f1
+ XORL $4045620583, AX // 35674523f1
+ XORQ $-249346713, AX // 4835674523f1
+ XORW $61731, (BX) // 66813323f1
+ XORW $61731, (R11) // 6641813323f1
+ XORW $61731, DX // 6681f223f1
+ XORW $61731, R11 // 664181f323f1
+ XORW $7, (BX) // 66833307
+ XORW $7, (R11) // 6641833307
+ XORW $7, DX // 6683f207
+ XORW $7, R11 // 664183f307
+ XORW DX, (BX) // 663113
+ XORW R11, (BX) // 6644311b
+ XORW DX, (R11) // 66413113
+ XORW R11, (R11) // 6645311b
+ XORW DX, DX // 6631d2 or 6633d2
+ XORW R11, DX // 664431da or 664133d3
+ XORW DX, R11 // 664131d3 or 664433da
+ XORW R11, R11 // 664531db or 664533db
+ XORL $4045620583, (BX) // 8133674523f1
+ XORL $4045620583, (R11) // 418133674523f1
+ XORL $4045620583, DX // 81f2674523f1
+ XORL $4045620583, R11 // 4181f3674523f1
+ XORL $7, (BX) // 833307
+ XORL $7, (R11) // 41833307
+ XORL $7, DX // 83f207
+ XORL $7, R11 // 4183f307
+ XORL DX, (BX) // 3113
+ XORL R11, (BX) // 44311b
+ XORL DX, (R11) // 413113
+ XORL R11, (R11) // 45311b
+ XORL DX, DX // 31d2 or 33d2
+ XORL R11, DX // 4431da or 4133d3
+ XORL DX, R11 // 4131d3 or 4433da
+ XORL R11, R11 // 4531db or 4533db
+ XORQ $-249346713, (BX) // 488133674523f1
+ XORQ $-249346713, (R11) // 498133674523f1
+ XORQ $-249346713, DX // 4881f2674523f1
+ XORQ $-249346713, R11 // 4981f3674523f1
+ XORQ $7, (BX) // 48833307
+ XORQ $7, (R11) // 49833307
+ XORQ $7, DX // 4883f207
+ XORQ $7, R11 // 4983f307
+ XORQ DX, (BX) // 483113
+ XORQ R11, (BX) // 4c311b
+ XORQ DX, (R11) // 493113
+ XORQ R11, (R11) // 4d311b
+ XORQ DX, DX // 4831d2 or 4833d2
+ XORQ R11, DX // 4c31da or 4933d3
+ XORQ DX, R11 // 4931d3 or 4c33da
+ XORQ R11, R11 // 4d31db or 4d33db
+ XORB $7, (BX) // 803307
+ XORB $7, (R11) // 41803307
+ XORB $7, DL // 80f207
+ XORB $7, R11 // 4180f307
+ XORB DL, (BX) // 3013
+ XORB R11, (BX) // 44301b
+ XORB DL, (R11) // 413013
+ XORB R11, (R11) // 45301b
+ XORB DL, DL // 30d2 or 32d2
+ XORB R11, DL // 4430da or 4132d3
+ XORB DL, R11 // 4130d3 or 4432da
+ XORB R11, R11 // 4530db or 4532db
+ XORW (BX), DX // 663313
+ XORW (R11), DX // 66413313
+ XORW (BX), R11 // 6644331b
+ XORW (R11), R11 // 6645331b
+ XORL (BX), DX // 3313
+ XORL (R11), DX // 413313
+ XORL (BX), R11 // 44331b
+ XORL (R11), R11 // 45331b
+ XORQ (BX), DX // 483313
+ XORQ (R11), DX // 493313
+ XORQ (BX), R11 // 4c331b
+ XORQ (R11), R11 // 4d331b
+ XORB (BX), DL // 3213
+ XORB (R11), DL // 413213
+ XORB (BX), R11 // 44321b
+ XORB (R11), R11 // 45321b
+ XORPD (BX), X2 // 660f5713
+ XORPD (R11), X2 // 66410f5713
+ XORPD X2, X2 // 660f57d2
+ XORPD X11, X2 // 66410f57d3
+ XORPD (BX), X11 // 66440f571b
+ XORPD (R11), X11 // 66450f571b
+ XORPD X2, X11 // 66440f57da
+ XORPD X11, X11 // 66450f57db
+ XORPS (BX), X2 // 0f5713
+ XORPS (R11), X2 // 410f5713
+ XORPS X2, X2 // 0f57d2
+ XORPS X11, X2 // 410f57d3
+ XORPS (BX), X11 // 440f571b
+ XORPS (R11), X11 // 450f571b
+ XORPS X2, X11 // 440f57da
+ XORPS X11, X11 // 450f57db
+ XRSTOR (BX) // 0fae2b
+ XRSTOR (R11) // 410fae2b
+ XRSTOR64 (BX) // 480fae2b
+ XRSTOR64 (R11) // 490fae2b
+ XRSTORS (BX) // 0fc71b
+ XRSTORS (R11) // 410fc71b
+ XRSTORS64 (BX) // 480fc71b
+ XRSTORS64 (R11) // 490fc71b
+ XSAVE (BX) // 0fae23
+ XSAVE (R11) // 410fae23
+ XSAVE64 (BX) // 480fae23
+ XSAVE64 (R11) // 490fae23
+ XSAVEC (BX) // 0fc723
+ XSAVEC (R11) // 410fc723
+ XSAVEC64 (BX) // 480fc723
+ XSAVEC64 (R11) // 490fc723
+ XSAVEOPT (BX) // 0fae33
+ XSAVEOPT (R11) // 410fae33
+ XSAVEOPT64 (BX) // 480fae33
+ XSAVEOPT64 (R11) // 490fae33
+ XSAVES (BX) // 0fc72b
+ XSAVES (R11) // 410fc72b
+ XSAVES64 (BX) // 480fc72b
+ XSAVES64 (R11) // 490fc72b
+ XSETBV // 0f01d1
+ XTEST // 0f01d6
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/amd64enc_extra.s b/src/cmd/asm/internal/asm/testdata/amd64enc_extra.s
new file mode 100644
index 0000000..a7d9bda
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/amd64enc_extra.s
@@ -0,0 +1,1059 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input extends the auto-generated amd64enc.s test suite
+// with manually added tests.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ // AVX2GATHER: basic combinations.
+ VPGATHERDQ Y2, (BP)(X7*2), Y1 // c4e2ed904c7d00
+ VPGATHERDQ X12, (R13)(X14*2), X11 // c40299905c7500
+ VPGATHERDQ Y12, (R13)(X14*2), Y11 // c4029d905c7500
+ VPGATHERDQ Y0, 8(X4*1), Y6 // c4e2fd90342508000000
+ VPGATHERDQ Y0, -8(X4*1), Y6 // c4e2fd903425f8ffffff
+ VPGATHERDQ Y0, 0(X4*1), Y6 // c4e2fd90342500000000
+ VPGATHERDQ Y0, 664(X4*1), Y6 // c4e2fd90342598020000
+ VPGATHERDQ Y0, 8(X4*8), Y6 // c4e2fd9034e508000000
+ VPGATHERDQ Y0, -8(X4*8), Y6 // c4e2fd9034e5f8ffffff
+ VPGATHERDQ Y0, 0(X4*8), Y6 // c4e2fd9034e500000000
+ VPGATHERDQ Y0, 664(X4*8), Y6 // c4e2fd9034e598020000
+ VPGATHERDQ Y0, 8(X14*1), Y6 // c4a2fd90343508000000
+ VPGATHERDQ Y0, -8(X14*1), Y6 // c4a2fd903435f8ffffff
+ VPGATHERDQ Y0, 0(X14*1), Y6 // c4a2fd90343500000000
+ VPGATHERDQ Y0, 664(X14*1), Y6 // c4a2fd90343598020000
+ VPGATHERDQ Y0, 8(X14*8), Y6 // c4a2fd9034f508000000
+ VPGATHERDQ Y0, -8(X14*8), Y6 // c4a2fd9034f5f8ffffff
+ VPGATHERDQ Y0, 0(X14*8), Y6 // c4a2fd9034f500000000
+ VPGATHERDQ Y0, 664(X14*8), Y6 // c4a2fd9034f598020000
+ VPGATHERDQ X2, (BP)(X7*2), X1 // c4e2e9904c7d00
+ VPGATHERDQ Y2, (BP)(X7*2), Y1 // c4e2ed904c7d00
+ VPGATHERDQ X12, (R13)(X14*2), X11 // c40299905c7500
+ VPGATHERDQ Y12, (R13)(X14*2), Y11 // c4029d905c7500
+ VPGATHERDQ Y0, 8(X4*1), Y6 // c4e2fd90342508000000
+ VPGATHERDQ Y0, -8(X4*1), Y6 // c4e2fd903425f8ffffff
+ VPGATHERDQ Y0, 0(X4*1), Y6 // c4e2fd90342500000000
+ VPGATHERDQ Y0, 664(X4*1), Y6 // c4e2fd90342598020000
+ VPGATHERDQ Y0, 8(X4*8), Y6 // c4e2fd9034e508000000
+ VPGATHERDQ Y0, -8(X4*8), Y6 // c4e2fd9034e5f8ffffff
+ VPGATHERDQ Y0, 0(X4*8), Y6 // c4e2fd9034e500000000
+ VPGATHERDQ Y0, 664(X4*8), Y6 // c4e2fd9034e598020000
+ VPGATHERDQ Y0, 8(X14*1), Y6 // c4a2fd90343508000000
+ VPGATHERDQ Y0, -8(X14*1), Y6 // c4a2fd903435f8ffffff
+ VPGATHERDQ Y0, 0(X14*1), Y6 // c4a2fd90343500000000
+ VPGATHERDQ Y0, 664(X14*1), Y6 // c4a2fd90343598020000
+ VPGATHERDQ Y0, 8(X14*8), Y6 // c4a2fd9034f508000000
+ VPGATHERDQ Y0, -8(X14*8), Y6 // c4a2fd9034f5f8ffffff
+ VPGATHERDQ Y0, 0(X14*8), Y6 // c4a2fd9034f500000000
+ VPGATHERDQ Y0, 664(X14*8), Y6 // c4a2fd9034f598020000
+ VPGATHERQQ X2, (BP)(X7*2), X1 // c4e2e9914c7d00
+ VPGATHERQQ Y2, (BP)(Y7*2), Y1 // c4e2ed914c7d00
+ VPGATHERQQ X12, (R13)(X14*2), X11 // c40299915c7500
+ VPGATHERQQ Y12, (R13)(Y14*2), Y11 // c4029d915c7500
+ VPGATHERQQ X2, (BP)(X7*2), X1 // c4e2e9914c7d00
+ VPGATHERQQ Y2, (BP)(Y7*2), Y1 // c4e2ed914c7d00
+ VPGATHERQQ X12, (R13)(X14*2), X11 // c40299915c7500
+ VPGATHERQQ Y12, (R13)(Y14*2), Y11 // c4029d915c7500
+ VGATHERDPD X2, (BP)(X7*2), X1 // c4e2e9924c7d00
+ VGATHERDPD Y2, (BP)(X7*2), Y1 // c4e2ed924c7d00
+ VGATHERDPD X12, (R13)(X14*2), X11 // c40299925c7500
+ VGATHERDPD Y12, (R13)(X14*2), Y11 // c4029d925c7500
+ VGATHERDPD Y0, 8(X4*1), Y6 // c4e2fd92342508000000
+ VGATHERDPD Y0, -8(X4*1), Y6 // c4e2fd923425f8ffffff
+ VGATHERDPD Y0, 0(X4*1), Y6 // c4e2fd92342500000000
+ VGATHERDPD Y0, 664(X4*1), Y6 // c4e2fd92342598020000
+ VGATHERDPD Y0, 8(X4*8), Y6 // c4e2fd9234e508000000
+ VGATHERDPD Y0, -8(X4*8), Y6 // c4e2fd9234e5f8ffffff
+ VGATHERDPD Y0, 0(X4*8), Y6 // c4e2fd9234e500000000
+ VGATHERDPD Y0, 664(X4*8), Y6 // c4e2fd9234e598020000
+ VGATHERDPD Y0, 8(X14*1), Y6 // c4a2fd92343508000000
+ VGATHERDPD Y0, -8(X14*1), Y6 // c4a2fd923435f8ffffff
+ VGATHERDPD Y0, 0(X14*1), Y6 // c4a2fd92343500000000
+ VGATHERDPD Y0, 664(X14*1), Y6 // c4a2fd92343598020000
+ VGATHERDPD Y0, 8(X14*8), Y6 // c4a2fd9234f508000000
+ VGATHERDPD Y0, -8(X14*8), Y6 // c4a2fd9234f5f8ffffff
+ VGATHERDPD Y0, 0(X14*8), Y6 // c4a2fd9234f500000000
+ VGATHERDPD Y0, 664(X14*8), Y6 // c4a2fd9234f598020000
+ VGATHERDPD X2, (BP)(X7*2), X1 // c4e2e9924c7d00
+ VGATHERDPD Y2, (BP)(X7*2), Y1 // c4e2ed924c7d00
+ VGATHERDPD X12, (R13)(X14*2), X11 // c40299925c7500
+ VGATHERDPD Y12, (R13)(X14*2), Y11 // c4029d925c7500
+ VGATHERDPD Y0, 8(X4*1), Y6 // c4e2fd92342508000000
+ VGATHERDPD Y0, -8(X4*1), Y6 // c4e2fd923425f8ffffff
+ VGATHERDPD Y0, 0(X4*1), Y6 // c4e2fd92342500000000
+ VGATHERDPD Y0, 664(X4*1), Y6 // c4e2fd92342598020000
+ VGATHERDPD Y0, 8(X4*8), Y6 // c4e2fd9234e508000000
+ VGATHERDPD Y0, -8(X4*8), Y6 // c4e2fd9234e5f8ffffff
+ VGATHERDPD Y0, 0(X4*8), Y6 // c4e2fd9234e500000000
+ VGATHERDPD Y0, 664(X4*8), Y6 // c4e2fd9234e598020000
+ VGATHERDPD Y0, 8(X14*1), Y6 // c4a2fd92343508000000
+ VGATHERDPD Y0, -8(X14*1), Y6 // c4a2fd923435f8ffffff
+ VGATHERDPD Y0, 0(X14*1), Y6 // c4a2fd92343500000000
+ VGATHERDPD Y0, 664(X14*1), Y6 // c4a2fd92343598020000
+ VGATHERDPD Y0, 8(X14*8), Y6 // c4a2fd9234f508000000
+ VGATHERDPD Y0, -8(X14*8), Y6 // c4a2fd9234f5f8ffffff
+ VGATHERDPD Y0, 0(X14*8), Y6 // c4a2fd9234f500000000
+ VGATHERDPD Y0, 664(X14*8), Y6 // c4a2fd9234f598020000
+ VGATHERQPD X2, (BP)(X7*2), X1 // c4e2e9934c7d00
+ VGATHERQPD Y2, (BP)(Y7*2), Y1 // c4e2ed934c7d00
+ VGATHERQPD X12, (R13)(X14*2), X11 // c40299935c7500
+ VGATHERQPD Y12, (R13)(Y14*2), Y11 // c4029d935c7500
+ VGATHERQPD X2, (BP)(X7*2), X1 // c4e2e9934c7d00
+ VGATHERQPD Y2, (BP)(Y7*2), Y1 // c4e2ed934c7d00
+ VGATHERQPD X12, (R13)(X14*2), X11 // c40299935c7500
+ VGATHERQPD Y12, (R13)(Y14*2), Y11 // c4029d935c7500
+ VGATHERDPS X2, (BP)(X7*2), X1 // c4e269924c7d00
+ VGATHERDPS Y2, (BP)(Y7*2), Y1 // c4e26d924c7d00
+ VGATHERDPS X12, (R13)(X14*2), X11 // c40219925c7500
+ VGATHERDPS Y12, (R13)(Y14*2), Y11 // c4021d925c7500
+ VGATHERDPS X3, 8(X4*1), X6 // c4e26192342508000000
+ VGATHERDPS X3, -8(X4*1), X6 // c4e261923425f8ffffff
+ VGATHERDPS X3, 0(X4*1), X6 // c4e26192342500000000
+ VGATHERDPS X3, 664(X4*1), X6 // c4e26192342598020000
+ VGATHERDPS X3, 8(X4*8), X6 // c4e2619234e508000000
+ VGATHERDPS X3, -8(X4*8), X6 // c4e2619234e5f8ffffff
+ VGATHERDPS X3, 0(X4*8), X6 // c4e2619234e500000000
+ VGATHERDPS X3, 664(X4*8), X6 // c4e2619234e598020000
+ VGATHERDPS X3, 8(X14*1), X6 // c4a26192343508000000
+ VGATHERDPS X3, -8(X14*1), X6 // c4a261923435f8ffffff
+ VGATHERDPS X3, 0(X14*1), X6 // c4a26192343500000000
+ VGATHERDPS X3, 664(X14*1), X6 // c4a26192343598020000
+ VGATHERDPS X3, 8(X14*8), X6 // c4a2619234f508000000
+ VGATHERDPS X3, -8(X14*8), X6 // c4a2619234f5f8ffffff
+ VGATHERDPS X3, 0(X14*8), X6 // c4a2619234f500000000
+ VGATHERDPS X3, 664(X14*8), X6 // c4a2619234f598020000
+ VGATHERDPS X2, (BP)(X7*2), X1 // c4e269924c7d00
+ VGATHERDPS Y2, (BP)(Y7*2), Y1 // c4e26d924c7d00
+ VGATHERDPS X12, (R13)(X14*2), X11 // c40219925c7500
+ VGATHERDPS Y12, (R13)(Y14*2), Y11 // c4021d925c7500
+ VGATHERDPS X5, 8(X4*1), X6 // c4e25192342508000000
+ VGATHERDPS X3, -8(X4*1), X6 // c4e261923425f8ffffff
+ VGATHERDPS X3, 0(X4*1), X6 // c4e26192342500000000
+ VGATHERDPS X3, 664(X4*1), X6 // c4e26192342598020000
+ VGATHERDPS X3, 8(X4*8), X6 // c4e2619234e508000000
+ VGATHERDPS X3, -8(X4*8), X6 // c4e2619234e5f8ffffff
+ VGATHERDPS X3, 0(X4*8), X6 // c4e2619234e500000000
+ VGATHERDPS X3, 664(X4*8), X6 // c4e2619234e598020000
+ VGATHERDPS X3, 8(X14*1), X6 // c4a26192343508000000
+ VGATHERDPS X3, -8(X14*1), X6 // c4a261923435f8ffffff
+ VGATHERDPS X3, 0(X14*1), X6 // c4a26192343500000000
+ VGATHERDPS X3, 664(X14*1), X6 // c4a26192343598020000
+ VGATHERDPS X3, 8(X14*8), X6 // c4a2619234f508000000
+ VGATHERDPS X3, -8(X14*8), X6 // c4a2619234f5f8ffffff
+ VGATHERDPS X3, 0(X14*8), X6 // c4a2619234f500000000
+ VGATHERDPS X3, 664(X14*8), X6 // c4a2619234f598020000
+ VGATHERQPS X2, (BP)(X7*2), X1 // c4e269934c7d00
+ VGATHERQPS X2, (BP)(Y7*2), X1 // c4e26d934c7d00
+ VGATHERQPS X12, (R13)(X14*2), X11 // c40219935c7500
+ VGATHERQPS X12, (R13)(Y14*2), X11 // c4021d935c7500
+ VGATHERQPS X2, (BP)(X7*2), X1 // c4e269934c7d00
+ VGATHERQPS X2, (BP)(Y7*2), X1 // c4e26d934c7d00
+ VGATHERQPS X12, (R13)(X14*2), X11 // c40219935c7500
+ VGATHERQPS X12, (R13)(Y14*2), X11 // c4021d935c7500
+ VPGATHERDD X2, (BP)(X7*2), X1 // c4e269904c7d00
+ VPGATHERDD Y2, (BP)(Y7*2), Y1 // c4e26d904c7d00
+ VPGATHERDD X12, (R13)(X14*2), X11 // c40219905c7500
+ VPGATHERDD Y12, (R13)(Y14*2), Y11 // c4021d905c7500
+ VPGATHERDD X3, 8(X4*1), X6 // c4e26190342508000000
+ VPGATHERDD X3, -8(X4*1), X6 // c4e261903425f8ffffff
+ VPGATHERDD X3, 0(X4*1), X6 // c4e26190342500000000
+ VPGATHERDD X3, 664(X4*1), X6 // c4e26190342598020000
+ VPGATHERDD X3, 8(X4*8), X6 // c4e2619034e508000000
+ VPGATHERDD X3, -8(X4*8), X6 // c4e2619034e5f8ffffff
+ VPGATHERDD X3, 0(X4*8), X6 // c4e2619034e500000000
+ VPGATHERDD X3, 664(X4*8), X6 // c4e2619034e598020000
+ VPGATHERDD X3, 8(X14*1), X6 // c4a26190343508000000
+ VPGATHERDD X3, -8(X14*1), X6 // c4a261903435f8ffffff
+ VPGATHERDD X3, 0(X14*1), X6 // c4a26190343500000000
+ VPGATHERDD X3, 664(X14*1), X6 // c4a26190343598020000
+ VPGATHERDD X3, 8(X14*8), X6 // c4a2619034f508000000
+ VPGATHERDD X3, -8(X14*8), X6 // c4a2619034f5f8ffffff
+ VPGATHERDD X3, 0(X14*8), X6 // c4a2619034f500000000
+ VPGATHERDD X3, 664(X14*8), X6 // c4a2619034f598020000
+ VPGATHERDD X2, (BP)(X7*2), X1 // c4e269904c7d00
+ VPGATHERDD Y2, (BP)(Y7*2), Y1 // c4e26d904c7d00
+ VPGATHERDD X12, (R13)(X14*2), X11 // c40219905c7500
+ VPGATHERDD Y12, (R13)(Y14*2), Y11 // c4021d905c7500
+ VPGATHERDD X3, 8(X4*1), X6 // c4e26190342508000000
+ VPGATHERDD X3, -8(X4*1), X6 // c4e261903425f8ffffff
+ VPGATHERDD X3, 0(X4*1), X6 // c4e26190342500000000
+ VPGATHERDD X3, 664(X4*1), X6 // c4e26190342598020000
+ VPGATHERDD X3, 8(X4*8), X6 // c4e2619034e508000000
+ VPGATHERDD X3, -8(X4*8), X6 // c4e2619034e5f8ffffff
+ VPGATHERDD X3, 0(X4*8), X6 // c4e2619034e500000000
+ VPGATHERDD X3, 664(X4*8), X6 // c4e2619034e598020000
+ VPGATHERDD X3, 8(X14*1), X6 // c4a26190343508000000
+ VPGATHERDD X3, -8(X14*1), X6 // c4a261903435f8ffffff
+ VPGATHERDD X3, 0(X14*1), X6 // c4a26190343500000000
+ VPGATHERDD X3, 664(X14*1), X6 // c4a26190343598020000
+ VPGATHERDD X3, 8(X14*8), X6 // c4a2619034f508000000
+ VPGATHERDD X3, -8(X14*8), X6 // c4a2619034f5f8ffffff
+ VPGATHERDD X3, 0(X14*8), X6 // c4a2619034f500000000
+ VPGATHERDD X3, 664(X14*8), X6 // c4a2619034f598020000
+ VPGATHERQD X2, (BP)(X7*2), X1 // c4e269914c7d00
+ VPGATHERQD X2, (BP)(Y7*2), X1 // c4e26d914c7d00
+ VPGATHERQD X12, (R13)(X14*2), X11 // c40219915c7500
+ VPGATHERQD X12, (R13)(Y14*2), X11 // c4021d915c7500
+ VPGATHERQD X2, (BP)(X7*2), X1 // c4e269914c7d00
+ VPGATHERQD X2, (BP)(Y7*2), X1 // c4e26d914c7d00
+ VPGATHERQD X12, (R13)(X14*2), X11 // c40219915c7500
+ VPGATHERQD X12, (R13)(Y14*2), X11 // c4021d915c7500
+ VPGATHERQQ X0, 0(X1*1), X2 // c4e2f991140d00000000
+ VPGATHERQQ Y0, 0(Y1*1), Y2 // c4e2fd91140d00000000
+ VPGATHERQQ X8, 0(X9*1), X10 // c422b991140d00000000
+ VPGATHERQQ Y8, 0(Y9*1), Y10 // c422bd91140d00000000
+ VPGATHERQQ X0, 0(X1*4), X2 // c4e2f991148d00000000
+ VPGATHERQQ Y0, 0(Y1*4), Y2 // c4e2fd91148d00000000
+ VPGATHERQQ X8, 0(X9*4), X10 // c422b991148d00000000
+ VPGATHERQQ Y8, 0(Y9*4), Y10 // c422bd91148d00000000
+ // AVX2GATHER: test SP/BP base with different displacements.
+ VPGATHERQQ X0, (SP)(X1*1), X2 // c4e2f991140c
+ VPGATHERQQ X0, 16(SP)(X1*1), X2 // c4e2f991540c10
+ VPGATHERQQ X0, 512(SP)(X1*1), X2 // c4e2f991940c00020000
+ VPGATHERQQ X0, (R12)(X1*1), X2 // c4c2f991140c
+ VPGATHERQQ X0, 16(R12)(X1*1), X2 // c4c2f991540c10
+ VPGATHERQQ X0, 512(R12)(X1*1), X2 // c4c2f991940c00020000
+ VPGATHERQQ X0, (BP)(X1*1), X2 // c4e2f991540d00
+ VPGATHERQQ X0, 16(BP)(X1*1), X2 // c4e2f991540d10
+ VPGATHERQQ X0, 512(BP)(X1*1), X2 // c4e2f991940d00020000
+ VPGATHERQQ X0, (R13)(X1*1), X2 // c4c2f991540d00
+ VPGATHERQQ X0, 16(R13)(X1*1), X2 // c4c2f991540d10
+ VPGATHERQQ X0, 512(R13)(X1*1), X2 // c4c2f991940d00020000
+ VPGATHERQQ Y0, (SP)(Y1*1), Y2 // c4e2fd91140c
+ VPGATHERQQ Y0, 16(SP)(Y1*1), Y2 // c4e2fd91540c10
+ VPGATHERQQ Y0, 512(SP)(Y1*1), Y2 // c4e2fd91940c00020000
+ VPGATHERQQ Y0, (R12)(Y1*1), Y2 // c4c2fd91140c
+ VPGATHERQQ Y0, 16(R12)(Y1*1), Y2 // c4c2fd91540c10
+ VPGATHERQQ Y0, 512(R12)(Y1*1), Y2 // c4c2fd91940c00020000
+ VPGATHERQQ Y0, (BP)(Y1*1), Y2 // c4e2fd91540d00
+ VPGATHERQQ Y0, 16(BP)(Y1*1), Y2 // c4e2fd91540d10
+ VPGATHERQQ Y0, 512(BP)(Y1*1), Y2 // c4e2fd91940d00020000
+ VPGATHERQQ Y0, (R13)(Y1*1), Y2 // c4c2fd91540d00
+ VPGATHERQQ Y0, 16(R13)(Y1*1), Y2 // c4c2fd91540d10
+ VPGATHERQQ Y0, 512(R13)(Y1*1), Y2 // c4c2fd91940d00020000
+ // Test low-8 register for /is4 "hr" operand.
+ VPBLENDVB X0, (BX), X1, X2 // c4e3714c1300
+ // <XMM0>/Yxr0 tests.
+ SHA256RNDS2 X0, (BX), X2 // 0f38cb13
+ SHA256RNDS2 X0, (R11), X2 // 410f38cb13
+ SHA256RNDS2 X0, X2, X2 // 0f38cbd2
+ SHA256RNDS2 X0, X11, X2 // 410f38cbd3
+ SHA256RNDS2 X0, (BX), X11 // 440f38cb1b
+ SHA256RNDS2 X0, (R11), X11 // 450f38cb1b
+ SHA256RNDS2 X0, X2, X11 // 440f38cbda
+ SHA256RNDS2 X0, X11, X11 // 450f38cbdb
+ // Rest SHA instructions tests.
+ SHA1MSG1 (BX), X2 // 0f38c913
+ SHA1MSG1 (R11), X2 // 410f38c913
+ SHA1MSG1 X2, X2 // 0f38c9d2
+ SHA1MSG1 X11, X2 // 410f38c9d3
+ SHA1MSG1 (BX), X11 // 440f38c91b
+ SHA1MSG1 (R11), X11 // 450f38c91b
+ SHA1MSG1 X2, X11 // 440f38c9da
+ SHA1MSG1 X11, X11 // 450f38c9db
+ SHA1MSG2 (BX), X2 // 0f38ca13
+ SHA1MSG2 (R11), X2 // 410f38ca13
+ SHA1MSG2 X2, X2 // 0f38cad2
+ SHA1MSG2 X11, X2 // 410f38cad3
+ SHA1MSG2 (BX), X11 // 440f38ca1b
+ SHA1MSG2 (R11), X11 // 450f38ca1b
+ SHA1MSG2 X2, X11 // 440f38cada
+ SHA1MSG2 X11, X11 // 450f38cadb
+ SHA1NEXTE (BX), X2 // 0f38c813
+ SHA1NEXTE (R11), X2 // 410f38c813
+ SHA1NEXTE X2, X2 // 0f38c8d2
+ SHA1NEXTE X11, X2 // 410f38c8d3
+ SHA1NEXTE (BX), X11 // 440f38c81b
+ SHA1NEXTE (R11), X11 // 450f38c81b
+ SHA1NEXTE X2, X11 // 440f38c8da
+ SHA1NEXTE X11, X11 // 450f38c8db
+ SHA1RNDS4 $0, (BX), X2 // 0f3acc1300
+ SHA1RNDS4 $0, (R11), X2 // 410f3acc1300
+ SHA1RNDS4 $1, X2, X2 // 0f3accd201
+ SHA1RNDS4 $1, X11, X2 // 410f3accd301
+ SHA1RNDS4 $2, (BX), X11 // 440f3acc1b02
+ SHA1RNDS4 $2, (R11), X11 // 450f3acc1b02
+ SHA1RNDS4 $3, X2, X11 // 440f3accda03
+ SHA1RNDS4 $3, X11, X11 // 450f3accdb03
+ SHA256MSG1 (BX), X2 // 0f38cc13
+ SHA256MSG1 (R11), X2 // 410f38cc13
+ SHA256MSG1 X2, X2 // 0f38ccd2
+ SHA256MSG1 X11, X2 // 410f38ccd3
+ SHA256MSG1 (BX), X11 // 440f38cc1b
+ SHA256MSG1 (R11), X11 // 450f38cc1b
+ SHA256MSG1 X2, X11 // 440f38ccda
+ SHA256MSG1 X11, X11 // 450f38ccdb
+ SHA256MSG2 (BX), X2 // 0f38cd13
+ SHA256MSG2 (R11), X2 // 410f38cd13
+ SHA256MSG2 X2, X2 // 0f38cdd2
+ SHA256MSG2 X11, X2 // 410f38cdd3
+ SHA256MSG2 (BX), X11 // 440f38cd1b
+ SHA256MSG2 (R11), X11 // 450f38cd1b
+ SHA256MSG2 X2, X11 // 440f38cdda
+ SHA256MSG2 X11, X11 // 450f38cddb
+ // Test VPERMQ with both uint8 and int8 immediate args
+ VPERMQ $-40, Y8, Y8 // c443fd00c0d8
+ VPERMQ $216, Y8, Y8 // c443fd00c0d8
+ // Test that VPERMPD that shares ytab list with VPERMQ continues to work too.
+ VPERMPD $-40, Y7, Y7 // c4e3fd01ffd8
+ VPERMPD $216, Y7, Y7 // c4e3fd01ffd8
+ // Check that LEAL is permitted to use overflowing offset.
+ LEAL 2400959708(BP)(R10*1), BP // 428dac15dcbc1b8f
+ LEAL 3395469782(AX)(R10*1), AX // 428d8410d6c162ca
+ // Make sure MOV CR/DR continues to work after changing its movtabs.
+ MOVQ CR0, AX // 0f20c0
+ MOVQ CR0, DX // 0f20c2
+ MOVQ CR4, DI // 0f20e7
+ MOVQ AX, CR0 // 0f22c0
+ MOVQ DX, CR0 // 0f22c2
+ MOVQ DI, CR4 // 0f22e7
+ MOVQ DR0, AX // 0f21c0
+ MOVQ DR6, DX // 0f21f2
+ MOVQ DR7, SI // 0f21fe
+ // Test other movtab entries.
+ PUSHQ GS // 0fa8
+ PUSHQ FS // 0fa0
+ POPQ FS // 0fa1
+ POPQ GS // 0fa9
+ // All instructions below semantically have unsigned operands,
+ // but previous assembler permitted negative arguments.
+ // This behavior is preserved for compatibility reasons.
+ VPSHUFD $-79, X7, X7 // c5f970ffb1
+ RORXL $-1, (AX), DX // c4e37bf010ff
+ RORXQ $-1, (AX), DX // c4e3fbf010ff
+ VPSHUFD $-1, X1, X2 // c5f970d1ff
+ VPSHUFD $-1, Y1, Y2 // c5fd70d1ff
+ VPSHUFHW $-1, X1, X2 // c5fa70d1ff
+ VPSHUFHW $-1, Y1, Y2 // c5fe70d1ff
+ VPSHUFLW $-1, X1, X2 // c5fb70d1ff
+ VPSHUFLW $-1, Y1, Y2 // c5ff70d1ff
+ VROUNDPD $-1, X1, X2 // c4e37909d1ff
+ VROUNDPS $-1, Y1, Y2 // c4e37d08d1ff
+ VPSLLD $-1, X1, X2 // c5e972f1ff
+ VPSLLD $-1, Y1, Y2 // c5ed72f1ff
+ VPSLLDQ $-1, X1, X2 // c5e973f9ff
+ VPSLLDQ $-1, Y1, Y2 // c5ed73f9ff
+ VPSLLQ $-1, X1, X2 // c5e973f1ff
+ VPSLLQ $-1, Y1, Y2 // c5ed73f1ff
+ VPSRLD $-1, X1, X2 // c5e972d1ff
+ VPSRLD $-1, Y1, Y2 // c5ed72d1ff
+ VPSRLDQ $-1, X1, X2 // c5e973d9ff
+ VPSRLDQ $-1, Y1, Y2 // c5ed73d9ff
+ VPSRLQ $-1, X1, X2 // c5e973d1ff
+ VPSRLQ $-1, Y1, Y2 // c5ed73d1ff
+ VPEXTRW $-1, X1, (AX) // c4e3791508ff
+ VPEXTRW $-1, X1, AX // c4e37915c8ff
+ VEXTRACTF128 $-1, Y1, X2 // c4e37d19caff
+ VEXTRACTI128 $-1, Y1, X2 // c4e37d39caff
+ VAESKEYGENASSIST $-1, X1, X2 // c4e379dfd1ff
+ VPCMPESTRI $-1, X1, X2 // c4e37961d1ff
+ VPCMPESTRM $-1, X1, X2 // c4e37960d1ff
+ VPCMPISTRI $-1, X1, X2 // c4e37963d1ff
+ VPCMPISTRM $-1, X1, X2 // c4e37962d1ff
+ VPERMPD $-1, Y1, Y2 // c4e3fd01d1ff
+ VPERMILPD $-1, X1, X2 // c4e37905d1ff
+ VPERMILPD $-1, Y1, Y2 // c4e37d05d1ff
+ VPERMILPS $-1, X1, X2 // c4e37904d1ff
+ VPERMILPS $-1, Y1, Y2 // c4e37d04d1ff
+ VCVTPS2PH $-1, X1, X2 // c4e3791dcaff
+ VCVTPS2PH $-1, Y1, X2 // c4e37d1dcaff
+ VPSLLW $-1, X1, X2 // c5e971f1ff
+ VPSLLW $-1, Y1, Y2 // c5ed71f1ff
+ VPSRAD $-1, X1, X2 // c5e972e1ff
+ VPSRAD $-1, Y1, Y2 // c5ed72e1ff
+ VPSRAW $-1, X1, X2 // c5e971e1ff
+ VPSRAW $-1, Y1, Y2 // c5ed71e1ff
+ VPSRLW $-1, X1, X1 // c5f171d1ff
+ VPSRLW $-1, Y1, Y2 // c5ed71d1ff
+ VEXTRACTPS $-1, X1, AX // c4e37917c8ff
+ VPEXTRB $-1, X1, AX // c4e37914c8ff
+ VPEXTRD $-1, X1, AX // c4e37916c8ff
+ VPEXTRQ $-1, X1, AX // c4e3f916c8ff
+ // EVEX: High-16 X registers.
+ VADDPD X30, X1, X0 // 6291f50858c6
+ VADDPD X2, X29, X0 // 62f1950058c2
+ VADDPD X30, X29, X0 // 6291950058c6
+ VADDPD X2, X1, X28 // 6261f50858e2
+ VADDPD X30, X1, X28 // 6201f50858e6
+ VADDPD X2, X29, X28 // 6261950058e2
+ VADDPD X30, X29, X28 // 6201950058e6
+ VADDPD X30, X11, X10 // 6211a50858d6
+ VADDPD X12, X29, X10 // 6251950058d4
+ VADDPD X30, X29, X10 // 6211950058d6
+ VADDPD X12, X11, X28 // 6241a50858e4
+ VADDPD X30, X11, X28 // 6201a50858e6
+ VADDPD X12, X29, X28 // 6241950058e4
+ VADDPD X30, X29, X28 // 6201950058e6
+ VADDPD (AX), X29, X0 // 62f195005800
+ VADDPD (AX), X1, X28 // 6261f5085820
+ VADDPD (AX), X29, X28 // 626195005820
+ VADDPD (AX), X29, X10 // 627195005810
+ VADDPD (AX), X10, X28 // 6261ad085820
+ VADDPD (CX)(AX*1), X29, X0 // 62f19500580401
+ VADDPD (CX)(AX*1), X1, X28 // 6261f508582401
+ VADDPD (CX)(AX*1), X29, X28 // 62619500582401
+ VADDPD (CX)(AX*1), X29, X10 // 62719500581401
+ VADDPD (CX)(AX*1), X10, X28 // 6261ad08582401
+ VADDPD (CX)(AX*2), X29, X0 // 62f19500580441
+ VADDPD (CX)(AX*2), X1, X28 // 6261f508582441
+ VADDPD (CX)(AX*2), X29, X28 // 62619500582441
+ VADDPD (CX)(AX*2), X29, X10 // 62719500581441
+ VADDPD (CX)(AX*2), X10, X28 // 6261ad08582441
+ // EVEX: displacement without Disp8.
+ VADDPD 15(AX), X29, X0 // 62f1950058800f000000
+ VADDPD 15(AX), X1, X28 // 6261f50858a00f000000
+ VADDPD 15(AX), X29, X28 // 6261950058a00f000000
+ VADDPD 15(AX), X29, X10 // 6271950058900f000000
+ VADDPD 15(AX), X10, X28 // 6261ad0858a00f000000
+ VADDPD 15(CX)(AX*1), X29, X0 // 62f195005884010f000000
+ VADDPD 15(CX)(AX*1), X1, X28 // 6261f50858a4010f000000
+ VADDPD 15(CX)(AX*1), X29, X28 // 6261950058a4010f000000
+ VADDPD 15(CX)(AX*1), X29, X10 // 627195005894010f000000
+ VADDPD 15(CX)(AX*1), X10, X28 // 6261ad0858a4010f000000
+ VADDPD 15(CX)(AX*2), X29, X0 // 62f195005884410f000000
+ VADDPD 15(CX)(AX*2), X1, X28 // 6261f50858a4410f000000
+ VADDPD 15(CX)(AX*2), X29, X28 // 6261950058a4410f000000
+ VADDPD 15(CX)(AX*2), X29, X10 // 627195005894410f000000
+ VADDPD 15(CX)(AX*2), X10, X28 // 6261ad0858a4410f000000
+ // EVEX: compressed displacement (Disp8).
+ VADDPD 2032(DX), X29, X0 // 62f1950058427f
+ VADDPD 2032(DX), X1, X29 // 6261f508586a7f
+ VADDPD 2032(DX), X29, X28 // 6261950058627f
+ VADDPD 2032(DX)(AX*2), X29, X0 // 62f195005844427f
+ VADDPD 2032(DX)(AX*2), X1, X29 // 6261f508586c427f
+ VADDPD 2032(DX)(AX*2), X29, X28 // 626195005864427f
+ VADDPD 4064(DX), Y0, Y29 // 6261fd28586a7f
+ VADDPD 4064(DX), Y29, Y1 // 62f19520584a7f
+ VADDPD 4064(DX), Y28, Y29 // 62619d20586a7f
+ VADDPD 4064(DX)(AX*2), Y0, Y29 // 6261fd28586c427f
+ VADDPD 4064(DX)(AX*2), Y29, Y1 // 62f19520584c427f
+ VADDPD 8128(DX), Z0, Z29 // 6261fd48586a7f
+ VADDPD 8128(DX), Z29, Z1 // 62f19540584a7f
+ VADDPD 8128(DX), Z28, Z29 // 62619d40586a7f
+ VADDPD 8128(DX)(AX*2), Z0, Z29 // 6261fd48586c427f
+ VADDPD 8128(DX)(AX*2), Z29, Z1 // 62f19540584c427f
+ // EVEX: compressed displacement that does not fit into 8bits.
+ VADDPD 2048(DX), X29, X0 // 62f19500588200080000
+ VADDPD 2048(DX), X1, X29 // 6261f50858aa00080000
+ VADDPD 2048(DX), X29, X28 // 6261950058a200080000
+ VADDPD 2048(DX)(AX*2), X29, X0 // 62f1950058844200080000
+ VADDPD 2048(DX)(AX*2), X1, X29 // 6261f50858ac4200080000
+ VADDPD 2048(DX)(AX*2), X29, X28 // 6261950058a44200080000
+ VADDPD 4096(DX), Y0, Y29 // 6261fd2858aa00100000
+ VADDPD 4096(DX), Y29, Y1 // 62f19520588a00100000
+ VADDPD 4096(DX), Y28, Y29 // 62619d2058aa00100000
+ VADDPD 4096(DX)(AX*2), Y0, Y29 // 6261fd2858ac4200100000
+ VADDPD 4096(DX)(AX*2), Y29, Y1 // 62f19520588c4200100000
+ VADDPD 8192(DX), Z0, Z29 // 6261fd4858aa00200000
+ VADDPD 8192(DX), Z29, Z1 // 62f19540588a00200000
+ VADDPD 8192(DX), Z28, Z29 // 62619d4058aa00200000
+ VADDPD 8192(DX)(AX*2), Z0, Z29 // 6261fd4858ac4200200000
+ VADDPD 8192(DX)(AX*2), Z29, Z1 // 62f19540588c4200200000
+ // EVEX: Y registers; VL=256.
+ VADDPD Y30, Y1, Y0 // 6291f52858c6
+ VADDPD Y0, Y29, Y2 // 62f1952058d0
+ VADDPD Y0, Y29, Y30 // 6261952058f0
+ VADDPD Y28, Y1, Y2 // 6291f52858d4
+ VADDPD Y28, Y1, Y30 // 6201f52858f4
+ VADDPD Y28, Y29, Y2 // 6291952058d4
+ VADDPD Y28, Y29, Y30 // 6201952058f4
+ VADDPD Y10, Y11, Y30 // 6241a52858f2
+ VADDPD Y10, Y29, Y12 // 6251952058e2
+ VADDPD Y10, Y29, Y30 // 6241952058f2
+ VADDPD Y28, Y11, Y12 // 6211a52858e4
+ VADDPD Y28, Y11, Y30 // 6201a52858f4
+ VADDPD Y28, Y29, Y12 // 6211952058e4
+ VADDPD Y28, Y29, Y30 // 6201952058f4
+ VADDPD (AX), Y29, Y0 // 62f195205800
+ VADDPD (AX), Y1, Y28 // 6261f5285820
+ VADDPD (AX), Y29, Y28 // 626195205820
+ VADDPD (AX), Y29, Y10 // 627195205810
+ VADDPD (AX), Y10, Y28 // 6261ad285820
+ VADDPD (CX)(AX*1), Y29, Y0 // 62f19520580401
+ VADDPD (CX)(AX*1), Y1, Y28 // 6261f528582401
+ VADDPD (CX)(AX*1), Y29, Y28 // 62619520582401
+ VADDPD (CX)(AX*1), Y29, Y10 // 62719520581401
+ VADDPD (CX)(AX*1), Y10, Y28 // 6261ad28582401
+ VADDPD (CX)(AX*2), Y29, Y0 // 62f19520580441
+ VADDPD (CX)(AX*2), Y1, Y28 // 6261f528582441
+ VADDPD (CX)(AX*2), Y29, Y28 // 62619520582441
+ VADDPD (CX)(AX*2), Y29, Y10 // 62719520581441
+ VADDPD (CX)(AX*2), Y10, Y28 // 6261ad28582441
+ VADDPD 15(AX), Y0, Y29 // 6261fd2858a80f000000
+ VADDPD 15(AX), Y28, Y1 // 62f19d2058880f000000
+ VADDPD 15(AX), Y28, Y29 // 62619d2058a80f000000
+ VADDPD 15(AX), Y10, Y29 // 6261ad2858a80f000000
+ VADDPD 15(AX), Y28, Y10 // 62719d2058900f000000
+ VADDPD 15(CX)(AX*1), Y0, Y29 // 6261fd2858ac010f000000
+ VADDPD 15(CX)(AX*1), Y28, Y1 // 62f19d20588c010f000000
+ VADDPD 15(CX)(AX*1), Y28, Y29 // 62619d2058ac010f000000
+ VADDPD 15(CX)(AX*1), Y10, Y29 // 6261ad2858ac010f000000
+ VADDPD 15(CX)(AX*1), Y28, Y10 // 62719d205894010f000000
+ VADDPD 15(CX)(AX*2), Y0, Y29 // 6261fd2858ac410f000000
+ VADDPD 15(CX)(AX*2), Y28, Y1 // 62f19d20588c410f000000
+ VADDPD 15(CX)(AX*2), Y28, Y29 // 62619d2058ac410f000000
+ VADDPD 15(CX)(AX*2), Y10, Y29 // 6261ad2858ac410f000000
+ VADDPD 15(CX)(AX*2), Y28, Y10 // 62719d205894410f000000
+ VADDPD 2048(DX), Y0, Y29 // 6261fd28586a40
+ VADDPD 2048(DX), Y29, Y1 // 62f19520584a40
+ VADDPD 2048(DX), Y28, Y29 // 62619d20586a40
+ VADDPD 2048(DX)(AX*2), Y0, Y29 // 6261fd28586c4240
+ VADDPD 2048(DX)(AX*2), Y29, Y1 // 62f19520584c4240
+ VADDPD 2048(DX)(AX*2), Y28, Y29 // 62619d20586c4240
+ // EVEX: Z registers; VL=512.
+ VADDPD Z30, Z0, Z1 // 6291fd4858ce
+ VADDPD Z0, Z2, Z29 // 6261ed4858e8
+ VADDPD Z0, Z30, Z29 // 62618d4058e8
+ VADDPD Z28, Z2, Z1 // 6291ed4858cc
+ VADDPD Z28, Z30, Z1 // 62918d4058cc
+ VADDPD Z28, Z2, Z29 // 6201ed4858ec
+ VADDPD Z28, Z30, Z29 // 62018d4058ec
+ VADDPD Z10, Z30, Z11 // 62518d4058da
+ VADDPD Z10, Z12, Z29 // 62419d4858ea
+ VADDPD Z10, Z30, Z29 // 62418d4058ea
+ VADDPD Z28, Z12, Z11 // 62119d4858dc
+ VADDPD Z28, Z30, Z11 // 62118d4058dc
+ VADDPD Z28, Z12, Z29 // 62019d4858ec
+ VADDPD Z28, Z30, Z29 // 62018d4058ec
+ VADDPD (AX), Z0, Z29 // 6261fd485828
+ VADDPD (AX), Z28, Z1 // 62f19d405808
+ VADDPD (AX), Z28, Z29 // 62619d405828
+ VADDPD (AX), Z10, Z29 // 6261ad485828
+ VADDPD (AX), Z28, Z10 // 62719d405810
+ VADDPD (CX)(AX*1), Z0, Z29 // 6261fd48582c01
+ VADDPD (CX)(AX*1), Z28, Z1 // 62f19d40580c01
+ VADDPD (CX)(AX*1), Z28, Z29 // 62619d40582c01
+ VADDPD (CX)(AX*1), Z10, Z29 // 6261ad48582c01
+ VADDPD (CX)(AX*1), Z28, Z10 // 62719d40581401
+ VADDPD (CX)(AX*2), Z0, Z29 // 6261fd48582c41
+ VADDPD (CX)(AX*2), Z28, Z1 // 62f19d40580c41
+ VADDPD (CX)(AX*2), Z28, Z29 // 62619d40582c41
+ VADDPD (CX)(AX*2), Z10, Z29 // 6261ad48582c41
+ VADDPD (CX)(AX*2), Z28, Z10 // 62719d40581441
+ VADDPD 15(AX), Z29, Z0 // 62f1954058800f000000
+ VADDPD 15(AX), Z1, Z28 // 6261f54858a00f000000
+ VADDPD 15(AX), Z29, Z28 // 6261954058a00f000000
+ VADDPD 15(AX), Z29, Z10 // 6271954058900f000000
+ VADDPD 15(AX), Z10, Z28 // 6261ad4858a00f000000
+ VADDPD 15(CX)(AX*1), Z29, Z0 // 62f195405884010f000000
+ VADDPD 15(CX)(AX*1), Z1, Z28 // 6261f54858a4010f000000
+ VADDPD 15(CX)(AX*1), Z29, Z28 // 6261954058a4010f000000
+ VADDPD 15(CX)(AX*1), Z29, Z10 // 627195405894010f000000
+ VADDPD 15(CX)(AX*1), Z10, Z28 // 6261ad4858a4010f000000
+ VADDPD 15(CX)(AX*2), Z29, Z0 // 62f195405884410f000000
+ VADDPD 15(CX)(AX*2), Z1, Z28 // 6261f54858a4410f000000
+ VADDPD 15(CX)(AX*2), Z29, Z28 // 6261954058a4410f000000
+ VADDPD 15(CX)(AX*2), Z29, Z10 // 627195405894410f000000
+ VADDPD 15(CX)(AX*2), Z10, Z28 // 6261ad4858a4410f000000
+ VADDPD 2048(DX), Z29, Z0 // 62f19540584220
+ VADDPD 2048(DX), Z1, Z29 // 6261f548586a20
+ VADDPD 2048(DX), Z29, Z28 // 62619540586220
+ VADDPD 2048(DX)(AX*2), Z29, Z0 // 62f1954058444220
+ VADDPD 2048(DX)(AX*2), Z1, Z29 // 6261f548586c4220
+ VADDPD 2048(DX)(AX*2), Z29, Z28 // 6261954058644220
+ // EVEX: KOP (opmask) instructions.
+ KMOVB K0, K0 // c5f990c0
+ KMOVB K7, K7 // c5f990ff
+ KMOVB K5, K1 // c5f990cd
+ KMOVB K1, K5 // c5f990e9
+ KMOVB (AX), K1 // c5f99008
+ KMOVB K0, (AX) // c5f99100
+ KMOVB K7, (R10) // c4c179913a
+ KMOVB K5, AX // c5f993c5
+ KMOVB K7, R10 // c57993d7
+ KMOVB AX, K5 // c5f992e8
+ KMOVB R10, K7 // c4c17992fa
+ KMOVW K0, K0 // c5f890c0
+ KMOVW K7, K7 // c5f890ff
+ KMOVW K5, K1 // c5f890cd
+ KMOVW K1, K5 // c5f890e9
+ KMOVW (AX), K1 // c5f89008
+ KMOVW K0, (AX) // c5f89100
+ KMOVW K7, (R10) // c4c178913a
+ KMOVW K5, AX // c5f893c5
+ KMOVW K7, R10 // c57893d7
+ KMOVW AX, K5 // c5f892e8
+ KMOVW R10, K7 // c4c17892fa
+ KMOVD K0, K0 // c4e1f990c0
+ KMOVD K7, K7 // c4e1f990ff
+ KMOVD K5, K1 // c4e1f990cd
+ KMOVD K1, K5 // c4e1f990e9
+ KMOVD (AX), K1 // c4e1f99008
+ KMOVD AX, K5 // c5fb92e8
+ KMOVD R10, K7 // c4c17b92fa
+ KMOVD K0, (AX) // c4e1f99100
+ KMOVD K7, (R10) // c4c1f9913a
+ KMOVD K5, AX // c5fb93c5
+ KMOVD K7, R10 // c57b93d7
+ KMOVQ K0, K0 // c4e1f890c0
+ KMOVQ K7, K7 // c4e1f890ff
+ KMOVQ K5, K1 // c4e1f890cd
+ KMOVQ K1, K5 // c4e1f890e9
+ KMOVQ (AX), K1 // c4e1f89008
+ KMOVQ AX, K5 // c4e1fb92e8
+ KMOVQ R10, K7 // c4c1fb92fa
+ KMOVQ K0, (AX) // c4e1f89100
+ KMOVQ K7, (R10) // c4c1f8913a
+ KMOVQ K5, AX // c4e1fb93c5
+ KMOVQ K7, R10 // c461fb93d7
+ KNOTB K7, K0 // c5f944c7
+ KNOTB K1, K5 // c5f944e9
+ KNOTW K7, K0 // c5f844c7
+ KNOTW K1, K5 // c5f844e9
+ KNOTD K7, K0 // c4e1f944c7
+ KNOTD K1, K5 // c4e1f944e9
+ KNOTQ K7, K0 // c4e1f844c7
+ KNOTQ K1, K5 // c4e1f844e9
+ KORB K7, K5, K0 // c5d545c7
+ KORB K0, K7, K5 // c5c545e8
+ KORW K7, K5, K0 // c5d445c7
+ KORW K0, K7, K5 // c5c445e8
+ KORD K7, K5, K0 // c4e1d545c7
+ KORD K0, K7, K5 // c4e1c545e8
+ KORQ K7, K5, K0 // c4e1d445c7
+ KORQ K0, K7, K5 // c4e1c445e8
+ KSHIFTLB $0, K7, K0 // c4e37932c700
+ KSHIFTLB $196, K1, K5 // c4e37932e9c4
+ KSHIFTLW $0, K7, K0 // c4e3f932c700
+ KSHIFTLW $196, K1, K5 // c4e3f932e9c4
+ KSHIFTLD $0, K7, K0 // c4e37933c700
+ KSHIFTLD $196, K1, K5 // c4e37933e9c4
+ KSHIFTLQ $0, K7, K0 // c4e3f933c700
+ KSHIFTLQ $196, K1, K5 // c4e3f933e9c4
+ // EVEX: masking with K1-K7.
+ VADDPD X2, X1, K1, X0 // 62f1f50958c2
+ VADDPD X12, X1, K4, X10 // 6251f50c58d4
+ VADDPD X22, X1, K7, X20 // 62a1f50f58e6
+ VADDPD (AX), X1, K1, X1 // 62f1f5095808
+ VADDPD 8(R10), X10, K4, X10 // 6251ad0c589208000000
+ VADDPD (R10)(AX*4), X20, K7, X20 // 62c1dd07582482
+ VADDPD Y2, Y1, K1, Y0 // 62f1f52958c2
+ VADDPD Y12, Y1, K4, Y10 // 6251f52c58d4
+ VADDPD Y22, Y1, K7, Y20 // 62a1f52f58e6
+ VADDPD (AX), Y1, K1, Y1 // 62f1f5295808
+ VADDPD 8(R10), Y10, K4, Y10 // 6251ad2c589208000000
+ VADDPD (R10)(AX*4), Y20, K7, Y20 // 62c1dd27582482
+ VADDPD Z2, Z1, K1, Z0 // 62f1f54958c2
+ VADDPD Z12, Z1, K4, Z10 // 6251f54c58d4
+ VADDPD Z22, Z1, K7, Z20 // 62a1f54f58e6
+ VADDPD (AX), Z1, K1, Z1 // 62f1f5495808
+ VADDPD 8(R10), Z10, K4, Z10 // 6251ad4c589208000000
+ VADDPD (R10)(AX*4), Z20, K7, Z20 // 62c1dd47582482
+ // EVEX gather (also tests Z as VSIB index).
+ VPGATHERDD 360(AX)(X2*4), K1, X1 // 62f27d09904c905a
+ VPGATHERDD 640(BP)(X15*8), K3, X14 // 62327d0b90b4fd80020000
+ VPGATHERDD 960(R10)(X25*2), K7, X24 // 62027d0790844ac0030000
+ VPGATHERDD 1280(R10)(X1*4), K4, X0 // 62d27d0c90848a00050000
+ VPGATHERDD 360(AX)(Y2*4), K1, Y1 // 62f27d29904c905a
+ VPGATHERDD 640(BP)(Y15*8), K3, Y14 // 62327d2b90b4fd80020000
+ VPGATHERDD 960(R10)(Y25*2), K7, Y24 // 62027d2790844ac0030000
+ VPGATHERDD 1280(R10)(Y1*4), K4, Y0 // 62d27d2c90848a00050000
+ VPGATHERDD 360(AX)(Z2*4), K1, Z1 // 62f27d49904c905a
+ VPGATHERDD 640(BP)(Z15*8), K3, Z14 // 62327d4b90b4fd80020000
+ VPGATHERDD 960(R10)(Z25*2), K7, Z24 // 62027d4790844ac0030000
+ VPGATHERDD 1280(R10)(Z1*4), K4, Z0 // 62d27d4c90848a00050000
+ VPGATHERDQ 360(AX)(X2*4), K1, X1 // 62f2fd09904c902d
+ VPGATHERDQ 640(BP)(X15*8), K3, X14 // 6232fd0b9074fd50
+ VPGATHERDQ 960(R10)(X25*2), K7, X24 // 6202fd0790444a78
+ VPGATHERDQ 1280(R10)(X1*4), K4, X0 // 62d2fd0c90848a00050000
+ VPGATHERDQ 360(AX)(X2*4), K1, Y1 // 62f2fd29904c902d
+ VPGATHERDQ 640(BP)(X15*8), K3, Y14 // 6232fd2b9074fd50
+ VPGATHERDQ 960(R10)(X25*2), K7, Y24 // 6202fd2790444a78
+ VPGATHERDQ 1280(R10)(X1*4), K4, Y0 // 62d2fd2c90848a00050000
+ VPGATHERDQ 360(AX)(Y2*4), K1, Z1 // 62f2fd49904c902d
+ VPGATHERDQ 640(BP)(Y15*8), K3, Z14 // 6232fd4b9074fd50
+ VPGATHERDQ 960(R10)(Y25*2), K7, Z24 // 6202fd4790444a78
+ VPGATHERDQ 1280(R10)(Y1*4), K4, Z0 // 62d2fd4c90848a00050000
+ VGATHERDPD 360(R15)(X30*2), K6, X20 // 6282fd069264772d
+ VGATHERDPD 640(R15)(X20*2), K6, X10 // 6252fd0692546750
+ VGATHERDPD 960(R15)(X10*2), K6, X20 // 6282fd0e92645778
+ VGATHERDPD 1280(R15)(X0*2), K6, X10 // 6252fd0e92944700050000
+ VGATHERDPD 360(R15)(X30*2), K6, Y20 // 6282fd269264772d
+ VGATHERDPD 640(R15)(X20*2), K6, Y10 // 6252fd2692546750
+ VGATHERDPD 960(R15)(X10*2), K6, Y20 // 6282fd2e92645778
+ VGATHERDPD 1280(R15)(X0*2), K6, Y10 // 6252fd2e92944700050000
+ VGATHERDPD 360(R15)(Y30*2), K6, Z20 // 6282fd469264772d
+ VGATHERDPD 640(R15)(Y20*2), K6, Z10 // 6252fd4692546750
+ VGATHERDPD 960(R15)(Y10*2), K6, Z20 // 6282fd4e92645778
+ VGATHERDPD 1280(R15)(Y0*2), K6, Z10 // 6252fd4e92944700050000
+ VGATHERDPS 360(R15)(X30*2), K6, X20 // 62827d069264775a
+ VGATHERDPS 640(R15)(X20*2), K6, X10 // 62527d0692946780020000
+ VGATHERDPS 960(R15)(X10*2), K6, X20 // 62827d0e92a457c0030000
+ VGATHERDPS 1280(R15)(X0*2), K6, X10 // 62527d0e92944700050000
+ VGATHERDPS 360(R15)(Y30*2), K6, Y20 // 62827d269264775a
+ VGATHERDPS 640(R15)(Y20*2), K6, Y10 // 62527d2692946780020000
+ VGATHERDPS 960(R15)(Y10*2), K6, Y20 // 62827d2e92a457c0030000
+ VGATHERDPS 1280(R15)(Y0*2), K6, Y10 // 62527d2e92944700050000
+ VGATHERDPS 360(R15)(Z30*2), K6, Z20 // 62827d469264775a
+ VGATHERDPS 640(R15)(Z20*2), K6, Z10 // 62527d4692946780020000
+ VGATHERDPS 960(R15)(Z10*2), K6, Z20 // 62827d4e92a457c0030000
+ VGATHERDPS 1280(R15)(Z0*2), K6, Z10 // 62527d4e92944700050000
+ VGATHERQPS 360(R15)(X30*2), K6, X20 // 62827d069364775a
+ VGATHERQPS 640(R15)(X20*2), K6, X10 // 62527d0693946780020000
+ VGATHERQPS 960(R15)(X10*2), K6, X20 // 62827d0e93a457c0030000
+ VGATHERQPS 1280(R15)(X0*2), K6, X10 // 62527d0e93944700050000
+ VGATHERQPS 360(R15)(Y30*2), K6, X20 // 62827d269364775a
+ VGATHERQPS 640(R15)(Y20*2), K6, X10 // 62527d2693946780020000
+ VGATHERQPS 960(R15)(Y10*2), K6, X20 // 62827d2e93a457c0030000
+ VGATHERQPS 1280(R15)(Y0*2), K6, X10 // 62527d2e93944700050000
+ VGATHERQPS 360(R15)(Z30*2), K6, Y20 // 62827d469364775a
+ VGATHERQPS 640(R15)(Z20*2), K6, Y10 // 62527d4693946780020000
+ VGATHERQPS 960(R15)(Z10*2), K6, Y20 // 62827d4e93a457c0030000
+ VGATHERQPS 1280(R15)(Z0*2), K6, Y10 // 62527d4e93944700050000
+ VPGATHERQD 360(R15)(X30*2), K6, X20 // 62827d069164775a
+ VPGATHERQD 640(R15)(X20*2), K6, X10 // 62527d0691946780020000
+ VPGATHERQD 960(R15)(X10*2), K6, X20 // 62827d0e91a457c0030000
+ VPGATHERQD 1280(R15)(X0*2), K6, X10 // 62527d0e91944700050000
+ VPGATHERQD 360(R15)(Y30*2), K6, X20 // 62827d269164775a
+ VPGATHERQD 640(R15)(Y20*2), K6, X10 // 62527d2691946780020000
+ VPGATHERQD 960(R15)(Y10*2), K6, X20 // 62827d2e91a457c0030000
+ VPGATHERQD 1280(R15)(Y0*2), K6, X10 // 62527d2e91944700050000
+ VPGATHERQD 360(R15)(Z30*2), K6, Y20 // 62827d469164775a
+ VPGATHERQD 640(R15)(Z20*2), K6, Y10 // 62527d4691946780020000
+ VPGATHERQD 960(R15)(Z10*2), K6, Y20 // 62827d4e91a457c0030000
+ VPGATHERQD 1280(R15)(Z0*2), K6, Y10 // 62527d4e91944700050000
+ VPGATHERQQ 360(R15)(X30*2), K6, X20 // 6282fd069164772d
+ VPGATHERQQ 640(R15)(X20*2), K6, X10 // 6252fd0691546750
+ VPGATHERQQ 960(R15)(X10*2), K6, X20 // 6282fd0e91645778
+ VPGATHERQQ 1280(R15)(X0*2), K6, X10 // 6252fd0e91944700050000
+ VPGATHERQQ 360(R15)(Y30*2), K6, Y20 // 6282fd269164772d
+ VPGATHERQQ 640(R15)(Y20*2), K6, Y10 // 6252fd2691546750
+ VPGATHERQQ 960(R15)(Y10*2), K6, Y20 // 6282fd2e91645778
+ VPGATHERQQ 1280(R15)(Y0*2), K6, Y10 // 6252fd2e91944700050000
+ VPGATHERQQ 360(R15)(Z30*2), K6, Z20 // 6282fd469164772d
+ VPGATHERQQ 640(R15)(Z20*2), K6, Z10 // 6252fd4691546750
+ VPGATHERQQ 960(R15)(Z10*2), K6, Z20 // 6282fd4e91645778
+ VPGATHERQQ 1280(R15)(Z0*2), K6, Z10 // 6252fd4e91944700050000
+ VGATHERQPD 360(R15)(X30*2), K6, X20 // 6282fd069364772d
+ VGATHERQPD 640(R15)(X20*2), K6, X10 // 6252fd0693546750
+ VGATHERQPD 960(R15)(X10*2), K6, X20 // 6282fd0e93645778
+ VGATHERQPD 1280(R15)(X0*2), K6, X10 // 6252fd0e93944700050000
+ VGATHERQPD 360(R15)(Y30*2), K6, Y20 // 6282fd269364772d
+ VGATHERQPD 640(R15)(Y20*2), K6, Y10 // 6252fd2693546750
+ VGATHERQPD 960(R15)(Y10*2), K6, Y20 // 6282fd2e93645778
+ VGATHERQPD 1280(R15)(Y0*2), K6, Y10 // 6252fd2e93944700050000
+ VGATHERQPD 360(R15)(Z30*2), K6, Z20 // 6282fd469364772d
+ VGATHERQPD 640(R15)(Z20*2), K6, Z10 // 6252fd4693546750
+ VGATHERQPD 960(R15)(Z10*2), K6, Z20 // 6282fd4e93645778
+ VGATHERQPD 1280(R15)(Z0*2), K6, Z10 // 6252fd4e93944700050000
+ // EVEX: corner cases for High-16 registers.
+ VADDPD X31, X16, X15 // 6211fd0058ff
+ VADDPD X23, X15, X16 // 62a1850858c7
+ VADDPD Y31, Y16, Y15 // 6211fd2058ff
+ VADDPD Y23, Y15, Y16 // 62a1852858c7
+ VADDPD Z31, Z16, Z15 // 6211fd4058ff
+ VADDPD Z23, Z15, Z16 // 62a1854858c7
+ VGATHERQPD (DX)(X16*1),K1,X31 // 6262fd01933c02
+ VGATHERQPD (DX)(X31*1),K1,X16 // 62a2fd0193043a
+ VGATHERQPD (DX)(X15*1),K1,X23 // 62a2fd09933c3a
+ VGATHERQPD (DX)(X23*1),K1,X15 // 6272fd01933c3a
+ VGATHERQPD (DX)(Y16*1),K1,Y31 // 6262fd21933c02
+ VGATHERQPD (DX)(Y31*1),K1,Y16 // 62a2fd2193043a
+ VGATHERQPD (DX)(Y15*1),K1,Y23 // 62a2fd29933c3a
+ VGATHERQPD (DX)(Y23*1),K1,Y15 // 6272fd21933c3a
+ VGATHERQPD (DX)(Z16*1),K1,Z31 // 6262fd41933c02
+ VGATHERQPD (DX)(Z31*1),K1,Z16 // 62a2fd4193043a
+ VGATHERQPD (DX)(Z15*1),K1,Z23 // 62a2fd49933c3a
+ VGATHERQPD (DX)(Z23*1),K1,Z15 // 6272fd41933c3a
+ // EVEX: VCVTPD2DQ with Y suffix (VL=2).
+ VCVTPD2DQY (BX), X20 // 62e1ff28e623
+ VCVTPD2DQY (R11), X30 // 6241ff28e633
+ // XED encoder uses EVEX.X=0 for these; most x86 tools use EVEX.X=1.
+ // Either way is OK.
+ VMOVQ SP, X20 // 62e1fd086ee4 or 62a1fd086ee4
+ VMOVQ BP, X20 // 62e1fd086ee5 or 62a1fd086ee5
+ VMOVQ R14, X20 // 62c1fd086ee6 or 6281fd086ee6
+ // "VMOVQ r/m64, xmm1"/6E vs "VMOVQ xmm2/m64, xmm1"/7E with mem operand.
+ VMOVQ (AX), X20 // 62e1fd086e20 or 62e1fe087e20
+ VMOVQ 7(DX), X20 // 62e1fd086ea207000000 or 62e1fe087ea207000000
+ VMOVQ -15(R11)(CX*1), X20 // 62c1fd086ea40bf1ffffff or 62c1fe087ea40bf1ffffff
+ VMOVQ (SP)(AX*2), X20 // 62e1fd086e2444 or 62e1fe087e2444
+ // "VMOVQ xmm1, r/m64"/7E vs "VMOVQ xmm1, xmm2/m64"/D6 with mem operand.
+ VMOVQ X20, (AX) // 62e1fd087e20 or 62e1fd08d620
+ VMOVQ X20, 7(DX) // 62e1fd087ea207000000 or 62e1fd08d6a207000000
+ VMOVQ X20, -15(R11)(CX*1) // 62c1fd087ea40bf1ffffff or 62c1fd08d6a40bf1ffffff
+ VMOVQ X20, (SP)(AX*2) // 62e1fd087e2444 or 62e1fd08d62444
+ // VMOVHPD: overlapping VEX and EVEX variants.
+ VMOVHPD (AX), X5, X5 // c5d11628 or c4e1d11628 or 62f1d5281628 or 62f1d5481628
+ VMOVHPD 7(DX), X5, X5 // c5d1166a07 or 62f1d52816aa07000000 or 62f1d54816aa07000000
+ VMOVHPD -15(R11)(CX*1), X5, X5 // c4c151166c0bf1 or 62d1d52816ac0bf1ffffff or 62d1d54816ac0bf1ffffff
+ VMOVHPD (SP)(AX*2), X5, X5 // c5d1162c44 or c4e1d1162c44 or 62f1d528162c44 or 62f1d548162c44
+ VMOVHPD (AX), X8, X5 // c5b91628 or c4e1b91628 or 62f1bd281628 or 62f1bd481628
+ VMOVHPD 7(DX), X8, X5 // c5b9166a07 or 62f1bd2816aa07000000 or 62f1bd4816aa07000000
+ VMOVHPD -15(R11)(CX*1), X8, X5 // c4c139166c0bf1 or 62d1bd4816ac0bf1ffffff
+ VMOVHPD (SP)(AX*2), X8, X5 // c5b9162c44 or c4e1b9162c44 or 62f1bd28162c44 or 62f1bd48162c44
+ VMOVHPD (AX), X20, X5 // 62f1dd001628 or 62f1dd201628 or 62f1dd401628
+ VMOVHPD 7(DX), X20, X5 // 62f1dd0016aa07000000 or 62f1dd2016aa07000000 or 62f1dd4016aa07000000
+ VMOVHPD -15(R11)(CX*1), X20, X5 // 62d1dd0016ac0bf1ffffff or 62d1dd2016ac0bf1ffffff or 62d1dd4016ac0bf1ffffff
+ VMOVHPD (SP)(AX*2), X20, X5 // 62f1dd00162c44 or 62f1dd20162c44 or 62f1dd40162c44
+ VMOVHPD (AX), X5, X8 // c5511600 or c461d11600 or 6271d5281600 or 6271d5481600
+ VMOVHPD 7(DX), X5, X8 // c551164207 or 6271d528168207000000 or 6271d548168207000000
+ VMOVHPD -15(R11)(CX*1), X5, X8 // c4415116440bf1 or 6251d52816840bf1ffffff or 6251d54816840bf1ffffff
+ VMOVHPD (SP)(AX*2), X5, X8 // c551160444 or 6271d528160444 or 6271d548160444
+ VMOVHPD (AX), X8, X8 // c5391600 or 6271bd281600 or 6271bd481600
+ VMOVHPD 7(DX), X8, X8 // c539164207 or 6271bd28168207000000 or 6271bd48168207000000
+ VMOVHPD -15(R11)(CX*1), X8, X8 // c4413916440bf1 or 6251bd2816840bf1ffffff or 6251bd4816840bf1ffffff
+ VMOVHPD (SP)(AX*2), X8, X8 // c539160444 or 6271bd28160444 or 6271bd48160444
+ VMOVHPD (AX), X20, X8 // 6271dd001600 or 6271dd201600 or 6271dd401600
+ VMOVHPD 7(DX), X20, X8 // 6271dd00168207000000 or 6271dd20168207000000 or 6271dd40168207000000
+ VMOVHPD -15(R11)(CX*1), X20, X8 // 6251dd0016840bf1ffffff or 6251dd2016840bf1ffffff or 6251dd4016840bf1ffffff
+ VMOVHPD (SP)(AX*2), X20, X8 // 6271dd00160444 or 6271dd20160444 or 6271dd40160444
+ VMOVHPD (AX), X5, X20 // 62e1d5081620 or 62e1d5281620 or 62e1d5481620
+ VMOVHPD 7(DX), X5, X20 // 62e1d50816a207000000 or 62e1d52816a207000000 or 62e1d54816a207000000
+ VMOVHPD -15(R11)(CX*1), X5, X20 // 62c1d50816a40bf1ffffff or 62c1d52816a40bf1ffffff or 62c1d54816a40bf1ffffff
+ VMOVHPD (SP)(AX*2), X5, X20 // 62e1d508162444 or 62e1d528162444 or 62e1d548162444
+ VMOVHPD (AX), X8, X20 // 62e1bd081620 or 62e1bd281620 or 62e1bd481620
+ VMOVHPD 7(DX), X8, X20 // 62e1bd0816a207000000 or 62e1bd2816a207000000 or 62e1bd4816a207000000
+ VMOVHPD -15(R11)(CX*1), X8, X20 // 62c1bd0816a40bf1ffffff or 62c1bd2816a40bf1ffffff or 62c1bd4816a40bf1ffffff
+ VMOVHPD (SP)(AX*2), X8, X20 // 62e1bd08162444 or 62e1bd28162444 or 62e1bd48162444
+ VMOVHPD (AX), X20, X20 // 62e1dd001620 or 62e1dd201620 or 62e1dd401620
+ VMOVHPD 7(DX), X20, X20 // 62e1dd0016a207000000 or 62e1dd2016a207000000 or 62e1dd4016a207000000
+ VMOVHPD -15(R11)(CX*1), X20, X20 // 62c1dd0016a40bf1ffffff or 62c1dd2016a40bf1ffffff or 62c1dd4016a40bf1ffffff
+ VMOVHPD (SP)(AX*2), X20, X20 // 62e1dd00162444 or 62e1dd20162444 or 62e1dd40162444
+ VMOVHPD X5, (AX) // c5f91728 or 62f1fd281728 or 62f1fd481728
+ VMOVHPD X8, (AX) // c5791700 or 6271fd281700 or 6271fd481700
+ VMOVHPD X20, (AX) // 62e1fd081720 or 62e1fd281720 or 62e1fd481720
+ VMOVHPD X5, 7(DX) // c5f9176a07 or 62f1fd2817aa07000000 or 62f1fd4817aa07000000
+ VMOVHPD X8, 7(DX) // c579174207 or 6271fd28178207000000 or 6271fd48178207000000
+ VMOVHPD X20, 7(DX) // 62e1fd0817a207000000 or 62e1fd2817a207000000 or 62e1fd4817a207000000
+ VMOVHPD X5, -15(R11)(CX*1) // c4c179176c0bf1 or 62d1fd2817ac0bf1ffffff or 62d1fd4817ac0bf1ffffff
+ VMOVHPD X8, -15(R11)(CX*1) // c4417917440bf1 or 6251fd2817840bf1ffffff or 6251fd4817840bf1ffffff
+ VMOVHPD X20, -15(R11)(CX*1) // 62c1fd0817a40bf1ffffff or 62c1fd2817a40bf1ffffff or 62c1fd4817a40bf1ffffff
+ VMOVHPD X5, (SP)(AX*2) // c5f9172c44 or 62f1fd28172c44 or 62f1fd48172c44
+ VMOVHPD X8, (SP)(AX*2) // c579170444 or 6271fd28170444 or 6271fd48170444
+ VMOVHPD X20, (SP)(AX*2) // 62e1fd08172444 or 62e1fd28172444 or 62e1fd48172444
+ // VMOVLPD: overlapping VEX and EVEX variants.
+ VMOVLPD (AX), X5, X5 // c5d11228 or 62f1d5281228 or 62f1d5481228
+ VMOVLPD 7(DX), X5, X5 // c5d1126a07 or 62f1d52812aa07000000 or 62f1d54812aa07000000
+ VMOVLPD -15(R11)(CX*1), X5, X5 // c4c151126c0bf1 or 62d1d52812ac0bf1ffffff or 62d1d54812ac0bf1ffffff
+ VMOVLPD (SP)(AX*2), X5, X5 // c5d1122c44 or 62f1d528122c44 or 62f1d548122c44
+ VMOVLPD (AX), X8, X5 // c5b91228 or 62f1bd281228 or 62f1bd481228
+ VMOVLPD 7(DX), X8, X5 // c5b9126a07 or 62f1bd2812aa07000000 or 62f1bd4812aa07000000
+ VMOVLPD -15(R11)(CX*1), X8, X5 // c4c139126c0bf1 or 62d1bd2812ac0bf1ffffff or 62d1bd4812ac0bf1ffffff
+ VMOVLPD (SP)(AX*2), X8, X5 // c5b9122c44 or 62f1bd28122c44 or 62f1bd48122c44
+ VMOVLPD (AX), X20, X5 // 62f1dd001228 or 62f1dd201228 or 62f1dd401228
+ VMOVLPD 7(DX), X20, X5 // 62f1dd0012aa07000000 or 62f1dd2012aa07000000 or 62f1dd4012aa07000000
+ VMOVLPD -15(R11)(CX*1), X20, X5 // 62d1dd0012ac0bf1ffffff or 62d1dd2012ac0bf1ffffff or 62d1dd4012ac0bf1ffffff
+ VMOVLPD (SP)(AX*2), X20, X5 // 62f1dd00122c44 or 62f1dd20122c44 or 62f1dd40122c44
+ VMOVLPD (AX), X5, X8 // c5511200 or 6271d5281200 or 6271d5481200
+ VMOVLPD 7(DX), X5, X8 // c551124207 or 6271d528128207000000 or 6271d548128207000000
+ VMOVLPD -15(R11)(CX*1), X5, X8 // c4415112440bf1 or 6251d52812840bf1ffffff or 6251d54812840bf1ffffff
+ VMOVLPD (SP)(AX*2), X5, X8 // c551120444 or 6271d528120444 or 6271d548120444
+ VMOVLPD (AX), X8, X8 // c5391200 or 6271bd281200 or 6271bd481200
+ VMOVLPD 7(DX), X8, X8 // c539124207 or 6271bd28128207000000 or 6271bd48128207000000
+ VMOVLPD -15(R11)(CX*1), X8, X8 // c4413912440bf1 or 6251bd2812840bf1ffffff or 6251bd4812840bf1ffffff
+ VMOVLPD (SP)(AX*2), X8, X8 // c539120444 or 6271bd28120444 or 6271bd48120444
+ VMOVLPD (AX), X20, X8 // 6271dd001200 or 6271dd201200 or 6271dd401200
+ VMOVLPD 7(DX), X20, X8 // 6271dd00128207000000 or 6271dd20128207000000 or 6271dd40128207000000
+ VMOVLPD -15(R11)(CX*1), X20, X8 // 6251dd0012840bf1ffffff or 6251dd2012840bf1ffffff or 6251dd4012840bf1ffffff
+ VMOVLPD (SP)(AX*2), X20, X8 // 6271dd00120444 or 6271dd20120444 or 6271dd40120444
+ VMOVLPD (AX), X5, X20 // 62e1d5081220 or 62e1d5281220 or 62e1d5481220
+ VMOVLPD 7(DX), X5, X20 // 62e1d50812a207000000 or 62e1d52812a207000000 or 62e1d54812a207000000
+ VMOVLPD -15(R11)(CX*1), X5, X20 // 62c1d50812a40bf1ffffff or 62c1d52812a40bf1ffffff or 62c1d54812a40bf1ffffff
+ VMOVLPD (SP)(AX*2), X5, X20 // 62e1d508122444 or 62e1d528122444 or 62e1d548122444
+ VMOVLPD (AX), X8, X20 // 62e1bd081220 or 62e1bd281220 or 62e1bd481220
+ VMOVLPD 7(DX), X8, X20 // 62e1bd0812a207000000 or 62e1bd2812a207000000 or 62e1bd4812a207000000
+ VMOVLPD -15(R11)(CX*1), X8, X20 // 62c1bd0812a40bf1ffffff or 62c1bd2812a40bf1ffffff or 62c1bd4812a40bf1ffffff
+ VMOVLPD (SP)(AX*2), X8, X20 // 62e1bd08122444 or 62e1bd28122444 or 62e1bd48122444
+ VMOVLPD (AX), X20, X20 // 62e1dd001220 or 62e1dd201220 or 62e1dd401220
+ VMOVLPD 7(DX), X20, X20 // 62e1dd0012a207000000 or 62e1dd2012a207000000 or 62e1dd4012a207000000
+ VMOVLPD -15(R11)(CX*1), X20, X20 // 62c1dd0012a40bf1ffffff or 62c1dd2012a40bf1ffffff or 62c1dd4012a40bf1ffffff
+ VMOVLPD (SP)(AX*2), X20, X20 // 62e1dd00122444 or 62e1dd20122444 or 62e1dd40122444
+ VMOVLPD X5, (AX) // c5f91328 or 62f1fd281328 or 62f1fd481328
+ VMOVLPD X8, (AX) // c5791300 or 6271fd281300 or 6271fd481300
+ VMOVLPD X20, (AX) // 62e1fd081320 or 62e1fd281320 or 62e1fd481320
+ VMOVLPD X5, 7(DX) // c5f9136a07 or 62f1fd2813aa07000000 or 62f1fd4813aa07000000
+ VMOVLPD X8, 7(DX) // c579134207 or 6271fd28138207000000 or 6271fd48138207000000
+ VMOVLPD X20, 7(DX) // 62e1fd0813a207000000 or 62e1fd2813a207000000 or 62e1fd4813a207000000
+ VMOVLPD X5, -15(R11)(CX*1) // c4c179136c0bf1 or 62d1fd2813ac0bf1ffffff or 62d1fd4813ac0bf1ffffff
+ VMOVLPD X8, -15(R11)(CX*1) // c4417913440bf1 or 6251fd2813840bf1ffffff or 6251fd4813840bf1ffffff
+ VMOVLPD X20, -15(R11)(CX*1) // 62c1fd0813a40bf1ffffff or 62c1fd2813a40bf1ffffff or 62c1fd4813a40bf1ffffff
+ VMOVLPD X5, (SP)(AX*2) // c5f9132c44 or 62f1fd28132c44 or 62f1fd48132c44
+ VMOVLPD X8, (SP)(AX*2) // c579130444 or 6271fd28130444 or 6271fd48130444
+ VMOVLPD X20, (SP)(AX*2) // 62e1fd08132444 or 62e1fd28132444 or 62e1fd48132444
+ // "VPEXTRW imm8u, xmm1, r32/m16"/15 vs "VPEXTRW imm8u, xmm2, r32"/C5.
+ VPEXTRW $17, X20, AX // 62b17d08c5c411 or 62e37d0815e011 or 62e3fd0815e011
+ VPEXTRW $127, X20, AX // 62b17d08c5c47f or 62e37d0815e07f or 62e3fd0815e07f
+ VPEXTRW $17, X20, SP // 62b17d08c5e411 or 62e37d0815e411 or 62e3fd0815e411
+ VPEXTRW $127, X20, SP // 62b17d08c5e47f or 62e37d0815e47f or 62e3fd0815e47f
+ VPEXTRW $17, X20, BP // 62b17d08c5ec11 or 62e37d0815e511 or 62e3fd0815e511
+ VPEXTRW $127, X20, BP // 62b17d08c5ec7f or 62e37d0815e57f or 62e3fd0815e57f
+ VPEXTRW $17, X20, R14 // 62317d08c5f411 or 62c37d0815e611 or 62c3fd0815e611
+ VPEXTRW $127, X20, R14 // 62317d08c5f47f or 62c37d0815e67f or 62c3fd0815e67f
+ VPEXTRW $17, X20, (AX) // 62e37d08152011 or 62e3fd08152011
+ VPEXTRW $127, X20, (AX) // 62e37d0815207f or 62e3fd0815207f
+ VPEXTRW $17, X20, 7(DX) // 62e37d0815a20700000011 or 62e3fd0815a20700000011
+ VPEXTRW $127, X20, 7(DX) // 62e37d0815a2070000007f or 62e3fd0815a2070000007f
+ VPEXTRW $17, X20, -15(R11)(CX*1) // 62c37d0815a40bf1ffffff11 or 62c3fd0815a40bf1ffffff11
+ VPEXTRW $127, X20, -15(R11)(CX*1) // 62c37d0815a40bf1ffffff7f or 62c3fd0815a40bf1ffffff7f
+ VPEXTRW $17, X20, (SP)(AX*2) // 62e37d0815244411 or 62e3fd0815244411
+ VPEXTRW $127, X20, (SP)(AX*2) // 62e37d081524447f or 62e3fd081524447f
+ // EVEX: embedded zeroing.
+ VADDPD.Z X30, X1, X0 // 6291f58858c6
+ VMAXPD.Z (AX), Z2, K1, Z1 // 62f1edc95f08
+ // EVEX: embedded rounding.
+ VADDPD.RU_SAE Z3, Z2, K1, Z1 // 62f1ed5958cb
+ VADDPD.RD_SAE Z3, Z2, K1, Z1 // 62f1ed3958cb
+ VADDPD.RZ_SAE Z3, Z2, K1, Z1 // 62f1ed7958cb
+ VADDPD.RN_SAE Z3, Z2, K1, Z1 // 62f1ed1958cb
+ VADDPD.RU_SAE.Z Z3, Z2, K1, Z1 // 62f1edd958cb
+ VADDPD.RD_SAE.Z Z3, Z2, K1, Z1 // 62f1edb958cb
+ VADDPD.RZ_SAE.Z Z3, Z2, K1, Z1 // 62f1edf958cb
+ VADDPD.RN_SAE.Z Z3, Z2, K1, Z1 // 62f1ed9958cb
+ // EVEX: embedded broadcasting.
+ VADDPD.BCST (AX), X2, K1, X1 // 62f1ed195808
+ VADDPD.BCST.Z (AX), X2, K1, X1 // 62f1ed995808
+ VADDPD.BCST (AX), Y2, K1, Y1 // 62f1ed395808
+ VADDPD.BCST.Z (AX), Y2, K1, Y1 // 62f1edb95808
+ VADDPD.BCST (AX), Z2, K1, Z1 // 62f1ed595808
+ VADDPD.BCST.Z (AX), Z2, K1, Z1 // 62f1edd95808
+ VMAXPD.BCST (AX), Z2, K1, Z1 // 62f1ed595f08
+ VMAXPD.BCST.Z (AX), Z2, K1, Z1 // 62f1edd95f08
+ // EVEX: suppress all exceptions (SAE).
+ VMAXPD.SAE Z3, Z2, K1, Z1 // 62f1ed595fcb or 62f1ed195fcb
+ VMAXPD.SAE.Z Z3, Z2, K1, Z1 // 62f1edd95fcb or 62f1ed995fcb
+ VMAXPD (AX), Z2, K1, Z1 // 62f1ed495f08
+ VCMPSD.SAE $0, X0, X2, K0 // 62f1ef18c2c000
+ VCMPSD.SAE $0, X0, X2, K1, K0 // 62f1ef19c2c000
+ // EVEX: broadcast-affected compressed displacement (Disp8).
+ VADDPD.BCST 1016(DX), X0, X29 // 6261fd18586a7f
+ VADDPD.BCST 1016(DX), X29, X1 // 62f19510584a7f
+ VADDPD.BCST 1016(DX), X28, X29 // 62619d10586a7f
+ VADDPD.BCST 1016(DX)(AX*2), X0, X29 // 6261fd18586c427f
+ VADDPD.BCST 1016(DX)(AX*2), X29, X1 // 62f19510584c427f
+ VADDPD.BCST 1016(DX), Y0, Y29 // 6261fd38586a7f
+ VADDPD.BCST 1016(DX), Y29, Y1 // 62f19530584a7f
+ VADDPD.BCST 1016(DX), Y28, Y29 // 62619d30586a7f
+ VADDPD.BCST 1016(DX)(AX*2), Y0, Y29 // 6261fd38586c427f
+ VADDPD.BCST 1016(DX)(AX*2), Y29, Y1 // 62f19530584c427f
+ VADDPD.BCST 1016(DX), Z0, Z29 // 6261fd58586a7f
+ VADDPD.BCST 1016(DX), Z29, Z1 // 62f19550584a7f
+ VADDPD.BCST 1016(DX), Z28, Z29 // 62619d50586a7f
+ VADDPD.BCST 1016(DX)(AX*2), Z0, Z29 // 6261fd58586c427f
+ VADDPD.BCST 1016(DX)(AX*2), Z29, Z1 // 62f19550584c427f
+ VADDPS.BCST 508(DX), Z0, Z29 // 62617c58586a7f
+ VADDPS.BCST 508(DX), Z1, Z29 // 62617458586a7f
+ VADDPS.BCST 508(DX), Z28, Z29 // 62611c50586a7f
+ VADDPS.BCST 508(DX)(AX*2), Z0, Z29 // 62617c58586c427f
+ VADDPS.BCST 508(DX)(AX*2), Z1, Z29 // 62617458586c427f
+ // EVEX: broadcast-affected compressed displacement that does not fit into 8 bits.
+ VADDPD.BCST 2032(DX), X0, X29 // 6261fd1858aaf0070000
+ VADDPD.BCST 2032(DX), X29, X1 // 62f19510588af0070000
+ VADDPD.BCST 2032(DX), X28, X29 // 62619d1058aaf0070000
+ VADDPD.BCST 2032(DX)(AX*2), X0, X29 // 6261fd1858ac42f0070000
+ VADDPD.BCST 2032(DX)(AX*2), X29, X1 // 62f19510588c42f0070000
+ VADDPD.BCST 2032(DX), Y0, Y29 // 6261fd3858aaf0070000
+ VADDPD.BCST 2032(DX), Y29, Y1 // 62f19530588af0070000
+ VADDPD.BCST 2032(DX), Y28, Y29 // 62619d3058aaf0070000
+ VADDPD.BCST 2032(DX)(AX*2), Y0, Y29 // 6261fd3858ac42f0070000
+ VADDPD.BCST 2032(DX)(AX*2), Y29, Y1 // 62f19530588c42f0070000
+ VADDPD.BCST 2032(DX), Z0, Z29 // 6261fd5858aaf0070000
+ VADDPD.BCST 2032(DX), Z29, Z1 // 62f19550588af0070000
+ VADDPD.BCST 2032(DX), Z28, Z29 // 62619d5058aaf0070000
+ VADDPD.BCST 2032(DX)(AX*2), Z0, Z29 // 6261fd5858ac42f0070000
+ VADDPD.BCST 2032(DX)(AX*2), Z29, Z1 // 62f19550588c42f0070000
+ VADDPS.BCST 2032(DX), Z0, Z29 // 62617c5858aaf0070000
+ VADDPS.BCST 2032(DX), Z1, Z29 // 6261745858aaf0070000
+ VADDPS.BCST 2032(DX), Z28, Z29 // 62611c5058aaf0070000
+ VADDPS.BCST 2032(DX)(AX*2), Z0, Z29 // 62617c5858ac42f0070000
+ VADDPS.BCST 2032(DX)(AX*2), Z1, Z29 // 6261745858ac42f0070000
+ // Forced EVEX encoding due to suffixes.
+ VADDPD.BCST 2032(DX), X0, X0 // 62f1fd185882f0070000
+ VADDPD.BCST 2032(DX), Y0, Y0 // 62f1fd385882f0070000
+ // Test new Z-cases one-by-one.
+ //
+ // Zevex_i_r_k_rm.
+ VCVTPS2PH $1, Z2, K5, Y21 // 62b37d4d1dd501
+ VCVTPS2PH $2, Z21, K4, Y2 // 62e37d4c1dea02
+ // Zevex_i_r_rm.
+ VCVTPS2PH $1, Z2, Y21 // 62b37d481dd501
+ VCVTPS2PH $2, Z21, Y2 // 62e37d481dea02
+ // Zevex_i_rm_k_r.
+ VFPCLASSPDX $1, X2, K5, K3 // 62f3fd0d66da01
+ VFPCLASSPDX $2, X21, K4, K1 // 62b3fd0c66cd02
+ VFPCLASSPDX $1, (AX), K5, K3 // 62f3fd0d661801
+ VFPCLASSPDX $2, (CX), K4, K1 // 62f3fd0c660902
+ // Zevex_i_rm_k_vo.
+ VPROLD $1, X2, K5, X21 // 62f1550572ca01
+ VPROLD $2, Y21, K5, Y2 // 62b16d2d72cd02
+ VPROLD $1, (AX), K5, X21 // 62f15505720801
+ VPROLD $2, (CX), K5, Y2 // 62f16d2d720902
+ // Zevex_i_rm_r.
+ VFPCLASSPDX $1, X2, K3 // 62f3fd0866da01
+ VFPCLASSPDX $2, X21, K1 // 62b3fd0866cd02
+ VFPCLASSPDX $1, (AX), K3 // 62f3fd08661801
+ VFPCLASSPDX $2, (CX), K1 // 62f3fd08660902
+ // Zevex_i_rm_v_k_r.
+ VALIGND $1, X2, X9, K5, X21 // 62e3350d03ea01
+ VALIGND $2, Y21, Y2, K5, Y9 // 62336d2d03cd02
+ VALIGND $3, Z9, Z21, K5, Z2 // 62d3554503d103
+ VALIGND $1, (AX), X9, K5, X21 // 62e3350d032801
+ VALIGND $2, (CX), Y2, K5, Y9 // 62736d2d030902
+ VALIGND $3, (AX), Z21, K5, Z2 // 62f35545031003
+ // Zevex_i_rm_v_r.
+ VALIGND $1, X2, X9, X21 // 62e3350803ea01
+ VALIGND $2, Y21, Y2, Y9 // 62336d2803cd02
+ VALIGND $3, Z9, Z21, Z2 // 62d3554003d103
+ VALIGND $1, (AX), X9, X21 // 62e33508032801
+ VALIGND $2, (CX), Y2, Y9 // 62736d28030902
+ VALIGND $3, (AX), Z21, Z2 // 62f35540031003
+ // Zevex_i_rm_vo.
+ VPROLD $1, X2, X21 // 62f1550072ca01
+ VPROLD $2, Y21, Y2 // 62b16d2872cd02
+ VPROLD $1, (AX), X21 // 62f15500720801
+ VPROLD $2, (CX), Y2 // 62f16d28720902
+ // Zevex_k_rmo.
+ VGATHERPF0DPD K5, (AX)(Y2*2) // 62f2fd4dc60c50
+ VGATHERPF0DPD K3, (CX)(Y21*2) // 62f2fd43c60c69
+ VSCATTERPF1DPD K5, (AX)(Y2*2) // 62f2fd4dc63450
+ VSCATTERPF1DPD K3, (CX)(Y21*2) // 62f2fd43c63469
+ // Zevex_r_k_rm.
+ VPSCATTERDD X2, K5, (AX)(X21*2) // 62f27d05a01468
+ VPSCATTERDD X21, K5, (AX)(X2*2) // 62e27d0da02c50
+ VPSCATTERDD Y2, K5, (AX)(Y21*2) // 62f27d25a01468
+ VPSCATTERDD Y21, K5, (AX)(Y2*2) // 62e27d2da02c50
+ VPSCATTERDD Z2, K5, (AX)(Z21*2) // 62f27d45a01468
+ VPSCATTERDD Z21, K5, (AX)(Z2*2) // 62e27d4da02c50
+ // Zevex_r_v_k_rm.
+ VMOVSD X2, X9, K5, X21 // 62b1b70d11d5 or 62e1b70d10ea
+ VMOVSD X21, X2, K5, X9 // 62c1ef0d11e9 or 6231ef0d10cd
+ VMOVSD X9, X21, K5, X2 // 6271d70511ca or 62d1d70510d1
+ // Zevex_r_v_rm.
+ VMOVSD X2, X9, X21 // 62b1b70811d5 or 62e1b70810ea
+ VMOVSD X21, X2, X9 // 62c1ef0811e9 or 6231ef0810cd
+ VMOVSD X9, X21, X2 // 6271d70011ca or 62d1d70010d1
+ VPMOVDB X2, X21 // 62b27e0831d5
+ VPMOVDB X21, X2 // 62e27e0831ea
+ VPMOVDB X2, (AX) // 62f27e083110
+ VPMOVDB X21, (AX) // 62e27e083128
+ // Zevex_rm_k_r.
+ VMOVDDUP X2, K5, X21 // 62e1ff0d12ea
+ VMOVDDUP X21, K5, X2 // 62b1ff0d12d5
+ VMOVDDUP (AX), K5, X21 // 62e1ff0d1228
+ VMOVDDUP (CX), K5, X2 // 62f1ff0d1211
+ VMOVDDUP Y2, K5, Y21 // 62e1ff2d12ea
+ VMOVDDUP Y21, K5, Y2 // 62b1ff2d12d5
+ VMOVDDUP (AX), K5, Y21 // 62e1ff2d1228
+ VMOVDDUP (CX), K5, Y2 // 62f1ff2d1211
+ VMOVDDUP Z2, K5, Z21 // 62e1ff4d12ea
+ VMOVDDUP Z21, K5, Z2 // 62b1ff4d12d5
+ VMOVDDUP (AX), K5, Z21 // 62e1ff4d1228
+ VMOVDDUP (CX), K5, Z2 // 62f1ff4d1211
+ // Zevex_rm_v_k_r.
+ VADDPD Z2, Z9, K5, Z21 // 62e1b54d58ea
+ VADDPD Z21, Z2, K5, Z9 // 6231ed4d58cd
+ VADDPD Z9, Z21, K5, Z2 // 62d1d54558d1
+ // Zevex_rm_v_r.
+ VADDPD Z2, Z9, Z21 // 62e1b54858ea
+ VADDPD Z21, Z2, Z9 // 6231ed4858cd
+ VADDPD Z9, Z21, Z2 // 62d1d54058d1
+
+ CLWB (BX) // 660fae33
+ CLDEMOTE (BX) // 0f1c03
+ TPAUSE BX // 660faef3
+ UMONITOR BX // f30faef3
+ UMWAIT BX // f20faef3
+ // End of tests.
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/amd64error.s b/src/cmd/asm/internal/asm/testdata/amd64error.s
new file mode 100644
index 0000000..7e91fb4
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/amd64error.s
@@ -0,0 +1,143 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+TEXT errors(SB),$0
+ MOVL foo<>(SB)(AX), AX // ERROR "invalid instruction"
+ MOVL (AX)(SP*1), AX // ERROR "invalid instruction"
+ EXTRACTPS $4, X2, (BX) // ERROR "invalid instruction"
+ EXTRACTPS $-1, X2, (BX) // ERROR "invalid instruction"
+ // VSIB addressing does not permit non-vector (X/Y)
+ // scaled index register.
+ VPGATHERDQ X12,(R13)(AX*2), X11 // ERROR "invalid instruction"
+ VPGATHERDQ X2, 664(BX*1), X1 // ERROR "invalid instruction"
+ VPGATHERDQ Y2, (BP)(AX*2), Y1 // ERROR "invalid instruction"
+ VPGATHERDQ Y5, 664(DX*8), Y6 // ERROR "invalid instruction"
+ VPGATHERDQ Y5, (DX), Y0 // ERROR "invalid instruction"
+ // VM/X rejects Y index register.
+ VPGATHERDQ Y5, 664(Y14*8), Y6 // ERROR "invalid instruction"
+ VPGATHERQQ X2, (BP)(Y7*2), X1 // ERROR "invalid instruction"
+ // VM/Y rejects X index register.
+ VPGATHERQQ Y2, (BP)(X7*2), Y1 // ERROR "invalid instruction"
+ VPGATHERDD Y5, -8(X14*8), Y6 // ERROR "invalid instruction"
+ // No VSIB for legacy instructions.
+ MOVL (AX)(X0*1), AX // ERROR "invalid instruction"
+ MOVL (AX)(Y0*1), AX // ERROR "invalid instruction"
+ // VSIB/VM is invalid without vector index.
+ // TODO(quasilyte): improve error message (#21860).
+ // "invalid VSIB address (missing vector index)"
+ VPGATHERQQ Y2, (BP), Y1 // ERROR "invalid instruction"
+ // AVX2GATHER mask/index/dest #UD cases.
+ VPGATHERQQ Y2, (BP)(X2*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERQQ Y2, (BP)(X2*2), Y7 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERQQ Y2, (BP)(X7*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERQQ Y7, (BP)(X2*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERDQ X2, 664(X2*8), X2 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERDQ X2, 664(X2*8), X7 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERDQ X2, 664(X7*8), X2 // ERROR "mask, index, and destination registers should be distinct"
+ VPGATHERDQ X7, 664(X2*8), X2 // ERROR "mask, index, and destination registers should be distinct"
+ // Non-X0 for Yxr0 should produce an error
+ BLENDVPD X1, (BX), X2 // ERROR "invalid instruction"
+ // Check offset overflow. Must fit in int32.
+ MOVQ 2147483647+1(AX), AX // ERROR "offset too large"
+ MOVQ 3395469782(R10), R8 // ERROR "offset too large"
+ LEAQ 3395469782(AX), AX // ERROR "offset too large"
+ ADDQ 3395469782(AX), AX // ERROR "offset too large"
+ ADDL 3395469782(AX), AX // ERROR "offset too large"
+ ADDW 3395469782(AX), AX // ERROR "offset too large"
+ LEAQ 433954697820(AX), AX // ERROR "offset too large"
+ ADDQ 433954697820(AX), AX // ERROR "offset too large"
+ ADDL 433954697820(AX), AX // ERROR "offset too large"
+ ADDW 433954697820(AX), AX // ERROR "offset too large"
+ // Pseudo-registers should not be used as scaled index.
+ CALL (AX)(PC*1) // ERROR "invalid instruction"
+ CALL (AX)(SB*1) // ERROR "invalid instruction"
+ CALL (AX)(FP*1) // ERROR "invalid instruction"
+ // Forbid memory operands for MOV CR/DR. See #24981.
+ MOVQ CR0, (AX) // ERROR "invalid instruction"
+ MOVQ CR2, (AX) // ERROR "invalid instruction"
+ MOVQ CR3, (AX) // ERROR "invalid instruction"
+ MOVQ CR4, (AX) // ERROR "invalid instruction"
+ MOVQ CR8, (AX) // ERROR "invalid instruction"
+ MOVQ (AX), CR0 // ERROR "invalid instruction"
+ MOVQ (AX), CR2 // ERROR "invalid instruction"
+ MOVQ (AX), CR3 // ERROR "invalid instruction"
+ MOVQ (AX), CR4 // ERROR "invalid instruction"
+ MOVQ (AX), CR8 // ERROR "invalid instruction"
+ MOVQ DR0, (AX) // ERROR "invalid instruction"
+ MOVQ DR2, (AX) // ERROR "invalid instruction"
+ MOVQ DR3, (AX) // ERROR "invalid instruction"
+ MOVQ DR6, (AX) // ERROR "invalid instruction"
+ MOVQ DR7, (AX) // ERROR "invalid instruction"
+ MOVQ (AX), DR0 // ERROR "invalid instruction"
+ MOVQ (AX), DR2 // ERROR "invalid instruction"
+ MOVQ (AX), DR3 // ERROR "invalid instruction"
+ MOVQ (AX), DR6 // ERROR "invalid instruction"
+ MOVQ (AX), DR7 // ERROR "invalid instruction"
+ // AVX512GATHER index/destination #UD cases.
+ VPGATHERQQ (BP)(X2*2), K1, X2 // ERROR "index and destination registers should be distinct"
+ VPGATHERQQ (BP)(Y15*2), K1, Y15 // ERROR "index and destination registers should be distinct"
+ VPGATHERQQ (BP)(Z20*2), K1, Z20 // ERROR "index and destination registers should be distinct"
+ VPGATHERDQ (BP)(X2*2), K1, X2 // ERROR "index and destination registers should be distinct"
+ VPGATHERDQ (BP)(X15*2), K1, Y15 // ERROR "index and destination registers should be distinct"
+ VPGATHERDQ (BP)(Y20*2), K1, Z20 // ERROR "index and destination registers should be distinct"
+ // Instructions without EVEX variant can't use High-16 registers.
+ VADDSUBPD X20, X1, X2 // ERROR "invalid instruction"
+ VADDSUBPS X0, X20, X2 // ERROR "invalid instruction"
+ // Use of K0 for write mask (Yknot0).
+ // TODO(quasilyte): improve error message (#21860).
+ // "K0 can't be used for write mask"
+ VADDPD X0, X1, K0, X2 // ERROR "invalid instruction"
+ VADDPD Y0, Y1, K0, Y2 // ERROR "invalid instruction"
+ VADDPD Z0, Z1, K0, Z2 // ERROR "invalid instruction"
+ // VEX-encoded VSIB can't use High-16 registers as index (unlike EVEX).
+ // TODO(quasilyte): improve error message (#21860).
+ VPGATHERQQ X2, (BP)(X20*2), X3 // ERROR "invalid instruction"
+ VPGATHERQQ Y2, (BP)(Y20*2), Y3 // ERROR "invalid instruction"
+ // YzrMulti4 expects exactly 4 registers referenced by REG_LIST.
+ // TODO(quasilyte): improve error message (#21860).
+ V4FMADDPS (AX), [Z0-Z4], K1, Z7 // ERROR "invalid instruction"
+ V4FMADDPS (AX), [Z0-Z0], K1, Z7 // ERROR "invalid instruction"
+ // Invalid ranges in REG_LIST (low > high).
+ // TODO(quasilyte): improve error message (#21860).
+ V4FMADDPS (AX), [Z4-Z0], K1, Z7 // ERROR "invalid instruction"
+ V4FMADDPS (AX), [Z1-Z0], K1, Z7 // ERROR "invalid instruction"
+ // Mismatching registers in a range.
+ // TODO(quasilyte): improve error message (#21860).
+ V4FMADDPS (AX), [AX-Z3], K1, Z7 // ERROR "invalid instruction"
+ V4FMADDPS (AX), [Z0-AX], K1, Z7 // ERROR "invalid instruction"
+ // Usage of suffixes for non-EVEX instructions.
+ ADCB.Z $7, AL // ERROR "invalid instruction"
+ ADCB.RU_SAE $7, AL // ERROR "invalid instruction"
+ ADCB.RU_SAE.Z $7, AL // ERROR "invalid instruction"
+ // Usage of rounding with invalid operands.
+ VADDPD.RU_SAE X3, X2, K1, X1 // ERROR "unsupported rounding"
+ VADDPD.RD_SAE X3, X2, K1, X1 // ERROR "unsupported rounding"
+ VADDPD.RZ_SAE X3, X2, K1, X1 // ERROR "unsupported rounding"
+ VADDPD.RN_SAE X3, X2, K1, X1 // ERROR "unsupported rounding"
+ VADDPD.RU_SAE Y3, Y2, K1, Y1 // ERROR "unsupported rounding"
+ VADDPD.RD_SAE Y3, Y2, K1, Y1 // ERROR "unsupported rounding"
+ VADDPD.RZ_SAE Y3, Y2, K1, Y1 // ERROR "unsupported rounding"
+ VADDPD.RN_SAE Y3, Y2, K1, Y1 // ERROR "unsupported rounding"
+ // Unsupported SAE.
+ VMAXPD.SAE (AX), Z2, K1, Z1 // ERROR "illegal SAE with memory argument"
+ VADDPD.SAE X3, X2, K1, X1 // ERROR "unsupported SAE"
+ // Unsupported zeroing.
+ VFPCLASSPDX.Z $0, (AX), K2, K1 // ERROR "unsupported zeroing"
+ VFPCLASSPDY.Z $0, (AX), K2, K1 // ERROR "unsupported zeroing"
+ // Unsupported broadcast.
+ VFPCLASSSD.BCST $0, (AX), K2, K1 // ERROR "unsupported broadcast"
+ VFPCLASSSS.BCST $0, (AX), K2, K1 // ERROR "unsupported broadcast"
+ // Broadcast without memory operand.
+ VADDPD.BCST X3, X2, K1, X1 // ERROR "illegal broadcast without memory argument"
+ VADDPD.BCST X3, X2, K1, X1 // ERROR "illegal broadcast without memory argument"
+ VADDPD.BCST X3, X2, K1, X1 // ERROR "illegal broadcast without memory argument"
+ // CLWB instructions:
+ CLWB BX // ERROR "invalid instruction"
+ // CLDEMOTE instructions:
+ CLDEMOTE BX // ERROR "invalid instruction"
+ // WAITPKG instructions:
+ TPAUSE (BX) // ERROR "invalid instruction"
+ UMONITOR (BX) // ERROR "invalid instruction"
+ UMWAIT (BX) // ERROR "invalid instruction"
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/arm.s b/src/cmd/asm/internal/asm/testdata/arm.s
new file mode 100644
index 0000000..cc8e25e
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/arm.s
@@ -0,0 +1,1595 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input was created by taking the instruction productions in
+// the old assembler's (5a's) grammar and hand-writing complete
+// instructions for each rule, to guarantee we cover the same space.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB), DUPOK|NOSPLIT, $0
+
+// ADD
+//
+// LTYPE1 cond imsr ',' spreg ',' reg
+// {
+// outcode($1, $2, &$3, $5, &$7);
+// }
+// Cover some operand space here too.
+ ADD $1, R2, R3
+ ADD R1<<R2, R3, R4
+ ADD R1>>R2, R3, R4
+ ADD R1@>R2, R3, R4
+ ADD R1->R2, R3, R4
+ ADD R1, R2, R3
+ ADD R(1)<<R(2), R(3), R(4) // ADD R1<<R2, R3, R4
+
+// LTYPE1 cond imsr ',' spreg ',' // asm doesn't support trailing comma.
+// {
+// outcode($1, $2, &$3, $5, &nullgen);
+// }
+// LTYPE1 cond imsr ',' reg
+// {
+// outcode($1, $2, &$3, 0, &$5);
+// }
+ ADD $1, R2
+ ADD R1<<R2, R3
+ ADD R1>>R2, R3
+ ADD R1@>R2, R3
+ ADD R1->R2, R3
+ ADD R1, R2
+
+//
+// MVN
+//
+// LTYPE2 cond imsr ',' reg
+// {
+// outcode($1, $2, &$3, 0, &$5);
+// }
+ CLZ R1, R2
+
+//
+// MOVW
+//
+// LTYPE3 cond gen ',' gen
+// {
+// outcode($1, $2, &$3, 0, &$5);
+// }
+ MOVW.S R1, R2
+ MOVW $1, R2
+ MOVW.S R1<<R2, R3
+
+//
+// B/BL
+//
+// LTYPE4 cond comma rel
+// {
+// outcode($1, $2, &nullgen, 0, &$4);
+// }
+ B.EQ 1(PC) // BEQ 1(PC)
+
+// LTYPE4 cond comma nireg
+// {
+// outcode($1, $2, &nullgen, 0, &$4);
+// }
+ BEQ 2(PC)
+ B foo(SB) // JMP foo(SB)
+ BEQ 2(PC)
+ B bar<>(SB) // JMP bar<>(SB)
+
+//
+// BX
+//
+// LTYPEBX comma ireg
+// {
+// outcode($1, Always, &nullgen, 0, &$3);
+// }
+ BX (R0)
+
+//
+// BEQ
+//
+// LTYPE5 comma rel
+// {
+// outcode($1, Always, &nullgen, 0, &$3);
+// }
+ BEQ 1(PC)
+
+//
+// SWI
+//
+// LTYPE6 cond comma gen
+// {
+// outcode($1, $2, &nullgen, 0, &$4);
+// }
+ SWI $2
+ SWI $3
+// SWI foo(SB) - TODO: classifying foo(SB) as C_TLS_LE
+
+//
+// CMP
+//
+// LTYPE7 cond imsr ',' spreg
+// {
+// outcode($1, $2, &$3, $5, &nullgen);
+// }
+ CMP $1, R2
+ CMP R1<<R2, R3
+ CMP R1, R2
+
+//
+// MOVM
+//
+// LTYPE8 cond ioreg ',' '[' reglist ']'
+// {
+// var g obj.Addr
+//
+// g = nullgen;
+// g.Type = obj.TYPE_CONST;
+// g.Offset = int64($6);
+// outcode($1, $2, &$3, 0, &g);
+// }
+ MOVM 0(R1), [R2,R5,R8,g] // MOVM (R1), [R2,R5,R8,g]
+ MOVM (R1), [R2-R5] // MOVM (R1), [R2,R3,R4,R5]
+ MOVM (R1), [R2]
+
+// LTYPE8 cond '[' reglist ']' ',' ioreg
+// {
+// var g obj.Addr
+//
+// g = nullgen;
+// g.Type = obj.TYPE_CONST;
+// g.Offset = int64($4);
+// outcode($1, $2, &g, 0, &$7);
+// }
+ MOVM [R2,R5,R8,g], 0(R1) // MOVM [R2,R5,R8,g], (R1)
+ MOVM [R2-R5], (R1) // MOVM [R2,R3,R4,R5], (R1)
+ MOVM [R2], (R1)
+
+//
+// SWAP
+//
+// LTYPE9 cond reg ',' ireg ',' reg
+// {
+// outcode($1, $2, &$5, int32($3.Reg), &$7);
+// }
+ STREX R1, (R2), R3 // STREX (R2), R1, R3
+
+//
+// word
+//
+// LTYPEH comma ximm
+// {
+// outcode($1, Always, &nullgen, 0, &$3);
+// }
+ WORD $1234
+
+//
+// floating-point coprocessor
+//
+// LTYPEI cond freg ',' freg
+// {
+// outcode($1, $2, &$3, 0, &$5);
+// }
+ ABSF F1, F2
+
+// LTYPEK cond frcon ',' freg
+// {
+// outcode($1, $2, &$3, 0, &$5);
+// }
+ ADDD F1, F2
+ MOVF $0.5, F2 // MOVF $(0.5), F2
+
+// LTYPEK cond frcon ',' LFREG ',' freg
+// {
+// outcode($1, $2, &$3, $5, &$7);
+// }
+ ADDD F1, F2, F3
+
+// LTYPEL cond freg ',' freg
+// {
+// outcode($1, $2, &$3, int32($5.Reg), &nullgen);
+// }
+ CMPD F1, F2
+
+//
+// MCR MRC
+//
+// LTYPEJ cond con ',' expr ',' spreg ',' creg ',' creg oexpr
+// {
+// var g obj.Addr
+//
+// g = nullgen;
+// g.Type = obj.TYPE_CONST;
+// g.Offset = int64(
+// (0xe << 24) | /* opcode */
+// ($1 << 20) | /* MCR/MRC */
+// (($2^C_SCOND_XOR) << 28) | /* scond */
+// (($3 & 15) << 8) | /* coprocessor number */
+// (($5 & 7) << 21) | /* coprocessor operation */
+// (($7 & 15) << 12) | /* arm register */
+// (($9 & 15) << 16) | /* Crn */
+// (($11 & 15) << 0) | /* Crm */
+// (($12 & 7) << 5) | /* coprocessor information */
+// (1<<4)); /* must be set */
+// outcode(AMRC, Always, &nullgen, 0, &g);
+// }
+ MRC.S 4, 6, R1, C2, C3, 7 // MRC $8301712627
+ MCR.S 4, 6, R1, C2, C3, 7 // MRC $8300664051
+
+//
+// MULL r1,r2,(hi,lo)
+//
+// LTYPEM cond reg ',' reg ',' regreg
+// {
+// outcode($1, $2, &$3, int32($5.Reg), &$7);
+// }
+ MULL R1, R2, (R3,R4)
+
+//
+// MULA r1,r2,r3,r4: (r1*r2+r3) & 0xffffffff -> r4
+// MULAW{T,B} r1,r2,r3,r4
+//
+// LTYPEN cond reg ',' reg ',' reg ',' spreg
+// {
+// $7.Type = obj.TYPE_REGREG2;
+// $7.Offset = int64($9);
+// outcode($1, $2, &$3, int32($5.Reg), &$7);
+// }
+ MULAWT R1, R2, R3, R4
+//
+// PLD
+//
+// LTYPEPLD oreg
+// {
+// outcode($1, Always, &$2, 0, &nullgen);
+// }
+ PLD (R1)
+ PLD 4(R1)
+
+//
+// RET
+//
+// LTYPEA cond
+// {
+// outcode($1, $2, &nullgen, 0, &nullgen);
+// }
+ BEQ 2(PC)
+ RET
+
+// More B/BL cases, and canonical names JMP, CALL.
+
+ BEQ 2(PC)
+ B foo(SB) // JMP foo(SB)
+ BL foo(SB) // CALL foo(SB)
+ BEQ 2(PC)
+ JMP foo(SB)
+ CALL foo(SB)
+
+// CMPF and CMPD are special.
+ CMPF F1, F2
+ CMPD F1, F2
+
+// AND
+ AND $255, R0, R1 // ff1000e2
+ AND $4278190080, R0, R1 // ff1400e2
+ AND.S $255, R0, R1 // ff1010e2
+ AND.S $4278190080, R0, R1 // ff1410e2
+ AND $255, R0 // ff0000e2
+ AND $4278190080, R0 // ff0400e2
+ AND.S $255, R0 // ff0010e2
+ AND.S $4278190080, R0 // ff0410e2
+ AND R0, R1, R2 // 002001e0
+ AND.S R0, R1, R2 // 002011e0
+ AND R0, R1 // 001001e0
+ AND.S R0, R1 // 001011e0
+ AND R0>>28, R1, R2 // 202e01e0
+ AND R0<<28, R1, R2 // 002e01e0
+ AND R0->28, R1, R2 // 402e01e0
+ AND R0@>28, R1, R2 // 602e01e0
+ AND.S R0>>28, R1, R2 // 202e11e0
+ AND.S R0<<28, R1, R2 // 002e11e0
+ AND.S R0->28, R1, R2 // 402e11e0
+ AND.S R0@>28, R1, R2 // 602e11e0
+ AND R0<<28, R1 // 001e01e0
+ AND R0>>28, R1 // 201e01e0
+ AND R0->28, R1 // 401e01e0
+ AND R0@>28, R1 // 601e01e0
+ AND.S R0<<28, R1 // 001e11e0
+ AND.S R0>>28, R1 // 201e11e0
+ AND.S R0->28, R1 // 401e11e0
+ AND.S R0@>28, R1 // 601e11e0
+ AND R0<<R1, R2, R3 // 103102e0
+ AND R0>>R1, R2, R3 // 303102e0
+ AND R0->R1, R2, R3 // 503102e0
+ AND R0@>R1, R2, R3 // 703102e0
+ AND.S R0<<R1, R2, R3 // 103112e0
+ AND.S R0>>R1, R2, R3 // 303112e0
+ AND.S R0->R1, R2, R3 // 503112e0
+ AND.S R0@>R1, R2, R3 // 703112e0
+ AND R0<<R1, R2 // 102102e0
+ AND R0>>R1, R2 // 302102e0
+ AND R0->R1, R2 // 502102e0
+ AND R0@>R1, R2 // 702102e0
+ AND.S R0<<R1, R2 // 102112e0
+ AND.S R0>>R1, R2 // 302112e0
+ AND.S R0->R1, R2 // 502112e0
+ AND.S R0@>R1, R2 // 702112e0
+
+// EOR
+ EOR $255, R0, R1 // ff1020e2
+ EOR $4278190080, R0, R1 // ff1420e2
+ EOR.S $255, R0, R1 // ff1030e2
+ EOR.S $4278190080, R0, R1 // ff1430e2
+ EOR $255, R0 // ff0020e2
+ EOR $4278190080, R0 // ff0420e2
+ EOR.S $255, R0 // ff0030e2
+ EOR.S $4278190080, R0 // ff0430e2
+ EOR R0, R1, R2 // 002021e0
+ EOR.S R0, R1, R2 // 002031e0
+ EOR R0, R1 // 001021e0
+ EOR.S R0, R1 // 001031e0
+ EOR R0>>28, R1, R2 // 202e21e0
+ EOR R0<<28, R1, R2 // 002e21e0
+ EOR R0->28, R1, R2 // 402e21e0
+ EOR R0@>28, R1, R2 // 602e21e0
+ EOR.S R0>>28, R1, R2 // 202e31e0
+ EOR.S R0<<28, R1, R2 // 002e31e0
+ EOR.S R0->28, R1, R2 // 402e31e0
+ EOR.S R0@>28, R1, R2 // 602e31e0
+ EOR R0<<28, R1 // 001e21e0
+ EOR R0>>28, R1 // 201e21e0
+ EOR R0->28, R1 // 401e21e0
+ EOR R0@>28, R1 // 601e21e0
+ EOR.S R0<<28, R1 // 001e31e0
+ EOR.S R0>>28, R1 // 201e31e0
+ EOR.S R0->28, R1 // 401e31e0
+ EOR.S R0@>28, R1 // 601e31e0
+ EOR R0<<R1, R2, R3 // 103122e0
+ EOR R0>>R1, R2, R3 // 303122e0
+ EOR R0->R1, R2, R3 // 503122e0
+ EOR R0@>R1, R2, R3 // 703122e0
+ EOR.S R0<<R1, R2, R3 // 103132e0
+ EOR.S R0>>R1, R2, R3 // 303132e0
+ EOR.S R0->R1, R2, R3 // 503132e0
+ EOR.S R0@>R1, R2, R3 // 703132e0
+ EOR R0<<R1, R2 // 102122e0
+ EOR R0>>R1, R2 // 302122e0
+ EOR R0->R1, R2 // 502122e0
+ EOR R0@>R1, R2 // 702122e0
+ EOR.S R0<<R1, R2 // 102132e0
+ EOR.S R0>>R1, R2 // 302132e0
+ EOR.S R0->R1, R2 // 502132e0
+ EOR.S R0@>R1, R2 // 702132e0
+
+// ORR
+ ORR $255, R0, R1 // ff1080e3
+ ORR $4278190080, R0, R1 // ff1480e3
+ ORR.S $255, R0, R1 // ff1090e3
+ ORR.S $4278190080, R0, R1 // ff1490e3
+ ORR $255, R0 // ff0080e3
+ ORR $4278190080, R0 // ff0480e3
+ ORR.S $255, R0 // ff0090e3
+ ORR.S $4278190080, R0 // ff0490e3
+ ORR R0, R1, R2 // 002081e1
+ ORR.S R0, R1, R2 // 002091e1
+ ORR R0, R1 // 001081e1
+ ORR.S R0, R1 // 001091e1
+ ORR R0>>28, R1, R2 // 202e81e1
+ ORR R0<<28, R1, R2 // 002e81e1
+ ORR R0->28, R1, R2 // 402e81e1
+ ORR R0@>28, R1, R2 // 602e81e1
+ ORR.S R0>>28, R1, R2 // 202e91e1
+ ORR.S R0<<28, R1, R2 // 002e91e1
+ ORR.S R0->28, R1, R2 // 402e91e1
+ ORR.S R0@>28, R1, R2 // 602e91e1
+ ORR R0<<28, R1 // 001e81e1
+ ORR R0>>28, R1 // 201e81e1
+ ORR R0->28, R1 // 401e81e1
+ ORR R0@>28, R1 // 601e81e1
+ ORR.S R0<<28, R1 // 001e91e1
+ ORR.S R0>>28, R1 // 201e91e1
+ ORR.S R0->28, R1 // 401e91e1
+ ORR.S R0@>28, R1 // 601e91e1
+ ORR R0<<R1, R2, R3 // 103182e1
+ ORR R0>>R1, R2, R3 // 303182e1
+ ORR R0->R1, R2, R3 // 503182e1
+ ORR R0@>R1, R2, R3 // 703182e1
+ ORR.S R0<<R1, R2, R3 // 103192e1
+ ORR.S R0>>R1, R2, R3 // 303192e1
+ ORR.S R0->R1, R2, R3 // 503192e1
+ ORR.S R0@>R1, R2, R3 // 703192e1
+ ORR R0<<R1, R2 // 102182e1
+ ORR R0>>R1, R2 // 302182e1
+ ORR R0->R1, R2 // 502182e1
+ ORR R0@>R1, R2 // 702182e1
+ ORR.S R0<<R1, R2 // 102192e1
+ ORR.S R0>>R1, R2 // 302192e1
+ ORR.S R0->R1, R2 // 502192e1
+ ORR.S R0@>R1, R2 // 702192e1
+
+// SUB
+ SUB $255, R0, R1 // ff1040e2
+ SUB $4278190080, R0, R1 // ff1440e2
+ SUB.S $255, R0, R1 // ff1050e2
+ SUB.S $4278190080, R0, R1 // ff1450e2
+ SUB $255, R0 // ff0040e2
+ SUB $4278190080, R0 // ff0440e2
+ SUB.S $255, R0 // ff0050e2
+ SUB.S $4278190080, R0 // ff0450e2
+ SUB R0, R1, R2 // 002041e0
+ SUB.S R0, R1, R2 // 002051e0
+ SUB R0, R1 // 001041e0
+ SUB.S R0, R1 // 001051e0
+ SUB R0>>28, R1, R2 // 202e41e0
+ SUB R0<<28, R1, R2 // 002e41e0
+ SUB R0->28, R1, R2 // 402e41e0
+ SUB R0@>28, R1, R2 // 602e41e0
+ SUB.S R0>>28, R1, R2 // 202e51e0
+ SUB.S R0<<28, R1, R2 // 002e51e0
+ SUB.S R0->28, R1, R2 // 402e51e0
+ SUB.S R0@>28, R1, R2 // 602e51e0
+ SUB R0<<28, R1 // 001e41e0
+ SUB R0>>28, R1 // 201e41e0
+ SUB R0->28, R1 // 401e41e0
+ SUB R0@>28, R1 // 601e41e0
+ SUB.S R0<<28, R1 // 001e51e0
+ SUB.S R0>>28, R1 // 201e51e0
+ SUB.S R0->28, R1 // 401e51e0
+ SUB.S R0@>28, R1 // 601e51e0
+ SUB R0<<R1, R2, R3 // 103142e0
+ SUB R0>>R1, R2, R3 // 303142e0
+ SUB R0->R1, R2, R3 // 503142e0
+ SUB R0@>R1, R2, R3 // 703142e0
+ SUB.S R0<<R1, R2, R3 // 103152e0
+ SUB.S R0>>R1, R2, R3 // 303152e0
+ SUB.S R0->R1, R2, R3 // 503152e0
+ SUB.S R0@>R1, R2, R3 // 703152e0
+ SUB R0<<R1, R2 // 102142e0
+ SUB R0>>R1, R2 // 302142e0
+ SUB R0->R1, R2 // 502142e0
+ SUB R0@>R1, R2 // 702142e0
+ SUB.S R0<<R1, R2 // 102152e0
+ SUB.S R0>>R1, R2 // 302152e0
+ SUB.S R0->R1, R2 // 502152e0
+ SUB.S R0@>R1, R2 // 702152e0
+
+// SBC
+ SBC $255, R0, R1 // ff10c0e2
+ SBC $4278190080, R0, R1 // ff14c0e2
+ SBC.S $255, R0, R1 // ff10d0e2
+ SBC.S $4278190080, R0, R1 // ff14d0e2
+ SBC $255, R0 // ff00c0e2
+ SBC $4278190080, R0 // ff04c0e2
+ SBC.S $255, R0 // ff00d0e2
+ SBC.S $4278190080, R0 // ff04d0e2
+ SBC R0, R1, R2 // 0020c1e0
+ SBC.S R0, R1, R2 // 0020d1e0
+ SBC R0, R1 // 0010c1e0
+ SBC.S R0, R1 // 0010d1e0
+ SBC R0>>28, R1, R2 // 202ec1e0
+ SBC R0<<28, R1, R2 // 002ec1e0
+ SBC R0->28, R1, R2 // 402ec1e0
+ SBC R0@>28, R1, R2 // 602ec1e0
+ SBC.S R0>>28, R1, R2 // 202ed1e0
+ SBC.S R0<<28, R1, R2 // 002ed1e0
+ SBC.S R0->28, R1, R2 // 402ed1e0
+ SBC.S R0@>28, R1, R2 // 602ed1e0
+ SBC R0<<28, R1 // 001ec1e0
+ SBC R0>>28, R1 // 201ec1e0
+ SBC R0->28, R1 // 401ec1e0
+ SBC R0@>28, R1 // 601ec1e0
+ SBC.S R0<<28, R1 // 001ed1e0
+ SBC.S R0>>28, R1 // 201ed1e0
+ SBC.S R0->28, R1 // 401ed1e0
+ SBC.S R0@>28, R1 // 601ed1e0
+ SBC R0<<R1, R2, R3 // 1031c2e0
+ SBC R0>>R1, R2, R3 // 3031c2e0
+ SBC R0->R1, R2, R3 // 5031c2e0
+ SBC R0@>R1, R2, R3 // 7031c2e0
+ SBC.S R0<<R1, R2, R3 // 1031d2e0
+ SBC.S R0>>R1, R2, R3 // 3031d2e0
+ SBC.S R0->R1, R2, R3 // 5031d2e0
+ SBC.S R0@>R1, R2, R3 // 7031d2e0
+ SBC R0<<R1, R2 // 1021c2e0
+ SBC R0>>R1, R2 // 3021c2e0
+ SBC R0->R1, R2 // 5021c2e0
+ SBC R0@>R1, R2 // 7021c2e0
+ SBC.S R0<<R1, R2 // 1021d2e0
+ SBC.S R0>>R1, R2 // 3021d2e0
+ SBC.S R0->R1, R2 // 5021d2e0
+ SBC.S R0@>R1, R2 // 7021d2e0
+
+// RSB
+ RSB $255, R0, R1 // ff1060e2
+ RSB $4278190080, R0, R1 // ff1460e2
+ RSB.S $255, R0, R1 // ff1070e2
+ RSB.S $4278190080, R0, R1 // ff1470e2
+ RSB $255, R0 // ff0060e2
+ RSB $4278190080, R0 // ff0460e2
+ RSB.S $255, R0 // ff0070e2
+ RSB.S $4278190080, R0 // ff0470e2
+ RSB R0, R1, R2 // 002061e0
+ RSB.S R0, R1, R2 // 002071e0
+ RSB R0, R1 // 001061e0
+ RSB.S R0, R1 // 001071e0
+ RSB R0>>28, R1, R2 // 202e61e0
+ RSB R0<<28, R1, R2 // 002e61e0
+ RSB R0->28, R1, R2 // 402e61e0
+ RSB R0@>28, R1, R2 // 602e61e0
+ RSB.S R0>>28, R1, R2 // 202e71e0
+ RSB.S R0<<28, R1, R2 // 002e71e0
+ RSB.S R0->28, R1, R2 // 402e71e0
+ RSB.S R0@>28, R1, R2 // 602e71e0
+ RSB R0<<28, R1 // 001e61e0
+ RSB R0>>28, R1 // 201e61e0
+ RSB R0->28, R1 // 401e61e0
+ RSB R0@>28, R1 // 601e61e0
+ RSB.S R0<<28, R1 // 001e71e0
+ RSB.S R0>>28, R1 // 201e71e0
+ RSB.S R0->28, R1 // 401e71e0
+ RSB.S R0@>28, R1 // 601e71e0
+ RSB R0<<R1, R2, R3 // 103162e0
+ RSB R0>>R1, R2, R3 // 303162e0
+ RSB R0->R1, R2, R3 // 503162e0
+ RSB R0@>R1, R2, R3 // 703162e0
+ RSB.S R0<<R1, R2, R3 // 103172e0
+ RSB.S R0>>R1, R2, R3 // 303172e0
+ RSB.S R0->R1, R2, R3 // 503172e0
+ RSB.S R0@>R1, R2, R3 // 703172e0
+ RSB R0<<R1, R2 // 102162e0
+ RSB R0>>R1, R2 // 302162e0
+ RSB R0->R1, R2 // 502162e0
+ RSB R0@>R1, R2 // 702162e0
+ RSB.S R0<<R1, R2 // 102172e0
+ RSB.S R0>>R1, R2 // 302172e0
+ RSB.S R0->R1, R2 // 502172e0
+ RSB.S R0@>R1, R2 // 702172e0
+
+// RSC
+ RSC $255, R0, R1 // ff10e0e2
+ RSC $4278190080, R0, R1 // ff14e0e2
+ RSC.S $255, R0, R1 // ff10f0e2
+ RSC.S $4278190080, R0, R1 // ff14f0e2
+ RSC $255, R0 // ff00e0e2
+ RSC $4278190080, R0 // ff04e0e2
+ RSC.S $255, R0 // ff00f0e2
+ RSC.S $4278190080, R0 // ff04f0e2
+ RSC R0, R1, R2 // 0020e1e0
+ RSC.S R0, R1, R2 // 0020f1e0
+ RSC R0, R1 // 0010e1e0
+ RSC.S R0, R1 // 0010f1e0
+ RSC R0>>28, R1, R2 // 202ee1e0
+ RSC R0<<28, R1, R2 // 002ee1e0
+ RSC R0->28, R1, R2 // 402ee1e0
+ RSC R0@>28, R1, R2 // 602ee1e0
+ RSC.S R0>>28, R1, R2 // 202ef1e0
+ RSC.S R0<<28, R1, R2 // 002ef1e0
+ RSC.S R0->28, R1, R2 // 402ef1e0
+ RSC.S R0@>28, R1, R2 // 602ef1e0
+ RSC R0<<28, R1 // 001ee1e0
+ RSC R0>>28, R1 // 201ee1e0
+ RSC R0->28, R1 // 401ee1e0
+ RSC R0@>28, R1 // 601ee1e0
+ RSC.S R0<<28, R1 // 001ef1e0
+ RSC.S R0>>28, R1 // 201ef1e0
+ RSC.S R0->28, R1 // 401ef1e0
+ RSC.S R0@>28, R1 // 601ef1e0
+ RSC R0<<R1, R2, R3 // 1031e2e0
+ RSC R0>>R1, R2, R3 // 3031e2e0
+ RSC R0->R1, R2, R3 // 5031e2e0
+ RSC R0@>R1, R2, R3 // 7031e2e0
+ RSC.S R0<<R1, R2, R3 // 1031f2e0
+ RSC.S R0>>R1, R2, R3 // 3031f2e0
+ RSC.S R0->R1, R2, R3 // 5031f2e0
+ RSC.S R0@>R1, R2, R3 // 7031f2e0
+ RSC R0<<R1, R2 // 1021e2e0
+ RSC R0>>R1, R2 // 3021e2e0
+ RSC R0->R1, R2 // 5021e2e0
+ RSC R0@>R1, R2 // 7021e2e0
+ RSC.S R0<<R1, R2 // 1021f2e0
+ RSC.S R0>>R1, R2 // 3021f2e0
+ RSC.S R0->R1, R2 // 5021f2e0
+ RSC.S R0@>R1, R2 // 7021f2e0
+
+// ADD
+ ADD $255, R0, R1 // ff1080e2
+ ADD $4278190080, R0, R1 // ff1480e2
+ ADD.S $255, R0, R1 // ff1090e2
+ ADD.S $4278190080, R0, R1 // ff1490e2
+ ADD $255, R0 // ff0080e2
+ ADD $4278190080, R0 // ff0480e2
+ ADD.S $255, R0 // ff0090e2
+ ADD.S $4278190080, R0 // ff0490e2
+ ADD R0, R1, R2 // 002081e0
+ ADD.S R0, R1, R2 // 002091e0
+ ADD R0, R1 // 001081e0
+ ADD.S R0, R1 // 001091e0
+ ADD R0>>28, R1, R2 // 202e81e0
+ ADD R0<<28, R1, R2 // 002e81e0
+ ADD R0->28, R1, R2 // 402e81e0
+ ADD R0@>28, R1, R2 // 602e81e0
+ ADD.S R0>>28, R1, R2 // 202e91e0
+ ADD.S R0<<28, R1, R2 // 002e91e0
+ ADD.S R0->28, R1, R2 // 402e91e0
+ ADD.S R0@>28, R1, R2 // 602e91e0
+ ADD R0<<28, R1 // 001e81e0
+ ADD R0>>28, R1 // 201e81e0
+ ADD R0->28, R1 // 401e81e0
+ ADD R0@>28, R1 // 601e81e0
+ ADD.S R0<<28, R1 // 001e91e0
+ ADD.S R0>>28, R1 // 201e91e0
+ ADD.S R0->28, R1 // 401e91e0
+ ADD.S R0@>28, R1 // 601e91e0
+ ADD R0<<R1, R2, R3 // 103182e0
+ ADD R0>>R1, R2, R3 // 303182e0
+ ADD R0->R1, R2, R3 // 503182e0
+ ADD R0@>R1, R2, R3 // 703182e0
+ ADD.S R0<<R1, R2, R3 // 103192e0
+ ADD.S R0>>R1, R2, R3 // 303192e0
+ ADD.S R0->R1, R2, R3 // 503192e0
+ ADD.S R0@>R1, R2, R3 // 703192e0
+ ADD R0<<R1, R2 // 102182e0
+ ADD R0>>R1, R2 // 302182e0
+ ADD R0->R1, R2 // 502182e0
+ ADD R0@>R1, R2 // 702182e0
+ ADD.S R0<<R1, R2 // 102192e0
+ ADD.S R0>>R1, R2 // 302192e0
+ ADD.S R0->R1, R2 // 502192e0
+ ADD.S R0@>R1, R2 // 702192e0
+
+// ADC
+ ADC $255, R0, R1 // ff10a0e2
+ ADC $4278190080, R0, R1 // ff14a0e2
+ ADC.S $255, R0, R1 // ff10b0e2
+ ADC.S $4278190080, R0, R1 // ff14b0e2
+ ADC $255, R0 // ff00a0e2
+ ADC $4278190080, R0 // ff04a0e2
+ ADC.S $255, R0 // ff00b0e2
+ ADC.S $4278190080, R0 // ff04b0e2
+ ADC R0, R1, R2 // 0020a1e0
+ ADC.S R0, R1, R2 // 0020b1e0
+ ADC R0, R1 // 0010a1e0
+ ADC.S R0, R1 // 0010b1e0
+ ADC R0>>28, R1, R2 // 202ea1e0
+ ADC R0<<28, R1, R2 // 002ea1e0
+ ADC R0->28, R1, R2 // 402ea1e0
+ ADC R0@>28, R1, R2 // 602ea1e0
+ ADC.S R0>>28, R1, R2 // 202eb1e0
+ ADC.S R0<<28, R1, R2 // 002eb1e0
+ ADC.S R0->28, R1, R2 // 402eb1e0
+ ADC.S R0@>28, R1, R2 // 602eb1e0
+ ADC R0<<28, R1 // 001ea1e0
+ ADC R0>>28, R1 // 201ea1e0
+ ADC R0->28, R1 // 401ea1e0
+ ADC R0@>28, R1 // 601ea1e0
+ ADC.S R0<<28, R1 // 001eb1e0
+ ADC.S R0>>28, R1 // 201eb1e0
+ ADC.S R0->28, R1 // 401eb1e0
+ ADC.S R0@>28, R1 // 601eb1e0
+ ADC R0<<R1, R2, R3 // 1031a2e0
+ ADC R0>>R1, R2, R3 // 3031a2e0
+ ADC R0->R1, R2, R3 // 5031a2e0
+ ADC R0@>R1, R2, R3 // 7031a2e0
+ ADC.S R0<<R1, R2, R3 // 1031b2e0
+ ADC.S R0>>R1, R2, R3 // 3031b2e0
+ ADC.S R0->R1, R2, R3 // 5031b2e0
+ ADC.S R0@>R1, R2, R3 // 7031b2e0
+ ADC R0<<R1, R2 // 1021a2e0
+ ADC R0>>R1, R2 // 3021a2e0
+ ADC R0->R1, R2 // 5021a2e0
+ ADC R0@>R1, R2 // 7021a2e0
+ ADC.S R0<<R1, R2 // 1021b2e0
+ ADC.S R0>>R1, R2 // 3021b2e0
+ ADC.S R0->R1, R2 // 5021b2e0
+ ADC.S R0@>R1, R2 // 7021b2e0
+
+// TEQ
+ TEQ $255, R7 // ff0037e3
+ TEQ $4278190080, R9 // ff0439e3
+ TEQ R9<<30, R7 // 090f37e1
+ TEQ R9>>30, R7 // 290f37e1
+ TEQ R9->30, R7 // 490f37e1
+ TEQ R9@>30, R7 // 690f37e1
+ TEQ R9<<R8, R7 // 190837e1
+ TEQ R9>>R8, R7 // 390837e1
+ TEQ R9->R8, R7 // 590837e1
+ TEQ R9@>R8, R7 // 790837e1
+
+// TST
+ TST $255, R7 // ff0017e3
+ TST $4278190080, R9 // ff0419e3
+ TST R9<<30, R7 // 090f17e1
+ TST R9>>30, R7 // 290f17e1
+ TST R9->30, R7 // 490f17e1
+ TST R9@>30, R7 // 690f17e1
+ TST R9<<R8, R7 // 190817e1
+ TST R9>>R8, R7 // 390817e1
+ TST R9->R8, R7 // 590817e1
+ TST R9@>R8, R7 // 790817e1
+
+// CMP
+ CMP $255, R7 // ff0057e3
+ CMP $4278190080, R9 // ff0459e3
+ CMP R9<<30, R7 // 090f57e1
+ CMP R9>>30, R7 // 290f57e1
+ CMP R9->30, R7 // 490f57e1
+ CMP R9@>30, R7 // 690f57e1
+ CMP R9<<R8, R7 // 190857e1
+ CMP R9>>R8, R7 // 390857e1
+ CMP R9->R8, R7 // 590857e1
+ CMP R9@>R8, R7 // 790857e1
+
+// CMN
+ CMN $255, R7 // ff0077e3
+ CMN $4278190080, R9 // ff0479e3
+ CMN R9<<30, R7 // 090f77e1
+ CMN R9>>30, R7 // 290f77e1
+ CMN R9->30, R7 // 490f77e1
+ CMN R9@>30, R7 // 690f77e1
+ CMN R9<<R8, R7 // 190877e1
+ CMN R9>>R8, R7 // 390877e1
+ CMN R9->R8, R7 // 590877e1
+ CMN R9@>R8, R7 // 790877e1
+
+// B*
+ BEQ 14(PC) // BEQ 14(PC) // 0c00000a
+ BNE 13(PC) // BNE 13(PC) // 0b00001a
+ BCS 12(PC) // BCS 12(PC) // 0a00002a
+ BCC 11(PC) // BCC 11(PC) // 0900003a
+ BMI 10(PC) // BMI 10(PC) // 0800004a
+ BPL 9(PC) // BPL 9(PC) // 0700005a
+ BVS 8(PC) // BVS 8(PC) // 0600006a
+ BVC 7(PC) // BVC 7(PC) // 0500007a
+ BHI 6(PC) // BHI 6(PC) // 0400008a
+ BLS 5(PC) // BLS 5(PC) // 0300009a
+ BGE 4(PC) // BGE 4(PC) // 020000aa
+ BLT 3(PC) // BLT 3(PC) // 010000ba
+ BGT 2(PC) // BGT 2(PC) // 000000ca
+ BLE 1(PC) // BLE 1(PC) // ffffffda
+ ADD $0, R0, R0
+ B -1(PC) // JMP -1(PC) // fdffffea
+ B -2(PC) // JMP -2(PC) // fcffffea
+ B -3(PC) // JMP -3(PC) // fbffffea
+ B -4(PC) // JMP -4(PC) // faffffea
+ B -5(PC) // JMP -5(PC) // f9ffffea
+ B jmp_label_0 // JMP // 010000ea
+ B jmp_label_0 // JMP // 000000ea
+ B jmp_label_0 // JMP // ffffffea
+jmp_label_0:
+ ADD $0, R0, R0
+ BEQ jmp_label_0 // BEQ 519 // fdffff0a
+ BNE jmp_label_0 // BNE 519 // fcffff1a
+ BCS jmp_label_0 // BCS 519 // fbffff2a
+ BCC jmp_label_0 // BCC 519 // faffff3a
+ BMI jmp_label_0 // BMI 519 // f9ffff4a
+ BPL jmp_label_0 // BPL 519 // f8ffff5a
+ BVS jmp_label_0 // BVS 519 // f7ffff6a
+ BVC jmp_label_0 // BVC 519 // f6ffff7a
+ BHI jmp_label_0 // BHI 519 // f5ffff8a
+ BLS jmp_label_0 // BLS 519 // f4ffff9a
+ BGE jmp_label_0 // BGE 519 // f3ffffaa
+ BLT jmp_label_0 // BLT 519 // f2ffffba
+ BGT jmp_label_0 // BGT 519 // f1ffffca
+ BLE jmp_label_0 // BLE 519 // f0ffffda
+ B jmp_label_0 // JMP 519 // efffffea
+ B 0(PC) // JMP 0(PC) // feffffea
+jmp_label_1:
+ B jmp_label_1 // JMP // feffffea
+
+// BL
+ BL.EQ 14(PC) // CALL.EQ 14(PC) // 0c00000b
+ BL.NE 13(PC) // CALL.NE 13(PC) // 0b00001b
+ BL.CS 12(PC) // CALL.CS 12(PC) // 0a00002b
+ BL.CC 11(PC) // CALL.CC 11(PC) // 0900003b
+ BL.MI 10(PC) // CALL.MI 10(PC) // 0800004b
+ BL.PL 9(PC) // CALL.PL 9(PC) // 0700005b
+ BL.VS 8(PC) // CALL.VS 8(PC) // 0600006b
+ BL.VC 7(PC) // CALL.VC 7(PC) // 0500007b
+ BL.HI 6(PC) // CALL.HI 6(PC) // 0400008b
+ BL.LS 5(PC) // CALL.LS 5(PC) // 0300009b
+ BL.GE 4(PC) // CALL.GE 4(PC) // 020000ab
+ BL.LT 3(PC) // CALL.LT 3(PC) // 010000bb
+ BL.GT 2(PC) // CALL.GT 2(PC) // 000000cb
+ BL.LE 1(PC) // CALL.LE 1(PC) // ffffffdb
+ ADD $0, R0, R0
+ BL -1(PC) // CALL -1(PC) // fdffffeb
+ BL -2(PC) // CALL -2(PC) // fcffffeb
+ BL -3(PC) // CALL -3(PC) // fbffffeb
+ BL -4(PC) // CALL -4(PC) // faffffeb
+ BL -5(PC) // CALL -5(PC) // f9ffffeb
+ BL jmp_label_2 // CALL // 010000eb
+ BL jmp_label_2 // CALL // 000000eb
+ BL jmp_label_2 // CALL // ffffffeb
+jmp_label_2:
+ ADD $0, R0, R0
+ BL.EQ jmp_label_2 // CALL.EQ 560 // fdffff0b
+ BL.NE jmp_label_2 // CALL.NE 560 // fcffff1b
+ BL.CS jmp_label_2 // CALL.CS 560 // fbffff2b
+ BL.CC jmp_label_2 // CALL.CC 560 // faffff3b
+ BL.MI jmp_label_2 // CALL.MI 560 // f9ffff4b
+ BL.PL jmp_label_2 // CALL.PL 560 // f8ffff5b
+ BL.VS jmp_label_2 // CALL.VS 560 // f7ffff6b
+ BL.VC jmp_label_2 // CALL.VC 560 // f6ffff7b
+ BL.HI jmp_label_2 // CALL.HI 560 // f5ffff8b
+ BL.LS jmp_label_2 // CALL.LS 560 // f4ffff9b
+ BL.GE jmp_label_2 // CALL.GE 560 // f3ffffab
+ BL.LT jmp_label_2 // CALL.LT 560 // f2ffffbb
+ BL.GT jmp_label_2 // CALL.GT 560 // f1ffffcb
+ BL.LE jmp_label_2 // CALL.LE 560 // f0ffffdb
+ BL jmp_label_2 // CALL 560 // efffffeb
+ BL 0(PC) // CALL 0(PC) // feffffeb
+jmp_label_3:
+ BL jmp_label_3 // CALL // feffffeb
+
+// BIC
+ BIC $255, R0, R1 // ff10c0e3
+ BIC $4278190080, R0, R1 // ff14c0e3
+ BIC.S $255, R0, R1 // ff10d0e3
+ BIC.S $4278190080, R0, R1 // ff14d0e3
+ BIC $255, R0 // ff00c0e3
+ BIC $4278190080, R0 // ff04c0e3
+ BIC.S $255, R0 // ff00d0e3
+ BIC.S $4278190080, R0 // ff04d0e3
+ BIC R0, R1, R2 // 0020c1e1
+ BIC.S R0, R1, R2 // 0020d1e1
+ BIC R0, R1 // 0010c1e1
+ BIC.S R0, R1 // 0010d1e1
+ BIC R0>>28, R1, R2 // 202ec1e1
+ BIC R0<<28, R1, R2 // 002ec1e1
+ BIC R0->28, R1, R2 // 402ec1e1
+ BIC R0@>28, R1, R2 // 602ec1e1
+ BIC.S R0>>28, R1, R2 // 202ed1e1
+ BIC.S R0<<28, R1, R2 // 002ed1e1
+ BIC.S R0->28, R1, R2 // 402ed1e1
+ BIC.S R0@>28, R1, R2 // 602ed1e1
+ BIC R0<<28, R1 // 001ec1e1
+ BIC R0>>28, R1 // 201ec1e1
+ BIC R0->28, R1 // 401ec1e1
+ BIC R0@>28, R1 // 601ec1e1
+ BIC.S R0<<28, R1 // 001ed1e1
+ BIC.S R0>>28, R1 // 201ed1e1
+ BIC.S R0->28, R1 // 401ed1e1
+ BIC.S R0@>28, R1 // 601ed1e1
+ BIC R0<<R1, R2, R3 // 1031c2e1
+ BIC R0>>R1, R2, R3 // 3031c2e1
+ BIC R0->R1, R2, R3 // 5031c2e1
+ BIC R0@>R1, R2, R3 // 7031c2e1
+ BIC.S R0<<R1, R2, R3 // 1031d2e1
+ BIC.S R0>>R1, R2, R3 // 3031d2e1
+ BIC.S R0->R1, R2, R3 // 5031d2e1
+ BIC.S R0@>R1, R2, R3 // 7031d2e1
+ BIC R0<<R1, R2 // 1021c2e1
+ BIC R0>>R1, R2 // 3021c2e1
+ BIC R0->R1, R2 // 5021c2e1
+ BIC R0@>R1, R2 // 7021c2e1
+ BIC.S R0<<R1, R2 // 1021d2e1
+ BIC.S R0>>R1, R2 // 3021d2e1
+ BIC.S R0->R1, R2 // 5021d2e1
+ BIC.S R0@>R1, R2 // 7021d2e1
+
+// SRL
+ SRL $14, R5, R6 // 2567a0e1
+ SRL $15, R5, R6 // a567a0e1
+ SRL $30, R5, R6 // 256fa0e1
+ SRL $31, R5, R6 // a56fa0e1
+ SRL.S $14, R5, R6 // 2567b0e1
+ SRL.S $15, R5, R6 // a567b0e1
+ SRL.S $30, R5, R6 // 256fb0e1
+ SRL.S $31, R5, R6 // a56fb0e1
+ SRL $14, R5 // 2557a0e1
+ SRL $15, R5 // a557a0e1
+ SRL $30, R5 // 255fa0e1
+ SRL $31, R5 // a55fa0e1
+ SRL.S $14, R5 // 2557b0e1
+ SRL.S $15, R5 // a557b0e1
+ SRL.S $30, R5 // 255fb0e1
+ SRL.S $31, R5 // a55fb0e1
+ SRL R5, R6, R7 // 3675a0e1
+ SRL.S R5, R6, R7 // 3675b0e1
+ SRL R5, R7 // 3775a0e1
+ SRL.S R5, R7 // 3775b0e1
+
+// SRA
+ SRA $14, R5, R6 // 4567a0e1
+ SRA $15, R5, R6 // c567a0e1
+ SRA $30, R5, R6 // 456fa0e1
+ SRA $31, R5, R6 // c56fa0e1
+ SRA.S $14, R5, R6 // 4567b0e1
+ SRA.S $15, R5, R6 // c567b0e1
+ SRA.S $30, R5, R6 // 456fb0e1
+ SRA.S $31, R5, R6 // c56fb0e1
+ SRA $14, R5 // 4557a0e1
+ SRA $15, R5 // c557a0e1
+ SRA $30, R5 // 455fa0e1
+ SRA $31, R5 // c55fa0e1
+ SRA.S $14, R5 // 4557b0e1
+ SRA.S $15, R5 // c557b0e1
+ SRA.S $30, R5 // 455fb0e1
+ SRA.S $31, R5 // c55fb0e1
+ SRA R5, R6, R7 // 5675a0e1
+ SRA.S R5, R6, R7 // 5675b0e1
+ SRA R5, R7 // 5775a0e1
+ SRA.S R5, R7 // 5775b0e1
+
+// SLL
+ SLL $14, R5, R6 // 0567a0e1
+ SLL $15, R5, R6 // 8567a0e1
+ SLL $30, R5, R6 // 056fa0e1
+ SLL $31, R5, R6 // 856fa0e1
+ SLL.S $14, R5, R6 // 0567b0e1
+ SLL.S $15, R5, R6 // 8567b0e1
+ SLL.S $30, R5, R6 // 056fb0e1
+ SLL.S $31, R5, R6 // 856fb0e1
+ SLL $14, R5 // 0557a0e1
+ SLL $15, R5 // 8557a0e1
+ SLL $30, R5 // 055fa0e1
+ SLL $31, R5 // 855fa0e1
+ SLL.S $14, R5 // 0557b0e1
+ SLL.S $15, R5 // 8557b0e1
+ SLL.S $30, R5 // 055fb0e1
+ SLL.S $31, R5 // 855fb0e1
+ SLL R5, R6, R7 // 1675a0e1
+ SLL.S R5, R6, R7 // 1675b0e1
+ SLL R5, R7 // 1775a0e1
+ SLL.S R5, R7 // 1775b0e1
+
+// MULA / MULS
+ MULAWT R1, R2, R3, R4 // c23124e1
+ MULAWB R1, R2, R3, R4 // 823124e1
+ MULS R1, R2, R3, R4 // 923164e0
+ MULA R1, R2, R3, R4 // 923124e0
+ MULA.S R1, R2, R3, R4 // 923134e0
+ MMULA R1, R2, R3, R4 // 123154e7
+ MMULS R1, R2, R3, R4 // d23154e7
+ MULABB R1, R2, R3, R4 // 823104e1
+ MULAL R1, R2, (R4, R3) // 9231e4e0
+ MULAL.S R1, R2, (R4, R3) // 9231f4e0
+ MULALU R1, R2, (R4, R3) // 9231a4e0
+ MULALU.S R1, R2, (R4, R3) // 9231b4e0
+
+// MUL
+ MUL R2, R3, R4 // 930204e0
+ MUL R2, R4 // 940204e0
+ MUL R2, R4, R4 // 940204e0
+ MUL.S R2, R3, R4 // 930214e0
+ MUL.S R2, R4 // 940214e0
+ MUL.S R2, R4, R4 // 940214e0
+ MULU R5, R6, R7 // 960507e0
+ MULU R5, R7 // 970507e0
+ MULU R5, R7, R7 // 970507e0
+ MULU.S R5, R6, R7 // 960517e0
+ MULU.S R5, R7 // 970517e0
+ MULU.S R5, R7, R7 // 970517e0
+ MULLU R1, R2, (R4, R3) // 923184e0
+ MULLU.S R1, R2, (R4, R3) // 923194e0
+ MULL R1, R2, (R4, R3) // 9231c4e0
+ MULL.S R1, R2, (R4, R3) // 9231d4e0
+ MMUL R1, R2, R3 // 12f153e7
+ MULBB R1, R2, R3 // 820163e1
+ MULWB R1, R2, R3 // a20123e1
+ MULWT R1, R2, R3 // e20123e1
+
+// REV
+ REV R1, R2 // 312fbfe6
+ REV16 R1, R2 // b12fbfe6
+ REVSH R1, R2 // b12fffe6
+ RBIT R1, R2 // 312fffe6
+
+// XTAB/XTAH/XTABU/XTAHU
+ XTAB R2@>0, R8 // 7280a8e6
+ XTAB R2@>8, R8 // 7284a8e6
+ XTAB R2@>16, R8 // 7288a8e6
+ XTAB R2@>24, R8 // 728ca8e6
+ XTAH R3@>0, R9 // 7390b9e6
+ XTAH R3@>8, R9 // 7394b9e6
+ XTAH R3@>16, R9 // 7398b9e6
+ XTAH R3@>24, R9 // 739cb9e6
+ XTABU R4@>0, R7 // 7470e7e6
+ XTABU R4@>8, R7 // 7474e7e6
+ XTABU R4@>16, R7 // 7478e7e6
+ XTABU R4@>24, R7 // 747ce7e6
+ XTAHU R5@>0, R1 // 7510f1e6
+ XTAHU R5@>8, R1 // 7514f1e6
+ XTAHU R5@>16, R1 // 7518f1e6
+ XTAHU R5@>24, R1 // 751cf1e6
+ XTAB R2@>0, R4, R8 // 7280a4e6
+ XTAB R2@>8, R4, R8 // 7284a4e6
+ XTAB R2@>16, R4, R8 // 7288a4e6
+ XTAB R2@>24, R4, R8 // 728ca4e6
+ XTAH R3@>0, R4, R9 // 7390b4e6
+ XTAH R3@>8, R4, R9 // 7394b4e6
+ XTAH R3@>16, R4, R9 // 7398b4e6
+ XTAH R3@>24, R4, R9 // 739cb4e6
+ XTABU R4@>0, R0, R7 // 7470e0e6
+ XTABU R4@>8, R0, R7 // 7474e0e6
+ XTABU R4@>16, R0, R7 // 7478e0e6
+ XTABU R4@>24, R0, R7 // 747ce0e6
+ XTAHU R5@>0, R9, R1 // 7510f9e6
+ XTAHU R5@>8, R9, R1 // 7514f9e6
+ XTAHU R5@>16, R9, R1 // 7518f9e6
+ XTAHU R5@>24, R9, R1 // 751cf9e6
+
+// DIVHW R0, R1, R2: R1 / R0 -> R2
+ DIVHW R0, R1, R2 // 11f012e7
+ DIVUHW R0, R1, R2 // 11f032e7
+// DIVHW R0, R1: R1 / R0 -> R1
+ DIVHW R0, R1 // 11f011e7
+ DIVUHW R0, R1 // 11f031e7
+
+// misc
+ CLZ R1, R2 // 112f6fe1
+ WORD $0 // 00000000
+ WORD $4294967295 // ffffffff
+ WORD $2863311530 // aaaaaaaa
+ WORD $1431655765 // 55555555
+ PLD 4080(R6) // f0ffd6f5
+ PLD -4080(R9) // f0ff59f5
+ RFE // 0080fde8
+ SWPW R3, (R7), R9 // SWPW (R7), R3, R9 // 939007e1
+ SWPBU R4, (R2), R8 // SWPBU (R2), R4, R8 // 948042e1
+ SWI $0 // 000000ef
+ SWI $65535 // ffff00ef
+ SWI // 000000ef
+
+// BFX/BFXU/BFC/BFI
+ BFX $16, $8, R1, R2 // BFX $16, R1, $8, R2 // 5124afe7
+ BFX $29, $2, R8 // 5881bce7
+ BFXU $16, $8, R1, R2 // BFXU $16, R1, $8, R2 // 5124efe7
+ BFXU $29, $2, R8 // 5881fce7
+ BFC $29, $2, R8 // 1f81dee7
+ BFI $29, $2, R8 // 1881dee7
+ BFI $16, $8, R1, R2 // BFI $16, R1, $8, R2 // 1124d7e7
+
+// synthetic arithmetic
+ ADD $0xffffffaa, R2, R3 // ADD $4294967210, R2, R3 // 55b0e0e30b3082e0
+ ADD $0xffffff55, R5 // ADD $4294967125, R5 // aab0e0e30b5085e0
+ ADD.S $0xffffffab, R2, R3 // ADD.S $4294967211, R2, R3 // 54b0e0e30b3092e0
+ ADD.S $0xffffff54, R5 // ADD.S $4294967124, R5 // abb0e0e30b5095e0
+ ADC $0xffffffac, R2, R3 // ADC $4294967212, R2, R3 // 53b0e0e30b30a2e0
+ ADC $0xffffff53, R5 // ADC $4294967123, R5 // acb0e0e30b50a5e0
+ ADC.S $0xffffffad, R2, R3 // ADC.S $4294967213, R2, R3 // 52b0e0e30b30b2e0
+ ADC.S $0xffffff52, R5 // ADC.S $4294967122, R5 // adb0e0e30b50b5e0
+ SUB $0xffffffae, R2, R3 // SUB $4294967214, R2, R3 // 51b0e0e30b3042e0
+ SUB $0xffffff51, R5 // SUB $4294967121, R5 // aeb0e0e30b5045e0
+ SUB.S $0xffffffaf, R2, R3 // SUB.S $4294967215, R2, R3 // 50b0e0e30b3052e0
+ SUB.S $0xffffff50, R5 // SUB.S $4294967120, R5 // afb0e0e30b5055e0
+ SBC $0xffffffb0, R2, R3 // SBC $4294967216, R2, R3 // 4fb0e0e30b30c2e0
+ SBC $0xffffff4f, R5 // SBC $4294967119, R5 // b0b0e0e30b50c5e0
+ SBC.S $0xffffffb1, R2, R3 // SBC.S $4294967217, R2, R3 // 4eb0e0e30b30d2e0
+ SBC.S $0xffffff4e, R5 // SBC.S $4294967118, R5 // b1b0e0e30b50d5e0
+ RSB $0xffffffb2, R2, R3 // RSB $4294967218, R2, R3 // 4db0e0e30b3062e0
+ RSB $0xffffff4d, R5 // RSB $4294967117, R5 // b2b0e0e30b5065e0
+ RSB.S $0xffffffb3, R2, R3 // RSB.S $4294967219, R2, R3 // 4cb0e0e30b3072e0
+ RSB.S $0xffffff4c, R5 // RSB.S $4294967116, R5 // b3b0e0e30b5075e0
+ RSC $0xffffffb4, R2, R3 // RSC $4294967220, R2, R3 // 4bb0e0e30b30e2e0
+ RSC $0xffffff4b, R5 // RSC $4294967115, R5 // b4b0e0e30b50e5e0
+ RSC.S $0xffffffb5, R2, R3 // RSC.S $4294967221, R2, R3 // 4ab0e0e30b30f2e0
+ RSC.S $0xffffff4a, R5 // RSC.S $4294967114, R5 // b5b0e0e30b50f5e0
+ AND $0xffffffaa, R2, R3 // AND $4294967210, R2, R3 // 55b0e0e30b3002e0
+ AND $0xffffff55, R5 // AND $4294967125, R5 // aab0e0e30b5005e0
+ AND.S $0xffffffab, R2, R3 // AND.S $4294967211, R2, R3 // 54b0e0e30b3012e0
+ AND.S $0xffffff54, R5 // AND.S $4294967124, R5 // abb0e0e30b5015e0
+ ORR $0xffffffaa, R2, R3 // ORR $4294967210, R2, R3 // 55b0e0e30b3082e1
+ ORR $0xffffff55, R5 // ORR $4294967125, R5 // aab0e0e30b5085e1
+ ORR.S $0xffffffab, R2, R3 // ORR.S $4294967211, R2, R3 // 54b0e0e30b3092e1
+ ORR.S $0xffffff54, R5 // ORR.S $4294967124, R5 // abb0e0e30b5095e1
+ EOR $0xffffffaa, R2, R3 // EOR $4294967210, R2, R3 // 55b0e0e30b3022e0
+ EOR $0xffffff55, R5 // EOR $4294967125, R5 // aab0e0e30b5025e0
+ EOR.S $0xffffffab, R2, R3 // EOR.S $4294967211, R2, R3 // 54b0e0e30b3032e0
+ EOR.S $0xffffff54, R5 // EOR.S $4294967124, R5 // abb0e0e30b5035e0
+ BIC $0xffffffaa, R2, R3 // BIC $4294967210, R2, R3 // 55b0e0e30b30c2e1
+ BIC $0xffffff55, R5 // BIC $4294967125, R5 // aab0e0e30b50c5e1
+ BIC.S $0xffffffab, R2, R3 // BIC.S $4294967211, R2, R3 // 54b0e0e30b30d2e1
+ BIC.S $0xffffff54, R5 // BIC.S $4294967124, R5 // abb0e0e30b50d5e1
+ CMP $0xffffffab, R2 // CMP $4294967211, R2 // 54b0e0e30b0052e1
+ CMN $0xffffffac, R3 // CMN $4294967212, R3 // 53b0e0e30b0073e1
+ TST $0xffffffad, R4 // TST $4294967213, R4 // 52b0e0e30b0014e1
+ TEQ $0xffffffae, R5 // TEQ $4294967214, R5 // 51b0e0e30b0035e1
+
+// immediate decomposition
+ ADD $0xff0000ff, R0, R1 // ADD $4278190335, R0, R1 // ff1080e2ff1481e2
+ EOR $0xff0000ff, R0, R1 // EOR $4278190335, R0, R1 // ff1020e2ff1421e2
+ ORR $0xff0000ff, R0, R1 // ORR $4278190335, R0, R1 // ff1080e3ff1481e3
+ SUB $0xff0000ff, R0, R1 // SUB $4278190335, R0, R1 // ff1040e2ff1441e2
+ BIC $0xff0000ff, R0, R1 // BIC $4278190335, R0, R1 // ff10c0e3ff14c1e3
+ RSB $0xff0000ff, R0, R1 // RSB $4278190335, R0, R1 // ff1060e2ff1481e2
+ ADC $0xff0000ff, R0, R1 // ADC $4278190335, R0, R1 // ff10a0e2ff1481e2
+ SBC $0xff0000ff, R0, R1 // SBC $4278190335, R0, R1 // ff10c0e2ff1441e2
+ RSC $0xff0000ff, R0, R1 // RSC $4278190335, R0, R1 // ff10e0e2ff1481e2
+ ADD $0x000fffff, R0, R1 // ADD $1048575, R0, R1 // 011680e2011041e2
+ ADC $0x000fffff, R0, R1 // ADC $1048575, R0, R1 // 0116a0e2011041e2
+ SUB $0x000fffff, R0, R1 // SUB $1048575, R0, R1 // 011640e2011081e2
+ SBC $0x000fffff, R0, R1 // SBC $1048575, R0, R1 // 0116c0e2011081e2
+ RSB $0x000fffff, R0, R1 // RSB $1048575, R0, R1 // 011660e2011041e2
+ RSC $0x000fffff, R0, R1 // RSC $1048575, R0, R1 // 0116e0e2011041e2
+ ADD $0xff0000ff, R1 // ADD $4278190335, R1 // ff1081e2ff1481e2
+ EOR $0xff0000ff, R1 // EOR $4278190335, R1 // ff1021e2ff1421e2
+ ORR $0xff0000ff, R1 // ORR $4278190335, R1 // ff1081e3ff1481e3
+ SUB $0xff0000ff, R1 // SUB $4278190335, R1 // ff1041e2ff1441e2
+ BIC $0xff0000ff, R1 // BIC $4278190335, R1 // ff10c1e3ff14c1e3
+ RSB $0xff0000ff, R1 // RSB $4278190335, R1 // ff1061e2ff1481e2
+ ADC $0xff0000ff, R1 // ADC $4278190335, R1 // ff10a1e2ff1481e2
+ SBC $0xff0000ff, R1 // SBC $4278190335, R1 // ff10c1e2ff1441e2
+ RSC $0xff0000ff, R1 // RSC $4278190335, R1 // ff10e1e2ff1481e2
+ ADD $0x000fffff, R1 // ADD $1048575, R1 // 011681e2011041e2
+ ADC $0x000fffff, R1 // ADC $1048575, R1 // 0116a1e2011041e2
+ SUB $0x000fffff, R1 // SUB $1048575, R1 // 011641e2011081e2
+ SBC $0x000fffff, R1 // SBC $1048575, R1 // 0116c1e2011081e2
+ RSB $0x000fffff, R1 // RSB $1048575, R1 // 011661e2011041e2
+ RSC $0x000fffff, R1 // RSC $1048575, R1 // 0116e1e2011041e2
+
+// MVN
+ MVN $0xff, R1 // MVN $255, R1 // ff10e0e3
+ MVN $0xff000000, R1 // MVN $4278190080, R1 // ff14e0e3
+ MVN R9<<30, R7 // 097fe0e1
+ MVN R9>>30, R7 // 297fe0e1
+ MVN R9->30, R7 // 497fe0e1
+ MVN R9@>30, R7 // 697fe0e1
+ MVN.S R9<<30, R7 // 097ff0e1
+ MVN.S R9>>30, R7 // 297ff0e1
+ MVN.S R9->30, R7 // 497ff0e1
+ MVN.S R9@>30, R7 // 697ff0e1
+ MVN R9<<R8, R7 // 1978e0e1
+ MVN R9>>R8, R7 // 3978e0e1
+ MVN R9->R8, R7 // 5978e0e1
+ MVN R9@>R8, R7 // 7978e0e1
+ MVN.S R9<<R8, R7 // 1978f0e1
+ MVN.S R9>>R8, R7 // 3978f0e1
+ MVN.S R9->R8, R7 // 5978f0e1
+ MVN.S R9@>R8, R7 // 7978f0e1
+ MVN $0xffffffbe, R5 // MVN $4294967230, R5 // 4150a0e3
+
+// MOVM
+ MOVM.IA [R0,R2,R4,R6], (R1) // MOVM.U [R0,R2,R4,R6], (R1) // 550081e8
+ MOVM.IA [R0-R4,R6,R8,R9-R11], (R1) // MOVM.U [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0f81e8
+ MOVM.IA.W [R0,R2,R4,R6], (R1) // MOVM.W.U [R0,R2,R4,R6], (R1) // 5500a1e8
+ MOVM.IA.W [R0-R4,R6,R8,R9-R11], (R1) // MOVM.W.U [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0fa1e8
+ MOVM.IA (R1), [R0,R2,R4,R6] // MOVM.U (R1), [R0,R2,R4,R6] // 550091e8
+ MOVM.IA (R1), [R0-R4,R6,R8,R9-R11] // MOVM.U (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f91e8
+ MOVM.IA.W (R1), [R0,R2,R4,R6] // MOVM.W.U (R1), [R0,R2,R4,R6] // 5500b1e8
+ MOVM.IA.W (R1), [R0-R4,R6,R8,R9-R11] // MOVM.W.U (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0fb1e8
+ MOVM.DA [R0,R2,R4,R6], (R1) // MOVM [R0,R2,R4,R6], (R1) // 550001e8
+ MOVM.DA [R0-R4,R6,R8,R9-R11], (R1) // MOVM [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0f01e8
+ MOVM.DA.W [R0,R2,R4,R6], (R1) // MOVM.W [R0,R2,R4,R6], (R1) // 550021e8
+ MOVM.DA.W [R0-R4,R6,R8,R9-R11], (R1) // MOVM.W [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0f21e8
+ MOVM.DA (R1), [R0,R2,R4,R6] // MOVM (R1), [R0,R2,R4,R6] // 550011e8
+ MOVM.DA (R1), [R0-R4,R6,R8,R9-R11] // MOVM (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f11e8
+ MOVM.DA.W (R1), [R0,R2,R4,R6] // MOVM.W (R1), [R0,R2,R4,R6] // 550031e8
+ MOVM.DA.W (R1), [R0-R4,R6,R8,R9-R11] // MOVM.W (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f31e8
+ MOVM.DB [R0,R2,R4,R6], (R1) // MOVM.P [R0,R2,R4,R6], (R1) // 550001e9
+ MOVM.DB [R0-R4,R6,R8,R9-R11], (R1) // MOVM.P [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0f01e9
+ MOVM.DB.W [R0,R2,R4,R6], (R1) // MOVM.P.W [R0,R2,R4,R6], (R1) // 550021e9
+ MOVM.DB.W [R0-R4,R6,R8,R9-R11], (R1) // MOVM.P.W [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (R1) // 5f0f21e9
+ MOVM.DB (R1), [R0,R2,R4,R6] // MOVM.P (R1), [R0,R2,R4,R6] // 550011e9
+ MOVM.DB (R1), [R0-R4,R6,R8,R9-R11] // MOVM.P (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f11e9
+ MOVM.DB.W (R1), [R0,R2,R4,R6] // MOVM.P.W (R1), [R0,R2,R4,R6] // 550031e9
+ MOVM.DB.W (R1), [R0-R4,R6,R8,R9-R11] // MOVM.P.W (R1), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f31e9
+ MOVM.IB [R0,R2,R4,R6], (g) // MOVM.P.U [R0,R2,R4,R6], (g) // 55008ae9
+ MOVM.IB [R0-R4,R6,R8,R9-R11], (g) // MOVM.P.U [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (g) // 5f0f8ae9
+ MOVM.IB.W [R0,R2,R4,R6], (g) // MOVM.P.W.U [R0,R2,R4,R6], (g) // 5500aae9
+ MOVM.IB.W [R0-R4,R6,R8,R9-R11], (g) // MOVM.P.W.U [R0,R1,R2,R3,R4,R6,R8,R9,g,R11], (g) // 5f0faae9
+ MOVM.IB (g), [R0,R2,R4,R6] // MOVM.P.U (g), [R0,R2,R4,R6] // 55009ae9
+ MOVM.IB (g), [R0-R4,R6,R8,R9-R11] // MOVM.P.U (g), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0f9ae9
+ MOVM.IB.W (g), [R0,R2,R4,R6] // MOVM.P.W.U (g), [R0,R2,R4,R6] // 5500bae9
+ MOVM.IB.W (g), [R0-R4,R6,R8,R9-R11] // MOVM.P.W.U (g), [R0,R1,R2,R3,R4,R6,R8,R9,g,R11] // 5f0fbae9
+
+// MOVW
+ MOVW R3, R4 // 0340a0e1
+ MOVW.S R3, R4 // 0340b0e1
+ MOVW R9, R2 // 0920a0e1
+ MOVW.S R9, R2 // 0920b0e1
+ MOVW R5>>1, R2 // a520a0e1
+ MOVW.S R5>>1, R2 // a520b0e1
+ MOVW R5<<1, R2 // 8520a0e1
+ MOVW.S R5<<1, R2 // 8520b0e1
+ MOVW R5->1, R2 // c520a0e1
+ MOVW.S R5->1, R2 // c520b0e1
+ MOVW R5@>1, R2 // e520a0e1
+ MOVW.S R5@>1, R2 // e520b0e1
+ MOVW $0xff, R9 // MOVW $255, R9 // ff90a0e3
+ MOVW $0xff000000, R9 // MOVW $4278190080, R9 // ff94a0e3
+ MOVW $0xff(R0), R1 // MOVW $255(R0), R1 // ff1080e2
+ MOVW.S $0xff(R0), R1 // MOVW.S $255(R0), R1 // ff1090e2
+ MOVW $-0xff(R0), R1 // MOVW $-255(R0), R1 // ff1040e2
+ MOVW.S $-0xff(R0), R1 // MOVW.S $-255(R0), R1 // ff1050e2
+ MOVW $0xffffffae, R1 // MOVW $4294967214, R1 // 5110e0e3
+ MOVW $0xaaaaaaaa, R1 // MOVW $2863311530, R1
+ MOVW R1, (R2) // 001082e5
+ MOVW.P R1, (R2) // 001082e4
+ MOVW.W R1, (R2) // 0010a2e5
+ MOVW R1, 0x20(R2) // MOVW R1, 32(R2) // 201082e5
+ MOVW.P R1, 0x20(R2) // MOVW.P R1, 32(R2) // 201082e4
+ MOVW.W R1, 0x20(R2) // MOVW.W R1, 32(R2) // 2010a2e5
+ MOVW R1, -0x20(R2) // MOVW R1, -32(R2) // 201002e5
+ MOVW.P R1, -0x20(R2) // MOVW.P R1, -32(R2) // 201002e4
+ MOVW.W R1, -0x20(R2) // MOVW.W R1, -32(R2) // 201022e5
+ MOVW (R2), R1 // 001092e5
+ MOVW.P (R2), R1 // 001092e4
+ MOVW.W (R2), R1 // 0010b2e5
+ MOVW 0x20(R2), R1 // MOVW 32(R2), R1 // 201092e5
+ MOVW.P 0x20(R2), R1 // MOVW.P 32(R2), R1 // 201092e4
+ MOVW.W 0x20(R2), R1 // MOVW.W 32(R2), R1 // 2010b2e5
+ MOVW -0x20(R2), R1 // MOVW -32(R2), R1 // 201012e5
+ MOVW.P -0x20(R2), R1 // MOVW.P -32(R2), R1 // 201012e4
+ MOVW.W -0x20(R2), R1 // MOVW.W -32(R2), R1 // 201032e5
+ MOVW R1, 0x00ffffff(R2) // MOVW R1, 16777215(R2)
+ MOVW 0x00ffffff(R2), R1 // MOVW 16777215(R2), R1
+ MOVW CPSR, R1 // 00100fe1
+ MOVW R1, CPSR // 01f02ce1
+ MOVW $0xff, CPSR // MOVW $255, CPSR // fff02ce3
+ MOVW $0xff000000, CPSR // MOVW $4278190080, CPSR // fff42ce3
+ MOVW FPSR, R9 // 109af1ee
+ MOVW FPSR, g // 10aaf1ee
+ MOVW R9, FPSR // 109ae1ee
+ MOVW g, FPSR // 10aae1ee
+ MOVW R0>>28(R1), R2 // 202e91e7
+ MOVW R0<<28(R1), R2 // 002e91e7
+ MOVW R0->28(R1), R2 // 402e91e7
+ MOVW R0@>28(R1), R2 // 602e91e7
+ MOVW.U R0>>28(R1), R2 // 202e11e7
+ MOVW.U R0<<28(R1), R2 // 002e11e7
+ MOVW.U R0->28(R1), R2 // 402e11e7
+ MOVW.U R0@>28(R1), R2 // 602e11e7
+ MOVW.W R0>>28(R1), R2 // 202eb1e7
+ MOVW.W R0<<28(R1), R2 // 002eb1e7
+ MOVW.W R0->28(R1), R2 // 402eb1e7
+ MOVW.W R0@>28(R1), R2 // 602eb1e7
+ MOVW.P R0>>28(g), R2 // 202e9ae6
+ MOVW.P R0<<28(g), R2 // 002e9ae6
+ MOVW.P R0->28(g), R2 // 402e9ae6
+ MOVW.P R0@>28(g), R2 // 602e9ae6
+ MOVW R2, R0>>28(R1) // 202e81e7
+ MOVW R2, R0<<28(R1) // 002e81e7
+ MOVW R2, R0->28(R1) // 402e81e7
+ MOVW R2, R0@>28(R1) // 602e81e7
+ MOVW.U R2, R0>>28(R1) // 202e01e7
+ MOVW.U R2, R0<<28(R1) // 002e01e7
+ MOVW.U R2, R0->28(R1) // 402e01e7
+ MOVW.U R2, R0@>28(R1) // 602e01e7
+ MOVW.W R2, R0>>28(R1) // 202ea1e7
+ MOVW.W R2, R0<<28(R1) // 002ea1e7
+ MOVW.W R2, R0->28(R1) // 402ea1e7
+ MOVW.W R2, R0@>28(R1) // 602ea1e7
+ MOVW.P R2, R0>>28(R5) // 202e85e6
+ MOVW.P R2, R0<<28(R5) // 002e85e6
+ MOVW.P R2, R0->28(R5) // 402e85e6
+ MOVW.P R2, R0@>28(R5) // 602e85e6
+ MOVW R0, math·Exp(SB) // MOVW R0, math.Exp(SB)
+ MOVW math·Exp(SB), R0 // MOVW math.Exp(SB), R0
+
+// MOVB
+ MOVB R3, R4 // 0340a0e1
+ MOVB R9, R2 // 0920a0e1
+ MOVBU R0, R1 // ff1000e2
+ MOVBS R5, R6 // 056ca0e1466ca0e1
+ MOVB R1, (R2) // 0010c2e5
+ MOVB.P R1, (R2) // 0010c2e4
+ MOVB.W R1, (R2) // 0010e2e5
+ MOVB R1, 0x20(R2) // MOVB R1, 32(R2) // 2010c2e5
+ MOVB.P R1, 0x20(R2) // MOVB.P R1, 32(R2) // 2010c2e4
+ MOVB.W R1, 0x20(R2) // MOVB.W R1, 32(R2) // 2010e2e5
+ MOVB R1, -0x20(R2) // MOVB R1, -32(R2) // 201042e5
+ MOVB.P R1, -0x20(R2) // MOVB.P R1, -32(R2) // 201042e4
+ MOVB.W R1, -0x20(R2) // MOVB.W R1, -32(R2) // 201062e5
+ MOVBS R1, (R2) // 0010c2e5
+ MOVBS.P R1, (R2) // 0010c2e4
+ MOVBS.W R1, (R2) // 0010e2e5
+ MOVBS R1, 0x20(R2) // MOVBS R1, 32(R2) // 2010c2e5
+ MOVBS.P R1, 0x20(R2) // MOVBS.P R1, 32(R2) // 2010c2e4
+ MOVBS.W R1, 0x20(R2) // MOVBS.W R1, 32(R2) // 2010e2e5
+ MOVBS R1, -0x20(R2) // MOVBS R1, -32(R2) // 201042e5
+ MOVBS.P R1, -0x20(R2) // MOVBS.P R1, -32(R2) // 201042e4
+ MOVBS.W R1, -0x20(R2) // MOVBS.W R1, -32(R2) // 201062e5
+ MOVBU R1, (R2) // 0010c2e5
+ MOVBU.P R1, (R2) // 0010c2e4
+ MOVBU.W R1, (R2) // 0010e2e5
+ MOVBU R1, 0x20(R2) // MOVBU R1, 32(R2) // 2010c2e5
+ MOVBU.P R1, 0x20(R2) // MOVBU.P R1, 32(R2) // 2010c2e4
+ MOVBU.W R1, 0x20(R2) // MOVBU.W R1, 32(R2) // 2010e2e5
+ MOVBU R1, -0x20(R2) // MOVBU R1, -32(R2) // 201042e5
+ MOVBU.P R1, -0x20(R2) // MOVBU.P R1, -32(R2) // 201042e4
+ MOVBU.W R1, -0x20(R2) // MOVBU.W R1, -32(R2) // 201062e5
+ MOVB (R2), R1 // d010d2e1
+ MOVB.P (R2), R1 // d010d2e0
+ MOVB.W (R2), R1 // d010f2e1
+ MOVB 0x20(R2), R1 // MOVB 32(R2), R1 // d012d2e1
+ MOVB.P 0x20(R2), R1 // MOVB.P 32(R2), R1 // d012d2e0
+ MOVB.W 0x20(R2), R1 // MOVB.W 32(R2), R1 // d012f2e1
+ MOVB -0x20(R2), R1 // MOVB -32(R2), R1 // d01252e1
+ MOVB.P -0x20(R2), R1 // MOVB.P -32(R2), R1 // d01252e0
+ MOVB.W -0x20(R2), R1 // MOVB.W -32(R2), R1 // d01272e1
+ MOVBS (R2), R1 // d010d2e1
+ MOVBS.P (R2), R1 // d010d2e0
+ MOVBS.W (R2), R1 // d010f2e1
+ MOVBS 0x20(R2), R1 // MOVBS 32(R2), R1 // d012d2e1
+ MOVBS.P 0x20(R2), R1 // MOVBS.P 32(R2), R1 // d012d2e0
+ MOVBS.W 0x20(R2), R1 // MOVBS.W 32(R2), R1 // d012f2e1
+ MOVBS -0x20(R2), R1 // MOVBS -32(R2), R1 // d01252e1
+ MOVBS.P -0x20(R2), R1 // MOVBS.P -32(R2), R1 // d01252e0
+ MOVBS.W -0x20(R2), R1 // MOVBS.W -32(R2), R1 // d01272e1
+ MOVBU (R2), R1 // 0010d2e5
+ MOVBU.P (R2), R1 // 0010d2e4
+ MOVBU.W (R2), R1 // 0010f2e5
+ MOVBU 0x20(R2), R1 // MOVBU 32(R2), R1 // 2010d2e5
+ MOVBU.P 0x20(R2), R1 // MOVBU.P 32(R2), R1 // 2010d2e4
+ MOVBU.W 0x20(R2), R1 // MOVBU.W 32(R2), R1 // 2010f2e5
+ MOVBU -0x20(R2), R1 // MOVBU -32(R2), R1 // 201052e5
+ MOVBU.P -0x20(R2), R1 // MOVBU.P -32(R2), R1 // 201052e4
+ MOVBU.W -0x20(R2), R1 // MOVBU.W -32(R2), R1 // 201072e5
+ MOVB R1, 0x00ffffff(R2) // MOVB R1, 16777215(R2)
+ MOVB.W R1, 0x00ffffff(R2) // MOVB.W R1, 16777215(R2)
+ MOVB.P R1, 0x00ffffff(R2) // MOVB.P R1, 16777215(R2)
+ MOVB R1, -0x00ffffff(R2) // MOVB R1, -16777215(R2)
+ MOVB.W R1, -0x00ffffff(R2) // MOVB.W R1, -16777215(R2)
+ MOVB.P R1, -0x00ffffff(R2) // MOVB.P R1, -16777215(R2)
+ MOVB 0x00ffffff(R2), R1 // MOVB 16777215(R2), R1
+ MOVB.P 0x00ffffff(R2), R1 // MOVB.P 16777215(R2), R1
+ MOVB.W 0x00ffffff(R2), R1 // MOVB.W 16777215(R2), R1
+ MOVB -0x00ffffff(R2), R1 // MOVB -16777215(R2), R1
+ MOVB.P -0x00ffffff(R2), R1 // MOVB.P -16777215(R2), R1
+ MOVB.W -0x00ffffff(R2), R1 // MOVB.W -16777215(R2), R1
+ MOVBS R1, 0x00ffffff(R2) // MOVBS R1, 16777215(R2)
+ MOVBS.W R1, 0x00ffffff(R2) // MOVBS.W R1, 16777215(R2)
+ MOVBS.P R1, 0x00ffffff(R2) // MOVBS.P R1, 16777215(R2)
+ MOVBS R1, -0x00ffffff(R2) // MOVBS R1, -16777215(R2)
+ MOVBS.W R1, -0x00ffffff(R2) // MOVBS.W R1, -16777215(R2)
+ MOVBS.P R1, -0x00ffffff(R2) // MOVBS.P R1, -16777215(R2)
+ MOVBS 0x00ffffff(R2), R1 // MOVBS 16777215(R2), R1
+ MOVBS.P 0x00ffffff(R2), R1 // MOVBS.P 16777215(R2), R1
+ MOVBS.W 0x00ffffff(R2), R1 // MOVBS.W 16777215(R2), R1
+ MOVBS -0x00ffffff(R2), R1 // MOVBS -16777215(R2), R1
+ MOVBS.P -0x00ffffff(R2), R1 // MOVBS.P -16777215(R2), R1
+ MOVBS.W -0x00ffffff(R2), R1 // MOVBS.W -16777215(R2), R1
+ MOVBU R1, 0x00ffffff(R2) // MOVBU R1, 16777215(R2)
+ MOVBU.W R1, 0x00ffffff(R2) // MOVBU.W R1, 16777215(R2)
+ MOVBU.P R1, 0x00ffffff(R2) // MOVBU.P R1, 16777215(R2)
+ MOVBU R1, -0x00ffffff(R2) // MOVBU R1, -16777215(R2)
+ MOVBU.W R1, -0x00ffffff(R2) // MOVBU.W R1, -16777215(R2)
+ MOVBU.P R1, -0x00ffffff(R2) // MOVBU.P R1, -16777215(R2)
+ MOVBU 0x00ffffff(R2), R1 // MOVBU 16777215(R2), R1
+ MOVBU.P 0x00ffffff(R2), R1 // MOVBU.P 16777215(R2), R1
+ MOVBU.W 0x00ffffff(R2), R1 // MOVBU.W 16777215(R2), R1
+ MOVBU -0x00ffffff(R2), R1 // MOVBU -16777215(R2), R1
+ MOVBU.P -0x00ffffff(R2), R1 // MOVBU.P -16777215(R2), R1
+ MOVBU.W -0x00ffffff(R2), R1 // MOVBU.W -16777215(R2), R1
+ MOVB R0, math·Exp(SB) // MOVB R0, math.Exp(SB)
+ MOVB math·Exp(SB), R0 // MOVB math.Exp(SB), R0
+ MOVBS R0, math·Exp(SB) // MOVBS R0, math.Exp(SB)
+ MOVBS math·Exp(SB), R0 // MOVBS math.Exp(SB), R0
+ MOVBU R0, math·Exp(SB) // MOVBU R0, math.Exp(SB)
+ MOVBU math·Exp(SB), R0 // MOVBU math.Exp(SB), R0
+ MOVB R2, R0>>28(R1) // 202ec1e7
+ MOVB R2, R0<<28(R1) // 002ec1e7
+ MOVB R2, R0->28(R1) // 402ec1e7
+ MOVB R2, R0@>28(R1) // 602ec1e7
+ MOVB.U R2, R0>>28(R1) // 202e41e7
+ MOVB.U R2, R0<<28(R1) // 002e41e7
+ MOVB.U R2, R0->28(R1) // 402e41e7
+ MOVB.U R2, R0@>28(R1) // 602e41e7
+ MOVB.W R2, R0>>28(R1) // 202ee1e7
+ MOVB.W R2, R0<<28(R1) // 002ee1e7
+ MOVB.W R2, R0->28(R1) // 402ee1e7
+ MOVB.W R2, R0@>28(R1) // 602ee1e7
+ MOVB.P R2, R0>>28(R5) // 202ec5e6
+ MOVB.P R2, R0<<28(R5) // 002ec5e6
+ MOVB.P R2, R0->28(R5) // 402ec5e6
+ MOVB.P R2, R0@>28(R5) // 602ec5e6
+ MOVBS R2, R0>>28(R1) // 202ec1e7
+ MOVBS R2, R0<<28(R1) // 002ec1e7
+ MOVBS R2, R0->28(R1) // 402ec1e7
+ MOVBS R2, R0@>28(R1) // 602ec1e7
+ MOVBS.U R2, R0>>28(R1) // 202e41e7
+ MOVBS.U R2, R0<<28(R1) // 002e41e7
+ MOVBS.U R2, R0->28(R1) // 402e41e7
+ MOVBS.U R2, R0@>28(R1) // 602e41e7
+ MOVBS.W R2, R0>>28(R1) // 202ee1e7
+ MOVBS.W R2, R0<<28(R1) // 002ee1e7
+ MOVBS.W R2, R0->28(R1) // 402ee1e7
+ MOVBS.W R2, R0@>28(R1) // 602ee1e7
+ MOVBS.P R2, R0>>28(R5) // 202ec5e6
+ MOVBS.P R2, R0<<28(R5) // 002ec5e6
+ MOVBS.P R2, R0->28(R5) // 402ec5e6
+ MOVBS.P R2, R0@>28(R5) // 602ec5e6
+ MOVBU R2, R0>>28(R1) // 202ec1e7
+ MOVBU R2, R0<<28(R1) // 002ec1e7
+ MOVBU R2, R0->28(R1) // 402ec1e7
+ MOVBU R2, R0@>28(R1) // 602ec1e7
+ MOVBU.U R2, R0>>28(R1) // 202e41e7
+ MOVBU.U R2, R0<<28(R1) // 002e41e7
+ MOVBU.U R2, R0->28(R1) // 402e41e7
+ MOVBU.U R2, R0@>28(R1) // 602e41e7
+ MOVBU.W R2, R0>>28(R1) // 202ee1e7
+ MOVBU.W R2, R0<<28(R1) // 002ee1e7
+ MOVBU.W R2, R0->28(R1) // 402ee1e7
+ MOVBU.W R2, R0@>28(R1) // 602ee1e7
+ MOVBU.P R2, R0>>28(R5) // 202ec5e6
+ MOVBU.P R2, R0<<28(R5) // 002ec5e6
+ MOVBU.P R2, R0->28(R5) // 402ec5e6
+ MOVBU.P R2, R0@>28(R5) // 602ec5e6
+ MOVBU R0>>28(R1), R2 // 202ed1e7
+ MOVBU R0<<28(R1), R2 // 002ed1e7
+ MOVBU R0->28(R1), R2 // 402ed1e7
+ MOVBU R0@>28(R1), R2 // 602ed1e7
+ MOVBU.U R0>>28(R1), R2 // 202e51e7
+ MOVBU.U R0<<28(R1), R2 // 002e51e7
+ MOVBU.U R0->28(R1), R2 // 402e51e7
+ MOVBU.U R0@>28(R1), R2 // 602e51e7
+ MOVBU.W R0>>28(R1), R2 // 202ef1e7
+ MOVBU.W R0<<28(R1), R2 // 002ef1e7
+ MOVBU.W R0->28(R1), R2 // 402ef1e7
+ MOVBU.W R0@>28(R1), R2 // 602ef1e7
+ MOVBU.P R0>>28(g), R2 // 202edae6
+ MOVBU.P R0<<28(g), R2 // 002edae6
+ MOVBU.P R0->28(g), R2 // 402edae6
+ MOVBU.P R0@>28(g), R2 // 602edae6
+ MOVBS R0<<0(R1), R2 // d02091e1
+ MOVBS.U R0<<0(R1), R2 // d02011e1
+ MOVBS.W R0<<0(R1), R2 // d020b1e1
+ MOVBS.P R0<<0(R1), R2 // d02091e0
+ MOVB R0<<0(R1), R2 // d02091e1
+ MOVB.U R0<<0(R1), R2 // d02011e1
+ MOVB.W R0<<0(R1), R2 // d020b1e1
+ MOVB.P R0<<0(R1), R2 // d02091e0
+ MOVBS R2@>0, R8 // 7280afe6
+ MOVBS R2@>8, R8 // 7284afe6
+ MOVBS R2@>16, R8 // 7288afe6
+ MOVBS R2@>24, R8 // 728cafe6
+ MOVB R2@>0, R8 // 7280afe6
+ MOVB R2@>8, R8 // 7284afe6
+ MOVB R2@>16, R8 // 7288afe6
+ MOVB R2@>24, R8 // 728cafe6
+ MOVBU R4@>0, R7 // 7470efe6
+ MOVBU R4@>8, R7 // 7474efe6
+ MOVBU R4@>16, R7 // 7478efe6
+ MOVBU R4@>24, R7 // 747cefe6
+
+// MOVH
+ MOVH R3, R4 // 0340a0e1
+ MOVH R9, R2 // 0920a0e1
+ MOVHS R5, R6 // 0568a0e14668a0e1
+ MOVHU R5, R6 // 0568a0e12668a0e1
+ MOVH R4, (R3) // b040c3e1
+ MOVHS.W R4, (R3) // b040e3e1
+ MOVHS.P R4, (R3) // b040c3e0
+ MOVHS R4, (R3) // b040c3e1
+ MOVHS.W R4, (R3) // b040e3e1
+ MOVHS.P R4, (R3) // b040c3e0
+ MOVHU R4, (R3) // b040c3e1
+ MOVHU.W R4, (R3) // b040e3e1
+ MOVHU.P R4, (R3) // b040c3e0
+ MOVH R3, 0x20(R4) // MOVH R3, 32(R4) // b032c4e1
+ MOVH.W R3, 0x20(R4) // MOVH.W R3, 32(R4) // b032e4e1
+ MOVH.P R3, 0x20(R4) // MOVH.P R3, 32(R4) // b032c4e0
+ MOVHS R3, 0x20(R4) // MOVHS R3, 32(R4) // b032c4e1
+ MOVHS.W R3, 0x20(R4) // MOVHS.W R3, 32(R4) // b032e4e1
+ MOVHS.P R3, 0x20(R4) // MOVHS.P R3, 32(R4) // b032c4e0
+ MOVHU R3, 0x20(R4) // MOVHU R3, 32(R4) // b032c4e1
+ MOVHU.W R3, 0x20(R4) // MOVHU.W R3, 32(R4) // b032e4e1
+ MOVHU.P R3, 0x20(R4) // MOVHU.P R3, 32(R4) // b032c4e0
+ MOVH R3, -0x20(R4) // MOVH R3, -32(R4) // b03244e1
+ MOVH.W R3, -0x20(R4) // MOVH.W R3, -32(R4) // b03264e1
+ MOVH.P R3, -0x20(R4) // MOVH.P R3, -32(R4) // b03244e0
+ MOVHS R3, -0x20(R4) // MOVHS R3, -32(R4) // b03244e1
+ MOVHS.W R3, -0x20(R4) // MOVHS.W R3, -32(R4) // b03264e1
+ MOVHS.P R3, -0x20(R4) // MOVHS.P R3, -32(R4) // b03244e0
+ MOVHU R3, -0x20(R4) // MOVHU R3, -32(R4) // b03244e1
+ MOVHU.W R3, -0x20(R4) // MOVHU.W R3, -32(R4) // b03264e1
+ MOVHU.P R3, -0x20(R4) // MOVHU.P R3, -32(R4) // b03244e0
+ MOVHU (R9), R8 // b080d9e1
+ MOVHU.W (R9), R8 // b080f9e1
+ MOVHU.P (R9), R8 // b080d9e0
+ MOVH (R9), R8 // f080d9e1
+ MOVH.W (R9), R8 // f080f9e1
+ MOVH.P (R9), R8 // f080d9e0
+ MOVHS (R9), R8 // f080d9e1
+ MOVHS.W (R9), R8 // f080f9e1
+ MOVHS.P (R9), R8 // f080d9e0
+ MOVHU 0x22(R9), R8 // MOVHU 34(R9), R8 // b282d9e1
+ MOVHU.W 0x22(R9), R8 // MOVHU.W 34(R9), R8 // b282f9e1
+ MOVHU.P 0x22(R9), R8 // MOVHU.P 34(R9), R8 // b282d9e0
+ MOVH 0x22(R9), R8 // MOVH 34(R9), R8 // f282d9e1
+ MOVH.W 0x22(R9), R8 // MOVH.W 34(R9), R8 // f282f9e1
+ MOVH.P 0x22(R9), R8 // MOVH.P 34(R9), R8 // f282d9e0
+ MOVHS 0x22(R9), R8 // MOVHS 34(R9), R8 // f282d9e1
+ MOVHS.W 0x22(R9), R8 // MOVHS.W 34(R9), R8 // f282f9e1
+ MOVHS.P 0x22(R9), R8 // MOVHS.P 34(R9), R8 // f282d9e0
+ MOVHU -0x24(R9), R8 // MOVHU -36(R9), R8 // b48259e1
+ MOVHU.W -0x24(R9), R8 // MOVHU.W -36(R9), R8 // b48279e1
+ MOVHU.P -0x24(R9), R8 // MOVHU.P -36(R9), R8 // b48259e0
+ MOVH -0x24(R9), R8 // MOVH -36(R9), R8 // f48259e1
+ MOVH.W -0x24(R9), R8 // MOVH.W -36(R9), R8 // f48279e1
+ MOVH.P -0x24(R9), R8 // MOVH.P -36(R9), R8 // f48259e0
+ MOVHS -0x24(R9), R8 // MOVHS -36(R9), R8 // f48259e1
+ MOVHS.W -0x24(R9), R8 // MOVHS.W -36(R9), R8 // f48279e1
+ MOVHS.P -0x24(R9), R8 // MOVHS.P -36(R9), R8 // f48259e0
+ MOVH R1, 0x00ffffff(R2) // MOVH R1, 16777215(R2)
+ MOVH.W R1, 0x00ffffff(R2) // MOVH.W R1, 16777215(R2)
+ MOVH.P R1, 0x00ffffff(R2) // MOVH.P R1, 16777215(R2)
+ MOVH R1, -0x00ffffff(R2) // MOVH R1, -16777215(R2)
+ MOVH.W R1, -0x00ffffff(R2) // MOVH.W R1, -16777215(R2)
+ MOVH.P R1, -0x00ffffff(R2) // MOVH.P R1, -16777215(R2)
+ MOVH 0x00ffffff(R2), R1 // MOVH 16777215(R2), R1
+ MOVH.P 0x00ffffff(R2), R1 // MOVH.P 16777215(R2), R1
+ MOVH.W 0x00ffffff(R2), R1 // MOVH.W 16777215(R2), R1
+ MOVH -0x00ffffff(R2), R1 // MOVH -16777215(R2), R1
+ MOVH.P -0x00ffffff(R2), R1 // MOVH.P -16777215(R2), R1
+ MOVH.W -0x00ffffff(R2), R1 // MOVH.W -16777215(R2), R1
+ MOVHS R1, 0x00ffffff(R2) // MOVHS R1, 16777215(R2)
+ MOVHS.W R1, 0x00ffffff(R2) // MOVHS.W R1, 16777215(R2)
+ MOVHS.P R1, 0x00ffffff(R2) // MOVHS.P R1, 16777215(R2)
+ MOVHS R1, -0x00ffffff(R2) // MOVHS R1, -16777215(R2)
+ MOVHS.W R1, -0x00ffffff(R2) // MOVHS.W R1, -16777215(R2)
+ MOVHS.P R1, -0x00ffffff(R2) // MOVHS.P R1, -16777215(R2)
+ MOVHS 0x00ffffff(R2), R1 // MOVHS 16777215(R2), R1
+ MOVHS.P 0x00ffffff(R2), R1 // MOVHS.P 16777215(R2), R1
+ MOVHS.W 0x00ffffff(R2), R1 // MOVHS.W 16777215(R2), R1
+ MOVHS -0x00ffffff(R2), R1 // MOVHS -16777215(R2), R1
+ MOVHS.P -0x00ffffff(R2), R1 // MOVHS.P -16777215(R2), R1
+ MOVHS.W -0x00ffffff(R2), R1 // MOVHS.W -16777215(R2), R1
+ MOVHU R1, 0x00ffffff(R2) // MOVHU R1, 16777215(R2)
+ MOVHU.W R1, 0x00ffffff(R2) // MOVHU.W R1, 16777215(R2)
+ MOVHU.P R1, 0x00ffffff(R2) // MOVHU.P R1, 16777215(R2)
+ MOVHU R1, -0x00ffffff(R2) // MOVHU R1, -16777215(R2)
+ MOVHU.W R1, -0x00ffffff(R2) // MOVHU.W R1, -16777215(R2)
+ MOVHU.P R1, -0x00ffffff(R2) // MOVHU.P R1, -16777215(R2)
+ MOVHU 0x00ffffff(R2), R1 // MOVHU 16777215(R2), R1
+ MOVHU.P 0x00ffffff(R2), R1 // MOVHU.P 16777215(R2), R1
+ MOVHU.W 0x00ffffff(R2), R1 // MOVHU.W 16777215(R2), R1
+ MOVHU -0x00ffffff(R2), R1 // MOVHU -16777215(R2), R1
+ MOVHU.P -0x00ffffff(R2), R1 // MOVHU.P -16777215(R2), R1
+ MOVHU.W -0x00ffffff(R2), R1 // MOVHU.W -16777215(R2), R1
+ MOVH R0, math·Exp(SB) // MOVH R0, math.Exp(SB)
+ MOVH math·Exp(SB), R0 // MOVH math.Exp(SB), R0
+ MOVHS R0, math·Exp(SB) // MOVHS R0, math.Exp(SB)
+ MOVHS math·Exp(SB), R0 // MOVHS math.Exp(SB), R0
+ MOVHU R0, math·Exp(SB) // MOVHU R0, math.Exp(SB)
+ MOVHU math·Exp(SB), R0 // MOVHU math.Exp(SB), R0
+ MOVHS R0<<0(R1), R2 // f02091e1
+ MOVHS.U R0<<0(R1), R2 // f02011e1
+ MOVHS.W R0<<0(R1), R2 // f020b1e1
+ MOVHS.P R0<<0(R1), R2 // f02091e0
+ MOVH R0<<0(R1), R2 // f02091e1
+ MOVH.U R0<<0(R1), R2 // f02011e1
+ MOVH.W R0<<0(R1), R2 // f020b1e1
+ MOVH.P R0<<0(R1), R2 // f02091e0
+ MOVHU R0<<0(R1), R2 // b02091e1
+ MOVHU.U R0<<0(R1), R2 // b02011e1
+ MOVHU.W R0<<0(R1), R2 // b020b1e1
+ MOVHU.P R0<<0(R1), R2 // b02091e0
+ MOVHS R2, R5<<0(R1) // b52081e1
+ MOVHS.U R2, R5<<0(R1) // b52001e1
+ MOVHS.W R2, R5<<0(R1) // b520a1e1
+ MOVHS.P R2, R5<<0(R1) // b52081e0
+ MOVH R2, R5<<0(R1) // b52081e1
+ MOVH.U R2, R5<<0(R1) // b52001e1
+ MOVH.W R2, R5<<0(R1) // b520a1e1
+ MOVH.P R2, R5<<0(R1) // b52081e0
+ MOVHU R2, R5<<0(R1) // b52081e1
+ MOVHU.U R2, R5<<0(R1) // b52001e1
+ MOVHU.W R2, R5<<0(R1) // b520a1e1
+ MOVHU.P R2, R5<<0(R1) // b52081e0
+ MOVHS R3@>0, R9 // 7390bfe6
+ MOVHS R3@>8, R9 // 7394bfe6
+ MOVHS R3@>16, R9 // 7398bfe6
+ MOVHS R3@>24, R9 // 739cbfe6
+ MOVH R3@>0, R9 // 7390bfe6
+ MOVH R3@>8, R9 // 7394bfe6
+ MOVH R3@>16, R9 // 7398bfe6
+ MOVH R3@>24, R9 // 739cbfe6
+ MOVHU R5@>0, R1 // 7510ffe6
+ MOVHU R5@>8, R1 // 7514ffe6
+ MOVHU R5@>16, R1 // 7518ffe6
+ MOVHU R5@>24, R1 // 751cffe6
+
+ RET foo(SB)
+
+//
+// END
+//
+// LTYPEE
+// {
+// outcode($1, Always, &nullgen, 0, &nullgen);
+// }
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
new file mode 100644
index 0000000..a4b56b0
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -0,0 +1,1626 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input was created by taking the instruction productions in
+// the old assembler's (7a's) grammar and hand-writing complete
+// instructions for each rule, to guarantee we cover the same space.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB), DUPOK|NOSPLIT, $-8
+
+
+// arithmetic operations
+ ADDW $1, R2, R3
+ ADDW R1, R2, R3
+ ADDW R1, ZR, R3
+ ADD $1, R2, R3
+ ADD R1, R2, R3
+ ADD R1, ZR, R3
+ ADD $1, R2, R3
+ ADDW $1, R2
+ ADDW R1, R2
+ ADD $1, R2
+ ADD R1, R2
+ ADD R1>>11, R2
+ ADD R1<<22, R2
+ ADD R1->33, R2
+ ADD $0x000aaa, R2, R3 // ADD $2730, R2, R3 // 43a82a91
+ ADD $0x000aaa, R2 // ADD $2730, R2 // 42a82a91
+ ADD $0xaaa000, R2, R3 // ADD $11182080, R2, R3 // 43a86a91
+ ADD $0xaaa000, R2 // ADD $11182080, R2 // 42a86a91
+ ADD $0xaaaaaa, R2, R3 // ADD $11184810, R2, R3 // 43a82a9163a86a91
+ ADD $0xaaaaaa, R2 // ADD $11184810, R2 // 42a82a9142a86a91
+ SUB $0x000aaa, R2, R3 // SUB $2730, R2, R3 // 43a82ad1
+ SUB $0x000aaa, R2 // SUB $2730, R2 // 42a82ad1
+ SUB $0xaaa000, R2, R3 // SUB $11182080, R2, R3 // 43a86ad1
+ SUB $0xaaa000, R2 // SUB $11182080, R2 // 42a86ad1
+ SUB $0xaaaaaa, R2, R3 // SUB $11184810, R2, R3 // 43a82ad163a86ad1
+ SUB $0xaaaaaa, R2 // SUB $11184810, R2 // 42a82ad142a86ad1
+ ADDW $0x60060, R2 // ADDW $393312, R2 // 4280011142804111
+ ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091
+ SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb
+ ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
+ ADD R1>>11, R2, R3
+ ADD R1<<22, R2, R3
+ ADD R1->33, R2, R3
+ AND R1@>33, R2, R3
+ ADD R1.UXTB, R2, R3 // 4300218b
+ ADD R1.UXTB<<4, R2, R3 // 4310218b
+ ADD R2, RSP, RSP // ff63228b
+ ADD R2.SXTX<<1, RSP, RSP // ffe7228b
+ ADD ZR.SXTX<<1, R2, R3 // 43e43f8b
+ ADDW R2.SXTW, R10, R12 // 4cc1220b
+ ADD R19.UXTX, R14, R17 // d161338b
+ ADDSW R19.UXTW, R14, R17 // d141332b
+ ADDS R12.SXTX, R3, R1 // 61e02cab
+ SUB R19.UXTH<<4, R2, R21 // 553033cb
+ SUBW R1.UXTX<<1, R3, R2 // 6264214b
+ SUBS R3.UXTX, R8, R9 // 096123eb
+ SUBSW R17.UXTH, R15, R21 // f521316b
+ SUBW ZR<<14, R19, R13 // 6d3a1f4b
+ CMP R2.SXTH, R13 // bfa122eb
+ CMN R1.SXTX<<2, R10 // 5fe921ab
+ CMPW R2.UXTH<<3, R11 // 7f2d226b
+ CMNW R1.SXTB, R9 // 3f81212b
+ ADD R1<<1, RSP, R3 // e367218b
+ ADDW R1<<2, R3, RSP // 7f48210b
+ SUB R1<<3, RSP // ff6f21cb
+ SUBS R1<<4, RSP, R3 // e37321eb
+ ADDS R1<<1, RSP, R4 // e46721ab
+ CMP R1<<2, RSP // ff6b21eb
+ CMN R1<<3, RSP // ff6f21ab
+ ADDS R1<<1, ZR, R4 // e40701ab
+ ADD R3<<50, ZR, ZR // ffcb038b
+ CMP R4<<24, ZR // ff6304eb
+ CMPW $0x60060, R2 // CMPW $393312, R2 // 1b0c8052db00a0725f001b6b
+ CMPW $40960, R0 // 1f284071
+ CMPW $27745, R2 // 3b8c8d525f001b6b
+ CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
+ CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
+ CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb
+ CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb
+ CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb
+ CMP $3, R2
+ CMP R1, R2
+ CMP R1->11, R2
+ CMP R1>>22, R2
+ CMP R1<<33, R2
+ CMP R22.SXTX, RSP // ffe336eb
+ CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb
+ CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff433b6b
+ CCMN MI, ZR, R1, $4 // e44341ba
+ // MADD Rn,Rm,Ra,Rd
+ MADD R1, R2, R3, R4 // 6408019b
+ // CLS
+ CLSW R1, R2
+ CLS R1, R2
+
+// fp/simd instructions.
+ VADDP V1.B16, V2.B16, V3.B16 // 43bc214e
+ VADDP V1.S4, V2.S4, V3.S4 // 43bca14e
+ VADDP V1.D2, V2.D2, V3.D2 // 43bce14e
+ VAND V21.B8, V12.B8, V3.B8 // 831d350e
+ VCMEQ V1.H4, V2.H4, V3.H4 // 438c612e
+ VORR V5.B16, V4.B16, V3.B16 // 831ca54e
+ VADD V16.S4, V5.S4, V9.S4 // a984b04e
+ VEOR V0.B16, V1.B16, V0.B16 // 201c206e
+ VADDV V0.S4, V0 // 00b8b14e
+ VMOVI $82, V0.B16 // 40e6024f
+ VUADDLV V6.B16, V6 // c638306e
+ VADD V1, V2, V3 // 4384e15e
+ VADD V1, V3, V3 // 6384e15e
+ VSUB V12, V30, V30 // de87ec7e
+ VSUB V12, V20, V30 // 9e86ec7e
+ VFMLA V1.D2, V12.D2, V1.D2 // 81cd614e
+ VFMLA V1.S2, V12.S2, V1.S2 // 81cd210e
+ VFMLA V1.S4, V12.S4, V1.S4 // 81cd214e
+ VFMLS V1.D2, V12.D2, V1.D2 // 81cde14e
+ VFMLS V1.S2, V12.S2, V1.S2 // 81cda10e
+ VFMLS V1.S4, V12.S4, V1.S4 // 81cda14e
+ VEXT $4, V2.B8, V1.B8, V3.B8 // 2320022e
+ VEXT $8, V2.B16, V1.B16, V3.B16 // 2340026e
+ VRBIT V24.B16, V24.B16 // 185b606e
+ VRBIT V24.B8, V24.B8 // 185b602e
+ VUSHR $56, V1.D2, V2.D2 // 2204486f
+ VUSHR $24, V1.S4, V2.S4 // 2204286f
+ VUSHR $24, V1.S2, V2.S2 // 2204282f
+ VUSHR $8, V1.H4, V2.H4 // 2204182f
+ VUSHR $8, V1.H8, V2.H8 // 2204186f
+ VUSHR $2, V1.B8, V2.B8 // 22040e2f
+ VUSHR $2, V1.B16, V2.B16 // 22040e6f
+ VSHL $56, V1.D2, V2.D2 // 2254784f
+ VSHL $24, V1.S4, V2.S4 // 2254384f
+ VSHL $24, V1.S2, V2.S2 // 2254380f
+ VSHL $8, V1.H4, V2.H4 // 2254180f
+ VSHL $8, V1.H8, V2.H8 // 2254184f
+ VSHL $2, V1.B8, V2.B8 // 22540a0f
+ VSHL $2, V1.B16, V2.B16 // 22540a4f
+ VSRI $56, V1.D2, V2.D2 // 2244486f
+ VSRI $24, V1.S4, V2.S4 // 2244286f
+ VSRI $24, V1.S2, V2.S2 // 2244282f
+ VSRI $8, V1.H4, V2.H4 // 2244182f
+ VSRI $8, V1.H8, V2.H8 // 2244186f
+ VSRI $2, V1.B8, V2.B8 // 22440e2f
+ VSRI $2, V1.B16, V2.B16 // 22440e6f
+ VSLI $7, V2.B16, V3.B16 // 43540f6f
+ VSLI $15, V3.H4, V4.H4 // 64541f2f
+ VSLI $31, V5.S4, V6.S4 // a6543f6f
+ VSLI $63, V7.D2, V8.D2 // e8547f6f
+ VUSRA $8, V2.B16, V3.B16 // 4314086f
+ VUSRA $16, V3.H4, V4.H4 // 6414102f
+ VUSRA $32, V5.S4, V6.S4 // a614206f
+ VUSRA $64, V7.D2, V8.D2 // e814406f
+ VTBL V22.B16, [V28.B16, V29.B16], V11.B16 // 8b23164e
+ VTBL V18.B8, [V17.B16, V18.B16, V19.B16], V22.B8 // 3642120e
+ VTBL V31.B8, [V14.B16, V15.B16, V16.B16, V17.B16], V15.B8 // cf611f0e
+ VTBL V14.B16, [V16.B16], V11.B16 // 0b020e4e
+ VTBL V28.B16, [V25.B16, V26.B16], V5.B16 // 25231c4e
+ VTBL V16.B8, [V4.B16, V5.B16, V6.B16], V12.B8 // 8c40100e
+ VTBL V4.B8, [V16.B16, V17.B16, V18.B16, V19.B16], V4.B8 // 0462040e
+ VTBL V15.B8, [V1.B16], V20.B8 // 34000f0e
+ VTBL V26.B16, [V2.B16, V3.B16], V26.B16 // 5a201a4e
+ VTBL V15.B8, [V6.B16, V7.B16, V8.B16], V2.B8 // c2400f0e
+ VTBL V2.B16, [V27.B16, V28.B16, V29.B16, V30.B16], V18.B16 // 7263024e
+ VTBL V11.B16, [V13.B16], V27.B16 // bb010b4e
+ VTBL V3.B8, [V7.B16, V8.B16], V25.B8 // f920030e
+ VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e
+ VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e
+ VTBL V3.B8, [V27.B16], V8.B8 // 6803030e
+ VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e
+ VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e
+ VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e
+ VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e
+ VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e
+ VZIP2 V25.S2, V14.S2, V25.S2 // d979990e
+ VUXTL V30.B8, V30.H8 // dea7082f
+ VUXTL V30.H4, V29.S4 // dda7102f
+ VUXTL V29.S2, V2.D2 // a2a7202f
+ VUXTL2 V30.H8, V30.S4 // dea7106f
+ VUXTL2 V29.S4, V2.D2 // a2a7206f
+ VUXTL2 V30.B16, V2.H8 // c2a7086f
+ VBIT V21.B16, V25.B16, V4.B16 // 241fb56e
+ VBSL V23.B16, V3.B16, V7.B16 // 671c776e
+ VCMTST V2.B8, V29.B8, V2.B8 // a28f220e
+ VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e
+ VSUB V2.B8, V30.B8, V30.B8 // de87222e
+ VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e
+ VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e
+ VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e
+ VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e
+ VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e
+ VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e
+ VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e
+ VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e
+ VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e
+ VUSHLL $0, V30.B8, V30.H8 // dea7082f
+ VUSHLL $0, V30.H4, V29.S4 // dda7102f
+ VUSHLL $0, V29.S2, V2.D2 // a2a7202f
+ VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f
+ VUSHLL2 $0, V30.H8, V30.S4 // dea7106f
+ VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f
+ VUSHLL $7, V30.B8, V30.H8 // dea70f2f
+ VUSHLL $15, V30.H4, V29.S4 // dda71f2f
+ VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f
+ VBIF V0.B8, V30.B8, V1.B8 // c11fe02e
+ VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e
+ FMOVS $(4.0), F0 // 0010221e
+ FMOVD $(4.0), F0 // 0010621e
+ FMOVS $(0.265625), F1 // 01302a1e
+ FMOVD $(0.1796875), F2 // 02f0681e
+ FMOVS $(0.96875), F3 // 03f02d1e
+ FMOVD $(28.0), F4 // 0490671e
+ VUADDW V9.B8, V12.H8, V14.H8 // 8e11292e
+ VUADDW V13.H4, V10.S4, V11.S4 // 4b116d2e
+ VUADDW V21.S2, V24.D2, V29.D2 // 1d13b52e
+ VUADDW2 V9.B16, V12.H8, V14.H8 // 8e11296e
+ VUADDW2 V13.H8, V20.S4, V30.S4 // 9e126d6e
+ VUADDW2 V21.S4, V24.D2, V29.D2 // 1d13b56e
+ VUMAX V3.B8, V2.B8, V1.B8 // 4164232e
+ VUMAX V3.B16, V2.B16, V1.B16 // 4164236e
+ VUMAX V3.H4, V2.H4, V1.H4 // 4164632e
+ VUMAX V3.H8, V2.H8, V1.H8 // 4164636e
+ VUMAX V3.S2, V2.S2, V1.S2 // 4164a32e
+ VUMAX V3.S4, V2.S4, V1.S4 // 4164a36e
+ VUMIN V3.B8, V2.B8, V1.B8 // 416c232e
+ VUMIN V3.B16, V2.B16, V1.B16 // 416c236e
+ VUMIN V3.H4, V2.H4, V1.H4 // 416c632e
+ VUMIN V3.H8, V2.H8, V1.H8 // 416c636e
+ VUMIN V3.S2, V2.S2, V1.S2 // 416ca32e
+ VUMIN V3.S4, V2.S4, V1.S4 // 416ca36e
+ FCCMPS LT, F1, F2, $1 // 41b4211e
+ FMADDS F1, F3, F2, F4 // 440c011f
+ FMADDD F4, F5, F4, F4 // 8414441f
+ FMSUBS F13, F21, F13, F19 // b3d50d1f
+ FMSUBD F11, F7, F15, F31 // ff9d4b1f
+ FNMADDS F1, F3, F2, F4 // 440c211f
+ FNMADDD F1, F3, F2, F4 // 440c611f
+ FNMSUBS F1, F3, F2, F4 // 448c211f
+ FNMSUBD F1, F3, F2, F4 // 448c611f
+ FADDS F2, F3, F4 // 6428221e
+ FADDD F1, F2 // 4228611e
+ VDUP V19.S[0], V17.S4 // 7106044e
+
+
+// special
+ PRFM (R2), PLDL1KEEP // 400080f9
+ PRFM 16(R2), PLDL1KEEP // 400880f9
+ PRFM 48(R6), PSTL2STRM // d31880f9
+ PRFM 8(R12), PLIL3STRM // 8d0580f9
+ PRFM (R8), $25 // 190180f9
+ PRFM 8(R9), $30 // 3e0580f9
+ NOOP // 1f2003d5
+ HINT $0 // 1f2003d5
+ DMB $1
+ SVC
+
+// encryption
+ SHA256H V9.S4, V3, V2 // 6240095e
+ SHA256H2 V9.S4, V4, V3 // 8350095e
+ SHA256SU0 V8.S4, V7.S4 // 0729285e
+ SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e
+ SHA1SU0 V11.S4, V8.S4, V6.S4 // 06310b5e
+ SHA1SU1 V5.S4, V1.S4 // a118285e
+ SHA1C V1.S4, V2, V3 // 4300015e
+ SHA1H V5, V4 // a408285e
+ SHA1M V8.S4, V7, V6 // e620085e
+ SHA1P V11.S4, V10, V9 // 49110b5e
+ SHA512H V2.D2, V1, V0 // 208062ce
+ SHA512H2 V4.D2, V3, V2 // 628464ce
+ SHA512SU0 V9.D2, V8.D2 // 2881c0ce
+ SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce
+ VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace
+ VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce
+ VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e
+ VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e
+ VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e
+ VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e
+ VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce
+ VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce
+ VREV32 V5.B16, V5.B16 // a508206e
+ VREV64 V2.S2, V3.S2 // 4308a00e
+ VREV64 V2.S4, V3.S4 // 4308a04e
+
+// logical ops
+//
+// make sure constants get encoded into an instruction when they can be
+ AND R1@>33, R2
+ AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192
+ AND $(1<<63-1), R1 // AND $9223372036854775807, R1 // 21f84092
+ ORR $(1<<63), R1 // ORR $-9223372036854775808, R1 // 210041b2
+ ORR $(1<<63-1), R1 // ORR $9223372036854775807, R1 // 21f840b2
+ EOR $(1<<63), R1 // EOR $-9223372036854775808, R1 // 210041d2
+ EOR $(1<<63-1), R1 // EOR $9223372036854775807, R1 // 21f840d2
+ ANDW $0x3ff00000, R2 // ANDW $1072693248, R2 // 42240c12
+ BICW $0x3ff00000, R2 // BICW $1072693248, R2 // 42540212
+ ORRW $0x3ff00000, R2 // ORRW $1072693248, R2 // 42240c32
+ ORNW $0x3ff00000, R2 // ORNW $1072693248, R2 // 42540232
+ EORW $0x3ff00000, R2 // EORW $1072693248, R2 // 42240c52
+ EONW $0x3ff00000, R2 // EONW $1072693248, R2 // 42540252
+ AND $0x22220000, R3, R4 // AND $572653568, R3, R4 // 5b44a4d264001b8a
+ ORR $0x22220000, R3, R4 // ORR $572653568, R3, R4 // 5b44a4d264001baa
+ EOR $0x22220000, R3, R4 // EOR $572653568, R3, R4 // 5b44a4d264001bca
+ BIC $0x22220000, R3, R4 // BIC $572653568, R3, R4 // 5b44a4d264003b8a
+ ORN $0x22220000, R3, R4 // ORN $572653568, R3, R4 // 5b44a4d264003baa
+ EON $0x22220000, R3, R4 // EON $572653568, R3, R4 // 5b44a4d264003bca
+ ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea
+ BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea
+ EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2
+ TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72
+ TST $0x4900000049, R0 // TST $313532612681, R0 // 3b0980d23b09c0f21f001bea
+ ORR $0x170000, R2, R1 // ORR $1507328, R2, R1 // fb02a0d241001baa
+ AND $0xff00ff, R2 // AND $16711935, R2 // fb1f80d2fb1fa0f242001b8a
+ AND $0xff00ffff, R1 // AND $4278255615, R1 // fbff9fd21be0bff221001b8a
+ ANDS $0xffff, R2 // ANDS $65535, R2 // 423c40f2
+ AND $0x7fffffff, R3 // AND $2147483647, R3 // 63784092
+ ANDS $0x0ffffffff80000000, R2 // ANDS $-2147483648, R2 // 428061f2
+ AND $0xfffff, R2 // AND $1048575, R2 // 424c4092
+ ANDW $0xf00fffff, R1 // ANDW $4027580415, R1 // 215c0412
+ ANDSW $0xff00ffff, R1 // ANDSW $4278255615, R1 // 215c0872
+ TST $0x11223344, R2 // TST $287454020, R2 // 9b6886d25b24a2f25f001bea
+ TSTW $0xa000, R3 // TSTW $40960, R3 // 1b0094527f001b6a
+ BICW $0xa000, R3 // BICW $40960, R3 // 1b00945263003b0a
+ ORRW $0x1b000, R2, R3 // ORRW $110592, R2, R3 // 1b0096523b00a07243001b2a
+ TSTW $0x500000, R1 // TSTW $5242880, R1 // 1b0aa0523f001b6a
+ TSTW $0xff00ff, R1 // TSTW $16711935, R1 // 3f9c0072
+ TSTW $0x60060, R5 // TSTW $393312, R5 // 1b0c8052db00a072bf001b6a
+ TSTW $0x6006000060060, R5 // TSTW $1689262177517664, R5 // 1b0c8052db00a072bf001b6a
+ ANDW $0x6006000060060, R5 // ANDW $1689262177517664, R5 // 1b0c8052db00a072a5001b0a
+ ANDSW $0x6006000060060, R5 // ANDSW $1689262177517664, R5 // 1b0c8052db00a072a5001b6a
+ EORW $0x6006000060060, R5 // EORW $1689262177517664, R5 // 1b0c8052db00a072a5001b4a
+ ORRW $0x6006000060060, R5 // ORRW $1689262177517664, R5 // 1b0c8052db00a072a5001b2a
+ BICW $0x6006000060060, R5 // BICW $1689262177517664, R5 // 1b0c8052db00a072a5003b0a
+ EONW $0x6006000060060, R5 // EONW $1689262177517664, R5 // 1b0c8052db00a072a5003b4a
+ ORNW $0x6006000060060, R5 // ORNW $1689262177517664, R5 // 1b0c8052db00a072a5003b2a
+ BICSW $0x6006000060060, R5 // BICSW $1689262177517664, R5 // 1b0c8052db00a072a5003b6a
+ AND $1, ZR // fb0340b2ff031b8a
+ ANDW $1, ZR // fb030032ff031b0a
+ // TODO: this could have better encoding
+ ANDW $-1, R10 // 1b0080124a011b0a
+ AND $8, R0, RSP // 1f007d92
+ ORR $8, R0, RSP // 1f007db2
+ EOR $8, R0, RSP // 1f007dd2
+ BIC $8, R0, RSP // 1ff87c92
+ ORN $8, R0, RSP // 1ff87cb2
+ EON $8, R0, RSP // 1ff87cd2
+ TST $15, R2 // 5f0c40f2
+ TST R1, R2 // 5f0001ea
+ TST R1->11, R2 // 5f2c81ea
+ TST R1>>22, R2 // 5f5841ea
+ TST R1<<33, R2 // 5f8401ea
+ TST $0x22220000, R3 // TST $572653568, R3 // 5b44a4d27f001bea
+
+// move an immediate to a Rn.
+ MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2
+ MOVW $1000000, R4 // 04488852e401a072
+ MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552
+ MOVW $0xaaaaffff, R1 // MOVW $2863333375, R1 // a1aaaa12
+ MOVW $0xaaaa, R1 // MOVW $43690, R1 // 41559552
+ MOVW $0xffffaaaa, R1 // MOVW $4294945450, R1 // a1aa8a12
+ MOVW $0xffff0000, R1 // MOVW $4294901760, R1 // e1ffbf52
+ MOVD $0xffff00000000000, R1 // MOVD $1152903912420802560, R1 // e13f54b2
+ MOVD $0x1111000000001111, R1 // MOVD $1229764173248860433, R1 // 212282d22122e2f2
+ MOVD $0x1111ffff1111ffff, R1 // MOVD $1230045644216991743, R1 // c1ddbd922122e2f2
+ MOVD $0x1111222233334444, R1 // MOVD $1229801703532086340, R1 // 818888d26166a6f24144c4f22122e2f2
+ MOVD $0xaaaaffff, R1 // MOVD $2863333375, R1 // e1ff9fd24155b5f2
+ MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
+ MOVD $0xaaaa0000aaaa1111, R1 // MOVD $-6149102338357718767, R1 // 212282d24155b5f24155f5f2
+ MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2
+ MOVD $0, R1 // 010080d2
+ MOVD $-1, R1 // 01008092
+ MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2
+ MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92
+ MOVW $1, ZR // 3f008052
+ MOVW $1, R1
+ MOVD $1, ZR // 3f0080d2
+ MOVD $1, R1
+ MOVK $1, R1
+ MOVD $0x1000100010001000, RSP // MOVD $1152939097061330944, RSP // ff8304b2
+ MOVW $0x10001000, RSP // MOVW $268439552, RSP // ff830432
+ ADDW $0x10001000, R1 // ADDW $268439552, R1 // fb83043221001b0b
+ ADDW $0x22220000, RSP, R3 // ADDW $572653568, RSP, R3 // 5b44a452e3433b0b
+
+// move a large constant to a Vd.
+ VMOVS $0x80402010, V11 // VMOVS $2151686160, V11
+ VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20
+ VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10
+ VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20
+
+// mov(to/from sp)
+ MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // e107409121080091
+ MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // ff074091ff231c91
+ MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // e108409121040091
+ MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // e1fc7f9121fc3f91
+ MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1
+ MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1
+ MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1
+ MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1
+ MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1
+ MOVW R1, R2
+ MOVW ZR, R1
+ MOVW R1, ZR
+ MOVD R1, R2
+ MOVD ZR, R1
+
+// store and load
+//
+// LD1/ST1
+ VLD1 (R8), [V1.B16, V2.B16] // 01a1404c
+ VLD1.P (R3), [V31.H8, V0.H8] // 7fa4df4c
+ VLD1.P (R8)(R20), [V21.B16, V22.B16] // 15a1d44c
+ VLD1.P 64(R1), [V5.B16, V6.B16, V7.B16, V8.B16] // 2520df4c
+ VLD1.P 1(R0), V4.B[15] // 041cdf4d
+ VLD1.P 2(R0), V4.H[7] // 0458df4d
+ VLD1.P 4(R0), V4.S[3] // 0490df4d
+ VLD1.P 8(R0), V4.D[1] // 0484df4d
+ VLD1.P (R0)(R1), V4.D[1] // 0484c14d
+ VLD1 (R0), V4.D[1] // 0484404d
+ VST1.P [V4.S4, V5.S4], 32(R1) // 24a89f4c
+ VST1 [V0.S4, V1.S4], (R0) // 00a8004c
+ VLD1 (R30), [V15.S2, V16.S2] // cfab400c
+ VLD1.P 24(R30), [V3.S2,V4.S2,V5.S2] // c36bdf0c
+ VLD2 (R29), [V23.H8, V24.H8] // b787404c
+ VLD2.P 16(R0), [V18.B8, V19.B8] // 1280df0c
+ VLD2.P (R1)(R2), [V15.S2, V16.S2] // 2f88c20c
+ VLD3 (R27), [V11.S4, V12.S4, V13.S4] // 6b4b404c
+ VLD3.P 48(RSP), [V11.S4, V12.S4, V13.S4] // eb4bdf4c
+ VLD3.P (R30)(R2), [V14.D2, V15.D2, V16.D2] // ce4fc24c
+ VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c
+ VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c
+ VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // ae09c90c
+ VLD1R (R1), [V9.B8] // 29c0400d
+ VLD1R.P (R1), [V9.B8] // 29c0df0d
+ VLD1R.P 1(R1), [V2.B8] // 22c0df0d
+ VLD1R.P 2(R1), [V2.H4] // 22c4df0d
+ VLD1R (R0), [V0.B16] // 00c0404d
+ VLD1R.P (R0), [V0.B16] // 00c0df4d
+ VLD1R.P (R15)(R1), [V15.H4] // efc5c10d
+ VLD2R (R15), [V15.H4, V16.H4] // efc5600d
+ VLD2R.P 16(R0), [V0.D2, V1.D2] // 00ccff4d
+ VLD2R.P (R0)(R5), [V31.D1, V0.D1] // 1fcce50d
+ VLD3R (RSP), [V31.S2, V0.S2, V1.S2] // ffeb400d
+ VLD3R.P 6(R15), [V15.H4, V16.H4, V17.H4] // efe5df0d
+ VLD3R.P (R15)(R6), [V15.H8, V16.H8, V17.H8] // efe5c64d
+ VLD4R (R0), [V0.B8, V1.B8, V2.B8, V3.B8] // 00e0600d
+ VLD4R.P 16(RSP), [V31.S4, V0.S4, V1.S4, V2.S4] // ffebff4d
+ VLD4R.P (R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
+ VST1.P [V24.S2], 8(R2) // 58789f0c
+ VST1 [V29.S2, V30.S2], (R29) // bdab000c
+ VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c
+ VST1.P V4.B[15], 1(R0) // 041c9f4d
+ VST1.P V4.H[7], 2(R0) // 04589f4d
+ VST1.P V4.S[3], 4(R0) // 04909f4d
+ VST1.P V4.D[1], 8(R0) // 04849f4d
+ VST1.P V4.D[1], (R0)(R1) // 0484814d
+ VST1 V4.D[1], (R0) // 0484004d
+ VST2 [V22.H8, V23.H8], (R23) // f686004c
+ VST2.P [V14.H4, V15.H4], 16(R17) // 2e869f0c
+ VST2.P [V14.H4, V15.H4], (R3)(R17) // 6e84910c
+ VST3 [V1.D2, V2.D2, V3.D2], (R11) // 614d004c
+ VST3.P [V18.S4, V19.S4, V20.S4], 48(R25) // 324b9f4c
+ VST3.P [V19.B8, V20.B8, V21.B8], (R3)(R7) // 7340870c
+ VST4 [V22.D2, V23.D2, V24.D2, V25.D2], (R3) // 760c004c
+ VST4.P [V14.D2, V15.D2, V16.D2, V17.D2], 64(R15) // ee0d9f4c
+ VST4.P [V24.B8, V25.B8, V26.B8, V27.B8], (R3)(R23) // 7800970c
+
+// pre/post-indexed
+ FMOVS.P F20, 4(R0) // 144400bc
+ FMOVS.W F20, 4(R0) // 144c00bc
+ FMOVD.P F20, 8(R1) // 348400fc
+ FMOVQ.P F13, 11(R10) // 4db5803c
+ FMOVQ.W F15, 11(R20) // 8fbe803c
+
+ FMOVS.P 8(R0), F20 // 148440bc
+ FMOVS.W 8(R0), F20 // 148c40bc
+ FMOVD.W 8(R1), F20 // 348c40fc
+ FMOVQ.P 11(R10), F13 // 4db5c03c
+ FMOVQ.W 11(R20), F15 // 8fbec03c
+
+// small offset fits into instructions
+ MOVB R1, 1(R2) // 41040039
+ MOVH R1, 1(R2) // 41100078
+ MOVH R1, 2(R2) // 41040079
+ MOVW R1, 1(R2) // 411000b8
+ MOVW R1, 4(R2) // 410400b9
+ MOVD R1, 1(R2) // 411000f8
+ MOVD R1, 8(R2) // 410400f9
+ MOVD ZR, (R1)
+ MOVW ZR, (R1)
+ FMOVS F1, 1(R2) // 411000bc
+ FMOVS F1, 4(R2) // 410400bd
+ FMOVS F20, (R0) // 140000bd
+ FMOVD F1, 1(R2) // 411000fc
+ FMOVD F1, 8(R2) // 410400fd
+ FMOVD F20, (R2) // 540000fd
+ FMOVQ F0, 32(R5)// a008803d
+ FMOVQ F10, 65520(R10) // 4afdbf3d
+ FMOVQ F11, 64(RSP) // eb13803d
+ FMOVQ F11, 8(R20) // 8b82803c
+ FMOVQ F11, 4(R20) // 8b42803c
+
+ MOVB 1(R1), R2 // 22048039
+ MOVH 1(R1), R2 // 22108078
+ MOVH 2(R1), R2 // 22048079
+ MOVW 1(R1), R2 // 221080b8
+ MOVW 4(R1), R2 // 220480b9
+ MOVD 1(R1), R2 // 221040f8
+ MOVD 8(R1), R2 // 220440f9
+ FMOVS (R0), F20 // 140040bd
+ FMOVS 1(R1), F2 // 221040bc
+ FMOVS 4(R1), F2 // 220440bd
+ FMOVD 1(R1), F2 // 221040fc
+ FMOVD 8(R1), F2 // 220440fd
+ FMOVQ 32(R5), F2 // a208c03d
+ FMOVQ 65520(R10), F10 // 4afdff3d
+ FMOVQ 64(RSP), F11 // eb13c03d
+
+// large aligned offset, use two instructions (add + load/store).
+ MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039
+ MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079
+ MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9
+ MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9
+ FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
+ FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
+
+ MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
+ MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
+ MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
+ MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
+ FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
+ FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
+
+// very large or unaligned offsets use the constant pool.
+// the encoding cannot be checked as the address of the constant pool is unknown.
+// here we only test that these instructions can be assembled.
+ MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2)
+ MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2)
+ MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2)
+ MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2)
+ FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2)
+ FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2)
+
+ MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2
+ MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2
+ MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2
+ MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2
+ FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2
+ FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2
+
+// shifted or extended register offset.
+ MOVD (R2)(R6.SXTW), R4 // 44c866f8
+ MOVD (R3)(R6), R5 // 656866f8
+ MOVD (R3)(R6*1), R5 // 656866f8
+ MOVD (R2)(R6), R4 // 446866f8
+ MOVWU (R19)(R20<<2), R20 // 747a74b8
+ MOVD (R2)(R6<<3), R4 // 447866f8
+ MOVD (R3)(R7.SXTX<<3), R8 // 68f867f8
+ MOVWU (R5)(R4.UXTW), R10 // aa4864b8
+ MOVBU (R3)(R9.UXTW), R8 // 68486938
+ MOVBU (R5)(R8), R10 // aa686838
+ MOVHU (R2)(R7.SXTW<<1), R11 // 4bd86778
+ MOVHU (R1)(R2<<1), R5 // 25786278
+ MOVB (R9)(R3.UXTW), R6 // 2649a338
+ MOVB (R10)(R6), R15 // 4f69a638
+ MOVB (R29)(R30<<0), R14 // ae7bbe38
+ MOVB (R29)(R30), R14 // ae6bbe38
+ MOVH (R5)(R7.SXTX<<1), R19 // b3f8a778
+ MOVH (R8)(R4<<1), R10 // 0a79a478
+ MOVW (R9)(R8.SXTW<<2), R19 // 33d9a8b8
+ MOVW (R1)(R4.SXTX), R11 // 2be8a4b8
+ MOVW (R1)(R4.SXTX), ZR // 3fe8a4b8
+ MOVW (R2)(R5), R12 // 4c68a5b8
+ FMOVS (R2)(R6), F4 // 446866bc
+ FMOVS (R2)(R6<<2), F4 // 447866bc
+ FMOVD (R2)(R6), F4 // 446866fc
+ FMOVD (R2)(R6<<3), F4 // 447866fc
+
+ MOVD R5, (R2)(R6<<3) // 457826f8
+ MOVD R9, (R6)(R7.SXTX<<3) // c9f827f8
+ MOVD ZR, (R6)(R7.SXTX<<3) // dff827f8
+ MOVW R8, (R2)(R3.UXTW<<2) // 485823b8
+ MOVW R7, (R3)(R4.SXTW) // 67c824b8
+ MOVB R4, (R2)(R6.SXTX) // 44e82638
+ MOVB R8, (R3)(R9.UXTW) // 68482938
+ MOVB R10, (R5)(R8) // aa682838
+ MOVB R10, (R5)(R8*1) // aa682838
+ MOVH R11, (R2)(R7.SXTW<<1) // 4bd82778
+ MOVH R5, (R1)(R2<<1) // 25782278
+ MOVH R7, (R2)(R5.SXTX<<1) // 47f82578
+ MOVH R8, (R3)(R6.UXTW) // 68482678
+ MOVB R4, (R2)(R6.SXTX) // 44e82638
+ FMOVS F4, (R2)(R6) // 446826bc
+ FMOVS F4, (R2)(R6<<2) // 447826bc
+ FMOVD F4, (R2)(R6) // 446826fc
+ FMOVD F4, (R2)(R6<<3) // 447826fc
+
+// vmov
+ VMOV V8.S[1], R1 // 013d0c0e
+ VMOV V0.D[0], R11 // 0b3c084e
+ VMOV V0.D[1], R11 // 0b3c184e
+ VMOV R20, V1.S[0] // 811e044e
+ VMOV R20, V1.S[1] // 811e0c4e
+ VMOV R1, V9.H4 // 290c020e
+ VDUP R1, V9.H4 // 290c020e
+ VMOV R22, V11.D2 // cb0e084e
+ VDUP R22, V11.D2 // cb0e084e
+ VMOV V2.B16, V4.B16 // 441ca24e
+ VMOV V20.S[0], V20 // 9406045e
+ VDUP V20.S[0], V20 // 9406045e
+ VMOV V12.D[0], V12.D[1] // 8c05186e
+ VMOV V10.S[0], V12.S[1] // 4c050c6e
+ VMOV V9.H[0], V12.H[1] // 2c05066e
+ VMOV V8.B[0], V12.B[1] // 0c05036e
+ VMOV V8.B[7], V4.B[8] // 043d116e
+
+// CBZ
+again:
+ CBZ R1, again // CBZ R1
+
+// conditional operations
+ CSET GT, R1 // e1d79f9a
+ CSETW HI, R2 // e2979f1a
+ CSEL LT, R1, R2, ZR // 3fb0829a
+ CSELW LT, R2, R3, R4 // 44b0831a
+ CSINC GT, R1, ZR, R3 // 23c49f9a
+ CSNEG MI, R1, R2, R3 // 234482da
+ CSINV CS, R1, R2, R3 // CSINV HS, R1, R2, R3 // 232082da
+ CSINVW MI, R2, ZR, R2 // 42409f5a
+ CINC EQ, R4, R9 // 8914849a
+ CINCW PL, R2, ZR // 5f44821a
+ CINV PL, R11, R22 // 76418bda
+ CINVW LS, R7, R13 // ed80875a
+ CNEG LS, R13, R7 // a7858dda
+ CNEGW EQ, R8, R13 // 0d15885a
+
+// atomic ops
+ LDARB (R25), R2 // 22ffdf08
+ LDARH (R5), R7 // a7fcdf48
+ LDAXPW (R10), (R20, R16) // 54c17f88
+ LDAXP (R25), (R30, R11) // 3eaf7fc8
+ LDAXRW (R0), R2 // 02fc5f88
+ LDXPW (R24), (R23, R11) // 172f7f88
+ LDXP (R0), (R16, R13) // 10347fc8
+ STLRB R11, (R22) // cbfe9f08
+ STLRH R16, (R23) // f0fe9f48
+ STLXP (R6, R3), (R10), R2 // 468d22c8
+ STLXPW (R6, R11), (R22), R21 // c6ae3588
+ STLXRW R1, (R0), R3 // 01fc0388
+ STXP (R1, R2), (R3), R10 // 61082ac8
+ STXP (R1, R2), (RSP), R10 // e10b2ac8
+ STXPW (R1, R2), (R3), R10 // 61082a88
+ STXPW (R1, R2), (RSP), R10 // e10b2a88
+ SWPAD R5, (R6), R7 // c780a5f8
+ SWPAD R5, (RSP), R7 // e783a5f8
+ SWPAW R5, (R6), R7 // c780a5b8
+ SWPAW R5, (RSP), R7 // e783a5b8
+ SWPAH R5, (R6), R7 // c780a578
+ SWPAH R5, (RSP), R7 // e783a578
+ SWPAB R5, (R6), R7 // c780a538
+ SWPAB R5, (RSP), R7 // e783a538
+ SWPALD R5, (R6), R7 // c780e5f8
+ SWPALD R5, (RSP), R7 // e783e5f8
+ SWPALW R5, (R6), R7 // c780e5b8
+ SWPALW R5, (RSP), R7 // e783e5b8
+ SWPALH R5, (R6), R7 // c780e578
+ SWPALH R5, (RSP), R7 // e783e578
+ SWPALB R5, (R6), R7 // c780e538
+ SWPALB R5, (RSP), R7 // e783e538
+ SWPD R5, (R6), R7 // c78025f8
+ SWPD R5, (RSP), R7 // e78325f8
+ SWPW R5, (R6), R7 // c78025b8
+ SWPW R5, (RSP), R7 // e78325b8
+ SWPH R5, (R6), R7 // c7802578
+ SWPH R5, (RSP), R7 // e7832578
+ SWPB R5, (R6), R7 // c7802538
+ SWPB R5, (RSP), R7 // e7832538
+ SWPLD R5, (R6), R7 // c78065f8
+ SWPLD R5, (RSP), R7 // e78365f8
+ SWPLW R5, (R6), R7 // c78065b8
+ SWPLW R5, (RSP), R7 // e78365b8
+ SWPLH R5, (R6), R7 // c7806578
+ SWPLH R5, (RSP), R7 // e7836578
+ SWPLB R5, (R6), R7 // c7806538
+ SWPLB R5, (RSP), R7 // e7836538
+ LDADDAD R5, (R6), R7 // c700a5f8
+ LDADDAD R5, (RSP), R7 // e703a5f8
+ LDADDAW R5, (R6), R7 // c700a5b8
+ LDADDAW R5, (RSP), R7 // e703a5b8
+ LDADDAH R5, (R6), R7 // c700a578
+ LDADDAH R5, (RSP), R7 // e703a578
+ LDADDAB R5, (R6), R7 // c700a538
+ LDADDAB R5, (RSP), R7 // e703a538
+ LDADDALD R5, (R6), R7 // c700e5f8
+ LDADDALD R5, (RSP), R7 // e703e5f8
+ LDADDALW R5, (R6), R7 // c700e5b8
+ LDADDALW R5, (RSP), R7 // e703e5b8
+ LDADDALH R5, (R6), R7 // c700e578
+ LDADDALH R5, (RSP), R7 // e703e578
+ LDADDALB R5, (R6), R7 // c700e538
+ LDADDALB R5, (RSP), R7 // e703e538
+ LDADDD R5, (R6), R7 // c70025f8
+ LDADDD R5, (RSP), R7 // e70325f8
+ LDADDW R5, (R6), R7 // c70025b8
+ LDADDW R5, (RSP), R7 // e70325b8
+ LDADDH R5, (R6), R7 // c7002578
+ LDADDH R5, (RSP), R7 // e7032578
+ LDADDB R5, (R6), R7 // c7002538
+ LDADDB R5, (RSP), R7 // e7032538
+ LDADDLD R5, (R6), R7 // c70065f8
+ LDADDLD R5, (RSP), R7 // e70365f8
+ LDADDLW R5, (R6), R7 // c70065b8
+ LDADDLW R5, (RSP), R7 // e70365b8
+ LDADDLH R5, (R6), R7 // c7006578
+ LDADDLH R5, (RSP), R7 // e7036578
+ LDADDLB R5, (R6), R7 // c7006538
+ LDADDLB R5, (RSP), R7 // e7036538
+ LDCLRAD R5, (R6), R7 // c710a5f8
+ LDCLRAD R5, (RSP), R7 // e713a5f8
+ LDCLRAW R5, (R6), R7 // c710a5b8
+ LDCLRAW R5, (RSP), R7 // e713a5b8
+ LDCLRAH R5, (R6), R7 // c710a578
+ LDCLRAH R5, (RSP), R7 // e713a578
+ LDCLRAB R5, (R6), R7 // c710a538
+ LDCLRAB R5, (RSP), R7 // e713a538
+ LDCLRALD R5, (R6), R7 // c710e5f8
+ LDCLRALD R5, (RSP), R7 // e713e5f8
+ LDCLRALW R5, (R6), R7 // c710e5b8
+ LDCLRALW R5, (RSP), R7 // e713e5b8
+ LDCLRALH R5, (R6), R7 // c710e578
+ LDCLRALH R5, (RSP), R7 // e713e578
+ LDCLRALB R5, (R6), R7 // c710e538
+ LDCLRALB R5, (RSP), R7 // e713e538
+ LDCLRD R5, (R6), R7 // c71025f8
+ LDCLRD R5, (RSP), R7 // e71325f8
+ LDCLRW R5, (R6), R7 // c71025b8
+ LDCLRW R5, (RSP), R7 // e71325b8
+ LDCLRH R5, (R6), R7 // c7102578
+ LDCLRH R5, (RSP), R7 // e7132578
+ LDCLRB R5, (R6), R7 // c7102538
+ LDCLRB R5, (RSP), R7 // e7132538
+ LDCLRLD R5, (R6), R7 // c71065f8
+ LDCLRLD R5, (RSP), R7 // e71365f8
+ LDCLRLW R5, (R6), R7 // c71065b8
+ LDCLRLW R5, (RSP), R7 // e71365b8
+ LDCLRLH R5, (R6), R7 // c7106578
+ LDCLRLH R5, (RSP), R7 // e7136578
+ LDCLRLB R5, (R6), R7 // c7106538
+ LDCLRLB R5, (RSP), R7 // e7136538
+ LDEORAD R5, (R6), R7 // c720a5f8
+ LDEORAD R5, (RSP), R7 // e723a5f8
+ LDEORAW R5, (R6), R7 // c720a5b8
+ LDEORAW R5, (RSP), R7 // e723a5b8
+ LDEORAH R5, (R6), R7 // c720a578
+ LDEORAH R5, (RSP), R7 // e723a578
+ LDEORAB R5, (R6), R7 // c720a538
+ LDEORAB R5, (RSP), R7 // e723a538
+ LDEORALD R5, (R6), R7 // c720e5f8
+ LDEORALD R5, (RSP), R7 // e723e5f8
+ LDEORALW R5, (R6), R7 // c720e5b8
+ LDEORALW R5, (RSP), R7 // e723e5b8
+ LDEORALH R5, (R6), R7 // c720e578
+ LDEORALH R5, (RSP), R7 // e723e578
+ LDEORALB R5, (R6), R7 // c720e538
+ LDEORALB R5, (RSP), R7 // e723e538
+ LDEORD R5, (R6), R7 // c72025f8
+ LDEORD R5, (RSP), R7 // e72325f8
+ LDEORW R5, (R6), R7 // c72025b8
+ LDEORW R5, (RSP), R7 // e72325b8
+ LDEORH R5, (R6), R7 // c7202578
+ LDEORH R5, (RSP), R7 // e7232578
+ LDEORB R5, (R6), R7 // c7202538
+ LDEORB R5, (RSP), R7 // e7232538
+ LDEORLD R5, (R6), R7 // c72065f8
+ LDEORLD R5, (RSP), R7 // e72365f8
+ LDEORLW R5, (R6), R7 // c72065b8
+ LDEORLW R5, (RSP), R7 // e72365b8
+ LDEORLH R5, (R6), R7 // c7206578
+ LDEORLH R5, (RSP), R7 // e7236578
+ LDEORLB R5, (R6), R7 // c7206538
+ LDEORLB R5, (RSP), R7 // e7236538
+ LDORAD R5, (R6), R7 // c730a5f8
+ LDORAD R5, (RSP), R7 // e733a5f8
+ LDORAW R5, (R6), R7 // c730a5b8
+ LDORAW R5, (RSP), R7 // e733a5b8
+ LDORAH R5, (R6), R7 // c730a578
+ LDORAH R5, (RSP), R7 // e733a578
+ LDORAB R5, (R6), R7 // c730a538
+ LDORAB R5, (RSP), R7 // e733a538
+ LDORALD R5, (R6), R7 // c730e5f8
+ LDORALD R5, (RSP), R7 // e733e5f8
+ LDORALW R5, (R6), R7 // c730e5b8
+ LDORALW R5, (RSP), R7 // e733e5b8
+ LDORALH R5, (R6), R7 // c730e578
+ LDORALH R5, (RSP), R7 // e733e578
+ LDORALB R5, (R6), R7 // c730e538
+ LDORALB R5, (RSP), R7 // e733e538
+ LDORD R5, (R6), R7 // c73025f8
+ LDORD R5, (RSP), R7 // e73325f8
+ LDORW R5, (R6), R7 // c73025b8
+ LDORW R5, (RSP), R7 // e73325b8
+ LDORH R5, (R6), R7 // c7302578
+ LDORH R5, (RSP), R7 // e7332578
+ LDORB R5, (R6), R7 // c7302538
+ LDORB R5, (RSP), R7 // e7332538
+ LDORLD R5, (R6), R7 // c73065f8
+ LDORLD R5, (RSP), R7 // e73365f8
+ LDORLW R5, (R6), R7 // c73065b8
+ LDORLW R5, (RSP), R7 // e73365b8
+ LDORLH R5, (R6), R7 // c7306578
+ LDORLH R5, (RSP), R7 // e7336578
+ LDORLB R5, (R6), R7 // c7306538
+ LDORLB R5, (RSP), R7 // e7336538
+ CASD R1, (R2), ZR // 5f7ca1c8
+ CASW R1, (RSP), ZR // ff7fa188
+ CASB ZR, (R5), R3 // a37cbf08
+ CASH R3, (RSP), ZR // ff7fa348
+ CASW R5, (R7), R6 // e67ca588
+ CASLD ZR, (RSP), R8 // e8ffbfc8
+ CASLW R9, (R10), ZR // 5ffda988
+ CASAD R7, (R11), R15 // 6f7de7c8
+ CASAW R10, (RSP), R19 // f37fea88
+ CASALD R5, (R6), R7 // c7fce5c8
+ CASALD R5, (RSP), R7 // e7ffe5c8
+ CASALW R5, (R6), R7 // c7fce588
+ CASALW R5, (RSP), R7 // e7ffe588
+ CASALH ZR, (R5), R8 // a8fcff48
+ CASALB R8, (R9), ZR // 3ffde808
+ CASPD (R30, ZR), (RSP), (R8, R9) // e87f3e48
+ CASPW (R6, R7), (R8), (R4, R5) // 047d2608
+ CASPD (R2, R3), (R2), (R8, R9) // 487c2248
+
+// RET
+ RET
+ RET foo(SB)
+
+// B/BL/B.cond cases, and canonical names JMP, CALL.
+ BL 1(PC) // CALL 1(PC)
+ BL (R2) // CALL (R2)
+ BL foo(SB) // CALL foo(SB)
+ BL bar<>(SB) // CALL bar<>(SB)
+ B foo(SB) // JMP foo(SB)
+ BEQ 1(PC)
+ BEQ 2(PC)
+ TBZ $1, R1, 2(PC)
+ TBNZ $2, R2, 2(PC)
+ JMP foo(SB)
+ CALL foo(SB)
+
+// LDP/STP
+ LDP (R0), (R0, R1) // 000440a9
+ LDP (R0), (R1, R2) // 010840a9
+ LDP 8(R0), (R1, R2) // 018840a9
+ LDP -8(R0), (R1, R2) // 01887fa9
+ LDP 11(R0), (R1, R2) // 1b2c0091610b40a9
+ LDP 1024(R0), (R1, R2) // 1b001091610b40a9
+ LDP.W 8(R0), (R1, R2) // 0188c0a9
+ LDP.P 8(R0), (R1, R2) // 0188c0a8
+ LDP (RSP), (R1, R2) // e10b40a9
+ LDP 8(RSP), (R1, R2) // e18b40a9
+ LDP -8(RSP), (R1, R2) // e18b7fa9
+ LDP 11(RSP), (R1, R2) // fb2f0091610b40a9
+ LDP 1024(RSP), (R1, R2) // fb031091610b40a9
+ LDP.W 8(RSP), (R1, R2) // e18bc0a9
+ LDP.P 8(RSP), (R1, R2) // e18bc0a8
+ LDP -31(R0), (R1, R2) // 1b7c00d1610b40a9
+ LDP -4(R0), (R1, R2) // 1b1000d1610b40a9
+ LDP -8(R0), (R1, R2) // 01887fa9
+ LDP x(SB), (R1, R2)
+ LDP x+8(SB), (R1, R2)
+ LDPW -5(R0), (R1, R2) // 1b1400d1610b4029
+ LDPW (R0), (R1, R2) // 01084029
+ LDPW 4(R0), (R1, R2) // 01884029
+ LDPW -4(R0), (R1, R2) // 01887f29
+ LDPW.W 4(R0), (R1, R2) // 0188c029
+ LDPW.P 4(R0), (R1, R2) // 0188c028
+ LDPW 11(R0), (R1, R2) // 1b2c0091610b4029
+ LDPW 1024(R0), (R1, R2) // 1b001091610b4029
+ LDPW (RSP), (R1, R2) // e10b4029
+ LDPW 4(RSP), (R1, R2) // e18b4029
+ LDPW -4(RSP), (R1, R2) // e18b7f29
+ LDPW.W 4(RSP), (R1, R2) // e18bc029
+ LDPW.P 4(RSP), (R1, R2) // e18bc028
+ LDPW 11(RSP), (R1, R2) // fb2f0091610b4029
+ LDPW 1024(RSP), (R1, R2) // fb031091610b4029
+ LDPW x(SB), (R1, R2)
+ LDPW x+8(SB), (R1, R2)
+ LDPSW (R0), (R1, R2) // 01084069
+ LDPSW 4(R0), (R1, R2) // 01884069
+ LDPSW -4(R0), (R1, R2) // 01887f69
+ LDPSW.W 4(R0), (R1, R2) // 0188c069
+ LDPSW.P 4(R0), (R1, R2) // 0188c068
+ LDPSW 11(R0), (R1, R2) // 1b2c0091610b4069
+ LDPSW 1024(R0), (R1, R2) // 1b001091610b4069
+ LDPSW (RSP), (R1, R2) // e10b4069
+ LDPSW 4(RSP), (R1, R2) // e18b4069
+ LDPSW -4(RSP), (R1, R2) // e18b7f69
+ LDPSW.W 4(RSP), (R1, R2) // e18bc069
+ LDPSW.P 4(RSP), (R1, R2) // e18bc068
+ LDPSW 11(RSP), (R1, R2) // fb2f0091610b4069
+ LDPSW 1024(RSP), (R1, R2) // fb031091610b4069
+ LDPSW x(SB), (R1, R2)
+ LDPSW x+8(SB), (R1, R2)
+ STP (R3, R4), (R5) // a31000a9
+ STP (R3, R4), 8(R5) // a39000a9
+ STP.W (R3, R4), 8(R5) // a39080a9
+ STP.P (R3, R4), 8(R5) // a39080a8
+ STP (R3, R4), -8(R5) // a3903fa9
+ STP (R3, R4), -4(R5) // bb1000d1631300a9
+ STP (R3, R4), 11(R0) // 1b2c0091631300a9
+ STP (R3, R4), 1024(R0) // 1b001091631300a9
+ STP (R3, R4), (RSP) // e31300a9
+ STP (R3, R4), 8(RSP) // e39300a9
+ STP.W (R3, R4), 8(RSP) // e39380a9
+ STP.P (R3, R4), 8(RSP) // e39380a8
+ STP (R3, R4), -8(RSP) // e3933fa9
+ STP (R3, R4), 11(RSP) // fb2f0091631300a9
+ STP (R3, R4), 1024(RSP) // fb031091631300a9
+ STP (R3, R4), x(SB)
+ STP (R3, R4), x+8(SB)
+ STPW (R3, R4), (R5) // a3100029
+ STPW (R3, R4), 4(R5) // a3900029
+ STPW.W (R3, R4), 4(R5) // a3908029
+ STPW.P (R3, R4), 4(R5) // a3908028
+ STPW (R3, R4), -4(R5) // a3903f29
+ STPW (R3, R4), -5(R5) // bb1400d163130029
+ STPW (R3, R4), 11(R0) // 1b2c009163130029
+ STPW (R3, R4), 1024(R0) // 1b00109163130029
+ STPW (R3, R4), (RSP) // e3130029
+ STPW (R3, R4), 4(RSP) // e3930029
+ STPW.W (R3, R4), 4(RSP) // e3938029
+ STPW.P (R3, R4), 4(RSP) // e3938028
+ STPW (R3, R4), -4(RSP) // e3933f29
+ STPW (R3, R4), 11(RSP) // fb2f009163130029
+ STPW (R3, R4), 1024(RSP) // fb03109163130029
+ STPW (R3, R4), x(SB)
+ STPW (R3, R4), x+8(SB)
+
+// bit field operation
+ BFI $0, R1, $1, R2 // 220040b3
+ BFIW $0, R1, $1, R2 // 22000033
+ SBFIZ $0, R1, $1, R2 // 22004093
+ SBFIZW $0, R1, $1, R2 // 22000013
+ UBFIZ $0, R1, $1, R2 // 220040d3
+ UBFIZW $0, R1, $1, R2 // 22000053
+
+// FSTPD/FSTPS/FLDPD/FLDPS
+ FLDPD (R0), (F1, F2) // 0108406d
+ FLDPD 8(R0), (F1, F2) // 0188406d
+ FLDPD -8(R0), (F1, F2) // 01887f6d
+ FLDPD 11(R0), (F1, F2) // 1b2c0091610b406d
+ FLDPD 1024(R0), (F1, F2) // 1b001091610b406d
+ FLDPD.W 8(R0), (F1, F2) // 0188c06d
+ FLDPD.P 8(R0), (F1, F2) // 0188c06c
+ FLDPD (RSP), (F1, F2) // e10b406d
+ FLDPD 8(RSP), (F1, F2) // e18b406d
+ FLDPD -8(RSP), (F1, F2) // e18b7f6d
+ FLDPD 11(RSP), (F1, F2) // fb2f0091610b406d
+ FLDPD 1024(RSP), (F1, F2) // fb031091610b406d
+ FLDPD.W 8(RSP), (F1, F2) // e18bc06d
+ FLDPD.P 8(RSP), (F1, F2) // e18bc06c
+ FLDPD -31(R0), (F1, F2) // 1b7c00d1610b406d
+ FLDPD -4(R0), (F1, F2) // 1b1000d1610b406d
+ FLDPD -8(R0), (F1, F2) // 01887f6d
+ FLDPD x(SB), (F1, F2)
+ FLDPD x+8(SB), (F1, F2)
+ FLDPS -5(R0), (F1, F2) // 1b1400d1610b402d
+ FLDPS (R0), (F1, F2) // 0108402d
+ FLDPS 4(R0), (F1, F2) // 0188402d
+ FLDPS -4(R0), (F1, F2) // 01887f2d
+ FLDPS.W 4(R0), (F1, F2) // 0188c02d
+ FLDPS.P 4(R0), (F1, F2) // 0188c02c
+ FLDPS 11(R0), (F1, F2) // 1b2c0091610b402d
+ FLDPS 1024(R0), (F1, F2) // 1b001091610b402d
+ FLDPS (RSP), (F1, F2) // e10b402d
+ FLDPS 4(RSP), (F1, F2) // e18b402d
+ FLDPS -4(RSP), (F1, F2) // e18b7f2d
+ FLDPS.W 4(RSP), (F1, F2) // e18bc02d
+ FLDPS.P 4(RSP), (F1, F2) // e18bc02c
+ FLDPS 11(RSP), (F1, F2) // fb2f0091610b402d
+ FLDPS 1024(RSP), (F1, F2) // fb031091610b402d
+ FLDPS x(SB), (F1, F2)
+ FLDPS x+8(SB), (F1, F2)
+ FSTPD (F3, F4), (R5) // a310006d
+ FSTPD (F3, F4), 8(R5) // a390006d
+ FSTPD.W (F3, F4), 8(R5) // a390806d
+ FSTPD.P (F3, F4), 8(R5) // a390806c
+ FSTPD (F3, F4), -8(R5) // a3903f6d
+ FSTPD (F3, F4), -4(R5) // bb1000d16313006d
+ FSTPD (F3, F4), 11(R0) // 1b2c00916313006d
+ FSTPD (F3, F4), 1024(R0) // 1b0010916313006d
+ FSTPD (F3, F4), (RSP) // e313006d
+ FSTPD (F3, F4), 8(RSP) // e393006d
+ FSTPD.W (F3, F4), 8(RSP) // e393806d
+ FSTPD.P (F3, F4), 8(RSP) // e393806c
+ FSTPD (F3, F4), -8(RSP) // e3933f6d
+ FSTPD (F3, F4), 11(RSP) // fb2f00916313006d
+ FSTPD (F3, F4), 1024(RSP) // fb0310916313006d
+ FSTPD (F3, F4), x(SB)
+ FSTPD (F3, F4), x+8(SB)
+ FSTPS (F3, F4), (R5) // a310002d
+ FSTPS (F3, F4), 4(R5) // a390002d
+ FSTPS.W (F3, F4), 4(R5) // a390802d
+ FSTPS.P (F3, F4), 4(R5) // a390802c
+ FSTPS (F3, F4), -4(R5) // a3903f2d
+ FSTPS (F3, F4), -5(R5) // bb1400d16313002d
+ FSTPS (F3, F4), 11(R0) // 1b2c00916313002d
+ FSTPS (F3, F4), 1024(R0) // 1b0010916313002d
+ FSTPS (F3, F4), (RSP) // e313002d
+ FSTPS (F3, F4), 4(RSP) // e393002d
+ FSTPS.W (F3, F4), 4(RSP) // e393802d
+ FSTPS.P (F3, F4), 4(RSP) // e393802c
+ FSTPS (F3, F4), -4(RSP) // e3933f2d
+ FSTPS (F3, F4), 11(RSP) // fb2f00916313002d
+ FSTPS (F3, F4), 1024(RSP) // fb0310916313002d
+ FSTPS (F3, F4), x(SB)
+ FSTPS (F3, F4), x+8(SB)
+
+// FLDPQ/FSTPQ
+ FLDPQ -4000(R0), (F1, F2) // 1b803ed1610b40ad
+ FLDPQ -1024(R0), (F1, F2) // 010860ad
+ FLDPQ (R0), (F1, F2) // 010840ad
+ FLDPQ 16(R0), (F1, F2) // 018840ad
+ FLDPQ -16(R0), (F1, F2) // 01887fad
+ FLDPQ.W 32(R0), (F1, F2) // 0108c1ad
+ FLDPQ.P 32(R0), (F1, F2) // 0108c1ac
+ FLDPQ 11(R0), (F1, F2) // 1b2c0091610b40ad
+ FLDPQ 1024(R0), (F1, F2) // 1b001091610b40ad
+ FLDPQ 4104(R0), (F1, F2)
+ FLDPQ -4000(RSP), (F1, F2) // fb833ed1610b40ad
+ FLDPQ -1024(RSP), (F1, F2) // e10b60ad
+ FLDPQ (RSP), (F1, F2) // e10b40ad
+ FLDPQ 16(RSP), (F1, F2) // e18b40ad
+ FLDPQ -16(RSP), (F1, F2) // e18b7fad
+ FLDPQ.W 32(RSP), (F1, F2) // e10bc1ad
+ FLDPQ.P 32(RSP), (F1, F2) // e10bc1ac
+ FLDPQ 11(RSP), (F1, F2) // fb2f0091610b40ad
+ FLDPQ 1024(RSP), (F1, F2) // fb031091610b40ad
+ FLDPQ 4104(RSP), (F1, F2)
+ FLDPQ -31(R0), (F1, F2) // 1b7c00d1610b40ad
+ FLDPQ -4(R0), (F1, F2) // 1b1000d1610b40ad
+ FLDPQ x(SB), (F1, F2)
+ FLDPQ x+8(SB), (F1, F2)
+ FSTPQ (F3, F4), -4000(R5) // bb803ed1631300ad
+ FSTPQ (F3, F4), -1024(R5) // a31020ad
+ FSTPQ (F3, F4), (R5) // a31000ad
+ FSTPQ (F3, F4), 16(R5) // a39000ad
+ FSTPQ (F3, F4), -16(R5) // a3903fad
+ FSTPQ.W (F3, F4), 32(R5) // a31081ad
+ FSTPQ.P (F3, F4), 32(R5) // a31081ac
+ FSTPQ (F3, F4), 11(R5) // bb2c0091631300ad
+ FSTPQ (F3, F4), 1024(R5) // bb001091631300ad
+ FSTPQ (F3, F4), 4104(R5)
+ FSTPQ (F3, F4), -4000(RSP) // fb833ed1631300ad
+ FSTPQ (F3, F4), -1024(RSP) // e31320ad
+ FSTPQ (F3, F4), (RSP) // e31300ad
+ FSTPQ (F3, F4), 16(RSP) // e39300ad
+ FSTPQ (F3, F4), -16(RSP) // e3933fad
+ FSTPQ.W (F3, F4), 32(RSP) // e31381ad
+ FSTPQ.P (F3, F4), 32(RSP) // e31381ac
+ FSTPQ (F3, F4), 11(RSP) // fb2f0091631300ad
+ FSTPQ (F3, F4), 1024(RSP) // fb031091631300ad
+ FSTPQ (F3, F4), 4104(RSP)
+ FSTPQ (F3, F4), x(SB)
+ FSTPQ (F3, F4), x+8(SB)
+
+// System Register
+ MSR $1, SPSel // bf4100d5
+ MSR $9, DAIFSet // df4903d5
+ MSR $6, DAIFClr // ff4603d5
+ MRS ELR_EL1, R8 // 284038d5
+ MSR R16, ELR_EL1 // 304018d5
+ MSR R2, ACTLR_EL1 // 221018d5
+ MRS TCR_EL1, R5 // 452038d5
+ MRS PMEVCNTR15_EL0, R12 // ece93bd5
+ MSR R20, PMEVTYPER26_EL0 // 54ef1bd5
+ MSR R10, DBGBCR15_EL1 // aa0f10d5
+ MRS ACTLR_EL1, R3 // 231038d5
+ MSR R9, ACTLR_EL1 // 291018d5
+ MRS AFSR0_EL1, R10 // 0a5138d5
+ MSR R1, AFSR0_EL1 // 015118d5
+ MRS AFSR0_EL1, R9 // 095138d5
+ MSR R30, AFSR0_EL1 // 1e5118d5
+ MRS AFSR1_EL1, R0 // 205138d5
+ MSR R1, AFSR1_EL1 // 215118d5
+ MRS AFSR1_EL1, R8 // 285138d5
+ MSR R19, AFSR1_EL1 // 335118d5
+ MRS AIDR_EL1, R11 // eb0039d5
+ MRS AMAIR_EL1, R0 // 00a338d5
+ MSR R22, AMAIR_EL1 // 16a318d5
+ MRS AMAIR_EL1, R14 // 0ea338d5
+ MSR R0, AMAIR_EL1 // 00a318d5
+ MRS APDAKeyHi_EL1, R16 // 302238d5
+ MSR R26, APDAKeyHi_EL1 // 3a2218d5
+ MRS APDAKeyLo_EL1, R21 // 152238d5
+ MSR R22, APDAKeyLo_EL1 // 162218d5
+ MRS APDBKeyHi_EL1, R2 // 622238d5
+ MSR R6, APDBKeyHi_EL1 // 662218d5
+ MRS APDBKeyLo_EL1, R5 // 452238d5
+ MSR R22, APDBKeyLo_EL1 // 562218d5
+ MRS APGAKeyHi_EL1, R22 // 362338d5
+ MSR R5, APGAKeyHi_EL1 // 252318d5
+ MRS APGAKeyLo_EL1, R16 // 102338d5
+ MSR R22, APGAKeyLo_EL1 // 162318d5
+ MRS APIAKeyHi_EL1, R23 // 372138d5
+ MSR R17, APIAKeyHi_EL1 // 312118d5
+ MRS APIAKeyLo_EL1, R16 // 102138d5
+ MSR R6, APIAKeyLo_EL1 // 062118d5
+ MRS APIBKeyHi_EL1, R10 // 6a2138d5
+ MSR R11, APIBKeyHi_EL1 // 6b2118d5
+ MRS APIBKeyLo_EL1, R25 // 592138d5
+ MSR R22, APIBKeyLo_EL1 // 562118d5
+ MRS CCSIDR_EL1, R25 // 190039d5
+ MRS CLIDR_EL1, R16 // 300039d5
+ MRS CNTFRQ_EL0, R20 // 14e03bd5
+ MSR R16, CNTFRQ_EL0 // 10e01bd5
+ MRS CNTKCTL_EL1, R26 // 1ae138d5
+ MSR R0, CNTKCTL_EL1 // 00e118d5
+ MRS CNTP_CTL_EL0, R14 // 2ee23bd5
+ MSR R17, CNTP_CTL_EL0 // 31e21bd5
+ MRS CNTP_CVAL_EL0, R15 // 4fe23bd5
+ MSR R8, CNTP_CVAL_EL0 // 48e21bd5
+ MRS CNTP_TVAL_EL0, R6 // 06e23bd5
+ MSR R29, CNTP_TVAL_EL0 // 1de21bd5
+ MRS CNTP_CTL_EL0, R22 // 36e23bd5
+ MSR R0, CNTP_CTL_EL0 // 20e21bd5
+ MRS CNTP_CVAL_EL0, R9 // 49e23bd5
+ MSR R4, CNTP_CVAL_EL0 // 44e21bd5
+ MRS CNTP_TVAL_EL0, R27 // 1be23bd5
+ MSR R17, CNTP_TVAL_EL0 // 11e21bd5
+ MRS CNTV_CTL_EL0, R27 // 3be33bd5
+ MSR R2, CNTV_CTL_EL0 // 22e31bd5
+ MRS CNTV_CVAL_EL0, R16 // 50e33bd5
+ MSR R27, CNTV_CVAL_EL0 // 5be31bd5
+ MRS CNTV_TVAL_EL0, R12 // 0ce33bd5
+ MSR R19, CNTV_TVAL_EL0 // 13e31bd5
+ MRS CNTV_CTL_EL0, R14 // 2ee33bd5
+ MSR R2, CNTV_CTL_EL0 // 22e31bd5
+ MRS CNTV_CVAL_EL0, R8 // 48e33bd5
+ MSR R26, CNTV_CVAL_EL0 // 5ae31bd5
+ MRS CNTV_TVAL_EL0, R6 // 06e33bd5
+ MSR R19, CNTV_TVAL_EL0 // 13e31bd5
+ MRS CNTKCTL_EL1, R16 // 10e138d5
+ MSR R26, CNTKCTL_EL1 // 1ae118d5
+ MRS CNTPCT_EL0, R9 // 29e03bd5
+ MRS CNTPS_CTL_EL1, R30 // 3ee23fd5
+ MSR R26, CNTPS_CTL_EL1 // 3ae21fd5
+ MRS CNTPS_CVAL_EL1, R8 // 48e23fd5
+ MSR R26, CNTPS_CVAL_EL1 // 5ae21fd5
+ MRS CNTPS_TVAL_EL1, R7 // 07e23fd5
+ MSR R13, CNTPS_TVAL_EL1 // 0de21fd5
+ MRS CNTP_CTL_EL0, R2 // 22e23bd5
+ MSR R10, CNTP_CTL_EL0 // 2ae21bd5
+ MRS CNTP_CVAL_EL0, R6 // 46e23bd5
+ MSR R21, CNTP_CVAL_EL0 // 55e21bd5
+ MRS CNTP_TVAL_EL0, R27 // 1be23bd5
+ MSR R29, CNTP_TVAL_EL0 // 1de21bd5
+ MRS CNTVCT_EL0, R13 // 4de03bd5
+ MRS CNTV_CTL_EL0, R30 // 3ee33bd5
+ MSR R19, CNTV_CTL_EL0 // 33e31bd5
+ MRS CNTV_CVAL_EL0, R27 // 5be33bd5
+ MSR R24, CNTV_CVAL_EL0 // 58e31bd5
+ MRS CNTV_TVAL_EL0, R24 // 18e33bd5
+ MSR R5, CNTV_TVAL_EL0 // 05e31bd5
+ MRS CONTEXTIDR_EL1, R15 // 2fd038d5
+ MSR R27, CONTEXTIDR_EL1 // 3bd018d5
+ MRS CONTEXTIDR_EL1, R29 // 3dd038d5
+ MSR R24, CONTEXTIDR_EL1 // 38d018d5
+ MRS CPACR_EL1, R10 // 4a1038d5
+ MSR R14, CPACR_EL1 // 4e1018d5
+ MRS CPACR_EL1, R27 // 5b1038d5
+ MSR R22, CPACR_EL1 // 561018d5
+ MRS CSSELR_EL1, R3 // 03003ad5
+ MSR R4, CSSELR_EL1 // 04001ad5
+ MRS CTR_EL0, R15 // 2f003bd5
+ MRS CurrentEL, R1 // 414238d5
+ MRS DAIF, R24 // 38423bd5
+ MSR R9, DAIF // 29421bd5
+ MRS DBGAUTHSTATUS_EL1, R5 // c57e30d5
+ MRS DBGBCR0_EL1, R29 // bd0030d5
+ MRS DBGBCR1_EL1, R13 // ad0130d5
+ MRS DBGBCR2_EL1, R22 // b60230d5
+ MRS DBGBCR3_EL1, R8 // a80330d5
+ MRS DBGBCR4_EL1, R2 // a20430d5
+ MRS DBGBCR5_EL1, R4 // a40530d5
+ MRS DBGBCR6_EL1, R2 // a20630d5
+ MRS DBGBCR7_EL1, R6 // a60730d5
+ MRS DBGBCR8_EL1, R1 // a10830d5
+ MRS DBGBCR9_EL1, R16 // b00930d5
+ MRS DBGBCR10_EL1, R23 // b70a30d5
+ MRS DBGBCR11_EL1, R3 // a30b30d5
+ MRS DBGBCR12_EL1, R6 // a60c30d5
+ MRS DBGBCR13_EL1, R16 // b00d30d5
+ MRS DBGBCR14_EL1, R4 // a40e30d5
+ MRS DBGBCR15_EL1, R9 // a90f30d5
+ MSR R4, DBGBCR0_EL1 // a40010d5
+ MSR R14, DBGBCR1_EL1 // ae0110d5
+ MSR R7, DBGBCR2_EL1 // a70210d5
+ MSR R12, DBGBCR3_EL1 // ac0310d5
+ MSR R6, DBGBCR4_EL1 // a60410d5
+ MSR R11, DBGBCR5_EL1 // ab0510d5
+ MSR R6, DBGBCR6_EL1 // a60610d5
+ MSR R13, DBGBCR7_EL1 // ad0710d5
+ MSR R17, DBGBCR8_EL1 // b10810d5
+ MSR R17, DBGBCR9_EL1 // b10910d5
+ MSR R22, DBGBCR10_EL1 // b60a10d5
+ MSR R16, DBGBCR11_EL1 // b00b10d5
+ MSR R24, DBGBCR12_EL1 // b80c10d5
+ MSR R29, DBGBCR13_EL1 // bd0d10d5
+ MSR R1, DBGBCR14_EL1 // a10e10d5
+ MSR R10, DBGBCR15_EL1 // aa0f10d5
+ MRS DBGBVR0_EL1, R16 // 900030d5
+ MRS DBGBVR1_EL1, R21 // 950130d5
+ MRS DBGBVR2_EL1, R13 // 8d0230d5
+ MRS DBGBVR3_EL1, R12 // 8c0330d5
+ MRS DBGBVR4_EL1, R20 // 940430d5
+ MRS DBGBVR5_EL1, R21 // 950530d5
+ MRS DBGBVR6_EL1, R27 // 9b0630d5
+ MRS DBGBVR7_EL1, R6 // 860730d5
+ MRS DBGBVR8_EL1, R14 // 8e0830d5
+ MRS DBGBVR9_EL1, R5 // 850930d5
+ MRS DBGBVR10_EL1, R9 // 890a30d5
+ MRS DBGBVR11_EL1, R25 // 990b30d5
+ MRS DBGBVR12_EL1, R30 // 9e0c30d5
+ MRS DBGBVR13_EL1, R1 // 810d30d5
+ MRS DBGBVR14_EL1, R17 // 910e30d5
+ MRS DBGBVR15_EL1, R25 // 990f30d5
+ MSR R15, DBGBVR0_EL1 // 8f0010d5
+ MSR R6, DBGBVR1_EL1 // 860110d5
+ MSR R24, DBGBVR2_EL1 // 980210d5
+ MSR R17, DBGBVR3_EL1 // 910310d5
+ MSR R3, DBGBVR4_EL1 // 830410d5
+ MSR R21, DBGBVR5_EL1 // 950510d5
+ MSR R5, DBGBVR6_EL1 // 850610d5
+ MSR R6, DBGBVR7_EL1 // 860710d5
+ MSR R25, DBGBVR8_EL1 // 990810d5
+ MSR R4, DBGBVR9_EL1 // 840910d5
+ MSR R25, DBGBVR10_EL1 // 990a10d5
+ MSR R17, DBGBVR11_EL1 // 910b10d5
+ MSR R0, DBGBVR12_EL1 // 800c10d5
+ MSR R5, DBGBVR13_EL1 // 850d10d5
+ MSR R9, DBGBVR14_EL1 // 890e10d5
+ MSR R12, DBGBVR15_EL1 // 8c0f10d5
+ MRS DBGCLAIMCLR_EL1, R27 // db7930d5
+ MSR R0, DBGCLAIMCLR_EL1 // c07910d5
+ MRS DBGCLAIMSET_EL1, R7 // c77830d5
+ MSR R13, DBGCLAIMSET_EL1 // cd7810d5
+ MRS DBGDTRRX_EL0, R0 // 000533d5
+ MSR R29, DBGDTRTX_EL0 // 1d0513d5
+ MRS DBGDTR_EL0, R27 // 1b0433d5
+ MSR R30, DBGDTR_EL0 // 1e0413d5
+ MRS DBGPRCR_EL1, R4 // 841430d5
+ MSR R0, DBGPRCR_EL1 // 801410d5
+ MRS DBGWCR0_EL1, R24 // f80030d5
+ MRS DBGWCR1_EL1, R19 // f30130d5
+ MRS DBGWCR2_EL1, R25 // f90230d5
+ MRS DBGWCR3_EL1, R0 // e00330d5
+ MRS DBGWCR4_EL1, R13 // ed0430d5
+ MRS DBGWCR5_EL1, R8 // e80530d5
+ MRS DBGWCR6_EL1, R22 // f60630d5
+ MRS DBGWCR7_EL1, R11 // eb0730d5
+ MRS DBGWCR8_EL1, R11 // eb0830d5
+ MRS DBGWCR9_EL1, R3 // e30930d5
+ MRS DBGWCR10_EL1, R17 // f10a30d5
+ MRS DBGWCR11_EL1, R21 // f50b30d5
+ MRS DBGWCR12_EL1, R10 // ea0c30d5
+ MRS DBGWCR13_EL1, R22 // f60d30d5
+ MRS DBGWCR14_EL1, R11 // eb0e30d5
+ MRS DBGWCR15_EL1, R0 // e00f30d5
+ MSR R24, DBGWCR0_EL1 // f80010d5
+ MSR R8, DBGWCR1_EL1 // e80110d5
+ MSR R17, DBGWCR2_EL1 // f10210d5
+ MSR R29, DBGWCR3_EL1 // fd0310d5
+ MSR R13, DBGWCR4_EL1 // ed0410d5
+ MSR R22, DBGWCR5_EL1 // f60510d5
+ MSR R3, DBGWCR6_EL1 // e30610d5
+ MSR R4, DBGWCR7_EL1 // e40710d5
+ MSR R7, DBGWCR8_EL1 // e70810d5
+ MSR R29, DBGWCR9_EL1 // fd0910d5
+ MSR R3, DBGWCR10_EL1 // e30a10d5
+ MSR R11, DBGWCR11_EL1 // eb0b10d5
+ MSR R20, DBGWCR12_EL1 // f40c10d5
+ MSR R6, DBGWCR13_EL1 // e60d10d5
+ MSR R22, DBGWCR14_EL1 // f60e10d5
+ MSR R25, DBGWCR15_EL1 // f90f10d5
+ MRS DBGWVR0_EL1, R14 // ce0030d5
+ MRS DBGWVR1_EL1, R16 // d00130d5
+ MRS DBGWVR2_EL1, R15 // cf0230d5
+ MRS DBGWVR3_EL1, R1 // c10330d5
+ MRS DBGWVR4_EL1, R26 // da0430d5
+ MRS DBGWVR5_EL1, R14 // ce0530d5
+ MRS DBGWVR6_EL1, R17 // d10630d5
+ MRS DBGWVR7_EL1, R22 // d60730d5
+ MRS DBGWVR8_EL1, R4 // c40830d5
+ MRS DBGWVR9_EL1, R3 // c30930d5
+ MRS DBGWVR10_EL1, R16 // d00a30d5
+ MRS DBGWVR11_EL1, R2 // c20b30d5
+ MRS DBGWVR12_EL1, R5 // c50c30d5
+ MRS DBGWVR13_EL1, R23 // d70d30d5
+ MRS DBGWVR14_EL1, R5 // c50e30d5
+ MRS DBGWVR15_EL1, R6 // c60f30d5
+ MSR R24, DBGWVR0_EL1 // d80010d5
+ MSR R6, DBGWVR1_EL1 // c60110d5
+ MSR R1, DBGWVR2_EL1 // c10210d5
+ MSR R24, DBGWVR3_EL1 // d80310d5
+ MSR R24, DBGWVR4_EL1 // d80410d5
+ MSR R0, DBGWVR5_EL1 // c00510d5
+ MSR R10, DBGWVR6_EL1 // ca0610d5
+ MSR R17, DBGWVR7_EL1 // d10710d5
+ MSR R7, DBGWVR8_EL1 // c70810d5
+ MSR R8, DBGWVR9_EL1 // c80910d5
+ MSR R15, DBGWVR10_EL1 // cf0a10d5
+ MSR R8, DBGWVR11_EL1 // c80b10d5
+ MSR R7, DBGWVR12_EL1 // c70c10d5
+ MSR R14, DBGWVR13_EL1 // ce0d10d5
+ MSR R16, DBGWVR14_EL1 // d00e10d5
+ MSR R5, DBGWVR15_EL1 // c50f10d5
+ MRS DCZID_EL0, R21 // f5003bd5
+ MRS DISR_EL1, R8 // 28c138d5
+ MSR R5, DISR_EL1 // 25c118d5
+ MRS DIT, R29 // bd423bd5
+ MSR R22, DIT // b6421bd5
+ MRS DLR_EL0, R25 // 39453bd5
+ MSR R9, DLR_EL0 // 29451bd5
+ MRS DSPSR_EL0, R3 // 03453bd5
+ MSR R10, DSPSR_EL0 // 0a451bd5
+ MRS ELR_EL1, R24 // 384038d5
+ MSR R3, ELR_EL1 // 234018d5
+ MRS ELR_EL1, R13 // 2d4038d5
+ MSR R27, ELR_EL1 // 3b4018d5
+ MRS ERRIDR_EL1, R30 // 1e5338d5
+ MRS ERRSELR_EL1, R21 // 355338d5
+ MSR R22, ERRSELR_EL1 // 365318d5
+ MRS ERXADDR_EL1, R30 // 7e5438d5
+ MSR R0, ERXADDR_EL1 // 605418d5
+ MRS ERXCTLR_EL1, R6 // 265438d5
+ MSR R9, ERXCTLR_EL1 // 295418d5
+ MRS ERXFR_EL1, R19 // 135438d5
+ MRS ERXMISC0_EL1, R20 // 145538d5
+ MSR R24, ERXMISC0_EL1 // 185518d5
+ MRS ERXMISC1_EL1, R15 // 2f5538d5
+ MSR R10, ERXMISC1_EL1 // 2a5518d5
+ MRS ERXSTATUS_EL1, R30 // 5e5438d5
+ MSR R3, ERXSTATUS_EL1 // 435418d5
+ MRS ESR_EL1, R6 // 065238d5
+ MSR R21, ESR_EL1 // 155218d5
+ MRS ESR_EL1, R17 // 115238d5
+ MSR R12, ESR_EL1 // 0c5218d5
+ MRS FAR_EL1, R3 // 036038d5
+ MSR R17, FAR_EL1 // 116018d5
+ MRS FAR_EL1, R9 // 096038d5
+ MSR R25, FAR_EL1 // 196018d5
+ MRS FPCR, R1 // 01443bd5
+ MSR R27, FPCR // 1b441bd5
+ MRS FPSR, R5 // 25443bd5
+ MSR R15, FPSR // 2f441bd5
+ MRS ID_AA64AFR0_EL1, R19 // 930538d5
+ MRS ID_AA64AFR1_EL1, R24 // b80538d5
+ MRS ID_AA64DFR0_EL1, R21 // 150538d5
+ MRS ID_AA64DFR1_EL1, R20 // 340538d5
+ MRS ID_AA64ISAR0_EL1, R4 // 040638d5
+ MRS ID_AA64ISAR1_EL1, R6 // 260638d5
+ MRS ID_AA64MMFR0_EL1, R0 // 000738d5
+ MRS ID_AA64MMFR1_EL1, R17 // 310738d5
+ MRS ID_AA64MMFR2_EL1, R23 // 570738d5
+ MRS ID_AA64PFR0_EL1, R20 // 140438d5
+ MRS ID_AA64PFR1_EL1, R26 // 3a0438d5
+ MRS ID_AA64ZFR0_EL1, R26 // 9a0438d5
+ MRS ID_AFR0_EL1, R21 // 750138d5
+ MRS ID_DFR0_EL1, R15 // 4f0138d5
+ MRS ID_ISAR0_EL1, R11 // 0b0238d5
+ MRS ID_ISAR1_EL1, R16 // 300238d5
+ MRS ID_ISAR2_EL1, R10 // 4a0238d5
+ MRS ID_ISAR3_EL1, R13 // 6d0238d5
+ MRS ID_ISAR4_EL1, R24 // 980238d5
+ MRS ID_ISAR5_EL1, R29 // bd0238d5
+ MRS ID_MMFR0_EL1, R10 // 8a0138d5
+ MRS ID_MMFR1_EL1, R29 // bd0138d5
+ MRS ID_MMFR2_EL1, R16 // d00138d5
+ MRS ID_MMFR3_EL1, R10 // ea0138d5
+ MRS ID_MMFR4_EL1, R23 // d70238d5
+ MRS ID_PFR0_EL1, R4 // 040138d5
+ MRS ID_PFR1_EL1, R12 // 2c0138d5
+ MRS ISR_EL1, R24 // 18c138d5
+ MRS MAIR_EL1, R20 // 14a238d5
+ MSR R21, MAIR_EL1 // 15a218d5
+ MRS MAIR_EL1, R20 // 14a238d5
+ MSR R5, MAIR_EL1 // 05a218d5
+ MRS MDCCINT_EL1, R23 // 170230d5
+ MSR R27, MDCCINT_EL1 // 1b0210d5
+ MRS MDCCSR_EL0, R19 // 130133d5
+ MRS MDRAR_EL1, R12 // 0c1030d5
+ MRS MDSCR_EL1, R15 // 4f0230d5
+ MSR R15, MDSCR_EL1 // 4f0210d5
+ MRS MIDR_EL1, R26 // 1a0038d5
+ MRS MPIDR_EL1, R25 // b90038d5
+ MRS MVFR0_EL1, R29 // 1d0338d5
+ MRS MVFR1_EL1, R7 // 270338d5
+ MRS MVFR2_EL1, R19 // 530338d5
+ MRS NZCV, R11 // 0b423bd5
+ MSR R10, NZCV // 0a421bd5
+ MRS OSDLR_EL1, R16 // 901330d5
+ MSR R21, OSDLR_EL1 // 951310d5
+ MRS OSDTRRX_EL1, R5 // 450030d5
+ MSR R30, OSDTRRX_EL1 // 5e0010d5
+ MRS OSDTRTX_EL1, R3 // 430330d5
+ MSR R13, OSDTRTX_EL1 // 4d0310d5
+ MRS OSECCR_EL1, R2 // 420630d5
+ MSR R17, OSECCR_EL1 // 510610d5
+ MSR R3, OSLAR_EL1 // 831010d5
+ MRS OSLSR_EL1, R15 // 8f1130d5
+ MRS PAN, R14 // 6e4238d5
+ MSR R0, PAN // 604218d5
+ MRS PAR_EL1, R27 // 1b7438d5
+ MSR R3, PAR_EL1 // 037418d5
+ MRS PMCCFILTR_EL0, R10 // eaef3bd5
+ MSR R16, PMCCFILTR_EL0 // f0ef1bd5
+ MRS PMCCNTR_EL0, R17 // 119d3bd5
+ MSR R13, PMCCNTR_EL0 // 0d9d1bd5
+ MRS PMCEID0_EL0, R8 // c89c3bd5
+ MRS PMCEID1_EL0, R30 // fe9c3bd5
+ MRS PMCNTENCLR_EL0, R11 // 4b9c3bd5
+ MSR R21, PMCNTENCLR_EL0 // 559c1bd5
+ MRS PMCNTENSET_EL0, R25 // 399c3bd5
+ MSR R13, PMCNTENSET_EL0 // 2d9c1bd5
+ MRS PMCR_EL0, R23 // 179c3bd5
+ MSR R11, PMCR_EL0 // 0b9c1bd5
+ MRS PMEVCNTR0_EL0, R27 // 1be83bd5
+ MRS PMEVCNTR1_EL0, R23 // 37e83bd5
+ MRS PMEVCNTR2_EL0, R26 // 5ae83bd5
+ MRS PMEVCNTR3_EL0, R11 // 6be83bd5
+ MRS PMEVCNTR4_EL0, R14 // 8ee83bd5
+ MRS PMEVCNTR5_EL0, R9 // a9e83bd5
+ MRS PMEVCNTR6_EL0, R30 // dee83bd5
+ MRS PMEVCNTR7_EL0, R19 // f3e83bd5
+ MRS PMEVCNTR8_EL0, R5 // 05e93bd5
+ MRS PMEVCNTR9_EL0, R27 // 3be93bd5
+ MRS PMEVCNTR10_EL0, R23 // 57e93bd5
+ MRS PMEVCNTR11_EL0, R27 // 7be93bd5
+ MRS PMEVCNTR12_EL0, R0 // 80e93bd5
+ MRS PMEVCNTR13_EL0, R13 // ade93bd5
+ MRS PMEVCNTR14_EL0, R27 // dbe93bd5
+ MRS PMEVCNTR15_EL0, R16 // f0e93bd5
+ MRS PMEVCNTR16_EL0, R16 // 10ea3bd5
+ MRS PMEVCNTR17_EL0, R14 // 2eea3bd5
+ MRS PMEVCNTR18_EL0, R10 // 4aea3bd5
+ MRS PMEVCNTR19_EL0, R12 // 6cea3bd5
+ MRS PMEVCNTR20_EL0, R5 // 85ea3bd5
+ MRS PMEVCNTR21_EL0, R26 // baea3bd5
+ MRS PMEVCNTR22_EL0, R19 // d3ea3bd5
+ MRS PMEVCNTR23_EL0, R5 // e5ea3bd5
+ MRS PMEVCNTR24_EL0, R17 // 11eb3bd5
+ MRS PMEVCNTR25_EL0, R0 // 20eb3bd5
+ MRS PMEVCNTR26_EL0, R20 // 54eb3bd5
+ MRS PMEVCNTR27_EL0, R12 // 6ceb3bd5
+ MRS PMEVCNTR28_EL0, R29 // 9deb3bd5
+ MRS PMEVCNTR29_EL0, R22 // b6eb3bd5
+ MRS PMEVCNTR30_EL0, R22 // d6eb3bd5
+ MSR R30, PMEVCNTR0_EL0 // 1ee81bd5
+ MSR R1, PMEVCNTR1_EL0 // 21e81bd5
+ MSR R20, PMEVCNTR2_EL0 // 54e81bd5
+ MSR R9, PMEVCNTR3_EL0 // 69e81bd5
+ MSR R8, PMEVCNTR4_EL0 // 88e81bd5
+ MSR R2, PMEVCNTR5_EL0 // a2e81bd5
+ MSR R30, PMEVCNTR6_EL0 // dee81bd5
+ MSR R14, PMEVCNTR7_EL0 // eee81bd5
+ MSR R1, PMEVCNTR8_EL0 // 01e91bd5
+ MSR R15, PMEVCNTR9_EL0 // 2fe91bd5
+ MSR R15, PMEVCNTR10_EL0 // 4fe91bd5
+ MSR R14, PMEVCNTR11_EL0 // 6ee91bd5
+ MSR R15, PMEVCNTR12_EL0 // 8fe91bd5
+ MSR R25, PMEVCNTR13_EL0 // b9e91bd5
+ MSR R26, PMEVCNTR14_EL0 // dae91bd5
+ MSR R21, PMEVCNTR15_EL0 // f5e91bd5
+ MSR R29, PMEVCNTR16_EL0 // 1dea1bd5
+ MSR R11, PMEVCNTR17_EL0 // 2bea1bd5
+ MSR R16, PMEVCNTR18_EL0 // 50ea1bd5
+ MSR R2, PMEVCNTR19_EL0 // 62ea1bd5
+ MSR R19, PMEVCNTR20_EL0 // 93ea1bd5
+ MSR R17, PMEVCNTR21_EL0 // b1ea1bd5
+ MSR R7, PMEVCNTR22_EL0 // c7ea1bd5
+ MSR R23, PMEVCNTR23_EL0 // f7ea1bd5
+ MSR R15, PMEVCNTR24_EL0 // 0feb1bd5
+ MSR R27, PMEVCNTR25_EL0 // 3beb1bd5
+ MSR R13, PMEVCNTR26_EL0 // 4deb1bd5
+ MSR R2, PMEVCNTR27_EL0 // 62eb1bd5
+ MSR R15, PMEVCNTR28_EL0 // 8feb1bd5
+ MSR R14, PMEVCNTR29_EL0 // aeeb1bd5
+ MSR R23, PMEVCNTR30_EL0 // d7eb1bd5
+ MRS PMEVTYPER0_EL0, R23 // 17ec3bd5
+ MRS PMEVTYPER1_EL0, R30 // 3eec3bd5
+ MRS PMEVTYPER2_EL0, R12 // 4cec3bd5
+ MRS PMEVTYPER3_EL0, R13 // 6dec3bd5
+ MRS PMEVTYPER4_EL0, R25 // 99ec3bd5
+ MRS PMEVTYPER5_EL0, R23 // b7ec3bd5
+ MRS PMEVTYPER6_EL0, R8 // c8ec3bd5
+ MRS PMEVTYPER7_EL0, R2 // e2ec3bd5
+ MRS PMEVTYPER8_EL0, R23 // 17ed3bd5
+ MRS PMEVTYPER9_EL0, R25 // 39ed3bd5
+ MRS PMEVTYPER10_EL0, R0 // 40ed3bd5
+ MRS PMEVTYPER11_EL0, R30 // 7eed3bd5
+ MRS PMEVTYPER12_EL0, R0 // 80ed3bd5
+ MRS PMEVTYPER13_EL0, R9 // a9ed3bd5
+ MRS PMEVTYPER14_EL0, R15 // cfed3bd5
+ MRS PMEVTYPER15_EL0, R13 // eded3bd5
+ MRS PMEVTYPER16_EL0, R11 // 0bee3bd5
+ MRS PMEVTYPER17_EL0, R19 // 33ee3bd5
+ MRS PMEVTYPER18_EL0, R3 // 43ee3bd5
+ MRS PMEVTYPER19_EL0, R17 // 71ee3bd5
+ MRS PMEVTYPER20_EL0, R8 // 88ee3bd5
+ MRS PMEVTYPER21_EL0, R2 // a2ee3bd5
+ MRS PMEVTYPER22_EL0, R5 // c5ee3bd5
+ MRS PMEVTYPER23_EL0, R17 // f1ee3bd5
+ MRS PMEVTYPER24_EL0, R22 // 16ef3bd5
+ MRS PMEVTYPER25_EL0, R3 // 23ef3bd5
+ MRS PMEVTYPER26_EL0, R23 // 57ef3bd5
+ MRS PMEVTYPER27_EL0, R19 // 73ef3bd5
+ MRS PMEVTYPER28_EL0, R24 // 98ef3bd5
+ MRS PMEVTYPER29_EL0, R3 // a3ef3bd5
+ MRS PMEVTYPER30_EL0, R1 // c1ef3bd5
+ MSR R20, PMEVTYPER0_EL0 // 14ec1bd5
+ MSR R20, PMEVTYPER1_EL0 // 34ec1bd5
+ MSR R14, PMEVTYPER2_EL0 // 4eec1bd5
+ MSR R26, PMEVTYPER3_EL0 // 7aec1bd5
+ MSR R11, PMEVTYPER4_EL0 // 8bec1bd5
+ MSR R16, PMEVTYPER5_EL0 // b0ec1bd5
+ MSR R29, PMEVTYPER6_EL0 // ddec1bd5
+ MSR R3, PMEVTYPER7_EL0 // e3ec1bd5
+ MSR R30, PMEVTYPER8_EL0 // 1eed1bd5
+ MSR R17, PMEVTYPER9_EL0 // 31ed1bd5
+ MSR R10, PMEVTYPER10_EL0 // 4aed1bd5
+ MSR R19, PMEVTYPER11_EL0 // 73ed1bd5
+ MSR R13, PMEVTYPER12_EL0 // 8ded1bd5
+ MSR R23, PMEVTYPER13_EL0 // b7ed1bd5
+ MSR R13, PMEVTYPER14_EL0 // cded1bd5
+ MSR R9, PMEVTYPER15_EL0 // e9ed1bd5
+ MSR R1, PMEVTYPER16_EL0 // 01ee1bd5
+ MSR R19, PMEVTYPER17_EL0 // 33ee1bd5
+ MSR R22, PMEVTYPER18_EL0 // 56ee1bd5
+ MSR R23, PMEVTYPER19_EL0 // 77ee1bd5
+ MSR R30, PMEVTYPER20_EL0 // 9eee1bd5
+ MSR R9, PMEVTYPER21_EL0 // a9ee1bd5
+ MSR R3, PMEVTYPER22_EL0 // c3ee1bd5
+ MSR R1, PMEVTYPER23_EL0 // e1ee1bd5
+ MSR R16, PMEVTYPER24_EL0 // 10ef1bd5
+ MSR R12, PMEVTYPER25_EL0 // 2cef1bd5
+ MSR R7, PMEVTYPER26_EL0 // 47ef1bd5
+ MSR R9, PMEVTYPER27_EL0 // 69ef1bd5
+ MSR R10, PMEVTYPER28_EL0 // 8aef1bd5
+ MSR R5, PMEVTYPER29_EL0 // a5ef1bd5
+ MSR R12, PMEVTYPER30_EL0 // ccef1bd5
+ MRS PMINTENCLR_EL1, R24 // 589e38d5
+ MSR R15, PMINTENCLR_EL1 // 4f9e18d5
+ MRS PMINTENSET_EL1, R1 // 219e38d5
+ MSR R4, PMINTENSET_EL1 // 249e18d5
+ MRS PMOVSCLR_EL0, R6 // 669c3bd5
+ MSR R30, PMOVSCLR_EL0 // 7e9c1bd5
+ MRS PMOVSSET_EL0, R16 // 709e3bd5
+ MSR R12, PMOVSSET_EL0 // 6c9e1bd5
+ MRS PMSELR_EL0, R30 // be9c3bd5
+ MSR R5, PMSELR_EL0 // a59c1bd5
+ MSR R27, PMSWINC_EL0 // 9b9c1bd5
+ MRS PMUSERENR_EL0, R8 // 089e3bd5
+ MSR R6, PMUSERENR_EL0 // 069e1bd5
+ MRS PMXEVCNTR_EL0, R26 // 5a9d3bd5
+ MSR R10, PMXEVCNTR_EL0 // 4a9d1bd5
+ MRS PMXEVTYPER_EL0, R4 // 249d3bd5
+ MSR R4, PMXEVTYPER_EL0 // 249d1bd5
+ MRS REVIDR_EL1, R29 // dd0038d5
+ MRS RMR_EL1, R4 // 44c038d5
+ MSR R0, RMR_EL1 // 40c018d5
+ MRS RVBAR_EL1, R7 // 27c038d5
+ MRS SCTLR_EL1, R8 // 081038d5
+ MSR R0, SCTLR_EL1 // 001018d5
+ MRS SCTLR_EL1, R30 // 1e1038d5
+ MSR R13, SCTLR_EL1 // 0d1018d5
+ MRS SPSR_EL1, R1 // 014038d5
+ MSR R2, SPSR_EL1 // 024018d5
+ MRS SPSR_EL1, R3 // 034038d5
+ MSR R14, SPSR_EL1 // 0e4018d5
+ MRS SPSR_abt, R12 // 2c433cd5
+ MSR R4, SPSR_abt // 24431cd5
+ MRS SPSR_fiq, R17 // 71433cd5
+ MSR R9, SPSR_fiq // 69431cd5
+ MRS SPSR_irq, R12 // 0c433cd5
+ MSR R23, SPSR_irq // 17431cd5
+ MRS SPSR_und, R29 // 5d433cd5
+ MSR R3, SPSR_und // 43431cd5
+ MRS SPSel, R29 // 1d4238d5
+ MSR R1, SPSel // 014218d5
+ MRS SP_EL0, R10 // 0a4138d5
+ MSR R4, SP_EL0 // 044118d5
+ MRS SP_EL1, R22 // 16413cd5
+ MSR R17, SP_EL1 // 11411cd5
+ MRS TCR_EL1, R17 // 512038d5
+ MSR R23, TCR_EL1 // 572018d5
+ MRS TCR_EL1, R14 // 4e2038d5
+ MSR R29, TCR_EL1 // 5d2018d5
+ MRS TPIDRRO_EL0, R26 // 7ad03bd5
+ MSR R16, TPIDRRO_EL0 // 70d01bd5
+ MRS TPIDR_EL0, R23 // 57d03bd5
+ MSR R5, TPIDR_EL0 // 45d01bd5
+ MRS TPIDR_EL1, R17 // 91d038d5
+ MSR R22, TPIDR_EL1 // 96d018d5
+ MRS TTBR0_EL1, R30 // 1e2038d5
+ MSR R29, TTBR0_EL1 // 1d2018d5
+ MRS TTBR0_EL1, R23 // 172038d5
+ MSR R15, TTBR0_EL1 // 0f2018d5
+ MRS TTBR1_EL1, R5 // 252038d5
+ MSR R26, TTBR1_EL1 // 3a2018d5
+ MRS TTBR1_EL1, R19 // 332038d5
+ MSR R23, TTBR1_EL1 // 372018d5
+ MRS UAO, R22 // 964238d5
+ MSR R4, UAO // 844218d5
+ MRS VBAR_EL1, R23 // 17c038d5
+ MSR R2, VBAR_EL1 // 02c018d5
+ MRS VBAR_EL1, R6 // 06c038d5
+ MSR R3, VBAR_EL1 // 03c018d5
+ MRS DISR_EL1, R12 // 2cc138d5
+ MSR R24, DISR_EL1 // 38c118d5
+ MRS MPIDR_EL1, R1 // a10038d5
+ MRS MIDR_EL1, R13 // 0d0038d5
+ MRS ZCR_EL1, R24 // 181238d5
+ MSR R13, ZCR_EL1 // 0d1218d5
+ MRS ZCR_EL1, R23 // 171238d5
+ MSR R17, ZCR_EL1 // 111218d5
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
new file mode 100644
index 0000000..a298628
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -0,0 +1,766 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The cases are auto-generated by the disassembler.
+// The uncommented cases can be handled by the assembler
+// and are consistent with the disassembler's decoding.
+// TODO marks cases that cannot be handled by the current assembler.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
+
+ AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192
+ ADCW ZR, R8, R10 // 0a011f1a
+ ADC R0, R2, R12 // 4c00009a
+ ADCSW R9, R21, R6 // a602093a
+ ADCS R23, R22, R22 // d60217ba
+ ADDW R5.UXTH, R8, R9 // 0921250b
+ ADD R8.SXTB<<3, R23, R14 // ee8e288b
+ ADDW $3076, R17, R3 // 23123011
+ ADDW $(3076<<12), R17, R3 // ADDW $12599296, R17, R3 // 23127011
+ ADD $2280, R25, R11 // 2ba32391
+ ADD $(2280<<12), R25, R11 // ADD $9338880, R25, R11 // 2ba36391
+ ADDW R13->5, R11, R7 // 67158d0b
+ ADD R25<<54, R17, R16 // 30da198b
+ ADDSW R12.SXTX<<1, R29, R7 // a7e72c2b
+ ADDS R24.UXTX<<4, R25, R21 // 357338ab
+ ADDSW $(3525<<12), R3, R11 // ADDSW $14438400, R3, R11 // 6b147731
+ ADDS $(3525<<12), R3, R11 // ADDS $14438400, R3, R11 // 6b1477b1
+ ADDSW R7->22, R14, R13 // cd59872b
+ ADDS R14>>7, ZR, R4 // e41f4eab
+ AND $-9223372036854775808, R1, R1 // 21004192
+ ANDW $4026540031, R29, R2 // a2430412
+ AND $34903429696192636, R12, R19 // 93910e92
+ ANDW R9@>7, R19, R26 // 7a1ec90a
+ AND R9@>7, R19, R26 // 7a1ec98a
+ TSTW $2863311530, R24 // 1ff30172
+ TST R2, R0 // 1f0002ea
+ TST $7, R2 // 5f0840f2
+ ANDS R2, R0, ZR // 1f0002ea
+ ANDS $7, R2, ZR // 5f0840f2
+ ANDSW $2863311530, R24, ZR // 1ff30172
+ ANDSW $2863311530, R24, R23 // 17f30172
+ ANDS $-140737488289793, R2, R5 // 458051f2
+ ANDSW R26->24, R21, R15 // af629a6a
+ ANDS R30@>44, R3, R26 // 7ab0deea
+ ASRW R12, R27, R25 // 792bcc1a
+ ASR R14, R27, R7 // 672bce9a
+ ASR $11, R27, R25 // 79ff4b93
+ ASRW $11, R27, R25 // 797f0b13
+ BLT -1(PC) // ebffff54
+ JMP -1(PC) // ffffff17
+ BFIW $16, R20, $6, R0 // 80161033
+ BFI $27, R21, $21, R25 // b95265b3
+ BFXILW $3, R27, $23, R14 // 6e670333
+ BFXIL $26, R8, $16, R20 // 14a55ab3
+ BICW R7@>15, R5, R16 // b03ce70a
+ BIC R12@>13, R12, R19 // 9335ec8a
+ BICSW R25->20, R3, R20 // 7450b96a
+ BICS R19->12, R1, R23 // 3730b3ea
+ BICS R19, R1, R23 // 370033ea
+ BICS R19>>0, R1, R23 // 370073ea
+ CALL -1(PC) // ffffff97
+ CALL (R15) // e0013fd6
+ JMP (R29) // a0031fd6
+ BRK $35943 // e08c31d4
+ CBNZW R2, -1(PC) // e2ffff35
+ CBNZ R7, -1(PC) // e7ffffb5
+ CBZW R15, -1(PC) // efffff34
+ CBZ R1, -1(PC) // e1ffffb4
+ CCMN MI, ZR, R1, $4 // e44341ba
+ CCMNW AL, R26, $20, $11 // 4beb543a
+ CCMN PL, R24, $6, $1 // 015b46ba
+ CCMNW EQ, R20, R6, $6 // 8602463a
+ CCMN LE, R30, R12, $6 // c6d34cba
+ CCMPW VS, R29, $15, $7 // a76b4f7a
+ CCMP LE, R7, $19, $3 // e3d853fa
+ CCMPW HS, R19, R6, $0 // 6022467a
+ CCMP LT, R30, R6, $7 // c7b346fa
+ CCMN MI, ZR, R1, $4 // e44341ba
+ CSINCW HS, ZR, R27, R14 // ee279b1a
+ CSINC VC, R2, R1, R1 // 4174819a
+ CSINVW EQ, R2, R21, R17 // 5100955a
+ CSINV LO, R2, R19, R23 // 573093da
+ CINCW LO, R27, R14 // 6e279b1a
+ CINCW HS, R27, ZR // 7f379b1a
+ CINVW EQ, R2, R17 // 5110825a
+ CINV VS, R12, R7 // 87718cda
+ CINV VS, R30, R30 // de739eda
+ CLREX $4 // 5f3403d5
+ CLREX $0 // 5f3003d5
+ CLSW R15, R6 // e615c05a
+ CLS R15, ZR // ff15c0da
+ CLZW R1, R14 // 2e10c05a
+ CLZ R21, R9 // a912c0da
+ CMNW R21.UXTB<<4, R15 // ff11352b
+ CMN R0.UXTW<<4, R16 // 1f5220ab
+ CMNW R13>>8, R9 // 3f214d2b
+ CMN R6->17, R3 // 7f4486ab
+ CMNW $(2<<12), R5 // CMNW $8192, R5 // bf084031
+ CMN $(8<<12), R12 // CMN $32768, R12 // 9f2140b1
+ CMN R6->0, R3 // 7f0086ab
+ CMN R6, R3 // 7f0006ab
+ CMNW R30, R5 // bf001e2b
+ CMNW $2, R5 // bf080031
+ CMN ZR, R3 // 7f001fab
+ CMN R0, R3 // 7f0000ab
+ CMPW R6.UXTB, R23 // ff02266b
+ CMP R25.SXTH<<2, R26 // 5fab39eb
+ CMP $3817, R29 // bfa73bf1
+ CMP R7>>23, R3 // 7f5c47eb
+ CNEGW PL, R9, R14 // 2e45895a
+ CSNEGW HS, R5, R9, R14 // ae24895a
+ CSNEG PL, R14, R21, R3 // c35595da
+ CNEG LO, R7, R15 // ef2487da
+ CRC32B R17, R8, R16 // 1041d11a
+ CRC32H R3, R21, R27 // bb46c31a
+ CRC32W R22, R30, R9 // c94bd61a
+ CRC32X R20, R4, R15 // 8f4cd49a
+ CRC32CB R19, R27, R22 // 7653d31a
+ CRC32CH R21, R0, R20 // 1454d51a
+ CRC32CW R9, R3, R21 // 7558c91a
+ CRC32CX R11, R0, R24 // 185ccb9a
+ CSELW LO, R4, R20, R12 // 8c30941a
+ CSEL GE, R0, R12, R14 // 0ea08c9a
+ CSETW GE, R3 // e3b79f1a
+ CSET LT, R30 // fea79f9a
+ CSETMW VC, R5 // e5639f5a
+ CSETM VS, R4 // e4739fda
+ CSINCW LE, R5, R24, R26 // bad4981a
+ CSINC VS, R26, R16, R17 // 5167909a
+ CSINVW AL, R23, R21, R5 // e5e2955a
+ CSINV LO, R2, R11, R14 // 4e308bda
+ CSNEGW HS, R16, R29, R10 // 0a269d5a
+ CSNEG NE, R21, R19, R11 // ab1693da
+ //TODO DC
+ DCPS1 $11378 // 418ea5d4
+ DCPS2 $10699 // 6239a5d4
+ DCPS3 $24415 // e3ebabd4
+ DMB $1 // bf3103d5
+ DMB $0 // bf3003d5
+ DRPS // e003bfd6
+ DSB $1 // 9f3103d5
+ EONW R21<<29, R6, R9 // c974354a
+ EON R14>>46, R4, R9 // 89b86eca
+ EOR $-2287828610704211969, R27, R22 // 76e343d2
+ EORW R12->27, R10, R19 // 536d8c4a
+ EOR R2<<59, R30, R17 // d1ef02ca
+ ERET // e0039fd6
+ EXTRW $7, R8, R10, R25 // 591d8813
+ EXTR $35, R22, R12, R8 // 888dd693
+ SEVL // bf2003d5
+ HINT $6 // df2003d5
+ HINT $0 // 1f2003d5
+ HLT $65509 // a0fc5fd4
+ HVC $61428 // 82fe1dd4
+ ISB $1 // df3103d5
+ ISB $15 // df3f03d5
+ LDARW (R12), R29 // 9dfddf88
+ LDARW (R30), R22 // d6ffdf88
+ LDARW (RSP), R22 // f6ffdf88
+ LDAR (R27), R22 // 76ffdfc8
+ LDARB (R25), R2 // 22ffdf08
+ LDARH (R5), R7 // a7fcdf48
+ LDAXPW (R10), (R20, R16) // 54c17f88
+ LDAXP (R25), (R30, R11) // 3eaf7fc8
+ LDAXRW (R15), R2 // e2fd5f88
+ LDAXR (R15), R21 // f5fd5fc8
+ LDAXRB (R19), R16 // 70fe5f08
+ LDAXRH (R5), R8 // a8fc5f48
+ //TODO LDNP 0xcc(RSP), ZR, R12 // ecff5928
+ //TODO LDNP 0x40(R28), R9, R5 // 852744a8
+ //TODO LDPSW -0xd0(R2), R0, R12 // 4c00e668
+ //TODO LDPSW 0x5c(R4), R8, R5 // 85a0cb69
+ //TODO LDPSW 0x6c(R12), R2, R27 // 9b894d69
+ MOVWU.P -84(R15), R9 // e9c55ab8
+ MOVD.P -46(R10), R8 // 48255df8
+ MOVD.P (R10), R8 // 480540f8
+ MOVWU.W -141(R3), R16 // 703c57b8
+ MOVD.W -134(R0), R29 // 1dac57f8
+ MOVWU 4156(R1), R25 // 393c50b9
+ MOVD 14616(R10), R9 // 498d5cf9
+ MOVWU (R4)(R12.SXTW<<2), R7 // 87d86cb8
+ MOVD (R7)(R11.UXTW<<3), R25 // f9586bf8
+ MOVBU.P 42(R2), R12 // 4ca44238
+ MOVBU.W -27(R2), R14 // 4e5c5e38
+ MOVBU 2916(R24), R3 // 03936d39
+ MOVBU (R19)(R14<<0), R23 // 777a6e38
+ MOVBU (R2)(R8.SXTX), R19 // 53e86838
+ MOVBU (R27)(R23), R14 // 6e6b7738
+ MOVHU.P 107(R14), R13 // cdb54678
+ MOVHU.W 192(R3), R2 // 620c4c78
+ MOVHU 6844(R4), R19 // 93787579
+ MOVHU (R5)(R25.SXTW), R15 // afc87978
+ //TODO MOVBW.P 77(R19), R11 // 6bd6c438
+ MOVB.P 36(RSP), R27 // fb478238
+ //TODO MOVBW.W -57(R19), R13 // 6d7edc38
+ MOVB.W -178(R16), R24 // 18ee9438
+ //TODO MOVBW 430(R8), R22 // 16b9c639
+ MOVB 997(R9), R23 // 37958f39
+ //TODO MOVBW (R2<<1)(R21), R15 // af7ae238
+ //TODO MOVBW (R26)(R0), R21 // 1568fa38
+ MOVB (R5)(R15), R16 // b068af38
+ MOVB (R19)(R26.SXTW), R19 // 73caba38
+ MOVB (R29)(R30), R14 // ae6bbe38
+ //TODO MOVHW.P 218(R22), R25 // d9a6cd78
+ MOVH.P 179(R23), R5 // e5368b78
+ //TODO MOVHW.W 136(R2), R27 // 5b8cc878
+ MOVH.W -63(R25), R22 // 361f9c78
+ //TODO MOVHW 5708(R25), R21 // 359bec79
+ MOVH 54(R2), R13 // 4d6c8079
+ //TODO MOVHW (R22)(R24.SXTX), R4 // c4eaf878
+ MOVH (R26)(R30.UXTW<<1), ZR // 5f5bbe78
+ MOVW.P -58(R16), R2 // 02669cb8
+ MOVW.W -216(R19), R8 // 688e92b8
+ MOVW 4764(R23), R10 // ea9e92b9
+ MOVW (R8)(R3.UXTW), R17 // 1149a3b8
+ //TODO LDTR -0x1e(R3), R4 // 64285eb8
+ //TODO LDTR -0xe5(R3), R10 // 6ab851f8
+ //TODO LDTRB 0xf0(R13), R10 // aa094f38
+ //TODO LDTRH 0xe8(R13), R23 // b7894e78
+ //TODO LDTRSB -0x24(R20), R5 // 85cadd38
+ //TODO LDTRSB -0x75(R9), R13 // 2db99838
+ //TODO LDTRSH 0xef(R3), LR // 7ef8ce78
+ //TODO LDTRSH 0x96(R19), R24 // 786a8978
+ //TODO LDTRSW 0x1e(LR), R5 // c5eb81b8
+ //TODO LDUR 0xbf(R13), R1 // a1f14bb8
+ //TODO LDUR -0x3c(R22), R3 // c3425cf8
+ //TODO LDURB -0xff(R17), R14 // 2e125038
+ //TODO LDURH 0x80(R1), R6 // 26004878
+ //TODO LDURSB 0xde(LR), R3 // c3e3cd38
+ //TODO LDURSB 0x96(R9), R7 // 27618938
+ //TODO LDURSH -0x49(R11), R28 // 7c71db78
+ //TODO LDURSH -0x1f(R0), R29 // 1d109e78
+ //TODO LDURSW 0x48(R6), R20 // d48084b8
+ LDXPW (R24), (R23, R11) // 172f7f88
+ LDXP (R0), (R16, R13) // 10347fc8
+ LDXRW (RSP), R30 // fe7f5f88
+ LDXR (R27), R12 // 6c7f5fc8
+ LDXRB (R0), R4 // 047c5f08
+ LDXRH (R12), R26 // 9a7d5f48
+ LSLW R11, R10, R15 // 4f21cb1a
+ LSL R27, R24, R21 // 1523db9a
+ LSLW $5, R7, R22 // f6681b53
+ LSL $57, R17, R2 // 221a47d3
+ LSRW R9, R3, R12 // 6c24c91a
+ LSR R10, R5, R2 // a224ca9a
+ LSRW $1, R3, R16 // 707c0153
+ LSR $12, R1, R20 // 34fc4cd3
+ MADDW R13, R23, R3, R10 // 6a5c0d1b
+ MADD R5, R23, R10, R4 // 445d059b
+ MNEGW R0, R9, R21 // 35fd001b
+ MNEG R14, R27, R23 // 77ff0e9b
+ MOVD R2, R7 // e70302aa
+ MOVW $-24, R20 // f4028012
+ MOVD $-51096, ZR // fff29892
+ MOVW $2507014144, R20 // d4adb252
+ MOVD $1313925191285342208, R7 // 8747e2d2
+ ORRW $16252928, ZR, R21 // f5130d32
+ MOVD $-4260607558625, R11 // eb6b16b2
+ MOVD R30, R7 // e7031eaa
+ MOVKW $(3905<<0), R21 // MOVKW $3905, R21 // 35e88172
+ MOVKW $(3905<<16), R21 // MOVKW $255918080, R21 // 35e8a172
+ MOVK $(3905<<32), R21 // MOVK $16771847290880, R21 // 35e8c1f2
+ MOVD $0, R5 // 050080d2
+ MSR $1, SPSel // bf4100d5
+ MSR $9, DAIFSet // df4903d5
+ MSR $6, DAIFClr // ff4603d5
+ MRS ELR_EL1, R8 // 284038d5
+ MSR R16, ELR_EL1 // 304018d5
+ MRS DCZID_EL0, R3 // e3003bd5
+ MSUBW R1, R1, R12, R5 // 8585011b
+ MSUB R19, R16, R26, R2 // 42c3139b
+ MULW R26, R5, R22 // b67c1a1b
+ MUL R4, R3, R0 // 607c049b
+ MVNW R3@>13, R8 // e837e32a
+ MVN R13>>31, R9 // e97f6daa
+ NEGSW R23<<1, R30 // fe07176b
+ NEGS R20>>35, R22 // f68f54eb
+ NGCW R13, R8 // e8030d5a
+ NGC R2, R7 // e70302da
+ NGCSW R10, R5 // e5030a7a
+ NGCS R24, R16 // f00318fa
+ NOOP // 1f2003d5
+ ORNW R4@>11, R16, R3 // 032ee42a
+ ORN R22@>19, R3, R3 // 634cf6aa
+ ORRW $4294443071, R15, R24 // f8490d32
+ ORR $-3458764513820540929, R12, R22 // 96f542b2
+ ORRW R13<<4, R8, R26 // 1a110d2a
+ ORR R3<<22, R5, R6 // a65803aa
+ PRFM (R8), $25 // 190180f9
+ PRFM (R2), PLDL1KEEP // 400080f9
+ //TODO PRFM (R27)(R30.SXTW<<3), PLDL2STRM // 63dbbff8
+ //TODO PRFUM 22(R16), PSTL1KEEP // 106281f8
+ RBITW R9, R22 // 3601c05a
+ RBIT R11, R4 // 6401c0da
+ RET // c0035fd6
+ REVW R8, R10 // 0a09c05a
+ REV R1, R2 // 220cc0da
+ REV16W R21, R19 // b306c05a
+ REV16 R25, R4 // 2407c0da
+ REV32 R27, R21 // 750bc0da
+ EXTRW $27, R4, R25, R19 // 336f8413
+ EXTR $17, R10, R29, R15 // af47ca93
+ ROR $14, R14, R15 // cf39ce93
+ RORW $28, R14, R15 // cf718e13
+ RORW R3, R12, R3 // 832dc31a
+ ROR R0, R23, R2 // e22ec09a
+ SBCW R4, R8, R24 // 1801045a
+ SBC R25, R10, R26 // 5a0119da
+ SBCSW R27, R19, R19 // 73021b7a
+ SBCS R5, R9, R5 // 250105fa
+ SBFIZW $9, R10, $18, R22 // 56451713
+ SBFIZ $6, R11, $15, R20 // 74397a93
+ SBFXW $8, R15, $10, R20 // f4450813
+ SBFX $2, R27, $54, R7 // 67df4293
+ SDIVW R22, R14, R9 // c90dd61a
+ SDIV R13, R21, R9 // a90ecd9a
+ SEV // 9f2003d5
+ SEVL // bf2003d5
+ SMADDL R3, R7, R11, R9 // 691d239b
+ SMSUBL R5, R19, R11, R29 // 7dcd259b
+ SMNEGL R26, R3, R15 // 6ffc3a9b
+ SMULH R17, R21, R21 // b57e519b
+ SMULL R0, R5, R0 // a07c209b
+ SMC $37977 // 238b12d4
+ STLRW R16, (R22) // d0fe9f88
+ STLR R3, (R24) // 03ff9fc8
+ STLRB R11, (R22) // cbfe9f08
+ STLRH R16, (R23) // f0fe9f48
+ STLXR R7, (R27), R8 // 67ff08c8
+ STLXRW R13, (R15), R14 // edfd0e88
+ STLXRB R24, (R23), R8 // f8fe0808
+ STLXRH R19, (R27), R11 // 73ff0b48
+ STLXP (R6, R3), (R10), R2 // 468d22c8
+ STLXPW (R6, R11), (R22), R21 // c6ae3588
+ //TODO STNPW 44(R1), R3, R10 // 2a8c0528
+ //TODO STNP 0x108(R3), ZR, R7 // 67fc10a8
+ LDP.P -384(R3), (R22, R26) // 7668e8a8
+ LDP.W 280(R8), (R19, R11) // 13add1a9
+ STP.P (R22, R27), 352(R0) // 166c96a8
+ STP.W (R17, R11), 96(R8) // 112d86a9
+ MOVW.P R20, -28(R1) // 34441eb8
+ MOVD.P R17, 191(R16) // 11f60bf8
+ MOVW.W R1, -171(R14) // c15d15b8
+ MOVD.W R14, -220(R13) // ae4d12f8
+ MOVW R3, 14828(R24) // 03ef39b9
+ MOVD R0, 20736(R17) // 208228f9
+ MOVB.P ZR, -117(R7) // ffb41838
+ MOVB.W R27, -96(R13) // bb0d1a38
+ MOVB R17, 2200(R13) // b1612239
+ MOVH.P R7, -72(R4) // 87841b78
+ MOVH.W R12, -125(R14) // cc3d1878
+ MOVH R19, 3686(R26) // 53cf1c79
+ MOVW R21, 34(R0) // 152002b8
+ MOVD R25, -137(R17) // 397217f8
+ MOVW R4, (R12)(R22.UXTW<<2) // 845936b8
+ MOVD R27, (R5)(R15.UXTW<<3) // bb582ff8
+ MOVB R2, (R10)(R16) // 42693038
+ MOVB R2, (R29)(R26) // a26b3a38
+ MOVH R11, -80(R23) // eb021b78
+ MOVH R11, (R27)(R14.SXTW<<1) // 6bdb2e78
+ MOVB R19, (R0)(R4) // 13682438
+ MOVB R1, (R6)(R4) // c1682438
+ MOVH R3, (R11)(R13<<1) // 63792d78
+ //TODO STTR 55(R4), R29 // 9d7803b8
+ //TODO STTR 124(R5), R25 // b9c807f8
+ //TODO STTRB -28(R23), R16 // f04a1e38
+ //TODO STTRH 9(R10), R19 // 53990078
+ STXP (R1, R2), (R3), R10 // 61082ac8
+ STXP (R1, R2), (RSP), R10 // e10b2ac8
+ STXPW (R1, R2), (R3), R10 // 61082a88
+ STXPW (R1, R2), (RSP), R10 // e10b2a88
+ STXRW R2, (R19), R20 // 627e1488
+ STXR R15, (R21), R13 // af7e0dc8
+ STXRB R7, (R9), R24 // 277d1808
+ STXRH R12, (R3), R8 // 6c7c0848
+ SUBW R20.UXTW<<2, R23, R19 // f34a344b
+ SUB R5.SXTW<<2, R1, R26 // 3ac825cb
+ SUB $(1923<<12), R4, R27 // SUB $7876608, R4, R27 // 9b0c5ed1
+ SUBW $(1923<<12), R4, R27 // SUBW $7876608, R4, R27 // 9b0c5e51
+ SUBW R12<<29, R7, R8 // e8740c4b
+ SUB R12<<61, R7, R8 // e8f40ccb
+ SUBSW R2.SXTH<<3, R13, R6 // a6ad226b
+ SUBS R21.UXTX<<2, R27, R4 // 646b35eb
+ SUBSW $(44<<12), R6, R9 // SUBSW $180224, R6, R9 // c9b04071
+ SUBS $(1804<<12), R13, R9 // SUBS $7389184, R13, R9 // a9315cf1
+ SUBSW R22->28, R6, R7 // c770966b
+ SUBSW R22>>28, R6, R7 // c770566b
+ SUBS R26<<15, R6, R16 // d03c1aeb
+ SVC $0 // 010000d4
+ SVC $7165 // a17f03d4
+ SXTBW R8, R25 // 191d0013
+ SXTB R13, R9 // a91d4093
+ SXTHW R8, R8 // 083d0013
+ SXTH R17, R25 // 393e4093
+ SXTW R0, R27 // 1b7c4093
+ SYSL $285440, R12 // 0c5b2cd5
+ //TODO TLBI
+ TSTW $0x80000007, R9 // TSTW $2147483655, R9 // 3f0d0172
+ TST $0xfffffff0, LR // TST $4294967280, R30 // df6f7cf2
+ TSTW R10@>21, R2 // 5f54ca6a
+ TST R17<<11, R24 // 1f2f11ea
+ ANDSW $0x80000007, R9, ZR // ANDSW $2147483655, R9, ZR // 3f0d0172
+ ANDS $0xfffffff0, LR, ZR // ANDS $4294967280, R30, ZR // df6f7cf2
+ ANDSW R10@>21, R2, ZR // 5f54ca6a
+ ANDS R17<<11, R24, ZR // 1f2f11ea
+ UBFIZW $3, R19, $14, R14 // 6e361d53
+ UBFIZ $3, R22, $14, R4 // c4367dd3
+ UBFXW $3, R7, $20, R15 // ef580353
+ UBFX $33, R17, $25, R5 // 25e661d3
+ UDIVW R8, R21, R15 // af0ac81a
+ UDIV R2, R19, R21 // 750ac29a
+ UMADDL R0, R20, R17, R17 // 3152a09b
+ UMSUBL R22, R4, R3, R7 // 6790b69b
+ UMNEGL R3, R19, R1 // 61fea39b
+ UMULH R24, R20, R24 // 987ed89b
+ UMULL R19, R22, R19 // d37eb39b
+ UXTBW R2, R6 // 461c0053
+ UXTHW R7, R20 // f43c0053
+ VCNT V0.B8, V0.B8 // 0058200e
+ VCNT V0.B16, V0.B16 // 0058204e
+ WFE // 5f2003d5
+ WFI // 7f2003d5
+ YIELD // 3f2003d5
+ //TODO FABD F0, F5, F11 // abd4a07e
+ //TODO VFABD V30.S2, V8.S2, V24.S2 // 18d5be2e
+ //TODO VFABS V5.S4, V24.S4 // b8f8a04e
+ FABSS F2, F28 // 5cc0201e
+ FABSD F0, F14 // 0ec0601e
+ //TODO FACGE F25, F16, F0 // 00ee797e
+ //TODO VFACGE V11.S2, V15.S2, V9.S2 // e9ed2b2e
+ //TODO FACGT F20, F16, F27 // 1beef47e
+ //TODO VFACGT V15.S4, V25.S4, V22.S4 // 36efaf6e
+ //TODO VFADD V21.D2, V10.D2, V21.D2 // 55d5754e
+ FADDS F12, F2, F10 // 4a282c1e
+ FADDD F24, F14, F12 // cc29781e
+ //TODO VFADDP V4.D2, F13 // 8dd8707e
+ //TODO VFADDP V30.S4, V3.S4, V11.S4 // 6bd43e6e
+ FCCMPS LE, F17, F12, $14 // 8ed5311e
+ FCCMPD HI, F11, F15, $15 // ef856b1e
+ FCCMPES HS, F28, F13, $13 // bd253c1e
+ FCCMPED LT, F20, F4, $9 // 99b4741e
+ //TODO FCMEQ F7, F11, F26 // 7ae5675e
+ //TODO VFCMEQ V29.S4, V26.S4, V30.S4 // 5ee73d4e
+ //TODO FCMEQ $0, F17, F22 // 36daa05e
+ //TODO VFCMEQ $0, V17.D2, V22.D2 // 36dae04e
+ //TODO FCMGE F29, F31, F13 // ede77d7e
+ //TODO VFCMGE V8.S2, V31.S2, V2.S2 // e2e7282e
+ //TODO FCMGE $0, F18, F27 // e2e7282e
+ //TODO VFCMGE $0, V14.S2, V8.S2 // c8c9a02e
+ //TODO FCMGT F20, F2, F8 // 48e4b47e
+ //TODO VFCMGT V26.D2, V15.D2, V23.D2 // f7e5fa6e
+ //TODO FCMGT $0, F14, F3 // c3c9e05e
+ //TODO VFCMGT $0, V6.S2, V28.S2 // dcc8a00e
+ //TODO FCMLE $0, F26, F25 // 59dba07e
+ //TODO VFCMLE $0, V28.S2, V20.S2 // 94dba02e
+ //TODO FCMLT $0, F17, F3 // 23eae05e
+ //TODO VFCMLT $0, V8.S4, V7.S4 // 07e9a04e
+ FCMPS F3, F17 // 2022231e
+ FCMPS $(0.0), F8 // 0821201e
+ FCMPD F11, F27 // 60236b1e
+ FCMPD $(0.0), F25 // 2823601e
+ FCMPES F16, F30 // d023301e
+ FCMPES $(0.0), F29 // b823201e
+ FCMPED F13, F10 // 50216d1e
+ FCMPED $(0.0), F25 // 3823601e
+ FCSELS EQ, F26, F27, F25 // 590f3b1e
+ FCSELD PL, F8, F22, F7 // 075d761e
+ //TODO FCVTAS F4, F28 // 9cc8215e
+ //TODO VFCVTAS V21.D2, V27.D2 // bbca614e
+ //TODO FCVTAS F27, R7 // 6703241e
+ //TODO FCVTAS F19, R26 // 7a02249e
+ //TODO FCVTAS F4, R0 // 8000641e
+ //TODO FCVTAS F3, R19 // 7300649e
+ //TODO FCVTAU F18, F28 // 5cca217e
+ //TODO VFCVTAU V30.S4, V27.S4 // dbcb216e
+ //TODO FCVTAU F0, R2 // 0200251e
+ //TODO FCVTAU F0, R24 // 1800259e
+ //TODO FCVTAU F31, R10 // ea03651e
+ //TODO FCVTAU F3, R8 // 6800659e
+ //TODO VFCVTL V11.S2, V21.D2 // 7579610e
+ //TODO VFCVTL2 V15.H8, V25.S4 // f979214e
+ //TODO FCVTMS F21, F28 // bcba215e
+ //TODO VFCVTMS V5.D2, V2.D2 // a2b8614e
+ //TODO FCVTMS F31, R19 // f303301e
+ //TODO FCVTMS F23, R16 // f002309e
+ //TODO FCVTMS F16, R22 // 1602701e
+ //TODO FCVTMS F14, R19 // d301709e
+ //TODO FCVTMU F14, F8 // c8b9217e
+ //TODO VFCVTMU V7.D2, V1.D2 // e1b8616e
+ //TODO FCVTMU F2, R0 // 4000311e
+ //TODO FCVTMU F23, R19 // f302319e
+ //TODO FCVTMU F16, R17 // 1102711e
+ //TODO FCVTMU F12, R19 // 9301719e
+ //TODO VFCVTN V23.D2, V26.S2 // fa6a610e
+ //TODO VFCVTN2 V2.D2, V31.S4 // 5f68614e
+ //TODO FCVTNS F3, F27 // 7ba8215e
+ //TODO VFCVTNS V11.S2, V12.S2 // 6ca9210e
+ //TODO FCVTNS F14, R9 // c901201e
+ //TODO FCVTNS F0, R27 // 1b00209e
+ //TODO FCVTNS F23, R0 // e002601e
+ //TODO FCVTNS F6, R30 // de00609e
+ //TODO FCVTNU F12, F9 // 89a9217e
+ //TODO VFCVTNU V3.D2, V20.D2 // 74a8616e
+ //TODO FCVTNU F20, R11 // 8b02211e
+ //TODO FCVTNU F23, R19 // f302219e
+ //TODO FCVTNU F4, R5 // 8500611e
+ //TODO FCVTNU F11, R19 // 7301619e
+ //TODO FCVTPS F20, F26 // 9aaae15e
+ //TODO VFCVTPS V29.S4, V13.S4 // adaba14e
+ //TODO FCVTPS F5, R29 // bd00281e
+ //TODO FCVTPS F3, R3 // 6300289e
+ //TODO FCVTPS F4, R25 // 9900681e
+ //TODO FCVTPS F29, R15 // af03689e
+ //TODO FCVTPU F13, F3 // a3a9e17e
+ //TODO VFCVTPU V6.S4, V24.S4 // d8a8a16e
+ //TODO FCVTPU F17, R17 // 3102291e
+ //TODO FCVTPU F7, R23 // f700299e
+ //TODO FCVTPU F10, R3 // 4301691e
+ //TODO FCVTPU F24, R27 // 1b03699e
+ //TODO FCVTXN F14, F0 // c069617e
+ //TODO VFCVTXN V1.D2, V17.S2 // 3168612e
+ //TODO VFCVTXN2 V0.D2, V21.S4 // 1568616e
+ //TODO FCVTZS $26, F29, F19 // b3ff665f
+ //TODO VFCVTZS $45, V14.D2, V18.D2 // d2fd534f
+ //TODO FCVTZS F8, F7 // 07b9a15e
+ //TODO VFCVTZS V2.S2, V4.S2 // 44b8a10e
+ //TODO FCVTZS $26, F7, R11 // eb98181e
+ //TODO FCVTZS $7, F4, ZR // 9fe4189e
+ //TODO FCVTZS $28, F13, R14 // ae91581e
+ //TODO FCVTZS $8, F27, R3 // 63e3589e
+ FCVTZSSW F7, R15 // ef00381e
+ FCVTZSS F16, ZR // 1f02389e
+ FCVTZSDW F19, R3 // 6302781e
+ FCVTZSD F7, R7 // e700789e
+ //TODO FCVTZU $17, F18, F28 // 5cfe2f7f
+ //TODO VFCVTZU $19, V20.D2, V11.D2 // 8bfe6d6f
+ //TODO FCVTZU F22, F8 // c8bae17e
+ //TODO VFCVTZU V0.S4, V1.S4 // 01b8a16e
+ //TODO FCVTZU $14, F24, R20 // 14cb191e
+ //TODO FCVTZU $6, F25, R17 // 31eb199e
+ //TODO FCVTZU $5, F17, R10 // 2aee591e
+ //TODO FCVTZU $6, F7, R19 // f3e8599e
+ FCVTZUSW F2, R9 // 4900391e
+ FCVTZUS F12, R29 // 9d01399e
+ FCVTZUDW F27, R22 // 7603791e
+ FCVTZUD F25, R22 // 3603799e
+ //TODO VFDIV V6.D2, V1.D2, V27.D2 // 3bfc666e
+ FDIVS F16, F10, F20 // 5419301e
+ FDIVD F11, F25, F30 // 3e1b6b1e
+ FMADDS F15, F2, F8, F1 // 01090f1f
+ FMADDD F15, F21, F25, F9 // 29574f1f
+ //TODO VFMAX V23.D2, V27.D2, V14.D2 // 6ef7774e
+ FMAXS F5, F28, F27 // 9b4b251e
+ FMAXD F12, F31, F31 // ff4b6c1e
+ //TODO VFMAXNM V3.D2, V12.D2, V27.D2 // 9bc5634e
+ FMAXNMS F11, F24, F12 // 0c6b2b1e
+ FMAXNMD F20, F6, F16 // d068741e
+ //TODO VFMAXNMP V3.S2, F2 // 62c8307e
+ //TODO VFMAXNMP V25.S2, V4.S2, V2.S2 // 82c4392e
+ //TODO VFMAXNMV V14.S4, F15 // cfc9306e
+ //TODO VFMAXP V3.S2, F27 // 7bf8307e
+ //TODO VFMAXP V29.S2, V30.S2, V9.S2 // c9f73d2e
+ //TODO VFMAXV V13.S4, F14 // aef9306e
+ //TODO VFMIN V19.D2, V30.D2, V7.D2 // c7f7f34e
+ FMINS F26, F18, F30 // 5e5a3a1e
+ FMIND F29, F4, F21 // 95587d1e
+ //TODO VFMINNM V21.S4, V5.S4, V1.S4 // a1c4b54e
+ FMINNMS F23, F20, F1 // 817a371e
+ FMINNMD F8, F3, F24 // 7878681e
+ //TODO VFMINNMP V16.D2, F12 // 0ccaf07e
+ //TODO VFMINNMP V10.S4, V25.S4, V27.S4 // 3bc7aa6e
+ //TODO VFMINNMV V8.S4, F3 // 03c9b06e
+ //TODO VFMINP V10.S2, F20 // 54f9b07e
+ //TODO VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e
+ //TODO VFMINV V11.S4, F9 // 69f9b06e
+ //TODO VFMLA V6.S[0], F2, F14 // 4e10865f
+ //TODO VFMLA V28.S[2], V2.S2, V30.S2 // 5e189c0f
+ VFMLA V29.S2, V20.S2, V14.S2 // 8ece3d0e
+ //TODO VFMLS V24.D[1], F3, F17 // 7158d85f
+ //TODO VFMLS V10.S[0], V11.S2, V10.S2 // 6a518a0f
+ VFMLS V29.S2, V27.S2, V17.S2 // 71cfbd0e
+ //TODO FMOVS $(-1.625), F13 // 0d503f1e
+ //TODO FMOVD $12.5, F30 // 1e30651e
+ //TODO VFMOV R7, V25.D[1] // f900af9e
+ FMOVD F2, R15 // 4f00669e
+ FMOVD R3, F11 // 6b00679e
+ FMOVS F20, R29 // 9d02261e
+ FMOVS R8, F15 // 0f01271e
+ FMOVD F2, F9 // 4940601e
+ FMOVS F4, F27 // 9b40201e
+ //TODO VFMOV $3.125, V8.D2 // 28f5006f
+ FMSUBS F13, F21, F13, F19 // b3d50d1f
+ FMSUBD F11, F7, F15, F31 // ff9d4b1f
+ //TODO VFMUL V9.S[2], F21, F19 // b39a895f
+ //TODO VFMUL V26.S[2], V26.S2, V2.S2 // 429b9a0f
+ //TODO VFMUL V21.D2, V17.D2, V25.D2 // 39de756e
+ FMULS F0, F6, F24 // d808201e
+ FMULD F5, F29, F9 // a90b651e
+ //TODO VFMULX V26.S[2], F20, F8 // 889a9a7f
+ //TODO VFMULX V12.D[1], V21.D2, V31.D2 // bf9acc6f
+ //TODO FMULX F16, F1, F31 // 3fdc705e
+ //TODO VFMULX V29.S2, V13.S2, V31.S2 // bfdd3d0e
+ //TODO VFNEG V18.S2, V12.S2 // 4cfaa02e
+ FNEGS F16, F5 // 0542211e
+ FNEGD F31, F31 // ff43611e
+ FNMADDS F17, F22, F6, F20 // d458311f
+ FNMADDD F15, F0, F26, F20 // 54036f1f
+ FNMSUBS F14, F16, F27, F14 // 6ec32e1f
+ FNMSUBD F29, F25, F8, F10 // 0ae57d1f
+ FNMULS F24, F22, F18 // d28a381e
+ FNMULD F14, F30, F7 // c78b6e1e
+ //TODO FRECPE F9, F2 // 22d9e15e
+ //TODO VFRECPE V0.S2, V28.S2 // 1cd8a10e
+ //TODO FRECPS F28, F10, F9 // 49fd3c5e
+ //TODO VFRECPS V27.D2, V12.D2, V24.D2 // 98fd7b4e
+ //TODO FRECPX F28, F3 // 83fbe15e
+ //TODO VFRINTA V14.S2, V25.S2 // d989212e
+ FRINTAS F0, F21 // 1540261e
+ FRINTAD F8, F22 // 1641661e
+ //TODO VFRINTI V21.D2, V31.D2 // bf9ae16e
+ FRINTIS F17, F17 // 31c2271e
+ FRINTID F9, F15 // 2fc1671e
+ //TODO VFRINTM V9.D2, V27.D2 // 3b99614e
+ FRINTMS F24, F16 // 1043251e
+ FRINTMD F5, F2 // a240651e
+ //TODO VFRINTN V30.S4, V2.S4 // c28b214e
+ FRINTNS F26, F14 // 4e43241e
+ FRINTND F28, F12 // 8c43641e
+ //TODO VFRINTP V27.D2, V31.D2 // 7f8be14e
+ FRINTPS F27, F4 // 64c3241e
+ FRINTPD F6, F22 // d6c0641e
+ //TODO VFRINTX V25.D2, V0.D2 // 209b616e
+ FRINTXS F26, F10 // 4a43271e
+ FRINTXD F16, F12 // 0c42671e
+ //TODO VFRINTZ V25.S4, V27.S4 // 3b9ba14e
+ FRINTZS F3, F28 // 7cc0251e
+ FRINTZD F24, F6 // 06c3651e
+ //TODO FRSQRTE F29, F5 // a5dbe17e
+ //TODO VFRSQRTE V18.S2, V1.S2 // 41daa12e
+ //TODO FRSQRTS F17, F7, F24 // f8fcf15e
+ //TODO VFRSQRTS V14.S2, V10.S2, V24.S2 // 58fdae0e
+ //TODO VFSQRT V2.D2, V21.D2 // 55f8e16e
+ FSQRTS F0, F9 // 09c0211e
+ FSQRTD F14, F27 // dbc1611e
+ FSUBS F25, F23, F0 // e03a391e
+ FSUBD F11, F13, F24 // b8396b1e
+ //TODO SCVTFSS F30, F20 // d4db215e
+ //TODO VSCVTF V7.S2, V17.S2 // f1d8210e
+ SCVTFWS R3, F16 // 7000221e
+ SCVTFWD R20, F4 // 8402621e
+ SCVTFS R16, F12 // 0c02229e
+ SCVTFD R26, F14 // 4e03629e
+ UCVTFWS R6, F4 // c400231e
+ UCVTFWD R10, F23 // 5701631e
+ UCVTFS R24, F29 // 1d03239e
+ UCVTFD R20, F11 // 8b02639e
+ VADD V16, V19, V14 // 6e86f05e
+ VADD V5.H8, V18.H8, V9.H8 // 4986654e
+ VADDP V7.H8, V25.H8, V17.H8 // 31bf674e
+ VADDV V3.H8, V0 // 60b8714e
+ AESD V22.B16, V19.B16 // d35a284e
+ AESE V31.B16, V29.B16 // fd4b284e
+ AESIMC V12.B16, V27.B16 // 9b79284e
+ AESMC V14.B16, V28.B16 // dc69284e
+ VAND V4.B16, V4.B16, V9.B16 // 891c244e
+ VCMEQ V24.S4, V13.S4, V12.S4 // ac8db86e
+ VCNT V13.B8, V11.B8 // ab59200e
+ VMOV V31.B[15], V18 // f2071f5e
+ VDUP V31.B[15], V18 // f2071f5e
+ VDUP V31.B[13], V20.B16 // f4071b4e
+ VEOR V4.B8, V18.B8, V7.B8 // 471e242e
+ VEXT $4, V2.B8, V1.B8, V3.B8 // 2320022e
+ VEXT $8, V2.B16, V1.B16, V3.B16 // 2340026e
+ VMOV V11.B[11], V16.B[12] // 705d196e
+ VMOV R20, V21.B[2] // 951e054e
+ VLD1 (R2), [V21.B16] // 5570404c
+ VLD1 (R24), [V18.D1, V19.D1, V20.D1] // 126f400c
+ VLD1 (R29), [V14.D1, V15.D1, V16.D1, V17.D1] // ae2f400c
+ VLD1.P 16(R23), [V1.B16] // e172df4c
+ VLD1.P (R6)(R11), [V31.D1] // df7ccb0c
+ VLD1.P 16(R7), [V31.D1, V0.D1] // ffacdf0c
+ VLD1.P (R19)(R4), [V24.B8, V25.B8] // 78a2c40c
+ VLD1.P (R20)(R8), [V7.H8, V8.H8, V9.H8] // 8766c84c
+ VLD1.P 32(R30), [V5.B8, V6.B8, V7.B8, V8.B8] // c523df0c
+ VLD1 (R19), V14.B[15] // 6e1e404d
+ VLD1 (R29), V0.H[1] // a04b400d
+ VLD1 (R27), V2.S[0] // 6283400d
+ VLD1 (R21), V5.D[1] // a586404d
+ VLD1.P 1(R19), V10.B[14] // 6a1adf4d
+ VLD1.P (R3)(R14), V16.B[11] // 700cce4d
+ VLD1.P 2(R1), V28.H[2] // 3c50df0d
+ VLD1.P (R13)(R20), V9.H[2] // a951d40d
+ VLD1.P 4(R17), V1.S[3] // 2192df4d
+ VLD1.P (R14)(R2), V17.S[2] // d181c24d
+ VLD1.P 8(R5), V30.D[1] // be84df4d
+ VLD1.P (R27)(R13), V27.D[0] // 7b87cd0d
+ //TODO FMOVS.P -29(RSP), F8 // e8375ebc
+ //TODO FMOVS.W 71(R29), F28 // bc7f44bc
+ FMOVS 6160(R4), F23 // 971058bd
+ VMOV V18.B[10], V27 // 5b06155e
+ VDUP V18.B[10], V27 // 5b06155e
+ VMOV V12.B[2], V28.B[12] // 9c15196e
+ VMOV R30, V4.B[13] // c41f1b4e
+ VMOV V2.B16, V4.B16 // 441ca24e
+ VMOV V13.S[0], R20 // b43d040e
+ VMOV V13.D[0], R20 // b43d084e
+ VMOVI $146, V22.B16 // 56e6044f
+ VORR V25.B16, V22.B16, V15.B16 // cf1eb94e
+ VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e
+ VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e
+ VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e
+ VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e
+ VRBIT V10.B16, V21.B16 // 5559606e
+ VREV32 V2.H8, V1.H8 // 4108606e
+ VREV16 V2.B8, V1.B8 // 4118200e
+ VREV16 V5.B16, V16.B16 // b018204e
+ SCVTFWS R6, F17 // d100221e
+ SCVTFWD R3, F15 // 6f00621e
+ SCVTFS R20, F25 // 9902229e
+ SCVTFD R13, F9 // a901629e
+ SHA1C V8.S4, V8, V2 // 0201085e
+ SHA1H V17, V25 // 390a285e
+ SHA1M V0.S4, V27, V27 // 7b23005e
+ SHA1P V3.S4, V20, V27 // 9b12035e
+ SHA1SU0 V17.S4, V13.S4, V16.S4 // b031115e
+ SHA1SU1 V24.S4, V23.S4 // 171b285e
+ SHA256H2 V6.S4, V16, V11 // 0b52065e
+ SHA256H V4.S4, V2, V11 // 4b40045e
+ SHA256SU0 V0.S4, V16.S4 // 1028285e
+ SHA256SU1 V31.S4, V3.S4, V15.S4 // 6f601f5e
+ VSHL $7, V22.D2, V25.D2 // d956474f
+ VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c
+ VST1 [V2.S4, V3.S4, V4.S4, V5.S4], (R14) // c229004c
+ VST1.P [V25.S4], (R7)(R29) // f9789d4c
+ VST1.P [V25.D2, V26.D2], 32(R7) // f9ac9f4c
+ VST1.P [V14.D1, V15.D1], (R7)(R23) // eeac970c
+ VST1.P [V25.D2, V26.D2, V27.D2], 48(R27) // 796f9f4c
+ VST1.P [V13.H8, V14.H8, V15.H8], (R3)(R14) // 6d648e4c
+ VST1.P [V16.S4, V17.S4, V18.S4, V19.S4], 64(R6) // d0289f4c
+ VST1.P [V19.H4, V20.H4, V21.H4, V22.H4], (R4)(R16) // 9324900c
+ VST1 V12.B[3], (R1) // 2c0c000d
+ VST1 V12.B[3], (R1) // 2c0c000d
+ VST1 V25.S[2], (R20) // 9982004d
+ VST1 V9.D[1], (RSP) // e987004d
+ VST1.P V30.B[6], 1(R3) // 7e189f0d
+ VST1.P V8.B[0], (R3)(R21) // 6800950d
+ VST1.P V15.H[5], 2(R10) // 4f499f4d
+ VST1.P V1.H[7], (R23)(R11) // e15a8b4d
+ VST1.P V26.S[0], 4(R11) // 7a819f0d
+ VST1.P V9.S[1], (R16)(R21) // 0992950d
+ VST1.P V16.D[0], 8(R9) // 30859f0d
+ VST1.P V23.D[1], (R21)(R16) // b786904d
+ VSUB V1, V12, V23 // 9785e17e
+ VUADDLV V31.S4, V11 // eb3bb06e
+ UCVTFWS R11, F19 // 7301231e
+ UCVTFWD R26, F13 // 4d03631e
+ UCVTFS R23, F11 // eb02239e
+ UCVTFD R5, F29 // bd00639e
+ VMOV V0.B[1], R11 // 0b3c030e
+ VMOV V1.H[3], R12 // 2c3c0e0e
+ VUSHR $6, V22.H8, V23.H8 // d7061a6f
+
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
new file mode 100644
index 0000000..3d3de1d
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -0,0 +1,438 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Negative test cases for the arm64 assembler: one rejected instruction per
+// line. The trailing ERROR comment quotes (part of) the diagnostic expected
+// for that line, so keep each case and its ERROR comment on the same line.
+TEXT errors(SB),$0
+ AND $1, RSP // ERROR "illegal source register"
+ ANDS $1, R0, RSP // ERROR "illegal combination"
+ ADDSW R7->32, R14, R13 // ERROR "shift amount out of range 0 to 31"
+ ADD R1.UXTB<<5, R2, R3 // ERROR "shift amount out of range 0 to 4"
+ ADDS R1.UXTX<<7, R2, R3 // ERROR "shift amount out of range 0 to 4"
+ ADDS R5, R6, RSP // ERROR "illegal destination register"
+ SUBS R5, R6, RSP // ERROR "illegal destination register"
+ ADDSW R5, R6, RSP // ERROR "illegal destination register"
+ SUBSW R5, R6, RSP // ERROR "illegal destination register"
+ ADDS $0xff, R6, RSP // ERROR "illegal destination register"
+ ADDS $0xffff0, R6, RSP // ERROR "illegal destination register"
+ ADDS $0x1000100010001000, R6, RSP // ERROR "illegal destination register"
+ ADDS $0x10001000100011, R6, RSP // ERROR "illegal destination register"
+ ADDSW $0xff, R6, RSP // ERROR "illegal destination register"
+ ADDSW $0xffff0, R6, RSP // ERROR "illegal destination register"
+ ADDSW $0x1000100010001000, R6, RSP // ERROR "illegal destination register"
+ ADDSW $0x10001000100011, R6, RSP // ERROR "illegal destination register"
+ SUBS $0xff, R6, RSP // ERROR "illegal destination register"
+ SUBS $0xffff0, R6, RSP // ERROR "illegal destination register"
+ SUBS $0x1000100010001000, R6, RSP // ERROR "illegal destination register"
+ SUBS $0x10001000100011, R6, RSP // ERROR "illegal destination register"
+ SUBSW $0xff, R6, RSP // ERROR "illegal destination register"
+ SUBSW $0xffff0, R6, RSP // ERROR "illegal destination register"
+ SUBSW $0x1000100010001000, R6, RSP // ERROR "illegal destination register"
+ SUBSW $0x10001000100011, R6, RSP // ERROR "illegal destination register"
+ AND $0x22220000, R2, RSP // ERROR "illegal combination"
+ ANDS $0x22220000, R2, RSP // ERROR "illegal combination"
+ ADD R1, R2, R3, R4 // ERROR "illegal combination"
+ BICW R7@>33, R5, R16 // ERROR "shift amount out of range 0 to 31"
+ NEGW R7<<33, R5 // ERROR "shift amount out of range 0 to 31"
+ NEGSW R7<<33, R5 // ERROR "shift amount out of range 0 to 31"
+ ADD R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ ADDW R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ ADDS R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ ADDSW R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ SUB R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ SUBW R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ SUBS R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ SUBSW R7@>2, R5, R16 // ERROR "unsupported shift operator"
+ CMP R7@>2, R5 // ERROR "unsupported shift operator"
+ CMPW R7@>2, R5 // ERROR "unsupported shift operator"
+ CMN R7@>2, R5 // ERROR "unsupported shift operator"
+ CMNW R7@>2, R5 // ERROR "unsupported shift operator"
+ NEG R7@>2, R5 // ERROR "unsupported shift operator"
+ NEGW R7@>2, R5 // ERROR "unsupported shift operator"
+ NEGS R7@>2, R5 // ERROR "unsupported shift operator"
+ NEGSW R7@>2, R5 // ERROR "unsupported shift operator"
+ CINC CS, R2, R3, R4 // ERROR "illegal combination"
+ CSEL LT, R1, R2 // ERROR "illegal combination"
+ CINC AL, R2, R3 // ERROR "invalid condition"
+ CINC NV, R2, R3 // ERROR "invalid condition"
+ CINVW AL, R2, R3 // ERROR "invalid condition"
+ CINV NV, R2, R3 // ERROR "invalid condition"
+ CNEG AL, R2, R3 // ERROR "invalid condition"
+ CNEGW NV, R2, R3 // ERROR "invalid condition"
+ CSET AL, R2 // ERROR "invalid condition"
+ CSET NV, R2 // ERROR "invalid condition"
+ CSETMW AL, R2 // ERROR "invalid condition"
+ CSETM NV, R2 // ERROR "invalid condition"
+ LDP.P 8(R2), (R2, R3) // ERROR "constrained unpredictable behavior"
+ LDP.W 8(R3), (R2, R3) // ERROR "constrained unpredictable behavior"
+ LDP (R1), (R2, R2) // ERROR "constrained unpredictable behavior"
+ LDP (R0), (F0, F1) // ERROR "invalid register pair"
+ LDP (R0), (R3, ZR) // ERROR "invalid register pair"
+ LDXPW (RSP), (R2, R2) // ERROR "constrained unpredictable behavior"
+ LDAXPW (R5), (R2, R2) // ERROR "constrained unpredictable behavior"
+ MOVD.P 300(R2), R3 // ERROR "offset out of range [-256,255]"
+ MOVD.P R3, 344(R2) // ERROR "offset out of range [-256,255]"
+ MOVD (R3)(R7.SXTX<<2), R8 // ERROR "invalid index shift amount"
+ MOVWU (R5)(R4.UXTW<<3), R10 // ERROR "invalid index shift amount"
+ MOVWU (R5)(R4<<1), R10 // ERROR "invalid index shift amount"
+ MOVB (R5)(R4.SXTW<<5), R10 // ERROR "invalid index shift amount"
+ MOVH R5, (R6)(R2<<3) // ERROR "invalid index shift amount"
+ MADD R1, R2, R3 // ERROR "illegal combination"
+ MOVD.P R1, 8(R1) // ERROR "constrained unpredictable behavior"
+ MOVD.W 16(R2), R2 // ERROR "constrained unpredictable behavior"
+ STP (F2, F3), (R0) // ERROR "invalid register pair"
+ STP.W (R1, R2), 8(R1) // ERROR "constrained unpredictable behavior"
+ STP.P (R1, R2), 8(R2) // ERROR "constrained unpredictable behavior"
+ STLXP (R6, R11), (RSP), R6 // ERROR "constrained unpredictable behavior"
+ STXP (R6, R11), (R2), R2 // ERROR "constrained unpredictable behavior"
+ STLXR R3, (RSP), R3 // ERROR "constrained unpredictable behavior"
+ STXR R3, (R4), R4 // ERROR "constrained unpredictable behavior"
+ STLXRB R2, (R5), R5 // ERROR "constrained unpredictable behavior"
+ VLD1 (R8)(R13), [V2.B16] // ERROR "illegal combination"
+ VLD1 8(R9), [V2.B16] // ERROR "illegal combination"
+ VST1 [V1.B16], (R8)(R13) // ERROR "illegal combination"
+ VST1 [V1.B16], 9(R2) // ERROR "illegal combination"
+ VLD1 8(R8)(R13), [V2.B16] // ERROR "illegal combination"
+ VMOV V8.D[2], V12.D[1] // ERROR "register element index out of range 0 to 1"
+ VMOV V8.S[4], V12.S[1] // ERROR "register element index out of range 0 to 3"
+ VMOV V8.H[8], V12.H[1] // ERROR "register element index out of range 0 to 7"
+ VMOV V8.B[16], V12.B[1] // ERROR "register element index out of range 0 to 15"
+ VMOV V8.D[0], V12.S[1] // ERROR "operand mismatch"
+ VMOV V8.D[0], V12.H[1] // ERROR "operand mismatch"
+ VMOV V8.D[0], V12.B[1] // ERROR "operand mismatch"
+ VMOV V8.S[0], V12.H[1] // ERROR "operand mismatch"
+ VMOV V8.S[0], V12.B[1] // ERROR "operand mismatch"
+ VMOV V8.H[0], V12.B[1] // ERROR "operand mismatch"
+ VMOV V8.B[16], R3 // ERROR "register element index out of range 0 to 15"
+ VMOV V8.H[9], R3 // ERROR "register element index out of range 0 to 7"
+ VMOV V8.S[4], R3 // ERROR "register element index out of range 0 to 3"
+ VMOV V8.D[2], R3 // ERROR "register element index out of range 0 to 1"
+ VDUP V8.B[16], V3.B16 // ERROR "register element index out of range 0 to 15"
+ VDUP V8.B[17], V3.B8 // ERROR "register element index out of range 0 to 15"
+ VDUP V8.H[9], V3.H4 // ERROR "register element index out of range 0 to 7"
+ VDUP V8.H[9], V3.H8 // ERROR "register element index out of range 0 to 7"
+ VDUP V8.S[4], V3.S2 // ERROR "register element index out of range 0 to 3"
+ VDUP V8.S[4], V3.S4 // ERROR "register element index out of range 0 to 3"
+ VDUP V8.D[2], V3.D2 // ERROR "register element index out of range 0 to 1"
+ VFMLA V1.D2, V12.D2, V3.S2 // ERROR "operand mismatch"
+ VFMLA V1.S2, V12.S2, V3.D2 // ERROR "operand mismatch"
+ VFMLA V1.S4, V12.S2, V3.D2 // ERROR "operand mismatch"
+ VFMLA V1.H4, V12.H4, V3.D2 // ERROR "operand mismatch"
+ VFMLS V1.S2, V12.S2, V3.S4 // ERROR "operand mismatch"
+ VFMLS V1.S2, V12.D2, V3.S4 // ERROR "operand mismatch"
+ VFMLS V1.S2, V12.S4, V3.D2 // ERROR "operand mismatch"
+ VFMLA V1.B8, V12.B8, V3.B8 // ERROR "invalid arrangement"
+ VFMLA V1.B16, V12.B16, V3.B16 // ERROR "invalid arrangement"
+ VFMLA V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VFMLA V1.H8, V12.H8, V3.H8 // ERROR "invalid arrangement"
+ VFMLA V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VFMLS V1.B8, V12.B8, V3.B8 // ERROR "invalid arrangement"
+ VFMLS V1.B16, V12.B16, V3.B16 // ERROR "invalid arrangement"
+ VFMLS V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VFMLS V1.H8, V12.H8, V3.H8 // ERROR "invalid arrangement"
+ VFMLS V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VST1.P [V4.S4,V5.S4], 48(R1) // ERROR "invalid post-increment offset"
+ VST1.P [V4.S4], 8(R1) // ERROR "invalid post-increment offset"
+ VLD1.P 32(R1), [V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset"
+ VLD1.P 48(R1), [V7.S4, V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset"
+ VPMULL V1.D1, V2.H4, V3.Q1 // ERROR "invalid arrangement"
+ VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch"
+ VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "operand mismatch"
+ VPMULL V1.B16, V2.B16, V3.H8 // ERROR "operand mismatch"
+ VPMULL2 V1.D2, V2.H4, V3.Q1 // ERROR "invalid arrangement"
+ VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch"
+ VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "operand mismatch"
+ VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "operand mismatch"
+ VEXT $8, V1.B16, V2.B8, V2.B16 // ERROR "invalid arrangement"
+ VEXT $8, V1.H8, V2.H8, V2.H8 // ERROR "invalid arrangement"
+ VRBIT V1.B16, V2.B8 // ERROR "invalid arrangement"
+ VRBIT V1.H4, V2.H4 // ERROR "invalid arrangement"
+ VUSHR $56, V1.D2, V2.H4 // ERROR "invalid arrangement"
+ VUSHR $127, V1.D2, V2.D2 // ERROR "shift out of range"
+ VLD1.P (R8)(R9.SXTX<<2), [V2.B16] // ERROR "invalid extended register"
+ VLD1.P (R8)(R9<<2), [V2.B16] // ERROR "invalid extended register"
+ VST1.P [V1.B16], (R8)(R9.UXTW) // ERROR "invalid extended register"
+ VST1.P [V1.B16], (R8)(R9<<1) // ERROR "invalid extended register"
+ VREV64 V1.H4, V2.H8 // ERROR "invalid arrangement"
+ VREV64 V1.D1, V2.D1 // ERROR "invalid arrangement"
+ VREV16 V1.D1, V2.D1 // ERROR "invalid arrangement"
+ VREV16 V1.B8, V2.B16 // ERROR "invalid arrangement"
+ VREV16 V1.H4, V2.H4 // ERROR "invalid arrangement"
+ FLDPQ (R0), (R1, R2) // ERROR "invalid register pair"
+ FLDPQ (R1), (F2, F2) // ERROR "constrained unpredictable behavior"
+ FSTPQ (R1, R2), (R0) // ERROR "invalid register pair"
+ FLDPD (R0), (R1, R2) // ERROR "invalid register pair"
+ FLDPD (R1), (F2, F2) // ERROR "constrained unpredictable behavior"
+ FLDPS (R2), (F3, F3) // ERROR "constrained unpredictable behavior"
+ FSTPD (R1, R2), (R0) // ERROR "invalid register pair"
+ FMOVS (F2), F0 // ERROR "illegal combination"
+ FMOVD F0, (F1) // ERROR "illegal combination"
+ LDADDD R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDW R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDH R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDB R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDLD R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDLW R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDLH R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDLB R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRD R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRW R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRH R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRB R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRLD R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRLW R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRLH R5, (R6), ZR // ERROR "illegal destination register"
+ LDCLRLB R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORD R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORW R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORH R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORB R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORLD R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORLW R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORLH R5, (R6), ZR // ERROR "illegal destination register"
+ LDEORLB R5, (R6), ZR // ERROR "illegal destination register"
+ LDORD R5, (R6), ZR // ERROR "illegal destination register"
+ LDORW R5, (R6), ZR // ERROR "illegal destination register"
+ LDORH R5, (R6), ZR // ERROR "illegal destination register"
+ LDORB R5, (R6), ZR // ERROR "illegal destination register"
+ LDORLD R5, (R6), ZR // ERROR "illegal destination register"
+ LDORLW R5, (R6), ZR // ERROR "illegal destination register"
+ LDORLH R5, (R6), ZR // ERROR "illegal destination register"
+ LDORLB R5, (R6), ZR // ERROR "illegal destination register"
+ LDADDAD R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDAW R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDAH R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDAB R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDALD R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDALW R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDALH R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDALB R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDD R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDW R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDH R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDB R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDLD R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDLW R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDLH R5, (R6), RSP // ERROR "illegal destination register"
+ LDADDLB R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRAD R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRAW R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRAH R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRAB R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRALD R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRALW R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRALH R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRALB R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRD R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRW R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRH R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRB R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRLD R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRLW R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRLH R5, (R6), RSP // ERROR "illegal destination register"
+ LDCLRLB R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORAD R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORAW R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORAH R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORAB R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORALD R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORALW R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORALH R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORALB R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORD R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORW R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORH R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORB R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORLD R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORLW R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORLH R5, (R6), RSP // ERROR "illegal destination register"
+ LDEORLB R5, (R6), RSP // ERROR "illegal destination register"
+ LDORAD R5, (R6), RSP // ERROR "illegal destination register"
+ LDORAW R5, (R6), RSP // ERROR "illegal destination register"
+ LDORAH R5, (R6), RSP // ERROR "illegal destination register"
+ LDORAB R5, (R6), RSP // ERROR "illegal destination register"
+ LDORALD R5, (R6), RSP // ERROR "illegal destination register"
+ LDORALW R5, (R6), RSP // ERROR "illegal destination register"
+ LDORALH R5, (R6), RSP // ERROR "illegal destination register"
+ LDORALB R5, (R6), RSP // ERROR "illegal destination register"
+ LDORD R5, (R6), RSP // ERROR "illegal destination register"
+ LDORW R5, (R6), RSP // ERROR "illegal destination register"
+ LDORH R5, (R6), RSP // ERROR "illegal destination register"
+ LDORB R5, (R6), RSP // ERROR "illegal destination register"
+ LDORLD R5, (R6), RSP // ERROR "illegal destination register"
+ LDORLW R5, (R6), RSP // ERROR "illegal destination register"
+ LDORLH R5, (R6), RSP // ERROR "illegal destination register"
+ LDORLB R5, (R6), RSP // ERROR "illegal destination register"
+ SWPAD R5, (R6), RSP // ERROR "illegal destination register"
+ SWPAW R5, (R6), RSP // ERROR "illegal destination register"
+ SWPAH R5, (R6), RSP // ERROR "illegal destination register"
+ SWPAB R5, (R6), RSP // ERROR "illegal destination register"
+ SWPALD R5, (R6), RSP // ERROR "illegal destination register"
+ SWPALW R5, (R6), RSP // ERROR "illegal destination register"
+ SWPALH R5, (R6), RSP // ERROR "illegal destination register"
+ SWPALB R5, (R6), RSP // ERROR "illegal destination register"
+ SWPD R5, (R6), RSP // ERROR "illegal destination register"
+ SWPW R5, (R6), RSP // ERROR "illegal destination register"
+ SWPH R5, (R6), RSP // ERROR "illegal destination register"
+ SWPB R5, (R6), RSP // ERROR "illegal destination register"
+ SWPLD R5, (R6), RSP // ERROR "illegal destination register"
+ SWPLW R5, (R6), RSP // ERROR "illegal destination register"
+ SWPLH R5, (R6), RSP // ERROR "illegal destination register"
+ SWPLB R5, (R6), RSP // ERROR "illegal destination register"
+ STXR R5, (R6), RSP // ERROR "illegal destination register"
+ STXRW R5, (R6), RSP // ERROR "illegal destination register"
+ STLXR R5, (R6), RSP // ERROR "illegal destination register"
+ STLXRW R5, (R6), RSP // ERROR "illegal destination register"
+ STXP (R5, R7), (R6), RSP // ERROR "illegal destination register"
+ STXPW (R5, R7), (R6), RSP // ERROR "illegal destination register"
+ STLXP (R5, R7), (R6), RSP // ERROR "illegal destination register"
+ STLXPW (R5, R7), (R6), RSP // ERROR "illegal destination register"
+ MSR OSLAR_EL1, R5 // ERROR "illegal combination"
+ MRS R11, AIDR_EL1 // ERROR "illegal combination"
+ MSR R6, AIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, AMCFGR_EL0 // ERROR "system register is not writable"
+ MSR R6, AMCGCR_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER00_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER01_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER02_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER03_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER04_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER05_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER06_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER07_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER08_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER09_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER010_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER011_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER012_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER013_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER014_EL0 // ERROR "system register is not writable"
+ MSR R6, AMEVTYPER015_EL0 // ERROR "system register is not writable"
+ MSR R6, CCSIDR2_EL1 // ERROR "system register is not writable"
+ MSR R6, CCSIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, CLIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, CNTPCT_EL0 // ERROR "system register is not writable"
+ MSR R6, CNTVCT_EL0 // ERROR "system register is not writable"
+ MSR R6, CTR_EL0 // ERROR "system register is not writable"
+ MSR R6, CurrentEL // ERROR "system register is not writable"
+ MSR R6, DBGAUTHSTATUS_EL1 // ERROR "system register is not writable"
+ MSR R6, DBGDTRRX_EL0 // ERROR "system register is not writable"
+ MSR R6, DCZID_EL0 // ERROR "system register is not writable"
+ MSR R6, ERRIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, ERXFR_EL1 // ERROR "system register is not writable"
+ MSR R6, ERXPFGF_EL1 // ERROR "system register is not writable"
+ MSR R6, GMID_EL1 // ERROR "system register is not writable"
+ MSR R6, ICC_HPPIR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ICC_HPPIR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ICC_IAR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ICC_IAR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ICC_RPR_EL1 // ERROR "system register is not writable"
+ MSR R6, ICV_HPPIR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ICV_HPPIR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ICV_IAR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ICV_IAR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ICV_RPR_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64AFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64AFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64DFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64DFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64ISAR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64ISAR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64MMFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64MMFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64MMFR2_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64PFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64PFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AA64ZFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_AFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_DFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR2_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR3_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR4_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR5_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_ISAR6_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_MMFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_MMFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_MMFR2_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_MMFR3_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_MMFR4_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_PFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_PFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, ID_PFR2_EL1 // ERROR "system register is not writable"
+ MSR R6, ISR_EL1 // ERROR "system register is not writable"
+ MSR R6, LORID_EL1 // ERROR "system register is not writable"
+ MSR R6, MDCCSR_EL0 // ERROR "system register is not writable"
+ MSR R6, MDRAR_EL1 // ERROR "system register is not writable"
+ MSR R6, MIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, MPAMIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, MPIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, MVFR0_EL1 // ERROR "system register is not writable"
+ MSR R6, MVFR1_EL1 // ERROR "system register is not writable"
+ MSR R6, MVFR2_EL1 // ERROR "system register is not writable"
+ MSR R6, OSLSR_EL1 // ERROR "system register is not writable"
+ MSR R6, PMBIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, PMCEID0_EL0 // ERROR "system register is not writable"
+ MSR R6, PMCEID1_EL0 // ERROR "system register is not writable"
+ MSR R6, PMMIR_EL1 // ERROR "system register is not writable"
+ MSR R6, PMSIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, REVIDR_EL1 // ERROR "system register is not writable"
+ MSR R6, RNDR // ERROR "system register is not writable"
+ MRS DBGDTRTX_EL0, R5 // ERROR "system register is not readable"
+ MRS ICV_DIR_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_SGI1R_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_SGI0R_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_EOIR1_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_EOIR0_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_DIR_EL1, R5 // ERROR "system register is not readable"
+ MRS ICC_ASGI1R_EL1, R5 // ERROR "system register is not readable"
+ MRS ICV_EOIR0_EL1, R3 // ERROR "system register is not readable"
+ MRS ICV_EOIR1_EL1, R3 // ERROR "system register is not readable"
+ MRS PMSWINC_EL0, R3 // ERROR "system register is not readable"
+ MRS OSLAR_EL1, R3 // ERROR "system register is not readable"
+ VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset"
+ VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement"
+ VUXTL V30.D2, V30.H8 // ERROR "operand mismatch"
+ VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch"
+ VUXTL V3.D2, V4.B8 // ERROR "operand mismatch"
+ VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch"
+ VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement"
+ VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch"
+ VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch"
+ VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range"
+ VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range"
+ VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch"
+ VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement"
+ VUADDW V9.B8, V12.H8, V14.B8 // ERROR "invalid arrangement"
+ VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch"
+ VUMAX V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VUMIN V1.D2, V2.D2, V3.D2 // ERROR "invalid arrangement"
+ VUMAX V1.B8, V2.B8, V3.B16 // ERROR "operand mismatch"
+ VUMIN V1.H4, V2.S4, V3.H4 // ERROR "operand mismatch"
+ VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range"
+ VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range"
+ CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register"
+ CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register"
+ CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous"
+ CASPD (R2, R3), (R2), (R8, R10) // ERROR "destination register pair must be contiguous"
+ ADD R1>>2, RSP, R3 // ERROR "illegal combination"
+ ADDS R2<<3, R3, RSP // ERROR "unexpected SP reference"
+ CMP R1<<5, RSP // ERROR "the left shift amount out of range 0 to 4"
+ MOVD.P y+8(FP), R1 // ERROR "illegal combination"
+ MOVD.W x-8(SP), R1 // ERROR "illegal combination"
+ LDP.P x+8(FP), (R0, R1) // ERROR "illegal combination"
+ LDP.W x+8(SP), (R0, R1) // ERROR "illegal combination"
+ ADD $0x1234567, R27, R3 // ERROR "cannot use REGTMP as source"
+ ADD $0x3fffffffc000, R27, R5 // ERROR "cannot use REGTMP as source"
+ AND $0x22220000, R27, R4 // ERROR "cannot use REGTMP as source"
+ ANDW $0x6006000060060, R27, R5 // ERROR "cannot use REGTMP as source"
+ STP (R3, R4), 0x1234567(R27) // ERROR "REGTMP used in large offset store"
+ LDP 0x1234567(R27), (R3, R4) // ERROR "REGTMP used in large offset load"
+ STP (R26, R27), 700(R2) // ERROR "cannot use REGTMP as source"
+ MOVK $0, R10 // ERROR "zero shifts cannot be handled correctly"
+ MOVK $(0<<32), R10 // ERROR "zero shifts cannot be handled correctly"
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/armerror.s b/src/cmd/asm/internal/asm/testdata/armerror.s
new file mode 100644
index 0000000..f2bed8d
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/armerror.s
@@ -0,0 +1,264 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+TEXT errors(SB),$0
+ MOVW (F0), R1 // ERROR "illegal base register"
+ MOVB (F0), R1 // ERROR "illegal base register"
+ MOVH (F0), R1 // ERROR "illegal base register"
+ MOVF (F0), F1 // ERROR "illegal base register"
+ MOVD (F0), F1 // ERROR "illegal base register"
+ MOVW R1, (F0) // ERROR "illegal base register"
+ MOVB R2, (F0) // ERROR "illegal base register"
+ MOVH R3, (F0) // ERROR "illegal base register"
+ MOVF F4, (F0) // ERROR "illegal base register"
+ MOVD F5, (F0) // ERROR "illegal base register"
+ MOVM.IA (F1), [R0-R4] // ERROR "illegal base register"
+ MOVM.DA (F1), [R0-R4] // ERROR "illegal base register"
+ MOVM.IB (F1), [R0-R4] // ERROR "illegal base register"
+ MOVM.DB (F1), [R0-R4] // ERROR "illegal base register"
+ MOVM.IA [R0-R4], (F1) // ERROR "illegal base register"
+ MOVM.DA [R0-R4], (F1) // ERROR "illegal base register"
+ MOVM.IB [R0-R4], (F1) // ERROR "illegal base register"
+ MOVM.DB [R0-R4], (F1) // ERROR "illegal base register"
+ MOVW R0<<0(F1), R1 // ERROR "illegal base register"
+ MOVB R0<<0(F1), R1 // ERROR "illegal base register"
+ MOVW R1, R0<<0(F1) // ERROR "illegal base register"
+ MOVB R2, R0<<0(F1) // ERROR "illegal base register"
+ MOVF 0x00ffffff(F2), F1 // ERROR "illegal base register"
+ MOVD 0x00ffffff(F2), F1 // ERROR "illegal base register"
+ MOVF F2, 0x00ffffff(F2) // ERROR "illegal base register"
+ MOVD F2, 0x00ffffff(F2) // ERROR "illegal base register"
+ MULS.S R1, R2, R3, R4 // ERROR "invalid .S suffix"
+ ADD.P R1, R2, R3 // ERROR "invalid .P suffix"
+ SUB.W R2, R3 // ERROR "invalid .W suffix"
+ BL 4(R4) // ERROR "non-zero offset"
+ ADDF F0, R1, F2 // ERROR "illegal combination"
+ SWI (R0) // ERROR "illegal combination"
+ MULAD F0, F1 // ERROR "illegal combination"
+ MULAF F0, F1 // ERROR "illegal combination"
+ MULSD F0, F1 // ERROR "illegal combination"
+ MULSF F0, F1 // ERROR "illegal combination"
+ NMULAD F0, F1 // ERROR "illegal combination"
+ NMULAF F0, F1 // ERROR "illegal combination"
+ NMULSD F0, F1 // ERROR "illegal combination"
+ NMULSF F0, F1 // ERROR "illegal combination"
+ FMULAD F0, F1 // ERROR "illegal combination"
+ FMULAF F0, F1 // ERROR "illegal combination"
+ FMULSD F0, F1 // ERROR "illegal combination"
+ FMULSF F0, F1 // ERROR "illegal combination"
+ FNMULAD F0, F1 // ERROR "illegal combination"
+ FNMULAF F0, F1 // ERROR "illegal combination"
+ FNMULSD F0, F1 // ERROR "illegal combination"
+ FNMULSF F0, F1 // ERROR "illegal combination"
+ NEGF F0, F1, F2 // ERROR "illegal combination"
+ NEGD F0, F1, F2 // ERROR "illegal combination"
+ ABSF F0, F1, F2 // ERROR "illegal combination"
+ ABSD F0, F1, F2 // ERROR "illegal combination"
+ SQRTF F0, F1, F2 // ERROR "illegal combination"
+ SQRTD F0, F1, F2 // ERROR "illegal combination"
+ MOVF F0, F1, F2 // ERROR "illegal combination"
+ MOVD F0, F1, F2 // ERROR "illegal combination"
+ MOVDF F0, F1, F2 // ERROR "illegal combination"
+ MOVFD F0, F1, F2 // ERROR "illegal combination"
+ MOVM.IA 4(R1), [R0-R4] // ERROR "offset must be zero"
+ MOVM.DA 4(R1), [R0-R4] // ERROR "offset must be zero"
+ MOVM.IB 4(R1), [R0-R4] // ERROR "offset must be zero"
+ MOVM.DB 4(R1), [R0-R4] // ERROR "offset must be zero"
+ MOVM.IA [R0-R4], 4(R1) // ERROR "offset must be zero"
+ MOVM.DA [R0-R4], 4(R1) // ERROR "offset must be zero"
+ MOVM.IB [R0-R4], 4(R1) // ERROR "offset must be zero"
+ MOVM.DB [R0-R4], 4(R1) // ERROR "offset must be zero"
+ MOVW CPSR, FPSR // ERROR "illegal combination"
+ MOVW FPSR, CPSR // ERROR "illegal combination"
+ MOVW CPSR, errors(SB) // ERROR "illegal combination"
+ MOVW errors(SB), CPSR // ERROR "illegal combination"
+ MOVW FPSR, errors(SB) // ERROR "illegal combination"
+ MOVW errors(SB), FPSR // ERROR "illegal combination"
+ MOVW F0, errors(SB) // ERROR "illegal combination"
+ MOVW errors(SB), F0 // ERROR "illegal combination"
+ MOVW $20, errors(SB) // ERROR "illegal combination"
+ MOVW errors(SB), $20 // ERROR "illegal combination"
+ MOVW (R1), [R0-R4] // ERROR "illegal combination"
+ MOVW [R0-R4], (R1) // ERROR "illegal combination"
+ MOVB $245, R1 // ERROR "illegal combination"
+ MOVH $245, R1 // ERROR "illegal combination"
+ MOVB $0xff000000, R1 // ERROR "illegal combination"
+ MOVH $0xff000000, R1 // ERROR "illegal combination"
+ MOVB $0x00ffffff, R1 // ERROR "illegal combination"
+ MOVH $0x00ffffff, R1 // ERROR "illegal combination"
+ MOVB FPSR, g // ERROR "illegal combination"
+ MOVH FPSR, g // ERROR "illegal combination"
+ MOVB g, FPSR // ERROR "illegal combination"
+ MOVH g, FPSR // ERROR "illegal combination"
+ MOVB CPSR, g // ERROR "illegal combination"
+ MOVH CPSR, g // ERROR "illegal combination"
+ MOVB g, CPSR // ERROR "illegal combination"
+ MOVH g, CPSR // ERROR "illegal combination"
+ MOVB $0xff000000, CPSR // ERROR "illegal combination"
+ MOVH $0xff000000, CPSR // ERROR "illegal combination"
+ MOVB $0xff000000, FPSR // ERROR "illegal combination"
+ MOVH $0xff000000, FPSR // ERROR "illegal combination"
+ MOVB $0xffffff00, CPSR // ERROR "illegal combination"
+ MOVH $0xffffff00, CPSR // ERROR "illegal combination"
+ MOVB $0xfffffff0, FPSR // ERROR "illegal combination"
+ MOVH $0xfffffff0, FPSR // ERROR "illegal combination"
+ MOVB (R1), [R0-R4] // ERROR "illegal combination"
+ MOVB [R0-R4], (R1) // ERROR "illegal combination"
+ MOVH (R1), [R0-R4] // ERROR "illegal combination"
+ MOVH [R0-R4], (R1) // ERROR "illegal combination"
+ MOVB $0xff(R0), R1 // ERROR "illegal combination"
+ MOVH $0xff(R0), R1 // ERROR "illegal combination"
+ MOVB $errors(SB), R2 // ERROR "illegal combination"
+ MOVH $errors(SB), R2 // ERROR "illegal combination"
+ MOVB F0, R0 // ERROR "illegal combination"
+ MOVH F0, R0 // ERROR "illegal combination"
+ MOVB R0, F0 // ERROR "illegal combination"
+ MOVH R0, F0 // ERROR "illegal combination"
+ MOVB R0>>0(R1), R2 // ERROR "bad shift"
+ MOVB R0->0(R1), R2 // ERROR "bad shift"
+ MOVB R0@>0(R1), R2 // ERROR "bad shift"
+ MOVBS R0>>0(R1), R2 // ERROR "bad shift"
+ MOVBS R0->0(R1), R2 // ERROR "bad shift"
+ MOVBS R0@>0(R1), R2 // ERROR "bad shift"
+ MOVF CPSR, F1 // ERROR "illegal combination"
+ MOVD R1, CPSR // ERROR "illegal combination"
+ MOVW F1, F2 // ERROR "illegal combination"
+ MOVB F1, F2 // ERROR "illegal combination"
+ MOVH F1, F2 // ERROR "illegal combination"
+ MOVF R1, F2 // ERROR "illegal combination"
+ MOVD R1, F2 // ERROR "illegal combination"
+ MOVF R1, R1 // ERROR "illegal combination"
+ MOVD R1, R2 // ERROR "illegal combination"
+ MOVFW R1, R2 // ERROR "illegal combination"
+ MOVDW R1, R2 // ERROR "illegal combination"
+ MOVWF R1, R2 // ERROR "illegal combination"
+ MOVWD R1, R2 // ERROR "illegal combination"
+ MOVWD CPSR, R2 // ERROR "illegal combination"
+ MOVWF CPSR, R2 // ERROR "illegal combination"
+ MOVWD R1, CPSR // ERROR "illegal combination"
+ MOVWF R1, CPSR // ERROR "illegal combination"
+ MOVDW CPSR, R2 // ERROR "illegal combination"
+ MOVFW CPSR, R2 // ERROR "illegal combination"
+ MOVDW R1, CPSR // ERROR "illegal combination"
+ MOVFW R1, CPSR // ERROR "illegal combination"
+ BFX $12, $41, R2, R3 // ERROR "wrong width or LSB"
+ BFX $12, $-2, R2 // ERROR "wrong width or LSB"
+ BFXU $40, $4, R2, R3 // ERROR "wrong width or LSB"
+ BFXU $-40, $4, R2 // ERROR "wrong width or LSB"
+ BFX $-2, $4, R2, R3 // ERROR "wrong width or LSB"
+ BFXU $4, R2, R5, R2 // ERROR "missing or wrong LSB"
+ BFXU $4, R2, R5 // ERROR "missing or wrong LSB"
+ BFC $12, $8, R2, R3 // ERROR "illegal combination"
+ MOVB R0>>8, R2 // ERROR "illegal shift"
+ MOVH R0<<16, R2 // ERROR "illegal shift"
+ MOVBS R0->8, R2 // ERROR "illegal shift"
+ MOVHS R0<<24, R2 // ERROR "illegal shift"
+ MOVBU R0->24, R2 // ERROR "illegal shift"
+ MOVHU R0@>1, R2 // ERROR "illegal shift"
+ XTAB R0>>8, R2 // ERROR "illegal shift"
+ XTAH R0<<16, R2 // ERROR "illegal shift"
+ XTABU R0->24, R2 // ERROR "illegal shift"
+ XTAHU R0@>1, R2 // ERROR "illegal shift"
+ XTAB R0>>8, R5, R2 // ERROR "illegal shift"
+ XTAH R0<<16, R5, R2 // ERROR "illegal shift"
+ XTABU R0->24, R5, R2 // ERROR "illegal shift"
+ XTAHU R0@>1, R5, R2 // ERROR "illegal shift"
+ AND.W R0, R1 // ERROR "invalid .W suffix"
+ ORR.P R2, R3, R4 // ERROR "invalid .P suffix"
+ CMP.S R1, R2 // ERROR "invalid .S suffix"
+ BIC.P $124, R1, R2 // ERROR "invalid .P suffix"
+ MOVW.S $124, R1 // ERROR "invalid .S suffix"
+ MVN.S $123, g // ERROR "invalid .S suffix"
+ RSB.U $0, R9 // ERROR "invalid .U suffix"
+ CMP.S $29, g // ERROR "invalid .S suffix"
+ ADD.W R1<<R2, R3 // ERROR "invalid .W suffix"
+ SUB.U R1<<R2, R3, R9 // ERROR "invalid .U suffix"
+ CMN.S R5->R2, R1 // ERROR "invalid .S suffix"
+ SLL.P R1, R2, R3 // ERROR "invalid .P suffix"
+ SRA.U R2, R8 // ERROR "invalid .U suffix"
+ SWI.S // ERROR "invalid .S suffix"
+ SWI.P $0 // ERROR "invalid .P suffix"
+ MOVW.S $0xaaaaaaaa, R7 // ERROR "invalid .S suffix"
+ MOVW.P $0xffffff44, R1 // ERROR "invalid .P suffix"
+ MOVW.S $0xffffff77, R1 // ERROR "invalid .S suffix"
+ MVN.S $0xffffffaa, R8 // ERROR "invalid .S suffix"
+ MVN.S $0xaaaaaaaa, R8 // ERROR "invalid .S suffix"
+ ADD.U $0xaaaaaaaa, R4 // ERROR "invalid .U suffix"
+ ORR.P $0x555555, R7, R3 // ERROR "invalid .P suffix"
+ TST.S $0xabcd1234, R2 // ERROR "invalid .S suffix"
+ MOVB.S R1, R2 // ERROR "invalid .S suffix"
+ MOVBU.P R1, R2 // ERROR "invalid .P suffix"
+ MOVBS.U R1, R2 // ERROR "invalid .U suffix"
+ MOVH.S R1, R2 // ERROR "invalid .S suffix"
+ MOVHU.P R1, R2 // ERROR "invalid .P suffix"
+ MOVHS.U R1, R2 // ERROR "invalid .U suffix"
+ MUL.P R0, R1, R2 // ERROR "invalid .P suffix"
+ MULU.W R1, R2 // ERROR "invalid .W suffix"
+ DIVHW.S R0, R1, R2 // ERROR "invalid .S suffix"
+ DIVHW.W R1, R2 // ERROR "invalid .W suffix"
+ MULL.W R2, R0, (R5, R8) // ERROR "invalid .W suffix"
+ MULLU.U R2, R0, (R5, R8) // ERROR "invalid .U suffix"
+ BFX.S $2, $4, R3 // ERROR "invalid .S suffix"
+ BFXU.W $2, $4, R3, R0 // ERROR "invalid .W suffix"
+ MOVB.S R1, 4(R2) // ERROR "invalid .S suffix"
+ MOVHU.S R1, 4(R2) // ERROR "invalid .S suffix"
+ MOVW.S R1, 4(R2) // ERROR "invalid .S suffix"
+ MOVBU.S 4(R2), R3 // ERROR "invalid .S suffix"
+ MOVH.S 4(R2), R3 // ERROR "invalid .S suffix"
+ MOVW.S 4(R2), R3 // ERROR "invalid .S suffix"
+ XTAB.S R0@>0, R2 // ERROR "invalid .S suffix"
+ XTAB.W R0@>8, R2, R9 // ERROR "invalid .W suffix"
+ MOVBU.S R0@>24, R1 // ERROR "invalid .S suffix"
+ MOVHS.S R0@>16, R1 // ERROR "invalid .S suffix"
+ MOVB.S R1, 0xaaaa(R2) // ERROR "invalid .S suffix"
+ MOVHU.S R1, 0xaaaa(R2) // ERROR "invalid .S suffix"
+ MOVW.S R1, 0xaaaa(R2) // ERROR "invalid .S suffix"
+ MOVBU.S 0xaaaa(R2), R3 // ERROR "invalid .S suffix"
+ MOVH.S 0xaaaa(R2), R3 // ERROR "invalid .S suffix"
+ MOVW.S 0xaaaa(R2), R3 // ERROR "invalid .S suffix"
+ MOVW.S CPSR, R1 // ERROR "invalid .S suffix"
+ MOVW.S R3, CPSR // ERROR "invalid .S suffix"
+ MOVW.S $0, CPSR // ERROR "invalid .S suffix"
+ MOVM.S (R0), [R2-R4] // ERROR "invalid .S suffix"
+ MOVM.S [R1-R6], (R9) // ERROR "invalid .S suffix"
+ SWPW.S R1, (R2), R3 // ERROR "invalid .S suffix"
+ MOVF.S (R0), F1 // ERROR "invalid .S suffix"
+ MOVF.S F9, (R4) // ERROR "invalid .S suffix"
+ MOVF.S 0xfff0(R0), F1 // ERROR "invalid .S suffix"
+ MOVF.S F9, 0xfff0(R4) // ERROR "invalid .S suffix"
+ ADDF.S F1, F2, F3 // ERROR "invalid .S suffix"
+ SUBD.U F1, F2 // ERROR "invalid .U suffix"
+ NEGF.W F9, F10 // ERROR "invalid .W suffix"
+ ABSD.P F9, F10 // ERROR "invalid .P suffix"
+ MOVW.S FPSR, R0 // ERROR "invalid .S suffix"
+ MOVW.P g, FPSR // ERROR "invalid .P suffix"
+ MOVW.S R1->4(R6), R2 // ERROR "invalid .S suffix"
+ MOVB.S R9, R2<<8(R4) // ERROR "invalid .S suffix"
+ MOVHU.S R9, R2<<0(R4) // ERROR "invalid .S suffix"
+ STREX.S R0, (R1), R2 // ERROR "invalid .S suffix"
+ LDREX.S (R2), R8 // ERROR "invalid .S suffix"
+ MOVF.S $0.0, F3 // ERROR "invalid .S suffix"
+ CMPF.S F1, F2 // ERROR "invalid .S suffix"
+ MOVFW.S F0, F9 // ERROR "invalid .S suffix"
+ MOVWF.W F3, F1 // ERROR "invalid .W suffix"
+ MOVFW.P F0, R9 // ERROR "invalid .P suffix"
+ MOVWF.W R3, F1 // ERROR "invalid .W suffix"
+ MOVW.S F0, R9 // ERROR "invalid .S suffix"
+ MOVW.U R3, F1 // ERROR "invalid .U suffix"
+ PLD.S 4(R1) // ERROR "invalid .S suffix"
+ CLZ.S R1, R2 // ERROR "invalid .S suffix"
+ MULBB.S R0, R1, R2 // ERROR "invalid .S suffix"
+ MULA.W R9, R6, R1, g // ERROR "invalid .W suffix"
+ MULS.S R2, R3, R4, g // ERROR "invalid .S suffix"
+
+ STREX R1, (R0) // ERROR "illegal combination"
+ STREX (R1), R0 // ERROR "illegal combination"
+ STREX R1, (R0), R1 // ERROR "cannot use same register as both source and destination"
+ STREX R1, (R0), R0 // ERROR "cannot use same register as both source and destination"
+ STREXD R0, (R2), R0 // ERROR "cannot use same register as both source and destination"
+ STREXD R0, (R2), R1 // ERROR "cannot use same register as both source and destination"
+ STREXD R0, (R2), R2 // ERROR "cannot use same register as both source and destination"
+ STREXD R1, (R4), R7 // ERROR "must be even"
+
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/armv6.s b/src/cmd/asm/internal/asm/testdata/armv6.s
new file mode 100644
index 0000000..361867f
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/armv6.s
@@ -0,0 +1,110 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB), DUPOK|NOSPLIT, $0
+
+ ADDF F0, F1, F2 // 002a31ee
+ ADDD.EQ F3, F4, F5 // 035b340e
+ ADDF.NE F0, F2 // 002a321e
+ ADDD F3, F5 // 035b35ee
+ SUBF F0, F1, F2 // 402a31ee
+ SUBD.EQ F3, F4, F5 // 435b340e
+ SUBF.NE F0, F2 // 402a321e
+ SUBD F3, F5 // 435b35ee
+ MULF F0, F1, F2 // 002a21ee
+ MULD.EQ F3, F4, F5 // 035b240e
+ MULF.NE F0, F2 // 002a221e
+ MULD F3, F5 // 035b25ee
+ NMULF F0, F1, F2 // 402a21ee
+ NMULF F3, F7 // 437a27ee
+ NMULD F0, F1, F2 // 402b21ee
+ NMULD F3, F7 // 437b27ee
+ MULAF F5, F6, F7 // 057a06ee
+ MULAD F5, F6, F7 // 057b06ee
+ MULSF F5, F6, F7 // 457a06ee
+ MULSD F5, F6, F7 // 457b06ee
+ NMULAF F5, F6, F7 // 457a16ee
+ NMULAD F5, F6, F7 // 457b16ee
+ NMULSF F5, F6, F7 // 057a16ee
+ NMULSD F5, F6, F7 // 057b16ee
+ FMULAF F5, F6, F7 // 057aa6ee
+ FMULAD F5, F6, F7 // 057ba6ee
+ FMULSF F5, F6, F7 // 457aa6ee
+ FMULSD F5, F6, F7 // 457ba6ee
+ FNMULAF F5, F6, F7 // 457a96ee
+ FNMULAD F5, F6, F7 // 457b96ee
+ FNMULSF F5, F6, F7 // 057a96ee
+ FNMULSD F5, F6, F7 // 057b96ee
+ DIVF F0, F1, F2 // 002a81ee
+ DIVD.EQ F3, F4, F5 // 035b840e
+ DIVF.NE F0, F2 // 002a821e
+ DIVD F3, F5 // 035b85ee
+ NEGF F0, F1 // 401ab1ee
+ NEGD F4, F5 // 445bb1ee
+ ABSF F0, F1 // c01ab0ee
+ ABSD F4, F5 // c45bb0ee
+ SQRTF F0, F1 // c01ab1ee
+ SQRTD F4, F5 // c45bb1ee
+ MOVFD F0, F1 // c01ab7ee
+ MOVDF F4, F5 // c45bb7ee
+
+ LDREX (R8), R9 // 9f9f98e1
+ LDREXD (R11), R12 // 9fcfbbe1
+ STREX R3, (R4), R5 // STREX (R4), R3, R5 // 935f84e1
+ STREXD R8, (R9), g // STREXD (R9), R8, g // 98afa9e1
+
+ CMPF F8, F9 // c89ab4ee10faf1ee
+ CMPD.CS F4, F5 // c45bb42e10faf12e
+ CMPF.VS F7 // c07ab56e10faf16e
+ CMPD F6 // c06bb5ee10faf1ee
+
+ MOVW R4, F8 // 104b08ee
+ MOVW F4, R8 // 108b14ee
+
+ MOVF (R4), F9 // 009a94ed
+ MOVD.EQ (R4), F9 // 009b940d
+ MOVF.NE (g), F3 // 003a9a1d
+ MOVD (g), F3 // 003b9aed
+ MOVF 0x20(R3), F9 // MOVF 32(R3), F9 // 089a93ed
+ MOVD.EQ 0x20(R4), F9 // MOVD.EQ 32(R4), F9 // 089b940d
+ MOVF.NE -0x20(g), F3 // MOVF.NE -32(g), F3 // 083a1a1d
+ MOVD -0x20(g), F3 // MOVD -32(g), F3 // 083b1aed
+ MOVF F9, (R4) // 009a84ed
+ MOVD.EQ F9, (R4) // 009b840d
+ MOVF.NE F3, (g) // 003a8a1d
+ MOVD F3, (g) // 003b8aed
+ MOVF F9, 0x20(R3) // MOVF F9, 32(R3) // 089a83ed
+ MOVD.EQ F9, 0x20(R4) // MOVD.EQ F9, 32(R4) // 089b840d
+ MOVF.NE F3, -0x20(g) // MOVF.NE F3, -32(g) // 083a0a1d
+ MOVD F3, -0x20(g) // MOVD F3, -32(g) // 083b0aed
+ MOVF 0x00ffffff(R2), F1 // MOVF 16777215(R2), F1
+ MOVD 0x00ffffff(R2), F1 // MOVD 16777215(R2), F1
+ MOVF F2, 0x00ffffff(R2) // MOVF F2, 16777215(R2)
+ MOVD F2, 0x00ffffff(R2) // MOVD F2, 16777215(R2)
+ MOVF F0, math·Exp(SB) // MOVF F0, math.Exp(SB)
+ MOVF math·Exp(SB), F0 // MOVF math.Exp(SB), F0
+ MOVD F0, math·Exp(SB) // MOVD F0, math.Exp(SB)
+ MOVD math·Exp(SB), F0 // MOVD math.Exp(SB), F0
+ MOVF F4, F5 // 445ab0ee
+ MOVD F6, F7 // 467bb0ee
+ MOVFW F6, F8 // c68abdee
+ MOVFW F6, R8 // c6fabdee108b1fee
+ MOVFW.U F6, F8 // c68abcee
+ MOVFW.U F6, R8 // c6fabcee108b1fee
+ MOVDW F6, F8 // c68bbdee
+ MOVDW F6, R8 // c6fbbdee108b1fee
+ MOVDW.U F6, F8 // c68bbcee
+ MOVDW.U F6, R8 // c6fbbcee108b1fee
+ MOVWF F6, F8 // c68ab8ee
+ MOVWF R6, F8 // 106b0feecf8ab8ee
+ MOVWF.U F6, F8 // 468ab8ee
+ MOVWF.U R6, F8 // 106b0fee4f8ab8ee
+ MOVWD F6, F8 // c68bb8ee
+ MOVWD R6, F8 // 106b0feecf8bb8ee
+ MOVWD.U F6, F8 // 468bb8ee
+ MOVWD.U R6, F8 // 106b0fee4f8bb8ee
+
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/aes_avx512f.s b/src/cmd/asm/internal/asm/testdata/avx512enc/aes_avx512f.s
new file mode 100644
index 0000000..87fa5f7
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/aes_avx512f.s
@@ -0,0 +1,336 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_aes_avx512f(SB), NOSPLIT, $0
+ VAESDEC X24, X7, X11 // 62124508ded8 or 6212c508ded8
+ VAESDEC X20, X7, X11 // 62324508dedc or 6232c508dedc
+ VAESDEC X24, X0, X11 // 62127d08ded8 or 6212fd08ded8
+ VAESDEC X20, X0, X11 // 62327d08dedc or 6232fd08dedc
+ VAESDEC X24, X7, X31 // 62024508def8 or 6202c508def8
+ VAESDEC X20, X7, X31 // 62224508defc or 6222c508defc
+ VAESDEC X7, X7, X31 // 62624508deff or 6262c508deff
+ VAESDEC -7(DI)(R8*1), X7, X31 // 62224508debc07f9ffffff or 6222c508debc07f9ffffff
+ VAESDEC (SP), X7, X31 // 62624508de3c24 or 6262c508de3c24
+ VAESDEC X24, X0, X31 // 62027d08def8 or 6202fd08def8
+ VAESDEC X20, X0, X31 // 62227d08defc or 6222fd08defc
+ VAESDEC X7, X0, X31 // 62627d08deff or 6262fd08deff
+ VAESDEC -7(DI)(R8*1), X0, X31 // 62227d08debc07f9ffffff or 6222fd08debc07f9ffffff
+ VAESDEC (SP), X0, X31 // 62627d08de3c24 or 6262fd08de3c24
+ VAESDEC X24, X7, X3 // 62924508ded8 or 6292c508ded8
+ VAESDEC X20, X7, X3 // 62b24508dedc or 62b2c508dedc
+ VAESDEC X24, X0, X3 // 62927d08ded8 or 6292fd08ded8
+ VAESDEC X20, X0, X3 // 62b27d08dedc or 62b2fd08dedc
+ VAESDEC Y5, Y31, Y22 // 62e20520def5 or 62e28520def5
+ VAESDEC Y19, Y31, Y22 // 62a20520def3 or 62a28520def3
+ VAESDEC Y31, Y31, Y22 // 62820520def7 or 62828520def7
+ VAESDEC 99(R15)(R15*1), Y31, Y22 // 62820520deb43f63000000 or 62828520deb43f63000000
+ VAESDEC (DX), Y31, Y22 // 62e20520de32 or 62e28520de32
+ VAESDEC Y5, Y5, Y22 // 62e25528def5 or 62e2d528def5
+ VAESDEC Y19, Y5, Y22 // 62a25528def3 or 62a2d528def3
+ VAESDEC Y31, Y5, Y22 // 62825528def7 or 6282d528def7
+ VAESDEC 99(R15)(R15*1), Y5, Y22 // 62825528deb43f63000000 or 6282d528deb43f63000000
+ VAESDEC (DX), Y5, Y22 // 62e25528de32 or 62e2d528de32
+ VAESDEC Y5, Y0, Y22 // 62e27d28def5 or 62e2fd28def5
+ VAESDEC Y19, Y0, Y22 // 62a27d28def3 or 62a2fd28def3
+ VAESDEC Y31, Y0, Y22 // 62827d28def7 or 6282fd28def7
+ VAESDEC 99(R15)(R15*1), Y0, Y22 // 62827d28deb43f63000000 or 6282fd28deb43f63000000
+ VAESDEC (DX), Y0, Y22 // 62e27d28de32 or 62e2fd28de32
+ VAESDEC Y5, Y31, Y9 // 62720520decd or 62728520decd
+ VAESDEC Y19, Y31, Y9 // 62320520decb or 62328520decb
+ VAESDEC Y31, Y31, Y9 // 62120520decf or 62128520decf
+ VAESDEC 99(R15)(R15*1), Y31, Y9 // 62120520de8c3f63000000 or 62128520de8c3f63000000
+ VAESDEC (DX), Y31, Y9 // 62720520de0a or 62728520de0a
+ VAESDEC Y19, Y5, Y9 // 62325528decb or 6232d528decb
+ VAESDEC Y31, Y5, Y9 // 62125528decf or 6212d528decf
+ VAESDEC Y19, Y0, Y9 // 62327d28decb or 6232fd28decb
+ VAESDEC Y31, Y0, Y9 // 62127d28decf or 6212fd28decf
+ VAESDEC Y5, Y31, Y23 // 62e20520defd or 62e28520defd
+ VAESDEC Y19, Y31, Y23 // 62a20520defb or 62a28520defb
+ VAESDEC Y31, Y31, Y23 // 62820520deff or 62828520deff
+ VAESDEC 99(R15)(R15*1), Y31, Y23 // 62820520debc3f63000000 or 62828520debc3f63000000
+ VAESDEC (DX), Y31, Y23 // 62e20520de3a or 62e28520de3a
+ VAESDEC Y5, Y5, Y23 // 62e25528defd or 62e2d528defd
+ VAESDEC Y19, Y5, Y23 // 62a25528defb or 62a2d528defb
+ VAESDEC Y31, Y5, Y23 // 62825528deff or 6282d528deff
+ VAESDEC 99(R15)(R15*1), Y5, Y23 // 62825528debc3f63000000 or 6282d528debc3f63000000
+ VAESDEC (DX), Y5, Y23 // 62e25528de3a or 62e2d528de3a
+ VAESDEC Y5, Y0, Y23 // 62e27d28defd or 62e2fd28defd
+ VAESDEC Y19, Y0, Y23 // 62a27d28defb or 62a2fd28defb
+ VAESDEC Y31, Y0, Y23 // 62827d28deff or 6282fd28deff
+ VAESDEC 99(R15)(R15*1), Y0, Y23 // 62827d28debc3f63000000 or 6282fd28debc3f63000000
+ VAESDEC (DX), Y0, Y23 // 62e27d28de3a or 62e2fd28de3a
+ VAESDEC Z27, Z3, Z11 // 62126548dedb or 6212e548dedb
+ VAESDEC Z15, Z3, Z11 // 62526548dedf or 6252e548dedf
+ VAESDEC 99(R15)(R15*1), Z3, Z11 // 62126548de9c3f63000000 or 6212e548de9c3f63000000
+ VAESDEC (DX), Z3, Z11 // 62726548de1a or 6272e548de1a
+ VAESDEC Z27, Z12, Z11 // 62121d48dedb or 62129d48dedb
+ VAESDEC Z15, Z12, Z11 // 62521d48dedf or 62529d48dedf
+ VAESDEC 99(R15)(R15*1), Z12, Z11 // 62121d48de9c3f63000000 or 62129d48de9c3f63000000
+ VAESDEC (DX), Z12, Z11 // 62721d48de1a or 62729d48de1a
+ VAESDEC Z27, Z3, Z25 // 62026548decb or 6202e548decb
+ VAESDEC Z15, Z3, Z25 // 62426548decf or 6242e548decf
+ VAESDEC 99(R15)(R15*1), Z3, Z25 // 62026548de8c3f63000000 or 6202e548de8c3f63000000
+ VAESDEC (DX), Z3, Z25 // 62626548de0a or 6262e548de0a
+ VAESDEC Z27, Z12, Z25 // 62021d48decb or 62029d48decb
+ VAESDEC Z15, Z12, Z25 // 62421d48decf or 62429d48decf
+ VAESDEC 99(R15)(R15*1), Z12, Z25 // 62021d48de8c3f63000000 or 62029d48de8c3f63000000
+ VAESDEC (DX), Z12, Z25 // 62621d48de0a or 62629d48de0a
+ VAESDECLAST X21, X5, X9 // 62325508dfcd or 6232d508dfcd
+ VAESDECLAST X21, X31, X9 // 62320500dfcd or 62328500dfcd
+ VAESDECLAST X1, X31, X9 // 62720500dfc9 or 62728500dfc9
+ VAESDECLAST X11, X31, X9 // 62520500dfcb or 62528500dfcb
+ VAESDECLAST -7(CX), X31, X9 // 62720500df89f9ffffff or 62728500df89f9ffffff
+ VAESDECLAST 15(DX)(BX*4), X31, X9 // 62720500df8c9a0f000000 or 62728500df8c9a0f000000
+ VAESDECLAST X21, X3, X9 // 62326508dfcd or 6232e508dfcd
+ VAESDECLAST X21, X5, X7 // 62b25508dffd or 62b2d508dffd
+ VAESDECLAST X21, X31, X7 // 62b20500dffd or 62b28500dffd
+ VAESDECLAST X1, X31, X7 // 62f20500dff9 or 62f28500dff9
+ VAESDECLAST X11, X31, X7 // 62d20500dffb or 62d28500dffb
+ VAESDECLAST -7(CX), X31, X7 // 62f20500dfb9f9ffffff or 62f28500dfb9f9ffffff
+ VAESDECLAST 15(DX)(BX*4), X31, X7 // 62f20500dfbc9a0f000000 or 62f28500dfbc9a0f000000
+ VAESDECLAST X21, X3, X7 // 62b26508dffd or 62b2e508dffd
+ VAESDECLAST X21, X5, X14 // 62325508dff5 or 6232d508dff5
+ VAESDECLAST X21, X31, X14 // 62320500dff5 or 62328500dff5
+ VAESDECLAST X1, X31, X14 // 62720500dff1 or 62728500dff1
+ VAESDECLAST X11, X31, X14 // 62520500dff3 or 62528500dff3
+ VAESDECLAST -7(CX), X31, X14 // 62720500dfb1f9ffffff or 62728500dfb1f9ffffff
+ VAESDECLAST 15(DX)(BX*4), X31, X14 // 62720500dfb49a0f000000 or 62728500dfb49a0f000000
+ VAESDECLAST X21, X3, X14 // 62326508dff5 or 6232e508dff5
+ VAESDECLAST Y31, Y27, Y28 // 62022520dfe7 or 6202a520dfe7
+ VAESDECLAST Y3, Y27, Y28 // 62622520dfe3 or 6262a520dfe3
+ VAESDECLAST Y14, Y27, Y28 // 62422520dfe6 or 6242a520dfe6
+ VAESDECLAST -17(BP)(SI*8), Y27, Y28 // 62622520dfa4f5efffffff or 6262a520dfa4f5efffffff
+ VAESDECLAST (R15), Y27, Y28 // 62422520df27 or 6242a520df27
+ VAESDECLAST Y31, Y0, Y28 // 62027d28dfe7 or 6202fd28dfe7
+ VAESDECLAST Y3, Y0, Y28 // 62627d28dfe3 or 6262fd28dfe3
+ VAESDECLAST Y14, Y0, Y28 // 62427d28dfe6 or 6242fd28dfe6
+ VAESDECLAST -17(BP)(SI*8), Y0, Y28 // 62627d28dfa4f5efffffff or 6262fd28dfa4f5efffffff
+ VAESDECLAST (R15), Y0, Y28 // 62427d28df27 or 6242fd28df27
+ VAESDECLAST Y31, Y11, Y28 // 62022528dfe7 or 6202a528dfe7
+ VAESDECLAST Y3, Y11, Y28 // 62622528dfe3 or 6262a528dfe3
+ VAESDECLAST Y14, Y11, Y28 // 62422528dfe6 or 6242a528dfe6
+ VAESDECLAST -17(BP)(SI*8), Y11, Y28 // 62622528dfa4f5efffffff or 6262a528dfa4f5efffffff
+ VAESDECLAST (R15), Y11, Y28 // 62422528df27 or 6242a528df27
+ VAESDECLAST Y31, Y27, Y2 // 62922520dfd7 or 6292a520dfd7
+ VAESDECLAST Y3, Y27, Y2 // 62f22520dfd3 or 62f2a520dfd3
+ VAESDECLAST Y14, Y27, Y2 // 62d22520dfd6 or 62d2a520dfd6
+ VAESDECLAST -17(BP)(SI*8), Y27, Y2 // 62f22520df94f5efffffff or 62f2a520df94f5efffffff
+ VAESDECLAST (R15), Y27, Y2 // 62d22520df17 or 62d2a520df17
+ VAESDECLAST Y31, Y0, Y2 // 62927d28dfd7 or 6292fd28dfd7
+ VAESDECLAST Y31, Y11, Y2 // 62922528dfd7 or 6292a528dfd7
+ VAESDECLAST Y31, Y27, Y24 // 62022520dfc7 or 6202a520dfc7
+ VAESDECLAST Y3, Y27, Y24 // 62622520dfc3 or 6262a520dfc3
+ VAESDECLAST Y14, Y27, Y24 // 62422520dfc6 or 6242a520dfc6
+ VAESDECLAST -17(BP)(SI*8), Y27, Y24 // 62622520df84f5efffffff or 6262a520df84f5efffffff
+ VAESDECLAST (R15), Y27, Y24 // 62422520df07 or 6242a520df07
+ VAESDECLAST Y31, Y0, Y24 // 62027d28dfc7 or 6202fd28dfc7
+ VAESDECLAST Y3, Y0, Y24 // 62627d28dfc3 or 6262fd28dfc3
+ VAESDECLAST Y14, Y0, Y24 // 62427d28dfc6 or 6242fd28dfc6
+ VAESDECLAST -17(BP)(SI*8), Y0, Y24 // 62627d28df84f5efffffff or 6262fd28df84f5efffffff
+ VAESDECLAST (R15), Y0, Y24 // 62427d28df07 or 6242fd28df07
+ VAESDECLAST Y31, Y11, Y24 // 62022528dfc7 or 6202a528dfc7
+ VAESDECLAST Y3, Y11, Y24 // 62622528dfc3 or 6262a528dfc3
+ VAESDECLAST Y14, Y11, Y24 // 62422528dfc6 or 6242a528dfc6
+ VAESDECLAST -17(BP)(SI*8), Y11, Y24 // 62622528df84f5efffffff or 6262a528df84f5efffffff
+ VAESDECLAST (R15), Y11, Y24 // 62422528df07 or 6242a528df07
+ VAESDECLAST Z8, Z23, Z23 // 62c24540dff8 or 62c2c540dff8
+ VAESDECLAST Z28, Z23, Z23 // 62824540dffc or 6282c540dffc
+ VAESDECLAST -17(BP)(SI*8), Z23, Z23 // 62e24540dfbcf5efffffff or 62e2c540dfbcf5efffffff
+ VAESDECLAST (R15), Z23, Z23 // 62c24540df3f or 62c2c540df3f
+ VAESDECLAST Z8, Z6, Z23 // 62c24d48dff8 or 62c2cd48dff8
+ VAESDECLAST Z28, Z6, Z23 // 62824d48dffc or 6282cd48dffc
+ VAESDECLAST -17(BP)(SI*8), Z6, Z23 // 62e24d48dfbcf5efffffff or 62e2cd48dfbcf5efffffff
+ VAESDECLAST (R15), Z6, Z23 // 62c24d48df3f or 62c2cd48df3f
+ VAESDECLAST Z8, Z23, Z5 // 62d24540dfe8 or 62d2c540dfe8
+ VAESDECLAST Z28, Z23, Z5 // 62924540dfec or 6292c540dfec
+ VAESDECLAST -17(BP)(SI*8), Z23, Z5 // 62f24540dfacf5efffffff or 62f2c540dfacf5efffffff
+ VAESDECLAST (R15), Z23, Z5 // 62d24540df2f or 62d2c540df2f
+ VAESDECLAST Z8, Z6, Z5 // 62d24d48dfe8 or 62d2cd48dfe8
+ VAESDECLAST Z28, Z6, Z5 // 62924d48dfec or 6292cd48dfec
+ VAESDECLAST -17(BP)(SI*8), Z6, Z5 // 62f24d48dfacf5efffffff or 62f2cd48dfacf5efffffff
+ VAESDECLAST (R15), Z6, Z5 // 62d24d48df2f or 62d2cd48df2f
+ VAESENC X14, X16, X13 // 62527d00dcee or 6252fd00dcee
+ VAESENC X19, X16, X13 // 62327d00dceb or 6232fd00dceb
+ VAESENC X8, X16, X13 // 62527d00dce8 or 6252fd00dce8
+ VAESENC 99(R15)(R15*8), X16, X13 // 62127d00dcacff63000000 or 6212fd00dcacff63000000
+ VAESENC 7(AX)(CX*8), X16, X13 // 62727d00dcacc807000000 or 6272fd00dcacc807000000
+ VAESENC X19, X14, X13 // 62320d08dceb or 62328d08dceb
+ VAESENC X19, X11, X13 // 62322508dceb or 6232a508dceb
+ VAESENC X14, X16, X0 // 62d27d00dcc6 or 62d2fd00dcc6
+ VAESENC X19, X16, X0 // 62b27d00dcc3 or 62b2fd00dcc3
+ VAESENC X8, X16, X0 // 62d27d00dcc0 or 62d2fd00dcc0
+ VAESENC 99(R15)(R15*8), X16, X0 // 62927d00dc84ff63000000 or 6292fd00dc84ff63000000
+ VAESENC 7(AX)(CX*8), X16, X0 // 62f27d00dc84c807000000 or 62f2fd00dc84c807000000
+ VAESENC X19, X14, X0 // 62b20d08dcc3 or 62b28d08dcc3
+ VAESENC X19, X11, X0 // 62b22508dcc3 or 62b2a508dcc3
+ VAESENC X14, X16, X30 // 62427d00dcf6 or 6242fd00dcf6
+ VAESENC X19, X16, X30 // 62227d00dcf3 or 6222fd00dcf3
+ VAESENC X8, X16, X30 // 62427d00dcf0 or 6242fd00dcf0
+ VAESENC 99(R15)(R15*8), X16, X30 // 62027d00dcb4ff63000000 or 6202fd00dcb4ff63000000
+ VAESENC 7(AX)(CX*8), X16, X30 // 62627d00dcb4c807000000 or 6262fd00dcb4c807000000
+ VAESENC X14, X14, X30 // 62420d08dcf6 or 62428d08dcf6
+ VAESENC X19, X14, X30 // 62220d08dcf3 or 62228d08dcf3
+ VAESENC X8, X14, X30 // 62420d08dcf0 or 62428d08dcf0
+ VAESENC 99(R15)(R15*8), X14, X30 // 62020d08dcb4ff63000000 or 62028d08dcb4ff63000000
+ VAESENC 7(AX)(CX*8), X14, X30 // 62620d08dcb4c807000000 or 62628d08dcb4c807000000
+ VAESENC X14, X11, X30 // 62422508dcf6 or 6242a508dcf6
+ VAESENC X19, X11, X30 // 62222508dcf3 or 6222a508dcf3
+ VAESENC X8, X11, X30 // 62422508dcf0 or 6242a508dcf0
+ VAESENC 99(R15)(R15*8), X11, X30 // 62022508dcb4ff63000000 or 6202a508dcb4ff63000000
+ VAESENC 7(AX)(CX*8), X11, X30 // 62622508dcb4c807000000 or 6262a508dcb4c807000000
+ VAESENC Y18, Y15, Y2 // 62b20528dcd2 or 62b28528dcd2
+ VAESENC Y24, Y15, Y2 // 62920528dcd0 or 62928528dcd0
+ VAESENC Y18, Y22, Y2 // 62b24d20dcd2 or 62b2cd20dcd2
+ VAESENC Y24, Y22, Y2 // 62924d20dcd0 or 6292cd20dcd0
+ VAESENC Y9, Y22, Y2 // 62d24d20dcd1 or 62d2cd20dcd1
+ VAESENC 7(SI)(DI*8), Y22, Y2 // 62f24d20dc94fe07000000 or 62f2cd20dc94fe07000000
+ VAESENC -15(R14), Y22, Y2 // 62d24d20dc96f1ffffff or 62d2cd20dc96f1ffffff
+ VAESENC Y18, Y20, Y2 // 62b25d20dcd2 or 62b2dd20dcd2
+ VAESENC Y24, Y20, Y2 // 62925d20dcd0 or 6292dd20dcd0
+ VAESENC Y9, Y20, Y2 // 62d25d20dcd1 or 62d2dd20dcd1
+ VAESENC 7(SI)(DI*8), Y20, Y2 // 62f25d20dc94fe07000000 or 62f2dd20dc94fe07000000
+ VAESENC -15(R14), Y20, Y2 // 62d25d20dc96f1ffffff or 62d2dd20dc96f1ffffff
+ VAESENC Y18, Y15, Y13 // 62320528dcea or 62328528dcea
+ VAESENC Y24, Y15, Y13 // 62120528dce8 or 62128528dce8
+ VAESENC Y18, Y22, Y13 // 62324d20dcea or 6232cd20dcea
+ VAESENC Y24, Y22, Y13 // 62124d20dce8 or 6212cd20dce8
+ VAESENC Y9, Y22, Y13 // 62524d20dce9 or 6252cd20dce9
+ VAESENC 7(SI)(DI*8), Y22, Y13 // 62724d20dcacfe07000000 or 6272cd20dcacfe07000000
+ VAESENC -15(R14), Y22, Y13 // 62524d20dcaef1ffffff or 6252cd20dcaef1ffffff
+ VAESENC Y18, Y20, Y13 // 62325d20dcea or 6232dd20dcea
+ VAESENC Y24, Y20, Y13 // 62125d20dce8 or 6212dd20dce8
+ VAESENC Y9, Y20, Y13 // 62525d20dce9 or 6252dd20dce9
+ VAESENC 7(SI)(DI*8), Y20, Y13 // 62725d20dcacfe07000000 or 6272dd20dcacfe07000000
+ VAESENC -15(R14), Y20, Y13 // 62525d20dcaef1ffffff or 6252dd20dcaef1ffffff
+ VAESENC Y18, Y15, Y27 // 62220528dcda or 62228528dcda
+ VAESENC Y24, Y15, Y27 // 62020528dcd8 or 62028528dcd8
+ VAESENC Y9, Y15, Y27 // 62420528dcd9 or 62428528dcd9
+ VAESENC 7(SI)(DI*8), Y15, Y27 // 62620528dc9cfe07000000 or 62628528dc9cfe07000000
+ VAESENC -15(R14), Y15, Y27 // 62420528dc9ef1ffffff or 62428528dc9ef1ffffff
+ VAESENC Y18, Y22, Y27 // 62224d20dcda or 6222cd20dcda
+ VAESENC Y24, Y22, Y27 // 62024d20dcd8 or 6202cd20dcd8
+ VAESENC Y9, Y22, Y27 // 62424d20dcd9 or 6242cd20dcd9
+ VAESENC 7(SI)(DI*8), Y22, Y27 // 62624d20dc9cfe07000000 or 6262cd20dc9cfe07000000
+ VAESENC -15(R14), Y22, Y27 // 62424d20dc9ef1ffffff or 6242cd20dc9ef1ffffff
+ VAESENC Y18, Y20, Y27 // 62225d20dcda or 6222dd20dcda
+ VAESENC Y24, Y20, Y27 // 62025d20dcd8 or 6202dd20dcd8
+ VAESENC Y9, Y20, Y27 // 62425d20dcd9 or 6242dd20dcd9
+ VAESENC 7(SI)(DI*8), Y20, Y27 // 62625d20dc9cfe07000000 or 6262dd20dc9cfe07000000
+ VAESENC -15(R14), Y20, Y27 // 62425d20dc9ef1ffffff or 6242dd20dc9ef1ffffff
+ VAESENC Z12, Z16, Z21 // 62c27d40dcec or 62c2fd40dcec
+ VAESENC Z27, Z16, Z21 // 62827d40dceb or 6282fd40dceb
+ VAESENC 7(SI)(DI*8), Z16, Z21 // 62e27d40dcacfe07000000 or 62e2fd40dcacfe07000000
+ VAESENC -15(R14), Z16, Z21 // 62c27d40dcaef1ffffff or 62c2fd40dcaef1ffffff
+ VAESENC Z12, Z13, Z21 // 62c21548dcec or 62c29548dcec
+ VAESENC Z27, Z13, Z21 // 62821548dceb or 62829548dceb
+ VAESENC 7(SI)(DI*8), Z13, Z21 // 62e21548dcacfe07000000 or 62e29548dcacfe07000000
+ VAESENC -15(R14), Z13, Z21 // 62c21548dcaef1ffffff or 62c29548dcaef1ffffff
+ VAESENC Z12, Z16, Z5 // 62d27d40dcec or 62d2fd40dcec
+ VAESENC Z27, Z16, Z5 // 62927d40dceb or 6292fd40dceb
+ VAESENC 7(SI)(DI*8), Z16, Z5 // 62f27d40dcacfe07000000 or 62f2fd40dcacfe07000000
+ VAESENC -15(R14), Z16, Z5 // 62d27d40dcaef1ffffff or 62d2fd40dcaef1ffffff
+ VAESENC Z12, Z13, Z5 // 62d21548dcec or 62d29548dcec
+ VAESENC Z27, Z13, Z5 // 62921548dceb or 62929548dceb
+ VAESENC 7(SI)(DI*8), Z13, Z5 // 62f21548dcacfe07000000 or 62f29548dcacfe07000000
+ VAESENC -15(R14), Z13, Z5 // 62d21548dcaef1ffffff or 62d29548dcaef1ffffff
+ VAESENCLAST X23, X12, X8 // 62321d08ddc7 or 62329d08ddc7
+ VAESENCLAST X31, X12, X8 // 62121d08ddc7 or 62129d08ddc7
+ VAESENCLAST X23, X16, X8 // 62327d00ddc7 or 6232fd00ddc7
+ VAESENCLAST X11, X16, X8 // 62527d00ddc3 or 6252fd00ddc3
+ VAESENCLAST X31, X16, X8 // 62127d00ddc7 or 6212fd00ddc7
+ VAESENCLAST (AX), X16, X8 // 62727d00dd00 or 6272fd00dd00
+ VAESENCLAST 7(SI), X16, X8 // 62727d00dd8607000000 or 6272fd00dd8607000000
+ VAESENCLAST X23, X23, X8 // 62324500ddc7 or 6232c500ddc7
+ VAESENCLAST X11, X23, X8 // 62524500ddc3 or 6252c500ddc3
+ VAESENCLAST X31, X23, X8 // 62124500ddc7 or 6212c500ddc7
+ VAESENCLAST (AX), X23, X8 // 62724500dd00 or 6272c500dd00
+ VAESENCLAST 7(SI), X23, X8 // 62724500dd8607000000 or 6272c500dd8607000000
+ VAESENCLAST X23, X12, X26 // 62221d08ddd7 or 62229d08ddd7
+ VAESENCLAST X11, X12, X26 // 62421d08ddd3 or 62429d08ddd3
+ VAESENCLAST X31, X12, X26 // 62021d08ddd7 or 62029d08ddd7
+ VAESENCLAST (AX), X12, X26 // 62621d08dd10 or 62629d08dd10
+ VAESENCLAST 7(SI), X12, X26 // 62621d08dd9607000000 or 62629d08dd9607000000
+ VAESENCLAST X23, X16, X26 // 62227d00ddd7 or 6222fd00ddd7
+ VAESENCLAST X11, X16, X26 // 62427d00ddd3 or 6242fd00ddd3
+ VAESENCLAST X31, X16, X26 // 62027d00ddd7 or 6202fd00ddd7
+ VAESENCLAST (AX), X16, X26 // 62627d00dd10 or 6262fd00dd10
+ VAESENCLAST 7(SI), X16, X26 // 62627d00dd9607000000 or 6262fd00dd9607000000
+ VAESENCLAST X23, X23, X26 // 62224500ddd7 or 6222c500ddd7
+ VAESENCLAST X11, X23, X26 // 62424500ddd3 or 6242c500ddd3
+ VAESENCLAST X31, X23, X26 // 62024500ddd7 or 6202c500ddd7
+ VAESENCLAST (AX), X23, X26 // 62624500dd10 or 6262c500dd10
+ VAESENCLAST 7(SI), X23, X26 // 62624500dd9607000000 or 6262c500dd9607000000
+ VAESENCLAST X23, X12, X23 // 62a21d08ddff or 62a29d08ddff
+ VAESENCLAST X11, X12, X23 // 62c21d08ddfb or 62c29d08ddfb
+ VAESENCLAST X31, X12, X23 // 62821d08ddff or 62829d08ddff
+ VAESENCLAST (AX), X12, X23 // 62e21d08dd38 or 62e29d08dd38
+ VAESENCLAST 7(SI), X12, X23 // 62e21d08ddbe07000000 or 62e29d08ddbe07000000
+ VAESENCLAST X23, X16, X23 // 62a27d00ddff or 62a2fd00ddff
+ VAESENCLAST X11, X16, X23 // 62c27d00ddfb or 62c2fd00ddfb
+ VAESENCLAST X31, X16, X23 // 62827d00ddff or 6282fd00ddff
+ VAESENCLAST (AX), X16, X23 // 62e27d00dd38 or 62e2fd00dd38
+ VAESENCLAST 7(SI), X16, X23 // 62e27d00ddbe07000000 or 62e2fd00ddbe07000000
+ VAESENCLAST X23, X23, X23 // 62a24500ddff or 62a2c500ddff
+ VAESENCLAST X11, X23, X23 // 62c24500ddfb or 62c2c500ddfb
+ VAESENCLAST X31, X23, X23 // 62824500ddff or 6282c500ddff
+ VAESENCLAST (AX), X23, X23 // 62e24500dd38 or 62e2c500dd38
+ VAESENCLAST 7(SI), X23, X23 // 62e24500ddbe07000000 or 62e2c500ddbe07000000
+ VAESENCLAST Y5, Y19, Y3 // 62f26520dddd or 62f2e520dddd
+ VAESENCLAST Y16, Y19, Y3 // 62b26520ddd8 or 62b2e520ddd8
+ VAESENCLAST Y2, Y19, Y3 // 62f26520ddda or 62f2e520ddda
+ VAESENCLAST 7(SI)(DI*1), Y19, Y3 // 62f26520dd9c3e07000000 or 62f2e520dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y19, Y3 // 62f26520dd9cda0f000000 or 62f2e520dd9cda0f000000
+ VAESENCLAST Y16, Y14, Y3 // 62b20d28ddd8 or 62b28d28ddd8
+ VAESENCLAST Y5, Y21, Y3 // 62f25520dddd or 62f2d520dddd
+ VAESENCLAST Y16, Y21, Y3 // 62b25520ddd8 or 62b2d520ddd8
+ VAESENCLAST Y2, Y21, Y3 // 62f25520ddda or 62f2d520ddda
+ VAESENCLAST 7(SI)(DI*1), Y21, Y3 // 62f25520dd9c3e07000000 or 62f2d520dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y21, Y3 // 62f25520dd9cda0f000000 or 62f2d520dd9cda0f000000
+ VAESENCLAST Y5, Y19, Y19 // 62e26520dddd or 62e2e520dddd
+ VAESENCLAST Y16, Y19, Y19 // 62a26520ddd8 or 62a2e520ddd8
+ VAESENCLAST Y2, Y19, Y19 // 62e26520ddda or 62e2e520ddda
+ VAESENCLAST 7(SI)(DI*1), Y19, Y19 // 62e26520dd9c3e07000000 or 62e2e520dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y19, Y19 // 62e26520dd9cda0f000000 or 62e2e520dd9cda0f000000
+ VAESENCLAST Y5, Y14, Y19 // 62e20d28dddd or 62e28d28dddd
+ VAESENCLAST Y16, Y14, Y19 // 62a20d28ddd8 or 62a28d28ddd8
+ VAESENCLAST Y2, Y14, Y19 // 62e20d28ddda or 62e28d28ddda
+ VAESENCLAST 7(SI)(DI*1), Y14, Y19 // 62e20d28dd9c3e07000000 or 62e28d28dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y14, Y19 // 62e20d28dd9cda0f000000 or 62e28d28dd9cda0f000000
+ VAESENCLAST Y5, Y21, Y19 // 62e25520dddd or 62e2d520dddd
+ VAESENCLAST Y16, Y21, Y19 // 62a25520ddd8 or 62a2d520ddd8
+ VAESENCLAST Y2, Y21, Y19 // 62e25520ddda or 62e2d520ddda
+ VAESENCLAST 7(SI)(DI*1), Y21, Y19 // 62e25520dd9c3e07000000 or 62e2d520dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y21, Y19 // 62e25520dd9cda0f000000 or 62e2d520dd9cda0f000000
+ VAESENCLAST Y5, Y19, Y23 // 62e26520ddfd or 62e2e520ddfd
+ VAESENCLAST Y16, Y19, Y23 // 62a26520ddf8 or 62a2e520ddf8
+ VAESENCLAST Y2, Y19, Y23 // 62e26520ddfa or 62e2e520ddfa
+ VAESENCLAST 7(SI)(DI*1), Y19, Y23 // 62e26520ddbc3e07000000 or 62e2e520ddbc3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y19, Y23 // 62e26520ddbcda0f000000 or 62e2e520ddbcda0f000000
+ VAESENCLAST Y5, Y14, Y23 // 62e20d28ddfd or 62e28d28ddfd
+ VAESENCLAST Y16, Y14, Y23 // 62a20d28ddf8 or 62a28d28ddf8
+ VAESENCLAST Y2, Y14, Y23 // 62e20d28ddfa or 62e28d28ddfa
+ VAESENCLAST 7(SI)(DI*1), Y14, Y23 // 62e20d28ddbc3e07000000 or 62e28d28ddbc3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y14, Y23 // 62e20d28ddbcda0f000000 or 62e28d28ddbcda0f000000
+ VAESENCLAST Y5, Y21, Y23 // 62e25520ddfd or 62e2d520ddfd
+ VAESENCLAST Y16, Y21, Y23 // 62a25520ddf8 or 62a2d520ddf8
+ VAESENCLAST Y2, Y21, Y23 // 62e25520ddfa or 62e2d520ddfa
+ VAESENCLAST 7(SI)(DI*1), Y21, Y23 // 62e25520ddbc3e07000000 or 62e2d520ddbc3e07000000
+ VAESENCLAST 15(DX)(BX*8), Y21, Y23 // 62e25520ddbcda0f000000 or 62e2d520ddbcda0f000000
+ VAESENCLAST Z25, Z6, Z22 // 62824d48ddf1 or 6282cd48ddf1
+ VAESENCLAST Z12, Z6, Z22 // 62c24d48ddf4 or 62c2cd48ddf4
+ VAESENCLAST 7(SI)(DI*1), Z6, Z22 // 62e24d48ddb43e07000000 or 62e2cd48ddb43e07000000
+ VAESENCLAST 15(DX)(BX*8), Z6, Z22 // 62e24d48ddb4da0f000000 or 62e2cd48ddb4da0f000000
+ VAESENCLAST Z25, Z8, Z22 // 62823d48ddf1 or 6282bd48ddf1
+ VAESENCLAST Z12, Z8, Z22 // 62c23d48ddf4 or 62c2bd48ddf4
+ VAESENCLAST 7(SI)(DI*1), Z8, Z22 // 62e23d48ddb43e07000000 or 62e2bd48ddb43e07000000
+ VAESENCLAST 15(DX)(BX*8), Z8, Z22 // 62e23d48ddb4da0f000000 or 62e2bd48ddb4da0f000000
+ VAESENCLAST Z25, Z6, Z11 // 62124d48ddd9 or 6212cd48ddd9
+ VAESENCLAST Z12, Z6, Z11 // 62524d48dddc or 6252cd48dddc
+ VAESENCLAST 7(SI)(DI*1), Z6, Z11 // 62724d48dd9c3e07000000 or 6272cd48dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Z6, Z11 // 62724d48dd9cda0f000000 or 6272cd48dd9cda0f000000
+ VAESENCLAST Z25, Z8, Z11 // 62123d48ddd9 or 6212bd48ddd9
+ VAESENCLAST Z12, Z8, Z11 // 62523d48dddc or 6252bd48dddc
+ VAESENCLAST 7(SI)(DI*1), Z8, Z11 // 62723d48dd9c3e07000000 or 6272bd48dd9c3e07000000
+ VAESENCLAST 15(DX)(BX*8), Z8, Z11 // 62723d48dd9cda0f000000 or 6272bd48dd9cda0f000000
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4fmaps.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4fmaps.s
new file mode 100644
index 0000000..e30f41d
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4fmaps.s
@@ -0,0 +1,66 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_4fmaps(SB), NOSPLIT, $0
+ V4FMADDPS 17(SP), [Z0-Z3], K2, Z0 // 62f27f4a9a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z0-Z3], K2, Z0 // 62f27f4a9a84b5efffffff
+ V4FMADDPS 17(SP), [Z10-Z13], K2, Z0 // 62f22f4a9a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z10-Z13], K2, Z0 // 62f22f4a9a84b5efffffff
+ V4FMADDPS 17(SP), [Z20-Z23], K2, Z0 // 62f25f429a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z20-Z23], K2, Z0 // 62f25f429a84b5efffffff
+ V4FMADDPS 17(SP), [Z0-Z3], K2, Z8 // 62727f4a9a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z0-Z3], K2, Z8 // 62727f4a9a84b5efffffff
+ V4FMADDPS 17(SP), [Z10-Z13], K2, Z8 // 62722f4a9a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z10-Z13], K2, Z8 // 62722f4a9a84b5efffffff
+ V4FMADDPS 17(SP), [Z20-Z23], K2, Z8 // 62725f429a842411000000
+ V4FMADDPS -17(BP)(SI*4), [Z20-Z23], K2, Z8 // 62725f429a84b5efffffff
+ V4FMADDSS 7(AX), [X0-X3], K5, X22 // 62e27f0d9bb007000000 or 62e27f2d9bb007000000 or 62e27f4d9bb007000000
+ V4FMADDSS (DI), [X0-X3], K5, X22 // 62e27f0d9b37 or 62e27f2d9b37 or 62e27f4d9b37
+ V4FMADDSS 7(AX), [X10-X13], K5, X22 // 62e22f0d9bb007000000 or 62e22f2d9bb007000000 or 62e22f4d9bb007000000
+ V4FMADDSS (DI), [X10-X13], K5, X22 // 62e22f0d9b37 or 62e22f2d9b37 or 62e22f4d9b37
+ V4FMADDSS 7(AX), [X20-X23], K5, X22 // 62e25f059bb007000000 or 62e25f259bb007000000 or 62e25f459bb007000000
+ V4FMADDSS (DI), [X20-X23], K5, X22 // 62e25f059b37 or 62e25f259b37 or 62e25f459b37
+ V4FMADDSS 7(AX), [X0-X3], K5, X30 // 62627f0d9bb007000000 or 62627f2d9bb007000000 or 62627f4d9bb007000000
+ V4FMADDSS (DI), [X0-X3], K5, X30 // 62627f0d9b37 or 62627f2d9b37 or 62627f4d9b37
+ V4FMADDSS 7(AX), [X10-X13], K5, X30 // 62622f0d9bb007000000 or 62622f2d9bb007000000 or 62622f4d9bb007000000
+ V4FMADDSS (DI), [X10-X13], K5, X30 // 62622f0d9b37 or 62622f2d9b37 or 62622f4d9b37
+ V4FMADDSS 7(AX), [X20-X23], K5, X30 // 62625f059bb007000000 or 62625f259bb007000000 or 62625f459bb007000000
+ V4FMADDSS (DI), [X20-X23], K5, X30 // 62625f059b37 or 62625f259b37 or 62625f459b37
+ V4FMADDSS 7(AX), [X0-X3], K5, X3 // 62f27f0d9b9807000000 or 62f27f2d9b9807000000 or 62f27f4d9b9807000000
+ V4FMADDSS (DI), [X0-X3], K5, X3 // 62f27f0d9b1f or 62f27f2d9b1f or 62f27f4d9b1f
+ V4FMADDSS 7(AX), [X10-X13], K5, X3 // 62f22f0d9b9807000000 or 62f22f2d9b9807000000 or 62f22f4d9b9807000000
+ V4FMADDSS (DI), [X10-X13], K5, X3 // 62f22f0d9b1f or 62f22f2d9b1f or 62f22f4d9b1f
+ V4FMADDSS 7(AX), [X20-X23], K5, X3 // 62f25f059b9807000000 or 62f25f259b9807000000 or 62f25f459b9807000000
+ V4FMADDSS (DI), [X20-X23], K5, X3 // 62f25f059b1f or 62f25f259b1f or 62f25f459b1f
+ V4FNMADDPS 99(R15)(R15*1), [Z1-Z4], K3, Z15 // 6212774baabc3f63000000
+ V4FNMADDPS (DX), [Z1-Z4], K3, Z15 // 6272774baa3a
+ V4FNMADDPS 99(R15)(R15*1), [Z11-Z14], K3, Z15 // 6212274baabc3f63000000
+ V4FNMADDPS (DX), [Z11-Z14], K3, Z15 // 6272274baa3a
+ V4FNMADDPS 99(R15)(R15*1), [Z21-Z24], K3, Z15 // 62125743aabc3f63000000
+ V4FNMADDPS (DX), [Z21-Z24], K3, Z15 // 62725743aa3a
+ V4FNMADDPS 99(R15)(R15*1), [Z1-Z4], K3, Z12 // 6212774baaa43f63000000
+ V4FNMADDPS (DX), [Z1-Z4], K3, Z12 // 6272774baa22
+ V4FNMADDPS 99(R15)(R15*1), [Z11-Z14], K3, Z12 // 6212274baaa43f63000000
+ V4FNMADDPS (DX), [Z11-Z14], K3, Z12 // 6272274baa22
+ V4FNMADDPS 99(R15)(R15*1), [Z21-Z24], K3, Z12 // 62125743aaa43f63000000
+ V4FNMADDPS (DX), [Z21-Z24], K3, Z12 // 62725743aa22
+ V4FNMADDSS -17(BP)(SI*8), [X1-X4], K4, X11 // 6272770cab9cf5efffffff or 6272772cab9cf5efffffff or 6272774cab9cf5efffffff
+ V4FNMADDSS (R15), [X1-X4], K4, X11 // 6252770cab1f or 6252772cab1f or 6252774cab1f
+ V4FNMADDSS -17(BP)(SI*8), [X11-X14], K4, X11 // 6272270cab9cf5efffffff or 6272272cab9cf5efffffff or 6272274cab9cf5efffffff
+ V4FNMADDSS (R15), [X11-X14], K4, X11 // 6252270cab1f or 6252272cab1f or 6252274cab1f
+ V4FNMADDSS -17(BP)(SI*8), [X21-X24], K4, X11 // 62725704ab9cf5efffffff or 62725724ab9cf5efffffff or 62725744ab9cf5efffffff
+ V4FNMADDSS (R15), [X21-X24], K4, X11 // 62525704ab1f or 62525724ab1f or 62525744ab1f
+ V4FNMADDSS -17(BP)(SI*8), [X1-X4], K4, X15 // 6272770cabbcf5efffffff or 6272772cabbcf5efffffff or 6272774cabbcf5efffffff
+ V4FNMADDSS (R15), [X1-X4], K4, X15 // 6252770cab3f or 6252772cab3f or 6252774cab3f
+ V4FNMADDSS -17(BP)(SI*8), [X11-X14], K4, X15 // 6272270cabbcf5efffffff or 6272272cabbcf5efffffff or 6272274cabbcf5efffffff
+ V4FNMADDSS (R15), [X11-X14], K4, X15 // 6252270cab3f or 6252272cab3f or 6252274cab3f
+ V4FNMADDSS -17(BP)(SI*8), [X21-X24], K4, X15 // 62725704abbcf5efffffff or 62725724abbcf5efffffff or 62725744abbcf5efffffff
+ V4FNMADDSS (R15), [X21-X24], K4, X15 // 62525704ab3f or 62525724ab3f or 62525744ab3f
+ V4FNMADDSS -17(BP)(SI*8), [X1-X4], K4, X30 // 6262770cabb4f5efffffff or 6262772cabb4f5efffffff or 6262774cabb4f5efffffff
+ V4FNMADDSS (R15), [X1-X4], K4, X30 // 6242770cab37 or 6242772cab37 or 6242774cab37
+ V4FNMADDSS -17(BP)(SI*8), [X11-X14], K4, X30 // 6262270cabb4f5efffffff or 6262272cabb4f5efffffff or 6262274cabb4f5efffffff
+ V4FNMADDSS (R15), [X11-X14], K4, X30 // 6242270cab37 or 6242272cab37 or 6242274cab37
+ V4FNMADDSS -17(BP)(SI*8), [X21-X24], K4, X30 // 62625704abb4f5efffffff or 62625724abb4f5efffffff or 62625744abb4f5efffffff
+ V4FNMADDSS (R15), [X21-X24], K4, X30 // 62425704ab37 or 62425724ab37 or 62425744ab37
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4vnniw.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4vnniw.s
new file mode 100644
index 0000000..5a80ed0
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_4vnniw.s
@@ -0,0 +1,30 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_4vnniw(SB), NOSPLIT, $0
+ VP4DPWSSD 7(SI)(DI*1), [Z2-Z5], K4, Z17 // 62e26f4c528c3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z2-Z5], K4, Z17 // 62e26f4c528cda0f000000
+ VP4DPWSSD 7(SI)(DI*1), [Z12-Z15], K4, Z17 // 62e21f4c528c3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z12-Z15], K4, Z17 // 62e21f4c528cda0f000000
+ VP4DPWSSD 7(SI)(DI*1), [Z22-Z25], K4, Z17 // 62e24f44528c3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z22-Z25], K4, Z17 // 62e24f44528cda0f000000
+ VP4DPWSSD 7(SI)(DI*1), [Z2-Z5], K4, Z23 // 62e26f4c52bc3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z2-Z5], K4, Z23 // 62e26f4c52bcda0f000000
+ VP4DPWSSD 7(SI)(DI*1), [Z12-Z15], K4, Z23 // 62e21f4c52bc3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z12-Z15], K4, Z23 // 62e21f4c52bcda0f000000
+ VP4DPWSSD 7(SI)(DI*1), [Z22-Z25], K4, Z23 // 62e24f4452bc3e07000000
+ VP4DPWSSD 15(DX)(BX*8), [Z22-Z25], K4, Z23 // 62e24f4452bcda0f000000
+ VP4DPWSSDS -7(DI)(R8*1), [Z4-Z7], K1, Z31 // 62225f4953bc07f9ffffff
+ VP4DPWSSDS (SP), [Z4-Z7], K1, Z31 // 62625f49533c24
+ VP4DPWSSDS -7(DI)(R8*1), [Z14-Z17], K1, Z31 // 62220f4953bc07f9ffffff
+ VP4DPWSSDS (SP), [Z14-Z17], K1, Z31 // 62620f49533c24
+ VP4DPWSSDS -7(DI)(R8*1), [Z24-Z27], K1, Z31 // 62223f4153bc07f9ffffff
+ VP4DPWSSDS (SP), [Z24-Z27], K1, Z31 // 62623f41533c24
+ VP4DPWSSDS -7(DI)(R8*1), [Z4-Z7], K1, Z0 // 62b25f49538407f9ffffff
+ VP4DPWSSDS (SP), [Z4-Z7], K1, Z0 // 62f25f49530424
+ VP4DPWSSDS -7(DI)(R8*1), [Z14-Z17], K1, Z0 // 62b20f49538407f9ffffff
+ VP4DPWSSDS (SP), [Z14-Z17], K1, Z0 // 62f20f49530424
+ VP4DPWSSDS -7(DI)(R8*1), [Z24-Z27], K1, Z0 // 62b23f41538407f9ffffff
+ VP4DPWSSDS (SP), [Z24-Z27], K1, Z0 // 62f23f41530424
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_bitalg.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_bitalg.s
new file mode 100644
index 0000000..fc9dd0c
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_bitalg.s
@@ -0,0 +1,154 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_bitalg(SB), NOSPLIT, $0
+ VPOPCNTB X14, K4, X16 // 62c27d0c54c6
+ VPOPCNTB X19, K4, X16 // 62a27d0c54c3
+ VPOPCNTB X8, K4, X16 // 62c27d0c54c0
+ VPOPCNTB 15(R8)(R14*1), K4, X16 // 62827d0c5484300f000000
+ VPOPCNTB 15(R8)(R14*2), K4, X16 // 62827d0c5484700f000000
+ VPOPCNTB X14, K4, X14 // 62527d0c54f6
+ VPOPCNTB X19, K4, X14 // 62327d0c54f3
+ VPOPCNTB X8, K4, X14 // 62527d0c54f0
+ VPOPCNTB 15(R8)(R14*1), K4, X14 // 62127d0c54b4300f000000
+ VPOPCNTB 15(R8)(R14*2), K4, X14 // 62127d0c54b4700f000000
+ VPOPCNTB X14, K4, X11 // 62527d0c54de
+ VPOPCNTB X19, K4, X11 // 62327d0c54db
+ VPOPCNTB X8, K4, X11 // 62527d0c54d8
+ VPOPCNTB 15(R8)(R14*1), K4, X11 // 62127d0c549c300f000000
+ VPOPCNTB 15(R8)(R14*2), K4, X11 // 62127d0c549c700f000000
+ VPOPCNTB Y14, K4, Y24 // 62427d2c54c6
+ VPOPCNTB Y21, K4, Y24 // 62227d2c54c5
+ VPOPCNTB Y1, K4, Y24 // 62627d2c54c1
+ VPOPCNTB 15(R8)(R14*8), K4, Y24 // 62027d2c5484f00f000000
+ VPOPCNTB -15(R14)(R15*2), K4, Y24 // 62027d2c54847ef1ffffff
+ VPOPCNTB Y14, K4, Y13 // 62527d2c54ee
+ VPOPCNTB Y21, K4, Y13 // 62327d2c54ed
+ VPOPCNTB Y1, K4, Y13 // 62727d2c54e9
+ VPOPCNTB 15(R8)(R14*8), K4, Y13 // 62127d2c54acf00f000000
+ VPOPCNTB -15(R14)(R15*2), K4, Y13 // 62127d2c54ac7ef1ffffff
+ VPOPCNTB Y14, K4, Y20 // 62c27d2c54e6
+ VPOPCNTB Y21, K4, Y20 // 62a27d2c54e5
+ VPOPCNTB Y1, K4, Y20 // 62e27d2c54e1
+ VPOPCNTB 15(R8)(R14*8), K4, Y20 // 62827d2c54a4f00f000000
+ VPOPCNTB -15(R14)(R15*2), K4, Y20 // 62827d2c54a47ef1ffffff
+ VPOPCNTB Z18, K7, Z13 // 62327d4f54ea
+ VPOPCNTB Z8, K7, Z13 // 62527d4f54e8
+ VPOPCNTB 17(SP)(BP*8), K7, Z13 // 62727d4f54acec11000000
+ VPOPCNTB 17(SP)(BP*4), K7, Z13 // 62727d4f54acac11000000
+ VPOPCNTW X20, K3, X11 // 6232fd0b54dc
+ VPOPCNTW X5, K3, X11 // 6272fd0b54dd
+ VPOPCNTW X25, K3, X11 // 6212fd0b54d9
+ VPOPCNTW (CX), K3, X11 // 6272fd0b5419
+ VPOPCNTW 99(R15), K3, X11 // 6252fd0b549f63000000
+ VPOPCNTW X20, K3, X23 // 62a2fd0b54fc
+ VPOPCNTW X5, K3, X23 // 62e2fd0b54fd
+ VPOPCNTW X25, K3, X23 // 6282fd0b54f9
+ VPOPCNTW (CX), K3, X23 // 62e2fd0b5439
+ VPOPCNTW 99(R15), K3, X23 // 62c2fd0b54bf63000000
+ VPOPCNTW X20, K3, X2 // 62b2fd0b54d4
+ VPOPCNTW X5, K3, X2 // 62f2fd0b54d5
+ VPOPCNTW X25, K3, X2 // 6292fd0b54d1
+ VPOPCNTW (CX), K3, X2 // 62f2fd0b5411
+ VPOPCNTW 99(R15), K3, X2 // 62d2fd0b549763000000
+ VPOPCNTW Y13, K3, Y21 // 62c2fd2b54ed
+ VPOPCNTW Y18, K3, Y21 // 62a2fd2b54ea
+ VPOPCNTW Y24, K3, Y21 // 6282fd2b54e8
+ VPOPCNTW (SI), K3, Y21 // 62e2fd2b542e
+ VPOPCNTW 7(SI)(DI*2), K3, Y21 // 62e2fd2b54ac7e07000000
+ VPOPCNTW Y13, K3, Y7 // 62d2fd2b54fd
+ VPOPCNTW Y18, K3, Y7 // 62b2fd2b54fa
+ VPOPCNTW Y24, K3, Y7 // 6292fd2b54f8
+ VPOPCNTW (SI), K3, Y7 // 62f2fd2b543e
+ VPOPCNTW 7(SI)(DI*2), K3, Y7 // 62f2fd2b54bc7e07000000
+ VPOPCNTW Y13, K3, Y30 // 6242fd2b54f5
+ VPOPCNTW Y18, K3, Y30 // 6222fd2b54f2
+ VPOPCNTW Y24, K3, Y30 // 6202fd2b54f0
+ VPOPCNTW (SI), K3, Y30 // 6262fd2b5436
+ VPOPCNTW 7(SI)(DI*2), K3, Y30 // 6262fd2b54b47e07000000
+ VPOPCNTW Z28, K3, Z12 // 6212fd4b54e4
+ VPOPCNTW Z13, K3, Z12 // 6252fd4b54e5
+ VPOPCNTW 7(AX), K3, Z12 // 6272fd4b54a007000000
+ VPOPCNTW (DI), K3, Z12 // 6272fd4b5427
+ VPOPCNTW Z28, K3, Z16 // 6282fd4b54c4
+ VPOPCNTW Z13, K3, Z16 // 62c2fd4b54c5
+ VPOPCNTW 7(AX), K3, Z16 // 62e2fd4b548007000000
+ VPOPCNTW (DI), K3, Z16 // 62e2fd4b5407
+ VPSHUFBITQMB X24, X7, K6, K0 // 6292450e8fc0
+ VPSHUFBITQMB X7, X7, K6, K0 // 62f2450e8fc7
+ VPSHUFBITQMB X0, X7, K6, K0 // 62f2450e8fc0
+ VPSHUFBITQMB (R8), X7, K6, K0 // 62d2450e8f00
+ VPSHUFBITQMB 15(DX)(BX*2), X7, K6, K0 // 62f2450e8f845a0f000000
+ VPSHUFBITQMB X24, X13, K6, K0 // 6292150e8fc0
+ VPSHUFBITQMB X7, X13, K6, K0 // 62f2150e8fc7
+ VPSHUFBITQMB X0, X13, K6, K0 // 62f2150e8fc0
+ VPSHUFBITQMB (R8), X13, K6, K0 // 62d2150e8f00
+ VPSHUFBITQMB 15(DX)(BX*2), X13, K6, K0 // 62f2150e8f845a0f000000
+ VPSHUFBITQMB X24, X8, K6, K0 // 62923d0e8fc0
+ VPSHUFBITQMB X7, X8, K6, K0 // 62f23d0e8fc7
+ VPSHUFBITQMB X0, X8, K6, K0 // 62f23d0e8fc0
+ VPSHUFBITQMB (R8), X8, K6, K0 // 62d23d0e8f00
+ VPSHUFBITQMB 15(DX)(BX*2), X8, K6, K0 // 62f23d0e8f845a0f000000
+ VPSHUFBITQMB X24, X7, K6, K5 // 6292450e8fe8
+ VPSHUFBITQMB X7, X7, K6, K5 // 62f2450e8fef
+ VPSHUFBITQMB X0, X7, K6, K5 // 62f2450e8fe8
+ VPSHUFBITQMB (R8), X7, K6, K5 // 62d2450e8f28
+ VPSHUFBITQMB 15(DX)(BX*2), X7, K6, K5 // 62f2450e8fac5a0f000000
+ VPSHUFBITQMB X24, X13, K6, K5 // 6292150e8fe8
+ VPSHUFBITQMB X7, X13, K6, K5 // 62f2150e8fef
+ VPSHUFBITQMB X0, X13, K6, K5 // 62f2150e8fe8
+ VPSHUFBITQMB (R8), X13, K6, K5 // 62d2150e8f28
+ VPSHUFBITQMB 15(DX)(BX*2), X13, K6, K5 // 62f2150e8fac5a0f000000
+ VPSHUFBITQMB X24, X8, K6, K5 // 62923d0e8fe8
+ VPSHUFBITQMB X7, X8, K6, K5 // 62f23d0e8fef
+ VPSHUFBITQMB X0, X8, K6, K5 // 62f23d0e8fe8
+ VPSHUFBITQMB (R8), X8, K6, K5 // 62d23d0e8f28
+ VPSHUFBITQMB 15(DX)(BX*2), X8, K6, K5 // 62f23d0e8fac5a0f000000
+ VPSHUFBITQMB Y14, Y2, K3, K6 // 62d26d2b8ff6
+ VPSHUFBITQMB Y8, Y2, K3, K6 // 62d26d2b8ff0
+ VPSHUFBITQMB Y20, Y2, K3, K6 // 62b26d2b8ff4
+ VPSHUFBITQMB -17(BP), Y2, K3, K6 // 62f26d2b8fb5efffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y2, K3, K6 // 62926d2b8fb4fef1ffffff
+ VPSHUFBITQMB Y14, Y7, K3, K6 // 62d2452b8ff6
+ VPSHUFBITQMB Y8, Y7, K3, K6 // 62d2452b8ff0
+ VPSHUFBITQMB Y20, Y7, K3, K6 // 62b2452b8ff4
+ VPSHUFBITQMB -17(BP), Y7, K3, K6 // 62f2452b8fb5efffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y7, K3, K6 // 6292452b8fb4fef1ffffff
+ VPSHUFBITQMB Y14, Y21, K3, K6 // 62d255238ff6
+ VPSHUFBITQMB Y8, Y21, K3, K6 // 62d255238ff0
+ VPSHUFBITQMB Y20, Y21, K3, K6 // 62b255238ff4
+ VPSHUFBITQMB -17(BP), Y21, K3, K6 // 62f255238fb5efffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y21, K3, K6 // 629255238fb4fef1ffffff
+ VPSHUFBITQMB Y14, Y2, K3, K5 // 62d26d2b8fee
+ VPSHUFBITQMB Y8, Y2, K3, K5 // 62d26d2b8fe8
+ VPSHUFBITQMB Y20, Y2, K3, K5 // 62b26d2b8fec
+ VPSHUFBITQMB -17(BP), Y2, K3, K5 // 62f26d2b8fadefffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y2, K3, K5 // 62926d2b8facfef1ffffff
+ VPSHUFBITQMB Y14, Y7, K3, K5 // 62d2452b8fee
+ VPSHUFBITQMB Y8, Y7, K3, K5 // 62d2452b8fe8
+ VPSHUFBITQMB Y20, Y7, K3, K5 // 62b2452b8fec
+ VPSHUFBITQMB -17(BP), Y7, K3, K5 // 62f2452b8fadefffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y7, K3, K5 // 6292452b8facfef1ffffff
+ VPSHUFBITQMB Y14, Y21, K3, K5 // 62d255238fee
+ VPSHUFBITQMB Y8, Y21, K3, K5 // 62d255238fe8
+ VPSHUFBITQMB Y20, Y21, K3, K5 // 62b255238fec
+ VPSHUFBITQMB -17(BP), Y21, K3, K5 // 62f255238fadefffffff
+ VPSHUFBITQMB -15(R14)(R15*8), Y21, K3, K5 // 629255238facfef1ffffff
+ VPSHUFBITQMB Z3, Z6, K7, K1 // 62f24d4f8fcb
+ VPSHUFBITQMB Z21, Z6, K7, K1 // 62b24d4f8fcd
+ VPSHUFBITQMB -15(R14)(R15*1), Z6, K7, K1 // 62924d4f8f8c3ef1ffffff
+ VPSHUFBITQMB -15(BX), Z6, K7, K1 // 62f24d4f8f8bf1ffffff
+ VPSHUFBITQMB Z3, Z25, K7, K1 // 62f235478fcb
+ VPSHUFBITQMB Z21, Z25, K7, K1 // 62b235478fcd
+ VPSHUFBITQMB -15(R14)(R15*1), Z25, K7, K1 // 629235478f8c3ef1ffffff
+ VPSHUFBITQMB -15(BX), Z25, K7, K1 // 62f235478f8bf1ffffff
+ VPSHUFBITQMB Z3, Z6, K7, K5 // 62f24d4f8feb
+ VPSHUFBITQMB Z21, Z6, K7, K5 // 62b24d4f8fed
+ VPSHUFBITQMB -15(R14)(R15*1), Z6, K7, K5 // 62924d4f8fac3ef1ffffff
+ VPSHUFBITQMB -15(BX), Z6, K7, K5 // 62f24d4f8fabf1ffffff
+ VPSHUFBITQMB Z3, Z25, K7, K5 // 62f235478feb
+ VPSHUFBITQMB Z21, Z25, K7, K5 // 62b235478fed
+ VPSHUFBITQMB -15(R14)(R15*1), Z25, K7, K5 // 629235478fac3ef1ffffff
+ VPSHUFBITQMB -15(BX), Z25, K7, K5 // 62f235478fabf1ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_ifma.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_ifma.s
new file mode 100644
index 0000000..6a1e5ba
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_ifma.s
@@ -0,0 +1,194 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_ifma(SB), NOSPLIT, $0
+ VPMADD52HUQ X7, X11, K1, X18 // 62e2a509b5d7
+ VPMADD52HUQ X0, X11, K1, X18 // 62e2a509b5d0
+ VPMADD52HUQ 17(SP)(BP*2), X11, K1, X18 // 62e2a509b5946c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X11, K1, X18 // 62a2a509b59487f9ffffff
+ VPMADD52HUQ X7, X31, K1, X18 // 62e28501b5d7
+ VPMADD52HUQ X0, X31, K1, X18 // 62e28501b5d0
+ VPMADD52HUQ 17(SP)(BP*2), X31, K1, X18 // 62e28501b5946c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X31, K1, X18 // 62a28501b59487f9ffffff
+ VPMADD52HUQ X7, X3, K1, X18 // 62e2e509b5d7
+ VPMADD52HUQ X0, X3, K1, X18 // 62e2e509b5d0
+ VPMADD52HUQ 17(SP)(BP*2), X3, K1, X18 // 62e2e509b5946c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X3, K1, X18 // 62a2e509b59487f9ffffff
+ VPMADD52HUQ X7, X11, K1, X21 // 62e2a509b5ef
+ VPMADD52HUQ X0, X11, K1, X21 // 62e2a509b5e8
+ VPMADD52HUQ 17(SP)(BP*2), X11, K1, X21 // 62e2a509b5ac6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X11, K1, X21 // 62a2a509b5ac87f9ffffff
+ VPMADD52HUQ X7, X31, K1, X21 // 62e28501b5ef
+ VPMADD52HUQ X0, X31, K1, X21 // 62e28501b5e8
+ VPMADD52HUQ 17(SP)(BP*2), X31, K1, X21 // 62e28501b5ac6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X31, K1, X21 // 62a28501b5ac87f9ffffff
+ VPMADD52HUQ X7, X3, K1, X21 // 62e2e509b5ef
+ VPMADD52HUQ X0, X3, K1, X21 // 62e2e509b5e8
+ VPMADD52HUQ 17(SP)(BP*2), X3, K1, X21 // 62e2e509b5ac6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X3, K1, X21 // 62a2e509b5ac87f9ffffff
+ VPMADD52HUQ X7, X11, K1, X1 // 62f2a509b5cf
+ VPMADD52HUQ X0, X11, K1, X1 // 62f2a509b5c8
+ VPMADD52HUQ 17(SP)(BP*2), X11, K1, X1 // 62f2a509b58c6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X11, K1, X1 // 62b2a509b58c87f9ffffff
+ VPMADD52HUQ X7, X31, K1, X1 // 62f28501b5cf
+ VPMADD52HUQ X0, X31, K1, X1 // 62f28501b5c8
+ VPMADD52HUQ 17(SP)(BP*2), X31, K1, X1 // 62f28501b58c6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X31, K1, X1 // 62b28501b58c87f9ffffff
+ VPMADD52HUQ X7, X3, K1, X1 // 62f2e509b5cf
+ VPMADD52HUQ X0, X3, K1, X1 // 62f2e509b5c8
+ VPMADD52HUQ 17(SP)(BP*2), X3, K1, X1 // 62f2e509b58c6c11000000
+ VPMADD52HUQ -7(DI)(R8*4), X3, K1, X1 // 62b2e509b58c87f9ffffff
+ VPMADD52HUQ Y28, Y31, K7, Y17 // 62828527b5cc
+ VPMADD52HUQ Y13, Y31, K7, Y17 // 62c28527b5cd
+ VPMADD52HUQ Y7, Y31, K7, Y17 // 62e28527b5cf
+ VPMADD52HUQ (R8), Y31, K7, Y17 // 62c28527b508
+ VPMADD52HUQ 15(DX)(BX*2), Y31, K7, Y17 // 62e28527b58c5a0f000000
+ VPMADD52HUQ Y28, Y8, K7, Y17 // 6282bd2fb5cc
+ VPMADD52HUQ Y13, Y8, K7, Y17 // 62c2bd2fb5cd
+ VPMADD52HUQ Y7, Y8, K7, Y17 // 62e2bd2fb5cf
+ VPMADD52HUQ (R8), Y8, K7, Y17 // 62c2bd2fb508
+ VPMADD52HUQ 15(DX)(BX*2), Y8, K7, Y17 // 62e2bd2fb58c5a0f000000
+ VPMADD52HUQ Y28, Y1, K7, Y17 // 6282f52fb5cc
+ VPMADD52HUQ Y13, Y1, K7, Y17 // 62c2f52fb5cd
+ VPMADD52HUQ Y7, Y1, K7, Y17 // 62e2f52fb5cf
+ VPMADD52HUQ (R8), Y1, K7, Y17 // 62c2f52fb508
+ VPMADD52HUQ 15(DX)(BX*2), Y1, K7, Y17 // 62e2f52fb58c5a0f000000
+ VPMADD52HUQ Y28, Y31, K7, Y7 // 62928527b5fc
+ VPMADD52HUQ Y13, Y31, K7, Y7 // 62d28527b5fd
+ VPMADD52HUQ Y7, Y31, K7, Y7 // 62f28527b5ff
+ VPMADD52HUQ (R8), Y31, K7, Y7 // 62d28527b538
+ VPMADD52HUQ 15(DX)(BX*2), Y31, K7, Y7 // 62f28527b5bc5a0f000000
+ VPMADD52HUQ Y28, Y8, K7, Y7 // 6292bd2fb5fc
+ VPMADD52HUQ Y13, Y8, K7, Y7 // 62d2bd2fb5fd
+ VPMADD52HUQ Y7, Y8, K7, Y7 // 62f2bd2fb5ff
+ VPMADD52HUQ (R8), Y8, K7, Y7 // 62d2bd2fb538
+ VPMADD52HUQ 15(DX)(BX*2), Y8, K7, Y7 // 62f2bd2fb5bc5a0f000000
+ VPMADD52HUQ Y28, Y1, K7, Y7 // 6292f52fb5fc
+ VPMADD52HUQ Y13, Y1, K7, Y7 // 62d2f52fb5fd
+ VPMADD52HUQ Y7, Y1, K7, Y7 // 62f2f52fb5ff
+ VPMADD52HUQ (R8), Y1, K7, Y7 // 62d2f52fb538
+ VPMADD52HUQ 15(DX)(BX*2), Y1, K7, Y7 // 62f2f52fb5bc5a0f000000
+ VPMADD52HUQ Y28, Y31, K7, Y9 // 62128527b5cc
+ VPMADD52HUQ Y13, Y31, K7, Y9 // 62528527b5cd
+ VPMADD52HUQ Y7, Y31, K7, Y9 // 62728527b5cf
+ VPMADD52HUQ (R8), Y31, K7, Y9 // 62528527b508
+ VPMADD52HUQ 15(DX)(BX*2), Y31, K7, Y9 // 62728527b58c5a0f000000
+ VPMADD52HUQ Y28, Y8, K7, Y9 // 6212bd2fb5cc
+ VPMADD52HUQ Y13, Y8, K7, Y9 // 6252bd2fb5cd
+ VPMADD52HUQ Y7, Y8, K7, Y9 // 6272bd2fb5cf
+ VPMADD52HUQ (R8), Y8, K7, Y9 // 6252bd2fb508
+ VPMADD52HUQ 15(DX)(BX*2), Y8, K7, Y9 // 6272bd2fb58c5a0f000000
+ VPMADD52HUQ Y28, Y1, K7, Y9 // 6212f52fb5cc
+ VPMADD52HUQ Y13, Y1, K7, Y9 // 6252f52fb5cd
+ VPMADD52HUQ Y7, Y1, K7, Y9 // 6272f52fb5cf
+ VPMADD52HUQ (R8), Y1, K7, Y9 // 6252f52fb508
+ VPMADD52HUQ 15(DX)(BX*2), Y1, K7, Y9 // 6272f52fb58c5a0f000000
+ VPMADD52HUQ Z23, Z23, K1, Z27 // 6222c541b5df
+ VPMADD52HUQ Z6, Z23, K1, Z27 // 6262c541b5de
+ VPMADD52HUQ 17(SP), Z23, K1, Z27 // 6262c541b59c2411000000
+ VPMADD52HUQ -17(BP)(SI*4), Z23, K1, Z27 // 6262c541b59cb5efffffff
+ VPMADD52HUQ Z23, Z5, K1, Z27 // 6222d549b5df
+ VPMADD52HUQ Z6, Z5, K1, Z27 // 6262d549b5de
+ VPMADD52HUQ 17(SP), Z5, K1, Z27 // 6262d549b59c2411000000
+ VPMADD52HUQ -17(BP)(SI*4), Z5, K1, Z27 // 6262d549b59cb5efffffff
+ VPMADD52HUQ Z23, Z23, K1, Z15 // 6232c541b5ff
+ VPMADD52HUQ Z6, Z23, K1, Z15 // 6272c541b5fe
+ VPMADD52HUQ 17(SP), Z23, K1, Z15 // 6272c541b5bc2411000000
+ VPMADD52HUQ -17(BP)(SI*4), Z23, K1, Z15 // 6272c541b5bcb5efffffff
+ VPMADD52HUQ Z23, Z5, K1, Z15 // 6232d549b5ff
+ VPMADD52HUQ Z6, Z5, K1, Z15 // 6272d549b5fe
+ VPMADD52HUQ 17(SP), Z5, K1, Z15 // 6272d549b5bc2411000000
+ VPMADD52HUQ -17(BP)(SI*4), Z5, K1, Z15 // 6272d549b5bcb5efffffff
+ VPMADD52LUQ X5, X9, K1, X24 // 6262b509b4c5
+ VPMADD52LUQ X31, X9, K1, X24 // 6202b509b4c7
+ VPMADD52LUQ X3, X9, K1, X24 // 6262b509b4c3
+ VPMADD52LUQ 15(R8), X9, K1, X24 // 6242b509b4800f000000
+ VPMADD52LUQ (BP), X9, K1, X24 // 6262b509b44500
+ VPMADD52LUQ X5, X7, K1, X24 // 6262c509b4c5
+ VPMADD52LUQ X31, X7, K1, X24 // 6202c509b4c7
+ VPMADD52LUQ X3, X7, K1, X24 // 6262c509b4c3
+ VPMADD52LUQ 15(R8), X7, K1, X24 // 6242c509b4800f000000
+ VPMADD52LUQ (BP), X7, K1, X24 // 6262c509b44500
+ VPMADD52LUQ X5, X14, K1, X24 // 62628d09b4c5
+ VPMADD52LUQ X31, X14, K1, X24 // 62028d09b4c7
+ VPMADD52LUQ X3, X14, K1, X24 // 62628d09b4c3
+ VPMADD52LUQ 15(R8), X14, K1, X24 // 62428d09b4800f000000
+ VPMADD52LUQ (BP), X14, K1, X24 // 62628d09b44500
+ VPMADD52LUQ X5, X9, K1, X20 // 62e2b509b4e5
+ VPMADD52LUQ X31, X9, K1, X20 // 6282b509b4e7
+ VPMADD52LUQ X3, X9, K1, X20 // 62e2b509b4e3
+ VPMADD52LUQ 15(R8), X9, K1, X20 // 62c2b509b4a00f000000
+ VPMADD52LUQ (BP), X9, K1, X20 // 62e2b509b46500
+ VPMADD52LUQ X5, X7, K1, X20 // 62e2c509b4e5
+ VPMADD52LUQ X31, X7, K1, X20 // 6282c509b4e7
+ VPMADD52LUQ X3, X7, K1, X20 // 62e2c509b4e3
+ VPMADD52LUQ 15(R8), X7, K1, X20 // 62c2c509b4a00f000000
+ VPMADD52LUQ (BP), X7, K1, X20 // 62e2c509b46500
+ VPMADD52LUQ X5, X14, K1, X20 // 62e28d09b4e5
+ VPMADD52LUQ X31, X14, K1, X20 // 62828d09b4e7
+ VPMADD52LUQ X3, X14, K1, X20 // 62e28d09b4e3
+ VPMADD52LUQ 15(R8), X14, K1, X20 // 62c28d09b4a00f000000
+ VPMADD52LUQ (BP), X14, K1, X20 // 62e28d09b46500
+ VPMADD52LUQ X5, X9, K1, X7 // 62f2b509b4fd
+ VPMADD52LUQ X31, X9, K1, X7 // 6292b509b4ff
+ VPMADD52LUQ X3, X9, K1, X7 // 62f2b509b4fb
+ VPMADD52LUQ 15(R8), X9, K1, X7 // 62d2b509b4b80f000000
+ VPMADD52LUQ (BP), X9, K1, X7 // 62f2b509b47d00
+ VPMADD52LUQ X5, X7, K1, X7 // 62f2c509b4fd
+ VPMADD52LUQ X31, X7, K1, X7 // 6292c509b4ff
+ VPMADD52LUQ X3, X7, K1, X7 // 62f2c509b4fb
+ VPMADD52LUQ 15(R8), X7, K1, X7 // 62d2c509b4b80f000000
+ VPMADD52LUQ (BP), X7, K1, X7 // 62f2c509b47d00
+ VPMADD52LUQ X5, X14, K1, X7 // 62f28d09b4fd
+ VPMADD52LUQ X31, X14, K1, X7 // 62928d09b4ff
+ VPMADD52LUQ X3, X14, K1, X7 // 62f28d09b4fb
+ VPMADD52LUQ 15(R8), X14, K1, X7 // 62d28d09b4b80f000000
+ VPMADD52LUQ (BP), X14, K1, X7 // 62f28d09b47d00
+ VPMADD52LUQ Y3, Y9, K1, Y2 // 62f2b529b4d3
+ VPMADD52LUQ Y2, Y9, K1, Y2 // 62f2b529b4d2
+ VPMADD52LUQ Y9, Y9, K1, Y2 // 62d2b529b4d1
+ VPMADD52LUQ 17(SP)(BP*1), Y9, K1, Y2 // 62f2b529b4942c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y9, K1, Y2 // 62f2b529b494d1f9ffffff
+ VPMADD52LUQ Y3, Y1, K1, Y2 // 62f2f529b4d3
+ VPMADD52LUQ Y2, Y1, K1, Y2 // 62f2f529b4d2
+ VPMADD52LUQ Y9, Y1, K1, Y2 // 62d2f529b4d1
+ VPMADD52LUQ 17(SP)(BP*1), Y1, K1, Y2 // 62f2f529b4942c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y1, K1, Y2 // 62f2f529b494d1f9ffffff
+ VPMADD52LUQ Y3, Y9, K1, Y21 // 62e2b529b4eb
+ VPMADD52LUQ Y2, Y9, K1, Y21 // 62e2b529b4ea
+ VPMADD52LUQ Y9, Y9, K1, Y21 // 62c2b529b4e9
+ VPMADD52LUQ 17(SP)(BP*1), Y9, K1, Y21 // 62e2b529b4ac2c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y9, K1, Y21 // 62e2b529b4acd1f9ffffff
+ VPMADD52LUQ Y3, Y1, K1, Y21 // 62e2f529b4eb
+ VPMADD52LUQ Y2, Y1, K1, Y21 // 62e2f529b4ea
+ VPMADD52LUQ Y9, Y1, K1, Y21 // 62c2f529b4e9
+ VPMADD52LUQ 17(SP)(BP*1), Y1, K1, Y21 // 62e2f529b4ac2c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y1, K1, Y21 // 62e2f529b4acd1f9ffffff
+ VPMADD52LUQ Y3, Y9, K1, Y12 // 6272b529b4e3
+ VPMADD52LUQ Y2, Y9, K1, Y12 // 6272b529b4e2
+ VPMADD52LUQ Y9, Y9, K1, Y12 // 6252b529b4e1
+ VPMADD52LUQ 17(SP)(BP*1), Y9, K1, Y12 // 6272b529b4a42c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y9, K1, Y12 // 6272b529b4a4d1f9ffffff
+ VPMADD52LUQ Y3, Y1, K1, Y12 // 6272f529b4e3
+ VPMADD52LUQ Y2, Y1, K1, Y12 // 6272f529b4e2
+ VPMADD52LUQ Y9, Y1, K1, Y12 // 6252f529b4e1
+ VPMADD52LUQ 17(SP)(BP*1), Y1, K1, Y12 // 6272f529b4a42c11000000
+ VPMADD52LUQ -7(CX)(DX*8), Y1, K1, Y12 // 6272f529b4a4d1f9ffffff
+ VPMADD52LUQ Z16, Z21, K7, Z8 // 6232d547b4c0
+ VPMADD52LUQ Z13, Z21, K7, Z8 // 6252d547b4c5
+ VPMADD52LUQ 7(AX), Z21, K7, Z8 // 6272d547b48007000000
+ VPMADD52LUQ (DI), Z21, K7, Z8 // 6272d547b407
+ VPMADD52LUQ Z16, Z5, K7, Z8 // 6232d54fb4c0
+ VPMADD52LUQ Z13, Z5, K7, Z8 // 6252d54fb4c5
+ VPMADD52LUQ 7(AX), Z5, K7, Z8 // 6272d54fb48007000000
+ VPMADD52LUQ (DI), Z5, K7, Z8 // 6272d54fb407
+ VPMADD52LUQ Z16, Z21, K7, Z28 // 6222d547b4e0
+ VPMADD52LUQ Z13, Z21, K7, Z28 // 6242d547b4e5
+ VPMADD52LUQ 7(AX), Z21, K7, Z28 // 6262d547b4a007000000
+ VPMADD52LUQ (DI), Z21, K7, Z28 // 6262d547b427
+ VPMADD52LUQ Z16, Z5, K7, Z28 // 6222d54fb4e0
+ VPMADD52LUQ Z13, Z5, K7, Z28 // 6242d54fb4e5
+ VPMADD52LUQ 7(AX), Z5, K7, Z28 // 6262d54fb4a007000000
+ VPMADD52LUQ (DI), Z5, K7, Z28 // 6262d54fb427
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi.s
new file mode 100644
index 0000000..d598acb
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi.s
@@ -0,0 +1,415 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_vbmi(SB), NOSPLIT, $0
+ VPERMB X26, X20, K1, X23 // 62825d018dfa
+ VPERMB X19, X20, K1, X23 // 62a25d018dfb
+ VPERMB X0, X20, K1, X23 // 62e25d018df8
+ VPERMB 7(SI)(DI*4), X20, K1, X23 // 62e25d018dbcbe07000000
+ VPERMB -7(DI)(R8*2), X20, K1, X23 // 62a25d018dbc47f9ffffff
+ VPERMB X26, X2, K1, X23 // 62826d098dfa
+ VPERMB X19, X2, K1, X23 // 62a26d098dfb
+ VPERMB X0, X2, K1, X23 // 62e26d098df8
+ VPERMB 7(SI)(DI*4), X2, K1, X23 // 62e26d098dbcbe07000000
+ VPERMB -7(DI)(R8*2), X2, K1, X23 // 62a26d098dbc47f9ffffff
+ VPERMB X26, X9, K1, X23 // 628235098dfa
+ VPERMB X19, X9, K1, X23 // 62a235098dfb
+ VPERMB X0, X9, K1, X23 // 62e235098df8
+ VPERMB 7(SI)(DI*4), X9, K1, X23 // 62e235098dbcbe07000000
+ VPERMB -7(DI)(R8*2), X9, K1, X23 // 62a235098dbc47f9ffffff
+ VPERMB X26, X20, K1, X30 // 62025d018df2
+ VPERMB X19, X20, K1, X30 // 62225d018df3
+ VPERMB X0, X20, K1, X30 // 62625d018df0
+ VPERMB 7(SI)(DI*4), X20, K1, X30 // 62625d018db4be07000000
+ VPERMB -7(DI)(R8*2), X20, K1, X30 // 62225d018db447f9ffffff
+ VPERMB X26, X2, K1, X30 // 62026d098df2
+ VPERMB X19, X2, K1, X30 // 62226d098df3
+ VPERMB X0, X2, K1, X30 // 62626d098df0
+ VPERMB 7(SI)(DI*4), X2, K1, X30 // 62626d098db4be07000000
+ VPERMB -7(DI)(R8*2), X2, K1, X30 // 62226d098db447f9ffffff
+ VPERMB X26, X9, K1, X30 // 620235098df2
+ VPERMB X19, X9, K1, X30 // 622235098df3
+ VPERMB X0, X9, K1, X30 // 626235098df0
+ VPERMB 7(SI)(DI*4), X9, K1, X30 // 626235098db4be07000000
+ VPERMB -7(DI)(R8*2), X9, K1, X30 // 622235098db447f9ffffff
+ VPERMB X26, X20, K1, X8 // 62125d018dc2
+ VPERMB X19, X20, K1, X8 // 62325d018dc3
+ VPERMB X0, X20, K1, X8 // 62725d018dc0
+ VPERMB 7(SI)(DI*4), X20, K1, X8 // 62725d018d84be07000000
+ VPERMB -7(DI)(R8*2), X20, K1, X8 // 62325d018d8447f9ffffff
+ VPERMB X26, X2, K1, X8 // 62126d098dc2
+ VPERMB X19, X2, K1, X8 // 62326d098dc3
+ VPERMB X0, X2, K1, X8 // 62726d098dc0
+ VPERMB 7(SI)(DI*4), X2, K1, X8 // 62726d098d84be07000000
+ VPERMB -7(DI)(R8*2), X2, K1, X8 // 62326d098d8447f9ffffff
+ VPERMB X26, X9, K1, X8 // 621235098dc2
+ VPERMB X19, X9, K1, X8 // 623235098dc3
+ VPERMB X0, X9, K1, X8 // 627235098dc0
+ VPERMB 7(SI)(DI*4), X9, K1, X8 // 627235098d84be07000000
+ VPERMB -7(DI)(R8*2), X9, K1, X8 // 623235098d8447f9ffffff
+ VPERMB Y5, Y31, K7, Y22 // 62e205278df5
+ VPERMB Y19, Y31, K7, Y22 // 62a205278df3
+ VPERMB Y31, Y31, K7, Y22 // 628205278df7
+ VPERMB 17(SP)(BP*1), Y31, K7, Y22 // 62e205278db42c11000000
+ VPERMB -7(CX)(DX*8), Y31, K7, Y22 // 62e205278db4d1f9ffffff
+ VPERMB Y5, Y5, K7, Y22 // 62e2552f8df5
+ VPERMB Y19, Y5, K7, Y22 // 62a2552f8df3
+ VPERMB Y31, Y5, K7, Y22 // 6282552f8df7
+ VPERMB 17(SP)(BP*1), Y5, K7, Y22 // 62e2552f8db42c11000000
+ VPERMB -7(CX)(DX*8), Y5, K7, Y22 // 62e2552f8db4d1f9ffffff
+ VPERMB Y5, Y0, K7, Y22 // 62e27d2f8df5
+ VPERMB Y19, Y0, K7, Y22 // 62a27d2f8df3
+ VPERMB Y31, Y0, K7, Y22 // 62827d2f8df7
+ VPERMB 17(SP)(BP*1), Y0, K7, Y22 // 62e27d2f8db42c11000000
+ VPERMB -7(CX)(DX*8), Y0, K7, Y22 // 62e27d2f8db4d1f9ffffff
+ VPERMB Y5, Y31, K7, Y9 // 627205278dcd
+ VPERMB Y19, Y31, K7, Y9 // 623205278dcb
+ VPERMB Y31, Y31, K7, Y9 // 621205278dcf
+ VPERMB 17(SP)(BP*1), Y31, K7, Y9 // 627205278d8c2c11000000
+ VPERMB -7(CX)(DX*8), Y31, K7, Y9 // 627205278d8cd1f9ffffff
+ VPERMB Y5, Y5, K7, Y9 // 6272552f8dcd
+ VPERMB Y19, Y5, K7, Y9 // 6232552f8dcb
+ VPERMB Y31, Y5, K7, Y9 // 6212552f8dcf
+ VPERMB 17(SP)(BP*1), Y5, K7, Y9 // 6272552f8d8c2c11000000
+ VPERMB -7(CX)(DX*8), Y5, K7, Y9 // 6272552f8d8cd1f9ffffff
+ VPERMB Y5, Y0, K7, Y9 // 62727d2f8dcd
+ VPERMB Y19, Y0, K7, Y9 // 62327d2f8dcb
+ VPERMB Y31, Y0, K7, Y9 // 62127d2f8dcf
+ VPERMB 17(SP)(BP*1), Y0, K7, Y9 // 62727d2f8d8c2c11000000
+ VPERMB -7(CX)(DX*8), Y0, K7, Y9 // 62727d2f8d8cd1f9ffffff
+ VPERMB Y5, Y31, K7, Y23 // 62e205278dfd
+ VPERMB Y19, Y31, K7, Y23 // 62a205278dfb
+ VPERMB Y31, Y31, K7, Y23 // 628205278dff
+ VPERMB 17(SP)(BP*1), Y31, K7, Y23 // 62e205278dbc2c11000000
+ VPERMB -7(CX)(DX*8), Y31, K7, Y23 // 62e205278dbcd1f9ffffff
+ VPERMB Y5, Y5, K7, Y23 // 62e2552f8dfd
+ VPERMB Y19, Y5, K7, Y23 // 62a2552f8dfb
+ VPERMB Y31, Y5, K7, Y23 // 6282552f8dff
+ VPERMB 17(SP)(BP*1), Y5, K7, Y23 // 62e2552f8dbc2c11000000
+ VPERMB -7(CX)(DX*8), Y5, K7, Y23 // 62e2552f8dbcd1f9ffffff
+ VPERMB Y5, Y0, K7, Y23 // 62e27d2f8dfd
+ VPERMB Y19, Y0, K7, Y23 // 62a27d2f8dfb
+ VPERMB Y31, Y0, K7, Y23 // 62827d2f8dff
+ VPERMB 17(SP)(BP*1), Y0, K7, Y23 // 62e27d2f8dbc2c11000000
+ VPERMB -7(CX)(DX*8), Y0, K7, Y23 // 62e27d2f8dbcd1f9ffffff
+ VPERMB Z3, Z8, K1, Z3 // 62f23d498ddb
+ VPERMB Z27, Z8, K1, Z3 // 62923d498ddb
+ VPERMB 7(AX), Z8, K1, Z3 // 62f23d498d9807000000
+ VPERMB (DI), Z8, K1, Z3 // 62f23d498d1f
+ VPERMB Z3, Z2, K1, Z3 // 62f26d498ddb
+ VPERMB Z27, Z2, K1, Z3 // 62926d498ddb
+ VPERMB 7(AX), Z2, K1, Z3 // 62f26d498d9807000000
+ VPERMB (DI), Z2, K1, Z3 // 62f26d498d1f
+ VPERMB Z3, Z8, K1, Z21 // 62e23d498deb
+ VPERMB Z27, Z8, K1, Z21 // 62823d498deb
+ VPERMB 7(AX), Z8, K1, Z21 // 62e23d498da807000000
+ VPERMB (DI), Z8, K1, Z21 // 62e23d498d2f
+ VPERMB Z3, Z2, K1, Z21 // 62e26d498deb
+ VPERMB Z27, Z2, K1, Z21 // 62826d498deb
+ VPERMB 7(AX), Z2, K1, Z21 // 62e26d498da807000000
+ VPERMB (DI), Z2, K1, Z21 // 62e26d498d2f
+ VPERMI2B X15, X8, K7, X31 // 62423d0f75ff
+ VPERMI2B X0, X8, K7, X31 // 62623d0f75f8
+ VPERMI2B X16, X8, K7, X31 // 62223d0f75f8
+ VPERMI2B 17(SP), X8, K7, X31 // 62623d0f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X8, K7, X31 // 62623d0f75bcb5efffffff
+ VPERMI2B X15, X1, K7, X31 // 6242750f75ff
+ VPERMI2B X0, X1, K7, X31 // 6262750f75f8
+ VPERMI2B X16, X1, K7, X31 // 6222750f75f8
+ VPERMI2B 17(SP), X1, K7, X31 // 6262750f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X1, K7, X31 // 6262750f75bcb5efffffff
+ VPERMI2B X15, X0, K7, X31 // 62427d0f75ff
+ VPERMI2B X0, X0, K7, X31 // 62627d0f75f8
+ VPERMI2B X16, X0, K7, X31 // 62227d0f75f8
+ VPERMI2B 17(SP), X0, K7, X31 // 62627d0f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X0, K7, X31 // 62627d0f75bcb5efffffff
+ VPERMI2B X15, X8, K7, X16 // 62c23d0f75c7
+ VPERMI2B X0, X8, K7, X16 // 62e23d0f75c0
+ VPERMI2B X16, X8, K7, X16 // 62a23d0f75c0
+ VPERMI2B 17(SP), X8, K7, X16 // 62e23d0f75842411000000
+ VPERMI2B -17(BP)(SI*4), X8, K7, X16 // 62e23d0f7584b5efffffff
+ VPERMI2B X15, X1, K7, X16 // 62c2750f75c7
+ VPERMI2B X0, X1, K7, X16 // 62e2750f75c0
+ VPERMI2B X16, X1, K7, X16 // 62a2750f75c0
+ VPERMI2B 17(SP), X1, K7, X16 // 62e2750f75842411000000
+ VPERMI2B -17(BP)(SI*4), X1, K7, X16 // 62e2750f7584b5efffffff
+ VPERMI2B X15, X0, K7, X16 // 62c27d0f75c7
+ VPERMI2B X0, X0, K7, X16 // 62e27d0f75c0
+ VPERMI2B X16, X0, K7, X16 // 62a27d0f75c0
+ VPERMI2B 17(SP), X0, K7, X16 // 62e27d0f75842411000000
+ VPERMI2B -17(BP)(SI*4), X0, K7, X16 // 62e27d0f7584b5efffffff
+ VPERMI2B X15, X8, K7, X7 // 62d23d0f75ff
+ VPERMI2B X0, X8, K7, X7 // 62f23d0f75f8
+ VPERMI2B X16, X8, K7, X7 // 62b23d0f75f8
+ VPERMI2B 17(SP), X8, K7, X7 // 62f23d0f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X8, K7, X7 // 62f23d0f75bcb5efffffff
+ VPERMI2B X15, X1, K7, X7 // 62d2750f75ff
+ VPERMI2B X0, X1, K7, X7 // 62f2750f75f8
+ VPERMI2B X16, X1, K7, X7 // 62b2750f75f8
+ VPERMI2B 17(SP), X1, K7, X7 // 62f2750f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X1, K7, X7 // 62f2750f75bcb5efffffff
+ VPERMI2B X15, X0, K7, X7 // 62d27d0f75ff
+ VPERMI2B X0, X0, K7, X7 // 62f27d0f75f8
+ VPERMI2B X16, X0, K7, X7 // 62b27d0f75f8
+ VPERMI2B 17(SP), X0, K7, X7 // 62f27d0f75bc2411000000
+ VPERMI2B -17(BP)(SI*4), X0, K7, X7 // 62f27d0f75bcb5efffffff
+ VPERMI2B Y18, Y15, K2, Y2 // 62b2052a75d2
+ VPERMI2B Y24, Y15, K2, Y2 // 6292052a75d0
+ VPERMI2B Y9, Y15, K2, Y2 // 62d2052a75d1
+ VPERMI2B 15(R8)(R14*1), Y15, K2, Y2 // 6292052a7594300f000000
+ VPERMI2B 15(R8)(R14*2), Y15, K2, Y2 // 6292052a7594700f000000
+ VPERMI2B Y18, Y22, K2, Y2 // 62b24d2275d2
+ VPERMI2B Y24, Y22, K2, Y2 // 62924d2275d0
+ VPERMI2B Y9, Y22, K2, Y2 // 62d24d2275d1
+ VPERMI2B 15(R8)(R14*1), Y22, K2, Y2 // 62924d227594300f000000
+ VPERMI2B 15(R8)(R14*2), Y22, K2, Y2 // 62924d227594700f000000
+ VPERMI2B Y18, Y20, K2, Y2 // 62b25d2275d2
+ VPERMI2B Y24, Y20, K2, Y2 // 62925d2275d0
+ VPERMI2B Y9, Y20, K2, Y2 // 62d25d2275d1
+ VPERMI2B 15(R8)(R14*1), Y20, K2, Y2 // 62925d227594300f000000
+ VPERMI2B 15(R8)(R14*2), Y20, K2, Y2 // 62925d227594700f000000
+ VPERMI2B Y18, Y15, K2, Y13 // 6232052a75ea
+ VPERMI2B Y24, Y15, K2, Y13 // 6212052a75e8
+ VPERMI2B Y9, Y15, K2, Y13 // 6252052a75e9
+ VPERMI2B 15(R8)(R14*1), Y15, K2, Y13 // 6212052a75ac300f000000
+ VPERMI2B 15(R8)(R14*2), Y15, K2, Y13 // 6212052a75ac700f000000
+ VPERMI2B Y18, Y22, K2, Y13 // 62324d2275ea
+ VPERMI2B Y24, Y22, K2, Y13 // 62124d2275e8
+ VPERMI2B Y9, Y22, K2, Y13 // 62524d2275e9
+ VPERMI2B 15(R8)(R14*1), Y22, K2, Y13 // 62124d2275ac300f000000
+ VPERMI2B 15(R8)(R14*2), Y22, K2, Y13 // 62124d2275ac700f000000
+ VPERMI2B Y18, Y20, K2, Y13 // 62325d2275ea
+ VPERMI2B Y24, Y20, K2, Y13 // 62125d2275e8
+ VPERMI2B Y9, Y20, K2, Y13 // 62525d2275e9
+ VPERMI2B 15(R8)(R14*1), Y20, K2, Y13 // 62125d2275ac300f000000
+ VPERMI2B 15(R8)(R14*2), Y20, K2, Y13 // 62125d2275ac700f000000
+ VPERMI2B Y18, Y15, K2, Y27 // 6222052a75da
+ VPERMI2B Y24, Y15, K2, Y27 // 6202052a75d8
+ VPERMI2B Y9, Y15, K2, Y27 // 6242052a75d9
+ VPERMI2B 15(R8)(R14*1), Y15, K2, Y27 // 6202052a759c300f000000
+ VPERMI2B 15(R8)(R14*2), Y15, K2, Y27 // 6202052a759c700f000000
+ VPERMI2B Y18, Y22, K2, Y27 // 62224d2275da
+ VPERMI2B Y24, Y22, K2, Y27 // 62024d2275d8
+ VPERMI2B Y9, Y22, K2, Y27 // 62424d2275d9
+ VPERMI2B 15(R8)(R14*1), Y22, K2, Y27 // 62024d22759c300f000000
+ VPERMI2B 15(R8)(R14*2), Y22, K2, Y27 // 62024d22759c700f000000
+ VPERMI2B Y18, Y20, K2, Y27 // 62225d2275da
+ VPERMI2B Y24, Y20, K2, Y27 // 62025d2275d8
+ VPERMI2B Y9, Y20, K2, Y27 // 62425d2275d9
+ VPERMI2B 15(R8)(R14*1), Y20, K2, Y27 // 62025d22759c300f000000
+ VPERMI2B 15(R8)(R14*2), Y20, K2, Y27 // 62025d22759c700f000000
+ VPERMI2B Z12, Z9, K4, Z3 // 62d2354c75dc
+ VPERMI2B Z22, Z9, K4, Z3 // 62b2354c75de
+ VPERMI2B -17(BP)(SI*8), Z9, K4, Z3 // 62f2354c759cf5efffffff
+ VPERMI2B (R15), Z9, K4, Z3 // 62d2354c751f
+ VPERMI2B Z12, Z19, K4, Z3 // 62d2654475dc
+ VPERMI2B Z22, Z19, K4, Z3 // 62b2654475de
+ VPERMI2B -17(BP)(SI*8), Z19, K4, Z3 // 62f26544759cf5efffffff
+ VPERMI2B (R15), Z19, K4, Z3 // 62d26544751f
+ VPERMI2B Z12, Z9, K4, Z30 // 6242354c75f4
+ VPERMI2B Z22, Z9, K4, Z30 // 6222354c75f6
+ VPERMI2B -17(BP)(SI*8), Z9, K4, Z30 // 6262354c75b4f5efffffff
+ VPERMI2B (R15), Z9, K4, Z30 // 6242354c7537
+ VPERMI2B Z12, Z19, K4, Z30 // 6242654475f4
+ VPERMI2B Z22, Z19, K4, Z30 // 6222654475f6
+ VPERMI2B -17(BP)(SI*8), Z19, K4, Z30 // 6262654475b4f5efffffff
+ VPERMI2B (R15), Z19, K4, Z30 // 624265447537
+ VPERMT2B X2, X0, K7, X20 // 62e27d0f7de2
+ VPERMT2B X8, X0, K7, X20 // 62c27d0f7de0
+ VPERMT2B X9, X0, K7, X20 // 62c27d0f7de1
+ VPERMT2B (BX), X0, K7, X20 // 62e27d0f7d23
+ VPERMT2B -17(BP)(SI*1), X0, K7, X20 // 62e27d0f7da435efffffff
+ VPERMT2B X2, X9, K7, X20 // 62e2350f7de2
+ VPERMT2B X8, X9, K7, X20 // 62c2350f7de0
+ VPERMT2B X9, X9, K7, X20 // 62c2350f7de1
+ VPERMT2B (BX), X9, K7, X20 // 62e2350f7d23
+ VPERMT2B -17(BP)(SI*1), X9, K7, X20 // 62e2350f7da435efffffff
+ VPERMT2B X2, X13, K7, X20 // 62e2150f7de2
+ VPERMT2B X8, X13, K7, X20 // 62c2150f7de0
+ VPERMT2B X9, X13, K7, X20 // 62c2150f7de1
+ VPERMT2B (BX), X13, K7, X20 // 62e2150f7d23
+ VPERMT2B -17(BP)(SI*1), X13, K7, X20 // 62e2150f7da435efffffff
+ VPERMT2B X2, X0, K7, X5 // 62f27d0f7dea
+ VPERMT2B X8, X0, K7, X5 // 62d27d0f7de8
+ VPERMT2B X9, X0, K7, X5 // 62d27d0f7de9
+ VPERMT2B (BX), X0, K7, X5 // 62f27d0f7d2b
+ VPERMT2B -17(BP)(SI*1), X0, K7, X5 // 62f27d0f7dac35efffffff
+ VPERMT2B X2, X9, K7, X5 // 62f2350f7dea
+ VPERMT2B X8, X9, K7, X5 // 62d2350f7de8
+ VPERMT2B X9, X9, K7, X5 // 62d2350f7de9
+ VPERMT2B (BX), X9, K7, X5 // 62f2350f7d2b
+ VPERMT2B -17(BP)(SI*1), X9, K7, X5 // 62f2350f7dac35efffffff
+ VPERMT2B X2, X13, K7, X5 // 62f2150f7dea
+ VPERMT2B X8, X13, K7, X5 // 62d2150f7de8
+ VPERMT2B X9, X13, K7, X5 // 62d2150f7de9
+ VPERMT2B (BX), X13, K7, X5 // 62f2150f7d2b
+ VPERMT2B -17(BP)(SI*1), X13, K7, X5 // 62f2150f7dac35efffffff
+ VPERMT2B X2, X0, K7, X25 // 62627d0f7dca
+ VPERMT2B X8, X0, K7, X25 // 62427d0f7dc8
+ VPERMT2B X9, X0, K7, X25 // 62427d0f7dc9
+ VPERMT2B (BX), X0, K7, X25 // 62627d0f7d0b
+ VPERMT2B -17(BP)(SI*1), X0, K7, X25 // 62627d0f7d8c35efffffff
+ VPERMT2B X2, X9, K7, X25 // 6262350f7dca
+ VPERMT2B X8, X9, K7, X25 // 6242350f7dc8
+ VPERMT2B X9, X9, K7, X25 // 6242350f7dc9
+ VPERMT2B (BX), X9, K7, X25 // 6262350f7d0b
+ VPERMT2B -17(BP)(SI*1), X9, K7, X25 // 6262350f7d8c35efffffff
+ VPERMT2B X2, X13, K7, X25 // 6262150f7dca
+ VPERMT2B X8, X13, K7, X25 // 6242150f7dc8
+ VPERMT2B X9, X13, K7, X25 // 6242150f7dc9
+ VPERMT2B (BX), X13, K7, X25 // 6262150f7d0b
+ VPERMT2B -17(BP)(SI*1), X13, K7, X25 // 6262150f7d8c35efffffff
+ VPERMT2B Y14, Y2, K6, Y18 // 62c26d2e7dd6
+ VPERMT2B Y8, Y2, K6, Y18 // 62c26d2e7dd0
+ VPERMT2B Y20, Y2, K6, Y18 // 62a26d2e7dd4
+ VPERMT2B 7(SI)(DI*4), Y2, K6, Y18 // 62e26d2e7d94be07000000
+ VPERMT2B -7(DI)(R8*2), Y2, K6, Y18 // 62a26d2e7d9447f9ffffff
+ VPERMT2B Y14, Y7, K6, Y18 // 62c2452e7dd6
+ VPERMT2B Y8, Y7, K6, Y18 // 62c2452e7dd0
+ VPERMT2B Y20, Y7, K6, Y18 // 62a2452e7dd4
+ VPERMT2B 7(SI)(DI*4), Y7, K6, Y18 // 62e2452e7d94be07000000
+ VPERMT2B -7(DI)(R8*2), Y7, K6, Y18 // 62a2452e7d9447f9ffffff
+ VPERMT2B Y14, Y21, K6, Y18 // 62c255267dd6
+ VPERMT2B Y8, Y21, K6, Y18 // 62c255267dd0
+ VPERMT2B Y20, Y21, K6, Y18 // 62a255267dd4
+ VPERMT2B 7(SI)(DI*4), Y21, K6, Y18 // 62e255267d94be07000000
+ VPERMT2B -7(DI)(R8*2), Y21, K6, Y18 // 62a255267d9447f9ffffff
+ VPERMT2B Y14, Y2, K6, Y3 // 62d26d2e7dde
+ VPERMT2B Y8, Y2, K6, Y3 // 62d26d2e7dd8
+ VPERMT2B Y20, Y2, K6, Y3 // 62b26d2e7ddc
+ VPERMT2B 7(SI)(DI*4), Y2, K6, Y3 // 62f26d2e7d9cbe07000000
+ VPERMT2B -7(DI)(R8*2), Y2, K6, Y3 // 62b26d2e7d9c47f9ffffff
+ VPERMT2B Y14, Y7, K6, Y3 // 62d2452e7dde
+ VPERMT2B Y8, Y7, K6, Y3 // 62d2452e7dd8
+ VPERMT2B Y20, Y7, K6, Y3 // 62b2452e7ddc
+ VPERMT2B 7(SI)(DI*4), Y7, K6, Y3 // 62f2452e7d9cbe07000000
+ VPERMT2B -7(DI)(R8*2), Y7, K6, Y3 // 62b2452e7d9c47f9ffffff
+ VPERMT2B Y14, Y21, K6, Y3 // 62d255267dde
+ VPERMT2B Y8, Y21, K6, Y3 // 62d255267dd8
+ VPERMT2B Y20, Y21, K6, Y3 // 62b255267ddc
+ VPERMT2B 7(SI)(DI*4), Y21, K6, Y3 // 62f255267d9cbe07000000
+ VPERMT2B -7(DI)(R8*2), Y21, K6, Y3 // 62b255267d9c47f9ffffff
+ VPERMT2B Y14, Y2, K6, Y24 // 62426d2e7dc6
+ VPERMT2B Y8, Y2, K6, Y24 // 62426d2e7dc0
+ VPERMT2B Y20, Y2, K6, Y24 // 62226d2e7dc4
+ VPERMT2B 7(SI)(DI*4), Y2, K6, Y24 // 62626d2e7d84be07000000
+ VPERMT2B -7(DI)(R8*2), Y2, K6, Y24 // 62226d2e7d8447f9ffffff
+ VPERMT2B Y14, Y7, K6, Y24 // 6242452e7dc6
+ VPERMT2B Y8, Y7, K6, Y24 // 6242452e7dc0
+ VPERMT2B Y20, Y7, K6, Y24 // 6222452e7dc4
+ VPERMT2B 7(SI)(DI*4), Y7, K6, Y24 // 6262452e7d84be07000000
+ VPERMT2B -7(DI)(R8*2), Y7, K6, Y24 // 6222452e7d8447f9ffffff
+ VPERMT2B Y14, Y21, K6, Y24 // 624255267dc6
+ VPERMT2B Y8, Y21, K6, Y24 // 624255267dc0
+ VPERMT2B Y20, Y21, K6, Y24 // 622255267dc4
+ VPERMT2B 7(SI)(DI*4), Y21, K6, Y24 // 626255267d84be07000000
+ VPERMT2B -7(DI)(R8*2), Y21, K6, Y24 // 622255267d8447f9ffffff
+ VPERMT2B Z20, Z1, K3, Z6 // 62b2754b7df4
+ VPERMT2B Z9, Z1, K3, Z6 // 62d2754b7df1
+ VPERMT2B (CX), Z1, K3, Z6 // 62f2754b7d31
+ VPERMT2B 99(R15), Z1, K3, Z6 // 62d2754b7db763000000
+ VPERMT2B Z20, Z9, K3, Z6 // 62b2354b7df4
+ VPERMT2B Z9, Z9, K3, Z6 // 62d2354b7df1
+ VPERMT2B (CX), Z9, K3, Z6 // 62f2354b7d31
+ VPERMT2B 99(R15), Z9, K3, Z6 // 62d2354b7db763000000
+ VPERMT2B Z20, Z1, K3, Z9 // 6232754b7dcc
+ VPERMT2B Z9, Z1, K3, Z9 // 6252754b7dc9
+ VPERMT2B (CX), Z1, K3, Z9 // 6272754b7d09
+ VPERMT2B 99(R15), Z1, K3, Z9 // 6252754b7d8f63000000
+ VPERMT2B Z20, Z9, K3, Z9 // 6232354b7dcc
+ VPERMT2B Z9, Z9, K3, Z9 // 6252354b7dc9
+ VPERMT2B (CX), Z9, K3, Z9 // 6272354b7d09
+ VPERMT2B 99(R15), Z9, K3, Z9 // 6252354b7d8f63000000
+ VPMULTISHIFTQB X9, X24, K5, X7 // 62d2bd0583f9
+ VPMULTISHIFTQB X7, X24, K5, X7 // 62f2bd0583ff
+ VPMULTISHIFTQB X14, X24, K5, X7 // 62d2bd0583fe
+ VPMULTISHIFTQB 17(SP)(BP*1), X24, K5, X7 // 62f2bd0583bc2c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X24, K5, X7 // 62f2bd0583bcd1f9ffffff
+ VPMULTISHIFTQB X9, X20, K5, X7 // 62d2dd0583f9
+ VPMULTISHIFTQB X7, X20, K5, X7 // 62f2dd0583ff
+ VPMULTISHIFTQB X14, X20, K5, X7 // 62d2dd0583fe
+ VPMULTISHIFTQB 17(SP)(BP*1), X20, K5, X7 // 62f2dd0583bc2c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X20, K5, X7 // 62f2dd0583bcd1f9ffffff
+ VPMULTISHIFTQB X9, X7, K5, X7 // 62d2c50d83f9
+ VPMULTISHIFTQB X7, X7, K5, X7 // 62f2c50d83ff
+ VPMULTISHIFTQB X14, X7, K5, X7 // 62d2c50d83fe
+ VPMULTISHIFTQB 17(SP)(BP*1), X7, K5, X7 // 62f2c50d83bc2c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X7, K5, X7 // 62f2c50d83bcd1f9ffffff
+ VPMULTISHIFTQB X9, X24, K5, X0 // 62d2bd0583c1
+ VPMULTISHIFTQB X7, X24, K5, X0 // 62f2bd0583c7
+ VPMULTISHIFTQB X14, X24, K5, X0 // 62d2bd0583c6
+ VPMULTISHIFTQB 17(SP)(BP*1), X24, K5, X0 // 62f2bd0583842c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X24, K5, X0 // 62f2bd058384d1f9ffffff
+ VPMULTISHIFTQB X9, X20, K5, X0 // 62d2dd0583c1
+ VPMULTISHIFTQB X7, X20, K5, X0 // 62f2dd0583c7
+ VPMULTISHIFTQB X14, X20, K5, X0 // 62d2dd0583c6
+ VPMULTISHIFTQB 17(SP)(BP*1), X20, K5, X0 // 62f2dd0583842c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X20, K5, X0 // 62f2dd058384d1f9ffffff
+ VPMULTISHIFTQB X9, X7, K5, X0 // 62d2c50d83c1
+ VPMULTISHIFTQB X7, X7, K5, X0 // 62f2c50d83c7
+ VPMULTISHIFTQB X14, X7, K5, X0 // 62d2c50d83c6
+ VPMULTISHIFTQB 17(SP)(BP*1), X7, K5, X0 // 62f2c50d83842c11000000
+ VPMULTISHIFTQB -7(CX)(DX*8), X7, K5, X0 // 62f2c50d8384d1f9ffffff
+ VPMULTISHIFTQB Y16, Y30, K7, Y12 // 62328d2783e0
+ VPMULTISHIFTQB Y1, Y30, K7, Y12 // 62728d2783e1
+ VPMULTISHIFTQB Y30, Y30, K7, Y12 // 62128d2783e6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y12 // 62728d2783a46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y12 // 62328d2783a487f9ffffff
+ VPMULTISHIFTQB Y16, Y26, K7, Y12 // 6232ad2783e0
+ VPMULTISHIFTQB Y1, Y26, K7, Y12 // 6272ad2783e1
+ VPMULTISHIFTQB Y30, Y26, K7, Y12 // 6212ad2783e6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y12 // 6272ad2783a46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y12 // 6232ad2783a487f9ffffff
+ VPMULTISHIFTQB Y16, Y7, K7, Y12 // 6232c52f83e0
+ VPMULTISHIFTQB Y1, Y7, K7, Y12 // 6272c52f83e1
+ VPMULTISHIFTQB Y30, Y7, K7, Y12 // 6212c52f83e6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y12 // 6272c52f83a46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y12 // 6232c52f83a487f9ffffff
+ VPMULTISHIFTQB Y16, Y30, K7, Y21 // 62a28d2783e8
+ VPMULTISHIFTQB Y1, Y30, K7, Y21 // 62e28d2783e9
+ VPMULTISHIFTQB Y30, Y30, K7, Y21 // 62828d2783ee
+ VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y21 // 62e28d2783ac6c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y21 // 62a28d2783ac87f9ffffff
+ VPMULTISHIFTQB Y16, Y26, K7, Y21 // 62a2ad2783e8
+ VPMULTISHIFTQB Y1, Y26, K7, Y21 // 62e2ad2783e9
+ VPMULTISHIFTQB Y30, Y26, K7, Y21 // 6282ad2783ee
+ VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y21 // 62e2ad2783ac6c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y21 // 62a2ad2783ac87f9ffffff
+ VPMULTISHIFTQB Y16, Y7, K7, Y21 // 62a2c52f83e8
+ VPMULTISHIFTQB Y1, Y7, K7, Y21 // 62e2c52f83e9
+ VPMULTISHIFTQB Y30, Y7, K7, Y21 // 6282c52f83ee
+ VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y21 // 62e2c52f83ac6c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y21 // 62a2c52f83ac87f9ffffff
+ VPMULTISHIFTQB Y16, Y30, K7, Y14 // 62328d2783f0
+ VPMULTISHIFTQB Y1, Y30, K7, Y14 // 62728d2783f1
+ VPMULTISHIFTQB Y30, Y30, K7, Y14 // 62128d2783f6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y14 // 62728d2783b46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y14 // 62328d2783b487f9ffffff
+ VPMULTISHIFTQB Y16, Y26, K7, Y14 // 6232ad2783f0
+ VPMULTISHIFTQB Y1, Y26, K7, Y14 // 6272ad2783f1
+ VPMULTISHIFTQB Y30, Y26, K7, Y14 // 6212ad2783f6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y14 // 6272ad2783b46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y14 // 6232ad2783b487f9ffffff
+ VPMULTISHIFTQB Y16, Y7, K7, Y14 // 6232c52f83f0
+ VPMULTISHIFTQB Y1, Y7, K7, Y14 // 6272c52f83f1
+ VPMULTISHIFTQB Y30, Y7, K7, Y14 // 6212c52f83f6
+ VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y14 // 6272c52f83b46c11000000
+ VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y14 // 6232c52f83b487f9ffffff
+ VPMULTISHIFTQB Z7, Z2, K7, Z18 // 62e2ed4f83d7
+ VPMULTISHIFTQB Z13, Z2, K7, Z18 // 62c2ed4f83d5
+ VPMULTISHIFTQB 7(AX)(CX*4), Z2, K7, Z18 // 62e2ed4f83948807000000
+ VPMULTISHIFTQB 7(AX)(CX*1), Z2, K7, Z18 // 62e2ed4f83940807000000
+ VPMULTISHIFTQB Z7, Z21, K7, Z18 // 62e2d54783d7
+ VPMULTISHIFTQB Z13, Z21, K7, Z18 // 62c2d54783d5
+ VPMULTISHIFTQB 7(AX)(CX*4), Z21, K7, Z18 // 62e2d54783948807000000
+ VPMULTISHIFTQB 7(AX)(CX*1), Z21, K7, Z18 // 62e2d54783940807000000
+ VPMULTISHIFTQB Z7, Z2, K7, Z24 // 6262ed4f83c7
+ VPMULTISHIFTQB Z13, Z2, K7, Z24 // 6242ed4f83c5
+ VPMULTISHIFTQB 7(AX)(CX*4), Z2, K7, Z24 // 6262ed4f83848807000000
+ VPMULTISHIFTQB 7(AX)(CX*1), Z2, K7, Z24 // 6262ed4f83840807000000
+ VPMULTISHIFTQB Z7, Z21, K7, Z24 // 6262d54783c7
+ VPMULTISHIFTQB Z13, Z21, K7, Z24 // 6242d54783c5
+ VPMULTISHIFTQB 7(AX)(CX*4), Z21, K7, Z24 // 6262d54783848807000000
+ VPMULTISHIFTQB 7(AX)(CX*1), Z21, K7, Z24 // 6262d54783840807000000
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi2.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi2.s
new file mode 100644
index 0000000..3f49fab
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vbmi2.s
@@ -0,0 +1,1386 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_vbmi2(SB), NOSPLIT, $0
+ VPCOMPRESSB X7, K1, X15 // 62d27d0963ff
+ VPCOMPRESSB X13, K1, X15 // 62527d0963ef
+ VPCOMPRESSB X8, K1, X15 // 62527d0963c7
+ VPCOMPRESSB X7, K1, X28 // 62927d0963fc
+ VPCOMPRESSB X13, K1, X28 // 62127d0963ec
+ VPCOMPRESSB X8, K1, X28 // 62127d0963c4
+ VPCOMPRESSB X7, K1, -7(CX)(DX*1) // 62f27d09637c11f9
+ VPCOMPRESSB X13, K1, -7(CX)(DX*1) // 62727d09636c11f9
+ VPCOMPRESSB X8, K1, -7(CX)(DX*1) // 62727d09634411f9
+ VPCOMPRESSB X7, K1, -15(R14)(R15*4) // 62927d09637cbef1
+ VPCOMPRESSB X13, K1, -15(R14)(R15*4) // 62127d09636cbef1
+ VPCOMPRESSB X8, K1, -15(R14)(R15*4) // 62127d096344bef1
+ VPCOMPRESSB Y5, K1, Y8 // 62d27d2963e8
+ VPCOMPRESSB Y24, K1, Y8 // 62427d2963c0
+ VPCOMPRESSB Y21, K1, Y8 // 62c27d2963e8
+ VPCOMPRESSB Y5, K1, Y11 // 62d27d2963eb
+ VPCOMPRESSB Y24, K1, Y11 // 62427d2963c3
+ VPCOMPRESSB Y21, K1, Y11 // 62c27d2963eb
+ VPCOMPRESSB Y5, K1, Y24 // 62927d2963e8
+ VPCOMPRESSB Y24, K1, Y24 // 62027d2963c0
+ VPCOMPRESSB Y21, K1, Y24 // 62827d2963e8
+ VPCOMPRESSB Y5, K1, -17(BP)(SI*8) // 62f27d29636cf5ef
+ VPCOMPRESSB Y24, K1, -17(BP)(SI*8) // 62627d296344f5ef
+ VPCOMPRESSB Y21, K1, -17(BP)(SI*8) // 62e27d29636cf5ef
+ VPCOMPRESSB Y5, K1, (R15) // 62d27d29632f
+ VPCOMPRESSB Y24, K1, (R15) // 62427d296307
+ VPCOMPRESSB Y21, K1, (R15) // 62c27d29632f
+ VPCOMPRESSB Z2, K1, Z5 // 62f27d4963d5
+ VPCOMPRESSB Z2, K1, Z23 // 62b27d4963d7
+ VPCOMPRESSB Z2, K1, -17(BP) // 62f27d496355ef
+ VPCOMPRESSB Z2, K1, -15(R14)(R15*8) // 62927d496354fef1
+ VPCOMPRESSW X20, K5, X20 // 62a2fd0d63e4
+ VPCOMPRESSW X16, K5, X20 // 62a2fd0d63c4
+ VPCOMPRESSW X12, K5, X20 // 6232fd0d63e4
+ VPCOMPRESSW X20, K5, X24 // 6282fd0d63e0
+ VPCOMPRESSW X16, K5, X24 // 6282fd0d63c0
+ VPCOMPRESSW X12, K5, X24 // 6212fd0d63e0
+ VPCOMPRESSW X20, K5, X7 // 62e2fd0d63e7
+ VPCOMPRESSW X16, K5, X7 // 62e2fd0d63c7
+ VPCOMPRESSW X12, K5, X7 // 6272fd0d63e7
+ VPCOMPRESSW X20, K5, 17(SP)(BP*2) // 62e2fd0d63a46c11000000
+ VPCOMPRESSW X16, K5, 17(SP)(BP*2) // 62e2fd0d63846c11000000
+ VPCOMPRESSW X12, K5, 17(SP)(BP*2) // 6272fd0d63a46c11000000
+ VPCOMPRESSW X20, K5, -7(DI)(R8*4) // 62a2fd0d63a487f9ffffff
+ VPCOMPRESSW X16, K5, -7(DI)(R8*4) // 62a2fd0d638487f9ffffff
+ VPCOMPRESSW X12, K5, -7(DI)(R8*4) // 6232fd0d63a487f9ffffff
+ VPCOMPRESSW Y18, K7, Y14 // 62c2fd2f63d6
+ VPCOMPRESSW Y3, K7, Y14 // 62d2fd2f63de
+ VPCOMPRESSW Y24, K7, Y14 // 6242fd2f63c6
+ VPCOMPRESSW Y18, K7, Y18 // 62a2fd2f63d2
+ VPCOMPRESSW Y3, K7, Y18 // 62b2fd2f63da
+ VPCOMPRESSW Y24, K7, Y18 // 6222fd2f63c2
+ VPCOMPRESSW Y18, K7, Y31 // 6282fd2f63d7
+ VPCOMPRESSW Y3, K7, Y31 // 6292fd2f63df
+ VPCOMPRESSW Y24, K7, Y31 // 6202fd2f63c7
+ VPCOMPRESSW Y18, K7, -7(DI)(R8*1) // 62a2fd2f639407f9ffffff
+ VPCOMPRESSW Y3, K7, -7(DI)(R8*1) // 62b2fd2f639c07f9ffffff
+ VPCOMPRESSW Y24, K7, -7(DI)(R8*1) // 6222fd2f638407f9ffffff
+ VPCOMPRESSW Y18, K7, (SP) // 62e2fd2f631424
+ VPCOMPRESSW Y3, K7, (SP) // 62f2fd2f631c24
+ VPCOMPRESSW Y24, K7, (SP) // 6262fd2f630424
+ VPCOMPRESSW Z3, K7, Z26 // 6292fd4f63da
+ VPCOMPRESSW Z0, K7, Z26 // 6292fd4f63c2
+ VPCOMPRESSW Z3, K7, Z3 // 62f2fd4f63db
+ VPCOMPRESSW Z0, K7, Z3 // 62f2fd4f63c3
+ VPCOMPRESSW Z3, K7, 15(R8)(R14*8) // 6292fd4f639cf00f000000
+ VPCOMPRESSW Z0, K7, 15(R8)(R14*8) // 6292fd4f6384f00f000000
+ VPCOMPRESSW Z3, K7, -15(R14)(R15*2) // 6292fd4f639c7ef1ffffff
+ VPCOMPRESSW Z0, K7, -15(R14)(R15*2) // 6292fd4f63847ef1ffffff
+ VPEXPANDB X16, K1, X6 // 62b27d0962f0
+ VPEXPANDB X28, K1, X6 // 62927d0962f4
+ VPEXPANDB X8, K1, X6 // 62d27d0962f0
+ VPEXPANDB 99(R15)(R15*4), K1, X6 // 62927d096274bf63
+ VPEXPANDB 15(DX), K1, X6 // 62f27d0962720f
+ VPEXPANDB X16, K1, X22 // 62a27d0962f0
+ VPEXPANDB X28, K1, X22 // 62827d0962f4
+ VPEXPANDB X8, K1, X22 // 62c27d0962f0
+ VPEXPANDB 99(R15)(R15*4), K1, X22 // 62827d096274bf63
+ VPEXPANDB 15(DX), K1, X22 // 62e27d0962720f
+ VPEXPANDB X16, K1, X12 // 62327d0962e0
+ VPEXPANDB X28, K1, X12 // 62127d0962e4
+ VPEXPANDB X8, K1, X12 // 62527d0962e0
+ VPEXPANDB 99(R15)(R15*4), K1, X12 // 62127d096264bf63
+ VPEXPANDB 15(DX), K1, X12 // 62727d0962620f
+ VPEXPANDB Y31, K1, Y27 // 62027d2962df
+ VPEXPANDB Y3, K1, Y27 // 62627d2962db
+ VPEXPANDB Y14, K1, Y27 // 62427d2962de
+ VPEXPANDB -7(DI)(R8*1), K1, Y27 // 62227d29625c07f9
+ VPEXPANDB (SP), K1, Y27 // 62627d29621c24
+ VPEXPANDB Y31, K1, Y0 // 62927d2962c7
+ VPEXPANDB Y3, K1, Y0 // 62f27d2962c3
+ VPEXPANDB Y14, K1, Y0 // 62d27d2962c6
+ VPEXPANDB -7(DI)(R8*1), K1, Y0 // 62b27d29624407f9
+ VPEXPANDB (SP), K1, Y0 // 62f27d29620424
+ VPEXPANDB Y31, K1, Y11 // 62127d2962df
+ VPEXPANDB Y3, K1, Y11 // 62727d2962db
+ VPEXPANDB Y14, K1, Y11 // 62527d2962de
+ VPEXPANDB -7(DI)(R8*1), K1, Y11 // 62327d29625c07f9
+ VPEXPANDB (SP), K1, Y11 // 62727d29621c24
+ VPEXPANDB Z14, K1, Z15 // 62527d4962fe
+ VPEXPANDB Z27, K1, Z15 // 62127d4962fb
+ VPEXPANDB 15(R8)(R14*8), K1, Z15 // 62127d49627cf00f
+ VPEXPANDB -15(R14)(R15*2), K1, Z15 // 62127d49627c7ef1
+ VPEXPANDB Z14, K1, Z12 // 62527d4962e6
+ VPEXPANDB Z27, K1, Z12 // 62127d4962e3
+ VPEXPANDB 15(R8)(R14*8), K1, Z12 // 62127d496264f00f
+ VPEXPANDB -15(R14)(R15*2), K1, Z12 // 62127d4962647ef1
+ VPEXPANDW X2, K5, X18 // 62e2fd0d62d2
+ VPEXPANDW X24, K5, X18 // 6282fd0d62d0
+ VPEXPANDW -7(CX)(DX*1), K5, X18 // 62e2fd0d629411f9ffffff
+ VPEXPANDW -15(R14)(R15*4), K5, X18 // 6282fd0d6294bef1ffffff
+ VPEXPANDW X2, K5, X11 // 6272fd0d62da
+ VPEXPANDW X24, K5, X11 // 6212fd0d62d8
+ VPEXPANDW -7(CX)(DX*1), K5, X11 // 6272fd0d629c11f9ffffff
+ VPEXPANDW -15(R14)(R15*4), K5, X11 // 6212fd0d629cbef1ffffff
+ VPEXPANDW X2, K5, X9 // 6272fd0d62ca
+ VPEXPANDW X24, K5, X9 // 6212fd0d62c8
+ VPEXPANDW -7(CX)(DX*1), K5, X9 // 6272fd0d628c11f9ffffff
+ VPEXPANDW -15(R14)(R15*4), K5, X9 // 6212fd0d628cbef1ffffff
+ VPEXPANDW Y5, K7, Y19 // 62e2fd2f62dd
+ VPEXPANDW Y16, K7, Y19 // 62a2fd2f62d8
+ VPEXPANDW Y2, K7, Y19 // 62e2fd2f62da
+ VPEXPANDW (AX), K7, Y19 // 62e2fd2f6218
+ VPEXPANDW 7(SI), K7, Y19 // 62e2fd2f629e07000000
+ VPEXPANDW Y5, K7, Y14 // 6272fd2f62f5
+ VPEXPANDW Y16, K7, Y14 // 6232fd2f62f0
+ VPEXPANDW Y2, K7, Y14 // 6272fd2f62f2
+ VPEXPANDW (AX), K7, Y14 // 6272fd2f6230
+ VPEXPANDW 7(SI), K7, Y14 // 6272fd2f62b607000000
+ VPEXPANDW Y5, K7, Y21 // 62e2fd2f62ed
+ VPEXPANDW Y16, K7, Y21 // 62a2fd2f62e8
+ VPEXPANDW Y2, K7, Y21 // 62e2fd2f62ea
+ VPEXPANDW (AX), K7, Y21 // 62e2fd2f6228
+ VPEXPANDW 7(SI), K7, Y21 // 62e2fd2f62ae07000000
+ VPEXPANDW Z26, K7, Z6 // 6292fd4f62f2
+ VPEXPANDW Z14, K7, Z6 // 62d2fd4f62f6
+ VPEXPANDW (SI), K7, Z6 // 62f2fd4f6236
+ VPEXPANDW 7(SI)(DI*2), K7, Z6 // 62f2fd4f62b47e07000000
+ VPEXPANDW Z26, K7, Z14 // 6212fd4f62f2
+ VPEXPANDW Z14, K7, Z14 // 6252fd4f62f6
+ VPEXPANDW (SI), K7, Z14 // 6272fd4f6236
+ VPEXPANDW 7(SI)(DI*2), K7, Z14 // 6272fd4f62b47e07000000
+ VPSHLDD $47, X8, X31, K4, X26 // 6243050471d02f
+ VPSHLDD $47, X1, X31, K4, X26 // 6263050471d12f
+ VPSHLDD $47, X0, X31, K4, X26 // 6263050471d02f
+ VPSHLDD $47, 7(SI)(DI*4), X31, K4, X26 // 626305047194be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X31, K4, X26 // 62230504719447f9ffffff2f
+ VPSHLDD $47, X8, X16, K4, X26 // 62437d0471d02f
+ VPSHLDD $47, X1, X16, K4, X26 // 62637d0471d12f
+ VPSHLDD $47, X0, X16, K4, X26 // 62637d0471d02f
+ VPSHLDD $47, 7(SI)(DI*4), X16, K4, X26 // 62637d047194be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X16, K4, X26 // 62237d04719447f9ffffff2f
+ VPSHLDD $47, X8, X7, K4, X26 // 6243450c71d02f
+ VPSHLDD $47, X1, X7, K4, X26 // 6263450c71d12f
+ VPSHLDD $47, X0, X7, K4, X26 // 6263450c71d02f
+ VPSHLDD $47, 7(SI)(DI*4), X7, K4, X26 // 6263450c7194be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X7, K4, X26 // 6223450c719447f9ffffff2f
+ VPSHLDD $47, X8, X31, K4, X19 // 62c3050471d82f
+ VPSHLDD $47, X1, X31, K4, X19 // 62e3050471d92f
+ VPSHLDD $47, X0, X31, K4, X19 // 62e3050471d82f
+ VPSHLDD $47, 7(SI)(DI*4), X31, K4, X19 // 62e30504719cbe070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X31, K4, X19 // 62a30504719c47f9ffffff2f
+ VPSHLDD $47, X8, X16, K4, X19 // 62c37d0471d82f
+ VPSHLDD $47, X1, X16, K4, X19 // 62e37d0471d92f
+ VPSHLDD $47, X0, X16, K4, X19 // 62e37d0471d82f
+ VPSHLDD $47, 7(SI)(DI*4), X16, K4, X19 // 62e37d04719cbe070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X16, K4, X19 // 62a37d04719c47f9ffffff2f
+ VPSHLDD $47, X8, X7, K4, X19 // 62c3450c71d82f
+ VPSHLDD $47, X1, X7, K4, X19 // 62e3450c71d92f
+ VPSHLDD $47, X0, X7, K4, X19 // 62e3450c71d82f
+ VPSHLDD $47, 7(SI)(DI*4), X7, K4, X19 // 62e3450c719cbe070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X7, K4, X19 // 62a3450c719c47f9ffffff2f
+ VPSHLDD $47, X8, X31, K4, X0 // 62d3050471c02f
+ VPSHLDD $47, X1, X31, K4, X0 // 62f3050471c12f
+ VPSHLDD $47, X0, X31, K4, X0 // 62f3050471c02f
+ VPSHLDD $47, 7(SI)(DI*4), X31, K4, X0 // 62f305047184be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X31, K4, X0 // 62b30504718447f9ffffff2f
+ VPSHLDD $47, X8, X16, K4, X0 // 62d37d0471c02f
+ VPSHLDD $47, X1, X16, K4, X0 // 62f37d0471c12f
+ VPSHLDD $47, X0, X16, K4, X0 // 62f37d0471c02f
+ VPSHLDD $47, 7(SI)(DI*4), X16, K4, X0 // 62f37d047184be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X16, K4, X0 // 62b37d04718447f9ffffff2f
+ VPSHLDD $47, X8, X7, K4, X0 // 62d3450c71c02f
+ VPSHLDD $47, X1, X7, K4, X0 // 62f3450c71c12f
+ VPSHLDD $47, X0, X7, K4, X0 // 62f3450c71c02f
+ VPSHLDD $47, 7(SI)(DI*4), X7, K4, X0 // 62f3450c7184be070000002f
+ VPSHLDD $47, -7(DI)(R8*2), X7, K4, X0 // 62b3450c718447f9ffffff2f
+ VPSHLDD $82, Y5, Y19, K1, Y3 // 62f3652171dd52
+ VPSHLDD $82, Y16, Y19, K1, Y3 // 62b3652171d852
+ VPSHLDD $82, Y2, Y19, K1, Y3 // 62f3652171da52
+ VPSHLDD $82, (AX), Y19, K1, Y3 // 62f36521711852
+ VPSHLDD $82, 7(SI), Y19, K1, Y3 // 62f36521719e0700000052
+ VPSHLDD $82, Y5, Y14, K1, Y3 // 62f30d2971dd52
+ VPSHLDD $82, Y16, Y14, K1, Y3 // 62b30d2971d852
+ VPSHLDD $82, Y2, Y14, K1, Y3 // 62f30d2971da52
+ VPSHLDD $82, (AX), Y14, K1, Y3 // 62f30d29711852
+ VPSHLDD $82, 7(SI), Y14, K1, Y3 // 62f30d29719e0700000052
+ VPSHLDD $82, Y5, Y21, K1, Y3 // 62f3552171dd52
+ VPSHLDD $82, Y16, Y21, K1, Y3 // 62b3552171d852
+ VPSHLDD $82, Y2, Y21, K1, Y3 // 62f3552171da52
+ VPSHLDD $82, (AX), Y21, K1, Y3 // 62f35521711852
+ VPSHLDD $82, 7(SI), Y21, K1, Y3 // 62f35521719e0700000052
+ VPSHLDD $82, Y5, Y19, K1, Y19 // 62e3652171dd52
+ VPSHLDD $82, Y16, Y19, K1, Y19 // 62a3652171d852
+ VPSHLDD $82, Y2, Y19, K1, Y19 // 62e3652171da52
+ VPSHLDD $82, (AX), Y19, K1, Y19 // 62e36521711852
+ VPSHLDD $82, 7(SI), Y19, K1, Y19 // 62e36521719e0700000052
+ VPSHLDD $82, Y5, Y14, K1, Y19 // 62e30d2971dd52
+ VPSHLDD $82, Y16, Y14, K1, Y19 // 62a30d2971d852
+ VPSHLDD $82, Y2, Y14, K1, Y19 // 62e30d2971da52
+ VPSHLDD $82, (AX), Y14, K1, Y19 // 62e30d29711852
+ VPSHLDD $82, 7(SI), Y14, K1, Y19 // 62e30d29719e0700000052
+ VPSHLDD $82, Y5, Y21, K1, Y19 // 62e3552171dd52
+ VPSHLDD $82, Y16, Y21, K1, Y19 // 62a3552171d852
+ VPSHLDD $82, Y2, Y21, K1, Y19 // 62e3552171da52
+ VPSHLDD $82, (AX), Y21, K1, Y19 // 62e35521711852
+ VPSHLDD $82, 7(SI), Y21, K1, Y19 // 62e35521719e0700000052
+ VPSHLDD $82, Y5, Y19, K1, Y23 // 62e3652171fd52
+ VPSHLDD $82, Y16, Y19, K1, Y23 // 62a3652171f852
+ VPSHLDD $82, Y2, Y19, K1, Y23 // 62e3652171fa52
+ VPSHLDD $82, (AX), Y19, K1, Y23 // 62e36521713852
+ VPSHLDD $82, 7(SI), Y19, K1, Y23 // 62e3652171be0700000052
+ VPSHLDD $82, Y5, Y14, K1, Y23 // 62e30d2971fd52
+ VPSHLDD $82, Y16, Y14, K1, Y23 // 62a30d2971f852
+ VPSHLDD $82, Y2, Y14, K1, Y23 // 62e30d2971fa52
+ VPSHLDD $82, (AX), Y14, K1, Y23 // 62e30d29713852
+ VPSHLDD $82, 7(SI), Y14, K1, Y23 // 62e30d2971be0700000052
+ VPSHLDD $82, Y5, Y21, K1, Y23 // 62e3552171fd52
+ VPSHLDD $82, Y16, Y21, K1, Y23 // 62a3552171f852
+ VPSHLDD $82, Y2, Y21, K1, Y23 // 62e3552171fa52
+ VPSHLDD $82, (AX), Y21, K1, Y23 // 62e35521713852
+ VPSHLDD $82, 7(SI), Y21, K1, Y23 // 62e3552171be0700000052
+ VPSHLDD $126, Z27, Z2, K3, Z21 // 62836d4b71eb7e
+ VPSHLDD $126, Z25, Z2, K3, Z21 // 62836d4b71e97e
+ VPSHLDD $126, 17(SP)(BP*1), Z2, K3, Z21 // 62e36d4b71ac2c110000007e
+ VPSHLDD $126, -7(CX)(DX*8), Z2, K3, Z21 // 62e36d4b71acd1f9ffffff7e
+ VPSHLDD $126, Z27, Z7, K3, Z21 // 6283454b71eb7e
+ VPSHLDD $126, Z25, Z7, K3, Z21 // 6283454b71e97e
+ VPSHLDD $126, 17(SP)(BP*1), Z7, K3, Z21 // 62e3454b71ac2c110000007e
+ VPSHLDD $126, -7(CX)(DX*8), Z7, K3, Z21 // 62e3454b71acd1f9ffffff7e
+ VPSHLDD $126, Z27, Z2, K3, Z9 // 62136d4b71cb7e
+ VPSHLDD $126, Z25, Z2, K3, Z9 // 62136d4b71c97e
+ VPSHLDD $126, 17(SP)(BP*1), Z2, K3, Z9 // 62736d4b718c2c110000007e
+ VPSHLDD $126, -7(CX)(DX*8), Z2, K3, Z9 // 62736d4b718cd1f9ffffff7e
+ VPSHLDD $126, Z27, Z7, K3, Z9 // 6213454b71cb7e
+ VPSHLDD $126, Z25, Z7, K3, Z9 // 6213454b71c97e
+ VPSHLDD $126, 17(SP)(BP*1), Z7, K3, Z9 // 6273454b718c2c110000007e
+ VPSHLDD $126, -7(CX)(DX*8), Z7, K3, Z9 // 6273454b718cd1f9ffffff7e
+ VPSHLDQ $94, X22, X21, K4, X15 // 6233d50471fe5e
+ VPSHLDQ $94, X7, X21, K4, X15 // 6273d50471ff5e
+ VPSHLDQ $94, X19, X21, K4, X15 // 6233d50471fb5e
+ VPSHLDQ $94, 17(SP), X21, K4, X15 // 6273d50471bc24110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X21, K4, X15 // 6273d50471bcb5efffffff5e
+ VPSHLDQ $94, X22, X0, K4, X15 // 6233fd0c71fe5e
+ VPSHLDQ $94, X7, X0, K4, X15 // 6273fd0c71ff5e
+ VPSHLDQ $94, X19, X0, K4, X15 // 6233fd0c71fb5e
+ VPSHLDQ $94, 17(SP), X0, K4, X15 // 6273fd0c71bc24110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X0, K4, X15 // 6273fd0c71bcb5efffffff5e
+ VPSHLDQ $94, X22, X28, K4, X15 // 62339d0471fe5e
+ VPSHLDQ $94, X7, X28, K4, X15 // 62739d0471ff5e
+ VPSHLDQ $94, X19, X28, K4, X15 // 62339d0471fb5e
+ VPSHLDQ $94, 17(SP), X28, K4, X15 // 62739d0471bc24110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X28, K4, X15 // 62739d0471bcb5efffffff5e
+ VPSHLDQ $94, X22, X21, K4, X0 // 62b3d50471c65e
+ VPSHLDQ $94, X7, X21, K4, X0 // 62f3d50471c75e
+ VPSHLDQ $94, X19, X21, K4, X0 // 62b3d50471c35e
+ VPSHLDQ $94, 17(SP), X21, K4, X0 // 62f3d504718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X21, K4, X0 // 62f3d5047184b5efffffff5e
+ VPSHLDQ $94, X22, X0, K4, X0 // 62b3fd0c71c65e
+ VPSHLDQ $94, X7, X0, K4, X0 // 62f3fd0c71c75e
+ VPSHLDQ $94, X19, X0, K4, X0 // 62b3fd0c71c35e
+ VPSHLDQ $94, 17(SP), X0, K4, X0 // 62f3fd0c718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X0, K4, X0 // 62f3fd0c7184b5efffffff5e
+ VPSHLDQ $94, X22, X28, K4, X0 // 62b39d0471c65e
+ VPSHLDQ $94, X7, X28, K4, X0 // 62f39d0471c75e
+ VPSHLDQ $94, X19, X28, K4, X0 // 62b39d0471c35e
+ VPSHLDQ $94, 17(SP), X28, K4, X0 // 62f39d04718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X28, K4, X0 // 62f39d047184b5efffffff5e
+ VPSHLDQ $94, X22, X21, K4, X16 // 62a3d50471c65e
+ VPSHLDQ $94, X7, X21, K4, X16 // 62e3d50471c75e
+ VPSHLDQ $94, X19, X21, K4, X16 // 62a3d50471c35e
+ VPSHLDQ $94, 17(SP), X21, K4, X16 // 62e3d504718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X21, K4, X16 // 62e3d5047184b5efffffff5e
+ VPSHLDQ $94, X22, X0, K4, X16 // 62a3fd0c71c65e
+ VPSHLDQ $94, X7, X0, K4, X16 // 62e3fd0c71c75e
+ VPSHLDQ $94, X19, X0, K4, X16 // 62a3fd0c71c35e
+ VPSHLDQ $94, 17(SP), X0, K4, X16 // 62e3fd0c718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X0, K4, X16 // 62e3fd0c7184b5efffffff5e
+ VPSHLDQ $94, X22, X28, K4, X16 // 62a39d0471c65e
+ VPSHLDQ $94, X7, X28, K4, X16 // 62e39d0471c75e
+ VPSHLDQ $94, X19, X28, K4, X16 // 62a39d0471c35e
+ VPSHLDQ $94, 17(SP), X28, K4, X16 // 62e39d04718424110000005e
+ VPSHLDQ $94, -17(BP)(SI*4), X28, K4, X16 // 62e39d047184b5efffffff5e
+ VPSHLDQ $121, Y19, Y31, K5, Y21 // 62a3852571eb79
+ VPSHLDQ $121, Y7, Y31, K5, Y21 // 62e3852571ef79
+ VPSHLDQ $121, Y6, Y31, K5, Y21 // 62e3852571ee79
+ VPSHLDQ $121, (BX), Y31, K5, Y21 // 62e38525712b79
+ VPSHLDQ $121, -17(BP)(SI*1), Y31, K5, Y21 // 62e3852571ac35efffffff79
+ VPSHLDQ $121, Y19, Y6, K5, Y21 // 62a3cd2d71eb79
+ VPSHLDQ $121, Y7, Y6, K5, Y21 // 62e3cd2d71ef79
+ VPSHLDQ $121, Y6, Y6, K5, Y21 // 62e3cd2d71ee79
+ VPSHLDQ $121, (BX), Y6, K5, Y21 // 62e3cd2d712b79
+ VPSHLDQ $121, -17(BP)(SI*1), Y6, K5, Y21 // 62e3cd2d71ac35efffffff79
+ VPSHLDQ $121, Y19, Y11, K5, Y21 // 62a3a52d71eb79
+ VPSHLDQ $121, Y7, Y11, K5, Y21 // 62e3a52d71ef79
+ VPSHLDQ $121, Y6, Y11, K5, Y21 // 62e3a52d71ee79
+ VPSHLDQ $121, (BX), Y11, K5, Y21 // 62e3a52d712b79
+ VPSHLDQ $121, -17(BP)(SI*1), Y11, K5, Y21 // 62e3a52d71ac35efffffff79
+ VPSHLDQ $121, Y19, Y31, K5, Y20 // 62a3852571e379
+ VPSHLDQ $121, Y7, Y31, K5, Y20 // 62e3852571e779
+ VPSHLDQ $121, Y6, Y31, K5, Y20 // 62e3852571e679
+ VPSHLDQ $121, (BX), Y31, K5, Y20 // 62e38525712379
+ VPSHLDQ $121, -17(BP)(SI*1), Y31, K5, Y20 // 62e3852571a435efffffff79
+ VPSHLDQ $121, Y19, Y6, K5, Y20 // 62a3cd2d71e379
+ VPSHLDQ $121, Y7, Y6, K5, Y20 // 62e3cd2d71e779
+ VPSHLDQ $121, Y6, Y6, K5, Y20 // 62e3cd2d71e679
+ VPSHLDQ $121, (BX), Y6, K5, Y20 // 62e3cd2d712379
+ VPSHLDQ $121, -17(BP)(SI*1), Y6, K5, Y20 // 62e3cd2d71a435efffffff79
+ VPSHLDQ $121, Y19, Y11, K5, Y20 // 62a3a52d71e379
+ VPSHLDQ $121, Y7, Y11, K5, Y20 // 62e3a52d71e779
+ VPSHLDQ $121, Y6, Y11, K5, Y20 // 62e3a52d71e679
+ VPSHLDQ $121, (BX), Y11, K5, Y20 // 62e3a52d712379
+ VPSHLDQ $121, -17(BP)(SI*1), Y11, K5, Y20 // 62e3a52d71a435efffffff79
+ VPSHLDQ $121, Y19, Y31, K5, Y6 // 62b3852571f379
+ VPSHLDQ $121, Y7, Y31, K5, Y6 // 62f3852571f779
+ VPSHLDQ $121, Y6, Y31, K5, Y6 // 62f3852571f679
+ VPSHLDQ $121, (BX), Y31, K5, Y6 // 62f38525713379
+ VPSHLDQ $121, -17(BP)(SI*1), Y31, K5, Y6 // 62f3852571b435efffffff79
+ VPSHLDQ $121, Y19, Y6, K5, Y6 // 62b3cd2d71f379
+ VPSHLDQ $121, Y7, Y6, K5, Y6 // 62f3cd2d71f779
+ VPSHLDQ $121, Y6, Y6, K5, Y6 // 62f3cd2d71f679
+ VPSHLDQ $121, (BX), Y6, K5, Y6 // 62f3cd2d713379
+ VPSHLDQ $121, -17(BP)(SI*1), Y6, K5, Y6 // 62f3cd2d71b435efffffff79
+ VPSHLDQ $121, Y19, Y11, K5, Y6 // 62b3a52d71f379
+ VPSHLDQ $121, Y7, Y11, K5, Y6 // 62f3a52d71f779
+ VPSHLDQ $121, Y6, Y11, K5, Y6 // 62f3a52d71f679
+ VPSHLDQ $121, (BX), Y11, K5, Y6 // 62f3a52d713379
+ VPSHLDQ $121, -17(BP)(SI*1), Y11, K5, Y6 // 62f3a52d71b435efffffff79
+ VPSHLDQ $13, Z3, Z27, K7, Z23 // 62e3a54771fb0d
+ VPSHLDQ $13, Z0, Z27, K7, Z23 // 62e3a54771f80d
+ VPSHLDQ $13, -17(BP)(SI*2), Z27, K7, Z23 // 62e3a54771bc75efffffff0d
+ VPSHLDQ $13, 7(AX)(CX*2), Z27, K7, Z23 // 62e3a54771bc48070000000d
+ VPSHLDQ $13, Z3, Z14, K7, Z23 // 62e38d4f71fb0d
+ VPSHLDQ $13, Z0, Z14, K7, Z23 // 62e38d4f71f80d
+ VPSHLDQ $13, -17(BP)(SI*2), Z14, K7, Z23 // 62e38d4f71bc75efffffff0d
+ VPSHLDQ $13, 7(AX)(CX*2), Z14, K7, Z23 // 62e38d4f71bc48070000000d
+ VPSHLDQ $13, Z3, Z27, K7, Z9 // 6273a54771cb0d
+ VPSHLDQ $13, Z0, Z27, K7, Z9 // 6273a54771c80d
+ VPSHLDQ $13, -17(BP)(SI*2), Z27, K7, Z9 // 6273a547718c75efffffff0d
+ VPSHLDQ $13, 7(AX)(CX*2), Z27, K7, Z9 // 6273a547718c48070000000d
+ VPSHLDQ $13, Z3, Z14, K7, Z9 // 62738d4f71cb0d
+ VPSHLDQ $13, Z0, Z14, K7, Z9 // 62738d4f71c80d
+ VPSHLDQ $13, -17(BP)(SI*2), Z14, K7, Z9 // 62738d4f718c75efffffff0d
+ VPSHLDQ $13, 7(AX)(CX*2), Z14, K7, Z9 // 62738d4f718c48070000000d
+ VPSHLDVD X15, X1, K7, X7 // 62d2750f71ff
+ VPSHLDVD X12, X1, K7, X7 // 62d2750f71fc
+ VPSHLDVD X0, X1, K7, X7 // 62f2750f71f8
+ VPSHLDVD 7(AX), X1, K7, X7 // 62f2750f71b807000000
+ VPSHLDVD (DI), X1, K7, X7 // 62f2750f713f
+ VPSHLDVD X15, X7, K7, X7 // 62d2450f71ff
+ VPSHLDVD X12, X7, K7, X7 // 62d2450f71fc
+ VPSHLDVD X0, X7, K7, X7 // 62f2450f71f8
+ VPSHLDVD 7(AX), X7, K7, X7 // 62f2450f71b807000000
+ VPSHLDVD (DI), X7, K7, X7 // 62f2450f713f
+ VPSHLDVD X15, X9, K7, X7 // 62d2350f71ff
+ VPSHLDVD X12, X9, K7, X7 // 62d2350f71fc
+ VPSHLDVD X0, X9, K7, X7 // 62f2350f71f8
+ VPSHLDVD 7(AX), X9, K7, X7 // 62f2350f71b807000000
+ VPSHLDVD (DI), X9, K7, X7 // 62f2350f713f
+ VPSHLDVD X15, X1, K7, X16 // 62c2750f71c7
+ VPSHLDVD X12, X1, K7, X16 // 62c2750f71c4
+ VPSHLDVD X0, X1, K7, X16 // 62e2750f71c0
+ VPSHLDVD 7(AX), X1, K7, X16 // 62e2750f718007000000
+ VPSHLDVD (DI), X1, K7, X16 // 62e2750f7107
+ VPSHLDVD X15, X7, K7, X16 // 62c2450f71c7
+ VPSHLDVD X12, X7, K7, X16 // 62c2450f71c4
+ VPSHLDVD X0, X7, K7, X16 // 62e2450f71c0
+ VPSHLDVD 7(AX), X7, K7, X16 // 62e2450f718007000000
+ VPSHLDVD (DI), X7, K7, X16 // 62e2450f7107
+ VPSHLDVD X15, X9, K7, X16 // 62c2350f71c7
+ VPSHLDVD X12, X9, K7, X16 // 62c2350f71c4
+ VPSHLDVD X0, X9, K7, X16 // 62e2350f71c0
+ VPSHLDVD 7(AX), X9, K7, X16 // 62e2350f718007000000
+ VPSHLDVD (DI), X9, K7, X16 // 62e2350f7107
+ VPSHLDVD X15, X1, K7, X31 // 6242750f71ff
+ VPSHLDVD X12, X1, K7, X31 // 6242750f71fc
+ VPSHLDVD X0, X1, K7, X31 // 6262750f71f8
+ VPSHLDVD 7(AX), X1, K7, X31 // 6262750f71b807000000
+ VPSHLDVD (DI), X1, K7, X31 // 6262750f713f
+ VPSHLDVD X15, X7, K7, X31 // 6242450f71ff
+ VPSHLDVD X12, X7, K7, X31 // 6242450f71fc
+ VPSHLDVD X0, X7, K7, X31 // 6262450f71f8
+ VPSHLDVD 7(AX), X7, K7, X31 // 6262450f71b807000000
+ VPSHLDVD (DI), X7, K7, X31 // 6262450f713f
+ VPSHLDVD X15, X9, K7, X31 // 6242350f71ff
+ VPSHLDVD X12, X9, K7, X31 // 6242350f71fc
+ VPSHLDVD X0, X9, K7, X31 // 6262350f71f8
+ VPSHLDVD 7(AX), X9, K7, X31 // 6262350f71b807000000
+ VPSHLDVD (DI), X9, K7, X31 // 6262350f713f
+ VPSHLDVD Y5, Y20, K6, Y0 // 62f25d2671c5
+ VPSHLDVD Y28, Y20, K6, Y0 // 62925d2671c4
+ VPSHLDVD Y7, Y20, K6, Y0 // 62f25d2671c7
+ VPSHLDVD 15(R8)(R14*4), Y20, K6, Y0 // 62925d267184b00f000000
+ VPSHLDVD -7(CX)(DX*4), Y20, K6, Y0 // 62f25d26718491f9ffffff
+ VPSHLDVD Y5, Y12, K6, Y0 // 62f21d2e71c5
+ VPSHLDVD Y28, Y12, K6, Y0 // 62921d2e71c4
+ VPSHLDVD Y7, Y12, K6, Y0 // 62f21d2e71c7
+ VPSHLDVD 15(R8)(R14*4), Y12, K6, Y0 // 62921d2e7184b00f000000
+ VPSHLDVD -7(CX)(DX*4), Y12, K6, Y0 // 62f21d2e718491f9ffffff
+ VPSHLDVD Y5, Y3, K6, Y0 // 62f2652e71c5
+ VPSHLDVD Y28, Y3, K6, Y0 // 6292652e71c4
+ VPSHLDVD Y7, Y3, K6, Y0 // 62f2652e71c7
+ VPSHLDVD 15(R8)(R14*4), Y3, K6, Y0 // 6292652e7184b00f000000
+ VPSHLDVD -7(CX)(DX*4), Y3, K6, Y0 // 62f2652e718491f9ffffff
+ VPSHLDVD Y5, Y20, K6, Y3 // 62f25d2671dd
+ VPSHLDVD Y28, Y20, K6, Y3 // 62925d2671dc
+ VPSHLDVD Y7, Y20, K6, Y3 // 62f25d2671df
+ VPSHLDVD 15(R8)(R14*4), Y20, K6, Y3 // 62925d26719cb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y20, K6, Y3 // 62f25d26719c91f9ffffff
+ VPSHLDVD Y5, Y12, K6, Y3 // 62f21d2e71dd
+ VPSHLDVD Y28, Y12, K6, Y3 // 62921d2e71dc
+ VPSHLDVD Y7, Y12, K6, Y3 // 62f21d2e71df
+ VPSHLDVD 15(R8)(R14*4), Y12, K6, Y3 // 62921d2e719cb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y12, K6, Y3 // 62f21d2e719c91f9ffffff
+ VPSHLDVD Y5, Y3, K6, Y3 // 62f2652e71dd
+ VPSHLDVD Y28, Y3, K6, Y3 // 6292652e71dc
+ VPSHLDVD Y7, Y3, K6, Y3 // 62f2652e71df
+ VPSHLDVD 15(R8)(R14*4), Y3, K6, Y3 // 6292652e719cb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y3, K6, Y3 // 62f2652e719c91f9ffffff
+ VPSHLDVD Y5, Y20, K6, Y5 // 62f25d2671ed
+ VPSHLDVD Y28, Y20, K6, Y5 // 62925d2671ec
+ VPSHLDVD Y7, Y20, K6, Y5 // 62f25d2671ef
+ VPSHLDVD 15(R8)(R14*4), Y20, K6, Y5 // 62925d2671acb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y20, K6, Y5 // 62f25d2671ac91f9ffffff
+ VPSHLDVD Y5, Y12, K6, Y5 // 62f21d2e71ed
+ VPSHLDVD Y28, Y12, K6, Y5 // 62921d2e71ec
+ VPSHLDVD Y7, Y12, K6, Y5 // 62f21d2e71ef
+ VPSHLDVD 15(R8)(R14*4), Y12, K6, Y5 // 62921d2e71acb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y12, K6, Y5 // 62f21d2e71ac91f9ffffff
+ VPSHLDVD Y5, Y3, K6, Y5 // 62f2652e71ed
+ VPSHLDVD Y28, Y3, K6, Y5 // 6292652e71ec
+ VPSHLDVD Y7, Y3, K6, Y5 // 62f2652e71ef
+ VPSHLDVD 15(R8)(R14*4), Y3, K6, Y5 // 6292652e71acb00f000000
+ VPSHLDVD -7(CX)(DX*4), Y3, K6, Y5 // 62f2652e71ac91f9ffffff
+ VPSHLDVD Z22, Z8, K3, Z14 // 62323d4b71f6
+ VPSHLDVD Z25, Z8, K3, Z14 // 62123d4b71f1
+ VPSHLDVD 15(R8)(R14*1), Z8, K3, Z14 // 62123d4b71b4300f000000
+ VPSHLDVD 15(R8)(R14*2), Z8, K3, Z14 // 62123d4b71b4700f000000
+ VPSHLDVD Z22, Z24, K3, Z14 // 62323d4371f6
+ VPSHLDVD Z25, Z24, K3, Z14 // 62123d4371f1
+ VPSHLDVD 15(R8)(R14*1), Z24, K3, Z14 // 62123d4371b4300f000000
+ VPSHLDVD 15(R8)(R14*2), Z24, K3, Z14 // 62123d4371b4700f000000
+ VPSHLDVD Z22, Z8, K3, Z7 // 62b23d4b71fe
+ VPSHLDVD Z25, Z8, K3, Z7 // 62923d4b71f9
+ VPSHLDVD 15(R8)(R14*1), Z8, K3, Z7 // 62923d4b71bc300f000000
+ VPSHLDVD 15(R8)(R14*2), Z8, K3, Z7 // 62923d4b71bc700f000000
+ VPSHLDVD Z22, Z24, K3, Z7 // 62b23d4371fe
+ VPSHLDVD Z25, Z24, K3, Z7 // 62923d4371f9
+ VPSHLDVD 15(R8)(R14*1), Z24, K3, Z7 // 62923d4371bc300f000000
+ VPSHLDVD 15(R8)(R14*2), Z24, K3, Z7 // 62923d4371bc700f000000
+ VPSHLDVQ X3, X17, K7, X12 // 6272f50771e3
+ VPSHLDVQ X26, X17, K7, X12 // 6212f50771e2
+ VPSHLDVQ X23, X17, K7, X12 // 6232f50771e7
+ VPSHLDVQ 99(R15)(R15*1), X17, K7, X12 // 6212f50771a43f63000000
+ VPSHLDVQ (DX), X17, K7, X12 // 6272f5077122
+ VPSHLDVQ X3, X15, K7, X12 // 6272850f71e3
+ VPSHLDVQ X26, X15, K7, X12 // 6212850f71e2
+ VPSHLDVQ X23, X15, K7, X12 // 6232850f71e7
+ VPSHLDVQ 99(R15)(R15*1), X15, K7, X12 // 6212850f71a43f63000000
+ VPSHLDVQ (DX), X15, K7, X12 // 6272850f7122
+ VPSHLDVQ X3, X8, K7, X12 // 6272bd0f71e3
+ VPSHLDVQ X26, X8, K7, X12 // 6212bd0f71e2
+ VPSHLDVQ X23, X8, K7, X12 // 6232bd0f71e7
+ VPSHLDVQ 99(R15)(R15*1), X8, K7, X12 // 6212bd0f71a43f63000000
+ VPSHLDVQ (DX), X8, K7, X12 // 6272bd0f7122
+ VPSHLDVQ X3, X17, K7, X14 // 6272f50771f3
+ VPSHLDVQ X26, X17, K7, X14 // 6212f50771f2
+ VPSHLDVQ X23, X17, K7, X14 // 6232f50771f7
+ VPSHLDVQ 99(R15)(R15*1), X17, K7, X14 // 6212f50771b43f63000000
+ VPSHLDVQ (DX), X17, K7, X14 // 6272f5077132
+ VPSHLDVQ X3, X15, K7, X14 // 6272850f71f3
+ VPSHLDVQ X26, X15, K7, X14 // 6212850f71f2
+ VPSHLDVQ X23, X15, K7, X14 // 6232850f71f7
+ VPSHLDVQ 99(R15)(R15*1), X15, K7, X14 // 6212850f71b43f63000000
+ VPSHLDVQ (DX), X15, K7, X14 // 6272850f7132
+ VPSHLDVQ X3, X8, K7, X14 // 6272bd0f71f3
+ VPSHLDVQ X26, X8, K7, X14 // 6212bd0f71f2
+ VPSHLDVQ X23, X8, K7, X14 // 6232bd0f71f7
+ VPSHLDVQ 99(R15)(R15*1), X8, K7, X14 // 6212bd0f71b43f63000000
+ VPSHLDVQ (DX), X8, K7, X14 // 6272bd0f7132
+ VPSHLDVQ X3, X17, K7, X5 // 62f2f50771eb
+ VPSHLDVQ X26, X17, K7, X5 // 6292f50771ea
+ VPSHLDVQ X23, X17, K7, X5 // 62b2f50771ef
+ VPSHLDVQ 99(R15)(R15*1), X17, K7, X5 // 6292f50771ac3f63000000
+ VPSHLDVQ (DX), X17, K7, X5 // 62f2f507712a
+ VPSHLDVQ X3, X15, K7, X5 // 62f2850f71eb
+ VPSHLDVQ X26, X15, K7, X5 // 6292850f71ea
+ VPSHLDVQ X23, X15, K7, X5 // 62b2850f71ef
+ VPSHLDVQ 99(R15)(R15*1), X15, K7, X5 // 6292850f71ac3f63000000
+ VPSHLDVQ (DX), X15, K7, X5 // 62f2850f712a
+ VPSHLDVQ X3, X8, K7, X5 // 62f2bd0f71eb
+ VPSHLDVQ X26, X8, K7, X5 // 6292bd0f71ea
+ VPSHLDVQ X23, X8, K7, X5 // 62b2bd0f71ef
+ VPSHLDVQ 99(R15)(R15*1), X8, K7, X5 // 6292bd0f71ac3f63000000
+ VPSHLDVQ (DX), X8, K7, X5 // 62f2bd0f712a
+ VPSHLDVQ Y17, Y12, K4, Y0 // 62b29d2c71c1
+ VPSHLDVQ Y7, Y12, K4, Y0 // 62f29d2c71c7
+ VPSHLDVQ Y9, Y12, K4, Y0 // 62d29d2c71c1
+ VPSHLDVQ (R8), Y12, K4, Y0 // 62d29d2c7100
+ VPSHLDVQ 15(DX)(BX*2), Y12, K4, Y0 // 62f29d2c71845a0f000000
+ VPSHLDVQ Y17, Y1, K4, Y0 // 62b2f52c71c1
+ VPSHLDVQ Y7, Y1, K4, Y0 // 62f2f52c71c7
+ VPSHLDVQ Y9, Y1, K4, Y0 // 62d2f52c71c1
+ VPSHLDVQ (R8), Y1, K4, Y0 // 62d2f52c7100
+ VPSHLDVQ 15(DX)(BX*2), Y1, K4, Y0 // 62f2f52c71845a0f000000
+ VPSHLDVQ Y17, Y14, K4, Y0 // 62b28d2c71c1
+ VPSHLDVQ Y7, Y14, K4, Y0 // 62f28d2c71c7
+ VPSHLDVQ Y9, Y14, K4, Y0 // 62d28d2c71c1
+ VPSHLDVQ (R8), Y14, K4, Y0 // 62d28d2c7100
+ VPSHLDVQ 15(DX)(BX*2), Y14, K4, Y0 // 62f28d2c71845a0f000000
+ VPSHLDVQ Y17, Y12, K4, Y22 // 62a29d2c71f1
+ VPSHLDVQ Y7, Y12, K4, Y22 // 62e29d2c71f7
+ VPSHLDVQ Y9, Y12, K4, Y22 // 62c29d2c71f1
+ VPSHLDVQ (R8), Y12, K4, Y22 // 62c29d2c7130
+ VPSHLDVQ 15(DX)(BX*2), Y12, K4, Y22 // 62e29d2c71b45a0f000000
+ VPSHLDVQ Y17, Y1, K4, Y22 // 62a2f52c71f1
+ VPSHLDVQ Y7, Y1, K4, Y22 // 62e2f52c71f7
+ VPSHLDVQ Y9, Y1, K4, Y22 // 62c2f52c71f1
+ VPSHLDVQ (R8), Y1, K4, Y22 // 62c2f52c7130
+ VPSHLDVQ 15(DX)(BX*2), Y1, K4, Y22 // 62e2f52c71b45a0f000000
+ VPSHLDVQ Y17, Y14, K4, Y22 // 62a28d2c71f1
+ VPSHLDVQ Y7, Y14, K4, Y22 // 62e28d2c71f7
+ VPSHLDVQ Y9, Y14, K4, Y22 // 62c28d2c71f1
+ VPSHLDVQ (R8), Y14, K4, Y22 // 62c28d2c7130
+ VPSHLDVQ 15(DX)(BX*2), Y14, K4, Y22 // 62e28d2c71b45a0f000000
+ VPSHLDVQ Y17, Y12, K4, Y13 // 62329d2c71e9
+ VPSHLDVQ Y7, Y12, K4, Y13 // 62729d2c71ef
+ VPSHLDVQ Y9, Y12, K4, Y13 // 62529d2c71e9
+ VPSHLDVQ (R8), Y12, K4, Y13 // 62529d2c7128
+ VPSHLDVQ 15(DX)(BX*2), Y12, K4, Y13 // 62729d2c71ac5a0f000000
+ VPSHLDVQ Y17, Y1, K4, Y13 // 6232f52c71e9
+ VPSHLDVQ Y7, Y1, K4, Y13 // 6272f52c71ef
+ VPSHLDVQ Y9, Y1, K4, Y13 // 6252f52c71e9
+ VPSHLDVQ (R8), Y1, K4, Y13 // 6252f52c7128
+ VPSHLDVQ 15(DX)(BX*2), Y1, K4, Y13 // 6272f52c71ac5a0f000000
+ VPSHLDVQ Y17, Y14, K4, Y13 // 62328d2c71e9
+ VPSHLDVQ Y7, Y14, K4, Y13 // 62728d2c71ef
+ VPSHLDVQ Y9, Y14, K4, Y13 // 62528d2c71e9
+ VPSHLDVQ (R8), Y14, K4, Y13 // 62528d2c7128
+ VPSHLDVQ 15(DX)(BX*2), Y14, K4, Y13 // 62728d2c71ac5a0f000000
+ VPSHLDVQ Z0, Z6, K4, Z1 // 62f2cd4c71c8
+ VPSHLDVQ Z8, Z6, K4, Z1 // 62d2cd4c71c8
+ VPSHLDVQ (R14), Z6, K4, Z1 // 62d2cd4c710e
+ VPSHLDVQ -7(DI)(R8*8), Z6, K4, Z1 // 62b2cd4c718cc7f9ffffff
+ VPSHLDVQ Z0, Z2, K4, Z1 // 62f2ed4c71c8
+ VPSHLDVQ Z8, Z2, K4, Z1 // 62d2ed4c71c8
+ VPSHLDVQ (R14), Z2, K4, Z1 // 62d2ed4c710e
+ VPSHLDVQ -7(DI)(R8*8), Z2, K4, Z1 // 62b2ed4c718cc7f9ffffff
+ VPSHLDVQ Z0, Z6, K4, Z16 // 62e2cd4c71c0
+ VPSHLDVQ Z8, Z6, K4, Z16 // 62c2cd4c71c0
+ VPSHLDVQ (R14), Z6, K4, Z16 // 62c2cd4c7106
+ VPSHLDVQ -7(DI)(R8*8), Z6, K4, Z16 // 62a2cd4c7184c7f9ffffff
+ VPSHLDVQ Z0, Z2, K4, Z16 // 62e2ed4c71c0
+ VPSHLDVQ Z8, Z2, K4, Z16 // 62c2ed4c71c0
+ VPSHLDVQ (R14), Z2, K4, Z16 // 62c2ed4c7106
+ VPSHLDVQ -7(DI)(R8*8), Z2, K4, Z16 // 62a2ed4c7184c7f9ffffff
+ VPSHLDVW X18, X9, K7, X13 // 6232b50f70ea
+ VPSHLDVW X21, X9, K7, X13 // 6232b50f70ed
+ VPSHLDVW X1, X9, K7, X13 // 6272b50f70e9
+ VPSHLDVW -17(BP)(SI*8), X9, K7, X13 // 6272b50f70acf5efffffff
+ VPSHLDVW (R15), X9, K7, X13 // 6252b50f702f
+ VPSHLDVW X18, X15, K7, X13 // 6232850f70ea
+ VPSHLDVW X21, X15, K7, X13 // 6232850f70ed
+ VPSHLDVW X1, X15, K7, X13 // 6272850f70e9
+ VPSHLDVW -17(BP)(SI*8), X15, K7, X13 // 6272850f70acf5efffffff
+ VPSHLDVW (R15), X15, K7, X13 // 6252850f702f
+ VPSHLDVW X18, X26, K7, X13 // 6232ad0770ea
+ VPSHLDVW X21, X26, K7, X13 // 6232ad0770ed
+ VPSHLDVW X1, X26, K7, X13 // 6272ad0770e9
+ VPSHLDVW -17(BP)(SI*8), X26, K7, X13 // 6272ad0770acf5efffffff
+ VPSHLDVW (R15), X26, K7, X13 // 6252ad07702f
+ VPSHLDVW X18, X9, K7, X28 // 6222b50f70e2
+ VPSHLDVW X21, X9, K7, X28 // 6222b50f70e5
+ VPSHLDVW X1, X9, K7, X28 // 6262b50f70e1
+ VPSHLDVW -17(BP)(SI*8), X9, K7, X28 // 6262b50f70a4f5efffffff
+ VPSHLDVW (R15), X9, K7, X28 // 6242b50f7027
+ VPSHLDVW X18, X15, K7, X28 // 6222850f70e2
+ VPSHLDVW X21, X15, K7, X28 // 6222850f70e5
+ VPSHLDVW X1, X15, K7, X28 // 6262850f70e1
+ VPSHLDVW -17(BP)(SI*8), X15, K7, X28 // 6262850f70a4f5efffffff
+ VPSHLDVW (R15), X15, K7, X28 // 6242850f7027
+ VPSHLDVW X18, X26, K7, X28 // 6222ad0770e2
+ VPSHLDVW X21, X26, K7, X28 // 6222ad0770e5
+ VPSHLDVW X1, X26, K7, X28 // 6262ad0770e1
+ VPSHLDVW -17(BP)(SI*8), X26, K7, X28 // 6262ad0770a4f5efffffff
+ VPSHLDVW (R15), X26, K7, X28 // 6242ad077027
+ VPSHLDVW X18, X9, K7, X24 // 6222b50f70c2
+ VPSHLDVW X21, X9, K7, X24 // 6222b50f70c5
+ VPSHLDVW X1, X9, K7, X24 // 6262b50f70c1
+ VPSHLDVW -17(BP)(SI*8), X9, K7, X24 // 6262b50f7084f5efffffff
+ VPSHLDVW (R15), X9, K7, X24 // 6242b50f7007
+ VPSHLDVW X18, X15, K7, X24 // 6222850f70c2
+ VPSHLDVW X21, X15, K7, X24 // 6222850f70c5
+ VPSHLDVW X1, X15, K7, X24 // 6262850f70c1
+ VPSHLDVW -17(BP)(SI*8), X15, K7, X24 // 6262850f7084f5efffffff
+ VPSHLDVW (R15), X15, K7, X24 // 6242850f7007
+ VPSHLDVW X18, X26, K7, X24 // 6222ad0770c2
+ VPSHLDVW X21, X26, K7, X24 // 6222ad0770c5
+ VPSHLDVW X1, X26, K7, X24 // 6262ad0770c1
+ VPSHLDVW -17(BP)(SI*8), X26, K7, X24 // 6262ad077084f5efffffff
+ VPSHLDVW (R15), X26, K7, X24 // 6242ad077007
+ VPSHLDVW Y2, Y28, K2, Y31 // 62629d2270fa
+ VPSHLDVW Y21, Y28, K2, Y31 // 62229d2270fd
+ VPSHLDVW Y12, Y28, K2, Y31 // 62429d2270fc
+ VPSHLDVW 17(SP)(BP*1), Y28, K2, Y31 // 62629d2270bc2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y28, K2, Y31 // 62629d2270bcd1f9ffffff
+ VPSHLDVW Y2, Y13, K2, Y31 // 6262952a70fa
+ VPSHLDVW Y21, Y13, K2, Y31 // 6222952a70fd
+ VPSHLDVW Y12, Y13, K2, Y31 // 6242952a70fc
+ VPSHLDVW 17(SP)(BP*1), Y13, K2, Y31 // 6262952a70bc2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y13, K2, Y31 // 6262952a70bcd1f9ffffff
+ VPSHLDVW Y2, Y7, K2, Y31 // 6262c52a70fa
+ VPSHLDVW Y21, Y7, K2, Y31 // 6222c52a70fd
+ VPSHLDVW Y12, Y7, K2, Y31 // 6242c52a70fc
+ VPSHLDVW 17(SP)(BP*1), Y7, K2, Y31 // 6262c52a70bc2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y7, K2, Y31 // 6262c52a70bcd1f9ffffff
+ VPSHLDVW Y2, Y28, K2, Y8 // 62729d2270c2
+ VPSHLDVW Y21, Y28, K2, Y8 // 62329d2270c5
+ VPSHLDVW Y12, Y28, K2, Y8 // 62529d2270c4
+ VPSHLDVW 17(SP)(BP*1), Y28, K2, Y8 // 62729d2270842c11000000
+ VPSHLDVW -7(CX)(DX*8), Y28, K2, Y8 // 62729d227084d1f9ffffff
+ VPSHLDVW Y2, Y13, K2, Y8 // 6272952a70c2
+ VPSHLDVW Y21, Y13, K2, Y8 // 6232952a70c5
+ VPSHLDVW Y12, Y13, K2, Y8 // 6252952a70c4
+ VPSHLDVW 17(SP)(BP*1), Y13, K2, Y8 // 6272952a70842c11000000
+ VPSHLDVW -7(CX)(DX*8), Y13, K2, Y8 // 6272952a7084d1f9ffffff
+ VPSHLDVW Y2, Y7, K2, Y8 // 6272c52a70c2
+ VPSHLDVW Y21, Y7, K2, Y8 // 6232c52a70c5
+ VPSHLDVW Y12, Y7, K2, Y8 // 6252c52a70c4
+ VPSHLDVW 17(SP)(BP*1), Y7, K2, Y8 // 6272c52a70842c11000000
+ VPSHLDVW -7(CX)(DX*8), Y7, K2, Y8 // 6272c52a7084d1f9ffffff
+ VPSHLDVW Y2, Y28, K2, Y1 // 62f29d2270ca
+ VPSHLDVW Y21, Y28, K2, Y1 // 62b29d2270cd
+ VPSHLDVW Y12, Y28, K2, Y1 // 62d29d2270cc
+ VPSHLDVW 17(SP)(BP*1), Y28, K2, Y1 // 62f29d22708c2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y28, K2, Y1 // 62f29d22708cd1f9ffffff
+ VPSHLDVW Y2, Y13, K2, Y1 // 62f2952a70ca
+ VPSHLDVW Y21, Y13, K2, Y1 // 62b2952a70cd
+ VPSHLDVW Y12, Y13, K2, Y1 // 62d2952a70cc
+ VPSHLDVW 17(SP)(BP*1), Y13, K2, Y1 // 62f2952a708c2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y13, K2, Y1 // 62f2952a708cd1f9ffffff
+ VPSHLDVW Y2, Y7, K2, Y1 // 62f2c52a70ca
+ VPSHLDVW Y21, Y7, K2, Y1 // 62b2c52a70cd
+ VPSHLDVW Y12, Y7, K2, Y1 // 62d2c52a70cc
+ VPSHLDVW 17(SP)(BP*1), Y7, K2, Y1 // 62f2c52a708c2c11000000
+ VPSHLDVW -7(CX)(DX*8), Y7, K2, Y1 // 62f2c52a708cd1f9ffffff
+ VPSHLDVW Z11, Z14, K5, Z15 // 62528d4d70fb
+ VPSHLDVW Z5, Z14, K5, Z15 // 62728d4d70fd
+ VPSHLDVW 99(R15)(R15*4), Z14, K5, Z15 // 62128d4d70bcbf63000000
+ VPSHLDVW 15(DX), Z14, K5, Z15 // 62728d4d70ba0f000000
+ VPSHLDVW Z11, Z27, K5, Z15 // 6252a54570fb
+ VPSHLDVW Z5, Z27, K5, Z15 // 6272a54570fd
+ VPSHLDVW 99(R15)(R15*4), Z27, K5, Z15 // 6212a54570bcbf63000000
+ VPSHLDVW 15(DX), Z27, K5, Z15 // 6272a54570ba0f000000
+ VPSHLDVW Z11, Z14, K5, Z12 // 62528d4d70e3
+ VPSHLDVW Z5, Z14, K5, Z12 // 62728d4d70e5
+ VPSHLDVW 99(R15)(R15*4), Z14, K5, Z12 // 62128d4d70a4bf63000000
+ VPSHLDVW 15(DX), Z14, K5, Z12 // 62728d4d70a20f000000
+ VPSHLDVW Z11, Z27, K5, Z12 // 6252a54570e3
+ VPSHLDVW Z5, Z27, K5, Z12 // 6272a54570e5
+ VPSHLDVW 99(R15)(R15*4), Z27, K5, Z12 // 6212a54570a4bf63000000
+ VPSHLDVW 15(DX), Z27, K5, Z12 // 6272a54570a20f000000
+ VPSHLDW $65, X24, X7, K3, X11 // 6213c50b70d841
+ VPSHLDW $65, X20, X7, K3, X11 // 6233c50b70dc41
+ VPSHLDW $65, X7, X7, K3, X11 // 6273c50b70df41
+ VPSHLDW $65, 7(SI)(DI*8), X7, K3, X11 // 6273c50b709cfe0700000041
+ VPSHLDW $65, -15(R14), X7, K3, X11 // 6253c50b709ef1ffffff41
+ VPSHLDW $65, X24, X0, K3, X11 // 6213fd0b70d841
+ VPSHLDW $65, X20, X0, K3, X11 // 6233fd0b70dc41
+ VPSHLDW $65, X7, X0, K3, X11 // 6273fd0b70df41
+ VPSHLDW $65, 7(SI)(DI*8), X0, K3, X11 // 6273fd0b709cfe0700000041
+ VPSHLDW $65, -15(R14), X0, K3, X11 // 6253fd0b709ef1ffffff41
+ VPSHLDW $65, X24, X7, K3, X31 // 6203c50b70f841
+ VPSHLDW $65, X20, X7, K3, X31 // 6223c50b70fc41
+ VPSHLDW $65, X7, X7, K3, X31 // 6263c50b70ff41
+ VPSHLDW $65, 7(SI)(DI*8), X7, K3, X31 // 6263c50b70bcfe0700000041
+ VPSHLDW $65, -15(R14), X7, K3, X31 // 6243c50b70bef1ffffff41
+ VPSHLDW $65, X24, X0, K3, X31 // 6203fd0b70f841
+ VPSHLDW $65, X20, X0, K3, X31 // 6223fd0b70fc41
+ VPSHLDW $65, X7, X0, K3, X31 // 6263fd0b70ff41
+ VPSHLDW $65, 7(SI)(DI*8), X0, K3, X31 // 6263fd0b70bcfe0700000041
+ VPSHLDW $65, -15(R14), X0, K3, X31 // 6243fd0b70bef1ffffff41
+ VPSHLDW $65, X24, X7, K3, X3 // 6293c50b70d841
+ VPSHLDW $65, X20, X7, K3, X3 // 62b3c50b70dc41
+ VPSHLDW $65, X7, X7, K3, X3 // 62f3c50b70df41
+ VPSHLDW $65, 7(SI)(DI*8), X7, K3, X3 // 62f3c50b709cfe0700000041
+ VPSHLDW $65, -15(R14), X7, K3, X3 // 62d3c50b709ef1ffffff41
+ VPSHLDW $65, X24, X0, K3, X3 // 6293fd0b70d841
+ VPSHLDW $65, X20, X0, K3, X3 // 62b3fd0b70dc41
+ VPSHLDW $65, X7, X0, K3, X3 // 62f3fd0b70df41
+ VPSHLDW $65, 7(SI)(DI*8), X0, K3, X3 // 62f3fd0b709cfe0700000041
+ VPSHLDW $65, -15(R14), X0, K3, X3 // 62d3fd0b709ef1ffffff41
+ VPSHLDW $67, Y12, Y3, K4, Y9 // 6253e52c70cc43
+ VPSHLDW $67, Y21, Y3, K4, Y9 // 6233e52c70cd43
+ VPSHLDW $67, Y14, Y3, K4, Y9 // 6253e52c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y3, K4, Y9 // 6273e52c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y3, K4, Y9 // 6273e52c708c480700000043
+ VPSHLDW $67, Y12, Y2, K4, Y9 // 6253ed2c70cc43
+ VPSHLDW $67, Y21, Y2, K4, Y9 // 6233ed2c70cd43
+ VPSHLDW $67, Y14, Y2, K4, Y9 // 6253ed2c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y2, K4, Y9 // 6273ed2c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y2, K4, Y9 // 6273ed2c708c480700000043
+ VPSHLDW $67, Y12, Y9, K4, Y9 // 6253b52c70cc43
+ VPSHLDW $67, Y21, Y9, K4, Y9 // 6233b52c70cd43
+ VPSHLDW $67, Y14, Y9, K4, Y9 // 6253b52c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y9, K4, Y9 // 6273b52c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y9, K4, Y9 // 6273b52c708c480700000043
+ VPSHLDW $67, Y12, Y3, K4, Y1 // 62d3e52c70cc43
+ VPSHLDW $67, Y21, Y3, K4, Y1 // 62b3e52c70cd43
+ VPSHLDW $67, Y14, Y3, K4, Y1 // 62d3e52c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y3, K4, Y1 // 62f3e52c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y3, K4, Y1 // 62f3e52c708c480700000043
+ VPSHLDW $67, Y12, Y2, K4, Y1 // 62d3ed2c70cc43
+ VPSHLDW $67, Y21, Y2, K4, Y1 // 62b3ed2c70cd43
+ VPSHLDW $67, Y14, Y2, K4, Y1 // 62d3ed2c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y2, K4, Y1 // 62f3ed2c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y2, K4, Y1 // 62f3ed2c708c480700000043
+ VPSHLDW $67, Y12, Y9, K4, Y1 // 62d3b52c70cc43
+ VPSHLDW $67, Y21, Y9, K4, Y1 // 62b3b52c70cd43
+ VPSHLDW $67, Y14, Y9, K4, Y1 // 62d3b52c70ce43
+ VPSHLDW $67, -17(BP)(SI*2), Y9, K4, Y1 // 62f3b52c708c75efffffff43
+ VPSHLDW $67, 7(AX)(CX*2), Y9, K4, Y1 // 62f3b52c708c480700000043
+ VPSHLDW $127, Z2, Z5, K2, Z13 // 6273d54a70ea7f
+ VPSHLDW $127, (CX), Z5, K2, Z13 // 6273d54a70297f
+ VPSHLDW $127, 99(R15), Z5, K2, Z13 // 6253d54a70af630000007f
+ VPSHLDW $127, Z2, Z23, K2, Z13 // 6273c54270ea7f
+ VPSHLDW $127, (CX), Z23, K2, Z13 // 6273c54270297f
+ VPSHLDW $127, 99(R15), Z23, K2, Z13 // 6253c54270af630000007f
+ VPSHLDW $127, Z2, Z5, K2, Z14 // 6273d54a70f27f
+ VPSHLDW $127, (CX), Z5, K2, Z14 // 6273d54a70317f
+ VPSHLDW $127, 99(R15), Z5, K2, Z14 // 6253d54a70b7630000007f
+ VPSHLDW $127, Z2, Z23, K2, Z14 // 6273c54270f27f
+ VPSHLDW $127, (CX), Z23, K2, Z14 // 6273c54270317f
+ VPSHLDW $127, 99(R15), Z23, K2, Z14 // 6253c54270b7630000007f
+ VPSHRDD $0, X21, X5, K2, X9 // 6233550a73cd00
+ VPSHRDD $0, X1, X5, K2, X9 // 6273550a73c900
+ VPSHRDD $0, X11, X5, K2, X9 // 6253550a73cb00
+ VPSHRDD $0, 7(SI)(DI*1), X5, K2, X9 // 6273550a738c3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X5, K2, X9 // 6273550a738cda0f00000000
+ VPSHRDD $0, X21, X31, K2, X9 // 6233050273cd00
+ VPSHRDD $0, X1, X31, K2, X9 // 6273050273c900
+ VPSHRDD $0, X11, X31, K2, X9 // 6253050273cb00
+ VPSHRDD $0, 7(SI)(DI*1), X31, K2, X9 // 62730502738c3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X31, K2, X9 // 62730502738cda0f00000000
+ VPSHRDD $0, X21, X3, K2, X9 // 6233650a73cd00
+ VPSHRDD $0, X1, X3, K2, X9 // 6273650a73c900
+ VPSHRDD $0, X11, X3, K2, X9 // 6253650a73cb00
+ VPSHRDD $0, 7(SI)(DI*1), X3, K2, X9 // 6273650a738c3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X3, K2, X9 // 6273650a738cda0f00000000
+ VPSHRDD $0, X21, X5, K2, X7 // 62b3550a73fd00
+ VPSHRDD $0, X1, X5, K2, X7 // 62f3550a73f900
+ VPSHRDD $0, X11, X5, K2, X7 // 62d3550a73fb00
+ VPSHRDD $0, 7(SI)(DI*1), X5, K2, X7 // 62f3550a73bc3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X5, K2, X7 // 62f3550a73bcda0f00000000
+ VPSHRDD $0, X21, X31, K2, X7 // 62b3050273fd00
+ VPSHRDD $0, X1, X31, K2, X7 // 62f3050273f900
+ VPSHRDD $0, X11, X31, K2, X7 // 62d3050273fb00
+ VPSHRDD $0, 7(SI)(DI*1), X31, K2, X7 // 62f3050273bc3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X31, K2, X7 // 62f3050273bcda0f00000000
+ VPSHRDD $0, X21, X3, K2, X7 // 62b3650a73fd00
+ VPSHRDD $0, X1, X3, K2, X7 // 62f3650a73f900
+ VPSHRDD $0, X11, X3, K2, X7 // 62d3650a73fb00
+ VPSHRDD $0, 7(SI)(DI*1), X3, K2, X7 // 62f3650a73bc3e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X3, K2, X7 // 62f3650a73bcda0f00000000
+ VPSHRDD $0, X21, X5, K2, X14 // 6233550a73f500
+ VPSHRDD $0, X1, X5, K2, X14 // 6273550a73f100
+ VPSHRDD $0, X11, X5, K2, X14 // 6253550a73f300
+ VPSHRDD $0, 7(SI)(DI*1), X5, K2, X14 // 6273550a73b43e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X5, K2, X14 // 6273550a73b4da0f00000000
+ VPSHRDD $0, X21, X31, K2, X14 // 6233050273f500
+ VPSHRDD $0, X1, X31, K2, X14 // 6273050273f100
+ VPSHRDD $0, X11, X31, K2, X14 // 6253050273f300
+ VPSHRDD $0, 7(SI)(DI*1), X31, K2, X14 // 6273050273b43e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X31, K2, X14 // 6273050273b4da0f00000000
+ VPSHRDD $0, X21, X3, K2, X14 // 6233650a73f500
+ VPSHRDD $0, X1, X3, K2, X14 // 6273650a73f100
+ VPSHRDD $0, X11, X3, K2, X14 // 6253650a73f300
+ VPSHRDD $0, 7(SI)(DI*1), X3, K2, X14 // 6273650a73b43e0700000000
+ VPSHRDD $0, 15(DX)(BX*8), X3, K2, X14 // 6273650a73b4da0f00000000
+ VPSHRDD $97, Y31, Y16, K3, Y30 // 62037d2373f761
+ VPSHRDD $97, Y22, Y16, K3, Y30 // 62237d2373f661
+ VPSHRDD $97, Y6, Y16, K3, Y30 // 62637d2373f661
+ VPSHRDD $97, 15(R8)(R14*1), Y16, K3, Y30 // 62037d2373b4300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y16, K3, Y30 // 62037d2373b4700f00000061
+ VPSHRDD $97, Y31, Y1, K3, Y30 // 6203752b73f761
+ VPSHRDD $97, Y22, Y1, K3, Y30 // 6223752b73f661
+ VPSHRDD $97, Y6, Y1, K3, Y30 // 6263752b73f661
+ VPSHRDD $97, 15(R8)(R14*1), Y1, K3, Y30 // 6203752b73b4300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y1, K3, Y30 // 6203752b73b4700f00000061
+ VPSHRDD $97, Y31, Y30, K3, Y30 // 62030d2373f761
+ VPSHRDD $97, Y22, Y30, K3, Y30 // 62230d2373f661
+ VPSHRDD $97, Y6, Y30, K3, Y30 // 62630d2373f661
+ VPSHRDD $97, 15(R8)(R14*1), Y30, K3, Y30 // 62030d2373b4300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y30, K3, Y30 // 62030d2373b4700f00000061
+ VPSHRDD $97, Y31, Y16, K3, Y26 // 62037d2373d761
+ VPSHRDD $97, Y22, Y16, K3, Y26 // 62237d2373d661
+ VPSHRDD $97, Y6, Y16, K3, Y26 // 62637d2373d661
+ VPSHRDD $97, 15(R8)(R14*1), Y16, K3, Y26 // 62037d237394300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y16, K3, Y26 // 62037d237394700f00000061
+ VPSHRDD $97, Y31, Y1, K3, Y26 // 6203752b73d761
+ VPSHRDD $97, Y22, Y1, K3, Y26 // 6223752b73d661
+ VPSHRDD $97, Y6, Y1, K3, Y26 // 6263752b73d661
+ VPSHRDD $97, 15(R8)(R14*1), Y1, K3, Y26 // 6203752b7394300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y1, K3, Y26 // 6203752b7394700f00000061
+ VPSHRDD $97, Y31, Y30, K3, Y26 // 62030d2373d761
+ VPSHRDD $97, Y22, Y30, K3, Y26 // 62230d2373d661
+ VPSHRDD $97, Y6, Y30, K3, Y26 // 62630d2373d661
+ VPSHRDD $97, 15(R8)(R14*1), Y30, K3, Y26 // 62030d237394300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y30, K3, Y26 // 62030d237394700f00000061
+ VPSHRDD $97, Y31, Y16, K3, Y7 // 62937d2373ff61
+ VPSHRDD $97, Y22, Y16, K3, Y7 // 62b37d2373fe61
+ VPSHRDD $97, Y6, Y16, K3, Y7 // 62f37d2373fe61
+ VPSHRDD $97, 15(R8)(R14*1), Y16, K3, Y7 // 62937d2373bc300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y16, K3, Y7 // 62937d2373bc700f00000061
+ VPSHRDD $97, Y31, Y1, K3, Y7 // 6293752b73ff61
+ VPSHRDD $97, Y22, Y1, K3, Y7 // 62b3752b73fe61
+ VPSHRDD $97, Y6, Y1, K3, Y7 // 62f3752b73fe61
+ VPSHRDD $97, 15(R8)(R14*1), Y1, K3, Y7 // 6293752b73bc300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y1, K3, Y7 // 6293752b73bc700f00000061
+ VPSHRDD $97, Y31, Y30, K3, Y7 // 62930d2373ff61
+ VPSHRDD $97, Y22, Y30, K3, Y7 // 62b30d2373fe61
+ VPSHRDD $97, Y6, Y30, K3, Y7 // 62f30d2373fe61
+ VPSHRDD $97, 15(R8)(R14*1), Y30, K3, Y7 // 62930d2373bc300f00000061
+ VPSHRDD $97, 15(R8)(R14*2), Y30, K3, Y7 // 62930d2373bc700f00000061
+ VPSHRDD $81, Z28, Z26, K3, Z6 // 62932d4373f451
+ VPSHRDD $81, Z6, Z26, K3, Z6 // 62f32d4373f651
+ VPSHRDD $81, 99(R15)(R15*2), Z26, K3, Z6 // 62932d4373b47f6300000051
+ VPSHRDD $81, -7(DI), Z26, K3, Z6 // 62f32d4373b7f9ffffff51
+ VPSHRDD $81, Z28, Z14, K3, Z6 // 62930d4b73f451
+ VPSHRDD $81, Z6, Z14, K3, Z6 // 62f30d4b73f651
+ VPSHRDD $81, 99(R15)(R15*2), Z14, K3, Z6 // 62930d4b73b47f6300000051
+ VPSHRDD $81, -7(DI), Z14, K3, Z6 // 62f30d4b73b7f9ffffff51
+ VPSHRDD $81, Z28, Z26, K3, Z14 // 62132d4373f451
+ VPSHRDD $81, Z6, Z26, K3, Z14 // 62732d4373f651
+ VPSHRDD $81, 99(R15)(R15*2), Z26, K3, Z14 // 62132d4373b47f6300000051
+ VPSHRDD $81, -7(DI), Z26, K3, Z14 // 62732d4373b7f9ffffff51
+ VPSHRDD $81, Z28, Z14, K3, Z14 // 62130d4b73f451
+ VPSHRDD $81, Z6, Z14, K3, Z14 // 62730d4b73f651
+ VPSHRDD $81, 99(R15)(R15*2), Z14, K3, Z14 // 62130d4b73b47f6300000051
+ VPSHRDD $81, -7(DI), Z14, K3, Z14 // 62730d4b73b7f9ffffff51
+ VPSHRDQ $42, X14, X16, K3, X13 // 6253fd0373ee2a
+ VPSHRDQ $42, X19, X16, K3, X13 // 6233fd0373eb2a
+ VPSHRDQ $42, X8, X16, K3, X13 // 6253fd0373e82a
+ VPSHRDQ $42, -7(DI)(R8*1), X16, K3, X13 // 6233fd0373ac07f9ffffff2a
+ VPSHRDQ $42, (SP), X16, K3, X13 // 6273fd03732c242a
+ VPSHRDQ $42, X14, X14, K3, X13 // 62538d0b73ee2a
+ VPSHRDQ $42, X19, X14, K3, X13 // 62338d0b73eb2a
+ VPSHRDQ $42, X8, X14, K3, X13 // 62538d0b73e82a
+ VPSHRDQ $42, -7(DI)(R8*1), X14, K3, X13 // 62338d0b73ac07f9ffffff2a
+ VPSHRDQ $42, (SP), X14, K3, X13 // 62738d0b732c242a
+ VPSHRDQ $42, X14, X11, K3, X13 // 6253a50b73ee2a
+ VPSHRDQ $42, X19, X11, K3, X13 // 6233a50b73eb2a
+ VPSHRDQ $42, X8, X11, K3, X13 // 6253a50b73e82a
+ VPSHRDQ $42, -7(DI)(R8*1), X11, K3, X13 // 6233a50b73ac07f9ffffff2a
+ VPSHRDQ $42, (SP), X11, K3, X13 // 6273a50b732c242a
+ VPSHRDQ $42, X14, X16, K3, X0 // 62d3fd0373c62a
+ VPSHRDQ $42, X19, X16, K3, X0 // 62b3fd0373c32a
+ VPSHRDQ $42, X8, X16, K3, X0 // 62d3fd0373c02a
+ VPSHRDQ $42, -7(DI)(R8*1), X16, K3, X0 // 62b3fd03738407f9ffffff2a
+ VPSHRDQ $42, (SP), X16, K3, X0 // 62f3fd037304242a
+ VPSHRDQ $42, X14, X14, K3, X0 // 62d38d0b73c62a
+ VPSHRDQ $42, X19, X14, K3, X0 // 62b38d0b73c32a
+ VPSHRDQ $42, X8, X14, K3, X0 // 62d38d0b73c02a
+ VPSHRDQ $42, -7(DI)(R8*1), X14, K3, X0 // 62b38d0b738407f9ffffff2a
+ VPSHRDQ $42, (SP), X14, K3, X0 // 62f38d0b7304242a
+ VPSHRDQ $42, X14, X11, K3, X0 // 62d3a50b73c62a
+ VPSHRDQ $42, X19, X11, K3, X0 // 62b3a50b73c32a
+ VPSHRDQ $42, X8, X11, K3, X0 // 62d3a50b73c02a
+ VPSHRDQ $42, -7(DI)(R8*1), X11, K3, X0 // 62b3a50b738407f9ffffff2a
+ VPSHRDQ $42, (SP), X11, K3, X0 // 62f3a50b7304242a
+ VPSHRDQ $42, X14, X16, K3, X30 // 6243fd0373f62a
+ VPSHRDQ $42, X19, X16, K3, X30 // 6223fd0373f32a
+ VPSHRDQ $42, X8, X16, K3, X30 // 6243fd0373f02a
+ VPSHRDQ $42, -7(DI)(R8*1), X16, K3, X30 // 6223fd0373b407f9ffffff2a
+ VPSHRDQ $42, (SP), X16, K3, X30 // 6263fd037334242a
+ VPSHRDQ $42, X14, X14, K3, X30 // 62438d0b73f62a
+ VPSHRDQ $42, X19, X14, K3, X30 // 62238d0b73f32a
+ VPSHRDQ $42, X8, X14, K3, X30 // 62438d0b73f02a
+ VPSHRDQ $42, -7(DI)(R8*1), X14, K3, X30 // 62238d0b73b407f9ffffff2a
+ VPSHRDQ $42, (SP), X14, K3, X30 // 62638d0b7334242a
+ VPSHRDQ $42, X14, X11, K3, X30 // 6243a50b73f62a
+ VPSHRDQ $42, X19, X11, K3, X30 // 6223a50b73f32a
+ VPSHRDQ $42, X8, X11, K3, X30 // 6243a50b73f02a
+ VPSHRDQ $42, -7(DI)(R8*1), X11, K3, X30 // 6223a50b73b407f9ffffff2a
+ VPSHRDQ $42, (SP), X11, K3, X30 // 6263a50b7334242a
+ VPSHRDQ $79, Y24, Y28, K2, Y21 // 62839d2273e84f
+ VPSHRDQ $79, Y13, Y28, K2, Y21 // 62c39d2273ed4f
+ VPSHRDQ $79, Y20, Y28, K2, Y21 // 62a39d2273ec4f
+ VPSHRDQ $79, (R14), Y28, K2, Y21 // 62c39d22732e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y28, K2, Y21 // 62a39d2273acc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y20, K2, Y21 // 6283dd2273e84f
+ VPSHRDQ $79, Y13, Y20, K2, Y21 // 62c3dd2273ed4f
+ VPSHRDQ $79, Y20, Y20, K2, Y21 // 62a3dd2273ec4f
+ VPSHRDQ $79, (R14), Y20, K2, Y21 // 62c3dd22732e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y20, K2, Y21 // 62a3dd2273acc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y14, K2, Y21 // 62838d2a73e84f
+ VPSHRDQ $79, Y13, Y14, K2, Y21 // 62c38d2a73ed4f
+ VPSHRDQ $79, Y20, Y14, K2, Y21 // 62a38d2a73ec4f
+ VPSHRDQ $79, (R14), Y14, K2, Y21 // 62c38d2a732e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y14, K2, Y21 // 62a38d2a73acc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y28, K2, Y7 // 62939d2273f84f
+ VPSHRDQ $79, Y13, Y28, K2, Y7 // 62d39d2273fd4f
+ VPSHRDQ $79, Y20, Y28, K2, Y7 // 62b39d2273fc4f
+ VPSHRDQ $79, (R14), Y28, K2, Y7 // 62d39d22733e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y28, K2, Y7 // 62b39d2273bcc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y20, K2, Y7 // 6293dd2273f84f
+ VPSHRDQ $79, Y13, Y20, K2, Y7 // 62d3dd2273fd4f
+ VPSHRDQ $79, Y20, Y20, K2, Y7 // 62b3dd2273fc4f
+ VPSHRDQ $79, (R14), Y20, K2, Y7 // 62d3dd22733e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y20, K2, Y7 // 62b3dd2273bcc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y14, K2, Y7 // 62938d2a73f84f
+ VPSHRDQ $79, Y13, Y14, K2, Y7 // 62d38d2a73fd4f
+ VPSHRDQ $79, Y20, Y14, K2, Y7 // 62b38d2a73fc4f
+ VPSHRDQ $79, (R14), Y14, K2, Y7 // 62d38d2a733e4f
+ VPSHRDQ $79, -7(DI)(R8*8), Y14, K2, Y7 // 62b38d2a73bcc7f9ffffff4f
+ VPSHRDQ $79, Y24, Y28, K2, Y0 // 62939d2273c04f
+ VPSHRDQ $79, Y13, Y28, K2, Y0 // 62d39d2273c54f
+ VPSHRDQ $79, Y20, Y28, K2, Y0 // 62b39d2273c44f
+ VPSHRDQ $79, (R14), Y28, K2, Y0 // 62d39d2273064f
+ VPSHRDQ $79, -7(DI)(R8*8), Y28, K2, Y0 // 62b39d227384c7f9ffffff4f
+ VPSHRDQ $79, Y24, Y20, K2, Y0 // 6293dd2273c04f
+ VPSHRDQ $79, Y13, Y20, K2, Y0 // 62d3dd2273c54f
+ VPSHRDQ $79, Y20, Y20, K2, Y0 // 62b3dd2273c44f
+ VPSHRDQ $79, (R14), Y20, K2, Y0 // 62d3dd2273064f
+ VPSHRDQ $79, -7(DI)(R8*8), Y20, K2, Y0 // 62b3dd227384c7f9ffffff4f
+ VPSHRDQ $79, Y24, Y14, K2, Y0 // 62938d2a73c04f
+ VPSHRDQ $79, Y13, Y14, K2, Y0 // 62d38d2a73c54f
+ VPSHRDQ $79, Y20, Y14, K2, Y0 // 62b38d2a73c44f
+ VPSHRDQ $79, (R14), Y14, K2, Y0 // 62d38d2a73064f
+ VPSHRDQ $79, -7(DI)(R8*8), Y14, K2, Y0 // 62b38d2a7384c7f9ffffff4f
+ VPSHRDQ $64, Z3, Z26, K1, Z13 // 6273ad4173eb40
+ VPSHRDQ $64, Z0, Z26, K1, Z13 // 6273ad4173e840
+ VPSHRDQ $64, -7(CX)(DX*1), Z26, K1, Z13 // 6273ad4173ac11f9ffffff40
+ VPSHRDQ $64, -15(R14)(R15*4), Z26, K1, Z13 // 6213ad4173acbef1ffffff40
+ VPSHRDQ $64, Z3, Z3, K1, Z13 // 6273e54973eb40
+ VPSHRDQ $64, Z0, Z3, K1, Z13 // 6273e54973e840
+ VPSHRDQ $64, -7(CX)(DX*1), Z3, K1, Z13 // 6273e54973ac11f9ffffff40
+ VPSHRDQ $64, -15(R14)(R15*4), Z3, K1, Z13 // 6213e54973acbef1ffffff40
+ VPSHRDQ $64, Z3, Z26, K1, Z21 // 62e3ad4173eb40
+ VPSHRDQ $64, Z0, Z26, K1, Z21 // 62e3ad4173e840
+ VPSHRDQ $64, -7(CX)(DX*1), Z26, K1, Z21 // 62e3ad4173ac11f9ffffff40
+ VPSHRDQ $64, -15(R14)(R15*4), Z26, K1, Z21 // 6283ad4173acbef1ffffff40
+ VPSHRDQ $64, Z3, Z3, K1, Z21 // 62e3e54973eb40
+ VPSHRDQ $64, Z0, Z3, K1, Z21 // 62e3e54973e840
+ VPSHRDQ $64, -7(CX)(DX*1), Z3, K1, Z21 // 62e3e54973ac11f9ffffff40
+ VPSHRDQ $64, -15(R14)(R15*4), Z3, K1, Z21 // 6283e54973acbef1ffffff40
+ VPSHRDVD X23, X12, K2, X8 // 62321d0a73c7
+ VPSHRDVD X11, X12, K2, X8 // 62521d0a73c3
+ VPSHRDVD X31, X12, K2, X8 // 62121d0a73c7
+ VPSHRDVD -7(CX), X12, K2, X8 // 62721d0a7381f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X12, K2, X8 // 62721d0a73849a0f000000
+ VPSHRDVD X23, X16, K2, X8 // 62327d0273c7
+ VPSHRDVD X11, X16, K2, X8 // 62527d0273c3
+ VPSHRDVD X31, X16, K2, X8 // 62127d0273c7
+ VPSHRDVD -7(CX), X16, K2, X8 // 62727d027381f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X16, K2, X8 // 62727d0273849a0f000000
+ VPSHRDVD X23, X23, K2, X8 // 6232450273c7
+ VPSHRDVD X11, X23, K2, X8 // 6252450273c3
+ VPSHRDVD X31, X23, K2, X8 // 6212450273c7
+ VPSHRDVD -7(CX), X23, K2, X8 // 627245027381f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X23, K2, X8 // 6272450273849a0f000000
+ VPSHRDVD X23, X12, K2, X26 // 62221d0a73d7
+ VPSHRDVD X11, X12, K2, X26 // 62421d0a73d3
+ VPSHRDVD X31, X12, K2, X26 // 62021d0a73d7
+ VPSHRDVD -7(CX), X12, K2, X26 // 62621d0a7391f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X12, K2, X26 // 62621d0a73949a0f000000
+ VPSHRDVD X23, X16, K2, X26 // 62227d0273d7
+ VPSHRDVD X11, X16, K2, X26 // 62427d0273d3
+ VPSHRDVD X31, X16, K2, X26 // 62027d0273d7
+ VPSHRDVD -7(CX), X16, K2, X26 // 62627d027391f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X16, K2, X26 // 62627d0273949a0f000000
+ VPSHRDVD X23, X23, K2, X26 // 6222450273d7
+ VPSHRDVD X11, X23, K2, X26 // 6242450273d3
+ VPSHRDVD X31, X23, K2, X26 // 6202450273d7
+ VPSHRDVD -7(CX), X23, K2, X26 // 626245027391f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X23, K2, X26 // 6262450273949a0f000000
+ VPSHRDVD X23, X12, K2, X23 // 62a21d0a73ff
+ VPSHRDVD X11, X12, K2, X23 // 62c21d0a73fb
+ VPSHRDVD X31, X12, K2, X23 // 62821d0a73ff
+ VPSHRDVD -7(CX), X12, K2, X23 // 62e21d0a73b9f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X12, K2, X23 // 62e21d0a73bc9a0f000000
+ VPSHRDVD X23, X16, K2, X23 // 62a27d0273ff
+ VPSHRDVD X11, X16, K2, X23 // 62c27d0273fb
+ VPSHRDVD X31, X16, K2, X23 // 62827d0273ff
+ VPSHRDVD -7(CX), X16, K2, X23 // 62e27d0273b9f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X16, K2, X23 // 62e27d0273bc9a0f000000
+ VPSHRDVD X23, X23, K2, X23 // 62a2450273ff
+ VPSHRDVD X11, X23, K2, X23 // 62c2450273fb
+ VPSHRDVD X31, X23, K2, X23 // 6282450273ff
+ VPSHRDVD -7(CX), X23, K2, X23 // 62e2450273b9f9ffffff
+ VPSHRDVD 15(DX)(BX*4), X23, K2, X23 // 62e2450273bc9a0f000000
+ VPSHRDVD Y22, Y26, K1, Y14 // 62322d2173f6
+ VPSHRDVD Y3, Y26, K1, Y14 // 62722d2173f3
+ VPSHRDVD Y15, Y26, K1, Y14 // 62522d2173f7
+ VPSHRDVD 99(R15)(R15*4), Y26, K1, Y14 // 62122d2173b4bf63000000
+ VPSHRDVD 15(DX), Y26, K1, Y14 // 62722d2173b20f000000
+ VPSHRDVD Y22, Y30, K1, Y14 // 62320d2173f6
+ VPSHRDVD Y3, Y30, K1, Y14 // 62720d2173f3
+ VPSHRDVD Y15, Y30, K1, Y14 // 62520d2173f7
+ VPSHRDVD 99(R15)(R15*4), Y30, K1, Y14 // 62120d2173b4bf63000000
+ VPSHRDVD 15(DX), Y30, K1, Y14 // 62720d2173b20f000000
+ VPSHRDVD Y22, Y12, K1, Y14 // 62321d2973f6
+ VPSHRDVD Y3, Y12, K1, Y14 // 62721d2973f3
+ VPSHRDVD Y15, Y12, K1, Y14 // 62521d2973f7
+ VPSHRDVD 99(R15)(R15*4), Y12, K1, Y14 // 62121d2973b4bf63000000
+ VPSHRDVD 15(DX), Y12, K1, Y14 // 62721d2973b20f000000
+ VPSHRDVD Y22, Y26, K1, Y21 // 62a22d2173ee
+ VPSHRDVD Y3, Y26, K1, Y21 // 62e22d2173eb
+ VPSHRDVD Y15, Y26, K1, Y21 // 62c22d2173ef
+ VPSHRDVD 99(R15)(R15*4), Y26, K1, Y21 // 62822d2173acbf63000000
+ VPSHRDVD 15(DX), Y26, K1, Y21 // 62e22d2173aa0f000000
+ VPSHRDVD Y22, Y30, K1, Y21 // 62a20d2173ee
+ VPSHRDVD Y3, Y30, K1, Y21 // 62e20d2173eb
+ VPSHRDVD Y15, Y30, K1, Y21 // 62c20d2173ef
+ VPSHRDVD 99(R15)(R15*4), Y30, K1, Y21 // 62820d2173acbf63000000
+ VPSHRDVD 15(DX), Y30, K1, Y21 // 62e20d2173aa0f000000
+ VPSHRDVD Y22, Y12, K1, Y21 // 62a21d2973ee
+ VPSHRDVD Y3, Y12, K1, Y21 // 62e21d2973eb
+ VPSHRDVD Y15, Y12, K1, Y21 // 62c21d2973ef
+ VPSHRDVD 99(R15)(R15*4), Y12, K1, Y21 // 62821d2973acbf63000000
+ VPSHRDVD 15(DX), Y12, K1, Y21 // 62e21d2973aa0f000000
+ VPSHRDVD Y22, Y26, K1, Y1 // 62b22d2173ce
+ VPSHRDVD Y3, Y26, K1, Y1 // 62f22d2173cb
+ VPSHRDVD Y15, Y26, K1, Y1 // 62d22d2173cf
+ VPSHRDVD 99(R15)(R15*4), Y26, K1, Y1 // 62922d21738cbf63000000
+ VPSHRDVD 15(DX), Y26, K1, Y1 // 62f22d21738a0f000000
+ VPSHRDVD Y22, Y30, K1, Y1 // 62b20d2173ce
+ VPSHRDVD Y3, Y30, K1, Y1 // 62f20d2173cb
+ VPSHRDVD Y15, Y30, K1, Y1 // 62d20d2173cf
+ VPSHRDVD 99(R15)(R15*4), Y30, K1, Y1 // 62920d21738cbf63000000
+ VPSHRDVD 15(DX), Y30, K1, Y1 // 62f20d21738a0f000000
+ VPSHRDVD Y22, Y12, K1, Y1 // 62b21d2973ce
+ VPSHRDVD Y3, Y12, K1, Y1 // 62f21d2973cb
+ VPSHRDVD Y15, Y12, K1, Y1 // 62d21d2973cf
+ VPSHRDVD 99(R15)(R15*4), Y12, K1, Y1 // 62921d29738cbf63000000
+ VPSHRDVD 15(DX), Y12, K1, Y1 // 62f21d29738a0f000000
+ VPSHRDVD Z3, Z11, K7, Z21 // 62e2254f73eb
+ VPSHRDVD Z12, Z11, K7, Z21 // 62c2254f73ec
+ VPSHRDVD 15(DX)(BX*1), Z11, K7, Z21 // 62e2254f73ac1a0f000000
+ VPSHRDVD -7(CX)(DX*2), Z11, K7, Z21 // 62e2254f73ac51f9ffffff
+ VPSHRDVD Z3, Z25, K7, Z21 // 62e2354773eb
+ VPSHRDVD Z12, Z25, K7, Z21 // 62c2354773ec
+ VPSHRDVD 15(DX)(BX*1), Z25, K7, Z21 // 62e2354773ac1a0f000000
+ VPSHRDVD -7(CX)(DX*2), Z25, K7, Z21 // 62e2354773ac51f9ffffff
+ VPSHRDVD Z3, Z11, K7, Z13 // 6272254f73eb
+ VPSHRDVD Z12, Z11, K7, Z13 // 6252254f73ec
+ VPSHRDVD 15(DX)(BX*1), Z11, K7, Z13 // 6272254f73ac1a0f000000
+ VPSHRDVD -7(CX)(DX*2), Z11, K7, Z13 // 6272254f73ac51f9ffffff
+ VPSHRDVD Z3, Z25, K7, Z13 // 6272354773eb
+ VPSHRDVD Z12, Z25, K7, Z13 // 6252354773ec
+ VPSHRDVD 15(DX)(BX*1), Z25, K7, Z13 // 6272354773ac1a0f000000
+ VPSHRDVD -7(CX)(DX*2), Z25, K7, Z13 // 6272354773ac51f9ffffff
+ VPSHRDVQ X20, X11, K1, X24 // 6222a50973c4
+ VPSHRDVQ X5, X11, K1, X24 // 6262a50973c5
+ VPSHRDVQ X25, X11, K1, X24 // 6202a50973c1
+ VPSHRDVQ 99(R15)(R15*8), X11, K1, X24 // 6202a5097384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X11, K1, X24 // 6262a5097384c807000000
+ VPSHRDVQ X20, X23, K1, X24 // 6222c50173c4
+ VPSHRDVQ X5, X23, K1, X24 // 6262c50173c5
+ VPSHRDVQ X25, X23, K1, X24 // 6202c50173c1
+ VPSHRDVQ 99(R15)(R15*8), X23, K1, X24 // 6202c5017384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X23, K1, X24 // 6262c5017384c807000000
+ VPSHRDVQ X20, X2, K1, X24 // 6222ed0973c4
+ VPSHRDVQ X5, X2, K1, X24 // 6262ed0973c5
+ VPSHRDVQ X25, X2, K1, X24 // 6202ed0973c1
+ VPSHRDVQ 99(R15)(R15*8), X2, K1, X24 // 6202ed097384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X2, K1, X24 // 6262ed097384c807000000
+ VPSHRDVQ X20, X11, K1, X14 // 6232a50973f4
+ VPSHRDVQ X5, X11, K1, X14 // 6272a50973f5
+ VPSHRDVQ X25, X11, K1, X14 // 6212a50973f1
+ VPSHRDVQ 99(R15)(R15*8), X11, K1, X14 // 6212a50973b4ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X11, K1, X14 // 6272a50973b4c807000000
+ VPSHRDVQ X20, X23, K1, X14 // 6232c50173f4
+ VPSHRDVQ X5, X23, K1, X14 // 6272c50173f5
+ VPSHRDVQ X25, X23, K1, X14 // 6212c50173f1
+ VPSHRDVQ 99(R15)(R15*8), X23, K1, X14 // 6212c50173b4ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X23, K1, X14 // 6272c50173b4c807000000
+ VPSHRDVQ X20, X2, K1, X14 // 6232ed0973f4
+ VPSHRDVQ X5, X2, K1, X14 // 6272ed0973f5
+ VPSHRDVQ X25, X2, K1, X14 // 6212ed0973f1
+ VPSHRDVQ 99(R15)(R15*8), X2, K1, X14 // 6212ed0973b4ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X2, K1, X14 // 6272ed0973b4c807000000
+ VPSHRDVQ X20, X11, K1, X0 // 62b2a50973c4
+ VPSHRDVQ X5, X11, K1, X0 // 62f2a50973c5
+ VPSHRDVQ X25, X11, K1, X0 // 6292a50973c1
+ VPSHRDVQ 99(R15)(R15*8), X11, K1, X0 // 6292a5097384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X11, K1, X0 // 62f2a5097384c807000000
+ VPSHRDVQ X20, X23, K1, X0 // 62b2c50173c4
+ VPSHRDVQ X5, X23, K1, X0 // 62f2c50173c5
+ VPSHRDVQ X25, X23, K1, X0 // 6292c50173c1
+ VPSHRDVQ 99(R15)(R15*8), X23, K1, X0 // 6292c5017384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X23, K1, X0 // 62f2c5017384c807000000
+ VPSHRDVQ X20, X2, K1, X0 // 62b2ed0973c4
+ VPSHRDVQ X5, X2, K1, X0 // 62f2ed0973c5
+ VPSHRDVQ X25, X2, K1, X0 // 6292ed0973c1
+ VPSHRDVQ 99(R15)(R15*8), X2, K1, X0 // 6292ed097384ff63000000
+ VPSHRDVQ 7(AX)(CX*8), X2, K1, X0 // 62f2ed097384c807000000
+ VPSHRDVQ Y21, Y5, K1, Y1 // 62b2d52973cd
+ VPSHRDVQ Y7, Y5, K1, Y1 // 62f2d52973cf
+ VPSHRDVQ Y30, Y5, K1, Y1 // 6292d52973ce
+ VPSHRDVQ (CX), Y5, K1, Y1 // 62f2d5297309
+ VPSHRDVQ 99(R15), Y5, K1, Y1 // 62d2d529738f63000000
+ VPSHRDVQ Y21, Y17, K1, Y1 // 62b2f52173cd
+ VPSHRDVQ Y7, Y17, K1, Y1 // 62f2f52173cf
+ VPSHRDVQ Y30, Y17, K1, Y1 // 6292f52173ce
+ VPSHRDVQ (CX), Y17, K1, Y1 // 62f2f5217309
+ VPSHRDVQ 99(R15), Y17, K1, Y1 // 62d2f521738f63000000
+ VPSHRDVQ Y21, Y13, K1, Y1 // 62b2952973cd
+ VPSHRDVQ Y7, Y13, K1, Y1 // 62f2952973cf
+ VPSHRDVQ Y30, Y13, K1, Y1 // 6292952973ce
+ VPSHRDVQ (CX), Y13, K1, Y1 // 62f295297309
+ VPSHRDVQ 99(R15), Y13, K1, Y1 // 62d29529738f63000000
+ VPSHRDVQ Y21, Y5, K1, Y27 // 6222d52973dd
+ VPSHRDVQ Y7, Y5, K1, Y27 // 6262d52973df
+ VPSHRDVQ Y30, Y5, K1, Y27 // 6202d52973de
+ VPSHRDVQ (CX), Y5, K1, Y27 // 6262d5297319
+ VPSHRDVQ 99(R15), Y5, K1, Y27 // 6242d529739f63000000
+ VPSHRDVQ Y21, Y17, K1, Y27 // 6222f52173dd
+ VPSHRDVQ Y7, Y17, K1, Y27 // 6262f52173df
+ VPSHRDVQ Y30, Y17, K1, Y27 // 6202f52173de
+ VPSHRDVQ (CX), Y17, K1, Y27 // 6262f5217319
+ VPSHRDVQ 99(R15), Y17, K1, Y27 // 6242f521739f63000000
+ VPSHRDVQ Y21, Y13, K1, Y27 // 6222952973dd
+ VPSHRDVQ Y7, Y13, K1, Y27 // 6262952973df
+ VPSHRDVQ Y30, Y13, K1, Y27 // 6202952973de
+ VPSHRDVQ (CX), Y13, K1, Y27 // 626295297319
+ VPSHRDVQ 99(R15), Y13, K1, Y27 // 62429529739f63000000
+ VPSHRDVQ Y21, Y5, K1, Y19 // 62a2d52973dd
+ VPSHRDVQ Y7, Y5, K1, Y19 // 62e2d52973df
+ VPSHRDVQ Y30, Y5, K1, Y19 // 6282d52973de
+ VPSHRDVQ (CX), Y5, K1, Y19 // 62e2d5297319
+ VPSHRDVQ 99(R15), Y5, K1, Y19 // 62c2d529739f63000000
+ VPSHRDVQ Y21, Y17, K1, Y19 // 62a2f52173dd
+ VPSHRDVQ Y7, Y17, K1, Y19 // 62e2f52173df
+ VPSHRDVQ Y30, Y17, K1, Y19 // 6282f52173de
+ VPSHRDVQ (CX), Y17, K1, Y19 // 62e2f5217319
+ VPSHRDVQ 99(R15), Y17, K1, Y19 // 62c2f521739f63000000
+ VPSHRDVQ Y21, Y13, K1, Y19 // 62a2952973dd
+ VPSHRDVQ Y7, Y13, K1, Y19 // 62e2952973df
+ VPSHRDVQ Y30, Y13, K1, Y19 // 6282952973de
+ VPSHRDVQ (CX), Y13, K1, Y19 // 62e295297319
+ VPSHRDVQ 99(R15), Y13, K1, Y19 // 62c29529739f63000000
+ VPSHRDVQ Z23, Z23, K1, Z27 // 6222c54173df
+ VPSHRDVQ Z6, Z23, K1, Z27 // 6262c54173de
+ VPSHRDVQ -17(BP), Z23, K1, Z27 // 6262c541739defffffff
+ VPSHRDVQ -15(R14)(R15*8), Z23, K1, Z27 // 6202c541739cfef1ffffff
+ VPSHRDVQ Z23, Z5, K1, Z27 // 6222d54973df
+ VPSHRDVQ Z6, Z5, K1, Z27 // 6262d54973de
+ VPSHRDVQ -17(BP), Z5, K1, Z27 // 6262d549739defffffff
+ VPSHRDVQ -15(R14)(R15*8), Z5, K1, Z27 // 6202d549739cfef1ffffff
+ VPSHRDVQ Z23, Z23, K1, Z15 // 6232c54173ff
+ VPSHRDVQ Z6, Z23, K1, Z15 // 6272c54173fe
+ VPSHRDVQ -17(BP), Z23, K1, Z15 // 6272c54173bdefffffff
+ VPSHRDVQ -15(R14)(R15*8), Z23, K1, Z15 // 6212c54173bcfef1ffffff
+ VPSHRDVQ Z23, Z5, K1, Z15 // 6232d54973ff
+ VPSHRDVQ Z6, Z5, K1, Z15 // 6272d54973fe
+ VPSHRDVQ -17(BP), Z5, K1, Z15 // 6272d54973bdefffffff
+ VPSHRDVQ -15(R14)(R15*8), Z5, K1, Z15 // 6212d54973bcfef1ffffff
+ VPSHRDVW X2, X2, K7, X0 // 62f2ed0f72c2
+ VPSHRDVW X31, X2, K7, X0 // 6292ed0f72c7
+ VPSHRDVW X11, X2, K7, X0 // 62d2ed0f72c3
+ VPSHRDVW (AX), X2, K7, X0 // 62f2ed0f7200
+ VPSHRDVW 7(SI), X2, K7, X0 // 62f2ed0f728607000000
+ VPSHRDVW X2, X8, K7, X0 // 62f2bd0f72c2
+ VPSHRDVW X31, X8, K7, X0 // 6292bd0f72c7
+ VPSHRDVW X11, X8, K7, X0 // 62d2bd0f72c3
+ VPSHRDVW (AX), X8, K7, X0 // 62f2bd0f7200
+ VPSHRDVW 7(SI), X8, K7, X0 // 62f2bd0f728607000000
+ VPSHRDVW X2, X9, K7, X0 // 62f2b50f72c2
+ VPSHRDVW X31, X9, K7, X0 // 6292b50f72c7
+ VPSHRDVW X11, X9, K7, X0 // 62d2b50f72c3
+ VPSHRDVW (AX), X9, K7, X0 // 62f2b50f7200
+ VPSHRDVW 7(SI), X9, K7, X0 // 62f2b50f728607000000
+ VPSHRDVW X2, X2, K7, X9 // 6272ed0f72ca
+ VPSHRDVW X31, X2, K7, X9 // 6212ed0f72cf
+ VPSHRDVW X11, X2, K7, X9 // 6252ed0f72cb
+ VPSHRDVW (AX), X2, K7, X9 // 6272ed0f7208
+ VPSHRDVW 7(SI), X2, K7, X9 // 6272ed0f728e07000000
+ VPSHRDVW X2, X8, K7, X9 // 6272bd0f72ca
+ VPSHRDVW X31, X8, K7, X9 // 6212bd0f72cf
+ VPSHRDVW X11, X8, K7, X9 // 6252bd0f72cb
+ VPSHRDVW (AX), X8, K7, X9 // 6272bd0f7208
+ VPSHRDVW 7(SI), X8, K7, X9 // 6272bd0f728e07000000
+ VPSHRDVW X2, X9, K7, X9 // 6272b50f72ca
+ VPSHRDVW X31, X9, K7, X9 // 6212b50f72cf
+ VPSHRDVW X11, X9, K7, X9 // 6252b50f72cb
+ VPSHRDVW (AX), X9, K7, X9 // 6272b50f7208
+ VPSHRDVW 7(SI), X9, K7, X9 // 6272b50f728e07000000
+ VPSHRDVW X2, X2, K7, X13 // 6272ed0f72ea
+ VPSHRDVW X31, X2, K7, X13 // 6212ed0f72ef
+ VPSHRDVW X11, X2, K7, X13 // 6252ed0f72eb
+ VPSHRDVW (AX), X2, K7, X13 // 6272ed0f7228
+ VPSHRDVW 7(SI), X2, K7, X13 // 6272ed0f72ae07000000
+ VPSHRDVW X2, X8, K7, X13 // 6272bd0f72ea
+ VPSHRDVW X31, X8, K7, X13 // 6212bd0f72ef
+ VPSHRDVW X11, X8, K7, X13 // 6252bd0f72eb
+ VPSHRDVW (AX), X8, K7, X13 // 6272bd0f7228
+ VPSHRDVW 7(SI), X8, K7, X13 // 6272bd0f72ae07000000
+ VPSHRDVW X2, X9, K7, X13 // 6272b50f72ea
+ VPSHRDVW X31, X9, K7, X13 // 6212b50f72ef
+ VPSHRDVW X11, X9, K7, X13 // 6252b50f72eb
+ VPSHRDVW (AX), X9, K7, X13 // 6272b50f7228
+ VPSHRDVW 7(SI), X9, K7, X13 // 6272b50f72ae07000000
+ VPSHRDVW Y5, Y8, K2, Y13 // 6272bd2a72ed
+ VPSHRDVW Y24, Y8, K2, Y13 // 6212bd2a72e8
+ VPSHRDVW Y21, Y8, K2, Y13 // 6232bd2a72ed
+ VPSHRDVW 99(R15)(R15*2), Y8, K2, Y13 // 6212bd2a72ac7f63000000
+ VPSHRDVW -7(DI), Y8, K2, Y13 // 6272bd2a72aff9ffffff
+ VPSHRDVW Y5, Y11, K2, Y13 // 6272a52a72ed
+ VPSHRDVW Y24, Y11, K2, Y13 // 6212a52a72e8
+ VPSHRDVW Y21, Y11, K2, Y13 // 6232a52a72ed
+ VPSHRDVW 99(R15)(R15*2), Y11, K2, Y13 // 6212a52a72ac7f63000000
+ VPSHRDVW -7(DI), Y11, K2, Y13 // 6272a52a72aff9ffffff
+ VPSHRDVW Y5, Y24, K2, Y13 // 6272bd2272ed
+ VPSHRDVW Y24, Y24, K2, Y13 // 6212bd2272e8
+ VPSHRDVW Y21, Y24, K2, Y13 // 6232bd2272ed
+ VPSHRDVW 99(R15)(R15*2), Y24, K2, Y13 // 6212bd2272ac7f63000000
+ VPSHRDVW -7(DI), Y24, K2, Y13 // 6272bd2272aff9ffffff
+ VPSHRDVW Y5, Y8, K2, Y18 // 62e2bd2a72d5
+ VPSHRDVW Y24, Y8, K2, Y18 // 6282bd2a72d0
+ VPSHRDVW Y21, Y8, K2, Y18 // 62a2bd2a72d5
+ VPSHRDVW 99(R15)(R15*2), Y8, K2, Y18 // 6282bd2a72947f63000000
+ VPSHRDVW -7(DI), Y8, K2, Y18 // 62e2bd2a7297f9ffffff
+ VPSHRDVW Y5, Y11, K2, Y18 // 62e2a52a72d5
+ VPSHRDVW Y24, Y11, K2, Y18 // 6282a52a72d0
+ VPSHRDVW Y21, Y11, K2, Y18 // 62a2a52a72d5
+ VPSHRDVW 99(R15)(R15*2), Y11, K2, Y18 // 6282a52a72947f63000000
+ VPSHRDVW -7(DI), Y11, K2, Y18 // 62e2a52a7297f9ffffff
+ VPSHRDVW Y5, Y24, K2, Y18 // 62e2bd2272d5
+ VPSHRDVW Y24, Y24, K2, Y18 // 6282bd2272d0
+ VPSHRDVW Y21, Y24, K2, Y18 // 62a2bd2272d5
+ VPSHRDVW 99(R15)(R15*2), Y24, K2, Y18 // 6282bd2272947f63000000
+ VPSHRDVW -7(DI), Y24, K2, Y18 // 62e2bd227297f9ffffff
+ VPSHRDVW Y5, Y8, K2, Y24 // 6262bd2a72c5
+ VPSHRDVW Y24, Y8, K2, Y24 // 6202bd2a72c0
+ VPSHRDVW Y21, Y8, K2, Y24 // 6222bd2a72c5
+ VPSHRDVW 99(R15)(R15*2), Y8, K2, Y24 // 6202bd2a72847f63000000
+ VPSHRDVW -7(DI), Y8, K2, Y24 // 6262bd2a7287f9ffffff
+ VPSHRDVW Y5, Y11, K2, Y24 // 6262a52a72c5
+ VPSHRDVW Y24, Y11, K2, Y24 // 6202a52a72c0
+ VPSHRDVW Y21, Y11, K2, Y24 // 6222a52a72c5
+ VPSHRDVW 99(R15)(R15*2), Y11, K2, Y24 // 6202a52a72847f63000000
+ VPSHRDVW -7(DI), Y11, K2, Y24 // 6262a52a7287f9ffffff
+ VPSHRDVW Y5, Y24, K2, Y24 // 6262bd2272c5
+ VPSHRDVW Y24, Y24, K2, Y24 // 6202bd2272c0
+ VPSHRDVW Y21, Y24, K2, Y24 // 6222bd2272c5
+ VPSHRDVW 99(R15)(R15*2), Y24, K2, Y24 // 6202bd2272847f63000000
+ VPSHRDVW -7(DI), Y24, K2, Y24 // 6262bd227287f9ffffff
+ VPSHRDVW Z16, Z21, K4, Z8 // 6232d54472c0
+ VPSHRDVW Z13, Z21, K4, Z8 // 6252d54472c5
+ VPSHRDVW 17(SP)(BP*2), Z21, K4, Z8 // 6272d54472846c11000000
+ VPSHRDVW -7(DI)(R8*4), Z21, K4, Z8 // 6232d544728487f9ffffff
+ VPSHRDVW Z16, Z5, K4, Z8 // 6232d54c72c0
+ VPSHRDVW Z13, Z5, K4, Z8 // 6252d54c72c5
+ VPSHRDVW 17(SP)(BP*2), Z5, K4, Z8 // 6272d54c72846c11000000
+ VPSHRDVW -7(DI)(R8*4), Z5, K4, Z8 // 6232d54c728487f9ffffff
+ VPSHRDVW Z16, Z21, K4, Z28 // 6222d54472e0
+ VPSHRDVW Z13, Z21, K4, Z28 // 6242d54472e5
+ VPSHRDVW 17(SP)(BP*2), Z21, K4, Z28 // 6262d54472a46c11000000
+ VPSHRDVW -7(DI)(R8*4), Z21, K4, Z28 // 6222d54472a487f9ffffff
+ VPSHRDVW Z16, Z5, K4, Z28 // 6222d54c72e0
+ VPSHRDVW Z13, Z5, K4, Z28 // 6242d54c72e5
+ VPSHRDVW 17(SP)(BP*2), Z5, K4, Z28 // 6262d54c72a46c11000000
+ VPSHRDVW -7(DI)(R8*4), Z5, K4, Z28 // 6222d54c72a487f9ffffff
+ VPSHRDW $27, X15, X0, K1, X22 // 62c3fd0972f71b
+ VPSHRDW $27, X11, X0, K1, X22 // 62c3fd0972f31b
+ VPSHRDW $27, X0, X0, K1, X22 // 62e3fd0972f01b
+ VPSHRDW $27, (BX), X0, K1, X22 // 62e3fd0972331b
+ VPSHRDW $27, -17(BP)(SI*1), X0, K1, X22 // 62e3fd0972b435efffffff1b
+ VPSHRDW $27, X15, X17, K1, X22 // 62c3f50172f71b
+ VPSHRDW $27, X11, X17, K1, X22 // 62c3f50172f31b
+ VPSHRDW $27, X0, X17, K1, X22 // 62e3f50172f01b
+ VPSHRDW $27, (BX), X17, K1, X22 // 62e3f50172331b
+ VPSHRDW $27, -17(BP)(SI*1), X17, K1, X22 // 62e3f50172b435efffffff1b
+ VPSHRDW $27, X15, X7, K1, X22 // 62c3c50972f71b
+ VPSHRDW $27, X11, X7, K1, X22 // 62c3c50972f31b
+ VPSHRDW $27, X0, X7, K1, X22 // 62e3c50972f01b
+ VPSHRDW $27, (BX), X7, K1, X22 // 62e3c50972331b
+ VPSHRDW $27, -17(BP)(SI*1), X7, K1, X22 // 62e3c50972b435efffffff1b
+ VPSHRDW $27, X15, X0, K1, X5 // 62d3fd0972ef1b
+ VPSHRDW $27, X11, X0, K1, X5 // 62d3fd0972eb1b
+ VPSHRDW $27, X0, X0, K1, X5 // 62f3fd0972e81b
+ VPSHRDW $27, (BX), X0, K1, X5 // 62f3fd09722b1b
+ VPSHRDW $27, -17(BP)(SI*1), X0, K1, X5 // 62f3fd0972ac35efffffff1b
+ VPSHRDW $27, X15, X17, K1, X5 // 62d3f50172ef1b
+ VPSHRDW $27, X11, X17, K1, X5 // 62d3f50172eb1b
+ VPSHRDW $27, X0, X17, K1, X5 // 62f3f50172e81b
+ VPSHRDW $27, (BX), X17, K1, X5 // 62f3f501722b1b
+ VPSHRDW $27, -17(BP)(SI*1), X17, K1, X5 // 62f3f50172ac35efffffff1b
+ VPSHRDW $27, X15, X7, K1, X5 // 62d3c50972ef1b
+ VPSHRDW $27, X11, X7, K1, X5 // 62d3c50972eb1b
+ VPSHRDW $27, X0, X7, K1, X5 // 62f3c50972e81b
+ VPSHRDW $27, (BX), X7, K1, X5 // 62f3c509722b1b
+ VPSHRDW $27, -17(BP)(SI*1), X7, K1, X5 // 62f3c50972ac35efffffff1b
+ VPSHRDW $27, X15, X0, K1, X14 // 6253fd0972f71b
+ VPSHRDW $27, X11, X0, K1, X14 // 6253fd0972f31b
+ VPSHRDW $27, X0, X0, K1, X14 // 6273fd0972f01b
+ VPSHRDW $27, (BX), X0, K1, X14 // 6273fd0972331b
+ VPSHRDW $27, -17(BP)(SI*1), X0, K1, X14 // 6273fd0972b435efffffff1b
+ VPSHRDW $27, X15, X17, K1, X14 // 6253f50172f71b
+ VPSHRDW $27, X11, X17, K1, X14 // 6253f50172f31b
+ VPSHRDW $27, X0, X17, K1, X14 // 6273f50172f01b
+ VPSHRDW $27, (BX), X17, K1, X14 // 6273f50172331b
+ VPSHRDW $27, -17(BP)(SI*1), X17, K1, X14 // 6273f50172b435efffffff1b
+ VPSHRDW $27, X15, X7, K1, X14 // 6253c50972f71b
+ VPSHRDW $27, X11, X7, K1, X14 // 6253c50972f31b
+ VPSHRDW $27, X0, X7, K1, X14 // 6273c50972f01b
+ VPSHRDW $27, (BX), X7, K1, X14 // 6273c50972331b
+ VPSHRDW $27, -17(BP)(SI*1), X7, K1, X14 // 6273c50972b435efffffff1b
+ VPSHRDW $47, Y7, Y9, K3, Y16 // 62e3b52b72c72f
+ VPSHRDW $47, Y6, Y9, K3, Y16 // 62e3b52b72c62f
+ VPSHRDW $47, Y26, Y9, K3, Y16 // 6283b52b72c22f
+ VPSHRDW $47, -7(CX)(DX*1), Y9, K3, Y16 // 62e3b52b728411f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y9, K3, Y16 // 6283b52b7284bef1ffffff2f
+ VPSHRDW $47, Y7, Y6, K3, Y16 // 62e3cd2b72c72f
+ VPSHRDW $47, Y6, Y6, K3, Y16 // 62e3cd2b72c62f
+ VPSHRDW $47, Y26, Y6, K3, Y16 // 6283cd2b72c22f
+ VPSHRDW $47, -7(CX)(DX*1), Y6, K3, Y16 // 62e3cd2b728411f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y6, K3, Y16 // 6283cd2b7284bef1ffffff2f
+ VPSHRDW $47, Y7, Y3, K3, Y16 // 62e3e52b72c72f
+ VPSHRDW $47, Y6, Y3, K3, Y16 // 62e3e52b72c62f
+ VPSHRDW $47, Y26, Y3, K3, Y16 // 6283e52b72c22f
+ VPSHRDW $47, -7(CX)(DX*1), Y3, K3, Y16 // 62e3e52b728411f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y3, K3, Y16 // 6283e52b7284bef1ffffff2f
+ VPSHRDW $47, Y7, Y9, K3, Y9 // 6273b52b72cf2f
+ VPSHRDW $47, Y6, Y9, K3, Y9 // 6273b52b72ce2f
+ VPSHRDW $47, Y26, Y9, K3, Y9 // 6213b52b72ca2f
+ VPSHRDW $47, -7(CX)(DX*1), Y9, K3, Y9 // 6273b52b728c11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y9, K3, Y9 // 6213b52b728cbef1ffffff2f
+ VPSHRDW $47, Y7, Y6, K3, Y9 // 6273cd2b72cf2f
+ VPSHRDW $47, Y6, Y6, K3, Y9 // 6273cd2b72ce2f
+ VPSHRDW $47, Y26, Y6, K3, Y9 // 6213cd2b72ca2f
+ VPSHRDW $47, -7(CX)(DX*1), Y6, K3, Y9 // 6273cd2b728c11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y6, K3, Y9 // 6213cd2b728cbef1ffffff2f
+ VPSHRDW $47, Y7, Y3, K3, Y9 // 6273e52b72cf2f
+ VPSHRDW $47, Y6, Y3, K3, Y9 // 6273e52b72ce2f
+ VPSHRDW $47, Y26, Y3, K3, Y9 // 6213e52b72ca2f
+ VPSHRDW $47, -7(CX)(DX*1), Y3, K3, Y9 // 6273e52b728c11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y3, K3, Y9 // 6213e52b728cbef1ffffff2f
+ VPSHRDW $47, Y7, Y9, K3, Y13 // 6273b52b72ef2f
+ VPSHRDW $47, Y6, Y9, K3, Y13 // 6273b52b72ee2f
+ VPSHRDW $47, Y26, Y9, K3, Y13 // 6213b52b72ea2f
+ VPSHRDW $47, -7(CX)(DX*1), Y9, K3, Y13 // 6273b52b72ac11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y9, K3, Y13 // 6213b52b72acbef1ffffff2f
+ VPSHRDW $47, Y7, Y6, K3, Y13 // 6273cd2b72ef2f
+ VPSHRDW $47, Y6, Y6, K3, Y13 // 6273cd2b72ee2f
+ VPSHRDW $47, Y26, Y6, K3, Y13 // 6213cd2b72ea2f
+ VPSHRDW $47, -7(CX)(DX*1), Y6, K3, Y13 // 6273cd2b72ac11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y6, K3, Y13 // 6213cd2b72acbef1ffffff2f
+ VPSHRDW $47, Y7, Y3, K3, Y13 // 6273e52b72ef2f
+ VPSHRDW $47, Y6, Y3, K3, Y13 // 6273e52b72ee2f
+ VPSHRDW $47, Y26, Y3, K3, Y13 // 6213e52b72ea2f
+ VPSHRDW $47, -7(CX)(DX*1), Y3, K3, Y13 // 6273e52b72ac11f9ffffff2f
+ VPSHRDW $47, -15(R14)(R15*4), Y3, K3, Y13 // 6213e52b72acbef1ffffff2f
+ VPSHRDW $82, Z6, Z22, K4, Z12 // 6273cd4472e652
+ VPSHRDW $82, Z8, Z22, K4, Z12 // 6253cd4472e052
+ VPSHRDW $82, 15(R8), Z22, K4, Z12 // 6253cd4472a00f00000052
+ VPSHRDW $82, (BP), Z22, K4, Z12 // 6273cd4472650052
+ VPSHRDW $82, Z6, Z11, K4, Z12 // 6273a54c72e652
+ VPSHRDW $82, Z8, Z11, K4, Z12 // 6253a54c72e052
+ VPSHRDW $82, 15(R8), Z11, K4, Z12 // 6253a54c72a00f00000052
+ VPSHRDW $82, (BP), Z11, K4, Z12 // 6273a54c72650052
+ VPSHRDW $82, Z6, Z22, K4, Z27 // 6263cd4472de52
+ VPSHRDW $82, Z8, Z22, K4, Z27 // 6243cd4472d852
+ VPSHRDW $82, 15(R8), Z22, K4, Z27 // 6243cd4472980f00000052
+ VPSHRDW $82, (BP), Z22, K4, Z27 // 6263cd44725d0052
+ VPSHRDW $82, Z6, Z11, K4, Z27 // 6263a54c72de52
+ VPSHRDW $82, Z8, Z11, K4, Z27 // 6243a54c72d852
+ VPSHRDW $82, 15(R8), Z11, K4, Z27 // 6243a54c72980f00000052
+ VPSHRDW $82, (BP), Z11, K4, Z27 // 6263a54c725d0052
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vnni.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vnni.s
new file mode 100644
index 0000000..ce450a5
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vnni.s
@@ -0,0 +1,400 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_vnni(SB), NOSPLIT, $0
+ VPDPBUSD X15, X16, K2, X6 // 62d27d0250f7
+ VPDPBUSD X11, X16, K2, X6 // 62d27d0250f3
+ VPDPBUSD X1, X16, K2, X6 // 62f27d0250f1
+ VPDPBUSD -15(R14)(R15*1), X16, K2, X6 // 62927d0250b43ef1ffffff
+ VPDPBUSD -15(BX), X16, K2, X6 // 62f27d0250b3f1ffffff
+ VPDPBUSD X15, X28, K2, X6 // 62d21d0250f7
+ VPDPBUSD X11, X28, K2, X6 // 62d21d0250f3
+ VPDPBUSD X1, X28, K2, X6 // 62f21d0250f1
+ VPDPBUSD -15(R14)(R15*1), X28, K2, X6 // 62921d0250b43ef1ffffff
+ VPDPBUSD -15(BX), X28, K2, X6 // 62f21d0250b3f1ffffff
+ VPDPBUSD X15, X8, K2, X6 // 62d23d0a50f7
+ VPDPBUSD X11, X8, K2, X6 // 62d23d0a50f3
+ VPDPBUSD X1, X8, K2, X6 // 62f23d0a50f1
+ VPDPBUSD -15(R14)(R15*1), X8, K2, X6 // 62923d0a50b43ef1ffffff
+ VPDPBUSD -15(BX), X8, K2, X6 // 62f23d0a50b3f1ffffff
+ VPDPBUSD X15, X16, K2, X22 // 62c27d0250f7
+ VPDPBUSD X11, X16, K2, X22 // 62c27d0250f3
+ VPDPBUSD X1, X16, K2, X22 // 62e27d0250f1
+ VPDPBUSD -15(R14)(R15*1), X16, K2, X22 // 62827d0250b43ef1ffffff
+ VPDPBUSD -15(BX), X16, K2, X22 // 62e27d0250b3f1ffffff
+ VPDPBUSD X15, X28, K2, X22 // 62c21d0250f7
+ VPDPBUSD X11, X28, K2, X22 // 62c21d0250f3
+ VPDPBUSD X1, X28, K2, X22 // 62e21d0250f1
+ VPDPBUSD -15(R14)(R15*1), X28, K2, X22 // 62821d0250b43ef1ffffff
+ VPDPBUSD -15(BX), X28, K2, X22 // 62e21d0250b3f1ffffff
+ VPDPBUSD X15, X8, K2, X22 // 62c23d0a50f7
+ VPDPBUSD X11, X8, K2, X22 // 62c23d0a50f3
+ VPDPBUSD X1, X8, K2, X22 // 62e23d0a50f1
+ VPDPBUSD -15(R14)(R15*1), X8, K2, X22 // 62823d0a50b43ef1ffffff
+ VPDPBUSD -15(BX), X8, K2, X22 // 62e23d0a50b3f1ffffff
+ VPDPBUSD X15, X16, K2, X12 // 62527d0250e7
+ VPDPBUSD X11, X16, K2, X12 // 62527d0250e3
+ VPDPBUSD X1, X16, K2, X12 // 62727d0250e1
+ VPDPBUSD -15(R14)(R15*1), X16, K2, X12 // 62127d0250a43ef1ffffff
+ VPDPBUSD -15(BX), X16, K2, X12 // 62727d0250a3f1ffffff
+ VPDPBUSD X15, X28, K2, X12 // 62521d0250e7
+ VPDPBUSD X11, X28, K2, X12 // 62521d0250e3
+ VPDPBUSD X1, X28, K2, X12 // 62721d0250e1
+ VPDPBUSD -15(R14)(R15*1), X28, K2, X12 // 62121d0250a43ef1ffffff
+ VPDPBUSD -15(BX), X28, K2, X12 // 62721d0250a3f1ffffff
+ VPDPBUSD X15, X8, K2, X12 // 62523d0a50e7
+ VPDPBUSD X11, X8, K2, X12 // 62523d0a50e3
+ VPDPBUSD X1, X8, K2, X12 // 62723d0a50e1
+ VPDPBUSD -15(R14)(R15*1), X8, K2, X12 // 62123d0a50a43ef1ffffff
+ VPDPBUSD -15(BX), X8, K2, X12 // 62723d0a50a3f1ffffff
+ VPDPBUSD Y11, Y28, K5, Y20 // 62c21d2550e3
+ VPDPBUSD Y27, Y28, K5, Y20 // 62821d2550e3
+ VPDPBUSD Y17, Y28, K5, Y20 // 62a21d2550e1
+ VPDPBUSD (AX), Y28, K5, Y20 // 62e21d255020
+ VPDPBUSD 7(SI), Y28, K5, Y20 // 62e21d2550a607000000
+ VPDPBUSD Y11, Y1, K5, Y20 // 62c2752d50e3
+ VPDPBUSD Y27, Y1, K5, Y20 // 6282752d50e3
+ VPDPBUSD Y17, Y1, K5, Y20 // 62a2752d50e1
+ VPDPBUSD (AX), Y1, K5, Y20 // 62e2752d5020
+ VPDPBUSD 7(SI), Y1, K5, Y20 // 62e2752d50a607000000
+ VPDPBUSD Y11, Y8, K5, Y20 // 62c23d2d50e3
+ VPDPBUSD Y27, Y8, K5, Y20 // 62823d2d50e3
+ VPDPBUSD Y17, Y8, K5, Y20 // 62a23d2d50e1
+ VPDPBUSD (AX), Y8, K5, Y20 // 62e23d2d5020
+ VPDPBUSD 7(SI), Y8, K5, Y20 // 62e23d2d50a607000000
+ VPDPBUSD Y11, Y28, K5, Y9 // 62521d2550cb
+ VPDPBUSD Y27, Y28, K5, Y9 // 62121d2550cb
+ VPDPBUSD Y17, Y28, K5, Y9 // 62321d2550c9
+ VPDPBUSD (AX), Y28, K5, Y9 // 62721d255008
+ VPDPBUSD 7(SI), Y28, K5, Y9 // 62721d25508e07000000
+ VPDPBUSD Y11, Y1, K5, Y9 // 6252752d50cb
+ VPDPBUSD Y27, Y1, K5, Y9 // 6212752d50cb
+ VPDPBUSD Y17, Y1, K5, Y9 // 6232752d50c9
+ VPDPBUSD (AX), Y1, K5, Y9 // 6272752d5008
+ VPDPBUSD 7(SI), Y1, K5, Y9 // 6272752d508e07000000
+ VPDPBUSD Y11, Y8, K5, Y9 // 62523d2d50cb
+ VPDPBUSD Y27, Y8, K5, Y9 // 62123d2d50cb
+ VPDPBUSD Y17, Y8, K5, Y9 // 62323d2d50c9
+ VPDPBUSD (AX), Y8, K5, Y9 // 62723d2d5008
+ VPDPBUSD 7(SI), Y8, K5, Y9 // 62723d2d508e07000000
+ VPDPBUSD Y11, Y28, K5, Y28 // 62421d2550e3
+ VPDPBUSD Y27, Y28, K5, Y28 // 62021d2550e3
+ VPDPBUSD Y17, Y28, K5, Y28 // 62221d2550e1
+ VPDPBUSD (AX), Y28, K5, Y28 // 62621d255020
+ VPDPBUSD 7(SI), Y28, K5, Y28 // 62621d2550a607000000
+ VPDPBUSD Y11, Y1, K5, Y28 // 6242752d50e3
+ VPDPBUSD Y27, Y1, K5, Y28 // 6202752d50e3
+ VPDPBUSD Y17, Y1, K5, Y28 // 6222752d50e1
+ VPDPBUSD (AX), Y1, K5, Y28 // 6262752d5020
+ VPDPBUSD 7(SI), Y1, K5, Y28 // 6262752d50a607000000
+ VPDPBUSD Y11, Y8, K5, Y28 // 62423d2d50e3
+ VPDPBUSD Y27, Y8, K5, Y28 // 62023d2d50e3
+ VPDPBUSD Y17, Y8, K5, Y28 // 62223d2d50e1
+ VPDPBUSD (AX), Y8, K5, Y28 // 62623d2d5020
+ VPDPBUSD 7(SI), Y8, K5, Y28 // 62623d2d50a607000000
+ VPDPBUSD Z8, Z23, K3, Z23 // 62c2454350f8
+ VPDPBUSD Z28, Z23, K3, Z23 // 6282454350fc
+ VPDPBUSD (SI), Z23, K3, Z23 // 62e24543503e
+ VPDPBUSD 7(SI)(DI*2), Z23, K3, Z23 // 62e2454350bc7e07000000
+ VPDPBUSD Z8, Z6, K3, Z23 // 62c24d4b50f8
+ VPDPBUSD Z28, Z6, K3, Z23 // 62824d4b50fc
+ VPDPBUSD (SI), Z6, K3, Z23 // 62e24d4b503e
+ VPDPBUSD 7(SI)(DI*2), Z6, K3, Z23 // 62e24d4b50bc7e07000000
+ VPDPBUSD Z8, Z23, K3, Z5 // 62d2454350e8
+ VPDPBUSD Z28, Z23, K3, Z5 // 6292454350ec
+ VPDPBUSD (SI), Z23, K3, Z5 // 62f24543502e
+ VPDPBUSD 7(SI)(DI*2), Z23, K3, Z5 // 62f2454350ac7e07000000
+ VPDPBUSD Z8, Z6, K3, Z5 // 62d24d4b50e8
+ VPDPBUSD Z28, Z6, K3, Z5 // 62924d4b50ec
+ VPDPBUSD (SI), Z6, K3, Z5 // 62f24d4b502e
+ VPDPBUSD 7(SI)(DI*2), Z6, K3, Z5 // 62f24d4b50ac7e07000000
+ VPDPBUSDS X25, X14, K4, X19 // 62820d0c51d9
+ VPDPBUSDS X11, X14, K4, X19 // 62c20d0c51db
+ VPDPBUSDS X17, X14, K4, X19 // 62a20d0c51d9
+ VPDPBUSDS 7(AX)(CX*4), X14, K4, X19 // 62e20d0c519c8807000000
+ VPDPBUSDS 7(AX)(CX*1), X14, K4, X19 // 62e20d0c519c0807000000
+ VPDPBUSDS X25, X0, K4, X19 // 62827d0c51d9
+ VPDPBUSDS X11, X0, K4, X19 // 62c27d0c51db
+ VPDPBUSDS X17, X0, K4, X19 // 62a27d0c51d9
+ VPDPBUSDS 7(AX)(CX*4), X0, K4, X19 // 62e27d0c519c8807000000
+ VPDPBUSDS 7(AX)(CX*1), X0, K4, X19 // 62e27d0c519c0807000000
+ VPDPBUSDS X25, X14, K4, X13 // 62120d0c51e9
+ VPDPBUSDS X11, X14, K4, X13 // 62520d0c51eb
+ VPDPBUSDS X17, X14, K4, X13 // 62320d0c51e9
+ VPDPBUSDS 7(AX)(CX*4), X14, K4, X13 // 62720d0c51ac8807000000
+ VPDPBUSDS 7(AX)(CX*1), X14, K4, X13 // 62720d0c51ac0807000000
+ VPDPBUSDS X25, X0, K4, X13 // 62127d0c51e9
+ VPDPBUSDS X11, X0, K4, X13 // 62527d0c51eb
+ VPDPBUSDS X17, X0, K4, X13 // 62327d0c51e9
+ VPDPBUSDS 7(AX)(CX*4), X0, K4, X13 // 62727d0c51ac8807000000
+ VPDPBUSDS 7(AX)(CX*1), X0, K4, X13 // 62727d0c51ac0807000000
+ VPDPBUSDS X25, X14, K4, X2 // 62920d0c51d1
+ VPDPBUSDS X11, X14, K4, X2 // 62d20d0c51d3
+ VPDPBUSDS X17, X14, K4, X2 // 62b20d0c51d1
+ VPDPBUSDS 7(AX)(CX*4), X14, K4, X2 // 62f20d0c51948807000000
+ VPDPBUSDS 7(AX)(CX*1), X14, K4, X2 // 62f20d0c51940807000000
+ VPDPBUSDS X25, X0, K4, X2 // 62927d0c51d1
+ VPDPBUSDS X11, X0, K4, X2 // 62d27d0c51d3
+ VPDPBUSDS X17, X0, K4, X2 // 62b27d0c51d1
+ VPDPBUSDS 7(AX)(CX*4), X0, K4, X2 // 62f27d0c51948807000000
+ VPDPBUSDS 7(AX)(CX*1), X0, K4, X2 // 62f27d0c51940807000000
+ VPDPBUSDS Y28, Y26, K2, Y16 // 62822d2251c4
+ VPDPBUSDS Y1, Y26, K2, Y16 // 62e22d2251c1
+ VPDPBUSDS Y23, Y26, K2, Y16 // 62a22d2251c7
+ VPDPBUSDS (BX), Y26, K2, Y16 // 62e22d225103
+ VPDPBUSDS -17(BP)(SI*1), Y26, K2, Y16 // 62e22d22518435efffffff
+ VPDPBUSDS Y28, Y3, K2, Y16 // 6282652a51c4
+ VPDPBUSDS Y1, Y3, K2, Y16 // 62e2652a51c1
+ VPDPBUSDS Y23, Y3, K2, Y16 // 62a2652a51c7
+ VPDPBUSDS (BX), Y3, K2, Y16 // 62e2652a5103
+ VPDPBUSDS -17(BP)(SI*1), Y3, K2, Y16 // 62e2652a518435efffffff
+ VPDPBUSDS Y28, Y8, K2, Y16 // 62823d2a51c4
+ VPDPBUSDS Y1, Y8, K2, Y16 // 62e23d2a51c1
+ VPDPBUSDS Y23, Y8, K2, Y16 // 62a23d2a51c7
+ VPDPBUSDS (BX), Y8, K2, Y16 // 62e23d2a5103
+ VPDPBUSDS -17(BP)(SI*1), Y8, K2, Y16 // 62e23d2a518435efffffff
+ VPDPBUSDS Y28, Y26, K2, Y12 // 62122d2251e4
+ VPDPBUSDS Y1, Y26, K2, Y12 // 62722d2251e1
+ VPDPBUSDS Y23, Y26, K2, Y12 // 62322d2251e7
+ VPDPBUSDS (BX), Y26, K2, Y12 // 62722d225123
+ VPDPBUSDS -17(BP)(SI*1), Y26, K2, Y12 // 62722d2251a435efffffff
+ VPDPBUSDS Y28, Y3, K2, Y12 // 6212652a51e4
+ VPDPBUSDS Y1, Y3, K2, Y12 // 6272652a51e1
+ VPDPBUSDS Y23, Y3, K2, Y12 // 6232652a51e7
+ VPDPBUSDS (BX), Y3, K2, Y12 // 6272652a5123
+ VPDPBUSDS -17(BP)(SI*1), Y3, K2, Y12 // 6272652a51a435efffffff
+ VPDPBUSDS Y28, Y8, K2, Y12 // 62123d2a51e4
+ VPDPBUSDS Y1, Y8, K2, Y12 // 62723d2a51e1
+ VPDPBUSDS Y23, Y8, K2, Y12 // 62323d2a51e7
+ VPDPBUSDS (BX), Y8, K2, Y12 // 62723d2a5123
+ VPDPBUSDS -17(BP)(SI*1), Y8, K2, Y12 // 62723d2a51a435efffffff
+ VPDPBUSDS Y28, Y26, K2, Y6 // 62922d2251f4
+ VPDPBUSDS Y1, Y26, K2, Y6 // 62f22d2251f1
+ VPDPBUSDS Y23, Y26, K2, Y6 // 62b22d2251f7
+ VPDPBUSDS (BX), Y26, K2, Y6 // 62f22d225133
+ VPDPBUSDS -17(BP)(SI*1), Y26, K2, Y6 // 62f22d2251b435efffffff
+ VPDPBUSDS Y28, Y3, K2, Y6 // 6292652a51f4
+ VPDPBUSDS Y1, Y3, K2, Y6 // 62f2652a51f1
+ VPDPBUSDS Y23, Y3, K2, Y6 // 62b2652a51f7
+ VPDPBUSDS (BX), Y3, K2, Y6 // 62f2652a5133
+ VPDPBUSDS -17(BP)(SI*1), Y3, K2, Y6 // 62f2652a51b435efffffff
+ VPDPBUSDS Y28, Y8, K2, Y6 // 62923d2a51f4
+ VPDPBUSDS Y1, Y8, K2, Y6 // 62f23d2a51f1
+ VPDPBUSDS Y23, Y8, K2, Y6 // 62b23d2a51f7
+ VPDPBUSDS (BX), Y8, K2, Y6 // 62f23d2a5133
+ VPDPBUSDS -17(BP)(SI*1), Y8, K2, Y6 // 62f23d2a51b435efffffff
+ VPDPBUSDS Z12, Z16, K2, Z21 // 62c27d4251ec
+ VPDPBUSDS Z27, Z16, K2, Z21 // 62827d4251eb
+ VPDPBUSDS 17(SP)(BP*8), Z16, K2, Z21 // 62e27d4251acec11000000
+ VPDPBUSDS 17(SP)(BP*4), Z16, K2, Z21 // 62e27d4251acac11000000
+ VPDPBUSDS Z12, Z13, K2, Z21 // 62c2154a51ec
+ VPDPBUSDS Z27, Z13, K2, Z21 // 6282154a51eb
+ VPDPBUSDS 17(SP)(BP*8), Z13, K2, Z21 // 62e2154a51acec11000000
+ VPDPBUSDS 17(SP)(BP*4), Z13, K2, Z21 // 62e2154a51acac11000000
+ VPDPBUSDS Z12, Z16, K2, Z5 // 62d27d4251ec
+ VPDPBUSDS Z27, Z16, K2, Z5 // 62927d4251eb
+ VPDPBUSDS 17(SP)(BP*8), Z16, K2, Z5 // 62f27d4251acec11000000
+ VPDPBUSDS 17(SP)(BP*4), Z16, K2, Z5 // 62f27d4251acac11000000
+ VPDPBUSDS Z12, Z13, K2, Z5 // 62d2154a51ec
+ VPDPBUSDS Z27, Z13, K2, Z5 // 6292154a51eb
+ VPDPBUSDS 17(SP)(BP*8), Z13, K2, Z5 // 62f2154a51acec11000000
+ VPDPBUSDS 17(SP)(BP*4), Z13, K2, Z5 // 62f2154a51acac11000000
+ VPDPWSSD X2, X2, K3, X18 // 62e26d0b52d2
+ VPDPWSSD X27, X2, K3, X18 // 62826d0b52d3
+ VPDPWSSD X26, X2, K3, X18 // 62826d0b52d2
+ VPDPWSSD (SI), X2, K3, X18 // 62e26d0b5216
+ VPDPWSSD 7(SI)(DI*2), X2, K3, X18 // 62e26d0b52947e07000000
+ VPDPWSSD X2, X24, K3, X18 // 62e23d0352d2
+ VPDPWSSD X27, X24, K3, X18 // 62823d0352d3
+ VPDPWSSD X26, X24, K3, X18 // 62823d0352d2
+ VPDPWSSD (SI), X24, K3, X18 // 62e23d035216
+ VPDPWSSD 7(SI)(DI*2), X24, K3, X18 // 62e23d0352947e07000000
+ VPDPWSSD X2, X2, K3, X11 // 62726d0b52da
+ VPDPWSSD X27, X2, K3, X11 // 62126d0b52db
+ VPDPWSSD X26, X2, K3, X11 // 62126d0b52da
+ VPDPWSSD (SI), X2, K3, X11 // 62726d0b521e
+ VPDPWSSD 7(SI)(DI*2), X2, K3, X11 // 62726d0b529c7e07000000
+ VPDPWSSD X2, X24, K3, X11 // 62723d0352da
+ VPDPWSSD X27, X24, K3, X11 // 62123d0352db
+ VPDPWSSD X26, X24, K3, X11 // 62123d0352da
+ VPDPWSSD (SI), X24, K3, X11 // 62723d03521e
+ VPDPWSSD 7(SI)(DI*2), X24, K3, X11 // 62723d03529c7e07000000
+ VPDPWSSD X2, X2, K3, X9 // 62726d0b52ca
+ VPDPWSSD X27, X2, K3, X9 // 62126d0b52cb
+ VPDPWSSD X26, X2, K3, X9 // 62126d0b52ca
+ VPDPWSSD (SI), X2, K3, X9 // 62726d0b520e
+ VPDPWSSD 7(SI)(DI*2), X2, K3, X9 // 62726d0b528c7e07000000
+ VPDPWSSD X2, X24, K3, X9 // 62723d0352ca
+ VPDPWSSD X27, X24, K3, X9 // 62123d0352cb
+ VPDPWSSD X26, X24, K3, X9 // 62123d0352ca
+ VPDPWSSD (SI), X24, K3, X9 // 62723d03520e
+ VPDPWSSD 7(SI)(DI*2), X24, K3, X9 // 62723d03528c7e07000000
+ VPDPWSSD Y8, Y2, K3, Y14 // 62526d2b52f0
+ VPDPWSSD Y9, Y2, K3, Y14 // 62526d2b52f1
+ VPDPWSSD Y22, Y2, K3, Y14 // 62326d2b52f6
+ VPDPWSSD 15(R8)(R14*4), Y2, K3, Y14 // 62126d2b52b4b00f000000
+ VPDPWSSD -7(CX)(DX*4), Y2, K3, Y14 // 62726d2b52b491f9ffffff
+ VPDPWSSD Y8, Y22, K3, Y14 // 62524d2352f0
+ VPDPWSSD Y9, Y22, K3, Y14 // 62524d2352f1
+ VPDPWSSD Y22, Y22, K3, Y14 // 62324d2352f6
+ VPDPWSSD 15(R8)(R14*4), Y22, K3, Y14 // 62124d2352b4b00f000000
+ VPDPWSSD -7(CX)(DX*4), Y22, K3, Y14 // 62724d2352b491f9ffffff
+ VPDPWSSD Y8, Y27, K3, Y14 // 6252252352f0
+ VPDPWSSD Y9, Y27, K3, Y14 // 6252252352f1
+ VPDPWSSD Y22, Y27, K3, Y14 // 6232252352f6
+ VPDPWSSD 15(R8)(R14*4), Y27, K3, Y14 // 6212252352b4b00f000000
+ VPDPWSSD -7(CX)(DX*4), Y27, K3, Y14 // 6272252352b491f9ffffff
+ VPDPWSSD Y8, Y2, K3, Y31 // 62426d2b52f8
+ VPDPWSSD Y9, Y2, K3, Y31 // 62426d2b52f9
+ VPDPWSSD Y22, Y2, K3, Y31 // 62226d2b52fe
+ VPDPWSSD 15(R8)(R14*4), Y2, K3, Y31 // 62026d2b52bcb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y2, K3, Y31 // 62626d2b52bc91f9ffffff
+ VPDPWSSD Y8, Y22, K3, Y31 // 62424d2352f8
+ VPDPWSSD Y9, Y22, K3, Y31 // 62424d2352f9
+ VPDPWSSD Y22, Y22, K3, Y31 // 62224d2352fe
+ VPDPWSSD 15(R8)(R14*4), Y22, K3, Y31 // 62024d2352bcb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y22, K3, Y31 // 62624d2352bc91f9ffffff
+ VPDPWSSD Y8, Y27, K3, Y31 // 6242252352f8
+ VPDPWSSD Y9, Y27, K3, Y31 // 6242252352f9
+ VPDPWSSD Y22, Y27, K3, Y31 // 6222252352fe
+ VPDPWSSD 15(R8)(R14*4), Y27, K3, Y31 // 6202252352bcb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y27, K3, Y31 // 6262252352bc91f9ffffff
+ VPDPWSSD Y8, Y2, K3, Y25 // 62426d2b52c8
+ VPDPWSSD Y9, Y2, K3, Y25 // 62426d2b52c9
+ VPDPWSSD Y22, Y2, K3, Y25 // 62226d2b52ce
+ VPDPWSSD 15(R8)(R14*4), Y2, K3, Y25 // 62026d2b528cb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y2, K3, Y25 // 62626d2b528c91f9ffffff
+ VPDPWSSD Y8, Y22, K3, Y25 // 62424d2352c8
+ VPDPWSSD Y9, Y22, K3, Y25 // 62424d2352c9
+ VPDPWSSD Y22, Y22, K3, Y25 // 62224d2352ce
+ VPDPWSSD 15(R8)(R14*4), Y22, K3, Y25 // 62024d23528cb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y22, K3, Y25 // 62624d23528c91f9ffffff
+ VPDPWSSD Y8, Y27, K3, Y25 // 6242252352c8
+ VPDPWSSD Y9, Y27, K3, Y25 // 6242252352c9
+ VPDPWSSD Y22, Y27, K3, Y25 // 6222252352ce
+ VPDPWSSD 15(R8)(R14*4), Y27, K3, Y25 // 62022523528cb00f000000
+ VPDPWSSD -7(CX)(DX*4), Y27, K3, Y25 // 62622523528c91f9ffffff
+ VPDPWSSD Z25, Z6, K3, Z22 // 62824d4b52f1
+ VPDPWSSD Z12, Z6, K3, Z22 // 62c24d4b52f4
+ VPDPWSSD 7(SI)(DI*4), Z6, K3, Z22 // 62e24d4b52b4be07000000
+ VPDPWSSD -7(DI)(R8*2), Z6, K3, Z22 // 62a24d4b52b447f9ffffff
+ VPDPWSSD Z25, Z8, K3, Z22 // 62823d4b52f1
+ VPDPWSSD Z12, Z8, K3, Z22 // 62c23d4b52f4
+ VPDPWSSD 7(SI)(DI*4), Z8, K3, Z22 // 62e23d4b52b4be07000000
+ VPDPWSSD -7(DI)(R8*2), Z8, K3, Z22 // 62a23d4b52b447f9ffffff
+ VPDPWSSD Z25, Z6, K3, Z11 // 62124d4b52d9
+ VPDPWSSD Z12, Z6, K3, Z11 // 62524d4b52dc
+ VPDPWSSD 7(SI)(DI*4), Z6, K3, Z11 // 62724d4b529cbe07000000
+ VPDPWSSD -7(DI)(R8*2), Z6, K3, Z11 // 62324d4b529c47f9ffffff
+ VPDPWSSD Z25, Z8, K3, Z11 // 62123d4b52d9
+ VPDPWSSD Z12, Z8, K3, Z11 // 62523d4b52dc
+ VPDPWSSD 7(SI)(DI*4), Z8, K3, Z11 // 62723d4b529cbe07000000
+ VPDPWSSD -7(DI)(R8*2), Z8, K3, Z11 // 62323d4b529c47f9ffffff
+ VPDPWSSDS X13, X11, K2, X22 // 62c2250a53f5
+ VPDPWSSDS X6, X11, K2, X22 // 62e2250a53f6
+ VPDPWSSDS X12, X11, K2, X22 // 62c2250a53f4
+ VPDPWSSDS 17(SP)(BP*8), X11, K2, X22 // 62e2250a53b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X11, K2, X22 // 62e2250a53b4ac11000000
+ VPDPWSSDS X13, X15, K2, X22 // 62c2050a53f5
+ VPDPWSSDS X6, X15, K2, X22 // 62e2050a53f6
+ VPDPWSSDS X12, X15, K2, X22 // 62c2050a53f4
+ VPDPWSSDS 17(SP)(BP*8), X15, K2, X22 // 62e2050a53b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X15, K2, X22 // 62e2050a53b4ac11000000
+ VPDPWSSDS X13, X30, K2, X22 // 62c20d0253f5
+ VPDPWSSDS X6, X30, K2, X22 // 62e20d0253f6
+ VPDPWSSDS X12, X30, K2, X22 // 62c20d0253f4
+ VPDPWSSDS 17(SP)(BP*8), X30, K2, X22 // 62e20d0253b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X30, K2, X22 // 62e20d0253b4ac11000000
+ VPDPWSSDS X13, X11, K2, X30 // 6242250a53f5
+ VPDPWSSDS X6, X11, K2, X30 // 6262250a53f6
+ VPDPWSSDS X12, X11, K2, X30 // 6242250a53f4
+ VPDPWSSDS 17(SP)(BP*8), X11, K2, X30 // 6262250a53b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X11, K2, X30 // 6262250a53b4ac11000000
+ VPDPWSSDS X13, X15, K2, X30 // 6242050a53f5
+ VPDPWSSDS X6, X15, K2, X30 // 6262050a53f6
+ VPDPWSSDS X12, X15, K2, X30 // 6242050a53f4
+ VPDPWSSDS 17(SP)(BP*8), X15, K2, X30 // 6262050a53b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X15, K2, X30 // 6262050a53b4ac11000000
+ VPDPWSSDS X13, X30, K2, X30 // 62420d0253f5
+ VPDPWSSDS X6, X30, K2, X30 // 62620d0253f6
+ VPDPWSSDS X12, X30, K2, X30 // 62420d0253f4
+ VPDPWSSDS 17(SP)(BP*8), X30, K2, X30 // 62620d0253b4ec11000000
+ VPDPWSSDS 17(SP)(BP*4), X30, K2, X30 // 62620d0253b4ac11000000
+ VPDPWSSDS X13, X11, K2, X3 // 62d2250a53dd
+ VPDPWSSDS X6, X11, K2, X3 // 62f2250a53de
+ VPDPWSSDS X12, X11, K2, X3 // 62d2250a53dc
+ VPDPWSSDS 17(SP)(BP*8), X11, K2, X3 // 62f2250a539cec11000000
+ VPDPWSSDS 17(SP)(BP*4), X11, K2, X3 // 62f2250a539cac11000000
+ VPDPWSSDS X13, X15, K2, X3 // 62d2050a53dd
+ VPDPWSSDS X6, X15, K2, X3 // 62f2050a53de
+ VPDPWSSDS X12, X15, K2, X3 // 62d2050a53dc
+ VPDPWSSDS 17(SP)(BP*8), X15, K2, X3 // 62f2050a539cec11000000
+ VPDPWSSDS 17(SP)(BP*4), X15, K2, X3 // 62f2050a539cac11000000
+ VPDPWSSDS X13, X30, K2, X3 // 62d20d0253dd
+ VPDPWSSDS X6, X30, K2, X3 // 62f20d0253de
+ VPDPWSSDS X12, X30, K2, X3 // 62d20d0253dc
+ VPDPWSSDS 17(SP)(BP*8), X30, K2, X3 // 62f20d02539cec11000000
+ VPDPWSSDS 17(SP)(BP*4), X30, K2, X3 // 62f20d02539cac11000000
+ VPDPWSSDS Y0, Y6, K1, Y9 // 62724d2953c8
+ VPDPWSSDS Y19, Y6, K1, Y9 // 62324d2953cb
+ VPDPWSSDS Y31, Y6, K1, Y9 // 62124d2953cf
+ VPDPWSSDS (R8), Y6, K1, Y9 // 62524d295308
+ VPDPWSSDS 15(DX)(BX*2), Y6, K1, Y9 // 62724d29538c5a0f000000
+ VPDPWSSDS Y0, Y1, K1, Y9 // 6272752953c8
+ VPDPWSSDS Y19, Y1, K1, Y9 // 6232752953cb
+ VPDPWSSDS Y31, Y1, K1, Y9 // 6212752953cf
+ VPDPWSSDS (R8), Y1, K1, Y9 // 625275295308
+ VPDPWSSDS 15(DX)(BX*2), Y1, K1, Y9 // 62727529538c5a0f000000
+ VPDPWSSDS Y0, Y9, K1, Y9 // 6272352953c8
+ VPDPWSSDS Y19, Y9, K1, Y9 // 6232352953cb
+ VPDPWSSDS Y31, Y9, K1, Y9 // 6212352953cf
+ VPDPWSSDS (R8), Y9, K1, Y9 // 625235295308
+ VPDPWSSDS 15(DX)(BX*2), Y9, K1, Y9 // 62723529538c5a0f000000
+ VPDPWSSDS Y0, Y6, K1, Y14 // 62724d2953f0
+ VPDPWSSDS Y19, Y6, K1, Y14 // 62324d2953f3
+ VPDPWSSDS Y31, Y6, K1, Y14 // 62124d2953f7
+ VPDPWSSDS (R8), Y6, K1, Y14 // 62524d295330
+ VPDPWSSDS 15(DX)(BX*2), Y6, K1, Y14 // 62724d2953b45a0f000000
+ VPDPWSSDS Y0, Y1, K1, Y14 // 6272752953f0
+ VPDPWSSDS Y19, Y1, K1, Y14 // 6232752953f3
+ VPDPWSSDS Y31, Y1, K1, Y14 // 6212752953f7
+ VPDPWSSDS (R8), Y1, K1, Y14 // 625275295330
+ VPDPWSSDS 15(DX)(BX*2), Y1, K1, Y14 // 6272752953b45a0f000000
+ VPDPWSSDS Y0, Y9, K1, Y14 // 6272352953f0
+ VPDPWSSDS Y19, Y9, K1, Y14 // 6232352953f3
+ VPDPWSSDS Y31, Y9, K1, Y14 // 6212352953f7
+ VPDPWSSDS (R8), Y9, K1, Y14 // 625235295330
+ VPDPWSSDS 15(DX)(BX*2), Y9, K1, Y14 // 6272352953b45a0f000000
+ VPDPWSSDS Y0, Y6, K1, Y1 // 62f24d2953c8
+ VPDPWSSDS Y19, Y6, K1, Y1 // 62b24d2953cb
+ VPDPWSSDS Y31, Y6, K1, Y1 // 62924d2953cf
+ VPDPWSSDS (R8), Y6, K1, Y1 // 62d24d295308
+ VPDPWSSDS 15(DX)(BX*2), Y6, K1, Y1 // 62f24d29538c5a0f000000
+ VPDPWSSDS Y0, Y1, K1, Y1 // 62f2752953c8
+ VPDPWSSDS Y19, Y1, K1, Y1 // 62b2752953cb
+ VPDPWSSDS Y31, Y1, K1, Y1 // 6292752953cf
+ VPDPWSSDS (R8), Y1, K1, Y1 // 62d275295308
+ VPDPWSSDS 15(DX)(BX*2), Y1, K1, Y1 // 62f27529538c5a0f000000
+ VPDPWSSDS Y0, Y9, K1, Y1 // 62f2352953c8
+ VPDPWSSDS Y19, Y9, K1, Y1 // 62b2352953cb
+ VPDPWSSDS Y31, Y9, K1, Y1 // 6292352953cf
+ VPDPWSSDS (R8), Y9, K1, Y1 // 62d235295308
+ VPDPWSSDS 15(DX)(BX*2), Y9, K1, Y1 // 62f23529538c5a0f000000
+ VPDPWSSDS Z6, Z9, K2, Z12 // 6272354a53e6
+ VPDPWSSDS Z25, Z9, K2, Z12 // 6212354a53e1
+ VPDPWSSDS 17(SP), Z9, K2, Z12 // 6272354a53a42411000000
+ VPDPWSSDS -17(BP)(SI*4), Z9, K2, Z12 // 6272354a53a4b5efffffff
+ VPDPWSSDS Z6, Z12, K2, Z12 // 62721d4a53e6
+ VPDPWSSDS Z25, Z12, K2, Z12 // 62121d4a53e1
+ VPDPWSSDS 17(SP), Z12, K2, Z12 // 62721d4a53a42411000000
+ VPDPWSSDS -17(BP)(SI*4), Z12, K2, Z12 // 62721d4a53a4b5efffffff
+ VPDPWSSDS Z6, Z9, K2, Z17 // 62e2354a53ce
+ VPDPWSSDS Z25, Z9, K2, Z17 // 6282354a53c9
+ VPDPWSSDS 17(SP), Z9, K2, Z17 // 62e2354a538c2411000000
+ VPDPWSSDS -17(BP)(SI*4), Z9, K2, Z17 // 62e2354a538cb5efffffff
+ VPDPWSSDS Z6, Z12, K2, Z17 // 62e21d4a53ce
+ VPDPWSSDS Z25, Z12, K2, Z17 // 62821d4a53c9
+ VPDPWSSDS 17(SP), Z12, K2, Z17 // 62e21d4a538c2411000000
+ VPDPWSSDS -17(BP)(SI*4), Z12, K2, Z17 // 62e21d4a538cb5efffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vpopcntdq.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vpopcntdq.s
new file mode 100644
index 0000000..d71faec
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512_vpopcntdq.s
@@ -0,0 +1,82 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512_vpopcntdq(SB), NOSPLIT, $0
+ VPOPCNTD X12, K2, X8 // 62527d0a55c4
+ VPOPCNTD X16, K2, X8 // 62327d0a55c0
+ VPOPCNTD X23, K2, X8 // 62327d0a55c7
+ VPOPCNTD (R14), K2, X8 // 62527d0a5506
+ VPOPCNTD -7(DI)(R8*8), K2, X8 // 62327d0a5584c7f9ffffff
+ VPOPCNTD X12, K2, X26 // 62427d0a55d4
+ VPOPCNTD X16, K2, X26 // 62227d0a55d0
+ VPOPCNTD X23, K2, X26 // 62227d0a55d7
+ VPOPCNTD (R14), K2, X26 // 62427d0a5516
+ VPOPCNTD -7(DI)(R8*8), K2, X26 // 62227d0a5594c7f9ffffff
+ VPOPCNTD X12, K2, X23 // 62c27d0a55fc
+ VPOPCNTD X16, K2, X23 // 62a27d0a55f8
+ VPOPCNTD X23, K2, X23 // 62a27d0a55ff
+ VPOPCNTD (R14), K2, X23 // 62c27d0a553e
+ VPOPCNTD -7(DI)(R8*8), K2, X23 // 62a27d0a55bcc7f9ffffff
+ VPOPCNTD Y22, K5, Y26 // 62227d2d55d6
+ VPOPCNTD Y3, K5, Y26 // 62627d2d55d3
+ VPOPCNTD Y15, K5, Y26 // 62427d2d55d7
+ VPOPCNTD -15(R14)(R15*1), K5, Y26 // 62027d2d55943ef1ffffff
+ VPOPCNTD -15(BX), K5, Y26 // 62627d2d5593f1ffffff
+ VPOPCNTD Y22, K5, Y30 // 62227d2d55f6
+ VPOPCNTD Y3, K5, Y30 // 62627d2d55f3
+ VPOPCNTD Y15, K5, Y30 // 62427d2d55f7
+ VPOPCNTD -15(R14)(R15*1), K5, Y30 // 62027d2d55b43ef1ffffff
+ VPOPCNTD -15(BX), K5, Y30 // 62627d2d55b3f1ffffff
+ VPOPCNTD Y22, K5, Y12 // 62327d2d55e6
+ VPOPCNTD Y3, K5, Y12 // 62727d2d55e3
+ VPOPCNTD Y15, K5, Y12 // 62527d2d55e7
+ VPOPCNTD -15(R14)(R15*1), K5, Y12 // 62127d2d55a43ef1ffffff
+ VPOPCNTD -15(BX), K5, Y12 // 62727d2d55a3f1ffffff
+ VPOPCNTD Z2, K3, Z22 // 62e27d4b55f2
+ VPOPCNTD Z31, K3, Z22 // 62827d4b55f7
+ VPOPCNTD 7(SI)(DI*4), K3, Z22 // 62e27d4b55b4be07000000
+ VPOPCNTD -7(DI)(R8*2), K3, Z22 // 62a27d4b55b447f9ffffff
+ VPOPCNTD Z2, K3, Z7 // 62f27d4b55fa
+ VPOPCNTD Z31, K3, Z7 // 62927d4b55ff
+ VPOPCNTD 7(SI)(DI*4), K3, Z7 // 62f27d4b55bcbe07000000
+ VPOPCNTD -7(DI)(R8*2), K3, Z7 // 62b27d4b55bc47f9ffffff
+ VPOPCNTQ X24, K4, X23 // 6282fd0c55f8
+ VPOPCNTQ X14, K4, X23 // 62c2fd0c55fe
+ VPOPCNTQ X0, K4, X23 // 62e2fd0c55f8
+ VPOPCNTQ 99(R15)(R15*4), K4, X23 // 6282fd0c55bcbf63000000
+ VPOPCNTQ 15(DX), K4, X23 // 62e2fd0c55ba0f000000
+ VPOPCNTQ X24, K4, X11 // 6212fd0c55d8
+ VPOPCNTQ X14, K4, X11 // 6252fd0c55de
+ VPOPCNTQ X0, K4, X11 // 6272fd0c55d8
+ VPOPCNTQ 99(R15)(R15*4), K4, X11 // 6212fd0c559cbf63000000
+ VPOPCNTQ 15(DX), K4, X11 // 6272fd0c559a0f000000
+ VPOPCNTQ X24, K4, X31 // 6202fd0c55f8
+ VPOPCNTQ X14, K4, X31 // 6242fd0c55fe
+ VPOPCNTQ X0, K4, X31 // 6262fd0c55f8
+ VPOPCNTQ 99(R15)(R15*4), K4, X31 // 6202fd0c55bcbf63000000
+ VPOPCNTQ 15(DX), K4, X31 // 6262fd0c55ba0f000000
+ VPOPCNTQ Y5, K2, Y1 // 62f2fd2a55cd
+ VPOPCNTQ Y17, K2, Y1 // 62b2fd2a55c9
+ VPOPCNTQ Y13, K2, Y1 // 62d2fd2a55cd
+ VPOPCNTQ 7(AX)(CX*4), K2, Y1 // 62f2fd2a558c8807000000
+ VPOPCNTQ 7(AX)(CX*1), K2, Y1 // 62f2fd2a558c0807000000
+ VPOPCNTQ Y5, K2, Y27 // 6262fd2a55dd
+ VPOPCNTQ Y17, K2, Y27 // 6222fd2a55d9
+ VPOPCNTQ Y13, K2, Y27 // 6242fd2a55dd
+ VPOPCNTQ 7(AX)(CX*4), K2, Y27 // 6262fd2a559c8807000000
+ VPOPCNTQ 7(AX)(CX*1), K2, Y27 // 6262fd2a559c0807000000
+ VPOPCNTQ Y5, K2, Y19 // 62e2fd2a55dd
+ VPOPCNTQ Y17, K2, Y19 // 62a2fd2a55d9
+ VPOPCNTQ Y13, K2, Y19 // 62c2fd2a55dd
+ VPOPCNTQ 7(AX)(CX*4), K2, Y19 // 62e2fd2a559c8807000000
+ VPOPCNTQ 7(AX)(CX*1), K2, Y19 // 62e2fd2a559c0807000000
+ VPOPCNTQ Z1, K2, Z20 // 62e2fd4a55e1
+ VPOPCNTQ Z3, K2, Z20 // 62e2fd4a55e3
+ VPOPCNTQ 17(SP), K2, Z20 // 62e2fd4a55a42411000000
+ VPOPCNTQ -17(BP)(SI*4), K2, Z20 // 62e2fd4a55a4b5efffffff
+ VPOPCNTQ Z1, K2, Z9 // 6272fd4a55c9
+ VPOPCNTQ Z3, K2, Z9 // 6272fd4a55cb
+ VPOPCNTQ 17(SP), K2, Z9 // 6272fd4a558c2411000000
+ VPOPCNTQ -17(BP)(SI*4), K2, Z9 // 6272fd4a558cb5efffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512bw.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512bw.s
new file mode 100644
index 0000000..e1ffb72
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512bw.s
@@ -0,0 +1,1939 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512bw(SB), NOSPLIT, $0
+ KADDD K4, K7, K5 // c4e1c54aec
+ KADDD K6, K7, K5 // c4e1c54aee
+ KADDD K4, K6, K5 // c4e1cd4aec
+ KADDD K6, K6, K5 // c4e1cd4aee
+ KADDD K4, K7, K4 // c4e1c54ae4
+ KADDD K6, K7, K4 // c4e1c54ae6
+ KADDD K4, K6, K4 // c4e1cd4ae4
+ KADDD K6, K6, K4 // c4e1cd4ae6
+ KADDQ K4, K5, K0 // c4e1d44ac4
+ KADDQ K6, K5, K0 // c4e1d44ac6
+ KADDQ K4, K4, K0 // c4e1dc4ac4
+ KADDQ K6, K4, K0 // c4e1dc4ac6
+ KADDQ K4, K5, K7 // c4e1d44afc
+ KADDQ K6, K5, K7 // c4e1d44afe
+ KADDQ K4, K4, K7 // c4e1dc4afc
+ KADDQ K6, K4, K7 // c4e1dc4afe
+ KANDD K1, K6, K0 // c4e1cd41c1
+ KANDD K5, K6, K0 // c4e1cd41c5
+ KANDD K1, K5, K0 // c4e1d541c1
+ KANDD K5, K5, K0 // c4e1d541c5
+ KANDD K1, K6, K5 // c4e1cd41e9
+ KANDD K5, K6, K5 // c4e1cd41ed
+ KANDD K1, K5, K5 // c4e1d541e9
+ KANDD K5, K5, K5 // c4e1d541ed
+ KANDND K5, K0, K4 // c4e1fd42e5
+ KANDND K4, K0, K4 // c4e1fd42e4
+ KANDND K5, K7, K4 // c4e1c542e5
+ KANDND K4, K7, K4 // c4e1c542e4
+ KANDND K5, K0, K6 // c4e1fd42f5
+ KANDND K4, K0, K6 // c4e1fd42f4
+ KANDND K5, K7, K6 // c4e1c542f5
+ KANDND K4, K7, K6 // c4e1c542f4
+ KANDNQ K6, K1, K4 // c4e1f442e6
+ KANDNQ K7, K1, K4 // c4e1f442e7
+ KANDNQ K6, K3, K4 // c4e1e442e6
+ KANDNQ K7, K3, K4 // c4e1e442e7
+ KANDNQ K6, K1, K6 // c4e1f442f6
+ KANDNQ K7, K1, K6 // c4e1f442f7
+ KANDNQ K6, K3, K6 // c4e1e442f6
+ KANDNQ K7, K3, K6 // c4e1e442f7
+ KANDQ K6, K0, K2 // c4e1fc41d6
+ KANDQ K5, K0, K2 // c4e1fc41d5
+ KANDQ K6, K5, K2 // c4e1d441d6
+ KANDQ K5, K5, K2 // c4e1d441d5
+ KANDQ K6, K0, K7 // c4e1fc41fe
+ KANDQ K5, K0, K7 // c4e1fc41fd
+ KANDQ K6, K5, K7 // c4e1d441fe
+ KANDQ K5, K5, K7 // c4e1d441fd
+ KMOVD K1, 17(SP) // c4e1f9914c2411
+ KMOVD K3, 17(SP) // c4e1f9915c2411
+ KMOVD K1, -17(BP)(SI*4) // c4e1f9914cb5ef
+ KMOVD K3, -17(BP)(SI*4) // c4e1f9915cb5ef
+ KMOVD K6, R14 // c57b93f6
+ KMOVD K7, R14 // c57b93f7
+ KMOVD K6, AX // c5fb93c6
+ KMOVD K7, AX // c5fb93c7
+ KMOVD K4, K6 // c4e1f990f4
+ KMOVD K6, K6 // c4e1f990f6
+ KMOVD 7(AX), K6 // c4e1f9907007
+ KMOVD (DI), K6 // c4e1f99037
+ KMOVD K4, K4 // c4e1f990e4
+ KMOVD K6, K4 // c4e1f990e6
+ KMOVD 7(AX), K4 // c4e1f9906007
+ KMOVD (DI), K4 // c4e1f99027
+ KMOVD R9, K4 // c4c17b92e1
+ KMOVD CX, K4 // c5fb92e1
+ KMOVD R9, K5 // c4c17b92e9
+ KMOVD CX, K5 // c5fb92e9
+ KMOVQ K2, 17(SP) // c4e1f891542411
+ KMOVQ K7, 17(SP) // c4e1f8917c2411
+ KMOVQ K2, -17(BP)(SI*4) // c4e1f89154b5ef
+ KMOVQ K7, -17(BP)(SI*4) // c4e1f8917cb5ef
+ KMOVQ K0, DX // c4e1fb93d0
+ KMOVQ K5, DX // c4e1fb93d5
+ KMOVQ K0, BP // c4e1fb93e8
+ KMOVQ K5, BP // c4e1fb93ed
+ KMOVQ K1, K6 // c4e1f890f1
+ KMOVQ K5, K6 // c4e1f890f5
+ KMOVQ 7(AX), K6 // c4e1f8907007
+ KMOVQ (DI), K6 // c4e1f89037
+ KMOVQ K1, K5 // c4e1f890e9
+ KMOVQ K5, K5 // c4e1f890ed
+ KMOVQ 7(AX), K5 // c4e1f8906807
+ KMOVQ (DI), K5 // c4e1f8902f
+ KMOVQ R10, K3 // c4c1fb92da
+ KMOVQ CX, K3 // c4e1fb92d9
+ KMOVQ R10, K1 // c4c1fb92ca
+ KMOVQ CX, K1 // c4e1fb92c9
+ KNOTD K6, K6 // c4e1f944f6
+ KNOTD K4, K6 // c4e1f944f4
+ KNOTD K6, K7 // c4e1f944fe
+ KNOTD K4, K7 // c4e1f944fc
+ KNOTQ K4, K4 // c4e1f844e4
+ KNOTQ K5, K4 // c4e1f844e5
+ KNOTQ K4, K6 // c4e1f844f4
+ KNOTQ K5, K6 // c4e1f844f5
+ KORD K4, K7, K5 // c4e1c545ec
+ KORD K6, K7, K5 // c4e1c545ee
+ KORD K4, K6, K5 // c4e1cd45ec
+ KORD K6, K6, K5 // c4e1cd45ee
+ KORD K4, K7, K4 // c4e1c545e4
+ KORD K6, K7, K4 // c4e1c545e6
+ KORD K4, K6, K4 // c4e1cd45e4
+ KORD K6, K6, K4 // c4e1cd45e6
+ KORQ K4, K5, K0 // c4e1d445c4
+ KORQ K6, K5, K0 // c4e1d445c6
+ KORQ K4, K4, K0 // c4e1dc45c4
+ KORQ K6, K4, K0 // c4e1dc45c6
+ KORQ K4, K5, K7 // c4e1d445fc
+ KORQ K6, K5, K7 // c4e1d445fe
+ KORQ K4, K4, K7 // c4e1dc45fc
+ KORQ K6, K4, K7 // c4e1dc45fe
+ KORTESTD K4, K6 // c4e1f998f4
+ KORTESTD K6, K6 // c4e1f998f6
+ KORTESTD K4, K4 // c4e1f998e4
+ KORTESTD K6, K4 // c4e1f998e6
+ KORTESTQ K2, K4 // c4e1f898e2
+ KORTESTQ K7, K4 // c4e1f898e7
+ KORTESTQ K2, K5 // c4e1f898ea
+ KORTESTQ K7, K5 // c4e1f898ef
+ KSHIFTLD $0, K5, K0 // c4e37933c500
+ KSHIFTLD $0, K4, K0 // c4e37933c400
+ KSHIFTLD $0, K5, K7 // c4e37933fd00
+ KSHIFTLD $0, K4, K7 // c4e37933fc00
+ KSHIFTLQ $97, K1, K4 // c4e3f933e161
+ KSHIFTLQ $97, K3, K4 // c4e3f933e361
+ KSHIFTLQ $97, K1, K6 // c4e3f933f161
+ KSHIFTLQ $97, K3, K6 // c4e3f933f361
+ KSHIFTRD $79, K0, K2 // c4e37931d04f
+ KSHIFTRD $79, K5, K2 // c4e37931d54f
+ KSHIFTRD $79, K0, K7 // c4e37931f84f
+ KSHIFTRD $79, K5, K7 // c4e37931fd4f
+ KSHIFTRQ $64, K1, K6 // c4e3f931f140
+ KSHIFTRQ $64, K5, K6 // c4e3f931f540
+ KSHIFTRQ $64, K1, K5 // c4e3f931e940
+ KSHIFTRQ $64, K5, K5 // c4e3f931ed40
+ KTESTD K5, K0 // c4e1f999c5
+ KTESTD K4, K0 // c4e1f999c4
+ KTESTD K5, K7 // c4e1f999fd
+ KTESTD K4, K7 // c4e1f999fc
+ KTESTQ K1, K4 // c4e1f899e1
+ KTESTQ K3, K4 // c4e1f899e3
+ KTESTQ K1, K6 // c4e1f899f1
+ KTESTQ K3, K6 // c4e1f899f3
+ KUNPCKDQ K1, K6, K0 // c4e1cc4bc1
+ KUNPCKDQ K5, K6, K0 // c4e1cc4bc5
+ KUNPCKDQ K1, K5, K0 // c4e1d44bc1
+ KUNPCKDQ K5, K5, K0 // c4e1d44bc5
+ KUNPCKDQ K1, K6, K5 // c4e1cc4be9
+ KUNPCKDQ K5, K6, K5 // c4e1cc4bed
+ KUNPCKDQ K1, K5, K5 // c4e1d44be9
+ KUNPCKDQ K5, K5, K5 // c4e1d44bed
+ KUNPCKWD K7, K5, K3 // c5d44bdf
+ KUNPCKWD K6, K5, K3 // c5d44bde
+ KUNPCKWD K7, K4, K3 // c5dc4bdf
+ KUNPCKWD K6, K4, K3 // c5dc4bde
+ KUNPCKWD K7, K5, K1 // c5d44bcf
+ KUNPCKWD K6, K5, K1 // c5d44bce
+ KUNPCKWD K7, K4, K1 // c5dc4bcf
+ KUNPCKWD K6, K4, K1 // c5dc4bce
+ KXNORD K6, K1, K4 // c4e1f546e6
+ KXNORD K7, K1, K4 // c4e1f546e7
+ KXNORD K6, K3, K4 // c4e1e546e6
+ KXNORD K7, K3, K4 // c4e1e546e7
+ KXNORD K6, K1, K6 // c4e1f546f6
+ KXNORD K7, K1, K6 // c4e1f546f7
+ KXNORD K6, K3, K6 // c4e1e546f6
+ KXNORD K7, K3, K6 // c4e1e546f7
+ KXNORQ K4, K4, K6 // c4e1dc46f4
+ KXNORQ K5, K4, K6 // c4e1dc46f5
+ KXNORQ K4, K6, K6 // c4e1cc46f4
+ KXNORQ K5, K6, K6 // c4e1cc46f5
+ KXNORQ K4, K4, K4 // c4e1dc46e4
+ KXNORQ K5, K4, K4 // c4e1dc46e5
+ KXNORQ K4, K6, K4 // c4e1cc46e4
+ KXNORQ K5, K6, K4 // c4e1cc46e5
+ KXORD K0, K4, K7 // c4e1dd47f8
+ KXORD K7, K4, K7 // c4e1dd47ff
+ KXORD K0, K6, K7 // c4e1cd47f8
+ KXORD K7, K6, K7 // c4e1cd47ff
+ KXORD K0, K4, K6 // c4e1dd47f0
+ KXORD K7, K4, K6 // c4e1dd47f7
+ KXORD K0, K6, K6 // c4e1cd47f0
+ KXORD K7, K6, K6 // c4e1cd47f7
+ KXORQ K1, K4, K5 // c4e1dc47e9
+ KXORQ K3, K4, K5 // c4e1dc47eb
+ KXORQ K1, K6, K5 // c4e1cc47e9
+ KXORQ K3, K6, K5 // c4e1cc47eb
+ KXORQ K1, K4, K4 // c4e1dc47e1
+ KXORQ K3, K4, K4 // c4e1dc47e3
+ KXORQ K1, K6, K4 // c4e1cc47e1
+ KXORQ K3, K6, K4 // c4e1cc47e3
+ VDBPSADBW $65, X15, X17, K3, X5 // 62d3750342ef41
+ VDBPSADBW $65, 7(AX)(CX*4), X17, K3, X5 // 62f3750342ac880700000041
+ VDBPSADBW $65, 7(AX)(CX*1), X17, K3, X5 // 62f3750342ac080700000041
+ VDBPSADBW $67, Y17, Y5, K4, Y19 // 62a3552c42d943
+ VDBPSADBW $67, 99(R15)(R15*2), Y5, K4, Y19 // 6283552c429c7f6300000043
+ VDBPSADBW $67, -7(DI), Y5, K4, Y19 // 62e3552c429ff9ffffff43
+ VDBPSADBW $127, Z3, Z5, K2, Z19 // 62e3554a42db7f
+ VDBPSADBW $127, Z5, Z5, K2, Z19 // 62e3554a42dd7f
+ VDBPSADBW $127, 17(SP)(BP*1), Z5, K2, Z19 // 62e3554a429c2c110000007f
+ VDBPSADBW $127, -7(CX)(DX*8), Z5, K2, Z19 // 62e3554a429cd1f9ffffff7f
+ VDBPSADBW $127, Z3, Z1, K2, Z19 // 62e3754a42db7f
+ VDBPSADBW $127, Z5, Z1, K2, Z19 // 62e3754a42dd7f
+ VDBPSADBW $127, 17(SP)(BP*1), Z1, K2, Z19 // 62e3754a429c2c110000007f
+ VDBPSADBW $127, -7(CX)(DX*8), Z1, K2, Z19 // 62e3754a429cd1f9ffffff7f
+ VDBPSADBW $127, Z3, Z5, K2, Z15 // 6273554a42fb7f
+ VDBPSADBW $127, Z5, Z5, K2, Z15 // 6273554a42fd7f
+ VDBPSADBW $127, 17(SP)(BP*1), Z5, K2, Z15 // 6273554a42bc2c110000007f
+ VDBPSADBW $127, -7(CX)(DX*8), Z5, K2, Z15 // 6273554a42bcd1f9ffffff7f
+ VDBPSADBW $127, Z3, Z1, K2, Z15 // 6273754a42fb7f
+ VDBPSADBW $127, Z5, Z1, K2, Z15 // 6273754a42fd7f
+ VDBPSADBW $127, 17(SP)(BP*1), Z1, K2, Z15 // 6273754a42bc2c110000007f
+ VDBPSADBW $127, -7(CX)(DX*8), Z1, K2, Z15 // 6273754a42bcd1f9ffffff7f
+ VMOVDQU16 X14, K1, X16 // 6231ff097ff0
+ VMOVDQU16 X14, K1, -17(BP)(SI*2) // 6271ff097fb475efffffff
+ VMOVDQU16 X14, K1, 7(AX)(CX*2) // 6271ff097fb44807000000
+ VMOVDQU16 X14, K1, X11 // 6251ff097ff3
+ VMOVDQU16 15(R8)(R14*1), K1, X11 // 6211ff096f9c300f000000
+ VMOVDQU16 15(R8)(R14*2), K1, X11 // 6211ff096f9c700f000000
+ VMOVDQU16 Y24, K7, Y18 // 6221ff2f7fc2
+ VMOVDQU16 Y24, K7, 7(SI)(DI*4) // 6261ff2f7f84be07000000
+ VMOVDQU16 Y24, K7, -7(DI)(R8*2) // 6221ff2f7f8447f9ffffff
+ VMOVDQU16 Y11, K2, Y8 // 6251ff2a7fd8
+ VMOVDQU16 17(SP), K2, Y8 // 6271ff2a6f842411000000
+ VMOVDQU16 -17(BP)(SI*4), K2, Y8 // 6271ff2a6f84b5efffffff
+ VMOVDQU16 Z6, K4, Z22 // 62b1ff4c7ff6
+ VMOVDQU16 Z8, K4, Z22 // 6231ff4c7fc6
+ VMOVDQU16 Z6, K4, Z11 // 62d1ff4c7ff3
+ VMOVDQU16 Z8, K4, Z11 // 6251ff4c7fc3
+ VMOVDQU16 Z6, K4, (CX) // 62f1ff4c7f31
+ VMOVDQU16 Z8, K4, (CX) // 6271ff4c7f01
+ VMOVDQU16 Z6, K4, 99(R15) // 62d1ff4c7fb763000000
+ VMOVDQU16 Z8, K4, 99(R15) // 6251ff4c7f8763000000
+ VMOVDQU16 Z12, K1, Z25 // 6211ff497fe1
+ VMOVDQU16 Z17, K1, Z25 // 6281ff497fc9
+ VMOVDQU16 99(R15)(R15*2), K1, Z25 // 6201ff496f8c7f63000000
+ VMOVDQU16 -7(DI), K1, Z25 // 6261ff496f8ff9ffffff
+ VMOVDQU16 Z12, K1, Z12 // 6251ff497fe4
+ VMOVDQU16 Z17, K1, Z12 // 62c1ff497fcc
+ VMOVDQU16 99(R15)(R15*2), K1, Z12 // 6211ff496fa47f63000000
+ VMOVDQU16 -7(DI), K1, Z12 // 6271ff496fa7f9ffffff
+ VMOVDQU8 X11, K5, X23 // 62317f0d7fdf
+ VMOVDQU8 X11, K5, -7(CX)(DX*1) // 62717f0d7f9c11f9ffffff
+ VMOVDQU8 X11, K5, -15(R14)(R15*4) // 62117f0d7f9cbef1ffffff
+ VMOVDQU8 X24, K3, X31 // 62017f0b7fc7
+ VMOVDQU8 15(DX)(BX*1), K3, X31 // 62617f0b6fbc1a0f000000
+ VMOVDQU8 -7(CX)(DX*2), K3, X31 // 62617f0b6fbc51f9ffffff
+ VMOVDQU8 Y3, K4, Y6 // 62f17f2c7fde
+ VMOVDQU8 Y3, K4, 7(SI)(DI*1) // 62f17f2c7f9c3e07000000
+ VMOVDQU8 Y3, K4, 15(DX)(BX*8) // 62f17f2c7f9cda0f000000
+ VMOVDQU8 Y6, K2, Y7 // 62f17f2a7ff7
+ VMOVDQU8 -7(DI)(R8*1), K2, Y7 // 62b17f2a6fbc07f9ffffff
+ VMOVDQU8 (SP), K2, Y7 // 62f17f2a6f3c24
+ VMOVDQU8 Z9, K2, Z3 // 62717f4a7fcb
+ VMOVDQU8 Z19, K2, Z3 // 62e17f4a7fdb
+ VMOVDQU8 Z9, K2, Z30 // 62117f4a7fce
+ VMOVDQU8 Z19, K2, Z30 // 62817f4a7fde
+ VMOVDQU8 Z9, K2, 15(R8) // 62517f4a7f880f000000
+ VMOVDQU8 Z19, K2, 15(R8) // 62c17f4a7f980f000000
+ VMOVDQU8 Z9, K2, (BP) // 62717f4a7f4d00
+ VMOVDQU8 Z19, K2, (BP) // 62e17f4a7f5d00
+ VMOVDQU8 Z11, K3, Z12 // 62517f4b7fdc
+ VMOVDQU8 Z5, K3, Z12 // 62d17f4b7fec
+ VMOVDQU8 15(R8)(R14*8), K3, Z12 // 62117f4b6fa4f00f000000
+ VMOVDQU8 -15(R14)(R15*2), K3, Z12 // 62117f4b6fa47ef1ffffff
+ VMOVDQU8 Z11, K3, Z22 // 62317f4b7fde
+ VMOVDQU8 Z5, K3, Z22 // 62b17f4b7fee
+ VMOVDQU8 15(R8)(R14*8), K3, Z22 // 62817f4b6fb4f00f000000
+ VMOVDQU8 -15(R14)(R15*2), K3, Z22 // 62817f4b6fb47ef1ffffff
+ VPABSB X22, K3, X6 // 62b27d0b1cf6 or 62b2fd0b1cf6
+ VPABSB -7(CX), K3, X6 // 62f27d0b1cb1f9ffffff or 62f2fd0b1cb1f9ffffff
+ VPABSB 15(DX)(BX*4), K3, X6 // 62f27d0b1cb49a0f000000 or 62f2fd0b1cb49a0f000000
+ VPABSB Y27, K4, Y11 // 62127d2c1cdb or 6212fd2c1cdb
+ VPABSB 15(DX)(BX*1), K4, Y11 // 62727d2c1c9c1a0f000000 or 6272fd2c1c9c1a0f000000
+ VPABSB -7(CX)(DX*2), K4, Y11 // 62727d2c1c9c51f9ffffff or 6272fd2c1c9c51f9ffffff
+ VPABSB Z6, K5, Z21 // 62e27d4d1cee or 62e2fd4d1cee
+ VPABSB Z9, K5, Z21 // 62c27d4d1ce9 or 62c2fd4d1ce9
+ VPABSB (AX), K5, Z21 // 62e27d4d1c28 or 62e2fd4d1c28
+ VPABSB 7(SI), K5, Z21 // 62e27d4d1cae07000000 or 62e2fd4d1cae07000000
+ VPABSB Z6, K5, Z9 // 62727d4d1cce or 6272fd4d1cce
+ VPABSB Z9, K5, Z9 // 62527d4d1cc9 or 6252fd4d1cc9
+ VPABSB (AX), K5, Z9 // 62727d4d1c08 or 6272fd4d1c08
+ VPABSB 7(SI), K5, Z9 // 62727d4d1c8e07000000 or 6272fd4d1c8e07000000
+ VPABSW X11, K4, X15 // 62527d0c1dfb or 6252fd0c1dfb
+ VPABSW (BX), K4, X15 // 62727d0c1d3b or 6272fd0c1d3b
+ VPABSW -17(BP)(SI*1), K4, X15 // 62727d0c1dbc35efffffff or 6272fd0c1dbc35efffffff
+ VPABSW Y3, K7, Y26 // 62627d2f1dd3 or 6262fd2f1dd3
+ VPABSW 15(R8), K7, Y26 // 62427d2f1d900f000000 or 6242fd2f1d900f000000
+ VPABSW (BP), K7, Y26 // 62627d2f1d5500 or 6262fd2f1d5500
+ VPABSW Z16, K2, Z7 // 62b27d4a1df8 or 62b2fd4a1df8
+ VPABSW Z25, K2, Z7 // 62927d4a1df9 or 6292fd4a1df9
+ VPABSW (R8), K2, Z7 // 62d27d4a1d38 or 62d2fd4a1d38
+ VPABSW 15(DX)(BX*2), K2, Z7 // 62f27d4a1dbc5a0f000000 or 62f2fd4a1dbc5a0f000000
+ VPABSW Z16, K2, Z21 // 62a27d4a1de8 or 62a2fd4a1de8
+ VPABSW Z25, K2, Z21 // 62827d4a1de9 or 6282fd4a1de9
+ VPABSW (R8), K2, Z21 // 62c27d4a1d28 or 62c2fd4a1d28
+ VPABSW 15(DX)(BX*2), K2, Z21 // 62e27d4a1dac5a0f000000 or 62e2fd4a1dac5a0f000000
+ VPACKSSDW X13, X19, K5, X1 // 62d165056bcd
+ VPACKSSDW 15(R8)(R14*4), X19, K5, X1 // 629165056b8cb00f000000
+ VPACKSSDW -7(CX)(DX*4), X19, K5, X1 // 62f165056b8c91f9ffffff
+ VPACKSSDW Y1, Y28, K3, Y8 // 62711d236bc1
+ VPACKSSDW 15(R8)(R14*8), Y28, K3, Y8 // 62111d236b84f00f000000
+ VPACKSSDW -15(R14)(R15*2), Y28, K3, Y8 // 62111d236b847ef1ffffff
+ VPACKSSDW Z21, Z12, K4, Z14 // 62311d4c6bf5
+ VPACKSSDW Z9, Z12, K4, Z14 // 62511d4c6bf1
+ VPACKSSDW 17(SP)(BP*1), Z12, K4, Z14 // 62711d4c6bb42c11000000
+ VPACKSSDW -7(CX)(DX*8), Z12, K4, Z14 // 62711d4c6bb4d1f9ffffff
+ VPACKSSDW Z21, Z13, K4, Z14 // 6231154c6bf5
+ VPACKSSDW Z9, Z13, K4, Z14 // 6251154c6bf1
+ VPACKSSDW 17(SP)(BP*1), Z13, K4, Z14 // 6271154c6bb42c11000000
+ VPACKSSDW -7(CX)(DX*8), Z13, K4, Z14 // 6271154c6bb4d1f9ffffff
+ VPACKSSDW Z21, Z12, K4, Z13 // 62311d4c6bed
+ VPACKSSDW Z9, Z12, K4, Z13 // 62511d4c6be9
+ VPACKSSDW 17(SP)(BP*1), Z12, K4, Z13 // 62711d4c6bac2c11000000
+ VPACKSSDW -7(CX)(DX*8), Z12, K4, Z13 // 62711d4c6bacd1f9ffffff
+ VPACKSSDW Z21, Z13, K4, Z13 // 6231154c6bed
+ VPACKSSDW Z9, Z13, K4, Z13 // 6251154c6be9
+ VPACKSSDW 17(SP)(BP*1), Z13, K4, Z13 // 6271154c6bac2c11000000
+ VPACKSSDW -7(CX)(DX*8), Z13, K4, Z13 // 6271154c6bacd1f9ffffff
+ VPACKSSWB X0, X14, K2, X2 // 62f10d0a63d0 or 62f18d0a63d0
+ VPACKSSWB (R8), X14, K2, X2 // 62d10d0a6310 or 62d18d0a6310
+ VPACKSSWB 15(DX)(BX*2), X14, K2, X2 // 62f10d0a63945a0f000000 or 62f18d0a63945a0f000000
+ VPACKSSWB Y31, Y14, K2, Y23 // 62810d2a63ff or 62818d2a63ff
+ VPACKSSWB -15(R14)(R15*1), Y14, K2, Y23 // 62810d2a63bc3ef1ffffff or 62818d2a63bc3ef1ffffff
+ VPACKSSWB -15(BX), Y14, K2, Y23 // 62e10d2a63bbf1ffffff or 62e18d2a63bbf1ffffff
+ VPACKSSWB Z23, Z27, K3, Z2 // 62b1254363d7 or 62b1a54363d7
+ VPACKSSWB Z9, Z27, K3, Z2 // 62d1254363d1 or 62d1a54363d1
+ VPACKSSWB -17(BP)(SI*2), Z27, K3, Z2 // 62f12543639475efffffff or 62f1a543639475efffffff
+ VPACKSSWB 7(AX)(CX*2), Z27, K3, Z2 // 62f1254363944807000000 or 62f1a54363944807000000
+ VPACKSSWB Z23, Z25, K3, Z2 // 62b1354363d7 or 62b1b54363d7
+ VPACKSSWB Z9, Z25, K3, Z2 // 62d1354363d1 or 62d1b54363d1
+ VPACKSSWB -17(BP)(SI*2), Z25, K3, Z2 // 62f13543639475efffffff or 62f1b543639475efffffff
+ VPACKSSWB 7(AX)(CX*2), Z25, K3, Z2 // 62f1354363944807000000 or 62f1b54363944807000000
+ VPACKSSWB Z23, Z27, K3, Z7 // 62b1254363ff or 62b1a54363ff
+ VPACKSSWB Z9, Z27, K3, Z7 // 62d1254363f9 or 62d1a54363f9
+ VPACKSSWB -17(BP)(SI*2), Z27, K3, Z7 // 62f1254363bc75efffffff or 62f1a54363bc75efffffff
+ VPACKSSWB 7(AX)(CX*2), Z27, K3, Z7 // 62f1254363bc4807000000 or 62f1a54363bc4807000000
+ VPACKSSWB Z23, Z25, K3, Z7 // 62b1354363ff or 62b1b54363ff
+ VPACKSSWB Z9, Z25, K3, Z7 // 62d1354363f9 or 62d1b54363f9
+ VPACKSSWB -17(BP)(SI*2), Z25, K3, Z7 // 62f1354363bc75efffffff or 62f1b54363bc75efffffff
+ VPACKSSWB 7(AX)(CX*2), Z25, K3, Z7 // 62f1354363bc4807000000 or 62f1b54363bc4807000000
+ VPACKUSDW X11, X25, K3, X0 // 62d235032bc3
+ VPACKUSDW 17(SP)(BP*1), X25, K3, X0 // 62f235032b842c11000000
+ VPACKUSDW -7(CX)(DX*8), X25, K3, X0 // 62f235032b84d1f9ffffff
+ VPACKUSDW Y22, Y2, K3, Y25 // 62226d2b2bce
+ VPACKUSDW 7(AX)(CX*4), Y2, K3, Y25 // 62626d2b2b8c8807000000
+ VPACKUSDW 7(AX)(CX*1), Y2, K3, Y25 // 62626d2b2b8c0807000000
+ VPACKUSDW Z14, Z3, K2, Z27 // 6242654a2bde
+ VPACKUSDW Z7, Z3, K2, Z27 // 6262654a2bdf
+ VPACKUSDW 15(R8)(R14*1), Z3, K2, Z27 // 6202654a2b9c300f000000
+ VPACKUSDW 15(R8)(R14*2), Z3, K2, Z27 // 6202654a2b9c700f000000
+ VPACKUSDW Z14, Z0, K2, Z27 // 62427d4a2bde
+ VPACKUSDW Z7, Z0, K2, Z27 // 62627d4a2bdf
+ VPACKUSDW 15(R8)(R14*1), Z0, K2, Z27 // 62027d4a2b9c300f000000
+ VPACKUSDW 15(R8)(R14*2), Z0, K2, Z27 // 62027d4a2b9c700f000000
+ VPACKUSDW Z14, Z3, K2, Z14 // 6252654a2bf6
+ VPACKUSDW Z7, Z3, K2, Z14 // 6272654a2bf7
+ VPACKUSDW 15(R8)(R14*1), Z3, K2, Z14 // 6212654a2bb4300f000000
+ VPACKUSDW 15(R8)(R14*2), Z3, K2, Z14 // 6212654a2bb4700f000000
+ VPACKUSDW Z14, Z0, K2, Z14 // 62527d4a2bf6
+ VPACKUSDW Z7, Z0, K2, Z14 // 62727d4a2bf7
+ VPACKUSDW 15(R8)(R14*1), Z0, K2, Z14 // 62127d4a2bb4300f000000
+ VPACKUSDW 15(R8)(R14*2), Z0, K2, Z14 // 62127d4a2bb4700f000000
+ VPACKUSWB X11, X18, K1, X17 // 62c16d0167cb or 62c1ed0167cb
+ VPACKUSWB -17(BP)(SI*2), X18, K1, X17 // 62e16d01678c75efffffff or 62e1ed01678c75efffffff
+ VPACKUSWB 7(AX)(CX*2), X18, K1, X17 // 62e16d01678c4807000000 or 62e1ed01678c4807000000
+ VPACKUSWB Y9, Y8, K2, Y27 // 62413d2a67d9 or 6241bd2a67d9
+ VPACKUSWB (SI), Y8, K2, Y27 // 62613d2a671e or 6261bd2a671e
+ VPACKUSWB 7(SI)(DI*2), Y8, K2, Y27 // 62613d2a679c7e07000000 or 6261bd2a679c7e07000000
+ VPACKUSWB Z1, Z22, K1, Z8 // 62714d4167c1 or 6271cd4167c1
+ VPACKUSWB Z16, Z22, K1, Z8 // 62314d4167c0 or 6231cd4167c0
+ VPACKUSWB (R14), Z22, K1, Z8 // 62514d416706 or 6251cd416706
+ VPACKUSWB -7(DI)(R8*8), Z22, K1, Z8 // 62314d416784c7f9ffffff or 6231cd416784c7f9ffffff
+ VPACKUSWB Z1, Z25, K1, Z8 // 6271354167c1 or 6271b54167c1
+ VPACKUSWB Z16, Z25, K1, Z8 // 6231354167c0 or 6231b54167c0
+ VPACKUSWB (R14), Z25, K1, Z8 // 625135416706 or 6251b5416706
+ VPACKUSWB -7(DI)(R8*8), Z25, K1, Z8 // 623135416784c7f9ffffff or 6231b5416784c7f9ffffff
+ VPACKUSWB Z1, Z22, K1, Z24 // 62614d4167c1 or 6261cd4167c1
+ VPACKUSWB Z16, Z22, K1, Z24 // 62214d4167c0 or 6221cd4167c0
+ VPACKUSWB (R14), Z22, K1, Z24 // 62414d416706 or 6241cd416706
+ VPACKUSWB -7(DI)(R8*8), Z22, K1, Z24 // 62214d416784c7f9ffffff or 6221cd416784c7f9ffffff
+ VPACKUSWB Z1, Z25, K1, Z24 // 6261354167c1 or 6261b54167c1
+ VPACKUSWB Z16, Z25, K1, Z24 // 6221354167c0 or 6221b54167c0
+ VPACKUSWB (R14), Z25, K1, Z24 // 624135416706 or 6241b5416706
+ VPACKUSWB -7(DI)(R8*8), Z25, K1, Z24 // 622135416784c7f9ffffff or 6221b5416784c7f9ffffff
+ VPADDB X24, X2, K7, X9 // 62116d0ffcc8 or 6211ed0ffcc8
+ VPADDB 15(R8)(R14*1), X2, K7, X9 // 62116d0ffc8c300f000000 or 6211ed0ffc8c300f000000
+ VPADDB 15(R8)(R14*2), X2, K7, X9 // 62116d0ffc8c700f000000 or 6211ed0ffc8c700f000000
+ VPADDB Y14, Y9, K1, Y22 // 62c13529fcf6 or 62c1b529fcf6
+ VPADDB 17(SP)(BP*8), Y9, K1, Y22 // 62e13529fcb4ec11000000 or 62e1b529fcb4ec11000000
+ VPADDB 17(SP)(BP*4), Y9, K1, Y22 // 62e13529fcb4ac11000000 or 62e1b529fcb4ac11000000
+ VPADDB Z15, Z0, K1, Z6 // 62d17d49fcf7 or 62d1fd49fcf7
+ VPADDB Z12, Z0, K1, Z6 // 62d17d49fcf4 or 62d1fd49fcf4
+ VPADDB 99(R15)(R15*4), Z0, K1, Z6 // 62917d49fcb4bf63000000 or 6291fd49fcb4bf63000000
+ VPADDB 15(DX), Z0, K1, Z6 // 62f17d49fcb20f000000 or 62f1fd49fcb20f000000
+ VPADDB Z15, Z8, K1, Z6 // 62d13d49fcf7 or 62d1bd49fcf7
+ VPADDB Z12, Z8, K1, Z6 // 62d13d49fcf4 or 62d1bd49fcf4
+ VPADDB 99(R15)(R15*4), Z8, K1, Z6 // 62913d49fcb4bf63000000 or 6291bd49fcb4bf63000000
+ VPADDB 15(DX), Z8, K1, Z6 // 62f13d49fcb20f000000 or 62f1bd49fcb20f000000
+ VPADDB Z15, Z0, K1, Z2 // 62d17d49fcd7 or 62d1fd49fcd7
+ VPADDB Z12, Z0, K1, Z2 // 62d17d49fcd4 or 62d1fd49fcd4
+ VPADDB 99(R15)(R15*4), Z0, K1, Z2 // 62917d49fc94bf63000000 or 6291fd49fc94bf63000000
+ VPADDB 15(DX), Z0, K1, Z2 // 62f17d49fc920f000000 or 62f1fd49fc920f000000
+ VPADDB Z15, Z8, K1, Z2 // 62d13d49fcd7 or 62d1bd49fcd7
+ VPADDB Z12, Z8, K1, Z2 // 62d13d49fcd4 or 62d1bd49fcd4
+ VPADDB 99(R15)(R15*4), Z8, K1, Z2 // 62913d49fc94bf63000000 or 6291bd49fc94bf63000000
+ VPADDB 15(DX), Z8, K1, Z2 // 62f13d49fc920f000000 or 62f1bd49fc920f000000
+ VPADDSB X15, X11, K4, X3 // 62d1250cecdf or 62d1a50cecdf
+ VPADDSB (CX), X11, K4, X3 // 62f1250cec19 or 62f1a50cec19
+ VPADDSB 99(R15), X11, K4, X3 // 62d1250cec9f63000000 or 62d1a50cec9f63000000
+ VPADDSB Y9, Y22, K5, Y31 // 62414d25ecf9 or 6241cd25ecf9
+ VPADDSB 7(AX), Y22, K5, Y31 // 62614d25ecb807000000 or 6261cd25ecb807000000
+ VPADDSB (DI), Y22, K5, Y31 // 62614d25ec3f or 6261cd25ec3f
+ VPADDSB Z13, Z28, K7, Z26 // 62411d47ecd5 or 62419d47ecd5
+ VPADDSB Z21, Z28, K7, Z26 // 62211d47ecd5 or 62219d47ecd5
+ VPADDSB -7(CX)(DX*1), Z28, K7, Z26 // 62611d47ec9411f9ffffff or 62619d47ec9411f9ffffff
+ VPADDSB -15(R14)(R15*4), Z28, K7, Z26 // 62011d47ec94bef1ffffff or 62019d47ec94bef1ffffff
+ VPADDSB Z13, Z6, K7, Z26 // 62414d4fecd5 or 6241cd4fecd5
+ VPADDSB Z21, Z6, K7, Z26 // 62214d4fecd5 or 6221cd4fecd5
+ VPADDSB -7(CX)(DX*1), Z6, K7, Z26 // 62614d4fec9411f9ffffff or 6261cd4fec9411f9ffffff
+ VPADDSB -15(R14)(R15*4), Z6, K7, Z26 // 62014d4fec94bef1ffffff or 6201cd4fec94bef1ffffff
+ VPADDSB Z13, Z28, K7, Z14 // 62511d47ecf5 or 62519d47ecf5
+ VPADDSB Z21, Z28, K7, Z14 // 62311d47ecf5 or 62319d47ecf5
+ VPADDSB -7(CX)(DX*1), Z28, K7, Z14 // 62711d47ecb411f9ffffff or 62719d47ecb411f9ffffff
+ VPADDSB -15(R14)(R15*4), Z28, K7, Z14 // 62111d47ecb4bef1ffffff or 62119d47ecb4bef1ffffff
+ VPADDSB Z13, Z6, K7, Z14 // 62514d4fecf5 or 6251cd4fecf5
+ VPADDSB Z21, Z6, K7, Z14 // 62314d4fecf5 or 6231cd4fecf5
+ VPADDSB -7(CX)(DX*1), Z6, K7, Z14 // 62714d4fecb411f9ffffff or 6271cd4fecb411f9ffffff
+ VPADDSB -15(R14)(R15*4), Z6, K7, Z14 // 62114d4fecb4bef1ffffff or 6211cd4fecb4bef1ffffff
+ VPADDSW X6, X13, K7, X30 // 6261150fedf6 or 6261950fedf6
+ VPADDSW 99(R15)(R15*2), X13, K7, X30 // 6201150fedb47f63000000 or 6201950fedb47f63000000
+ VPADDSW -7(DI), X13, K7, X30 // 6261150fedb7f9ffffff or 6261950fedb7f9ffffff
+ VPADDSW Y5, Y31, K6, Y23 // 62e10526edfd or 62e18526edfd
+ VPADDSW 99(R15)(R15*1), Y31, K6, Y23 // 62810526edbc3f63000000 or 62818526edbc3f63000000
+ VPADDSW (DX), Y31, K6, Y23 // 62e10526ed3a or 62e18526ed3a
+ VPADDSW Z21, Z3, K3, Z26 // 6221654bedd5 or 6221e54bedd5
+ VPADDSW Z13, Z3, K3, Z26 // 6241654bedd5 or 6241e54bedd5
+ VPADDSW 15(DX)(BX*1), Z3, K3, Z26 // 6261654bed941a0f000000 or 6261e54bed941a0f000000
+ VPADDSW -7(CX)(DX*2), Z3, K3, Z26 // 6261654bed9451f9ffffff or 6261e54bed9451f9ffffff
+ VPADDSW Z21, Z0, K3, Z26 // 62217d4bedd5 or 6221fd4bedd5
+ VPADDSW Z13, Z0, K3, Z26 // 62417d4bedd5 or 6241fd4bedd5
+ VPADDSW 15(DX)(BX*1), Z0, K3, Z26 // 62617d4bed941a0f000000 or 6261fd4bed941a0f000000
+ VPADDSW -7(CX)(DX*2), Z0, K3, Z26 // 62617d4bed9451f9ffffff or 6261fd4bed9451f9ffffff
+ VPADDSW Z21, Z3, K3, Z3 // 62b1654beddd or 62b1e54beddd
+ VPADDSW Z13, Z3, K3, Z3 // 62d1654beddd or 62d1e54beddd
+ VPADDSW 15(DX)(BX*1), Z3, K3, Z3 // 62f1654bed9c1a0f000000 or 62f1e54bed9c1a0f000000
+ VPADDSW -7(CX)(DX*2), Z3, K3, Z3 // 62f1654bed9c51f9ffffff or 62f1e54bed9c51f9ffffff
+ VPADDSW Z21, Z0, K3, Z3 // 62b17d4beddd or 62b1fd4beddd
+ VPADDSW Z13, Z0, K3, Z3 // 62d17d4beddd or 62d1fd4beddd
+ VPADDSW 15(DX)(BX*1), Z0, K3, Z3 // 62f17d4bed9c1a0f000000 or 62f1fd4bed9c1a0f000000
+ VPADDSW -7(CX)(DX*2), Z0, K3, Z3 // 62f17d4bed9c51f9ffffff or 62f1fd4bed9c51f9ffffff
+ VPADDUSB X30, X23, K7, X12 // 62114507dce6 or 6211c507dce6
+ VPADDUSB -7(CX)(DX*1), X23, K7, X12 // 62714507dca411f9ffffff or 6271c507dca411f9ffffff
+ VPADDUSB -15(R14)(R15*4), X23, K7, X12 // 62114507dca4bef1ffffff or 6211c507dca4bef1ffffff
+ VPADDUSB Y19, Y5, K4, Y0 // 62b1552cdcc3 or 62b1d52cdcc3
+ VPADDUSB -17(BP)(SI*8), Y5, K4, Y0 // 62f1552cdc84f5efffffff or 62f1d52cdc84f5efffffff
+ VPADDUSB (R15), Y5, K4, Y0 // 62d1552cdc07 or 62d1d52cdc07
+ VPADDUSB Z27, Z3, K4, Z11 // 6211654cdcdb or 6211e54cdcdb
+ VPADDUSB Z15, Z3, K4, Z11 // 6251654cdcdf or 6251e54cdcdf
+ VPADDUSB -17(BP), Z3, K4, Z11 // 6271654cdc9defffffff or 6271e54cdc9defffffff
+ VPADDUSB -15(R14)(R15*8), Z3, K4, Z11 // 6211654cdc9cfef1ffffff or 6211e54cdc9cfef1ffffff
+ VPADDUSB Z27, Z12, K4, Z11 // 62111d4cdcdb or 62119d4cdcdb
+ VPADDUSB Z15, Z12, K4, Z11 // 62511d4cdcdf or 62519d4cdcdf
+ VPADDUSB -17(BP), Z12, K4, Z11 // 62711d4cdc9defffffff or 62719d4cdc9defffffff
+ VPADDUSB -15(R14)(R15*8), Z12, K4, Z11 // 62111d4cdc9cfef1ffffff or 62119d4cdc9cfef1ffffff
+ VPADDUSB Z27, Z3, K4, Z25 // 6201654cdccb or 6201e54cdccb
+ VPADDUSB Z15, Z3, K4, Z25 // 6241654cdccf or 6241e54cdccf
+ VPADDUSB -17(BP), Z3, K4, Z25 // 6261654cdc8defffffff or 6261e54cdc8defffffff
+ VPADDUSB -15(R14)(R15*8), Z3, K4, Z25 // 6201654cdc8cfef1ffffff or 6201e54cdc8cfef1ffffff
+ VPADDUSB Z27, Z12, K4, Z25 // 62011d4cdccb or 62019d4cdccb
+ VPADDUSB Z15, Z12, K4, Z25 // 62411d4cdccf or 62419d4cdccf
+ VPADDUSB -17(BP), Z12, K4, Z25 // 62611d4cdc8defffffff or 62619d4cdc8defffffff
+ VPADDUSB -15(R14)(R15*8), Z12, K4, Z25 // 62011d4cdc8cfef1ffffff or 62019d4cdc8cfef1ffffff
+ VPADDUSW X2, X20, K7, X8 // 62715d07ddc2 or 6271dd07ddc2
+ VPADDUSW 15(DX)(BX*1), X20, K7, X8 // 62715d07dd841a0f000000 or 6271dd07dd841a0f000000
+ VPADDUSW -7(CX)(DX*2), X20, K7, X8 // 62715d07dd8451f9ffffff or 6271dd07dd8451f9ffffff
+ VPADDUSW Y2, Y28, K2, Y31 // 62611d22ddfa or 62619d22ddfa
+ VPADDUSW 7(SI)(DI*8), Y28, K2, Y31 // 62611d22ddbcfe07000000 or 62619d22ddbcfe07000000
+ VPADDUSW -15(R14), Y28, K2, Y31 // 62411d22ddbef1ffffff or 62419d22ddbef1ffffff
+ VPADDUSW Z8, Z23, K5, Z23 // 62c14545ddf8 or 62c1c545ddf8
+ VPADDUSW Z28, Z23, K5, Z23 // 62814545ddfc or 6281c545ddfc
+ VPADDUSW 17(SP)(BP*2), Z23, K5, Z23 // 62e14545ddbc6c11000000 or 62e1c545ddbc6c11000000
+ VPADDUSW -7(DI)(R8*4), Z23, K5, Z23 // 62a14545ddbc87f9ffffff or 62a1c545ddbc87f9ffffff
+ VPADDUSW Z8, Z6, K5, Z23 // 62c14d4dddf8 or 62c1cd4dddf8
+ VPADDUSW Z28, Z6, K5, Z23 // 62814d4dddfc or 6281cd4dddfc
+ VPADDUSW 17(SP)(BP*2), Z6, K5, Z23 // 62e14d4dddbc6c11000000 or 62e1cd4dddbc6c11000000
+ VPADDUSW -7(DI)(R8*4), Z6, K5, Z23 // 62a14d4dddbc87f9ffffff or 62a1cd4dddbc87f9ffffff
+ VPADDUSW Z8, Z23, K5, Z5 // 62d14545dde8 or 62d1c545dde8
+ VPADDUSW Z28, Z23, K5, Z5 // 62914545ddec or 6291c545ddec
+ VPADDUSW 17(SP)(BP*2), Z23, K5, Z5 // 62f14545ddac6c11000000 or 62f1c545ddac6c11000000
+ VPADDUSW -7(DI)(R8*4), Z23, K5, Z5 // 62b14545ddac87f9ffffff or 62b1c545ddac87f9ffffff
+ VPADDUSW Z8, Z6, K5, Z5 // 62d14d4ddde8 or 62d1cd4ddde8
+ VPADDUSW Z28, Z6, K5, Z5 // 62914d4dddec or 6291cd4dddec
+ VPADDUSW 17(SP)(BP*2), Z6, K5, Z5 // 62f14d4dddac6c11000000 or 62f1cd4dddac6c11000000
+ VPADDUSW -7(DI)(R8*4), Z6, K5, Z5 // 62b14d4dddac87f9ffffff or 62b1cd4dddac87f9ffffff
+ VPADDW X19, X26, K3, X9 // 62312d03fdcb or 6231ad03fdcb
+ VPADDW -17(BP), X26, K3, X9 // 62712d03fd8defffffff or 6271ad03fd8defffffff
+ VPADDW -15(R14)(R15*8), X26, K3, X9 // 62112d03fd8cfef1ffffff or 6211ad03fd8cfef1ffffff
+ VPADDW Y0, Y27, K4, Y24 // 62612524fdc0 or 6261a524fdc0
+ VPADDW 7(SI)(DI*1), Y27, K4, Y24 // 62612524fd843e07000000 or 6261a524fd843e07000000
+ VPADDW 15(DX)(BX*8), Y27, K4, Y24 // 62612524fd84da0f000000 or 6261a524fd84da0f000000
+ VPADDW Z12, Z16, K2, Z21 // 62c17d42fdec or 62c1fd42fdec
+ VPADDW Z27, Z16, K2, Z21 // 62817d42fdeb or 6281fd42fdeb
+ VPADDW 15(R8), Z16, K2, Z21 // 62c17d42fda80f000000 or 62c1fd42fda80f000000
+ VPADDW (BP), Z16, K2, Z21 // 62e17d42fd6d00 or 62e1fd42fd6d00
+ VPADDW Z12, Z13, K2, Z21 // 62c1154afdec or 62c1954afdec
+ VPADDW Z27, Z13, K2, Z21 // 6281154afdeb or 6281954afdeb
+ VPADDW 15(R8), Z13, K2, Z21 // 62c1154afda80f000000 or 62c1954afda80f000000
+ VPADDW (BP), Z13, K2, Z21 // 62e1154afd6d00 or 62e1954afd6d00
+ VPADDW Z12, Z16, K2, Z5 // 62d17d42fdec or 62d1fd42fdec
+ VPADDW Z27, Z16, K2, Z5 // 62917d42fdeb or 6291fd42fdeb
+ VPADDW 15(R8), Z16, K2, Z5 // 62d17d42fda80f000000 or 62d1fd42fda80f000000
+ VPADDW (BP), Z16, K2, Z5 // 62f17d42fd6d00 or 62f1fd42fd6d00
+ VPADDW Z12, Z13, K2, Z5 // 62d1154afdec or 62d1954afdec
+ VPADDW Z27, Z13, K2, Z5 // 6291154afdeb or 6291954afdeb
+ VPADDW 15(R8), Z13, K2, Z5 // 62d1154afda80f000000 or 62d1954afda80f000000
+ VPADDW (BP), Z13, K2, Z5 // 62f1154afd6d00 or 62f1954afd6d00
+ VPALIGNR $13, X16, X31, K2, X0 // 62b305020fc00d or 62b385020fc00d
+ VPALIGNR $13, 17(SP)(BP*2), X31, K2, X0 // 62f305020f846c110000000d or 62f385020f846c110000000d
+ VPALIGNR $13, -7(DI)(R8*4), X31, K2, X0 // 62b305020f8487f9ffffff0d or 62b385020f8487f9ffffff0d
+ VPALIGNR $65, Y3, Y31, K3, Y11 // 627305230fdb41 or 627385230fdb41
+ VPALIGNR $65, -7(DI)(R8*1), Y31, K3, Y11 // 623305230f9c07f9ffffff41 or 623385230f9c07f9ffffff41
+ VPALIGNR $65, (SP), Y31, K3, Y11 // 627305230f1c2441 or 627385230f1c2441
+ VPALIGNR $67, Z25, Z6, K3, Z22 // 62834d4b0ff143 or 6283cd4b0ff143
+ VPALIGNR $67, Z12, Z6, K3, Z22 // 62c34d4b0ff443 or 62c3cd4b0ff443
+ VPALIGNR $67, 15(R8)(R14*8), Z6, K3, Z22 // 62834d4b0fb4f00f00000043 or 6283cd4b0fb4f00f00000043
+ VPALIGNR $67, -15(R14)(R15*2), Z6, K3, Z22 // 62834d4b0fb47ef1ffffff43 or 6283cd4b0fb47ef1ffffff43
+ VPALIGNR $67, Z25, Z8, K3, Z22 // 62833d4b0ff143 or 6283bd4b0ff143
+ VPALIGNR $67, Z12, Z8, K3, Z22 // 62c33d4b0ff443 or 62c3bd4b0ff443
+ VPALIGNR $67, 15(R8)(R14*8), Z8, K3, Z22 // 62833d4b0fb4f00f00000043 or 6283bd4b0fb4f00f00000043
+ VPALIGNR $67, -15(R14)(R15*2), Z8, K3, Z22 // 62833d4b0fb47ef1ffffff43 or 6283bd4b0fb47ef1ffffff43
+ VPALIGNR $67, Z25, Z6, K3, Z11 // 62134d4b0fd943 or 6213cd4b0fd943
+ VPALIGNR $67, Z12, Z6, K3, Z11 // 62534d4b0fdc43 or 6253cd4b0fdc43
+ VPALIGNR $67, 15(R8)(R14*8), Z6, K3, Z11 // 62134d4b0f9cf00f00000043 or 6213cd4b0f9cf00f00000043
+ VPALIGNR $67, -15(R14)(R15*2), Z6, K3, Z11 // 62134d4b0f9c7ef1ffffff43 or 6213cd4b0f9c7ef1ffffff43
+ VPALIGNR $67, Z25, Z8, K3, Z11 // 62133d4b0fd943 or 6213bd4b0fd943
+ VPALIGNR $67, Z12, Z8, K3, Z11 // 62533d4b0fdc43 or 6253bd4b0fdc43
+ VPALIGNR $67, 15(R8)(R14*8), Z8, K3, Z11 // 62133d4b0f9cf00f00000043 or 6213bd4b0f9cf00f00000043
+ VPALIGNR $67, -15(R14)(R15*2), Z8, K3, Z11 // 62133d4b0f9c7ef1ffffff43 or 6213bd4b0f9c7ef1ffffff43
+ VPAVGB X16, X7, K1, X19 // 62a14509e0d8 or 62a1c509e0d8
+ VPAVGB (SI), X7, K1, X19 // 62e14509e01e or 62e1c509e01e
+ VPAVGB 7(SI)(DI*2), X7, K1, X19 // 62e14509e09c7e07000000 or 62e1c509e09c7e07000000
+ VPAVGB Y14, Y19, K3, Y23 // 62c16523e0fe or 62c1e523e0fe
+ VPAVGB 15(R8)(R14*4), Y19, K3, Y23 // 62816523e0bcb00f000000 or 6281e523e0bcb00f000000
+ VPAVGB -7(CX)(DX*4), Y19, K3, Y23 // 62e16523e0bc91f9ffffff or 62e1e523e0bc91f9ffffff
+ VPAVGB Z2, Z18, K4, Z11 // 62716d44e0da or 6271ed44e0da
+ VPAVGB Z21, Z18, K4, Z11 // 62316d44e0dd or 6231ed44e0dd
+ VPAVGB 7(SI)(DI*4), Z18, K4, Z11 // 62716d44e09cbe07000000 or 6271ed44e09cbe07000000
+ VPAVGB -7(DI)(R8*2), Z18, K4, Z11 // 62316d44e09c47f9ffffff or 6231ed44e09c47f9ffffff
+ VPAVGB Z2, Z24, K4, Z11 // 62713d44e0da or 6271bd44e0da
+ VPAVGB Z21, Z24, K4, Z11 // 62313d44e0dd or 6231bd44e0dd
+ VPAVGB 7(SI)(DI*4), Z24, K4, Z11 // 62713d44e09cbe07000000 or 6271bd44e09cbe07000000
+ VPAVGB -7(DI)(R8*2), Z24, K4, Z11 // 62313d44e09c47f9ffffff or 6231bd44e09c47f9ffffff
+ VPAVGB Z2, Z18, K4, Z5 // 62f16d44e0ea or 62f1ed44e0ea
+ VPAVGB Z21, Z18, K4, Z5 // 62b16d44e0ed or 62b1ed44e0ed
+ VPAVGB 7(SI)(DI*4), Z18, K4, Z5 // 62f16d44e0acbe07000000 or 62f1ed44e0acbe07000000
+ VPAVGB -7(DI)(R8*2), Z18, K4, Z5 // 62b16d44e0ac47f9ffffff or 62b1ed44e0ac47f9ffffff
+ VPAVGB Z2, Z24, K4, Z5 // 62f13d44e0ea or 62f1bd44e0ea
+ VPAVGB Z21, Z24, K4, Z5 // 62b13d44e0ed or 62b1bd44e0ed
+ VPAVGB 7(SI)(DI*4), Z24, K4, Z5 // 62f13d44e0acbe07000000 or 62f1bd44e0acbe07000000
+ VPAVGB -7(DI)(R8*2), Z24, K4, Z5 // 62b13d44e0ac47f9ffffff or 62b1bd44e0ac47f9ffffff
+ VPAVGW X7, X1, K5, X31 // 6261750de3ff or 6261f50de3ff
+ VPAVGW 17(SP)(BP*8), X1, K5, X31 // 6261750de3bcec11000000 or 6261f50de3bcec11000000
+ VPAVGW 17(SP)(BP*4), X1, K5, X31 // 6261750de3bcac11000000 or 6261f50de3bcac11000000
+ VPAVGW Y16, Y5, K7, Y21 // 62a1552fe3e8 or 62a1d52fe3e8
+ VPAVGW (R8), Y5, K7, Y21 // 62c1552fe328 or 62c1d52fe328
+ VPAVGW 15(DX)(BX*2), Y5, K7, Y21 // 62e1552fe3ac5a0f000000 or 62e1d52fe3ac5a0f000000
+ VPAVGW Z6, Z6, K7, Z7 // 62f14d4fe3fe or 62f1cd4fe3fe
+ VPAVGW Z22, Z6, K7, Z7 // 62b14d4fe3fe or 62b1cd4fe3fe
+ VPAVGW 17(SP), Z6, K7, Z7 // 62f14d4fe3bc2411000000 or 62f1cd4fe3bc2411000000
+ VPAVGW -17(BP)(SI*4), Z6, K7, Z7 // 62f14d4fe3bcb5efffffff or 62f1cd4fe3bcb5efffffff
+ VPAVGW Z6, Z16, K7, Z7 // 62f17d47e3fe or 62f1fd47e3fe
+ VPAVGW Z22, Z16, K7, Z7 // 62b17d47e3fe or 62b1fd47e3fe
+ VPAVGW 17(SP), Z16, K7, Z7 // 62f17d47e3bc2411000000 or 62f1fd47e3bc2411000000
+ VPAVGW -17(BP)(SI*4), Z16, K7, Z7 // 62f17d47e3bcb5efffffff or 62f1fd47e3bcb5efffffff
+ VPAVGW Z6, Z6, K7, Z13 // 62714d4fe3ee or 6271cd4fe3ee
+ VPAVGW Z22, Z6, K7, Z13 // 62314d4fe3ee or 6231cd4fe3ee
+ VPAVGW 17(SP), Z6, K7, Z13 // 62714d4fe3ac2411000000 or 6271cd4fe3ac2411000000
+ VPAVGW -17(BP)(SI*4), Z6, K7, Z13 // 62714d4fe3acb5efffffff or 6271cd4fe3acb5efffffff
+ VPAVGW Z6, Z16, K7, Z13 // 62717d47e3ee or 6271fd47e3ee
+ VPAVGW Z22, Z16, K7, Z13 // 62317d47e3ee or 6231fd47e3ee
+ VPAVGW 17(SP), Z16, K7, Z13 // 62717d47e3ac2411000000 or 6271fd47e3ac2411000000
+ VPAVGW -17(BP)(SI*4), Z16, K7, Z13 // 62717d47e3acb5efffffff or 6271fd47e3acb5efffffff
+ VPBLENDMB X12, X15, K6, X9 // 6252050e66cc
+ VPBLENDMB 7(SI)(DI*4), X15, K6, X9 // 6272050e668cbe07000000
+ VPBLENDMB -7(DI)(R8*2), X15, K6, X9 // 6232050e668c47f9ffffff
+ VPBLENDMB Y20, Y21, K3, Y2 // 62b2552366d4
+ VPBLENDMB 17(SP)(BP*1), Y21, K3, Y2 // 62f2552366942c11000000
+ VPBLENDMB -7(CX)(DX*8), Y21, K3, Y2 // 62f255236694d1f9ffffff
+ VPBLENDMB Z18, Z13, K7, Z1 // 62b2154f66ca
+ VPBLENDMB Z8, Z13, K7, Z1 // 62d2154f66c8
+ VPBLENDMB 7(AX), Z13, K7, Z1 // 62f2154f668807000000
+ VPBLENDMB (DI), Z13, K7, Z1 // 62f2154f660f
+ VPBLENDMB Z18, Z13, K7, Z15 // 6232154f66fa
+ VPBLENDMB Z8, Z13, K7, Z15 // 6252154f66f8
+ VPBLENDMB 7(AX), Z13, K7, Z15 // 6272154f66b807000000
+ VPBLENDMB (DI), Z13, K7, Z15 // 6272154f663f
+ VPBLENDMW X26, X3, K4, X8 // 6212e50c66c2
+ VPBLENDMW 99(R15)(R15*1), X3, K4, X8 // 6212e50c66843f63000000
+ VPBLENDMW (DX), X3, K4, X8 // 6272e50c6602
+ VPBLENDMW Y3, Y0, K2, Y6 // 62f2fd2a66f3
+ VPBLENDMW (R14), Y0, K2, Y6 // 62d2fd2a6636
+ VPBLENDMW -7(DI)(R8*8), Y0, K2, Y6 // 62b2fd2a66b4c7f9ffffff
+ VPBLENDMW Z15, Z3, K2, Z14 // 6252e54a66f7
+ VPBLENDMW Z30, Z3, K2, Z14 // 6212e54a66f6
+ VPBLENDMW 7(SI)(DI*8), Z3, K2, Z14 // 6272e54a66b4fe07000000
+ VPBLENDMW -15(R14), Z3, K2, Z14 // 6252e54a66b6f1ffffff
+ VPBLENDMW Z15, Z12, K2, Z14 // 62529d4a66f7
+ VPBLENDMW Z30, Z12, K2, Z14 // 62129d4a66f6
+ VPBLENDMW 7(SI)(DI*8), Z12, K2, Z14 // 62729d4a66b4fe07000000
+ VPBLENDMW -15(R14), Z12, K2, Z14 // 62529d4a66b6f1ffffff
+ VPBLENDMW Z15, Z3, K2, Z28 // 6242e54a66e7
+ VPBLENDMW Z30, Z3, K2, Z28 // 6202e54a66e6
+ VPBLENDMW 7(SI)(DI*8), Z3, K2, Z28 // 6262e54a66a4fe07000000
+ VPBLENDMW -15(R14), Z3, K2, Z28 // 6242e54a66a6f1ffffff
+ VPBLENDMW Z15, Z12, K2, Z28 // 62429d4a66e7
+ VPBLENDMW Z30, Z12, K2, Z28 // 62029d4a66e6
+ VPBLENDMW 7(SI)(DI*8), Z12, K2, Z28 // 62629d4a66a4fe07000000
+ VPBLENDMW -15(R14), Z12, K2, Z28 // 62429d4a66a6f1ffffff
+ VPBROADCASTB CX, K3, X23 // 62e27d0b7af9
+ VPBROADCASTB SP, K3, X23 // 62e27d0b7afc
+ VPBROADCASTB R14, K3, Y5 // 62d27d2b7aee
+ VPBROADCASTB AX, K3, Y5 // 62f27d2b7ae8
+ VPBROADCASTB R9, K3, Z19 // 62c27d4b7ad9
+ VPBROADCASTB CX, K3, Z19 // 62e27d4b7ad9
+ VPBROADCASTB R9, K3, Z15 // 62527d4b7af9
+ VPBROADCASTB CX, K3, Z15 // 62727d4b7af9
+ VPBROADCASTB X28, K2, X13 // 62127d0a78ec
+ VPBROADCASTB 99(R15)(R15*1), K2, X13 // 62127d0a786c3f63
+ VPBROADCASTB (DX), K2, X13 // 62727d0a782a
+ VPBROADCASTB X24, K1, Y20 // 62827d2978e0
+ VPBROADCASTB -17(BP)(SI*8), K1, Y20 // 62e27d297864f5ef
+ VPBROADCASTB (R15), K1, Y20 // 62c27d297827
+ VPBROADCASTB X9, K2, Z5 // 62d27d4a78e9
+ VPBROADCASTB 7(SI)(DI*8), K2, Z5 // 62f27d4a786cfe07
+ VPBROADCASTB -15(R14), K2, Z5 // 62d27d4a786ef1
+ VPBROADCASTB X9, K2, Z1 // 62d27d4a78c9
+ VPBROADCASTB 7(SI)(DI*8), K2, Z1 // 62f27d4a784cfe07
+ VPBROADCASTB -15(R14), K2, Z1 // 62d27d4a784ef1
+ VPBROADCASTW R14, K7, X20 // 62c27d0f7be6
+ VPBROADCASTW AX, K7, X20 // 62e27d0f7be0
+ VPBROADCASTW R9, K7, Y22 // 62c27d2f7bf1
+ VPBROADCASTW CX, K7, Y22 // 62e27d2f7bf1
+ VPBROADCASTW SP, K6, Z0 // 62f27d4e7bc4
+ VPBROADCASTW R14, K6, Z0 // 62d27d4e7bc6
+ VPBROADCASTW SP, K6, Z11 // 62727d4e7bdc
+ VPBROADCASTW R14, K6, Z11 // 62527d4e7bde
+ VPBROADCASTW X9, K3, X7 // 62d27d0b79f9
+ VPBROADCASTW 99(R15)(R15*1), K3, X7 // 62927d0b79bc3f63000000
+ VPBROADCASTW (DX), K3, X7 // 62f27d0b793a
+ VPBROADCASTW X7, K7, Y13 // 62727d2f79ef
+ VPBROADCASTW -17(BP)(SI*8), K7, Y13 // 62727d2f79acf5efffffff
+ VPBROADCASTW (R15), K7, Y13 // 62527d2f792f
+ VPBROADCASTW X14, K4, Z0 // 62d27d4c79c6
+ VPBROADCASTW 7(SI)(DI*8), K4, Z0 // 62f27d4c7984fe07000000
+ VPBROADCASTW -15(R14), K4, Z0 // 62d27d4c7986f1ffffff
+ VPBROADCASTW X14, K4, Z25 // 62427d4c79ce
+ VPBROADCASTW 7(SI)(DI*8), K4, Z25 // 62627d4c798cfe07000000
+ VPBROADCASTW -15(R14), K4, Z25 // 62427d4c798ef1ffffff
+ VPCMPB $81, X1, X21, K4, K5 // 62f355043fe951
+ VPCMPB $81, 7(SI)(DI*8), X21, K4, K5 // 62f355043facfe0700000051
+ VPCMPB $81, -15(R14), X21, K4, K5 // 62d355043faef1ffffff51
+ VPCMPB $81, X1, X21, K4, K4 // 62f355043fe151
+ VPCMPB $81, 7(SI)(DI*8), X21, K4, K4 // 62f355043fa4fe0700000051
+ VPCMPB $81, -15(R14), X21, K4, K4 // 62d355043fa6f1ffffff51
+ VPCMPB $42, Y7, Y17, K7, K4 // 62f375273fe72a
+ VPCMPB $42, (CX), Y17, K7, K4 // 62f375273f212a
+ VPCMPB $42, 99(R15), Y17, K7, K4 // 62d375273fa7630000002a
+ VPCMPB $42, Y7, Y17, K7, K6 // 62f375273ff72a
+ VPCMPB $42, (CX), Y17, K7, K6 // 62f375273f312a
+ VPCMPB $42, 99(R15), Y17, K7, K6 // 62d375273fb7630000002a
+ VPCMPB $79, Z9, Z9, K2, K1 // 62d3354a3fc94f
+ VPCMPB $79, Z28, Z9, K2, K1 // 6293354a3fcc4f
+ VPCMPB $79, -7(DI)(R8*1), Z9, K2, K1 // 62b3354a3f8c07f9ffffff4f
+ VPCMPB $79, (SP), Z9, K2, K1 // 62f3354a3f0c244f
+ VPCMPB $79, Z9, Z25, K2, K1 // 62d335423fc94f
+ VPCMPB $79, Z28, Z25, K2, K1 // 629335423fcc4f
+ VPCMPB $79, -7(DI)(R8*1), Z25, K2, K1 // 62b335423f8c07f9ffffff4f
+ VPCMPB $79, (SP), Z25, K2, K1 // 62f335423f0c244f
+ VPCMPB $79, Z9, Z9, K2, K3 // 62d3354a3fd94f
+ VPCMPB $79, Z28, Z9, K2, K3 // 6293354a3fdc4f
+ VPCMPB $79, -7(DI)(R8*1), Z9, K2, K3 // 62b3354a3f9c07f9ffffff4f
+ VPCMPB $79, (SP), Z9, K2, K3 // 62f3354a3f1c244f
+ VPCMPB $79, Z9, Z25, K2, K3 // 62d335423fd94f
+ VPCMPB $79, Z28, Z25, K2, K3 // 629335423fdc4f
+ VPCMPB $79, -7(DI)(R8*1), Z25, K2, K3 // 62b335423f9c07f9ffffff4f
+ VPCMPB $79, (SP), Z25, K2, K3 // 62f335423f1c244f
+ VPCMPEQB X30, X0, K2, K4 // 62917d0a74e6 or 6291fd0a74e6
+ VPCMPEQB -7(DI)(R8*1), X0, K2, K4 // 62b17d0a74a407f9ffffff or 62b1fd0a74a407f9ffffff
+ VPCMPEQB (SP), X0, K2, K4 // 62f17d0a742424 or 62f1fd0a742424
+ VPCMPEQB X30, X0, K2, K5 // 62917d0a74ee or 6291fd0a74ee
+ VPCMPEQB -7(DI)(R8*1), X0, K2, K5 // 62b17d0a74ac07f9ffffff or 62b1fd0a74ac07f9ffffff
+ VPCMPEQB (SP), X0, K2, K5 // 62f17d0a742c24 or 62f1fd0a742c24
+ VPCMPEQB Y1, Y8, K2, K2 // 62f13d2a74d1 or 62f1bd2a74d1
+ VPCMPEQB -7(CX)(DX*1), Y8, K2, K2 // 62f13d2a749411f9ffffff or 62f1bd2a749411f9ffffff
+ VPCMPEQB -15(R14)(R15*4), Y8, K2, K2 // 62913d2a7494bef1ffffff or 6291bd2a7494bef1ffffff
+ VPCMPEQB Y1, Y8, K2, K7 // 62f13d2a74f9 or 62f1bd2a74f9
+ VPCMPEQB -7(CX)(DX*1), Y8, K2, K7 // 62f13d2a74bc11f9ffffff or 62f1bd2a74bc11f9ffffff
+ VPCMPEQB -15(R14)(R15*4), Y8, K2, K7 // 62913d2a74bcbef1ffffff or 6291bd2a74bcbef1ffffff
+ VPCMPEQB Z31, Z17, K3, K0 // 6291754374c7 or 6291f54374c7
+ VPCMPEQB Z0, Z17, K3, K0 // 62f1754374c0 or 62f1f54374c0
+ VPCMPEQB 99(R15)(R15*8), Z17, K3, K0 // 629175437484ff63000000 or 6291f5437484ff63000000
+ VPCMPEQB 7(AX)(CX*8), Z17, K3, K0 // 62f175437484c807000000 or 62f1f5437484c807000000
+ VPCMPEQB Z31, Z23, K3, K0 // 6291454374c7 or 6291c54374c7
+ VPCMPEQB Z0, Z23, K3, K0 // 62f1454374c0 or 62f1c54374c0
+ VPCMPEQB 99(R15)(R15*8), Z23, K3, K0 // 629145437484ff63000000 or 6291c5437484ff63000000
+ VPCMPEQB 7(AX)(CX*8), Z23, K3, K0 // 62f145437484c807000000 or 62f1c5437484c807000000
+ VPCMPEQB Z31, Z17, K3, K5 // 6291754374ef or 6291f54374ef
+ VPCMPEQB Z0, Z17, K3, K5 // 62f1754374e8 or 62f1f54374e8
+ VPCMPEQB 99(R15)(R15*8), Z17, K3, K5 // 6291754374acff63000000 or 6291f54374acff63000000
+ VPCMPEQB 7(AX)(CX*8), Z17, K3, K5 // 62f1754374acc807000000 or 62f1f54374acc807000000
+ VPCMPEQB Z31, Z23, K3, K5 // 6291454374ef or 6291c54374ef
+ VPCMPEQB Z0, Z23, K3, K5 // 62f1454374e8 or 62f1c54374e8
+ VPCMPEQB 99(R15)(R15*8), Z23, K3, K5 // 6291454374acff63000000 or 6291c54374acff63000000
+ VPCMPEQB 7(AX)(CX*8), Z23, K3, K5 // 62f1454374acc807000000 or 62f1c54374acc807000000
+ VPCMPEQW X8, X19, K7, K0 // 62d1650775c0 or 62d1e50775c0
+ VPCMPEQW (AX), X19, K7, K0 // 62f165077500 or 62f1e5077500
+ VPCMPEQW 7(SI), X19, K7, K0 // 62f16507758607000000 or 62f1e507758607000000
+ VPCMPEQW X8, X19, K7, K7 // 62d1650775f8 or 62d1e50775f8
+ VPCMPEQW (AX), X19, K7, K7 // 62f165077538 or 62f1e5077538
+ VPCMPEQW 7(SI), X19, K7, K7 // 62f1650775be07000000 or 62f1e50775be07000000
+ VPCMPEQW Y12, Y21, K1, K5 // 62d1552175ec or 62d1d52175ec
+ VPCMPEQW 17(SP)(BP*2), Y21, K1, K5 // 62f1552175ac6c11000000 or 62f1d52175ac6c11000000
+ VPCMPEQW -7(DI)(R8*4), Y21, K1, K5 // 62b1552175ac87f9ffffff or 62b1d52175ac87f9ffffff
+ VPCMPEQW Y12, Y21, K1, K4 // 62d1552175e4 or 62d1d52175e4
+ VPCMPEQW 17(SP)(BP*2), Y21, K1, K4 // 62f1552175a46c11000000 or 62f1d52175a46c11000000
+ VPCMPEQW -7(DI)(R8*4), Y21, K1, K4 // 62b1552175a487f9ffffff or 62b1d52175a487f9ffffff
+ VPCMPEQW Z26, Z30, K1, K4 // 62910d4175e2 or 62918d4175e2
+ VPCMPEQW Z22, Z30, K1, K4 // 62b10d4175e6 or 62b18d4175e6
+ VPCMPEQW 15(R8)(R14*4), Z30, K1, K4 // 62910d4175a4b00f000000 or 62918d4175a4b00f000000
+ VPCMPEQW -7(CX)(DX*4), Z30, K1, K4 // 62f10d4175a491f9ffffff or 62f18d4175a491f9ffffff
+ VPCMPEQW Z26, Z5, K1, K4 // 6291554975e2 or 6291d54975e2
+ VPCMPEQW Z22, Z5, K1, K4 // 62b1554975e6 or 62b1d54975e6
+ VPCMPEQW 15(R8)(R14*4), Z5, K1, K4 // 6291554975a4b00f000000 or 6291d54975a4b00f000000
+ VPCMPEQW -7(CX)(DX*4), Z5, K1, K4 // 62f1554975a491f9ffffff or 62f1d54975a491f9ffffff
+ VPCMPEQW Z26, Z30, K1, K6 // 62910d4175f2 or 62918d4175f2
+ VPCMPEQW Z22, Z30, K1, K6 // 62b10d4175f6 or 62b18d4175f6
+ VPCMPEQW 15(R8)(R14*4), Z30, K1, K6 // 62910d4175b4b00f000000 or 62918d4175b4b00f000000
+ VPCMPEQW -7(CX)(DX*4), Z30, K1, K6 // 62f10d4175b491f9ffffff or 62f18d4175b491f9ffffff
+ VPCMPEQW Z26, Z5, K1, K6 // 6291554975f2 or 6291d54975f2
+ VPCMPEQW Z22, Z5, K1, K6 // 62b1554975f6 or 62b1d54975f6
+ VPCMPEQW 15(R8)(R14*4), Z5, K1, K6 // 6291554975b4b00f000000 or 6291d54975b4b00f000000
+ VPCMPEQW -7(CX)(DX*4), Z5, K1, K6 // 62f1554975b491f9ffffff or 62f1d54975b491f9ffffff
+ VPCMPGTB X26, X8, K1, K1 // 62913d0964ca or 6291bd0964ca
+ VPCMPGTB (BX), X8, K1, K1 // 62f13d09640b or 62f1bd09640b
+ VPCMPGTB -17(BP)(SI*1), X8, K1, K1 // 62f13d09648c35efffffff or 62f1bd09648c35efffffff
+ VPCMPGTB X26, X8, K1, K3 // 62913d0964da or 6291bd0964da
+ VPCMPGTB (BX), X8, K1, K3 // 62f13d09641b or 62f1bd09641b
+ VPCMPGTB -17(BP)(SI*1), X8, K1, K3 // 62f13d09649c35efffffff or 62f1bd09649c35efffffff
+ VPCMPGTB Y1, Y9, K7, K6 // 62f1352f64f1 or 62f1b52f64f1
+ VPCMPGTB 15(R8), Y9, K7, K6 // 62d1352f64b00f000000 or 62d1b52f64b00f000000
+ VPCMPGTB (BP), Y9, K7, K6 // 62f1352f647500 or 62f1b52f647500
+ VPCMPGTB Y1, Y9, K7, K7 // 62f1352f64f9 or 62f1b52f64f9
+ VPCMPGTB 15(R8), Y9, K7, K7 // 62d1352f64b80f000000 or 62d1b52f64b80f000000
+ VPCMPGTB (BP), Y9, K7, K7 // 62f1352f647d00 or 62f1b52f647d00
+ VPCMPGTB Z16, Z7, K2, K6 // 62b1454a64f0 or 62b1c54a64f0
+ VPCMPGTB Z25, Z7, K2, K6 // 6291454a64f1 or 6291c54a64f1
+ VPCMPGTB (R8), Z7, K2, K6 // 62d1454a6430 or 62d1c54a6430
+ VPCMPGTB 15(DX)(BX*2), Z7, K2, K6 // 62f1454a64b45a0f000000 or 62f1c54a64b45a0f000000
+ VPCMPGTB Z16, Z21, K2, K6 // 62b1554264f0 or 62b1d54264f0
+ VPCMPGTB Z25, Z21, K2, K6 // 6291554264f1 or 6291d54264f1
+ VPCMPGTB (R8), Z21, K2, K6 // 62d155426430 or 62d1d5426430
+ VPCMPGTB 15(DX)(BX*2), Z21, K2, K6 // 62f1554264b45a0f000000 or 62f1d54264b45a0f000000
+ VPCMPGTB Z16, Z7, K2, K4 // 62b1454a64e0 or 62b1c54a64e0
+ VPCMPGTB Z25, Z7, K2, K4 // 6291454a64e1 or 6291c54a64e1
+ VPCMPGTB (R8), Z7, K2, K4 // 62d1454a6420 or 62d1c54a6420
+ VPCMPGTB 15(DX)(BX*2), Z7, K2, K4 // 62f1454a64a45a0f000000 or 62f1c54a64a45a0f000000
+ VPCMPGTB Z16, Z21, K2, K4 // 62b1554264e0 or 62b1d54264e0
+ VPCMPGTB Z25, Z21, K2, K4 // 6291554264e1 or 6291d54264e1
+ VPCMPGTB (R8), Z21, K2, K4 // 62d155426420 or 62d1d5426420
+ VPCMPGTB 15(DX)(BX*2), Z21, K2, K4 // 62f1554264a45a0f000000 or 62f1d54264a45a0f000000
+ VPCMPGTW X11, X23, K7, K3 // 62d1450765db or 62d1c50765db
+ VPCMPGTW 17(SP)(BP*1), X23, K7, K3 // 62f14507659c2c11000000 or 62f1c507659c2c11000000
+ VPCMPGTW -7(CX)(DX*8), X23, K7, K3 // 62f14507659cd1f9ffffff or 62f1c507659cd1f9ffffff
+ VPCMPGTW X11, X23, K7, K1 // 62d1450765cb or 62d1c50765cb
+ VPCMPGTW 17(SP)(BP*1), X23, K7, K1 // 62f14507658c2c11000000 or 62f1c507658c2c11000000
+ VPCMPGTW -7(CX)(DX*8), X23, K7, K1 // 62f14507658cd1f9ffffff or 62f1c507658cd1f9ffffff
+ VPCMPGTW Y21, Y12, K6, K5 // 62b11d2e65ed or 62b19d2e65ed
+ VPCMPGTW 7(AX)(CX*4), Y12, K6, K5 // 62f11d2e65ac8807000000 or 62f19d2e65ac8807000000
+ VPCMPGTW 7(AX)(CX*1), Y12, K6, K5 // 62f11d2e65ac0807000000 or 62f19d2e65ac0807000000
+ VPCMPGTW Y21, Y12, K6, K4 // 62b11d2e65e5 or 62b19d2e65e5
+ VPCMPGTW 7(AX)(CX*4), Y12, K6, K4 // 62f11d2e65a48807000000 or 62f19d2e65a48807000000
+ VPCMPGTW 7(AX)(CX*1), Y12, K6, K4 // 62f11d2e65a40807000000 or 62f19d2e65a40807000000
+ VPCMPGTW Z23, Z27, K3, K7 // 62b1254365ff or 62b1a54365ff
+ VPCMPGTW Z9, Z27, K3, K7 // 62d1254365f9 or 62d1a54365f9
+ VPCMPGTW 15(R8)(R14*1), Z27, K3, K7 // 6291254365bc300f000000 or 6291a54365bc300f000000
+ VPCMPGTW 15(R8)(R14*2), Z27, K3, K7 // 6291254365bc700f000000 or 6291a54365bc700f000000
+ VPCMPGTW Z23, Z25, K3, K7 // 62b1354365ff or 62b1b54365ff
+ VPCMPGTW Z9, Z25, K3, K7 // 62d1354365f9 or 62d1b54365f9
+ VPCMPGTW 15(R8)(R14*1), Z25, K3, K7 // 6291354365bc300f000000 or 6291b54365bc300f000000
+ VPCMPGTW 15(R8)(R14*2), Z25, K3, K7 // 6291354365bc700f000000 or 6291b54365bc700f000000
+ VPCMPGTW Z23, Z27, K3, K6 // 62b1254365f7 or 62b1a54365f7
+ VPCMPGTW Z9, Z27, K3, K6 // 62d1254365f1 or 62d1a54365f1
+ VPCMPGTW 15(R8)(R14*1), Z27, K3, K6 // 6291254365b4300f000000 or 6291a54365b4300f000000
+ VPCMPGTW 15(R8)(R14*2), Z27, K3, K6 // 6291254365b4700f000000 or 6291a54365b4700f000000
+ VPCMPGTW Z23, Z25, K3, K6 // 62b1354365f7 or 62b1b54365f7
+ VPCMPGTW Z9, Z25, K3, K6 // 62d1354365f1 or 62d1b54365f1
+ VPCMPGTW 15(R8)(R14*1), Z25, K3, K6 // 6291354365b4300f000000 or 6291b54365b4300f000000
+ VPCMPGTW 15(R8)(R14*2), Z25, K3, K6 // 6291354365b4700f000000 or 6291b54365b4700f000000
+ VPCMPUB $121, X0, X14, K7, K4 // 62f30d0f3ee079
+ VPCMPUB $121, 15(R8)(R14*1), X14, K7, K4 // 62930d0f3ea4300f00000079
+ VPCMPUB $121, 15(R8)(R14*2), X14, K7, K4 // 62930d0f3ea4700f00000079
+ VPCMPUB $121, X0, X14, K7, K6 // 62f30d0f3ef079
+ VPCMPUB $121, 15(R8)(R14*1), X14, K7, K6 // 62930d0f3eb4300f00000079
+ VPCMPUB $121, 15(R8)(R14*2), X14, K7, K6 // 62930d0f3eb4700f00000079
+ VPCMPUB $13, Y7, Y26, K2, K1 // 62f32d223ecf0d
+ VPCMPUB $13, 17(SP)(BP*8), Y26, K2, K1 // 62f32d223e8cec110000000d
+ VPCMPUB $13, 17(SP)(BP*4), Y26, K2, K1 // 62f32d223e8cac110000000d
+ VPCMPUB $13, Y7, Y26, K2, K3 // 62f32d223edf0d
+ VPCMPUB $13, 17(SP)(BP*8), Y26, K2, K3 // 62f32d223e9cec110000000d
+ VPCMPUB $13, 17(SP)(BP*4), Y26, K2, K3 // 62f32d223e9cac110000000d
+ VPCMPUB $65, Z8, Z14, K5, K6 // 62d30d4d3ef041
+ VPCMPUB $65, Z24, Z14, K5, K6 // 62930d4d3ef041
+ VPCMPUB $65, 99(R15)(R15*4), Z14, K5, K6 // 62930d4d3eb4bf6300000041
+ VPCMPUB $65, 15(DX), Z14, K5, K6 // 62f30d4d3eb20f00000041
+ VPCMPUB $65, Z8, Z7, K5, K6 // 62d3454d3ef041
+ VPCMPUB $65, Z24, Z7, K5, K6 // 6293454d3ef041
+ VPCMPUB $65, 99(R15)(R15*4), Z7, K5, K6 // 6293454d3eb4bf6300000041
+ VPCMPUB $65, 15(DX), Z7, K5, K6 // 62f3454d3eb20f00000041
+ VPCMPUB $65, Z8, Z14, K5, K7 // 62d30d4d3ef841
+ VPCMPUB $65, Z24, Z14, K5, K7 // 62930d4d3ef841
+ VPCMPUB $65, 99(R15)(R15*4), Z14, K5, K7 // 62930d4d3ebcbf6300000041
+ VPCMPUB $65, 15(DX), Z14, K5, K7 // 62f30d4d3eba0f00000041
+ VPCMPUB $65, Z8, Z7, K5, K7 // 62d3454d3ef841
+ VPCMPUB $65, Z24, Z7, K5, K7 // 6293454d3ef841
+ VPCMPUB $65, 99(R15)(R15*4), Z7, K5, K7 // 6293454d3ebcbf6300000041
+ VPCMPUB $65, 15(DX), Z7, K5, K7 // 62f3454d3eba0f00000041
+ VPCMPUW $79, X25, X5, K3, K1 // 6293d50b3ec94f
+ VPCMPUW $79, (CX), X5, K3, K1 // 62f3d50b3e094f
+ VPCMPUW $79, 99(R15), X5, K3, K1 // 62d3d50b3e8f630000004f
+ VPCMPUW $79, X25, X5, K3, K5 // 6293d50b3ee94f
+ VPCMPUW $79, (CX), X5, K3, K5 // 62f3d50b3e294f
+ VPCMPUW $79, 99(R15), X5, K3, K5 // 62d3d50b3eaf630000004f
+ VPCMPUW $64, Y6, Y22, K2, K3 // 62f3cd223ede40
+ VPCMPUW $64, 7(AX), Y22, K2, K3 // 62f3cd223e980700000040
+ VPCMPUW $64, (DI), Y22, K2, K3 // 62f3cd223e1f40
+ VPCMPUW $64, Y6, Y22, K2, K1 // 62f3cd223ece40
+ VPCMPUW $64, 7(AX), Y22, K2, K1 // 62f3cd223e880700000040
+ VPCMPUW $64, (DI), Y22, K2, K1 // 62f3cd223e0f40
+ VPCMPUW $27, Z14, Z15, K1, K5 // 62d385493eee1b
+ VPCMPUW $27, Z27, Z15, K1, K5 // 629385493eeb1b
+ VPCMPUW $27, -7(CX)(DX*1), Z15, K1, K5 // 62f385493eac11f9ffffff1b
+ VPCMPUW $27, -15(R14)(R15*4), Z15, K1, K5 // 629385493eacbef1ffffff1b
+ VPCMPUW $27, Z14, Z12, K1, K5 // 62d39d493eee1b
+ VPCMPUW $27, Z27, Z12, K1, K5 // 62939d493eeb1b
+ VPCMPUW $27, -7(CX)(DX*1), Z12, K1, K5 // 62f39d493eac11f9ffffff1b
+ VPCMPUW $27, -15(R14)(R15*4), Z12, K1, K5 // 62939d493eacbef1ffffff1b
+ VPCMPUW $27, Z14, Z15, K1, K4 // 62d385493ee61b
+ VPCMPUW $27, Z27, Z15, K1, K4 // 629385493ee31b
+ VPCMPUW $27, -7(CX)(DX*1), Z15, K1, K4 // 62f385493ea411f9ffffff1b
+ VPCMPUW $27, -15(R14)(R15*4), Z15, K1, K4 // 629385493ea4bef1ffffff1b
+ VPCMPUW $27, Z14, Z12, K1, K4 // 62d39d493ee61b
+ VPCMPUW $27, Z27, Z12, K1, K4 // 62939d493ee31b
+ VPCMPUW $27, -7(CX)(DX*1), Z12, K1, K4 // 62f39d493ea411f9ffffff1b
+ VPCMPUW $27, -15(R14)(R15*4), Z12, K1, K4 // 62939d493ea4bef1ffffff1b
+ VPCMPW $47, X9, X0, K2, K7 // 62d3fd0a3ff92f
+ VPCMPW $47, 99(R15)(R15*2), X0, K2, K7 // 6293fd0a3fbc7f630000002f
+ VPCMPW $47, -7(DI), X0, K2, K7 // 62f3fd0a3fbff9ffffff2f
+ VPCMPW $47, X9, X0, K2, K6 // 62d3fd0a3ff12f
+ VPCMPW $47, 99(R15)(R15*2), X0, K2, K6 // 6293fd0a3fb47f630000002f
+ VPCMPW $47, -7(DI), X0, K2, K6 // 62f3fd0a3fb7f9ffffff2f
+ VPCMPW $82, Y7, Y21, K1, K4 // 62f3d5213fe752
+ VPCMPW $82, 99(R15)(R15*1), Y21, K1, K4 // 6293d5213fa43f6300000052
+ VPCMPW $82, (DX), Y21, K1, K4 // 62f3d5213f2252
+ VPCMPW $82, Y7, Y21, K1, K6 // 62f3d5213ff752
+ VPCMPW $82, 99(R15)(R15*1), Y21, K1, K6 // 6293d5213fb43f6300000052
+ VPCMPW $82, (DX), Y21, K1, K6 // 62f3d5213f3252
+ VPCMPW $126, Z13, Z11, K7, K0 // 62d3a54f3fc57e
+ VPCMPW $126, Z14, Z11, K7, K0 // 62d3a54f3fc67e
+ VPCMPW $126, 15(DX)(BX*1), Z11, K7, K0 // 62f3a54f3f841a0f0000007e
+ VPCMPW $126, -7(CX)(DX*2), Z11, K7, K0 // 62f3a54f3f8451f9ffffff7e
+ VPCMPW $126, Z13, Z5, K7, K0 // 62d3d54f3fc57e
+ VPCMPW $126, Z14, Z5, K7, K0 // 62d3d54f3fc67e
+ VPCMPW $126, 15(DX)(BX*1), Z5, K7, K0 // 62f3d54f3f841a0f0000007e
+ VPCMPW $126, -7(CX)(DX*2), Z5, K7, K0 // 62f3d54f3f8451f9ffffff7e
+ VPCMPW $126, Z13, Z11, K7, K7 // 62d3a54f3ffd7e
+ VPCMPW $126, Z14, Z11, K7, K7 // 62d3a54f3ffe7e
+ VPCMPW $126, 15(DX)(BX*1), Z11, K7, K7 // 62f3a54f3fbc1a0f0000007e
+ VPCMPW $126, -7(CX)(DX*2), Z11, K7, K7 // 62f3a54f3fbc51f9ffffff7e
+ VPCMPW $126, Z13, Z5, K7, K7 // 62d3d54f3ffd7e
+ VPCMPW $126, Z14, Z5, K7, K7 // 62d3d54f3ffe7e
+ VPCMPW $126, 15(DX)(BX*1), Z5, K7, K7 // 62f3d54f3fbc1a0f0000007e
+ VPCMPW $126, -7(CX)(DX*2), Z5, K7, K7 // 62f3d54f3fbc51f9ffffff7e
+ VPERMI2W X16, X20, K2, X7 // 62b2dd0275f8
+ VPERMI2W 7(SI)(DI*1), X20, K2, X7 // 62f2dd0275bc3e07000000
+ VPERMI2W 15(DX)(BX*8), X20, K2, X7 // 62f2dd0275bcda0f000000
+ VPERMI2W Y18, Y14, K5, Y12 // 62328d2d75e2
+ VPERMI2W -7(CX)(DX*1), Y14, K5, Y12 // 62728d2d75a411f9ffffff
+ VPERMI2W -15(R14)(R15*4), Y14, K5, Y12 // 62128d2d75a4bef1ffffff
+ VPERMI2W Z28, Z12, K3, Z1 // 62929d4b75cc
+ VPERMI2W Z13, Z12, K3, Z1 // 62d29d4b75cd
+ VPERMI2W 99(R15)(R15*8), Z12, K3, Z1 // 62929d4b758cff63000000
+ VPERMI2W 7(AX)(CX*8), Z12, K3, Z1 // 62f29d4b758cc807000000
+ VPERMI2W Z28, Z16, K3, Z1 // 6292fd4375cc
+ VPERMI2W Z13, Z16, K3, Z1 // 62d2fd4375cd
+ VPERMI2W 99(R15)(R15*8), Z16, K3, Z1 // 6292fd43758cff63000000
+ VPERMI2W 7(AX)(CX*8), Z16, K3, Z1 // 62f2fd43758cc807000000
+ VPERMI2W Z28, Z12, K3, Z3 // 62929d4b75dc
+ VPERMI2W Z13, Z12, K3, Z3 // 62d29d4b75dd
+ VPERMI2W 99(R15)(R15*8), Z12, K3, Z3 // 62929d4b759cff63000000
+ VPERMI2W 7(AX)(CX*8), Z12, K3, Z3 // 62f29d4b759cc807000000
+ VPERMI2W Z28, Z16, K3, Z3 // 6292fd4375dc
+ VPERMI2W Z13, Z16, K3, Z3 // 62d2fd4375dd
+ VPERMI2W 99(R15)(R15*8), Z16, K3, Z3 // 6292fd43759cff63000000
+ VPERMI2W 7(AX)(CX*8), Z16, K3, Z3 // 62f2fd43759cc807000000
+ VPERMT2W X0, X0, K3, X14 // 6272fd0b7df0
+ VPERMT2W 15(R8)(R14*1), X0, K3, X14 // 6212fd0b7db4300f000000
+ VPERMT2W 15(R8)(R14*2), X0, K3, X14 // 6212fd0b7db4700f000000
+ VPERMT2W Y8, Y27, K2, Y22 // 62c2a5227df0
+ VPERMT2W 7(SI)(DI*8), Y27, K2, Y22 // 62e2a5227db4fe07000000
+ VPERMT2W -15(R14), Y27, K2, Y22 // 62c2a5227db6f1ffffff
+ VPERMT2W Z22, Z8, K1, Z14 // 6232bd497df6
+ VPERMT2W Z25, Z8, K1, Z14 // 6212bd497df1
+ VPERMT2W 17(SP)(BP*2), Z8, K1, Z14 // 6272bd497db46c11000000
+ VPERMT2W -7(DI)(R8*4), Z8, K1, Z14 // 6232bd497db487f9ffffff
+ VPERMT2W Z22, Z24, K1, Z14 // 6232bd417df6
+ VPERMT2W Z25, Z24, K1, Z14 // 6212bd417df1
+ VPERMT2W 17(SP)(BP*2), Z24, K1, Z14 // 6272bd417db46c11000000
+ VPERMT2W -7(DI)(R8*4), Z24, K1, Z14 // 6232bd417db487f9ffffff
+ VPERMT2W Z22, Z8, K1, Z7 // 62b2bd497dfe
+ VPERMT2W Z25, Z8, K1, Z7 // 6292bd497df9
+ VPERMT2W 17(SP)(BP*2), Z8, K1, Z7 // 62f2bd497dbc6c11000000
+ VPERMT2W -7(DI)(R8*4), Z8, K1, Z7 // 62b2bd497dbc87f9ffffff
+ VPERMT2W Z22, Z24, K1, Z7 // 62b2bd417dfe
+ VPERMT2W Z25, Z24, K1, Z7 // 6292bd417df9
+ VPERMT2W 17(SP)(BP*2), Z24, K1, Z7 // 62f2bd417dbc6c11000000
+ VPERMT2W -7(DI)(R8*4), Z24, K1, Z7 // 62b2bd417dbc87f9ffffff
+ VPERMW X17, X11, K2, X25 // 6222a50a8dc9
+ VPERMW (R14), X11, K2, X25 // 6242a50a8d0e
+ VPERMW -7(DI)(R8*8), X11, K2, X25 // 6222a50a8d8cc7f9ffffff
+ VPERMW Y9, Y22, K1, Y9 // 6252cd218dc9
+ VPERMW 7(SI)(DI*1), Y22, K1, Y9 // 6272cd218d8c3e07000000
+ VPERMW 15(DX)(BX*8), Y22, K1, Y9 // 6272cd218d8cda0f000000
+ VPERMW Z0, Z6, K7, Z1 // 62f2cd4f8dc8
+ VPERMW Z8, Z6, K7, Z1 // 62d2cd4f8dc8
+ VPERMW 15(R8), Z6, K7, Z1 // 62d2cd4f8d880f000000
+ VPERMW (BP), Z6, K7, Z1 // 62f2cd4f8d4d00
+ VPERMW Z0, Z2, K7, Z1 // 62f2ed4f8dc8
+ VPERMW Z8, Z2, K7, Z1 // 62d2ed4f8dc8
+ VPERMW 15(R8), Z2, K7, Z1 // 62d2ed4f8d880f000000
+ VPERMW (BP), Z2, K7, Z1 // 62f2ed4f8d4d00
+ VPERMW Z0, Z6, K7, Z16 // 62e2cd4f8dc0
+ VPERMW Z8, Z6, K7, Z16 // 62c2cd4f8dc0
+ VPERMW 15(R8), Z6, K7, Z16 // 62c2cd4f8d800f000000
+ VPERMW (BP), Z6, K7, Z16 // 62e2cd4f8d4500
+ VPERMW Z0, Z2, K7, Z16 // 62e2ed4f8dc0
+ VPERMW Z8, Z2, K7, Z16 // 62c2ed4f8dc0
+ VPERMW 15(R8), Z2, K7, Z16 // 62c2ed4f8d800f000000
+ VPERMW (BP), Z2, K7, Z16 // 62e2ed4f8d4500
+ VPEXTRB $79, X26, AX // 62637d0814d04f or 6263fd0814d04f
+ VPEXTRB $79, X26, R9 // 62437d0814d14f or 6243fd0814d14f
+ VPEXTRB $79, X26, 7(SI)(DI*1) // 62637d0814543e074f or 6263fd0814543e074f
+ VPEXTRB $79, X26, 15(DX)(BX*8) // 62637d081454da0f4f or 6263fd081454da0f4f
+ VPMADDUBSW X21, X16, K2, X0 // 62b27d0204c5 or 62b2fd0204c5
+ VPMADDUBSW 15(R8)(R14*8), X16, K2, X0 // 62927d020484f00f000000 or 6292fd020484f00f000000
+ VPMADDUBSW -15(R14)(R15*2), X16, K2, X0 // 62927d0204847ef1ffffff or 6292fd0204847ef1ffffff
+ VPMADDUBSW Y3, Y31, K4, Y11 // 6272052404db or 6272852404db
+ VPMADDUBSW -17(BP)(SI*2), Y31, K4, Y11 // 62720524049c75efffffff or 62728524049c75efffffff
+ VPMADDUBSW 7(AX)(CX*2), Y31, K4, Y11 // 62720524049c4807000000 or 62728524049c4807000000
+ VPMADDUBSW Z6, Z22, K1, Z12 // 62724d4104e6 or 6272cd4104e6
+ VPMADDUBSW Z8, Z22, K1, Z12 // 62524d4104e0 or 6252cd4104e0
+ VPMADDUBSW 99(R15)(R15*1), Z22, K1, Z12 // 62124d4104a43f63000000 or 6212cd4104a43f63000000
+ VPMADDUBSW (DX), Z22, K1, Z12 // 62724d410422 or 6272cd410422
+ VPMADDUBSW Z6, Z11, K1, Z12 // 6272254904e6 or 6272a54904e6
+ VPMADDUBSW Z8, Z11, K1, Z12 // 6252254904e0 or 6252a54904e0
+ VPMADDUBSW 99(R15)(R15*1), Z11, K1, Z12 // 6212254904a43f63000000 or 6212a54904a43f63000000
+ VPMADDUBSW (DX), Z11, K1, Z12 // 627225490422 or 6272a5490422
+ VPMADDUBSW Z6, Z22, K1, Z27 // 62624d4104de or 6262cd4104de
+ VPMADDUBSW Z8, Z22, K1, Z27 // 62424d4104d8 or 6242cd4104d8
+ VPMADDUBSW 99(R15)(R15*1), Z22, K1, Z27 // 62024d41049c3f63000000 or 6202cd41049c3f63000000
+ VPMADDUBSW (DX), Z22, K1, Z27 // 62624d41041a or 6262cd41041a
+ VPMADDUBSW Z6, Z11, K1, Z27 // 6262254904de or 6262a54904de
+ VPMADDUBSW Z8, Z11, K1, Z27 // 6242254904d8 or 6242a54904d8
+ VPMADDUBSW 99(R15)(R15*1), Z11, K1, Z27 // 62022549049c3f63000000 or 6202a549049c3f63000000
+ VPMADDUBSW (DX), Z11, K1, Z27 // 62622549041a or 6262a549041a
+ VPMADDWD X22, X28, K3, X0 // 62b11d03f5c6 or 62b19d03f5c6
+ VPMADDWD -15(R14)(R15*1), X28, K3, X0 // 62911d03f5843ef1ffffff or 62919d03f5843ef1ffffff
+ VPMADDWD -15(BX), X28, K3, X0 // 62f11d03f583f1ffffff or 62f19d03f583f1ffffff
+ VPMADDWD Y13, Y2, K4, Y14 // 62516d2cf5f5 or 6251ed2cf5f5
+ VPMADDWD 15(R8)(R14*1), Y2, K4, Y14 // 62116d2cf5b4300f000000 or 6211ed2cf5b4300f000000
+ VPMADDWD 15(R8)(R14*2), Y2, K4, Y14 // 62116d2cf5b4700f000000 or 6211ed2cf5b4700f000000
+ VPMADDWD Z9, Z12, K5, Z25 // 62411d4df5c9 or 62419d4df5c9
+ VPMADDWD Z12, Z12, K5, Z25 // 62411d4df5cc or 62419d4df5cc
+ VPMADDWD -17(BP)(SI*8), Z12, K5, Z25 // 62611d4df58cf5efffffff or 62619d4df58cf5efffffff
+ VPMADDWD (R15), Z12, K5, Z25 // 62411d4df50f or 62419d4df50f
+ VPMADDWD Z9, Z17, K5, Z25 // 62417545f5c9 or 6241f545f5c9
+ VPMADDWD Z12, Z17, K5, Z25 // 62417545f5cc or 6241f545f5cc
+ VPMADDWD -17(BP)(SI*8), Z17, K5, Z25 // 62617545f58cf5efffffff or 6261f545f58cf5efffffff
+ VPMADDWD (R15), Z17, K5, Z25 // 62417545f50f or 6241f545f50f
+ VPMADDWD Z9, Z12, K5, Z12 // 62511d4df5e1 or 62519d4df5e1
+ VPMADDWD Z12, Z12, K5, Z12 // 62511d4df5e4 or 62519d4df5e4
+ VPMADDWD -17(BP)(SI*8), Z12, K5, Z12 // 62711d4df5a4f5efffffff or 62719d4df5a4f5efffffff
+ VPMADDWD (R15), Z12, K5, Z12 // 62511d4df527 or 62519d4df527
+ VPMADDWD Z9, Z17, K5, Z12 // 62517545f5e1 or 6251f545f5e1
+ VPMADDWD Z12, Z17, K5, Z12 // 62517545f5e4 or 6251f545f5e4
+ VPMADDWD -17(BP)(SI*8), Z17, K5, Z12 // 62717545f5a4f5efffffff or 6271f545f5a4f5efffffff
+ VPMADDWD (R15), Z17, K5, Z12 // 62517545f527 or 6251f545f527
+ VPMAXSB X7, X19, K7, X7 // 62f265073cff or 62f2e5073cff
+ VPMAXSB 7(AX)(CX*4), X19, K7, X7 // 62f265073cbc8807000000 or 62f2e5073cbc8807000000
+ VPMAXSB 7(AX)(CX*1), X19, K7, X7 // 62f265073cbc0807000000 or 62f2e5073cbc0807000000
+ VPMAXSB Y22, Y15, K7, Y27 // 6222052f3cde or 6222852f3cde
+ VPMAXSB (R14), Y15, K7, Y27 // 6242052f3c1e or 6242852f3c1e
+ VPMAXSB -7(DI)(R8*8), Y15, K7, Y27 // 6222052f3c9cc7f9ffffff or 6222852f3c9cc7f9ffffff
+ VPMAXSB Z8, Z3, K6, Z6 // 62d2654e3cf0 or 62d2e54e3cf0
+ VPMAXSB Z2, Z3, K6, Z6 // 62f2654e3cf2 or 62f2e54e3cf2
+ VPMAXSB 7(SI)(DI*8), Z3, K6, Z6 // 62f2654e3cb4fe07000000 or 62f2e54e3cb4fe07000000
+ VPMAXSB -15(R14), Z3, K6, Z6 // 62d2654e3cb6f1ffffff or 62d2e54e3cb6f1ffffff
+ VPMAXSB Z8, Z21, K6, Z6 // 62d255463cf0 or 62d2d5463cf0
+ VPMAXSB Z2, Z21, K6, Z6 // 62f255463cf2 or 62f2d5463cf2
+ VPMAXSB 7(SI)(DI*8), Z21, K6, Z6 // 62f255463cb4fe07000000 or 62f2d5463cb4fe07000000
+ VPMAXSB -15(R14), Z21, K6, Z6 // 62d255463cb6f1ffffff or 62d2d5463cb6f1ffffff
+ VPMAXSB Z8, Z3, K6, Z25 // 6242654e3cc8 or 6242e54e3cc8
+ VPMAXSB Z2, Z3, K6, Z25 // 6262654e3cca or 6262e54e3cca
+ VPMAXSB 7(SI)(DI*8), Z3, K6, Z25 // 6262654e3c8cfe07000000 or 6262e54e3c8cfe07000000
+ VPMAXSB -15(R14), Z3, K6, Z25 // 6242654e3c8ef1ffffff or 6242e54e3c8ef1ffffff
+ VPMAXSB Z8, Z21, K6, Z25 // 624255463cc8 or 6242d5463cc8
+ VPMAXSB Z2, Z21, K6, Z25 // 626255463cca or 6262d5463cca
+ VPMAXSB 7(SI)(DI*8), Z21, K6, Z25 // 626255463c8cfe07000000 or 6262d5463c8cfe07000000
+ VPMAXSB -15(R14), Z21, K6, Z25 // 624255463c8ef1ffffff or 6242d5463c8ef1ffffff
+ VPMAXSW X12, X0, K5, X12 // 62517d0deee4 or 6251fd0deee4
+ VPMAXSW 7(SI)(DI*4), X0, K5, X12 // 62717d0deea4be07000000 or 6271fd0deea4be07000000
+ VPMAXSW -7(DI)(R8*2), X0, K5, X12 // 62317d0deea447f9ffffff or 6231fd0deea447f9ffffff
+ VPMAXSW Y14, Y19, K3, Y23 // 62c16523eefe or 62c1e523eefe
+ VPMAXSW 99(R15)(R15*2), Y19, K3, Y23 // 62816523eebc7f63000000 or 6281e523eebc7f63000000
+ VPMAXSW -7(DI), Y19, K3, Y23 // 62e16523eebff9ffffff or 62e1e523eebff9ffffff
+ VPMAXSW Z18, Z11, K4, Z12 // 6231254ceee2 or 6231a54ceee2
+ VPMAXSW Z24, Z11, K4, Z12 // 6211254ceee0 or 6211a54ceee0
+ VPMAXSW -7(CX), Z11, K4, Z12 // 6271254ceea1f9ffffff or 6271a54ceea1f9ffffff
+ VPMAXSW 15(DX)(BX*4), Z11, K4, Z12 // 6271254ceea49a0f000000 or 6271a54ceea49a0f000000
+ VPMAXSW Z18, Z5, K4, Z12 // 6231554ceee2 or 6231d54ceee2
+ VPMAXSW Z24, Z5, K4, Z12 // 6211554ceee0 or 6211d54ceee0
+ VPMAXSW -7(CX), Z5, K4, Z12 // 6271554ceea1f9ffffff or 6271d54ceea1f9ffffff
+ VPMAXSW 15(DX)(BX*4), Z5, K4, Z12 // 6271554ceea49a0f000000 or 6271d54ceea49a0f000000
+ VPMAXSW Z18, Z11, K4, Z22 // 62a1254ceef2 or 62a1a54ceef2
+ VPMAXSW Z24, Z11, K4, Z22 // 6281254ceef0 or 6281a54ceef0
+ VPMAXSW -7(CX), Z11, K4, Z22 // 62e1254ceeb1f9ffffff or 62e1a54ceeb1f9ffffff
+ VPMAXSW 15(DX)(BX*4), Z11, K4, Z22 // 62e1254ceeb49a0f000000 or 62e1a54ceeb49a0f000000
+ VPMAXSW Z18, Z5, K4, Z22 // 62a1554ceef2 or 62a1d54ceef2
+ VPMAXSW Z24, Z5, K4, Z22 // 6281554ceef0 or 6281d54ceef0
+ VPMAXSW -7(CX), Z5, K4, Z22 // 62e1554ceeb1f9ffffff or 62e1d54ceeb1f9ffffff
+ VPMAXSW 15(DX)(BX*4), Z5, K4, Z22 // 62e1554ceeb49a0f000000 or 62e1d54ceeb49a0f000000
+ VPMAXUB X17, X5, K2, X14 // 6231550adef1 or 6231d50adef1
+ VPMAXUB 17(SP), X5, K2, X14 // 6271550adeb42411000000 or 6271d50adeb42411000000
+ VPMAXUB -17(BP)(SI*4), X5, K2, X14 // 6271550adeb4b5efffffff or 6271d50adeb4b5efffffff
+ VPMAXUB Y16, Y5, K2, Y21 // 62a1552adee8 or 62a1d52adee8
+ VPMAXUB -7(CX)(DX*1), Y5, K2, Y21 // 62e1552adeac11f9ffffff or 62e1d52adeac11f9ffffff
+ VPMAXUB -15(R14)(R15*4), Y5, K2, Y21 // 6281552adeacbef1ffffff or 6281d52adeacbef1ffffff
+ VPMAXUB Z6, Z7, K3, Z2 // 62f1454bded6 or 62f1c54bded6
+ VPMAXUB Z16, Z7, K3, Z2 // 62b1454bded0 or 62b1c54bded0
+ VPMAXUB 99(R15)(R15*8), Z7, K3, Z2 // 6291454bde94ff63000000 or 6291c54bde94ff63000000
+ VPMAXUB 7(AX)(CX*8), Z7, K3, Z2 // 62f1454bde94c807000000 or 62f1c54bde94c807000000
+ VPMAXUB Z6, Z13, K3, Z2 // 62f1154bded6 or 62f1954bded6
+ VPMAXUB Z16, Z13, K3, Z2 // 62b1154bded0 or 62b1954bded0
+ VPMAXUB 99(R15)(R15*8), Z13, K3, Z2 // 6291154bde94ff63000000 or 6291954bde94ff63000000
+ VPMAXUB 7(AX)(CX*8), Z13, K3, Z2 // 62f1154bde94c807000000 or 62f1954bde94c807000000
+ VPMAXUB Z6, Z7, K3, Z21 // 62e1454bdeee or 62e1c54bdeee
+ VPMAXUB Z16, Z7, K3, Z21 // 62a1454bdee8 or 62a1c54bdee8
+ VPMAXUB 99(R15)(R15*8), Z7, K3, Z21 // 6281454bdeacff63000000 or 6281c54bdeacff63000000
+ VPMAXUB 7(AX)(CX*8), Z7, K3, Z21 // 62e1454bdeacc807000000 or 62e1c54bdeacc807000000
+ VPMAXUB Z6, Z13, K3, Z21 // 62e1154bdeee or 62e1954bdeee
+ VPMAXUB Z16, Z13, K3, Z21 // 62a1154bdee8 or 62a1954bdee8
+ VPMAXUB 99(R15)(R15*8), Z13, K3, Z21 // 6281154bdeacff63000000 or 6281954bdeacff63000000
+ VPMAXUB 7(AX)(CX*8), Z13, K3, Z21 // 62e1154bdeacc807000000 or 62e1954bdeacc807000000
+ VPMAXUW X9, X24, K7, X28 // 62423d073ee1 or 6242bd073ee1
+ VPMAXUW -17(BP)(SI*8), X24, K7, X28 // 62623d073ea4f5efffffff or 6262bd073ea4f5efffffff
+ VPMAXUW (R15), X24, K7, X28 // 62423d073e27 or 6242bd073e27
+ VPMAXUW Y7, Y19, K1, Y11 // 627265213edf or 6272e5213edf
+ VPMAXUW 17(SP)(BP*2), Y19, K1, Y11 // 627265213e9c6c11000000 or 6272e5213e9c6c11000000
+ VPMAXUW -7(DI)(R8*4), Y19, K1, Y11 // 623265213e9c87f9ffffff or 6232e5213e9c87f9ffffff
+ VPMAXUW Z12, Z1, K1, Z20 // 62c275493ee4 or 62c2f5493ee4
+ VPMAXUW Z16, Z1, K1, Z20 // 62a275493ee0 or 62a2f5493ee0
+ VPMAXUW 15(R8)(R14*4), Z1, K1, Z20 // 628275493ea4b00f000000 or 6282f5493ea4b00f000000
+ VPMAXUW -7(CX)(DX*4), Z1, K1, Z20 // 62e275493ea491f9ffffff or 62e2f5493ea491f9ffffff
+ VPMAXUW Z12, Z3, K1, Z20 // 62c265493ee4 or 62c2e5493ee4
+ VPMAXUW Z16, Z3, K1, Z20 // 62a265493ee0 or 62a2e5493ee0
+ VPMAXUW 15(R8)(R14*4), Z3, K1, Z20 // 628265493ea4b00f000000 or 6282e5493ea4b00f000000
+ VPMAXUW -7(CX)(DX*4), Z3, K1, Z20 // 62e265493ea491f9ffffff or 62e2e5493ea491f9ffffff
+ VPMAXUW Z12, Z1, K1, Z9 // 625275493ecc or 6252f5493ecc
+ VPMAXUW Z16, Z1, K1, Z9 // 623275493ec8 or 6232f5493ec8
+ VPMAXUW 15(R8)(R14*4), Z1, K1, Z9 // 621275493e8cb00f000000 or 6212f5493e8cb00f000000
+ VPMAXUW -7(CX)(DX*4), Z1, K1, Z9 // 627275493e8c91f9ffffff or 6272f5493e8c91f9ffffff
+ VPMAXUW Z12, Z3, K1, Z9 // 625265493ecc or 6252e5493ecc
+ VPMAXUW Z16, Z3, K1, Z9 // 623265493ec8 or 6232e5493ec8
+ VPMAXUW 15(R8)(R14*4), Z3, K1, Z9 // 621265493e8cb00f000000 or 6212e5493e8cb00f000000
+ VPMAXUW -7(CX)(DX*4), Z3, K1, Z9 // 627265493e8c91f9ffffff or 6272e5493e8c91f9ffffff
+ VPMINSB X18, X26, K1, X15 // 62322d0138fa or 6232ad0138fa
+ VPMINSB 7(SI)(DI*8), X26, K1, X15 // 62722d0138bcfe07000000 or 6272ad0138bcfe07000000
+ VPMINSB -15(R14), X26, K1, X15 // 62522d0138bef1ffffff or 6252ad0138bef1ffffff
+ VPMINSB Y3, Y0, K7, Y6 // 62f27d2f38f3 or 62f2fd2f38f3
+ VPMINSB 15(R8), Y0, K7, Y6 // 62d27d2f38b00f000000 or 62d2fd2f38b00f000000
+ VPMINSB (BP), Y0, K7, Y6 // 62f27d2f387500 or 62f2fd2f387500
+ VPMINSB Z3, Z14, K2, Z28 // 62620d4a38e3 or 62628d4a38e3
+ VPMINSB Z12, Z14, K2, Z28 // 62420d4a38e4 or 62428d4a38e4
+ VPMINSB (R8), Z14, K2, Z28 // 62420d4a3820 or 62428d4a3820
+ VPMINSB 15(DX)(BX*2), Z14, K2, Z28 // 62620d4a38a45a0f000000 or 62628d4a38a45a0f000000
+ VPMINSB Z3, Z28, K2, Z28 // 62621d4238e3 or 62629d4238e3
+ VPMINSB Z12, Z28, K2, Z28 // 62421d4238e4 or 62429d4238e4
+ VPMINSB (R8), Z28, K2, Z28 // 62421d423820 or 62429d423820
+ VPMINSB 15(DX)(BX*2), Z28, K2, Z28 // 62621d4238a45a0f000000 or 62629d4238a45a0f000000
+ VPMINSB Z3, Z14, K2, Z13 // 62720d4a38eb or 62728d4a38eb
+ VPMINSB Z12, Z14, K2, Z13 // 62520d4a38ec or 62528d4a38ec
+ VPMINSB (R8), Z14, K2, Z13 // 62520d4a3828 or 62528d4a3828
+ VPMINSB 15(DX)(BX*2), Z14, K2, Z13 // 62720d4a38ac5a0f000000 or 62728d4a38ac5a0f000000
+ VPMINSB Z3, Z28, K2, Z13 // 62721d4238eb or 62729d4238eb
+ VPMINSB Z12, Z28, K2, Z13 // 62521d4238ec or 62529d4238ec
+ VPMINSB (R8), Z28, K2, Z13 // 62521d423828 or 62529d423828
+ VPMINSB 15(DX)(BX*2), Z28, K2, Z13 // 62721d4238ac5a0f000000 or 62729d4238ac5a0f000000
+ VPMINSW X24, X0, K7, X0 // 62917d0feac0 or 6291fd0feac0
+ VPMINSW -7(CX), X0, K7, X0 // 62f17d0fea81f9ffffff or 62f1fd0fea81f9ffffff
+ VPMINSW 15(DX)(BX*4), X0, K7, X0 // 62f17d0fea849a0f000000 or 62f1fd0fea849a0f000000
+ VPMINSW Y22, Y0, K6, Y7 // 62b17d2eeafe or 62b1fd2eeafe
+ VPMINSW 7(AX)(CX*4), Y0, K6, Y7 // 62f17d2eeabc8807000000 or 62f1fd2eeabc8807000000
+ VPMINSW 7(AX)(CX*1), Y0, K6, Y7 // 62f17d2eeabc0807000000 or 62f1fd2eeabc0807000000
+ VPMINSW Z23, Z20, K3, Z16 // 62a15d43eac7 or 62a1dd43eac7
+ VPMINSW Z19, Z20, K3, Z16 // 62a15d43eac3 or 62a1dd43eac3
+ VPMINSW 15(R8)(R14*1), Z20, K3, Z16 // 62815d43ea84300f000000 or 6281dd43ea84300f000000
+ VPMINSW 15(R8)(R14*2), Z20, K3, Z16 // 62815d43ea84700f000000 or 6281dd43ea84700f000000
+ VPMINSW Z23, Z0, K3, Z16 // 62a17d4beac7 or 62a1fd4beac7
+ VPMINSW Z19, Z0, K3, Z16 // 62a17d4beac3 or 62a1fd4beac3
+ VPMINSW 15(R8)(R14*1), Z0, K3, Z16 // 62817d4bea84300f000000 or 6281fd4bea84300f000000
+ VPMINSW 15(R8)(R14*2), Z0, K3, Z16 // 62817d4bea84700f000000 or 6281fd4bea84700f000000
+ VPMINSW Z23, Z20, K3, Z9 // 62315d43eacf or 6231dd43eacf
+ VPMINSW Z19, Z20, K3, Z9 // 62315d43eacb or 6231dd43eacb
+ VPMINSW 15(R8)(R14*1), Z20, K3, Z9 // 62115d43ea8c300f000000 or 6211dd43ea8c300f000000
+ VPMINSW 15(R8)(R14*2), Z20, K3, Z9 // 62115d43ea8c700f000000 or 6211dd43ea8c700f000000
+ VPMINSW Z23, Z0, K3, Z9 // 62317d4beacf or 6231fd4beacf
+ VPMINSW Z19, Z0, K3, Z9 // 62317d4beacb or 6231fd4beacb
+ VPMINSW 15(R8)(R14*1), Z0, K3, Z9 // 62117d4bea8c300f000000 or 6211fd4bea8c300f000000
+ VPMINSW 15(R8)(R14*2), Z0, K3, Z9 // 62117d4bea8c700f000000 or 6211fd4bea8c700f000000
+ VPMINUB X9, X7, K7, X20 // 62c1450fdae1 or 62c1c50fdae1
+ VPMINUB 99(R15)(R15*8), X7, K7, X20 // 6281450fdaa4ff63000000 or 6281c50fdaa4ff63000000
+ VPMINUB 7(AX)(CX*8), X7, K7, X20 // 62e1450fdaa4c807000000 or 62e1c50fdaa4c807000000
+ VPMINUB Y1, Y12, K4, Y13 // 62711d2cdae9 or 62719d2cdae9
+ VPMINUB (SI), Y12, K4, Y13 // 62711d2cda2e or 62719d2cda2e
+ VPMINUB 7(SI)(DI*2), Y12, K4, Y13 // 62711d2cdaac7e07000000 or 62719d2cdaac7e07000000
+ VPMINUB Z24, Z0, K4, Z0 // 62917d4cdac0 or 6291fd4cdac0
+ VPMINUB Z12, Z0, K4, Z0 // 62d17d4cdac4 or 62d1fd4cdac4
+ VPMINUB (R14), Z0, K4, Z0 // 62d17d4cda06 or 62d1fd4cda06
+ VPMINUB -7(DI)(R8*8), Z0, K4, Z0 // 62b17d4cda84c7f9ffffff or 62b1fd4cda84c7f9ffffff
+ VPMINUB Z24, Z25, K4, Z0 // 62913544dac0 or 6291b544dac0
+ VPMINUB Z12, Z25, K4, Z0 // 62d13544dac4 or 62d1b544dac4
+ VPMINUB (R14), Z25, K4, Z0 // 62d13544da06 or 62d1b544da06
+ VPMINUB -7(DI)(R8*8), Z25, K4, Z0 // 62b13544da84c7f9ffffff or 62b1b544da84c7f9ffffff
+ VPMINUB Z24, Z0, K4, Z11 // 62117d4cdad8 or 6211fd4cdad8
+ VPMINUB Z12, Z0, K4, Z11 // 62517d4cdadc or 6251fd4cdadc
+ VPMINUB (R14), Z0, K4, Z11 // 62517d4cda1e or 6251fd4cda1e
+ VPMINUB -7(DI)(R8*8), Z0, K4, Z11 // 62317d4cda9cc7f9ffffff or 6231fd4cda9cc7f9ffffff
+ VPMINUB Z24, Z25, K4, Z11 // 62113544dad8 or 6211b544dad8
+ VPMINUB Z12, Z25, K4, Z11 // 62513544dadc or 6251b544dadc
+ VPMINUB (R14), Z25, K4, Z11 // 62513544da1e or 6251b544da1e
+ VPMINUB -7(DI)(R8*8), Z25, K4, Z11 // 62313544da9cc7f9ffffff or 6231b544da9cc7f9ffffff
+ VPMINUW X13, X11, K2, X1 // 62d2250a3acd or 62d2a50a3acd
+ VPMINUW 15(R8)(R14*4), X11, K2, X1 // 6292250a3a8cb00f000000 or 6292a50a3a8cb00f000000
+ VPMINUW -7(CX)(DX*4), X11, K2, X1 // 62f2250a3a8c91f9ffffff or 62f2a50a3a8c91f9ffffff
+ VPMINUW Y13, Y28, K3, Y1 // 62d21d233acd or 62d29d233acd
+ VPMINUW 17(SP), Y28, K3, Y1 // 62f21d233a8c2411000000 or 62f29d233a8c2411000000
+ VPMINUW -17(BP)(SI*4), Y28, K3, Y1 // 62f21d233a8cb5efffffff or 62f29d233a8cb5efffffff
+ VPMINUW Z21, Z31, K3, Z17 // 62a205433acd or 62a285433acd
+ VPMINUW Z9, Z31, K3, Z17 // 62c205433ac9 or 62c285433ac9
+ VPMINUW 99(R15)(R15*2), Z31, K3, Z17 // 628205433a8c7f63000000 or 628285433a8c7f63000000
+ VPMINUW -7(DI), Z31, K3, Z17 // 62e205433a8ff9ffffff or 62e285433a8ff9ffffff
+ VPMINUW Z21, Z0, K3, Z17 // 62a27d4b3acd or 62a2fd4b3acd
+ VPMINUW Z9, Z0, K3, Z17 // 62c27d4b3ac9 or 62c2fd4b3ac9
+ VPMINUW 99(R15)(R15*2), Z0, K3, Z17 // 62827d4b3a8c7f63000000 or 6282fd4b3a8c7f63000000
+ VPMINUW -7(DI), Z0, K3, Z17 // 62e27d4b3a8ff9ffffff or 62e2fd4b3a8ff9ffffff
+ VPMINUW Z21, Z31, K3, Z23 // 62a205433afd or 62a285433afd
+ VPMINUW Z9, Z31, K3, Z23 // 62c205433af9 or 62c285433af9
+ VPMINUW 99(R15)(R15*2), Z31, K3, Z23 // 628205433abc7f63000000 or 628285433abc7f63000000
+ VPMINUW -7(DI), Z31, K3, Z23 // 62e205433abff9ffffff or 62e285433abff9ffffff
+ VPMINUW Z21, Z0, K3, Z23 // 62a27d4b3afd or 62a2fd4b3afd
+ VPMINUW Z9, Z0, K3, Z23 // 62c27d4b3af9 or 62c2fd4b3af9
+ VPMINUW 99(R15)(R15*2), Z0, K3, Z23 // 62827d4b3abc7f63000000 or 6282fd4b3abc7f63000000
+ VPMINUW -7(DI), Z0, K3, Z23 // 62e27d4b3abff9ffffff or 62e2fd4b3abff9ffffff
+ VPMOVB2M X0, K5 // 62f27e0829e8
+ VPMOVB2M X0, K4 // 62f27e0829e0
+ VPMOVB2M Y7, K4 // 62f27e2829e7
+ VPMOVB2M Y7, K6 // 62f27e2829f7
+ VPMOVB2M Z6, K1 // 62f27e4829ce
+ VPMOVB2M Z9, K1 // 62d27e4829c9
+ VPMOVB2M Z6, K3 // 62f27e4829de
+ VPMOVB2M Z9, K3 // 62d27e4829d9
+ VPMOVM2B K4, X26 // 62627e0828d4
+ VPMOVM2B K5, X26 // 62627e0828d5
+ VPMOVM2B K2, Y1 // 62f27e2828ca
+ VPMOVM2B K7, Y1 // 62f27e2828cf
+ VPMOVM2B K0, Z26 // 62627e4828d0
+ VPMOVM2B K5, Z26 // 62627e4828d5
+ VPMOVM2B K0, Z22 // 62e27e4828f0
+ VPMOVM2B K5, Z22 // 62e27e4828f5
+ VPMOVM2W K0, X16 // 62e2fe0828c0
+ VPMOVM2W K7, X16 // 62e2fe0828c7
+ VPMOVM2W K5, Y2 // 62f2fe2828d5
+ VPMOVM2W K4, Y2 // 62f2fe2828d4
+ VPMOVM2W K4, Z14 // 6272fe4828f4
+ VPMOVM2W K6, Z14 // 6272fe4828f6
+ VPMOVM2W K4, Z13 // 6272fe4828ec
+ VPMOVM2W K6, Z13 // 6272fe4828ee
+ VPMOVSWB X18, K3, X0 // 62e27e0b20d0
+ VPMOVSWB X18, K3, -7(CX) // 62e27e0b2091f9ffffff
+ VPMOVSWB X18, K3, 15(DX)(BX*4) // 62e27e0b20949a0f000000
+ VPMOVSWB Y6, K3, X8 // 62d27e2b20f0
+ VPMOVSWB Y6, K3, -7(CX)(DX*1) // 62f27e2b20b411f9ffffff
+ VPMOVSWB Y6, K3, -15(R14)(R15*4) // 62927e2b20b4bef1ffffff
+ VPMOVSWB Z22, K3, Y21 // 62a27e4b20f5
+ VPMOVSWB Z25, K3, Y21 // 62227e4b20cd
+ VPMOVSWB Z22, K3, 7(SI)(DI*1) // 62e27e4b20b43e07000000
+ VPMOVSWB Z25, K3, 7(SI)(DI*1) // 62627e4b208c3e07000000
+ VPMOVSWB Z22, K3, 15(DX)(BX*8) // 62e27e4b20b4da0f000000
+ VPMOVSWB Z25, K3, 15(DX)(BX*8) // 62627e4b208cda0f000000
+ VPMOVSXBW X13, K1, Y28 // 62427d2920e5 or 6242fd2920e5
+ VPMOVSXBW -17(BP), K1, Y28 // 62627d2920a5efffffff or 6262fd2920a5efffffff
+ VPMOVSXBW -15(R14)(R15*8), K1, Y28 // 62027d2920a4fef1ffffff or 6202fd2920a4fef1ffffff
+ VPMOVSXBW X24, K1, X8 // 62127d0920c0 or 6212fd0920c0
+ VPMOVSXBW (BX), K1, X8 // 62727d092003 or 6272fd092003
+ VPMOVSXBW -17(BP)(SI*1), K1, X8 // 62727d09208435efffffff or 6272fd09208435efffffff
+ VPMOVSXBW Y20, K7, Z0 // 62b27d4f20c4 or 62b2fd4f20c4
+ VPMOVSXBW -7(DI)(R8*1), K7, Z0 // 62b27d4f208407f9ffffff or 62b2fd4f208407f9ffffff
+ VPMOVSXBW (SP), K7, Z0 // 62f27d4f200424 or 62f2fd4f200424
+ VPMOVSXBW Y20, K7, Z8 // 62327d4f20c4 or 6232fd4f20c4
+ VPMOVSXBW -7(DI)(R8*1), K7, Z8 // 62327d4f208407f9ffffff or 6232fd4f208407f9ffffff
+ VPMOVSXBW (SP), K7, Z8 // 62727d4f200424 or 6272fd4f200424
+ VPMOVUSWB X6, K1, X6 // 62f27e0910f6
+ VPMOVUSWB X6, K1, 99(R15)(R15*2) // 62927e0910b47f63000000
+ VPMOVUSWB X6, K1, -7(DI) // 62f27e0910b7f9ffffff
+ VPMOVUSWB Y15, K2, X22 // 62327e2a10fe
+ VPMOVUSWB Y15, K2, 7(SI)(DI*4) // 62727e2a10bcbe07000000
+ VPMOVUSWB Y15, K2, -7(DI)(R8*2) // 62327e2a10bc47f9ffffff
+ VPMOVUSWB Z28, K1, Y1 // 62627e4910e1
+ VPMOVUSWB Z6, K1, Y1 // 62f27e4910f1
+ VPMOVUSWB Z28, K1, 15(R8)(R14*4) // 62027e4910a4b00f000000
+ VPMOVUSWB Z6, K1, 15(R8)(R14*4) // 62927e4910b4b00f000000
+ VPMOVUSWB Z28, K1, -7(CX)(DX*4) // 62627e4910a491f9ffffff
+ VPMOVUSWB Z6, K1, -7(CX)(DX*4) // 62f27e4910b491f9ffffff
+ VPMOVW2M X12, K4 // 62d2fe0829e4
+ VPMOVW2M X12, K6 // 62d2fe0829f4
+ VPMOVW2M Y27, K4 // 6292fe2829e3
+ VPMOVW2M Y27, K5 // 6292fe2829eb
+ VPMOVW2M Z13, K2 // 62d2fe4829d5
+ VPMOVW2M Z21, K2 // 62b2fe4829d5
+ VPMOVW2M Z13, K7 // 62d2fe4829fd
+ VPMOVW2M Z21, K7 // 62b2fe4829fd
+ VPMOVWB X28, K7, X16 // 62227e0f30e0
+ VPMOVWB X28, K7, -7(CX)(DX*1) // 62627e0f30a411f9ffffff
+ VPMOVWB X28, K7, -15(R14)(R15*4) // 62027e0f30a4bef1ffffff
+ VPMOVWB Y19, K1, X8 // 62c27e2930d8
+ VPMOVWB Y19, K1, 17(SP) // 62e27e29309c2411000000
+ VPMOVWB Y19, K1, -17(BP)(SI*4) // 62e27e29309cb5efffffff
+ VPMOVWB Z26, K1, Y5 // 62627e4930d5
+ VPMOVWB Z3, K1, Y5 // 62f27e4930dd
+ VPMOVWB Z26, K1, (R8) // 62427e493010
+ VPMOVWB Z3, K1, (R8) // 62d27e493018
+ VPMOVWB Z26, K1, 15(DX)(BX*2) // 62627e4930945a0f000000
+ VPMOVWB Z3, K1, 15(DX)(BX*2) // 62f27e49309c5a0f000000
+ VPMOVZXBW X0, K4, Y21 // 62e27d2c30e8 or 62e2fd2c30e8
+ VPMOVZXBW 99(R15)(R15*1), K4, Y21 // 62827d2c30ac3f63000000 or 6282fd2c30ac3f63000000
+ VPMOVZXBW (DX), K4, Y21 // 62e27d2c302a or 62e2fd2c302a
+ VPMOVZXBW X11, K5, X25 // 62427d0d30cb or 6242fd0d30cb
+ VPMOVZXBW 17(SP)(BP*2), K5, X25 // 62627d0d308c6c11000000 or 6262fd0d308c6c11000000
+ VPMOVZXBW -7(DI)(R8*4), K5, X25 // 62227d0d308c87f9ffffff or 6222fd0d308c87f9ffffff
+ VPMOVZXBW Y7, K7, Z11 // 62727d4f30df or 6272fd4f30df
+ VPMOVZXBW 17(SP)(BP*1), K7, Z11 // 62727d4f309c2c11000000 or 6272fd4f309c2c11000000
+ VPMOVZXBW -7(CX)(DX*8), K7, Z11 // 62727d4f309cd1f9ffffff or 6272fd4f309cd1f9ffffff
+ VPMOVZXBW Y7, K7, Z25 // 62627d4f30cf or 6262fd4f30cf
+ VPMOVZXBW 17(SP)(BP*1), K7, Z25 // 62627d4f308c2c11000000 or 6262fd4f308c2c11000000
+ VPMOVZXBW -7(CX)(DX*8), K7, Z25 // 62627d4f308cd1f9ffffff or 6262fd4f308cd1f9ffffff
+ VPMULHRSW X30, X15, K2, X11 // 6212050a0bde or 6212850a0bde
+ VPMULHRSW -7(CX), X15, K2, X11 // 6272050a0b99f9ffffff or 6272850a0b99f9ffffff
+ VPMULHRSW 15(DX)(BX*4), X15, K2, X11 // 6272050a0b9c9a0f000000 or 6272850a0b9c9a0f000000
+ VPMULHRSW Y16, Y21, K3, Y24 // 622255230bc0 or 6222d5230bc0
+ VPMULHRSW 99(R15)(R15*4), Y21, K3, Y24 // 620255230b84bf63000000 or 6202d5230b84bf63000000
+ VPMULHRSW 15(DX), Y21, K3, Y24 // 626255230b820f000000 or 6262d5230b820f000000
+ VPMULHRSW Z22, Z12, K3, Z16 // 62a21d4b0bc6 or 62a29d4b0bc6
+ VPMULHRSW Z11, Z12, K3, Z16 // 62c21d4b0bc3 or 62c29d4b0bc3
+ VPMULHRSW 15(DX)(BX*1), Z12, K3, Z16 // 62e21d4b0b841a0f000000 or 62e29d4b0b841a0f000000
+ VPMULHRSW -7(CX)(DX*2), Z12, K3, Z16 // 62e21d4b0b8451f9ffffff or 62e29d4b0b8451f9ffffff
+ VPMULHRSW Z22, Z27, K3, Z16 // 62a225430bc6 or 62a2a5430bc6
+ VPMULHRSW Z11, Z27, K3, Z16 // 62c225430bc3 or 62c2a5430bc3
+ VPMULHRSW 15(DX)(BX*1), Z27, K3, Z16 // 62e225430b841a0f000000 or 62e2a5430b841a0f000000
+ VPMULHRSW -7(CX)(DX*2), Z27, K3, Z16 // 62e225430b8451f9ffffff or 62e2a5430b8451f9ffffff
+ VPMULHRSW Z22, Z12, K3, Z13 // 62321d4b0bee or 62329d4b0bee
+ VPMULHRSW Z11, Z12, K3, Z13 // 62521d4b0beb or 62529d4b0beb
+ VPMULHRSW 15(DX)(BX*1), Z12, K3, Z13 // 62721d4b0bac1a0f000000 or 62729d4b0bac1a0f000000
+ VPMULHRSW -7(CX)(DX*2), Z12, K3, Z13 // 62721d4b0bac51f9ffffff or 62729d4b0bac51f9ffffff
+ VPMULHRSW Z22, Z27, K3, Z13 // 623225430bee or 6232a5430bee
+ VPMULHRSW Z11, Z27, K3, Z13 // 625225430beb or 6252a5430beb
+ VPMULHRSW 15(DX)(BX*1), Z27, K3, Z13 // 627225430bac1a0f000000 or 6272a5430bac1a0f000000
+ VPMULHRSW -7(CX)(DX*2), Z27, K3, Z13 // 627225430bac51f9ffffff or 6272a5430bac51f9ffffff
+ VPMULHUW X12, X6, K3, X13 // 62514d0be4ec or 6251cd0be4ec
+ VPMULHUW 99(R15)(R15*8), X6, K3, X13 // 62114d0be4acff63000000 or 6211cd0be4acff63000000
+ VPMULHUW 7(AX)(CX*8), X6, K3, X13 // 62714d0be4acc807000000 or 6271cd0be4acc807000000
+ VPMULHUW Y9, Y13, K2, Y9 // 6251152ae4c9 or 6251952ae4c9
+ VPMULHUW (CX), Y13, K2, Y9 // 6271152ae409 or 6271952ae409
+ VPMULHUW 99(R15), Y13, K2, Y9 // 6251152ae48f63000000 or 6251952ae48f63000000
+ VPMULHUW Z12, Z25, K1, Z6 // 62d13541e4f4 or 62d1b541e4f4
+ VPMULHUW Z17, Z25, K1, Z6 // 62b13541e4f1 or 62b1b541e4f1
+ VPMULHUW -17(BP), Z25, K1, Z6 // 62f13541e4b5efffffff or 62f1b541e4b5efffffff
+ VPMULHUW -15(R14)(R15*8), Z25, K1, Z6 // 62913541e4b4fef1ffffff or 6291b541e4b4fef1ffffff
+ VPMULHUW Z12, Z12, K1, Z6 // 62d11d49e4f4 or 62d19d49e4f4
+ VPMULHUW Z17, Z12, K1, Z6 // 62b11d49e4f1 or 62b19d49e4f1
+ VPMULHUW -17(BP), Z12, K1, Z6 // 62f11d49e4b5efffffff or 62f19d49e4b5efffffff
+ VPMULHUW -15(R14)(R15*8), Z12, K1, Z6 // 62911d49e4b4fef1ffffff or 62919d49e4b4fef1ffffff
+ VPMULHUW Z12, Z25, K1, Z8 // 62513541e4c4 or 6251b541e4c4
+ VPMULHUW Z17, Z25, K1, Z8 // 62313541e4c1 or 6231b541e4c1
+ VPMULHUW -17(BP), Z25, K1, Z8 // 62713541e485efffffff or 6271b541e485efffffff
+ VPMULHUW -15(R14)(R15*8), Z25, K1, Z8 // 62113541e484fef1ffffff or 6211b541e484fef1ffffff
+ VPMULHUW Z12, Z12, K1, Z8 // 62511d49e4c4 or 62519d49e4c4
+ VPMULHUW Z17, Z12, K1, Z8 // 62311d49e4c1 or 62319d49e4c1
+ VPMULHUW -17(BP), Z12, K1, Z8 // 62711d49e485efffffff or 62719d49e485efffffff
+ VPMULHUW -15(R14)(R15*8), Z12, K1, Z8 // 62111d49e484fef1ffffff or 62119d49e484fef1ffffff
+ VPMULHW X8, X30, K2, X23 // 62c10d02e5f8 or 62c18d02e5f8
+ VPMULHW (AX), X30, K2, X23 // 62e10d02e538 or 62e18d02e538
+ VPMULHW 7(SI), X30, K2, X23 // 62e10d02e5be07000000 or 62e18d02e5be07000000
+ VPMULHW Y7, Y3, K1, Y6 // 62f16529e5f7 or 62f1e529e5f7
+ VPMULHW 99(R15)(R15*2), Y3, K1, Y6 // 62916529e5b47f63000000 or 6291e529e5b47f63000000
+ VPMULHW -7(DI), Y3, K1, Y6 // 62f16529e5b7f9ffffff or 62f1e529e5b7f9ffffff
+ VPMULHW Z3, Z6, K7, Z9 // 62714d4fe5cb or 6271cd4fe5cb
+ VPMULHW Z21, Z6, K7, Z9 // 62314d4fe5cd or 6231cd4fe5cd
+ VPMULHW 17(SP)(BP*2), Z6, K7, Z9 // 62714d4fe58c6c11000000 or 6271cd4fe58c6c11000000
+ VPMULHW -7(DI)(R8*4), Z6, K7, Z9 // 62314d4fe58c87f9ffffff or 6231cd4fe58c87f9ffffff
+ VPMULHW Z3, Z25, K7, Z9 // 62713547e5cb or 6271b547e5cb
+ VPMULHW Z21, Z25, K7, Z9 // 62313547e5cd or 6231b547e5cd
+ VPMULHW 17(SP)(BP*2), Z25, K7, Z9 // 62713547e58c6c11000000 or 6271b547e58c6c11000000
+ VPMULHW -7(DI)(R8*4), Z25, K7, Z9 // 62313547e58c87f9ffffff or 6231b547e58c87f9ffffff
+ VPMULHW Z3, Z6, K7, Z12 // 62714d4fe5e3 or 6271cd4fe5e3
+ VPMULHW Z21, Z6, K7, Z12 // 62314d4fe5e5 or 6231cd4fe5e5
+ VPMULHW 17(SP)(BP*2), Z6, K7, Z12 // 62714d4fe5a46c11000000 or 6271cd4fe5a46c11000000
+ VPMULHW -7(DI)(R8*4), Z6, K7, Z12 // 62314d4fe5a487f9ffffff or 6231cd4fe5a487f9ffffff
+ VPMULHW Z3, Z25, K7, Z12 // 62713547e5e3 or 6271b547e5e3
+ VPMULHW Z21, Z25, K7, Z12 // 62313547e5e5 or 6231b547e5e5
+ VPMULHW 17(SP)(BP*2), Z25, K7, Z12 // 62713547e5a46c11000000 or 6271b547e5a46c11000000
+ VPMULHW -7(DI)(R8*4), Z25, K7, Z12 // 62313547e5a487f9ffffff or 6231b547e5a487f9ffffff
+ VPMULLW X7, X16, K1, X31 // 62617d01d5ff or 6261fd01d5ff
+ VPMULLW (R8), X16, K1, X31 // 62417d01d538 or 6241fd01d538
+ VPMULLW 15(DX)(BX*2), X16, K1, X31 // 62617d01d5bc5a0f000000 or 6261fd01d5bc5a0f000000
+ VPMULLW Y18, Y31, K3, Y18 // 62a10523d5d2 or 62a18523d5d2
+ VPMULLW -17(BP), Y31, K3, Y18 // 62e10523d595efffffff or 62e18523d595efffffff
+ VPMULLW -15(R14)(R15*8), Y31, K3, Y18 // 62810523d594fef1ffffff or 62818523d594fef1ffffff
+ VPMULLW Z11, Z12, K4, Z9 // 62511d4cd5cb or 62519d4cd5cb
+ VPMULLW Z5, Z12, K4, Z9 // 62711d4cd5cd or 62719d4cd5cd
+ VPMULLW -15(R14)(R15*1), Z12, K4, Z9 // 62111d4cd58c3ef1ffffff or 62119d4cd58c3ef1ffffff
+ VPMULLW -15(BX), Z12, K4, Z9 // 62711d4cd58bf1ffffff or 62719d4cd58bf1ffffff
+ VPMULLW Z11, Z22, K4, Z9 // 62514d44d5cb or 6251cd44d5cb
+ VPMULLW Z5, Z22, K4, Z9 // 62714d44d5cd or 6271cd44d5cd
+ VPMULLW -15(R14)(R15*1), Z22, K4, Z9 // 62114d44d58c3ef1ffffff or 6211cd44d58c3ef1ffffff
+ VPMULLW -15(BX), Z22, K4, Z9 // 62714d44d58bf1ffffff or 6271cd44d58bf1ffffff
+ VPMULLW Z11, Z12, K4, Z19 // 62c11d4cd5db or 62c19d4cd5db
+ VPMULLW Z5, Z12, K4, Z19 // 62e11d4cd5dd or 62e19d4cd5dd
+ VPMULLW -15(R14)(R15*1), Z12, K4, Z19 // 62811d4cd59c3ef1ffffff or 62819d4cd59c3ef1ffffff
+ VPMULLW -15(BX), Z12, K4, Z19 // 62e11d4cd59bf1ffffff or 62e19d4cd59bf1ffffff
+ VPMULLW Z11, Z22, K4, Z19 // 62c14d44d5db or 62c1cd44d5db
+ VPMULLW Z5, Z22, K4, Z19 // 62e14d44d5dd or 62e1cd44d5dd
+ VPMULLW -15(R14)(R15*1), Z22, K4, Z19 // 62814d44d59c3ef1ffffff or 6281cd44d59c3ef1ffffff
+ VPMULLW -15(BX), Z22, K4, Z19 // 62e14d44d59bf1ffffff or 62e1cd44d59bf1ffffff
+ VPSADBW X7, X3, X31 // 62616508f6ff or 6261e508f6ff
+ VPSADBW 17(SP)(BP*8), X3, X31 // 62616508f6bcec11000000 or 6261e508f6bcec11000000
+ VPSADBW 17(SP)(BP*4), X3, X31 // 62616508f6bcac11000000 or 6261e508f6bcac11000000
+ VPSADBW Y14, Y9, Y22 // 62c13528f6f6 or 62c1b528f6f6
+ VPSADBW 99(R15)(R15*8), Y9, Y22 // 62813528f6b4ff63000000 or 6281b528f6b4ff63000000
+ VPSADBW 7(AX)(CX*8), Y9, Y22 // 62e13528f6b4c807000000 or 62e1b528f6b4c807000000
+ VPSADBW Z7, Z26, Z30 // 62612d40f6f7 or 6261ad40f6f7
+ VPSADBW Z21, Z26, Z30 // 62212d40f6f5 or 6221ad40f6f5
+ VPSADBW (R8), Z26, Z30 // 62412d40f630 or 6241ad40f630
+ VPSADBW 15(DX)(BX*2), Z26, Z30 // 62612d40f6b45a0f000000 or 6261ad40f6b45a0f000000
+ VPSADBW Z7, Z22, Z30 // 62614d40f6f7 or 6261cd40f6f7
+ VPSADBW Z21, Z22, Z30 // 62214d40f6f5 or 6221cd40f6f5
+ VPSADBW (R8), Z22, Z30 // 62414d40f630 or 6241cd40f630
+ VPSADBW 15(DX)(BX*2), Z22, Z30 // 62614d40f6b45a0f000000 or 6261cd40f6b45a0f000000
+ VPSADBW Z7, Z26, Z5 // 62f12d40f6ef or 62f1ad40f6ef
+ VPSADBW Z21, Z26, Z5 // 62b12d40f6ed or 62b1ad40f6ed
+ VPSADBW (R8), Z26, Z5 // 62d12d40f628 or 62d1ad40f628
+ VPSADBW 15(DX)(BX*2), Z26, Z5 // 62f12d40f6ac5a0f000000 or 62f1ad40f6ac5a0f000000
+ VPSADBW Z7, Z22, Z5 // 62f14d40f6ef or 62f1cd40f6ef
+ VPSADBW Z21, Z22, Z5 // 62b14d40f6ed or 62b1cd40f6ed
+ VPSADBW (R8), Z22, Z5 // 62d14d40f628 or 62d1cd40f628
+ VPSADBW 15(DX)(BX*2), Z22, Z5 // 62f14d40f6ac5a0f000000 or 62f1cd40f6ac5a0f000000
+ VPSHUFB X13, X9, K5, X0 // 62d2350d00c5 or 62d2b50d00c5
+ VPSHUFB 15(R8)(R14*4), X9, K5, X0 // 6292350d0084b00f000000 or 6292b50d0084b00f000000
+ VPSHUFB -7(CX)(DX*4), X9, K5, X0 // 62f2350d008491f9ffffff or 62f2b50d008491f9ffffff
+ VPSHUFB Y2, Y16, K7, Y5 // 62f27d2700ea or 62f2fd2700ea
+ VPSHUFB 15(DX)(BX*1), Y16, K7, Y5 // 62f27d2700ac1a0f000000 or 62f2fd2700ac1a0f000000
+ VPSHUFB -7(CX)(DX*2), Y16, K7, Y5 // 62f27d2700ac51f9ffffff or 62f2fd2700ac51f9ffffff
+ VPSHUFB Z9, Z12, K7, Z25 // 62421d4f00c9 or 62429d4f00c9
+ VPSHUFB Z12, Z12, K7, Z25 // 62421d4f00cc or 62429d4f00cc
+ VPSHUFB 15(R8)(R14*8), Z12, K7, Z25 // 62021d4f008cf00f000000 or 62029d4f008cf00f000000
+ VPSHUFB -15(R14)(R15*2), Z12, K7, Z25 // 62021d4f008c7ef1ffffff or 62029d4f008c7ef1ffffff
+ VPSHUFB Z9, Z17, K7, Z25 // 6242754700c9 or 6242f54700c9
+ VPSHUFB Z12, Z17, K7, Z25 // 6242754700cc or 6242f54700cc
+ VPSHUFB 15(R8)(R14*8), Z17, K7, Z25 // 62027547008cf00f000000 or 6202f547008cf00f000000
+ VPSHUFB -15(R14)(R15*2), Z17, K7, Z25 // 62027547008c7ef1ffffff or 6202f547008c7ef1ffffff
+ VPSHUFB Z9, Z12, K7, Z12 // 62521d4f00e1 or 62529d4f00e1
+ VPSHUFB Z12, Z12, K7, Z12 // 62521d4f00e4 or 62529d4f00e4
+ VPSHUFB 15(R8)(R14*8), Z12, K7, Z12 // 62121d4f00a4f00f000000 or 62129d4f00a4f00f000000
+ VPSHUFB -15(R14)(R15*2), Z12, K7, Z12 // 62121d4f00a47ef1ffffff or 62129d4f00a47ef1ffffff
+ VPSHUFB Z9, Z17, K7, Z12 // 6252754700e1 or 6252f54700e1
+ VPSHUFB Z12, Z17, K7, Z12 // 6252754700e4 or 6252f54700e4
+ VPSHUFB 15(R8)(R14*8), Z17, K7, Z12 // 6212754700a4f00f000000 or 6212f54700a4f00f000000
+ VPSHUFB -15(R14)(R15*2), Z17, K7, Z12 // 6212754700a47ef1ffffff or 6212f54700a47ef1ffffff
+ VPSHUFHW $13, X11, K2, X31 // 62417e0a70fb0d or 6241fe0a70fb0d
+ VPSHUFHW $13, -17(BP)(SI*2), K2, X31 // 62617e0a70bc75efffffff0d or 6261fe0a70bc75efffffff0d
+ VPSHUFHW $13, 7(AX)(CX*2), K2, X31 // 62617e0a70bc48070000000d or 6261fe0a70bc48070000000d
+ VPSHUFHW $65, Y11, K5, Y6 // 62d17e2d70f341 or 62d1fe2d70f341
+ VPSHUFHW $65, 15(R8), K5, Y6 // 62d17e2d70b00f00000041 or 62d1fe2d70b00f00000041
+ VPSHUFHW $65, (BP), K5, Y6 // 62f17e2d70750041 or 62f1fe2d70750041
+ VPSHUFHW $67, Z0, K3, Z7 // 62f17e4b70f843 or 62f1fe4b70f843
+ VPSHUFHW $67, Z6, K3, Z7 // 62f17e4b70fe43 or 62f1fe4b70fe43
+ VPSHUFHW $67, (SI), K3, Z7 // 62f17e4b703e43 or 62f1fe4b703e43
+ VPSHUFHW $67, 7(SI)(DI*2), K3, Z7 // 62f17e4b70bc7e0700000043 or 62f1fe4b70bc7e0700000043
+ VPSHUFHW $67, Z0, K3, Z9 // 62717e4b70c843 or 6271fe4b70c843
+ VPSHUFHW $67, Z6, K3, Z9 // 62717e4b70ce43 or 6271fe4b70ce43
+ VPSHUFHW $67, (SI), K3, Z9 // 62717e4b700e43 or 6271fe4b700e43
+ VPSHUFHW $67, 7(SI)(DI*2), K3, Z9 // 62717e4b708c7e0700000043 or 6271fe4b708c7e0700000043
+ VPSHUFLW $127, X5, K4, X22 // 62e17f0c70f57f or 62e1ff0c70f57f
+ VPSHUFLW $127, 15(R8)(R14*1), K4, X22 // 62817f0c70b4300f0000007f or 6281ff0c70b4300f0000007f
+ VPSHUFLW $127, 15(R8)(R14*2), K4, X22 // 62817f0c70b4700f0000007f or 6281ff0c70b4700f0000007f
+ VPSHUFLW $0, Y7, K2, Y19 // 62e17f2a70df00 or 62e1ff2a70df00
+ VPSHUFLW $0, 15(R8)(R14*8), K2, Y19 // 62817f2a709cf00f00000000 or 6281ff2a709cf00f00000000
+ VPSHUFLW $0, -15(R14)(R15*2), K2, Y19 // 62817f2a709c7ef1ffffff00 or 6281ff2a709c7ef1ffffff00
+ VPSHUFLW $97, Z3, K2, Z20 // 62e17f4a70e361 or 62e1ff4a70e361
+ VPSHUFLW $97, Z30, K2, Z20 // 62817f4a70e661 or 6281ff4a70e661
+ VPSHUFLW $97, 17(SP)(BP*8), K2, Z20 // 62e17f4a70a4ec1100000061 or 62e1ff4a70a4ec1100000061
+ VPSHUFLW $97, 17(SP)(BP*4), K2, Z20 // 62e17f4a70a4ac1100000061 or 62e1ff4a70a4ac1100000061
+ VPSHUFLW $97, Z3, K2, Z28 // 62617f4a70e361 or 6261ff4a70e361
+ VPSHUFLW $97, Z30, K2, Z28 // 62017f4a70e661 or 6201ff4a70e661
+ VPSHUFLW $97, 17(SP)(BP*8), K2, Z28 // 62617f4a70a4ec1100000061 or 6261ff4a70a4ec1100000061
+ VPSHUFLW $97, 17(SP)(BP*4), K2, Z28 // 62617f4a70a4ac1100000061 or 6261ff4a70a4ac1100000061
+ VPSLLDQ $64, X8, X18 // 62d16d0073f840 or 62d1ed0073f840
+ VPSLLDQ $64, -7(CX)(DX*1), X18 // 62f16d0073bc11f9ffffff40 or 62f1ed0073bc11f9ffffff40
+ VPSLLDQ $64, -15(R14)(R15*4), X18 // 62916d0073bcbef1ffffff40 or 6291ed0073bcbef1ffffff40
+ VPSLLDQ $27, Y12, Y20 // 62d15d2073fc1b or 62d1dd2073fc1b
+ VPSLLDQ $27, 7(AX)(CX*4), Y20 // 62f15d2073bc88070000001b or 62f1dd2073bc88070000001b
+ VPSLLDQ $27, 7(AX)(CX*1), Y20 // 62f15d2073bc08070000001b or 62f1dd2073bc08070000001b
+ VPSLLDQ $47, Z7, Z2 // 62f16d4873ff2f or 62f1ed4873ff2f
+ VPSLLDQ $47, Z13, Z2 // 62d16d4873fd2f or 62d1ed4873fd2f
+ VPSLLDQ $47, 17(SP), Z2 // 62f16d4873bc24110000002f or 62f1ed4873bc24110000002f
+ VPSLLDQ $47, -17(BP)(SI*4), Z2 // 62f16d4873bcb5efffffff2f or 62f1ed4873bcb5efffffff2f
+ VPSLLDQ $47, Z7, Z21 // 62f1554073ff2f or 62f1d54073ff2f
+ VPSLLDQ $47, Z13, Z21 // 62d1554073fd2f or 62d1d54073fd2f
+ VPSLLDQ $47, 17(SP), Z21 // 62f1554073bc24110000002f or 62f1d54073bc24110000002f
+ VPSLLDQ $47, -17(BP)(SI*4), Z21 // 62f1554073bcb5efffffff2f or 62f1d54073bcb5efffffff2f
+ VPSLLVW X11, X1, K7, X22 // 62c2f50f12f3
+ VPSLLVW 7(AX)(CX*4), X1, K7, X22 // 62e2f50f12b48807000000
+ VPSLLVW 7(AX)(CX*1), X1, K7, X22 // 62e2f50f12b40807000000
+ VPSLLVW Y9, Y7, K7, Y17 // 62c2c52f12c9
+ VPSLLVW 17(SP), Y7, K7, Y17 // 62e2c52f128c2411000000
+ VPSLLVW -17(BP)(SI*4), Y7, K7, Y17 // 62e2c52f128cb5efffffff
+ VPSLLVW Z3, Z14, K6, Z28 // 62628d4e12e3
+ VPSLLVW Z12, Z14, K6, Z28 // 62428d4e12e4
+ VPSLLVW 7(SI)(DI*8), Z14, K6, Z28 // 62628d4e12a4fe07000000
+ VPSLLVW -15(R14), Z14, K6, Z28 // 62428d4e12a6f1ffffff
+ VPSLLVW Z3, Z28, K6, Z28 // 62629d4612e3
+ VPSLLVW Z12, Z28, K6, Z28 // 62429d4612e4
+ VPSLLVW 7(SI)(DI*8), Z28, K6, Z28 // 62629d4612a4fe07000000
+ VPSLLVW -15(R14), Z28, K6, Z28 // 62429d4612a6f1ffffff
+ VPSLLVW Z3, Z14, K6, Z13 // 62728d4e12eb
+ VPSLLVW Z12, Z14, K6, Z13 // 62528d4e12ec
+ VPSLLVW 7(SI)(DI*8), Z14, K6, Z13 // 62728d4e12acfe07000000
+ VPSLLVW -15(R14), Z14, K6, Z13 // 62528d4e12aef1ffffff
+ VPSLLVW Z3, Z28, K6, Z13 // 62729d4612eb
+ VPSLLVW Z12, Z28, K6, Z13 // 62529d4612ec
+ VPSLLVW 7(SI)(DI*8), Z28, K6, Z13 // 62729d4612acfe07000000
+ VPSLLVW -15(R14), Z28, K6, Z13 // 62529d4612aef1ffffff
+ VPSLLW $121, X7, K3, X6 // 62f14d0b71f779 or 62f1cd0b71f779
+ VPSLLW $121, (SI), K3, X6 // 62f14d0b713679 or 62f1cd0b713679
+ VPSLLW $121, 7(SI)(DI*2), K3, X6 // 62f14d0b71b47e0700000079 or 62f1cd0b71b47e0700000079
+ VPSLLW $13, Y8, K7, Y31 // 62d1052771f00d or 62d1852771f00d
+ VPSLLW $13, 7(AX), K7, Y31 // 62f1052771b0070000000d or 62f1852771b0070000000d
+ VPSLLW $13, (DI), K7, Y31 // 62f1052771370d or 62f1852771370d
+ VPSLLW $65, Z19, K4, Z15 // 62b1054c71f341 or 62b1854c71f341
+ VPSLLW $65, Z15, K4, Z15 // 62d1054c71f741 or 62d1854c71f741
+ VPSLLW $65, 7(SI)(DI*1), K4, Z15 // 62f1054c71b43e0700000041 or 62f1854c71b43e0700000041
+ VPSLLW $65, 15(DX)(BX*8), K4, Z15 // 62f1054c71b4da0f00000041 or 62f1854c71b4da0f00000041
+ VPSLLW $65, Z19, K4, Z30 // 62b10d4471f341 or 62b18d4471f341
+ VPSLLW $65, Z15, K4, Z30 // 62d10d4471f741 or 62d18d4471f741
+ VPSLLW $65, 7(SI)(DI*1), K4, Z30 // 62f10d4471b43e0700000041 or 62f18d4471b43e0700000041
+ VPSLLW $65, 15(DX)(BX*8), K4, Z30 // 62f10d4471b4da0f00000041 or 62f18d4471b4da0f00000041
+ VPSLLW X3, X31, K4, X8 // 62710504f1c3 or 62718504f1c3
+ VPSLLW 17(SP)(BP*8), X31, K4, X8 // 62710504f184ec11000000 or 62718504f184ec11000000
+ VPSLLW 17(SP)(BP*4), X31, K4, X8 // 62710504f184ac11000000 or 62718504f184ac11000000
+ VPSLLW X28, Y28, K7, Y1 // 62911d27f1cc or 62919d27f1cc
+ VPSLLW 7(SI)(DI*4), Y28, K7, Y1 // 62f11d27f18cbe07000000 or 62f19d27f18cbe07000000
+ VPSLLW -7(DI)(R8*2), Y28, K7, Y1 // 62b11d27f18c47f9ffffff or 62b19d27f18c47f9ffffff
+ VPSLLW X20, Z3, K2, Z5 // 62b1654af1ec or 62b1e54af1ec
+ VPSLLW 17(SP), Z3, K2, Z5 // 62f1654af1ac2411000000 or 62f1e54af1ac2411000000
+ VPSLLW -17(BP)(SI*4), Z3, K2, Z5 // 62f1654af1acb5efffffff or 62f1e54af1acb5efffffff
+ VPSLLW X20, Z5, K2, Z5 // 62b1554af1ec or 62b1d54af1ec
+ VPSLLW 17(SP), Z5, K2, Z5 // 62f1554af1ac2411000000 or 62f1d54af1ac2411000000
+ VPSLLW -17(BP)(SI*4), Z5, K2, Z5 // 62f1554af1acb5efffffff or 62f1d54af1acb5efffffff
+ VPSLLW X20, Z3, K2, Z1 // 62b1654af1cc or 62b1e54af1cc
+ VPSLLW 17(SP), Z3, K2, Z1 // 62f1654af18c2411000000 or 62f1e54af18c2411000000
+ VPSLLW -17(BP)(SI*4), Z3, K2, Z1 // 62f1654af18cb5efffffff or 62f1e54af18cb5efffffff
+ VPSLLW X20, Z5, K2, Z1 // 62b1554af1cc or 62b1d54af1cc
+ VPSLLW 17(SP), Z5, K2, Z1 // 62f1554af18c2411000000 or 62f1d54af18c2411000000
+ VPSLLW -17(BP)(SI*4), Z5, K2, Z1 // 62f1554af18cb5efffffff or 62f1d54af18cb5efffffff
+ VPSRAVW X8, X28, K4, X16 // 62c29d0411c0
+ VPSRAVW 15(R8)(R14*4), X28, K4, X16 // 62829d041184b00f000000
+ VPSRAVW -7(CX)(DX*4), X28, K4, X16 // 62e29d04118491f9ffffff
+ VPSRAVW Y7, Y26, K1, Y30 // 6262ad2111f7
+ VPSRAVW -7(DI)(R8*1), Y26, K1, Y30 // 6222ad2111b407f9ffffff
+ VPSRAVW (SP), Y26, K1, Y30 // 6262ad21113424
+ VPSRAVW Z21, Z31, K3, Z17 // 62a2854311cd
+ VPSRAVW Z9, Z31, K3, Z17 // 62c2854311c9
+ VPSRAVW (BX), Z31, K3, Z17 // 62e28543110b
+ VPSRAVW -17(BP)(SI*1), Z31, K3, Z17 // 62e28543118c35efffffff
+ VPSRAVW Z21, Z0, K3, Z17 // 62a2fd4b11cd
+ VPSRAVW Z9, Z0, K3, Z17 // 62c2fd4b11c9
+ VPSRAVW (BX), Z0, K3, Z17 // 62e2fd4b110b
+ VPSRAVW -17(BP)(SI*1), Z0, K3, Z17 // 62e2fd4b118c35efffffff
+ VPSRAVW Z21, Z31, K3, Z23 // 62a2854311fd
+ VPSRAVW Z9, Z31, K3, Z23 // 62c2854311f9
+ VPSRAVW (BX), Z31, K3, Z23 // 62e28543113b
+ VPSRAVW -17(BP)(SI*1), Z31, K3, Z23 // 62e2854311bc35efffffff
+ VPSRAVW Z21, Z0, K3, Z23 // 62a2fd4b11fd
+ VPSRAVW Z9, Z0, K3, Z23 // 62c2fd4b11f9
+ VPSRAVW (BX), Z0, K3, Z23 // 62e2fd4b113b
+ VPSRAVW -17(BP)(SI*1), Z0, K3, Z23 // 62e2fd4b11bc35efffffff
+ VPSRAW $79, X11, K4, X15 // 62d1050c71e34f or 62d1850c71e34f
+ VPSRAW $79, (R8), K4, X15 // 62d1050c71204f or 62d1850c71204f
+ VPSRAW $79, 15(DX)(BX*2), K4, X15 // 62f1050c71a45a0f0000004f or 62f1850c71a45a0f0000004f
+ VPSRAW $64, Y1, K5, Y16 // 62f17d2571e140 or 62f1fd2571e140
+ VPSRAW $64, -7(CX), K5, Y16 // 62f17d2571a1f9ffffff40 or 62f1fd2571a1f9ffffff40
+ VPSRAW $64, 15(DX)(BX*4), K5, Y16 // 62f17d2571a49a0f00000040 or 62f1fd2571a49a0f00000040
+ VPSRAW $27, Z1, K7, Z6 // 62f14d4f71e11b or 62f1cd4f71e11b
+ VPSRAW $27, Z9, K7, Z6 // 62d14d4f71e11b or 62d1cd4f71e11b
+ VPSRAW $27, 15(R8)(R14*4), K7, Z6 // 62914d4f71a4b00f0000001b or 6291cd4f71a4b00f0000001b
+ VPSRAW $27, -7(CX)(DX*4), K7, Z6 // 62f14d4f71a491f9ffffff1b or 62f1cd4f71a491f9ffffff1b
+ VPSRAW $27, Z1, K7, Z9 // 62f1354f71e11b or 62f1b54f71e11b
+ VPSRAW $27, Z9, K7, Z9 // 62d1354f71e11b or 62d1b54f71e11b
+ VPSRAW $27, 15(R8)(R14*4), K7, Z9 // 6291354f71a4b00f0000001b or 6291b54f71a4b00f0000001b
+ VPSRAW $27, -7(CX)(DX*4), K7, Z9 // 62f1354f71a491f9ffffff1b or 62f1b54f71a491f9ffffff1b
+ VPSRAW X13, X19, K7, X1 // 62d16507e1cd or 62d1e507e1cd
+ VPSRAW 17(SP)(BP*1), X19, K7, X1 // 62f16507e18c2c11000000 or 62f1e507e18c2c11000000
+ VPSRAW -7(CX)(DX*8), X19, K7, X1 // 62f16507e18cd1f9ffffff or 62f1e507e18cd1f9ffffff
+ VPSRAW X2, Y31, K6, Y30 // 62610526e1f2 or 62618526e1f2
+ VPSRAW -17(BP)(SI*2), Y31, K6, Y30 // 62610526e1b475efffffff or 62618526e1b475efffffff
+ VPSRAW 7(AX)(CX*2), Y31, K6, Y30 // 62610526e1b44807000000 or 62618526e1b44807000000
+ VPSRAW X14, Z30, K3, Z20 // 62c10d43e1e6 or 62c18d43e1e6
+ VPSRAW 15(R8)(R14*1), Z30, K3, Z20 // 62810d43e1a4300f000000 or 62818d43e1a4300f000000
+ VPSRAW 15(R8)(R14*2), Z30, K3, Z20 // 62810d43e1a4700f000000 or 62818d43e1a4700f000000
+ VPSRAW X14, Z5, K3, Z20 // 62c1554be1e6 or 62c1d54be1e6
+ VPSRAW 15(R8)(R14*1), Z5, K3, Z20 // 6281554be1a4300f000000 or 6281d54be1a4300f000000
+ VPSRAW 15(R8)(R14*2), Z5, K3, Z20 // 6281554be1a4700f000000 or 6281d54be1a4700f000000
+ VPSRAW X14, Z30, K3, Z9 // 62510d43e1ce or 62518d43e1ce
+ VPSRAW 15(R8)(R14*1), Z30, K3, Z9 // 62110d43e18c300f000000 or 62118d43e18c300f000000
+ VPSRAW 15(R8)(R14*2), Z30, K3, Z9 // 62110d43e18c700f000000 or 62118d43e18c700f000000
+ VPSRAW X14, Z5, K3, Z9 // 6251554be1ce or 6251d54be1ce
+ VPSRAW 15(R8)(R14*1), Z5, K3, Z9 // 6211554be18c300f000000 or 6211d54be18c300f000000
+ VPSRAW 15(R8)(R14*2), Z5, K3, Z9 // 6211554be18c700f000000 or 6211d54be18c700f000000
+ VPSRLDQ $94, -7(CX)(DX*1), X9 // 62f13508739c11f9ffffff5e or 62f1b508739c11f9ffffff5e
+ VPSRLDQ $94, -15(R14)(R15*4), X9 // 62913508739cbef1ffffff5e or 6291b508739cbef1ffffff5e
+ VPSRLDQ $121, Y28, Y0 // 62917d2873dc79 or 6291fd2873dc79
+ VPSRLDQ $121, (AX), Y0 // 62f17d28731879 or 62f1fd28731879
+ VPSRLDQ $121, 7(SI), Y0 // 62f17d28739e0700000079 or 62f1fd28739e0700000079
+ VPSRLDQ $13, Z21, Z12 // 62b11d4873dd0d or 62b19d4873dd0d
+ VPSRLDQ $13, Z9, Z12 // 62d11d4873d90d or 62d19d4873d90d
+ VPSRLDQ $13, 17(SP)(BP*1), Z12 // 62f11d48739c2c110000000d or 62f19d48739c2c110000000d
+ VPSRLDQ $13, -7(CX)(DX*8), Z12 // 62f11d48739cd1f9ffffff0d or 62f19d48739cd1f9ffffff0d
+ VPSRLDQ $13, Z21, Z13 // 62b1154873dd0d or 62b1954873dd0d
+ VPSRLDQ $13, Z9, Z13 // 62d1154873d90d or 62d1954873d90d
+ VPSRLDQ $13, 17(SP)(BP*1), Z13 // 62f11548739c2c110000000d or 62f19548739c2c110000000d
+ VPSRLDQ $13, -7(CX)(DX*8), Z13 // 62f11548739cd1f9ffffff0d or 62f19548739cd1f9ffffff0d
+ VPSRLVW X30, X23, K1, X12 // 6212c50110e6
+ VPSRLVW 7(AX)(CX*4), X23, K1, X12 // 6272c50110a48807000000
+ VPSRLVW 7(AX)(CX*1), X23, K1, X12 // 6272c50110a40807000000
+ VPSRLVW Y3, Y22, K1, Y12 // 6272cd2110e3
+ VPSRLVW 17(SP)(BP*1), Y22, K1, Y12 // 6272cd2110a42c11000000
+ VPSRLVW -7(CX)(DX*8), Y22, K1, Y12 // 6272cd2110a4d1f9ffffff
+ VPSRLVW Z14, Z15, K1, Z0 // 62d2854910c6
+ VPSRLVW Z27, Z15, K1, Z0 // 6292854910c3
+ VPSRLVW 99(R15)(R15*4), Z15, K1, Z0 // 629285491084bf63000000
+ VPSRLVW 15(DX), Z15, K1, Z0 // 62f2854910820f000000
+ VPSRLVW Z14, Z12, K1, Z0 // 62d29d4910c6
+ VPSRLVW Z27, Z12, K1, Z0 // 62929d4910c3
+ VPSRLVW 99(R15)(R15*4), Z12, K1, Z0 // 62929d491084bf63000000
+ VPSRLVW 15(DX), Z12, K1, Z0 // 62f29d4910820f000000
+ VPSRLVW Z14, Z15, K1, Z8 // 6252854910c6
+ VPSRLVW Z27, Z15, K1, Z8 // 6212854910c3
+ VPSRLVW 99(R15)(R15*4), Z15, K1, Z8 // 621285491084bf63000000
+ VPSRLVW 15(DX), Z15, K1, Z8 // 6272854910820f000000
+ VPSRLVW Z14, Z12, K1, Z8 // 62529d4910c6
+ VPSRLVW Z27, Z12, K1, Z8 // 62129d4910c3
+ VPSRLVW 99(R15)(R15*4), Z12, K1, Z8 // 62129d491084bf63000000
+ VPSRLVW 15(DX), Z12, K1, Z8 // 62729d4910820f000000
+ VPSRLW $0, X20, K7, X8 // 62b13d0f71d400 or 62b1bd0f71d400
+ VPSRLW $0, (SI), K7, X8 // 62f13d0f711600 or 62f1bd0f711600
+ VPSRLW $0, 7(SI)(DI*2), K7, X8 // 62f13d0f71947e0700000000 or 62f1bd0f71947e0700000000
+ VPSRLW $97, Y1, K2, Y15 // 62f1052a71d161 or 62f1852a71d161
+ VPSRLW $97, -17(BP)(SI*2), K2, Y15 // 62f1052a719475efffffff61 or 62f1852a719475efffffff61
+ VPSRLW $97, 7(AX)(CX*2), K2, Y15 // 62f1052a7194480700000061 or 62f1852a7194480700000061
+ VPSRLW $81, Z13, K4, Z11 // 62d1254c71d551 or 62d1a54c71d551
+ VPSRLW $81, Z14, K4, Z11 // 62d1254c71d651 or 62d1a54c71d651
+ VPSRLW $81, (CX), K4, Z11 // 62f1254c711151 or 62f1a54c711151
+ VPSRLW $81, 99(R15), K4, Z11 // 62d1254c71976300000051 or 62d1a54c71976300000051
+ VPSRLW $81, Z13, K4, Z5 // 62d1554c71d551 or 62d1d54c71d551
+ VPSRLW $81, Z14, K4, Z5 // 62d1554c71d651 or 62d1d54c71d651
+ VPSRLW $81, (CX), K4, Z5 // 62f1554c711151 or 62f1d54c711151
+ VPSRLW $81, 99(R15), K4, Z5 // 62d1554c71976300000051 or 62d1d54c71976300000051
+ VPSRLW X26, X9, K1, X2 // 62913509d1d2 or 6291b509d1d2
+ VPSRLW 17(SP)(BP*8), X9, K1, X2 // 62f13509d194ec11000000 or 62f1b509d194ec11000000
+ VPSRLW 17(SP)(BP*4), X9, K1, X2 // 62f13509d194ac11000000 or 62f1b509d194ac11000000
+ VPSRLW X19, Y19, K3, Y27 // 62216523d1db or 6221e523d1db
+ VPSRLW 7(SI)(DI*4), Y19, K3, Y27 // 62616523d19cbe07000000 or 6261e523d19cbe07000000
+ VPSRLW -7(DI)(R8*2), Y19, K3, Y27 // 62216523d19c47f9ffffff or 6221e523d19c47f9ffffff
+ VPSRLW X0, Z2, K4, Z5 // 62f16d4cd1e8 or 62f1ed4cd1e8
+ VPSRLW 17(SP), Z2, K4, Z5 // 62f16d4cd1ac2411000000 or 62f1ed4cd1ac2411000000
+ VPSRLW -17(BP)(SI*4), Z2, K4, Z5 // 62f16d4cd1acb5efffffff or 62f1ed4cd1acb5efffffff
+ VPSRLW X0, Z2, K4, Z23 // 62e16d4cd1f8 or 62e1ed4cd1f8
+ VPSRLW 17(SP), Z2, K4, Z23 // 62e16d4cd1bc2411000000 or 62e1ed4cd1bc2411000000
+ VPSRLW -17(BP)(SI*4), Z2, K4, Z23 // 62e16d4cd1bcb5efffffff or 62e1ed4cd1bcb5efffffff
+ VPSUBB X7, X16, K5, X31 // 62617d05f8ff or 6261fd05f8ff
+ VPSUBB 7(AX), X16, K5, X31 // 62617d05f8b807000000 or 6261fd05f8b807000000
+ VPSUBB (DI), X16, K5, X31 // 62617d05f83f or 6261fd05f83f
+ VPSUBB Y13, Y17, K7, Y5 // 62d17527f8ed or 62d1f527f8ed
+ VPSUBB 15(R8)(R14*1), Y17, K7, Y5 // 62917527f8ac300f000000 or 6291f527f8ac300f000000
+ VPSUBB 15(R8)(R14*2), Y17, K7, Y5 // 62917527f8ac700f000000 or 6291f527f8ac700f000000
+ VPSUBB Z28, Z26, K7, Z6 // 62912d47f8f4 or 6291ad47f8f4
+ VPSUBB Z6, Z26, K7, Z6 // 62f12d47f8f6 or 62f1ad47f8f6
+ VPSUBB 99(R15)(R15*2), Z26, K7, Z6 // 62912d47f8b47f63000000 or 6291ad47f8b47f63000000
+ VPSUBB -7(DI), Z26, K7, Z6 // 62f12d47f8b7f9ffffff or 62f1ad47f8b7f9ffffff
+ VPSUBB Z28, Z14, K7, Z6 // 62910d4ff8f4 or 62918d4ff8f4
+ VPSUBB Z6, Z14, K7, Z6 // 62f10d4ff8f6 or 62f18d4ff8f6
+ VPSUBB 99(R15)(R15*2), Z14, K7, Z6 // 62910d4ff8b47f63000000 or 62918d4ff8b47f63000000
+ VPSUBB -7(DI), Z14, K7, Z6 // 62f10d4ff8b7f9ffffff or 62f18d4ff8b7f9ffffff
+ VPSUBB Z28, Z26, K7, Z14 // 62112d47f8f4 or 6211ad47f8f4
+ VPSUBB Z6, Z26, K7, Z14 // 62712d47f8f6 or 6271ad47f8f6
+ VPSUBB 99(R15)(R15*2), Z26, K7, Z14 // 62112d47f8b47f63000000 or 6211ad47f8b47f63000000
+ VPSUBB -7(DI), Z26, K7, Z14 // 62712d47f8b7f9ffffff or 6271ad47f8b7f9ffffff
+ VPSUBB Z28, Z14, K7, Z14 // 62110d4ff8f4 or 62118d4ff8f4
+ VPSUBB Z6, Z14, K7, Z14 // 62710d4ff8f6 or 62718d4ff8f6
+ VPSUBB 99(R15)(R15*2), Z14, K7, Z14 // 62110d4ff8b47f63000000 or 62118d4ff8b47f63000000
+ VPSUBB -7(DI), Z14, K7, Z14 // 62710d4ff8b7f9ffffff or 62718d4ff8b7f9ffffff
+ VPSUBSB X28, X0, K2, X21 // 62817d0ae8ec or 6281fd0ae8ec
+ VPSUBSB 7(SI)(DI*8), X0, K2, X21 // 62e17d0ae8acfe07000000 or 62e1fd0ae8acfe07000000
+ VPSUBSB -15(R14), X0, K2, X21 // 62c17d0ae8aef1ffffff or 62c1fd0ae8aef1ffffff
+ VPSUBSB Y24, Y11, K5, Y8 // 6211252de8c0 or 6211a52de8c0
+ VPSUBSB (CX), Y11, K5, Y8 // 6271252de801 or 6271a52de801
+ VPSUBSB 99(R15), Y11, K5, Y8 // 6251252de88763000000 or 6251a52de88763000000
+ VPSUBSB Z23, Z23, K3, Z27 // 62214543e8df or 6221c543e8df
+ VPSUBSB Z6, Z23, K3, Z27 // 62614543e8de or 6261c543e8de
+ VPSUBSB -17(BP), Z23, K3, Z27 // 62614543e89defffffff or 6261c543e89defffffff
+ VPSUBSB -15(R14)(R15*8), Z23, K3, Z27 // 62014543e89cfef1ffffff or 6201c543e89cfef1ffffff
+ VPSUBSB Z23, Z5, K3, Z27 // 6221554be8df or 6221d54be8df
+ VPSUBSB Z6, Z5, K3, Z27 // 6261554be8de or 6261d54be8de
+ VPSUBSB -17(BP), Z5, K3, Z27 // 6261554be89defffffff or 6261d54be89defffffff
+ VPSUBSB -15(R14)(R15*8), Z5, K3, Z27 // 6201554be89cfef1ffffff or 6201d54be89cfef1ffffff
+ VPSUBSB Z23, Z23, K3, Z15 // 62314543e8ff or 6231c543e8ff
+ VPSUBSB Z6, Z23, K3, Z15 // 62714543e8fe or 6271c543e8fe
+ VPSUBSB -17(BP), Z23, K3, Z15 // 62714543e8bdefffffff or 6271c543e8bdefffffff
+ VPSUBSB -15(R14)(R15*8), Z23, K3, Z15 // 62114543e8bcfef1ffffff or 6211c543e8bcfef1ffffff
+ VPSUBSB Z23, Z5, K3, Z15 // 6231554be8ff or 6231d54be8ff
+ VPSUBSB Z6, Z5, K3, Z15 // 6271554be8fe or 6271d54be8fe
+ VPSUBSB -17(BP), Z5, K3, Z15 // 6271554be8bdefffffff or 6271d54be8bdefffffff
+ VPSUBSB -15(R14)(R15*8), Z5, K3, Z15 // 6211554be8bcfef1ffffff or 6211d54be8bcfef1ffffff
+ VPSUBSW X19, X7, K4, X22 // 62a1450ce9f3 or 62a1c50ce9f3
+ VPSUBSW 7(SI)(DI*1), X7, K4, X22 // 62e1450ce9b43e07000000 or 62e1c50ce9b43e07000000
+ VPSUBSW 15(DX)(BX*8), X7, K4, X22 // 62e1450ce9b4da0f000000 or 62e1c50ce9b4da0f000000
+ VPSUBSW Y21, Y24, K2, Y5 // 62b13d22e9ed or 62b1bd22e9ed
+ VPSUBSW 99(R15)(R15*2), Y24, K2, Y5 // 62913d22e9ac7f63000000 or 6291bd22e9ac7f63000000
+ VPSUBSW -7(DI), Y24, K2, Y5 // 62f13d22e9aff9ffffff or 62f1bd22e9aff9ffffff
+ VPSUBSW Z16, Z21, K2, Z8 // 62315542e9c0 or 6231d542e9c0
+ VPSUBSW Z13, Z21, K2, Z8 // 62515542e9c5 or 6251d542e9c5
+ VPSUBSW 17(SP)(BP*2), Z21, K2, Z8 // 62715542e9846c11000000 or 6271d542e9846c11000000
+ VPSUBSW -7(DI)(R8*4), Z21, K2, Z8 // 62315542e98487f9ffffff or 6231d542e98487f9ffffff
+ VPSUBSW Z16, Z5, K2, Z8 // 6231554ae9c0 or 6231d54ae9c0
+ VPSUBSW Z13, Z5, K2, Z8 // 6251554ae9c5 or 6251d54ae9c5
+ VPSUBSW 17(SP)(BP*2), Z5, K2, Z8 // 6271554ae9846c11000000 or 6271d54ae9846c11000000
+ VPSUBSW -7(DI)(R8*4), Z5, K2, Z8 // 6231554ae98487f9ffffff or 6231d54ae98487f9ffffff
+ VPSUBSW Z16, Z21, K2, Z28 // 62215542e9e0 or 6221d542e9e0
+ VPSUBSW Z13, Z21, K2, Z28 // 62415542e9e5 or 6241d542e9e5
+ VPSUBSW 17(SP)(BP*2), Z21, K2, Z28 // 62615542e9a46c11000000 or 6261d542e9a46c11000000
+ VPSUBSW -7(DI)(R8*4), Z21, K2, Z28 // 62215542e9a487f9ffffff or 6221d542e9a487f9ffffff
+ VPSUBSW Z16, Z5, K2, Z28 // 6221554ae9e0 or 6221d54ae9e0
+ VPSUBSW Z13, Z5, K2, Z28 // 6241554ae9e5 or 6241d54ae9e5
+ VPSUBSW 17(SP)(BP*2), Z5, K2, Z28 // 6261554ae9a46c11000000 or 6261d54ae9a46c11000000
+ VPSUBSW -7(DI)(R8*4), Z5, K2, Z28 // 6221554ae9a487f9ffffff or 6221d54ae9a487f9ffffff
+ VPSUBUSB X31, X16, K3, X7 // 62917d03d8ff or 6291fd03d8ff
+ VPSUBUSB -7(DI)(R8*1), X16, K3, X7 // 62b17d03d8bc07f9ffffff or 62b1fd03d8bc07f9ffffff
+ VPSUBUSB (SP), X16, K3, X7 // 62f17d03d83c24 or 62f1fd03d83c24
+ VPSUBUSB Y13, Y9, K3, Y16 // 62c1352bd8c5 or 62c1b52bd8c5
+ VPSUBUSB -7(CX)(DX*1), Y9, K3, Y16 // 62e1352bd88411f9ffffff or 62e1b52bd88411f9ffffff
+ VPSUBUSB -15(R14)(R15*4), Y9, K3, Y16 // 6281352bd884bef1ffffff or 6281b52bd884bef1ffffff
+ VPSUBUSB Z6, Z22, K3, Z12 // 62714d43d8e6 or 6271cd43d8e6
+ VPSUBUSB Z8, Z22, K3, Z12 // 62514d43d8e0 or 6251cd43d8e0
+ VPSUBUSB 15(R8), Z22, K3, Z12 // 62514d43d8a00f000000 or 6251cd43d8a00f000000
+ VPSUBUSB (BP), Z22, K3, Z12 // 62714d43d86500 or 6271cd43d86500
+ VPSUBUSB Z6, Z11, K3, Z12 // 6271254bd8e6 or 6271a54bd8e6
+ VPSUBUSB Z8, Z11, K3, Z12 // 6251254bd8e0 or 6251a54bd8e0
+ VPSUBUSB 15(R8), Z11, K3, Z12 // 6251254bd8a00f000000 or 6251a54bd8a00f000000
+ VPSUBUSB (BP), Z11, K3, Z12 // 6271254bd86500 or 6271a54bd86500
+ VPSUBUSB Z6, Z22, K3, Z27 // 62614d43d8de or 6261cd43d8de
+ VPSUBUSB Z8, Z22, K3, Z27 // 62414d43d8d8 or 6241cd43d8d8
+ VPSUBUSB 15(R8), Z22, K3, Z27 // 62414d43d8980f000000 or 6241cd43d8980f000000
+ VPSUBUSB (BP), Z22, K3, Z27 // 62614d43d85d00 or 6261cd43d85d00
+ VPSUBUSB Z6, Z11, K3, Z27 // 6261254bd8de or 6261a54bd8de
+ VPSUBUSB Z8, Z11, K3, Z27 // 6241254bd8d8 or 6241a54bd8d8
+ VPSUBUSB 15(R8), Z11, K3, Z27 // 6241254bd8980f000000 or 6241a54bd8980f000000
+ VPSUBUSB (BP), Z11, K3, Z27 // 6261254bd85d00 or 6261a54bd85d00
+ VPSUBUSW X9, X7, K2, X1 // 62d1450ad9c9 or 62d1c50ad9c9
+ VPSUBUSW -7(CX), X7, K2, X1 // 62f1450ad989f9ffffff or 62f1c50ad989f9ffffff
+ VPSUBUSW 15(DX)(BX*4), X7, K2, X1 // 62f1450ad98c9a0f000000 or 62f1c50ad98c9a0f000000
+ VPSUBUSW Y3, Y6, K1, Y9 // 62714d29d9cb or 6271cd29d9cb
+ VPSUBUSW 15(DX)(BX*1), Y6, K1, Y9 // 62714d29d98c1a0f000000 or 6271cd29d98c1a0f000000
+ VPSUBUSW -7(CX)(DX*2), Y6, K1, Y9 // 62714d29d98c51f9ffffff or 6271cd29d98c51f9ffffff
+ VPSUBUSW Z9, Z12, K2, Z25 // 62411d4ad9c9 or 62419d4ad9c9
+ VPSUBUSW Z12, Z12, K2, Z25 // 62411d4ad9cc or 62419d4ad9cc
+ VPSUBUSW 15(R8)(R14*8), Z12, K2, Z25 // 62011d4ad98cf00f000000 or 62019d4ad98cf00f000000
+ VPSUBUSW -15(R14)(R15*2), Z12, K2, Z25 // 62011d4ad98c7ef1ffffff or 62019d4ad98c7ef1ffffff
+ VPSUBUSW Z9, Z17, K2, Z25 // 62417542d9c9 or 6241f542d9c9
+ VPSUBUSW Z12, Z17, K2, Z25 // 62417542d9cc or 6241f542d9cc
+ VPSUBUSW 15(R8)(R14*8), Z17, K2, Z25 // 62017542d98cf00f000000 or 6201f542d98cf00f000000
+ VPSUBUSW -15(R14)(R15*2), Z17, K2, Z25 // 62017542d98c7ef1ffffff or 6201f542d98c7ef1ffffff
+ VPSUBUSW Z9, Z12, K2, Z12 // 62511d4ad9e1 or 62519d4ad9e1
+ VPSUBUSW Z12, Z12, K2, Z12 // 62511d4ad9e4 or 62519d4ad9e4
+ VPSUBUSW 15(R8)(R14*8), Z12, K2, Z12 // 62111d4ad9a4f00f000000 or 62119d4ad9a4f00f000000
+ VPSUBUSW -15(R14)(R15*2), Z12, K2, Z12 // 62111d4ad9a47ef1ffffff or 62119d4ad9a47ef1ffffff
+ VPSUBUSW Z9, Z17, K2, Z12 // 62517542d9e1 or 6251f542d9e1
+ VPSUBUSW Z12, Z17, K2, Z12 // 62517542d9e4 or 6251f542d9e4
+ VPSUBUSW 15(R8)(R14*8), Z17, K2, Z12 // 62117542d9a4f00f000000 or 6211f542d9a4f00f000000
+ VPSUBUSW -15(R14)(R15*2), Z17, K2, Z12 // 62117542d9a47ef1ffffff or 6211f542d9a47ef1ffffff
+ VPSUBW X0, X12, K1, X15 // 62711d09f9f8 or 62719d09f9f8
+ VPSUBW 99(R15)(R15*8), X12, K1, X15 // 62111d09f9bcff63000000 or 62119d09f9bcff63000000
+ VPSUBW 7(AX)(CX*8), X12, K1, X15 // 62711d09f9bcc807000000 or 62719d09f9bcc807000000
+ VPSUBW Y26, Y6, K7, Y7 // 62914d2ff9fa or 6291cd2ff9fa
+ VPSUBW -17(BP), Y6, K7, Y7 // 62f14d2ff9bdefffffff or 62f1cd2ff9bdefffffff
+ VPSUBW -15(R14)(R15*8), Y6, K7, Y7 // 62914d2ff9bcfef1ffffff or 6291cd2ff9bcfef1ffffff
+ VPSUBW Z8, Z3, K1, Z6 // 62d16549f9f0 or 62d1e549f9f0
+ VPSUBW Z2, Z3, K1, Z6 // 62f16549f9f2 or 62f1e549f9f2
+ VPSUBW -15(R14)(R15*1), Z3, K1, Z6 // 62916549f9b43ef1ffffff or 6291e549f9b43ef1ffffff
+ VPSUBW -15(BX), Z3, K1, Z6 // 62f16549f9b3f1ffffff or 62f1e549f9b3f1ffffff
+ VPSUBW Z8, Z21, K1, Z6 // 62d15541f9f0 or 62d1d541f9f0
+ VPSUBW Z2, Z21, K1, Z6 // 62f15541f9f2 or 62f1d541f9f2
+ VPSUBW -15(R14)(R15*1), Z21, K1, Z6 // 62915541f9b43ef1ffffff or 6291d541f9b43ef1ffffff
+ VPSUBW -15(BX), Z21, K1, Z6 // 62f15541f9b3f1ffffff or 62f1d541f9b3f1ffffff
+ VPSUBW Z8, Z3, K1, Z25 // 62416549f9c8 or 6241e549f9c8
+ VPSUBW Z2, Z3, K1, Z25 // 62616549f9ca or 6261e549f9ca
+ VPSUBW -15(R14)(R15*1), Z3, K1, Z25 // 62016549f98c3ef1ffffff or 6201e549f98c3ef1ffffff
+ VPSUBW -15(BX), Z3, K1, Z25 // 62616549f98bf1ffffff or 6261e549f98bf1ffffff
+ VPSUBW Z8, Z21, K1, Z25 // 62415541f9c8 or 6241d541f9c8
+ VPSUBW Z2, Z21, K1, Z25 // 62615541f9ca or 6261d541f9ca
+ VPSUBW -15(R14)(R15*1), Z21, K1, Z25 // 62015541f98c3ef1ffffff or 6201d541f98c3ef1ffffff
+ VPSUBW -15(BX), Z21, K1, Z25 // 62615541f98bf1ffffff or 6261d541f98bf1ffffff
+ VPTESTMB X26, X3, K3, K3 // 6292650b26da
+ VPTESTMB 15(R8)(R14*4), X3, K3, K3 // 6292650b269cb00f000000
+ VPTESTMB -7(CX)(DX*4), X3, K3, K3 // 62f2650b269c91f9ffffff
+ VPTESTMB X26, X3, K3, K1 // 6292650b26ca
+ VPTESTMB 15(R8)(R14*4), X3, K3, K1 // 6292650b268cb00f000000
+ VPTESTMB -7(CX)(DX*4), X3, K3, K1 // 62f2650b268c91f9ffffff
+ VPTESTMB Y3, Y18, K4, K5 // 62f26d2426eb
+ VPTESTMB 15(R8)(R14*8), Y18, K4, K5 // 62926d2426acf00f000000
+ VPTESTMB -15(R14)(R15*2), Y18, K4, K5 // 62926d2426ac7ef1ffffff
+ VPTESTMB Y3, Y18, K4, K4 // 62f26d2426e3
+ VPTESTMB 15(R8)(R14*8), Y18, K4, K4 // 62926d2426a4f00f000000
+ VPTESTMB -15(R14)(R15*2), Y18, K4, K4 // 62926d2426a47ef1ffffff
+ VPTESTMB Z11, Z12, K5, K7 // 62d21d4d26fb
+ VPTESTMB Z5, Z12, K5, K7 // 62f21d4d26fd
+ VPTESTMB 17(SP)(BP*8), Z12, K5, K7 // 62f21d4d26bcec11000000
+ VPTESTMB 17(SP)(BP*4), Z12, K5, K7 // 62f21d4d26bcac11000000
+ VPTESTMB Z11, Z22, K5, K7 // 62d24d4526fb
+ VPTESTMB Z5, Z22, K5, K7 // 62f24d4526fd
+ VPTESTMB 17(SP)(BP*8), Z22, K5, K7 // 62f24d4526bcec11000000
+ VPTESTMB 17(SP)(BP*4), Z22, K5, K7 // 62f24d4526bcac11000000
+ VPTESTMB Z11, Z12, K5, K6 // 62d21d4d26f3
+ VPTESTMB Z5, Z12, K5, K6 // 62f21d4d26f5
+ VPTESTMB 17(SP)(BP*8), Z12, K5, K6 // 62f21d4d26b4ec11000000
+ VPTESTMB 17(SP)(BP*4), Z12, K5, K6 // 62f21d4d26b4ac11000000
+ VPTESTMB Z11, Z22, K5, K6 // 62d24d4526f3
+ VPTESTMB Z5, Z22, K5, K6 // 62f24d4526f5
+ VPTESTMB 17(SP)(BP*8), Z22, K5, K6 // 62f24d4526b4ec11000000
+ VPTESTMB 17(SP)(BP*4), Z22, K5, K6 // 62f24d4526b4ac11000000
+ VPTESTMW X15, X9, K4, K6 // 62d2b50c26f7
+ VPTESTMW -17(BP)(SI*2), X9, K4, K6 // 62f2b50c26b475efffffff
+ VPTESTMW 7(AX)(CX*2), X9, K4, K6 // 62f2b50c26b44807000000
+ VPTESTMW X15, X9, K4, K4 // 62d2b50c26e7
+ VPTESTMW -17(BP)(SI*2), X9, K4, K4 // 62f2b50c26a475efffffff
+ VPTESTMW 7(AX)(CX*2), X9, K4, K4 // 62f2b50c26a44807000000
+ VPTESTMW Y8, Y14, K7, K4 // 62d28d2f26e0
+ VPTESTMW (SI), Y14, K7, K4 // 62f28d2f2626
+ VPTESTMW 7(SI)(DI*2), Y14, K7, K4 // 62f28d2f26a47e07000000
+ VPTESTMW Y8, Y14, K7, K6 // 62d28d2f26f0
+ VPTESTMW (SI), Y14, K7, K6 // 62f28d2f2636
+ VPTESTMW 7(SI)(DI*2), Y14, K7, K6 // 62f28d2f26b47e07000000
+ VPTESTMW Z1, Z6, K2, K4 // 62f2cd4a26e1
+ VPTESTMW Z15, Z6, K2, K4 // 62d2cd4a26e7
+ VPTESTMW 7(AX), Z6, K2, K4 // 62f2cd4a26a007000000
+ VPTESTMW (DI), Z6, K2, K4 // 62f2cd4a2627
+ VPTESTMW Z1, Z22, K2, K4 // 62f2cd4226e1
+ VPTESTMW Z15, Z22, K2, K4 // 62d2cd4226e7
+ VPTESTMW 7(AX), Z22, K2, K4 // 62f2cd4226a007000000
+ VPTESTMW (DI), Z22, K2, K4 // 62f2cd422627
+ VPTESTMW Z1, Z6, K2, K5 // 62f2cd4a26e9
+ VPTESTMW Z15, Z6, K2, K5 // 62d2cd4a26ef
+ VPTESTMW 7(AX), Z6, K2, K5 // 62f2cd4a26a807000000
+ VPTESTMW (DI), Z6, K2, K5 // 62f2cd4a262f
+ VPTESTMW Z1, Z22, K2, K5 // 62f2cd4226e9
+ VPTESTMW Z15, Z22, K2, K5 // 62d2cd4226ef
+ VPTESTMW 7(AX), Z22, K2, K5 // 62f2cd4226a807000000
+ VPTESTMW (DI), Z22, K2, K5 // 62f2cd42262f
+ VPTESTNMB X18, X26, K5, K2 // 62b22e0526d2
+ VPTESTNMB 15(R8)(R14*1), X26, K5, K2 // 62922e052694300f000000
+ VPTESTNMB 15(R8)(R14*2), X26, K5, K2 // 62922e052694700f000000
+ VPTESTNMB X18, X26, K5, K7 // 62b22e0526fa
+ VPTESTNMB 15(R8)(R14*1), X26, K5, K7 // 62922e0526bc300f000000
+ VPTESTNMB 15(R8)(R14*2), X26, K5, K7 // 62922e0526bc700f000000
+ VPTESTNMB Y11, Y20, K3, K0 // 62d25e2326c3
+ VPTESTNMB 17(SP)(BP*8), Y20, K3, K0 // 62f25e232684ec11000000
+ VPTESTNMB 17(SP)(BP*4), Y20, K3, K0 // 62f25e232684ac11000000
+ VPTESTNMB Y11, Y20, K3, K5 // 62d25e2326eb
+ VPTESTNMB 17(SP)(BP*8), Y20, K3, K5 // 62f25e2326acec11000000
+ VPTESTNMB 17(SP)(BP*4), Y20, K3, K5 // 62f25e2326acac11000000
+ VPTESTNMB Z18, Z13, K4, K6 // 62b2164c26f2
+ VPTESTNMB Z8, Z13, K4, K6 // 62d2164c26f0
+ VPTESTNMB 99(R15)(R15*1), Z13, K4, K6 // 6292164c26b43f63000000
+ VPTESTNMB (DX), Z13, K4, K6 // 62f2164c2632
+ VPTESTNMB Z18, Z13, K4, K5 // 62b2164c26ea
+ VPTESTNMB Z8, Z13, K4, K5 // 62d2164c26e8
+ VPTESTNMB 99(R15)(R15*1), Z13, K4, K5 // 6292164c26ac3f63000000
+ VPTESTNMB (DX), Z13, K4, K5 // 62f2164c262a
+ VPTESTNMW X7, X3, K1, K5 // 62f2e60926ef
+ VPTESTNMW (CX), X3, K1, K5 // 62f2e6092629
+ VPTESTNMW 99(R15), X3, K1, K5 // 62d2e60926af63000000
+ VPTESTNMW X7, X3, K1, K4 // 62f2e60926e7
+ VPTESTNMW (CX), X3, K1, K4 // 62f2e6092621
+ VPTESTNMW 99(R15), X3, K1, K4 // 62d2e60926a763000000
+ VPTESTNMW Y20, Y20, K2, K4 // 62b2de2226e4
+ VPTESTNMW 7(AX), Y20, K2, K4 // 62f2de2226a007000000
+ VPTESTNMW (DI), Y20, K2, K4 // 62f2de222627
+ VPTESTNMW Y20, Y20, K2, K6 // 62b2de2226f4
+ VPTESTNMW 7(AX), Y20, K2, K6 // 62f2de2226b007000000
+ VPTESTNMW (DI), Y20, K2, K6 // 62f2de222637
+ VPTESTNMW Z28, Z12, K1, K1 // 62929e4926cc
+ VPTESTNMW Z13, Z12, K1, K1 // 62d29e4926cd
+ VPTESTNMW 7(SI)(DI*1), Z12, K1, K1 // 62f29e49268c3e07000000
+ VPTESTNMW 15(DX)(BX*8), Z12, K1, K1 // 62f29e49268cda0f000000
+ VPTESTNMW Z28, Z16, K1, K1 // 6292fe4126cc
+ VPTESTNMW Z13, Z16, K1, K1 // 62d2fe4126cd
+ VPTESTNMW 7(SI)(DI*1), Z16, K1, K1 // 62f2fe41268c3e07000000
+ VPTESTNMW 15(DX)(BX*8), Z16, K1, K1 // 62f2fe41268cda0f000000
+ VPTESTNMW Z28, Z12, K1, K3 // 62929e4926dc
+ VPTESTNMW Z13, Z12, K1, K3 // 62d29e4926dd
+ VPTESTNMW 7(SI)(DI*1), Z12, K1, K3 // 62f29e49269c3e07000000
+ VPTESTNMW 15(DX)(BX*8), Z12, K1, K3 // 62f29e49269cda0f000000
+ VPTESTNMW Z28, Z16, K1, K3 // 6292fe4126dc
+ VPTESTNMW Z13, Z16, K1, K3 // 62d2fe4126dd
+ VPTESTNMW 7(SI)(DI*1), Z16, K1, K3 // 62f2fe41269c3e07000000
+ VPTESTNMW 15(DX)(BX*8), Z16, K1, K3 // 62f2fe41269cda0f000000
+ VPUNPCKHBW X24, X0, K7, X0 // 62917d0f68c0 or 6291fd0f68c0
+ VPUNPCKHBW 99(R15)(R15*2), X0, K7, X0 // 62917d0f68847f63000000 or 6291fd0f68847f63000000
+ VPUNPCKHBW -7(DI), X0, K7, X0 // 62f17d0f6887f9ffffff or 62f1fd0f6887f9ffffff
+ VPUNPCKHBW Y28, Y28, K1, Y9 // 62111d2168cc or 62119d2168cc
+ VPUNPCKHBW 99(R15)(R15*1), Y28, K1, Y9 // 62111d21688c3f63000000 or 62119d21688c3f63000000
+ VPUNPCKHBW (DX), Y28, K1, Y9 // 62711d21680a or 62719d21680a
+ VPUNPCKHBW Z15, Z3, K1, Z14 // 6251654968f7 or 6251e54968f7
+ VPUNPCKHBW Z30, Z3, K1, Z14 // 6211654968f6 or 6211e54968f6
+ VPUNPCKHBW -7(DI)(R8*1), Z3, K1, Z14 // 6231654968b407f9ffffff or 6231e54968b407f9ffffff
+ VPUNPCKHBW (SP), Z3, K1, Z14 // 62716549683424 or 6271e549683424
+ VPUNPCKHBW Z15, Z12, K1, Z14 // 62511d4968f7 or 62519d4968f7
+ VPUNPCKHBW Z30, Z12, K1, Z14 // 62111d4968f6 or 62119d4968f6
+ VPUNPCKHBW -7(DI)(R8*1), Z12, K1, Z14 // 62311d4968b407f9ffffff or 62319d4968b407f9ffffff
+ VPUNPCKHBW (SP), Z12, K1, Z14 // 62711d49683424 or 62719d49683424
+ VPUNPCKHBW Z15, Z3, K1, Z28 // 6241654968e7 or 6241e54968e7
+ VPUNPCKHBW Z30, Z3, K1, Z28 // 6201654968e6 or 6201e54968e6
+ VPUNPCKHBW -7(DI)(R8*1), Z3, K1, Z28 // 6221654968a407f9ffffff or 6221e54968a407f9ffffff
+ VPUNPCKHBW (SP), Z3, K1, Z28 // 62616549682424 or 6261e549682424
+ VPUNPCKHBW Z15, Z12, K1, Z28 // 62411d4968e7 or 62419d4968e7
+ VPUNPCKHBW Z30, Z12, K1, Z28 // 62011d4968e6 or 62019d4968e6
+ VPUNPCKHBW -7(DI)(R8*1), Z12, K1, Z28 // 62211d4968a407f9ffffff or 62219d4968a407f9ffffff
+ VPUNPCKHBW (SP), Z12, K1, Z28 // 62611d49682424 or 62619d49682424
+ VPUNPCKHWD X21, X3, K4, X31 // 6221650c69fd or 6221e50c69fd
+ VPUNPCKHWD -17(BP), X3, K4, X31 // 6261650c69bdefffffff or 6261e50c69bdefffffff
+ VPUNPCKHWD -15(R14)(R15*8), X3, K4, X31 // 6201650c69bcfef1ffffff or 6201e50c69bcfef1ffffff
+ VPUNPCKHWD Y26, Y6, K5, Y12 // 62114d2d69e2 or 6211cd2d69e2
+ VPUNPCKHWD 7(SI)(DI*1), Y6, K5, Y12 // 62714d2d69a43e07000000 or 6271cd2d69a43e07000000
+ VPUNPCKHWD 15(DX)(BX*8), Y6, K5, Y12 // 62714d2d69a4da0f000000 or 6271cd2d69a4da0f000000
+ VPUNPCKHWD Z0, Z23, K7, Z20 // 62e1454769e0 or 62e1c54769e0
+ VPUNPCKHWD Z11, Z23, K7, Z20 // 62c1454769e3 or 62c1c54769e3
+ VPUNPCKHWD (AX), Z23, K7, Z20 // 62e145476920 or 62e1c5476920
+ VPUNPCKHWD 7(SI), Z23, K7, Z20 // 62e1454769a607000000 or 62e1c54769a607000000
+ VPUNPCKHWD Z0, Z19, K7, Z20 // 62e1654769e0 or 62e1e54769e0
+ VPUNPCKHWD Z11, Z19, K7, Z20 // 62c1654769e3 or 62c1e54769e3
+ VPUNPCKHWD (AX), Z19, K7, Z20 // 62e165476920 or 62e1e5476920
+ VPUNPCKHWD 7(SI), Z19, K7, Z20 // 62e1654769a607000000 or 62e1e54769a607000000
+ VPUNPCKHWD Z0, Z23, K7, Z0 // 62f1454769c0 or 62f1c54769c0
+ VPUNPCKHWD Z11, Z23, K7, Z0 // 62d1454769c3 or 62d1c54769c3
+ VPUNPCKHWD (AX), Z23, K7, Z0 // 62f145476900 or 62f1c5476900
+ VPUNPCKHWD 7(SI), Z23, K7, Z0 // 62f14547698607000000 or 62f1c547698607000000
+ VPUNPCKHWD Z0, Z19, K7, Z0 // 62f1654769c0 or 62f1e54769c0
+ VPUNPCKHWD Z11, Z19, K7, Z0 // 62d1654769c3 or 62d1e54769c3
+ VPUNPCKHWD (AX), Z19, K7, Z0 // 62f165476900 or 62f1e5476900
+ VPUNPCKHWD 7(SI), Z19, K7, Z0 // 62f16547698607000000 or 62f1e547698607000000
+ VPUNPCKLBW X13, X11, K7, X1 // 62d1250f60cd or 62d1a50f60cd
+ VPUNPCKLBW 17(SP)(BP*2), X11, K7, X1 // 62f1250f608c6c11000000 or 62f1a50f608c6c11000000
+ VPUNPCKLBW -7(DI)(R8*4), X11, K7, X1 // 62b1250f608c87f9ffffff or 62b1a50f608c87f9ffffff
+ VPUNPCKLBW Y28, Y8, K6, Y3 // 62913d2e60dc or 6291bd2e60dc
+ VPUNPCKLBW -7(DI)(R8*1), Y8, K6, Y3 // 62b13d2e609c07f9ffffff or 62b1bd2e609c07f9ffffff
+ VPUNPCKLBW (SP), Y8, K6, Y3 // 62f13d2e601c24 or 62f1bd2e601c24
+ VPUNPCKLBW Z0, Z24, K3, Z0 // 62f13d4360c0 or 62f1bd4360c0
+ VPUNPCKLBW Z26, Z24, K3, Z0 // 62913d4360c2 or 6291bd4360c2
+ VPUNPCKLBW (BX), Z24, K3, Z0 // 62f13d436003 or 62f1bd436003
+ VPUNPCKLBW -17(BP)(SI*1), Z24, K3, Z0 // 62f13d43608435efffffff or 62f1bd43608435efffffff
+ VPUNPCKLBW Z0, Z12, K3, Z0 // 62f11d4b60c0 or 62f19d4b60c0
+ VPUNPCKLBW Z26, Z12, K3, Z0 // 62911d4b60c2 or 62919d4b60c2
+ VPUNPCKLBW (BX), Z12, K3, Z0 // 62f11d4b6003 or 62f19d4b6003
+ VPUNPCKLBW -17(BP)(SI*1), Z12, K3, Z0 // 62f11d4b608435efffffff or 62f19d4b608435efffffff
+ VPUNPCKLBW Z0, Z24, K3, Z25 // 62613d4360c8 or 6261bd4360c8
+ VPUNPCKLBW Z26, Z24, K3, Z25 // 62013d4360ca or 6201bd4360ca
+ VPUNPCKLBW (BX), Z24, K3, Z25 // 62613d43600b or 6261bd43600b
+ VPUNPCKLBW -17(BP)(SI*1), Z24, K3, Z25 // 62613d43608c35efffffff or 6261bd43608c35efffffff
+ VPUNPCKLBW Z0, Z12, K3, Z25 // 62611d4b60c8 or 62619d4b60c8
+ VPUNPCKLBW Z26, Z12, K3, Z25 // 62011d4b60ca or 62019d4b60ca
+ VPUNPCKLBW (BX), Z12, K3, Z25 // 62611d4b600b or 62619d4b600b
+ VPUNPCKLBW -17(BP)(SI*1), Z12, K3, Z25 // 62611d4b608c35efffffff or 62619d4b608c35efffffff
+ VPUNPCKLWD X8, X8, K3, X19 // 62c13d0b61d8 or 62c1bd0b61d8
+ VPUNPCKLWD -15(R14)(R15*1), X8, K3, X19 // 62813d0b619c3ef1ffffff or 6281bd0b619c3ef1ffffff
+ VPUNPCKLWD -15(BX), X8, K3, X19 // 62e13d0b619bf1ffffff or 62e1bd0b619bf1ffffff
+ VPUNPCKLWD Y8, Y27, K4, Y22 // 62c1252461f0 or 62c1a52461f0
+ VPUNPCKLWD (AX), Y27, K4, Y22 // 62e125246130 or 62e1a5246130
+ VPUNPCKLWD 7(SI), Y27, K4, Y22 // 62e1252461b607000000 or 62e1a52461b607000000
+ VPUNPCKLWD Z6, Z21, K2, Z31 // 6261554261fe or 6261d54261fe
+ VPUNPCKLWD Z9, Z21, K2, Z31 // 6241554261f9 or 6241d54261f9
+ VPUNPCKLWD 17(SP)(BP*1), Z21, K2, Z31 // 6261554261bc2c11000000 or 6261d54261bc2c11000000
+ VPUNPCKLWD -7(CX)(DX*8), Z21, K2, Z31 // 6261554261bcd1f9ffffff or 6261d54261bcd1f9ffffff
+ VPUNPCKLWD Z6, Z9, K2, Z31 // 6261354a61fe or 6261b54a61fe
+ VPUNPCKLWD Z9, Z9, K2, Z31 // 6241354a61f9 or 6241b54a61f9
+ VPUNPCKLWD 17(SP)(BP*1), Z9, K2, Z31 // 6261354a61bc2c11000000 or 6261b54a61bc2c11000000
+ VPUNPCKLWD -7(CX)(DX*8), Z9, K2, Z31 // 6261354a61bcd1f9ffffff or 6261b54a61bcd1f9ffffff
+ VPUNPCKLWD Z6, Z21, K2, Z0 // 62f1554261c6 or 62f1d54261c6
+ VPUNPCKLWD Z9, Z21, K2, Z0 // 62d1554261c1 or 62d1d54261c1
+ VPUNPCKLWD 17(SP)(BP*1), Z21, K2, Z0 // 62f1554261842c11000000 or 62f1d54261842c11000000
+ VPUNPCKLWD -7(CX)(DX*8), Z21, K2, Z0 // 62f155426184d1f9ffffff or 62f1d5426184d1f9ffffff
+ VPUNPCKLWD Z6, Z9, K2, Z0 // 62f1354a61c6 or 62f1b54a61c6
+ VPUNPCKLWD Z9, Z9, K2, Z0 // 62d1354a61c1 or 62d1b54a61c1
+ VPUNPCKLWD 17(SP)(BP*1), Z9, K2, Z0 // 62f1354a61842c11000000 or 62f1b54a61842c11000000
+ VPUNPCKLWD -7(CX)(DX*8), Z9, K2, Z0 // 62f1354a6184d1f9ffffff or 62f1b54a6184d1f9ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512cd.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512cd.s
new file mode 100644
index 0000000..9b8b9fd
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512cd.s
@@ -0,0 +1,190 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512cd(SB), NOSPLIT, $0
+ VPBROADCASTMB2Q K1, X25 // 6262fe082ac9
+ VPBROADCASTMB2Q K5, X25 // 6262fe082acd
+ VPBROADCASTMB2Q K1, X11 // 6272fe082ad9
+ VPBROADCASTMB2Q K5, X11 // 6272fe082add
+ VPBROADCASTMB2Q K1, X17 // 62e2fe082ac9
+ VPBROADCASTMB2Q K5, X17 // 62e2fe082acd
+ VPBROADCASTMB2Q K3, Y0 // 62f2fe282ac3
+ VPBROADCASTMB2Q K1, Y0 // 62f2fe282ac1
+ VPBROADCASTMB2Q K3, Y19 // 62e2fe282adb
+ VPBROADCASTMB2Q K1, Y19 // 62e2fe282ad9
+ VPBROADCASTMB2Q K3, Y31 // 6262fe282afb
+ VPBROADCASTMB2Q K1, Y31 // 6262fe282af9
+ VPBROADCASTMB2Q K5, Z21 // 62e2fe482aed
+ VPBROADCASTMB2Q K4, Z21 // 62e2fe482aec
+ VPBROADCASTMB2Q K5, Z8 // 6272fe482ac5
+ VPBROADCASTMB2Q K4, Z8 // 6272fe482ac4
+ VPBROADCASTMW2D K7, X18 // 62e27e083ad7
+ VPBROADCASTMW2D K6, X18 // 62e27e083ad6
+ VPBROADCASTMW2D K7, X11 // 62727e083adf
+ VPBROADCASTMW2D K6, X11 // 62727e083ade
+ VPBROADCASTMW2D K7, X9 // 62727e083acf
+ VPBROADCASTMW2D K6, X9 // 62727e083ace
+ VPBROADCASTMW2D K4, Y22 // 62e27e283af4
+ VPBROADCASTMW2D K6, Y22 // 62e27e283af6
+ VPBROADCASTMW2D K4, Y9 // 62727e283acc
+ VPBROADCASTMW2D K6, Y9 // 62727e283ace
+ VPBROADCASTMW2D K4, Y23 // 62e27e283afc
+ VPBROADCASTMW2D K6, Y23 // 62e27e283afe
+ VPBROADCASTMW2D K0, Z16 // 62e27e483ac0
+ VPBROADCASTMW2D K7, Z16 // 62e27e483ac7
+ VPBROADCASTMW2D K0, Z9 // 62727e483ac8
+ VPBROADCASTMW2D K7, Z9 // 62727e483acf
+ VPCONFLICTD X6, K6, X6 // 62f27d0ec4f6
+ VPCONFLICTD X1, K6, X6 // 62f27d0ec4f1
+ VPCONFLICTD X8, K6, X6 // 62d27d0ec4f0
+ VPCONFLICTD 15(R8), K6, X6 // 62d27d0ec4b00f000000
+ VPCONFLICTD (BP), K6, X6 // 62f27d0ec47500
+ VPCONFLICTD X6, K6, X17 // 62e27d0ec4ce
+ VPCONFLICTD X1, K6, X17 // 62e27d0ec4c9
+ VPCONFLICTD X8, K6, X17 // 62c27d0ec4c8
+ VPCONFLICTD 15(R8), K6, X17 // 62c27d0ec4880f000000
+ VPCONFLICTD (BP), K6, X17 // 62e27d0ec44d00
+ VPCONFLICTD X6, K6, X28 // 62627d0ec4e6
+ VPCONFLICTD X1, K6, X28 // 62627d0ec4e1
+ VPCONFLICTD X8, K6, X28 // 62427d0ec4e0
+ VPCONFLICTD 15(R8), K6, X28 // 62427d0ec4a00f000000
+ VPCONFLICTD (BP), K6, X28 // 62627d0ec46500
+ VPCONFLICTD Y14, K3, Y2 // 62d27d2bc4d6
+ VPCONFLICTD Y8, K3, Y2 // 62d27d2bc4d0
+ VPCONFLICTD Y20, K3, Y2 // 62b27d2bc4d4
+ VPCONFLICTD -7(CX), K3, Y2 // 62f27d2bc491f9ffffff
+ VPCONFLICTD 15(DX)(BX*4), K3, Y2 // 62f27d2bc4949a0f000000
+ VPCONFLICTD Y14, K3, Y7 // 62d27d2bc4fe
+ VPCONFLICTD Y8, K3, Y7 // 62d27d2bc4f8
+ VPCONFLICTD Y20, K3, Y7 // 62b27d2bc4fc
+ VPCONFLICTD -7(CX), K3, Y7 // 62f27d2bc4b9f9ffffff
+ VPCONFLICTD 15(DX)(BX*4), K3, Y7 // 62f27d2bc4bc9a0f000000
+ VPCONFLICTD Y14, K3, Y21 // 62c27d2bc4ee
+ VPCONFLICTD Y8, K3, Y21 // 62c27d2bc4e8
+ VPCONFLICTD Y20, K3, Y21 // 62a27d2bc4ec
+ VPCONFLICTD -7(CX), K3, Y21 // 62e27d2bc4a9f9ffffff
+ VPCONFLICTD 15(DX)(BX*4), K3, Y21 // 62e27d2bc4ac9a0f000000
+ VPCONFLICTD Z11, K7, Z21 // 62c27d4fc4eb
+ VPCONFLICTD Z25, K7, Z21 // 62827d4fc4e9
+ VPCONFLICTD -15(R14)(R15*1), K7, Z21 // 62827d4fc4ac3ef1ffffff
+ VPCONFLICTD -15(BX), K7, Z21 // 62e27d4fc4abf1ffffff
+ VPCONFLICTD Z11, K7, Z13 // 62527d4fc4eb
+ VPCONFLICTD Z25, K7, Z13 // 62127d4fc4e9
+ VPCONFLICTD -15(R14)(R15*1), K7, Z13 // 62127d4fc4ac3ef1ffffff
+ VPCONFLICTD -15(BX), K7, Z13 // 62727d4fc4abf1ffffff
+ VPCONFLICTQ X11, K4, X8 // 6252fd0cc4c3
+ VPCONFLICTQ X16, K4, X8 // 6232fd0cc4c0
+ VPCONFLICTQ X6, K4, X8 // 6272fd0cc4c6
+ VPCONFLICTQ 15(R8)(R14*8), K4, X8 // 6212fd0cc484f00f000000
+ VPCONFLICTQ -15(R14)(R15*2), K4, X8 // 6212fd0cc4847ef1ffffff
+ VPCONFLICTQ X11, K4, X6 // 62d2fd0cc4f3
+ VPCONFLICTQ X16, K4, X6 // 62b2fd0cc4f0
+ VPCONFLICTQ X6, K4, X6 // 62f2fd0cc4f6
+ VPCONFLICTQ 15(R8)(R14*8), K4, X6 // 6292fd0cc4b4f00f000000
+ VPCONFLICTQ -15(R14)(R15*2), K4, X6 // 6292fd0cc4b47ef1ffffff
+ VPCONFLICTQ X11, K4, X0 // 62d2fd0cc4c3
+ VPCONFLICTQ X16, K4, X0 // 62b2fd0cc4c0
+ VPCONFLICTQ X6, K4, X0 // 62f2fd0cc4c6
+ VPCONFLICTQ 15(R8)(R14*8), K4, X0 // 6292fd0cc484f00f000000
+ VPCONFLICTQ -15(R14)(R15*2), K4, X0 // 6292fd0cc4847ef1ffffff
+ VPCONFLICTQ Y5, K4, Y11 // 6272fd2cc4dd
+ VPCONFLICTQ Y18, K4, Y11 // 6232fd2cc4da
+ VPCONFLICTQ Y20, K4, Y11 // 6232fd2cc4dc
+ VPCONFLICTQ 99(R15)(R15*8), K4, Y11 // 6212fd2cc49cff63000000
+ VPCONFLICTQ 7(AX)(CX*8), K4, Y11 // 6272fd2cc49cc807000000
+ VPCONFLICTQ Y5, K4, Y24 // 6262fd2cc4c5
+ VPCONFLICTQ Y18, K4, Y24 // 6222fd2cc4c2
+ VPCONFLICTQ Y20, K4, Y24 // 6222fd2cc4c4
+ VPCONFLICTQ 99(R15)(R15*8), K4, Y24 // 6202fd2cc484ff63000000
+ VPCONFLICTQ 7(AX)(CX*8), K4, Y24 // 6262fd2cc484c807000000
+ VPCONFLICTQ Y5, K4, Y1 // 62f2fd2cc4cd
+ VPCONFLICTQ Y18, K4, Y1 // 62b2fd2cc4ca
+ VPCONFLICTQ Y20, K4, Y1 // 62b2fd2cc4cc
+ VPCONFLICTQ 99(R15)(R15*8), K4, Y1 // 6292fd2cc48cff63000000
+ VPCONFLICTQ 7(AX)(CX*8), K4, Y1 // 62f2fd2cc48cc807000000
+ VPCONFLICTQ Z27, K7, Z3 // 6292fd4fc4db
+ VPCONFLICTQ Z15, K7, Z3 // 62d2fd4fc4df
+ VPCONFLICTQ 7(AX)(CX*4), K7, Z3 // 62f2fd4fc49c8807000000
+ VPCONFLICTQ 7(AX)(CX*1), K7, Z3 // 62f2fd4fc49c0807000000
+ VPCONFLICTQ Z27, K7, Z12 // 6212fd4fc4e3
+ VPCONFLICTQ Z15, K7, Z12 // 6252fd4fc4e7
+ VPCONFLICTQ 7(AX)(CX*4), K7, Z12 // 6272fd4fc4a48807000000
+ VPCONFLICTQ 7(AX)(CX*1), K7, Z12 // 6272fd4fc4a40807000000
+ VPLZCNTD X3, K3, X17 // 62e27d0b44cb
+ VPLZCNTD X26, K3, X17 // 62827d0b44ca
+ VPLZCNTD X23, K3, X17 // 62a27d0b44cf
+ VPLZCNTD 15(DX)(BX*1), K3, X17 // 62e27d0b448c1a0f000000
+ VPLZCNTD -7(CX)(DX*2), K3, X17 // 62e27d0b448c51f9ffffff
+ VPLZCNTD X3, K3, X15 // 62727d0b44fb
+ VPLZCNTD X26, K3, X15 // 62127d0b44fa
+ VPLZCNTD X23, K3, X15 // 62327d0b44ff
+ VPLZCNTD 15(DX)(BX*1), K3, X15 // 62727d0b44bc1a0f000000
+ VPLZCNTD -7(CX)(DX*2), K3, X15 // 62727d0b44bc51f9ffffff
+ VPLZCNTD X3, K3, X8 // 62727d0b44c3
+ VPLZCNTD X26, K3, X8 // 62127d0b44c2
+ VPLZCNTD X23, K3, X8 // 62327d0b44c7
+ VPLZCNTD 15(DX)(BX*1), K3, X8 // 62727d0b44841a0f000000
+ VPLZCNTD -7(CX)(DX*2), K3, X8 // 62727d0b448451f9ffffff
+ VPLZCNTD Y5, K3, Y20 // 62e27d2b44e5
+ VPLZCNTD Y28, K3, Y20 // 62827d2b44e4
+ VPLZCNTD Y7, K3, Y20 // 62e27d2b44e7
+ VPLZCNTD (BX), K3, Y20 // 62e27d2b4423
+ VPLZCNTD -17(BP)(SI*1), K3, Y20 // 62e27d2b44a435efffffff
+ VPLZCNTD Y5, K3, Y12 // 62727d2b44e5
+ VPLZCNTD Y28, K3, Y12 // 62127d2b44e4
+ VPLZCNTD Y7, K3, Y12 // 62727d2b44e7
+ VPLZCNTD (BX), K3, Y12 // 62727d2b4423
+ VPLZCNTD -17(BP)(SI*1), K3, Y12 // 62727d2b44a435efffffff
+ VPLZCNTD Y5, K3, Y3 // 62f27d2b44dd
+ VPLZCNTD Y28, K3, Y3 // 62927d2b44dc
+ VPLZCNTD Y7, K3, Y3 // 62f27d2b44df
+ VPLZCNTD (BX), K3, Y3 // 62f27d2b441b
+ VPLZCNTD -17(BP)(SI*1), K3, Y3 // 62f27d2b449c35efffffff
+ VPLZCNTD Z21, K3, Z3 // 62b27d4b44dd
+ VPLZCNTD Z13, K3, Z3 // 62d27d4b44dd
+ VPLZCNTD 17(SP)(BP*8), K3, Z3 // 62f27d4b449cec11000000
+ VPLZCNTD 17(SP)(BP*4), K3, Z3 // 62f27d4b449cac11000000
+ VPLZCNTD Z21, K3, Z0 // 62b27d4b44c5
+ VPLZCNTD Z13, K3, Z0 // 62d27d4b44c5
+ VPLZCNTD 17(SP)(BP*8), K3, Z0 // 62f27d4b4484ec11000000
+ VPLZCNTD 17(SP)(BP*4), K3, Z0 // 62f27d4b4484ac11000000
+ VPLZCNTQ X9, K2, X13 // 6252fd0a44e9
+ VPLZCNTQ X15, K2, X13 // 6252fd0a44ef
+ VPLZCNTQ X26, K2, X13 // 6212fd0a44ea
+ VPLZCNTQ -17(BP), K2, X13 // 6272fd0a44adefffffff
+ VPLZCNTQ -15(R14)(R15*8), K2, X13 // 6212fd0a44acfef1ffffff
+ VPLZCNTQ X9, K2, X28 // 6242fd0a44e1
+ VPLZCNTQ X15, K2, X28 // 6242fd0a44e7
+ VPLZCNTQ X26, K2, X28 // 6202fd0a44e2
+ VPLZCNTQ -17(BP), K2, X28 // 6262fd0a44a5efffffff
+ VPLZCNTQ -15(R14)(R15*8), K2, X28 // 6202fd0a44a4fef1ffffff
+ VPLZCNTQ X9, K2, X24 // 6242fd0a44c1
+ VPLZCNTQ X15, K2, X24 // 6242fd0a44c7
+ VPLZCNTQ X26, K2, X24 // 6202fd0a44c2
+ VPLZCNTQ -17(BP), K2, X24 // 6262fd0a4485efffffff
+ VPLZCNTQ -15(R14)(R15*8), K2, X24 // 6202fd0a4484fef1ffffff
+ VPLZCNTQ Y12, K1, Y0 // 62d2fd2944c4
+ VPLZCNTQ Y1, K1, Y0 // 62f2fd2944c1
+ VPLZCNTQ Y14, K1, Y0 // 62d2fd2944c6
+ VPLZCNTQ 15(R8)(R14*4), K1, Y0 // 6292fd294484b00f000000
+ VPLZCNTQ -7(CX)(DX*4), K1, Y0 // 62f2fd29448491f9ffffff
+ VPLZCNTQ Y12, K1, Y22 // 62c2fd2944f4
+ VPLZCNTQ Y1, K1, Y22 // 62e2fd2944f1
+ VPLZCNTQ Y14, K1, Y22 // 62c2fd2944f6
+ VPLZCNTQ 15(R8)(R14*4), K1, Y22 // 6282fd2944b4b00f000000
+ VPLZCNTQ -7(CX)(DX*4), K1, Y22 // 62e2fd2944b491f9ffffff
+ VPLZCNTQ Y12, K1, Y13 // 6252fd2944ec
+ VPLZCNTQ Y1, K1, Y13 // 6272fd2944e9
+ VPLZCNTQ Y14, K1, Y13 // 6252fd2944ee
+ VPLZCNTQ 15(R8)(R14*4), K1, Y13 // 6212fd2944acb00f000000
+ VPLZCNTQ -7(CX)(DX*4), K1, Y13 // 6272fd2944ac91f9ffffff
+ VPLZCNTQ Z3, K2, Z11 // 6272fd4a44db
+ VPLZCNTQ Z12, K2, Z11 // 6252fd4a44dc
+ VPLZCNTQ 7(SI)(DI*4), K2, Z11 // 6272fd4a449cbe07000000
+ VPLZCNTQ -7(DI)(R8*2), K2, Z11 // 6232fd4a449c47f9ffffff
+ VPLZCNTQ Z3, K2, Z25 // 6262fd4a44cb
+ VPLZCNTQ Z12, K2, Z25 // 6242fd4a44cc
+ VPLZCNTQ 7(SI)(DI*4), K2, Z25 // 6262fd4a448cbe07000000
+ VPLZCNTQ -7(DI)(R8*2), K2, Z25 // 6222fd4a448c47f9ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512dq.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512dq.s
new file mode 100644
index 0000000..9861f4a
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512dq.s
@@ -0,0 +1,2668 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512dq(SB), NOSPLIT, $0
+ KADDB K3, K1, K6 // c5f54af3
+ KADDB K1, K1, K6 // c5f54af1
+ KADDB K3, K5, K6 // c5d54af3
+ KADDB K1, K5, K6 // c5d54af1
+ KADDB K3, K1, K5 // c5f54aeb
+ KADDB K1, K1, K5 // c5f54ae9
+ KADDB K3, K5, K5 // c5d54aeb
+ KADDB K1, K5, K5 // c5d54ae9
+ KADDW K6, K6, K1 // c5cc4ace
+ KADDW K4, K6, K1 // c5cc4acc
+ KADDW K6, K7, K1 // c5c44ace
+ KADDW K4, K7, K1 // c5c44acc
+ KADDW K6, K6, K3 // c5cc4ade
+ KADDW K4, K6, K3 // c5cc4adc
+ KADDW K6, K7, K3 // c5c44ade
+ KADDW K4, K7, K3 // c5c44adc
+ KANDB K2, K4, K4 // c5dd41e2
+ KANDB K7, K4, K4 // c5dd41e7
+ KANDB K2, K5, K4 // c5d541e2
+ KANDB K7, K5, K4 // c5d541e7
+ KANDB K2, K4, K6 // c5dd41f2
+ KANDB K7, K4, K6 // c5dd41f7
+ KANDB K2, K5, K6 // c5d541f2
+ KANDB K7, K5, K6 // c5d541f7
+ KANDNB K7, K5, K3 // c5d542df
+ KANDNB K6, K5, K3 // c5d542de
+ KANDNB K7, K4, K3 // c5dd42df
+ KANDNB K6, K4, K3 // c5dd42de
+ KANDNB K7, K5, K1 // c5d542cf
+ KANDNB K6, K5, K1 // c5d542ce
+ KANDNB K7, K4, K1 // c5dd42cf
+ KANDNB K6, K4, K1 // c5dd42ce
+ KMOVB K7, 17(SP) // c5f9917c2411
+ KMOVB K6, 17(SP) // c5f991742411
+ KMOVB K7, -17(BP)(SI*4) // c5f9917cb5ef
+ KMOVB K6, -17(BP)(SI*4) // c5f99174b5ef
+ KMOVB K4, AX // c5f993c4
+ KMOVB K6, AX // c5f993c6
+ KMOVB K4, R9 // c57993cc
+ KMOVB K6, R9 // c57993ce
+ KMOVB K5, K0 // c5f990c5
+ KMOVB K4, K0 // c5f990c4
+ KMOVB 7(AX), K0 // c5f9904007
+ KMOVB (DI), K0 // c5f99007
+ KMOVB K5, K7 // c5f990fd
+ KMOVB K4, K7 // c5f990fc
+ KMOVB 7(AX), K7 // c5f9907807
+ KMOVB (DI), K7 // c5f9903f
+ KMOVB CX, K4 // c5f992e1
+ KMOVB SP, K4 // c5f992e4
+ KMOVB CX, K6 // c5f992f1
+ KMOVB SP, K6 // c5f992f4
+ KNOTB K1, K4 // c5f944e1
+ KNOTB K3, K4 // c5f944e3
+ KNOTB K1, K6 // c5f944f1
+ KNOTB K3, K6 // c5f944f3
+ KORB K3, K1, K6 // c5f545f3
+ KORB K1, K1, K6 // c5f545f1
+ KORB K3, K5, K6 // c5d545f3
+ KORB K1, K5, K6 // c5d545f1
+ KORB K3, K1, K5 // c5f545eb
+ KORB K1, K1, K5 // c5f545e9
+ KORB K3, K5, K5 // c5d545eb
+ KORB K1, K5, K5 // c5d545e9
+ KORTESTB K6, K1 // c5f998ce
+ KORTESTB K7, K1 // c5f998cf
+ KORTESTB K6, K3 // c5f998de
+ KORTESTB K7, K3 // c5f998df
+ KSHIFTLB $127, K4, K7 // c4e37932fc7f
+ KSHIFTLB $127, K6, K7 // c4e37932fe7f
+ KSHIFTLB $127, K4, K6 // c4e37932f47f
+ KSHIFTLB $127, K6, K6 // c4e37932f67f
+ KSHIFTRB $42, K4, K4 // c4e37930e42a
+ KSHIFTRB $42, K5, K4 // c4e37930e52a
+ KSHIFTRB $42, K4, K6 // c4e37930f42a
+ KSHIFTRB $42, K5, K6 // c4e37930f52a
+ KTESTB K4, K7 // c5f999fc
+ KTESTB K6, K7 // c5f999fe
+ KTESTB K4, K6 // c5f999f4
+ KTESTB K6, K6 // c5f999f6
+ KTESTW K6, K6 // c5f899f6
+ KTESTW K4, K6 // c5f899f4
+ KTESTW K6, K7 // c5f899fe
+ KTESTW K4, K7 // c5f899fc
+ KXNORB K5, K0, K4 // c5fd46e5
+ KXNORB K4, K0, K4 // c5fd46e4
+ KXNORB K5, K7, K4 // c5c546e5
+ KXNORB K4, K7, K4 // c5c546e4
+ KXNORB K5, K0, K6 // c5fd46f5
+ KXNORB K4, K0, K6 // c5fd46f4
+ KXNORB K5, K7, K6 // c5c546f5
+ KXNORB K4, K7, K6 // c5c546f4
+ KXORB K5, K3, K1 // c5e547cd
+ KXORB K4, K3, K1 // c5e547cc
+ KXORB K5, K1, K1 // c5f547cd
+ KXORB K4, K1, K1 // c5f547cc
+ KXORB K5, K3, K5 // c5e547ed
+ KXORB K4, K3, K5 // c5e547ec
+ KXORB K5, K1, K5 // c5f547ed
+ KXORB K4, K1, K5 // c5f547ec
+ VANDNPD X15, X0, K4, X22 // 62c1fd0c55f7
+ VANDNPD X11, X0, K4, X22 // 62c1fd0c55f3
+ VANDNPD X0, X0, K4, X22 // 62e1fd0c55f0
+ VANDNPD (R8), X0, K4, X22 // 62c1fd0c5530
+ VANDNPD 15(DX)(BX*2), X0, K4, X22 // 62e1fd0c55b45a0f000000
+ VANDNPD X15, X17, K4, X22 // 62c1f50455f7
+ VANDNPD X11, X17, K4, X22 // 62c1f50455f3
+ VANDNPD X0, X17, K4, X22 // 62e1f50455f0
+ VANDNPD (R8), X17, K4, X22 // 62c1f5045530
+ VANDNPD 15(DX)(BX*2), X17, K4, X22 // 62e1f50455b45a0f000000
+ VANDNPD X15, X7, K4, X22 // 62c1c50c55f7
+ VANDNPD X11, X7, K4, X22 // 62c1c50c55f3
+ VANDNPD X0, X7, K4, X22 // 62e1c50c55f0
+ VANDNPD (R8), X7, K4, X22 // 62c1c50c5530
+ VANDNPD 15(DX)(BX*2), X7, K4, X22 // 62e1c50c55b45a0f000000
+ VANDNPD X15, X0, K4, X5 // 62d1fd0c55ef
+ VANDNPD X11, X0, K4, X5 // 62d1fd0c55eb
+ VANDNPD X0, X0, K4, X5 // 62f1fd0c55e8
+ VANDNPD (R8), X0, K4, X5 // 62d1fd0c5528
+ VANDNPD 15(DX)(BX*2), X0, K4, X5 // 62f1fd0c55ac5a0f000000
+ VANDNPD X15, X17, K4, X5 // 62d1f50455ef
+ VANDNPD X11, X17, K4, X5 // 62d1f50455eb
+ VANDNPD X0, X17, K4, X5 // 62f1f50455e8
+ VANDNPD (R8), X17, K4, X5 // 62d1f5045528
+ VANDNPD 15(DX)(BX*2), X17, K4, X5 // 62f1f50455ac5a0f000000
+ VANDNPD X15, X7, K4, X5 // 62d1c50c55ef
+ VANDNPD X11, X7, K4, X5 // 62d1c50c55eb
+ VANDNPD X0, X7, K4, X5 // 62f1c50c55e8
+ VANDNPD (R8), X7, K4, X5 // 62d1c50c5528
+ VANDNPD 15(DX)(BX*2), X7, K4, X5 // 62f1c50c55ac5a0f000000
+ VANDNPD X15, X0, K4, X14 // 6251fd0c55f7
+ VANDNPD X11, X0, K4, X14 // 6251fd0c55f3
+ VANDNPD X0, X0, K4, X14 // 6271fd0c55f0
+ VANDNPD (R8), X0, K4, X14 // 6251fd0c5530
+ VANDNPD 15(DX)(BX*2), X0, K4, X14 // 6271fd0c55b45a0f000000
+ VANDNPD X15, X17, K4, X14 // 6251f50455f7
+ VANDNPD X11, X17, K4, X14 // 6251f50455f3
+ VANDNPD X0, X17, K4, X14 // 6271f50455f0
+ VANDNPD (R8), X17, K4, X14 // 6251f5045530
+ VANDNPD 15(DX)(BX*2), X17, K4, X14 // 6271f50455b45a0f000000
+ VANDNPD X15, X7, K4, X14 // 6251c50c55f7
+ VANDNPD X11, X7, K4, X14 // 6251c50c55f3
+ VANDNPD X0, X7, K4, X14 // 6271c50c55f0
+ VANDNPD (R8), X7, K4, X14 // 6251c50c5530
+ VANDNPD 15(DX)(BX*2), X7, K4, X14 // 6271c50c55b45a0f000000
+ VANDNPD Y17, Y12, K5, Y0 // 62b19d2d55c1
+ VANDNPD Y7, Y12, K5, Y0 // 62f19d2d55c7
+ VANDNPD Y9, Y12, K5, Y0 // 62d19d2d55c1
+ VANDNPD 99(R15)(R15*8), Y12, K5, Y0 // 62919d2d5584ff63000000
+ VANDNPD 7(AX)(CX*8), Y12, K5, Y0 // 62f19d2d5584c807000000
+ VANDNPD Y17, Y1, K5, Y0 // 62b1f52d55c1
+ VANDNPD Y7, Y1, K5, Y0 // 62f1f52d55c7
+ VANDNPD Y9, Y1, K5, Y0 // 62d1f52d55c1
+ VANDNPD 99(R15)(R15*8), Y1, K5, Y0 // 6291f52d5584ff63000000
+ VANDNPD 7(AX)(CX*8), Y1, K5, Y0 // 62f1f52d5584c807000000
+ VANDNPD Y17, Y14, K5, Y0 // 62b18d2d55c1
+ VANDNPD Y7, Y14, K5, Y0 // 62f18d2d55c7
+ VANDNPD Y9, Y14, K5, Y0 // 62d18d2d55c1
+ VANDNPD 99(R15)(R15*8), Y14, K5, Y0 // 62918d2d5584ff63000000
+ VANDNPD 7(AX)(CX*8), Y14, K5, Y0 // 62f18d2d5584c807000000
+ VANDNPD Y17, Y12, K5, Y22 // 62a19d2d55f1
+ VANDNPD Y7, Y12, K5, Y22 // 62e19d2d55f7
+ VANDNPD Y9, Y12, K5, Y22 // 62c19d2d55f1
+ VANDNPD 99(R15)(R15*8), Y12, K5, Y22 // 62819d2d55b4ff63000000
+ VANDNPD 7(AX)(CX*8), Y12, K5, Y22 // 62e19d2d55b4c807000000
+ VANDNPD Y17, Y1, K5, Y22 // 62a1f52d55f1
+ VANDNPD Y7, Y1, K5, Y22 // 62e1f52d55f7
+ VANDNPD Y9, Y1, K5, Y22 // 62c1f52d55f1
+ VANDNPD 99(R15)(R15*8), Y1, K5, Y22 // 6281f52d55b4ff63000000
+ VANDNPD 7(AX)(CX*8), Y1, K5, Y22 // 62e1f52d55b4c807000000
+ VANDNPD Y17, Y14, K5, Y22 // 62a18d2d55f1
+ VANDNPD Y7, Y14, K5, Y22 // 62e18d2d55f7
+ VANDNPD Y9, Y14, K5, Y22 // 62c18d2d55f1
+ VANDNPD 99(R15)(R15*8), Y14, K5, Y22 // 62818d2d55b4ff63000000
+ VANDNPD 7(AX)(CX*8), Y14, K5, Y22 // 62e18d2d55b4c807000000
+ VANDNPD Y17, Y12, K5, Y13 // 62319d2d55e9
+ VANDNPD Y7, Y12, K5, Y13 // 62719d2d55ef
+ VANDNPD Y9, Y12, K5, Y13 // 62519d2d55e9
+ VANDNPD 99(R15)(R15*8), Y12, K5, Y13 // 62119d2d55acff63000000
+ VANDNPD 7(AX)(CX*8), Y12, K5, Y13 // 62719d2d55acc807000000
+ VANDNPD Y17, Y1, K5, Y13 // 6231f52d55e9
+ VANDNPD Y7, Y1, K5, Y13 // 6271f52d55ef
+ VANDNPD Y9, Y1, K5, Y13 // 6251f52d55e9
+ VANDNPD 99(R15)(R15*8), Y1, K5, Y13 // 6211f52d55acff63000000
+ VANDNPD 7(AX)(CX*8), Y1, K5, Y13 // 6271f52d55acc807000000
+ VANDNPD Y17, Y14, K5, Y13 // 62318d2d55e9
+ VANDNPD Y7, Y14, K5, Y13 // 62718d2d55ef
+ VANDNPD Y9, Y14, K5, Y13 // 62518d2d55e9
+ VANDNPD 99(R15)(R15*8), Y14, K5, Y13 // 62118d2d55acff63000000
+ VANDNPD 7(AX)(CX*8), Y14, K5, Y13 // 62718d2d55acc807000000
+ VANDNPD Z20, Z0, K7, Z7 // 62b1fd4f55fc
+ VANDNPD Z28, Z0, K7, Z7 // 6291fd4f55fc
+ VANDNPD 99(R15)(R15*8), Z0, K7, Z7 // 6291fd4f55bcff63000000
+ VANDNPD 7(AX)(CX*8), Z0, K7, Z7 // 62f1fd4f55bcc807000000
+ VANDNPD Z20, Z6, K7, Z7 // 62b1cd4f55fc
+ VANDNPD Z28, Z6, K7, Z7 // 6291cd4f55fc
+ VANDNPD 99(R15)(R15*8), Z6, K7, Z7 // 6291cd4f55bcff63000000
+ VANDNPD 7(AX)(CX*8), Z6, K7, Z7 // 62f1cd4f55bcc807000000
+ VANDNPD Z20, Z0, K7, Z9 // 6231fd4f55cc
+ VANDNPD Z28, Z0, K7, Z9 // 6211fd4f55cc
+ VANDNPD 99(R15)(R15*8), Z0, K7, Z9 // 6211fd4f558cff63000000
+ VANDNPD 7(AX)(CX*8), Z0, K7, Z9 // 6271fd4f558cc807000000
+ VANDNPD Z20, Z6, K7, Z9 // 6231cd4f55cc
+ VANDNPD Z28, Z6, K7, Z9 // 6211cd4f55cc
+ VANDNPD 99(R15)(R15*8), Z6, K7, Z9 // 6211cd4f558cff63000000
+ VANDNPD 7(AX)(CX*8), Z6, K7, Z9 // 6271cd4f558cc807000000
+ VANDNPS X15, X25, K7, X18 // 62c1340755d7
+ VANDNPS X28, X25, K7, X18 // 6281340755d4
+ VANDNPS 17(SP)(BP*1), X25, K7, X18 // 62e1340755942c11000000
+ VANDNPS -7(CX)(DX*8), X25, K7, X18 // 62e134075594d1f9ffffff
+ VANDNPS X15, X3, K7, X18 // 62c1640f55d7
+ VANDNPS X28, X3, K7, X18 // 6281640f55d4
+ VANDNPS 17(SP)(BP*1), X3, K7, X18 // 62e1640f55942c11000000
+ VANDNPS -7(CX)(DX*8), X3, K7, X18 // 62e1640f5594d1f9ffffff
+ VANDNPS X15, X18, K7, X18 // 62c16c0755d7
+ VANDNPS X28, X18, K7, X18 // 62816c0755d4
+ VANDNPS 17(SP)(BP*1), X18, K7, X18 // 62e16c0755942c11000000
+ VANDNPS -7(CX)(DX*8), X18, K7, X18 // 62e16c075594d1f9ffffff
+ VANDNPS X15, X25, K7, X8 // 6251340755c7
+ VANDNPS X28, X25, K7, X8 // 6211340755c4
+ VANDNPS 17(SP)(BP*1), X25, K7, X8 // 6271340755842c11000000
+ VANDNPS -7(CX)(DX*8), X25, K7, X8 // 627134075584d1f9ffffff
+ VANDNPS X15, X3, K7, X8 // 6251640f55c7
+ VANDNPS X28, X3, K7, X8 // 6211640f55c4
+ VANDNPS 17(SP)(BP*1), X3, K7, X8 // 6271640f55842c11000000
+ VANDNPS -7(CX)(DX*8), X3, K7, X8 // 6271640f5584d1f9ffffff
+ VANDNPS X15, X18, K7, X8 // 62516c0755c7
+ VANDNPS X28, X18, K7, X8 // 62116c0755c4
+ VANDNPS 17(SP)(BP*1), X18, K7, X8 // 62716c0755842c11000000
+ VANDNPS -7(CX)(DX*8), X18, K7, X8 // 62716c075584d1f9ffffff
+ VANDNPS X15, X25, K7, X27 // 6241340755df
+ VANDNPS X28, X25, K7, X27 // 6201340755dc
+ VANDNPS 17(SP)(BP*1), X25, K7, X27 // 62613407559c2c11000000
+ VANDNPS -7(CX)(DX*8), X25, K7, X27 // 62613407559cd1f9ffffff
+ VANDNPS X15, X3, K7, X27 // 6241640f55df
+ VANDNPS X28, X3, K7, X27 // 6201640f55dc
+ VANDNPS 17(SP)(BP*1), X3, K7, X27 // 6261640f559c2c11000000
+ VANDNPS -7(CX)(DX*8), X3, K7, X27 // 6261640f559cd1f9ffffff
+ VANDNPS X15, X18, K7, X27 // 62416c0755df
+ VANDNPS X28, X18, K7, X27 // 62016c0755dc
+ VANDNPS 17(SP)(BP*1), X18, K7, X27 // 62616c07559c2c11000000
+ VANDNPS -7(CX)(DX*8), X18, K7, X27 // 62616c07559cd1f9ffffff
+ VANDNPS Y2, Y28, K6, Y31 // 62611c2655fa
+ VANDNPS Y21, Y28, K6, Y31 // 62211c2655fd
+ VANDNPS Y12, Y28, K6, Y31 // 62411c2655fc
+ VANDNPS (AX), Y28, K6, Y31 // 62611c265538
+ VANDNPS 7(SI), Y28, K6, Y31 // 62611c2655be07000000
+ VANDNPS Y2, Y13, K6, Y31 // 6261142e55fa
+ VANDNPS Y21, Y13, K6, Y31 // 6221142e55fd
+ VANDNPS Y12, Y13, K6, Y31 // 6241142e55fc
+ VANDNPS (AX), Y13, K6, Y31 // 6261142e5538
+ VANDNPS 7(SI), Y13, K6, Y31 // 6261142e55be07000000
+ VANDNPS Y2, Y7, K6, Y31 // 6261442e55fa
+ VANDNPS Y21, Y7, K6, Y31 // 6221442e55fd
+ VANDNPS Y12, Y7, K6, Y31 // 6241442e55fc
+ VANDNPS (AX), Y7, K6, Y31 // 6261442e5538
+ VANDNPS 7(SI), Y7, K6, Y31 // 6261442e55be07000000
+ VANDNPS Y2, Y28, K6, Y8 // 62711c2655c2
+ VANDNPS Y21, Y28, K6, Y8 // 62311c2655c5
+ VANDNPS Y12, Y28, K6, Y8 // 62511c2655c4
+ VANDNPS (AX), Y28, K6, Y8 // 62711c265500
+ VANDNPS 7(SI), Y28, K6, Y8 // 62711c26558607000000
+ VANDNPS Y2, Y13, K6, Y8 // 6271142e55c2
+ VANDNPS Y21, Y13, K6, Y8 // 6231142e55c5
+ VANDNPS Y12, Y13, K6, Y8 // 6251142e55c4
+ VANDNPS (AX), Y13, K6, Y8 // 6271142e5500
+ VANDNPS 7(SI), Y13, K6, Y8 // 6271142e558607000000
+ VANDNPS Y2, Y7, K6, Y8 // 6271442e55c2
+ VANDNPS Y21, Y7, K6, Y8 // 6231442e55c5
+ VANDNPS Y12, Y7, K6, Y8 // 6251442e55c4
+ VANDNPS (AX), Y7, K6, Y8 // 6271442e5500
+ VANDNPS 7(SI), Y7, K6, Y8 // 6271442e558607000000
+ VANDNPS Y2, Y28, K6, Y1 // 62f11c2655ca
+ VANDNPS Y21, Y28, K6, Y1 // 62b11c2655cd
+ VANDNPS Y12, Y28, K6, Y1 // 62d11c2655cc
+ VANDNPS (AX), Y28, K6, Y1 // 62f11c265508
+ VANDNPS 7(SI), Y28, K6, Y1 // 62f11c26558e07000000
+ VANDNPS Y2, Y13, K6, Y1 // 62f1142e55ca
+ VANDNPS Y21, Y13, K6, Y1 // 62b1142e55cd
+ VANDNPS Y12, Y13, K6, Y1 // 62d1142e55cc
+ VANDNPS (AX), Y13, K6, Y1 // 62f1142e5508
+ VANDNPS 7(SI), Y13, K6, Y1 // 62f1142e558e07000000
+ VANDNPS Y2, Y7, K6, Y1 // 62f1442e55ca
+ VANDNPS Y21, Y7, K6, Y1 // 62b1442e55cd
+ VANDNPS Y12, Y7, K6, Y1 // 62d1442e55cc
+ VANDNPS (AX), Y7, K6, Y1 // 62f1442e5508
+ VANDNPS 7(SI), Y7, K6, Y1 // 62f1442e558e07000000
+ VANDNPS Z12, Z9, K3, Z3 // 62d1344b55dc
+ VANDNPS Z22, Z9, K3, Z3 // 62b1344b55de
+ VANDNPS (AX), Z9, K3, Z3 // 62f1344b5518
+ VANDNPS 7(SI), Z9, K3, Z3 // 62f1344b559e07000000
+ VANDNPS Z12, Z19, K3, Z3 // 62d1644355dc
+ VANDNPS Z22, Z19, K3, Z3 // 62b1644355de
+ VANDNPS (AX), Z19, K3, Z3 // 62f164435518
+ VANDNPS 7(SI), Z19, K3, Z3 // 62f16443559e07000000
+ VANDNPS Z12, Z9, K3, Z30 // 6241344b55f4
+ VANDNPS Z22, Z9, K3, Z30 // 6221344b55f6
+ VANDNPS (AX), Z9, K3, Z30 // 6261344b5530
+ VANDNPS 7(SI), Z9, K3, Z30 // 6261344b55b607000000
+ VANDNPS Z12, Z19, K3, Z30 // 6241644355f4
+ VANDNPS Z22, Z19, K3, Z30 // 6221644355f6
+ VANDNPS (AX), Z19, K3, Z30 // 626164435530
+ VANDNPS 7(SI), Z19, K3, Z30 // 6261644355b607000000
+ VANDPD X22, X24, K7, X7 // 62b1bd0754fe
+ VANDPD X1, X24, K7, X7 // 62f1bd0754f9
+ VANDPD X11, X24, K7, X7 // 62d1bd0754fb
+ VANDPD -17(BP)(SI*2), X24, K7, X7 // 62f1bd0754bc75efffffff
+ VANDPD 7(AX)(CX*2), X24, K7, X7 // 62f1bd0754bc4807000000
+ VANDPD X22, X7, K7, X7 // 62b1c50f54fe
+ VANDPD X1, X7, K7, X7 // 62f1c50f54f9
+ VANDPD X11, X7, K7, X7 // 62d1c50f54fb
+ VANDPD -17(BP)(SI*2), X7, K7, X7 // 62f1c50f54bc75efffffff
+ VANDPD 7(AX)(CX*2), X7, K7, X7 // 62f1c50f54bc4807000000
+ VANDPD X22, X0, K7, X7 // 62b1fd0f54fe
+ VANDPD X1, X0, K7, X7 // 62f1fd0f54f9
+ VANDPD X11, X0, K7, X7 // 62d1fd0f54fb
+ VANDPD -17(BP)(SI*2), X0, K7, X7 // 62f1fd0f54bc75efffffff
+ VANDPD 7(AX)(CX*2), X0, K7, X7 // 62f1fd0f54bc4807000000
+ VANDPD X22, X24, K7, X13 // 6231bd0754ee
+ VANDPD X1, X24, K7, X13 // 6271bd0754e9
+ VANDPD X11, X24, K7, X13 // 6251bd0754eb
+ VANDPD -17(BP)(SI*2), X24, K7, X13 // 6271bd0754ac75efffffff
+ VANDPD 7(AX)(CX*2), X24, K7, X13 // 6271bd0754ac4807000000
+ VANDPD X22, X7, K7, X13 // 6231c50f54ee
+ VANDPD X1, X7, K7, X13 // 6271c50f54e9
+ VANDPD X11, X7, K7, X13 // 6251c50f54eb
+ VANDPD -17(BP)(SI*2), X7, K7, X13 // 6271c50f54ac75efffffff
+ VANDPD 7(AX)(CX*2), X7, K7, X13 // 6271c50f54ac4807000000
+ VANDPD X22, X0, K7, X13 // 6231fd0f54ee
+ VANDPD X1, X0, K7, X13 // 6271fd0f54e9
+ VANDPD X11, X0, K7, X13 // 6251fd0f54eb
+ VANDPD -17(BP)(SI*2), X0, K7, X13 // 6271fd0f54ac75efffffff
+ VANDPD 7(AX)(CX*2), X0, K7, X13 // 6271fd0f54ac4807000000
+ VANDPD X22, X24, K7, X8 // 6231bd0754c6
+ VANDPD X1, X24, K7, X8 // 6271bd0754c1
+ VANDPD X11, X24, K7, X8 // 6251bd0754c3
+ VANDPD -17(BP)(SI*2), X24, K7, X8 // 6271bd07548475efffffff
+ VANDPD 7(AX)(CX*2), X24, K7, X8 // 6271bd0754844807000000
+ VANDPD X22, X7, K7, X8 // 6231c50f54c6
+ VANDPD X1, X7, K7, X8 // 6271c50f54c1
+ VANDPD X11, X7, K7, X8 // 6251c50f54c3
+ VANDPD -17(BP)(SI*2), X7, K7, X8 // 6271c50f548475efffffff
+ VANDPD 7(AX)(CX*2), X7, K7, X8 // 6271c50f54844807000000
+ VANDPD X22, X0, K7, X8 // 6231fd0f54c6
+ VANDPD X1, X0, K7, X8 // 6271fd0f54c1
+ VANDPD X11, X0, K7, X8 // 6251fd0f54c3
+ VANDPD -17(BP)(SI*2), X0, K7, X8 // 6271fd0f548475efffffff
+ VANDPD 7(AX)(CX*2), X0, K7, X8 // 6271fd0f54844807000000
+ VANDPD Y12, Y3, K4, Y9 // 6251e52c54cc
+ VANDPD Y21, Y3, K4, Y9 // 6231e52c54cd
+ VANDPD Y14, Y3, K4, Y9 // 6251e52c54ce
+ VANDPD (BX), Y3, K4, Y9 // 6271e52c540b
+ VANDPD -17(BP)(SI*1), Y3, K4, Y9 // 6271e52c548c35efffffff
+ VANDPD Y12, Y2, K4, Y9 // 6251ed2c54cc
+ VANDPD Y21, Y2, K4, Y9 // 6231ed2c54cd
+ VANDPD Y14, Y2, K4, Y9 // 6251ed2c54ce
+ VANDPD (BX), Y2, K4, Y9 // 6271ed2c540b
+ VANDPD -17(BP)(SI*1), Y2, K4, Y9 // 6271ed2c548c35efffffff
+ VANDPD Y12, Y9, K4, Y9 // 6251b52c54cc
+ VANDPD Y21, Y9, K4, Y9 // 6231b52c54cd
+ VANDPD Y14, Y9, K4, Y9 // 6251b52c54ce
+ VANDPD (BX), Y9, K4, Y9 // 6271b52c540b
+ VANDPD -17(BP)(SI*1), Y9, K4, Y9 // 6271b52c548c35efffffff
+ VANDPD Y12, Y3, K4, Y1 // 62d1e52c54cc
+ VANDPD Y21, Y3, K4, Y1 // 62b1e52c54cd
+ VANDPD Y14, Y3, K4, Y1 // 62d1e52c54ce
+ VANDPD (BX), Y3, K4, Y1 // 62f1e52c540b
+ VANDPD -17(BP)(SI*1), Y3, K4, Y1 // 62f1e52c548c35efffffff
+ VANDPD Y12, Y2, K4, Y1 // 62d1ed2c54cc
+ VANDPD Y21, Y2, K4, Y1 // 62b1ed2c54cd
+ VANDPD Y14, Y2, K4, Y1 // 62d1ed2c54ce
+ VANDPD (BX), Y2, K4, Y1 // 62f1ed2c540b
+ VANDPD -17(BP)(SI*1), Y2, K4, Y1 // 62f1ed2c548c35efffffff
+ VANDPD Y12, Y9, K4, Y1 // 62d1b52c54cc
+ VANDPD Y21, Y9, K4, Y1 // 62b1b52c54cd
+ VANDPD Y14, Y9, K4, Y1 // 62d1b52c54ce
+ VANDPD (BX), Y9, K4, Y1 // 62f1b52c540b
+ VANDPD -17(BP)(SI*1), Y9, K4, Y1 // 62f1b52c548c35efffffff
+ VANDPD Z2, Z18, K4, Z11 // 6271ed4454da
+ VANDPD Z21, Z18, K4, Z11 // 6231ed4454dd
+ VANDPD (BX), Z18, K4, Z11 // 6271ed44541b
+ VANDPD -17(BP)(SI*1), Z18, K4, Z11 // 6271ed44549c35efffffff
+ VANDPD Z2, Z24, K4, Z11 // 6271bd4454da
+ VANDPD Z21, Z24, K4, Z11 // 6231bd4454dd
+ VANDPD (BX), Z24, K4, Z11 // 6271bd44541b
+ VANDPD -17(BP)(SI*1), Z24, K4, Z11 // 6271bd44549c35efffffff
+ VANDPD Z2, Z18, K4, Z5 // 62f1ed4454ea
+ VANDPD Z21, Z18, K4, Z5 // 62b1ed4454ed
+ VANDPD (BX), Z18, K4, Z5 // 62f1ed44542b
+ VANDPD -17(BP)(SI*1), Z18, K4, Z5 // 62f1ed4454ac35efffffff
+ VANDPD Z2, Z24, K4, Z5 // 62f1bd4454ea
+ VANDPD Z21, Z24, K4, Z5 // 62b1bd4454ed
+ VANDPD (BX), Z24, K4, Z5 // 62f1bd44542b
+ VANDPD -17(BP)(SI*1), Z24, K4, Z5 // 62f1bd4454ac35efffffff
+ VANDPS X20, X31, K7, X6 // 62b1040754f4
+ VANDPS X24, X31, K7, X6 // 6291040754f0
+ VANDPS X7, X31, K7, X6 // 62f1040754f7
+ VANDPS 15(R8)(R14*1), X31, K7, X6 // 6291040754b4300f000000
+ VANDPS 15(R8)(R14*2), X31, K7, X6 // 6291040754b4700f000000
+ VANDPS X20, X3, K7, X6 // 62b1640f54f4
+ VANDPS X24, X3, K7, X6 // 6291640f54f0
+ VANDPS X7, X3, K7, X6 // 62f1640f54f7
+ VANDPS 15(R8)(R14*1), X3, K7, X6 // 6291640f54b4300f000000
+ VANDPS 15(R8)(R14*2), X3, K7, X6 // 6291640f54b4700f000000
+ VANDPS X20, X28, K7, X6 // 62b11c0754f4
+ VANDPS X24, X28, K7, X6 // 62911c0754f0
+ VANDPS X7, X28, K7, X6 // 62f11c0754f7
+ VANDPS 15(R8)(R14*1), X28, K7, X6 // 62911c0754b4300f000000
+ VANDPS 15(R8)(R14*2), X28, K7, X6 // 62911c0754b4700f000000
+ VANDPS X20, X31, K7, X7 // 62b1040754fc
+ VANDPS X24, X31, K7, X7 // 6291040754f8
+ VANDPS X7, X31, K7, X7 // 62f1040754ff
+ VANDPS 15(R8)(R14*1), X31, K7, X7 // 6291040754bc300f000000
+ VANDPS 15(R8)(R14*2), X31, K7, X7 // 6291040754bc700f000000
+ VANDPS X20, X3, K7, X7 // 62b1640f54fc
+ VANDPS X24, X3, K7, X7 // 6291640f54f8
+ VANDPS X7, X3, K7, X7 // 62f1640f54ff
+ VANDPS 15(R8)(R14*1), X3, K7, X7 // 6291640f54bc300f000000
+ VANDPS 15(R8)(R14*2), X3, K7, X7 // 6291640f54bc700f000000
+ VANDPS X20, X28, K7, X7 // 62b11c0754fc
+ VANDPS X24, X28, K7, X7 // 62911c0754f8
+ VANDPS X7, X28, K7, X7 // 62f11c0754ff
+ VANDPS 15(R8)(R14*1), X28, K7, X7 // 62911c0754bc300f000000
+ VANDPS 15(R8)(R14*2), X28, K7, X7 // 62911c0754bc700f000000
+ VANDPS X20, X31, K7, X8 // 6231040754c4
+ VANDPS X24, X31, K7, X8 // 6211040754c0
+ VANDPS X7, X31, K7, X8 // 6271040754c7
+ VANDPS 15(R8)(R14*1), X31, K7, X8 // 621104075484300f000000
+ VANDPS 15(R8)(R14*2), X31, K7, X8 // 621104075484700f000000
+ VANDPS X20, X3, K7, X8 // 6231640f54c4
+ VANDPS X24, X3, K7, X8 // 6211640f54c0
+ VANDPS X7, X3, K7, X8 // 6271640f54c7
+ VANDPS 15(R8)(R14*1), X3, K7, X8 // 6211640f5484300f000000
+ VANDPS 15(R8)(R14*2), X3, K7, X8 // 6211640f5484700f000000
+ VANDPS X20, X28, K7, X8 // 62311c0754c4
+ VANDPS X24, X28, K7, X8 // 62111c0754c0
+ VANDPS X7, X28, K7, X8 // 62711c0754c7
+ VANDPS 15(R8)(R14*1), X28, K7, X8 // 62111c075484300f000000
+ VANDPS 15(R8)(R14*2), X28, K7, X8 // 62111c075484700f000000
+ VANDPS Y31, Y16, K2, Y30 // 62017c2254f7
+ VANDPS Y22, Y16, K2, Y30 // 62217c2254f6
+ VANDPS Y6, Y16, K2, Y30 // 62617c2254f6
+ VANDPS 15(R8)(R14*4), Y16, K2, Y30 // 62017c2254b4b00f000000
+ VANDPS -7(CX)(DX*4), Y16, K2, Y30 // 62617c2254b491f9ffffff
+ VANDPS Y31, Y1, K2, Y30 // 6201742a54f7
+ VANDPS Y22, Y1, K2, Y30 // 6221742a54f6
+ VANDPS Y6, Y1, K2, Y30 // 6261742a54f6
+ VANDPS 15(R8)(R14*4), Y1, K2, Y30 // 6201742a54b4b00f000000
+ VANDPS -7(CX)(DX*4), Y1, K2, Y30 // 6261742a54b491f9ffffff
+ VANDPS Y31, Y30, K2, Y30 // 62010c2254f7
+ VANDPS Y22, Y30, K2, Y30 // 62210c2254f6
+ VANDPS Y6, Y30, K2, Y30 // 62610c2254f6
+ VANDPS 15(R8)(R14*4), Y30, K2, Y30 // 62010c2254b4b00f000000
+ VANDPS -7(CX)(DX*4), Y30, K2, Y30 // 62610c2254b491f9ffffff
+ VANDPS Y31, Y16, K2, Y26 // 62017c2254d7
+ VANDPS Y22, Y16, K2, Y26 // 62217c2254d6
+ VANDPS Y6, Y16, K2, Y26 // 62617c2254d6
+ VANDPS 15(R8)(R14*4), Y16, K2, Y26 // 62017c225494b00f000000
+ VANDPS -7(CX)(DX*4), Y16, K2, Y26 // 62617c22549491f9ffffff
+ VANDPS Y31, Y1, K2, Y26 // 6201742a54d7
+ VANDPS Y22, Y1, K2, Y26 // 6221742a54d6
+ VANDPS Y6, Y1, K2, Y26 // 6261742a54d6
+ VANDPS 15(R8)(R14*4), Y1, K2, Y26 // 6201742a5494b00f000000
+ VANDPS -7(CX)(DX*4), Y1, K2, Y26 // 6261742a549491f9ffffff
+ VANDPS Y31, Y30, K2, Y26 // 62010c2254d7
+ VANDPS Y22, Y30, K2, Y26 // 62210c2254d6
+ VANDPS Y6, Y30, K2, Y26 // 62610c2254d6
+ VANDPS 15(R8)(R14*4), Y30, K2, Y26 // 62010c225494b00f000000
+ VANDPS -7(CX)(DX*4), Y30, K2, Y26 // 62610c22549491f9ffffff
+ VANDPS Y31, Y16, K2, Y7 // 62917c2254ff
+ VANDPS Y22, Y16, K2, Y7 // 62b17c2254fe
+ VANDPS Y6, Y16, K2, Y7 // 62f17c2254fe
+ VANDPS 15(R8)(R14*4), Y16, K2, Y7 // 62917c2254bcb00f000000
+ VANDPS -7(CX)(DX*4), Y16, K2, Y7 // 62f17c2254bc91f9ffffff
+ VANDPS Y31, Y1, K2, Y7 // 6291742a54ff
+ VANDPS Y22, Y1, K2, Y7 // 62b1742a54fe
+ VANDPS Y6, Y1, K2, Y7 // 62f1742a54fe
+ VANDPS 15(R8)(R14*4), Y1, K2, Y7 // 6291742a54bcb00f000000
+ VANDPS -7(CX)(DX*4), Y1, K2, Y7 // 62f1742a54bc91f9ffffff
+ VANDPS Y31, Y30, K2, Y7 // 62910c2254ff
+ VANDPS Y22, Y30, K2, Y7 // 62b10c2254fe
+ VANDPS Y6, Y30, K2, Y7 // 62f10c2254fe
+ VANDPS 15(R8)(R14*4), Y30, K2, Y7 // 62910c2254bcb00f000000
+ VANDPS -7(CX)(DX*4), Y30, K2, Y7 // 62f10c2254bc91f9ffffff
+ VANDPS Z6, Z6, K5, Z7 // 62f14c4d54fe
+ VANDPS Z22, Z6, K5, Z7 // 62b14c4d54fe
+ VANDPS 15(R8)(R14*4), Z6, K5, Z7 // 62914c4d54bcb00f000000
+ VANDPS -7(CX)(DX*4), Z6, K5, Z7 // 62f14c4d54bc91f9ffffff
+ VANDPS Z6, Z16, K5, Z7 // 62f17c4554fe
+ VANDPS Z22, Z16, K5, Z7 // 62b17c4554fe
+ VANDPS 15(R8)(R14*4), Z16, K5, Z7 // 62917c4554bcb00f000000
+ VANDPS -7(CX)(DX*4), Z16, K5, Z7 // 62f17c4554bc91f9ffffff
+ VANDPS Z6, Z6, K5, Z13 // 62714c4d54ee
+ VANDPS Z22, Z6, K5, Z13 // 62314c4d54ee
+ VANDPS 15(R8)(R14*4), Z6, K5, Z13 // 62114c4d54acb00f000000
+ VANDPS -7(CX)(DX*4), Z6, K5, Z13 // 62714c4d54ac91f9ffffff
+ VANDPS Z6, Z16, K5, Z13 // 62717c4554ee
+ VANDPS Z22, Z16, K5, Z13 // 62317c4554ee
+ VANDPS 15(R8)(R14*4), Z16, K5, Z13 // 62117c4554acb00f000000
+ VANDPS -7(CX)(DX*4), Z16, K5, Z13 // 62717c4554ac91f9ffffff
+ VBROADCASTF32X2 X16, K3, Y1 // 62b27d2b19c8
+ VBROADCASTF32X2 X28, K3, Y1 // 62927d2b19cc
+ VBROADCASTF32X2 X8, K3, Y1 // 62d27d2b19c8
+ VBROADCASTF32X2 -17(BP)(SI*8), K3, Y1 // 62f27d2b198cf5efffffff
+ VBROADCASTF32X2 (R15), K3, Y1 // 62d27d2b190f
+ VBROADCASTF32X2 X16, K3, Y27 // 62227d2b19d8
+ VBROADCASTF32X2 X28, K3, Y27 // 62027d2b19dc
+ VBROADCASTF32X2 X8, K3, Y27 // 62427d2b19d8
+ VBROADCASTF32X2 -17(BP)(SI*8), K3, Y27 // 62627d2b199cf5efffffff
+ VBROADCASTF32X2 (R15), K3, Y27 // 62427d2b191f
+ VBROADCASTF32X2 X16, K3, Y19 // 62a27d2b19d8
+ VBROADCASTF32X2 X28, K3, Y19 // 62827d2b19dc
+ VBROADCASTF32X2 X8, K3, Y19 // 62c27d2b19d8
+ VBROADCASTF32X2 -17(BP)(SI*8), K3, Y19 // 62e27d2b199cf5efffffff
+ VBROADCASTF32X2 (R15), K3, Y19 // 62c27d2b191f
+ VBROADCASTF32X2 X15, K2, Z1 // 62d27d4a19cf
+ VBROADCASTF32X2 X11, K2, Z1 // 62d27d4a19cb
+ VBROADCASTF32X2 X1, K2, Z1 // 62f27d4a19c9
+ VBROADCASTF32X2 7(SI)(DI*8), K2, Z1 // 62f27d4a198cfe07000000
+ VBROADCASTF32X2 -15(R14), K2, Z1 // 62d27d4a198ef1ffffff
+ VBROADCASTF32X2 X15, K2, Z3 // 62d27d4a19df
+ VBROADCASTF32X2 X11, K2, Z3 // 62d27d4a19db
+ VBROADCASTF32X2 X1, K2, Z3 // 62f27d4a19d9
+ VBROADCASTF32X2 7(SI)(DI*8), K2, Z3 // 62f27d4a199cfe07000000
+ VBROADCASTF32X2 -15(R14), K2, Z3 // 62d27d4a199ef1ffffff
+ VBROADCASTF32X8 -17(BP)(SI*2), K1, Z28 // 62627d491ba475efffffff
+ VBROADCASTF32X8 7(AX)(CX*2), K1, Z28 // 62627d491ba44807000000
+ VBROADCASTF32X8 -17(BP)(SI*2), K1, Z13 // 62727d491bac75efffffff
+ VBROADCASTF32X8 7(AX)(CX*2), K1, Z13 // 62727d491bac4807000000
+ VBROADCASTF64X2 -7(CX)(DX*1), K7, Y21 // 62e2fd2f1aac11f9ffffff
+ VBROADCASTF64X2 -15(R14)(R15*4), K7, Y21 // 6282fd2f1aacbef1ffffff
+ VBROADCASTF64X2 -7(CX)(DX*1), K7, Y7 // 62f2fd2f1abc11f9ffffff
+ VBROADCASTF64X2 -15(R14)(R15*4), K7, Y7 // 6292fd2f1abcbef1ffffff
+ VBROADCASTF64X2 -7(CX)(DX*1), K7, Y30 // 6262fd2f1ab411f9ffffff
+ VBROADCASTF64X2 -15(R14)(R15*4), K7, Y30 // 6202fd2f1ab4bef1ffffff
+ VBROADCASTF64X2 15(DX)(BX*1), K1, Z14 // 6272fd491ab41a0f000000
+ VBROADCASTF64X2 -7(CX)(DX*2), K1, Z14 // 6272fd491ab451f9ffffff
+ VBROADCASTF64X2 15(DX)(BX*1), K1, Z28 // 6262fd491aa41a0f000000
+ VBROADCASTF64X2 -7(CX)(DX*2), K1, Z28 // 6262fd491aa451f9ffffff
+ VBROADCASTI32X2 X14, K1, X19 // 62c27d0959de
+ VBROADCASTI32X2 X0, K1, X19 // 62e27d0959d8
+ VBROADCASTI32X2 7(SI)(DI*1), K1, X19 // 62e27d09599c3e07000000
+ VBROADCASTI32X2 15(DX)(BX*8), K1, X19 // 62e27d09599cda0f000000
+ VBROADCASTI32X2 X14, K1, X13 // 62527d0959ee
+ VBROADCASTI32X2 X0, K1, X13 // 62727d0959e8
+ VBROADCASTI32X2 7(SI)(DI*1), K1, X13 // 62727d0959ac3e07000000
+ VBROADCASTI32X2 15(DX)(BX*8), K1, X13 // 62727d0959acda0f000000
+ VBROADCASTI32X2 X14, K1, X2 // 62d27d0959d6
+ VBROADCASTI32X2 X0, K1, X2 // 62f27d0959d0
+ VBROADCASTI32X2 7(SI)(DI*1), K1, X2 // 62f27d0959943e07000000
+ VBROADCASTI32X2 15(DX)(BX*8), K1, X2 // 62f27d095994da0f000000
+ VBROADCASTI32X2 X25, K7, Y13 // 62127d2f59e9
+ VBROADCASTI32X2 X11, K7, Y13 // 62527d2f59eb
+ VBROADCASTI32X2 X17, K7, Y13 // 62327d2f59e9
+ VBROADCASTI32X2 -7(DI)(R8*1), K7, Y13 // 62327d2f59ac07f9ffffff
+ VBROADCASTI32X2 (SP), K7, Y13 // 62727d2f592c24
+ VBROADCASTI32X2 X25, K7, Y18 // 62827d2f59d1
+ VBROADCASTI32X2 X11, K7, Y18 // 62c27d2f59d3
+ VBROADCASTI32X2 X17, K7, Y18 // 62a27d2f59d1
+ VBROADCASTI32X2 -7(DI)(R8*1), K7, Y18 // 62a27d2f599407f9ffffff
+ VBROADCASTI32X2 (SP), K7, Y18 // 62e27d2f591424
+ VBROADCASTI32X2 X25, K7, Y24 // 62027d2f59c1
+ VBROADCASTI32X2 X11, K7, Y24 // 62427d2f59c3
+ VBROADCASTI32X2 X17, K7, Y24 // 62227d2f59c1
+ VBROADCASTI32X2 -7(DI)(R8*1), K7, Y24 // 62227d2f598407f9ffffff
+ VBROADCASTI32X2 (SP), K7, Y24 // 62627d2f590424
+ VBROADCASTI32X2 X18, K2, Z15 // 62327d4a59fa
+ VBROADCASTI32X2 X11, K2, Z15 // 62527d4a59fb
+ VBROADCASTI32X2 X9, K2, Z15 // 62527d4a59f9
+ VBROADCASTI32X2 -7(CX), K2, Z15 // 62727d4a59b9f9ffffff
+ VBROADCASTI32X2 15(DX)(BX*4), K2, Z15 // 62727d4a59bc9a0f000000
+ VBROADCASTI32X2 X18, K2, Z30 // 62227d4a59f2
+ VBROADCASTI32X2 X11, K2, Z30 // 62427d4a59f3
+ VBROADCASTI32X2 X9, K2, Z30 // 62427d4a59f1
+ VBROADCASTI32X2 -7(CX), K2, Z30 // 62627d4a59b1f9ffffff
+ VBROADCASTI32X2 15(DX)(BX*4), K2, Z30 // 62627d4a59b49a0f000000
+ VBROADCASTI32X8 (R14), K3, Z5 // 62d27d4b5b2e
+ VBROADCASTI32X8 -7(DI)(R8*8), K3, Z5 // 62b27d4b5bacc7f9ffffff
+ VBROADCASTI32X8 (R14), K3, Z1 // 62d27d4b5b0e
+ VBROADCASTI32X8 -7(DI)(R8*8), K3, Z1 // 62b27d4b5b8cc7f9ffffff
+ VBROADCASTI64X2 15(R8), K4, Y5 // 62d2fd2c5aa80f000000
+ VBROADCASTI64X2 (BP), K4, Y5 // 62f2fd2c5a6d00
+ VBROADCASTI64X2 15(R8), K4, Y24 // 6242fd2c5a800f000000
+ VBROADCASTI64X2 (BP), K4, Y24 // 6262fd2c5a4500
+ VBROADCASTI64X2 15(R8), K4, Y21 // 62c2fd2c5aa80f000000
+ VBROADCASTI64X2 (BP), K4, Y21 // 62e2fd2c5a6d00
+ VBROADCASTI64X2 15(R8)(R14*8), K5, Z3 // 6292fd4d5a9cf00f000000
+ VBROADCASTI64X2 -15(R14)(R15*2), K5, Z3 // 6292fd4d5a9c7ef1ffffff
+ VBROADCASTI64X2 15(R8)(R14*8), K5, Z5 // 6292fd4d5aacf00f000000
+ VBROADCASTI64X2 -15(R14)(R15*2), K5, Z5 // 6292fd4d5aac7ef1ffffff
+ VCVTPD2QQ X15, K7, X0 // 62d1fd0f7bc7
+ VCVTPD2QQ X11, K7, X0 // 62d1fd0f7bc3
+ VCVTPD2QQ X0, K7, X0 // 62f1fd0f7bc0
+ VCVTPD2QQ -17(BP)(SI*8), K7, X0 // 62f1fd0f7b84f5efffffff
+ VCVTPD2QQ (R15), K7, X0 // 62d1fd0f7b07
+ VCVTPD2QQ X15, K7, X17 // 62c1fd0f7bcf
+ VCVTPD2QQ X11, K7, X17 // 62c1fd0f7bcb
+ VCVTPD2QQ X0, K7, X17 // 62e1fd0f7bc8
+ VCVTPD2QQ -17(BP)(SI*8), K7, X17 // 62e1fd0f7b8cf5efffffff
+ VCVTPD2QQ (R15), K7, X17 // 62c1fd0f7b0f
+ VCVTPD2QQ X15, K7, X7 // 62d1fd0f7bff
+ VCVTPD2QQ X11, K7, X7 // 62d1fd0f7bfb
+ VCVTPD2QQ X0, K7, X7 // 62f1fd0f7bf8
+ VCVTPD2QQ -17(BP)(SI*8), K7, X7 // 62f1fd0f7bbcf5efffffff
+ VCVTPD2QQ (R15), K7, X7 // 62d1fd0f7b3f
+ VCVTPD2QQ Y0, K2, Y6 // 62f1fd2a7bf0
+ VCVTPD2QQ Y19, K2, Y6 // 62b1fd2a7bf3
+ VCVTPD2QQ Y31, K2, Y6 // 6291fd2a7bf7
+ VCVTPD2QQ -15(R14)(R15*1), K2, Y6 // 6291fd2a7bb43ef1ffffff
+ VCVTPD2QQ -15(BX), K2, Y6 // 62f1fd2a7bb3f1ffffff
+ VCVTPD2QQ Y0, K2, Y1 // 62f1fd2a7bc8
+ VCVTPD2QQ Y19, K2, Y1 // 62b1fd2a7bcb
+ VCVTPD2QQ Y31, K2, Y1 // 6291fd2a7bcf
+ VCVTPD2QQ -15(R14)(R15*1), K2, Y1 // 6291fd2a7b8c3ef1ffffff
+ VCVTPD2QQ -15(BX), K2, Y1 // 62f1fd2a7b8bf1ffffff
+ VCVTPD2QQ Y0, K2, Y9 // 6271fd2a7bc8
+ VCVTPD2QQ Y19, K2, Y9 // 6231fd2a7bcb
+ VCVTPD2QQ Y31, K2, Y9 // 6211fd2a7bcf
+ VCVTPD2QQ -15(R14)(R15*1), K2, Y9 // 6211fd2a7b8c3ef1ffffff
+ VCVTPD2QQ -15(BX), K2, Y9 // 6271fd2a7b8bf1ffffff
+ VCVTPD2QQ Z12, K5, Z14 // 6251fd4d7bf4
+ VCVTPD2QQ Z13, K5, Z14 // 6251fd4d7bf5
+ VCVTPD2QQ Z12, K5, Z13 // 6251fd4d7bec
+ VCVTPD2QQ Z13, K5, Z13 // 6251fd4d7bed
+ VCVTPD2QQ Z2, K3, Z21 // 62e1fd4b7bea
+ VCVTPD2QQ Z7, K3, Z21 // 62e1fd4b7bef
+ VCVTPD2QQ -17(BP), K3, Z21 // 62e1fd4b7badefffffff
+ VCVTPD2QQ -15(R14)(R15*8), K3, Z21 // 6281fd4b7bacfef1ffffff
+ VCVTPD2QQ Z2, K3, Z9 // 6271fd4b7bca
+ VCVTPD2QQ Z7, K3, Z9 // 6271fd4b7bcf
+ VCVTPD2QQ -17(BP), K3, Z9 // 6271fd4b7b8defffffff
+ VCVTPD2QQ -15(R14)(R15*8), K3, Z9 // 6211fd4b7b8cfef1ffffff
+ VCVTPD2UQQ X24, K3, X7 // 6291fd0b79f8
+ VCVTPD2UQQ X7, K3, X7 // 62f1fd0b79ff
+ VCVTPD2UQQ X0, K3, X7 // 62f1fd0b79f8
+ VCVTPD2UQQ 7(SI)(DI*1), K3, X7 // 62f1fd0b79bc3e07000000
+ VCVTPD2UQQ 15(DX)(BX*8), K3, X7 // 62f1fd0b79bcda0f000000
+ VCVTPD2UQQ X24, K3, X13 // 6211fd0b79e8
+ VCVTPD2UQQ X7, K3, X13 // 6271fd0b79ef
+ VCVTPD2UQQ X0, K3, X13 // 6271fd0b79e8
+ VCVTPD2UQQ 7(SI)(DI*1), K3, X13 // 6271fd0b79ac3e07000000
+ VCVTPD2UQQ 15(DX)(BX*8), K3, X13 // 6271fd0b79acda0f000000
+ VCVTPD2UQQ X24, K3, X8 // 6211fd0b79c0
+ VCVTPD2UQQ X7, K3, X8 // 6271fd0b79c7
+ VCVTPD2UQQ X0, K3, X8 // 6271fd0b79c0
+ VCVTPD2UQQ 7(SI)(DI*1), K3, X8 // 6271fd0b79843e07000000
+ VCVTPD2UQQ 15(DX)(BX*8), K3, X8 // 6271fd0b7984da0f000000
+ VCVTPD2UQQ Y27, K3, Y28 // 6201fd2b79e3
+ VCVTPD2UQQ Y0, K3, Y28 // 6261fd2b79e0
+ VCVTPD2UQQ Y11, K3, Y28 // 6241fd2b79e3
+ VCVTPD2UQQ (SI), K3, Y28 // 6261fd2b7926
+ VCVTPD2UQQ 7(SI)(DI*2), K3, Y28 // 6261fd2b79a47e07000000
+ VCVTPD2UQQ Y27, K3, Y2 // 6291fd2b79d3
+ VCVTPD2UQQ Y0, K3, Y2 // 62f1fd2b79d0
+ VCVTPD2UQQ Y11, K3, Y2 // 62d1fd2b79d3
+ VCVTPD2UQQ (SI), K3, Y2 // 62f1fd2b7916
+ VCVTPD2UQQ 7(SI)(DI*2), K3, Y2 // 62f1fd2b79947e07000000
+ VCVTPD2UQQ Y27, K3, Y24 // 6201fd2b79c3
+ VCVTPD2UQQ Y0, K3, Y24 // 6261fd2b79c0
+ VCVTPD2UQQ Y11, K3, Y24 // 6241fd2b79c3
+ VCVTPD2UQQ (SI), K3, Y24 // 6261fd2b7906
+ VCVTPD2UQQ 7(SI)(DI*2), K3, Y24 // 6261fd2b79847e07000000
+ VCVTPD2UQQ Z3, K2, Z27 // 6261fd4a79db
+ VCVTPD2UQQ Z0, K2, Z27 // 6261fd4a79d8
+ VCVTPD2UQQ Z3, K2, Z14 // 6271fd4a79f3
+ VCVTPD2UQQ Z0, K2, Z14 // 6271fd4a79f0
+ VCVTPD2UQQ Z8, K1, Z14 // 6251fd4979f0
+ VCVTPD2UQQ Z24, K1, Z14 // 6211fd4979f0
+ VCVTPD2UQQ 15(R8), K1, Z14 // 6251fd4979b00f000000
+ VCVTPD2UQQ (BP), K1, Z14 // 6271fd49797500
+ VCVTPD2UQQ Z8, K1, Z7 // 62d1fd4979f8
+ VCVTPD2UQQ Z24, K1, Z7 // 6291fd4979f8
+ VCVTPD2UQQ 15(R8), K1, Z7 // 62d1fd4979b80f000000
+ VCVTPD2UQQ (BP), K1, Z7 // 62f1fd49797d00
+ VCVTPS2QQ X19, K3, X15 // 62317d0b7bfb
+ VCVTPS2QQ X13, K3, X15 // 62517d0b7bfd
+ VCVTPS2QQ X2, K3, X15 // 62717d0b7bfa
+ VCVTPS2QQ (BX), K3, X15 // 62717d0b7b3b
+ VCVTPS2QQ -17(BP)(SI*1), K3, X15 // 62717d0b7bbc35efffffff
+ VCVTPS2QQ X19, K3, X11 // 62317d0b7bdb
+ VCVTPS2QQ X13, K3, X11 // 62517d0b7bdd
+ VCVTPS2QQ X2, K3, X11 // 62717d0b7bda
+ VCVTPS2QQ (BX), K3, X11 // 62717d0b7b1b
+ VCVTPS2QQ -17(BP)(SI*1), K3, X11 // 62717d0b7b9c35efffffff
+ VCVTPS2QQ X19, K3, X1 // 62b17d0b7bcb
+ VCVTPS2QQ X13, K3, X1 // 62d17d0b7bcd
+ VCVTPS2QQ X2, K3, X1 // 62f17d0b7bca
+ VCVTPS2QQ (BX), K3, X1 // 62f17d0b7b0b
+ VCVTPS2QQ -17(BP)(SI*1), K3, X1 // 62f17d0b7b8c35efffffff
+ VCVTPS2QQ X14, K7, Y20 // 62c17d2f7be6
+ VCVTPS2QQ X0, K7, Y20 // 62e17d2f7be0
+ VCVTPS2QQ 99(R15)(R15*1), K7, Y20 // 62817d2f7ba43f63000000
+ VCVTPS2QQ (DX), K7, Y20 // 62e17d2f7b22
+ VCVTPS2QQ X14, K7, Y12 // 62517d2f7be6
+ VCVTPS2QQ X0, K7, Y12 // 62717d2f7be0
+ VCVTPS2QQ 99(R15)(R15*1), K7, Y12 // 62117d2f7ba43f63000000
+ VCVTPS2QQ (DX), K7, Y12 // 62717d2f7b22
+ VCVTPS2QQ X14, K7, Y3 // 62d17d2f7bde
+ VCVTPS2QQ X0, K7, Y3 // 62f17d2f7bd8
+ VCVTPS2QQ 99(R15)(R15*1), K7, Y3 // 62917d2f7b9c3f63000000
+ VCVTPS2QQ (DX), K7, Y3 // 62f17d2f7b1a
+ VCVTPS2QQ Y5, K4, Z6 // 62f17d4c7bf5
+ VCVTPS2QQ Y28, K4, Z6 // 62917d4c7bf4
+ VCVTPS2QQ Y7, K4, Z6 // 62f17d4c7bf7
+ VCVTPS2QQ Y5, K4, Z14 // 62717d4c7bf5
+ VCVTPS2QQ Y28, K4, Z14 // 62117d4c7bf4
+ VCVTPS2QQ Y7, K4, Z14 // 62717d4c7bf7
+ VCVTPS2QQ Y0, K4, Z26 // 62617d4c7bd0
+ VCVTPS2QQ Y22, K4, Z26 // 62217d4c7bd6
+ VCVTPS2QQ Y13, K4, Z26 // 62417d4c7bd5
+ VCVTPS2QQ 7(AX)(CX*4), K4, Z26 // 62617d4c7b948807000000
+ VCVTPS2QQ 7(AX)(CX*1), K4, Z26 // 62617d4c7b940807000000
+ VCVTPS2QQ Y0, K4, Z14 // 62717d4c7bf0
+ VCVTPS2QQ Y22, K4, Z14 // 62317d4c7bf6
+ VCVTPS2QQ Y13, K4, Z14 // 62517d4c7bf5
+ VCVTPS2QQ 7(AX)(CX*4), K4, Z14 // 62717d4c7bb48807000000
+ VCVTPS2QQ 7(AX)(CX*1), K4, Z14 // 62717d4c7bb40807000000
+ VCVTPS2UQQ X2, K4, X2 // 62f17d0c79d2
+ VCVTPS2UQQ X27, K4, X2 // 62917d0c79d3
+ VCVTPS2UQQ X26, K4, X2 // 62917d0c79d2
+ VCVTPS2UQQ (R8), K4, X2 // 62d17d0c7910
+ VCVTPS2UQQ 15(DX)(BX*2), K4, X2 // 62f17d0c79945a0f000000
+ VCVTPS2UQQ X2, K4, X24 // 62617d0c79c2
+ VCVTPS2UQQ X27, K4, X24 // 62017d0c79c3
+ VCVTPS2UQQ X26, K4, X24 // 62017d0c79c2
+ VCVTPS2UQQ (R8), K4, X24 // 62417d0c7900
+ VCVTPS2UQQ 15(DX)(BX*2), K4, X24 // 62617d0c79845a0f000000
+ VCVTPS2UQQ X22, K2, Y31 // 62217d2a79fe
+ VCVTPS2UQQ X30, K2, Y31 // 62017d2a79fe
+ VCVTPS2UQQ X3, K2, Y31 // 62617d2a79fb
+ VCVTPS2UQQ 7(SI)(DI*8), K2, Y31 // 62617d2a79bcfe07000000
+ VCVTPS2UQQ -15(R14), K2, Y31 // 62417d2a79bef1ffffff
+ VCVTPS2UQQ X22, K2, Y8 // 62317d2a79c6
+ VCVTPS2UQQ X30, K2, Y8 // 62117d2a79c6
+ VCVTPS2UQQ X3, K2, Y8 // 62717d2a79c3
+ VCVTPS2UQQ 7(SI)(DI*8), K2, Y8 // 62717d2a7984fe07000000
+ VCVTPS2UQQ -15(R14), K2, Y8 // 62517d2a7986f1ffffff
+ VCVTPS2UQQ X22, K2, Y1 // 62b17d2a79ce
+ VCVTPS2UQQ X30, K2, Y1 // 62917d2a79ce
+ VCVTPS2UQQ X3, K2, Y1 // 62f17d2a79cb
+ VCVTPS2UQQ 7(SI)(DI*8), K2, Y1 // 62f17d2a798cfe07000000
+ VCVTPS2UQQ -15(R14), K2, Y1 // 62d17d2a798ef1ffffff
+ VCVTPS2UQQ Y28, K2, Z21 // 62817d4a79ec
+ VCVTPS2UQQ Y13, K2, Z21 // 62c17d4a79ed
+ VCVTPS2UQQ Y7, K2, Z21 // 62e17d4a79ef
+ VCVTPS2UQQ Y28, K2, Z13 // 62117d4a79ec
+ VCVTPS2UQQ Y13, K2, Z13 // 62517d4a79ed
+ VCVTPS2UQQ Y7, K2, Z13 // 62717d4a79ef
+ VCVTPS2UQQ Y2, K3, Z11 // 62717d4b79da
+ VCVTPS2UQQ Y21, K3, Z11 // 62317d4b79dd
+ VCVTPS2UQQ Y12, K3, Z11 // 62517d4b79dc
+ VCVTPS2UQQ 17(SP)(BP*8), K3, Z11 // 62717d4b799cec11000000
+ VCVTPS2UQQ 17(SP)(BP*4), K3, Z11 // 62717d4b799cac11000000
+ VCVTPS2UQQ Y2, K3, Z25 // 62617d4b79ca
+ VCVTPS2UQQ Y21, K3, Z25 // 62217d4b79cd
+ VCVTPS2UQQ Y12, K3, Z25 // 62417d4b79cc
+ VCVTPS2UQQ 17(SP)(BP*8), K3, Z25 // 62617d4b798cec11000000
+ VCVTPS2UQQ 17(SP)(BP*4), K3, Z25 // 62617d4b798cac11000000
+ VCVTQQ2PD X13, K3, X11 // 6251fe0be6dd
+ VCVTQQ2PD X6, K3, X11 // 6271fe0be6de
+ VCVTQQ2PD X12, K3, X11 // 6251fe0be6dc
+ VCVTQQ2PD 17(SP)(BP*1), K3, X11 // 6271fe0be69c2c11000000
+ VCVTQQ2PD -7(CX)(DX*8), K3, X11 // 6271fe0be69cd1f9ffffff
+ VCVTQQ2PD X13, K3, X15 // 6251fe0be6fd
+ VCVTQQ2PD X6, K3, X15 // 6271fe0be6fe
+ VCVTQQ2PD X12, K3, X15 // 6251fe0be6fc
+ VCVTQQ2PD 17(SP)(BP*1), K3, X15 // 6271fe0be6bc2c11000000
+ VCVTQQ2PD -7(CX)(DX*8), K3, X15 // 6271fe0be6bcd1f9ffffff
+ VCVTQQ2PD X13, K3, X30 // 6241fe0be6f5
+ VCVTQQ2PD X6, K3, X30 // 6261fe0be6f6
+ VCVTQQ2PD X12, K3, X30 // 6241fe0be6f4
+ VCVTQQ2PD 17(SP)(BP*1), K3, X30 // 6261fe0be6b42c11000000
+ VCVTQQ2PD -7(CX)(DX*8), K3, X30 // 6261fe0be6b4d1f9ffffff
+ VCVTQQ2PD Y3, K3, Y9 // 6271fe2be6cb
+ VCVTQQ2PD Y2, K3, Y9 // 6271fe2be6ca
+ VCVTQQ2PD Y9, K3, Y9 // 6251fe2be6c9
+ VCVTQQ2PD 7(SI)(DI*1), K3, Y9 // 6271fe2be68c3e07000000
+ VCVTQQ2PD 15(DX)(BX*8), K3, Y9 // 6271fe2be68cda0f000000
+ VCVTQQ2PD Y3, K3, Y1 // 62f1fe2be6cb
+ VCVTQQ2PD Y2, K3, Y1 // 62f1fe2be6ca
+ VCVTQQ2PD Y9, K3, Y1 // 62d1fe2be6c9
+ VCVTQQ2PD 7(SI)(DI*1), K3, Y1 // 62f1fe2be68c3e07000000
+ VCVTQQ2PD 15(DX)(BX*8), K3, Y1 // 62f1fe2be68cda0f000000
+ VCVTQQ2PD Z27, K2, Z3 // 6291fe4ae6db
+ VCVTQQ2PD Z15, K2, Z3 // 62d1fe4ae6df
+ VCVTQQ2PD Z27, K2, Z12 // 6211fe4ae6e3
+ VCVTQQ2PD Z15, K2, Z12 // 6251fe4ae6e7
+ VCVTQQ2PD Z23, K1, Z23 // 62a1fe49e6ff
+ VCVTQQ2PD Z6, K1, Z23 // 62e1fe49e6fe
+ VCVTQQ2PD 7(SI)(DI*4), K1, Z23 // 62e1fe49e6bcbe07000000
+ VCVTQQ2PD -7(DI)(R8*2), K1, Z23 // 62a1fe49e6bc47f9ffffff
+ VCVTQQ2PD Z23, K1, Z5 // 62b1fe49e6ef
+ VCVTQQ2PD Z6, K1, Z5 // 62f1fe49e6ee
+ VCVTQQ2PD 7(SI)(DI*4), K1, Z5 // 62f1fe49e6acbe07000000
+ VCVTQQ2PD -7(DI)(R8*2), K1, Z5 // 62b1fe49e6ac47f9ffffff
+ VCVTQQ2PS Z8, K2, Y12 // 6251fc4a5be0
+ VCVTQQ2PS Z28, K2, Y12 // 6211fc4a5be4
+ VCVTQQ2PS Z8, K2, Y21 // 62c1fc4a5be8
+ VCVTQQ2PS Z28, K2, Y21 // 6281fc4a5bec
+ VCVTQQ2PS Z8, K2, Y14 // 6251fc4a5bf0
+ VCVTQQ2PS Z28, K2, Y14 // 6211fc4a5bf4
+ VCVTQQ2PS Z21, K1, Y30 // 6221fc495bf5
+ VCVTQQ2PS Z5, K1, Y30 // 6261fc495bf5
+ VCVTQQ2PS 17(SP), K1, Y30 // 6261fc495bb42411000000
+ VCVTQQ2PS -17(BP)(SI*4), K1, Y30 // 6261fc495bb4b5efffffff
+ VCVTQQ2PS Z21, K1, Y26 // 6221fc495bd5
+ VCVTQQ2PS Z5, K1, Y26 // 6261fc495bd5
+ VCVTQQ2PS 17(SP), K1, Y26 // 6261fc495b942411000000
+ VCVTQQ2PS -17(BP)(SI*4), K1, Y26 // 6261fc495b94b5efffffff
+ VCVTQQ2PS Z21, K1, Y7 // 62b1fc495bfd
+ VCVTQQ2PS Z5, K1, Y7 // 62f1fc495bfd
+ VCVTQQ2PS 17(SP), K1, Y7 // 62f1fc495bbc2411000000
+ VCVTQQ2PS -17(BP)(SI*4), K1, Y7 // 62f1fc495bbcb5efffffff
+ VCVTQQ2PSX X20, K7, X23 // 62a1fc0f5bfc
+ VCVTQQ2PSX X2, K7, X23 // 62e1fc0f5bfa
+ VCVTQQ2PSX X9, K7, X23 // 62c1fc0f5bf9
+ VCVTQQ2PSX -17(BP)(SI*2), K7, X23 // 62e1fc0f5bbc75efffffff
+ VCVTQQ2PSX 7(AX)(CX*2), K7, X23 // 62e1fc0f5bbc4807000000
+ VCVTQQ2PSX X20, K7, X30 // 6221fc0f5bf4
+ VCVTQQ2PSX X2, K7, X30 // 6261fc0f5bf2
+ VCVTQQ2PSX X9, K7, X30 // 6241fc0f5bf1
+ VCVTQQ2PSX -17(BP)(SI*2), K7, X30 // 6261fc0f5bb475efffffff
+ VCVTQQ2PSX 7(AX)(CX*2), K7, X30 // 6261fc0f5bb44807000000
+ VCVTQQ2PSX X20, K7, X8 // 6231fc0f5bc4
+ VCVTQQ2PSX X2, K7, X8 // 6271fc0f5bc2
+ VCVTQQ2PSX X9, K7, X8 // 6251fc0f5bc1
+ VCVTQQ2PSX -17(BP)(SI*2), K7, X8 // 6271fc0f5b8475efffffff
+ VCVTQQ2PSX 7(AX)(CX*2), K7, X8 // 6271fc0f5b844807000000
+ VCVTQQ2PSY Y16, K1, X26 // 6221fc295bd0
+ VCVTQQ2PSY Y1, K1, X26 // 6261fc295bd1
+ VCVTQQ2PSY Y30, K1, X26 // 6201fc295bd6
+ VCVTQQ2PSY -7(DI)(R8*1), K1, X26 // 6221fc295b9407f9ffffff
+ VCVTQQ2PSY (SP), K1, X26 // 6261fc295b1424
+ VCVTQQ2PSY Y16, K1, X19 // 62a1fc295bd8
+ VCVTQQ2PSY Y1, K1, X19 // 62e1fc295bd9
+ VCVTQQ2PSY Y30, K1, X19 // 6281fc295bde
+ VCVTQQ2PSY -7(DI)(R8*1), K1, X19 // 62a1fc295b9c07f9ffffff
+ VCVTQQ2PSY (SP), K1, X19 // 62e1fc295b1c24
+ VCVTQQ2PSY Y16, K1, X0 // 62b1fc295bc0
+ VCVTQQ2PSY Y1, K1, X0 // 62f1fc295bc1
+ VCVTQQ2PSY Y30, K1, X0 // 6291fc295bc6
+ VCVTQQ2PSY -7(DI)(R8*1), K1, X0 // 62b1fc295b8407f9ffffff
+ VCVTQQ2PSY (SP), K1, X0 // 62f1fc295b0424
+ VCVTTPD2QQ X6, K5, X6 // 62f1fd0d7af6
+ VCVTTPD2QQ X1, K5, X6 // 62f1fd0d7af1
+ VCVTTPD2QQ X8, K5, X6 // 62d1fd0d7af0
+ VCVTTPD2QQ (R14), K5, X6 // 62d1fd0d7a36
+ VCVTTPD2QQ -7(DI)(R8*8), K5, X6 // 62b1fd0d7ab4c7f9ffffff
+ VCVTTPD2QQ X6, K5, X17 // 62e1fd0d7ace
+ VCVTTPD2QQ X1, K5, X17 // 62e1fd0d7ac9
+ VCVTTPD2QQ X8, K5, X17 // 62c1fd0d7ac8
+ VCVTTPD2QQ (R14), K5, X17 // 62c1fd0d7a0e
+ VCVTTPD2QQ -7(DI)(R8*8), K5, X17 // 62a1fd0d7a8cc7f9ffffff
+ VCVTTPD2QQ X6, K5, X28 // 6261fd0d7ae6
+ VCVTTPD2QQ X1, K5, X28 // 6261fd0d7ae1
+ VCVTTPD2QQ X8, K5, X28 // 6241fd0d7ae0
+ VCVTTPD2QQ (R14), K5, X28 // 6241fd0d7a26
+ VCVTTPD2QQ -7(DI)(R8*8), K5, X28 // 6221fd0d7aa4c7f9ffffff
+ VCVTTPD2QQ Y14, K7, Y24 // 6241fd2f7ac6
+ VCVTTPD2QQ Y21, K7, Y24 // 6221fd2f7ac5
+ VCVTTPD2QQ Y1, K7, Y24 // 6261fd2f7ac1
+ VCVTTPD2QQ 99(R15)(R15*8), K7, Y24 // 6201fd2f7a84ff63000000
+ VCVTTPD2QQ 7(AX)(CX*8), K7, Y24 // 6261fd2f7a84c807000000
+ VCVTTPD2QQ Y14, K7, Y13 // 6251fd2f7aee
+ VCVTTPD2QQ Y21, K7, Y13 // 6231fd2f7aed
+ VCVTTPD2QQ Y1, K7, Y13 // 6271fd2f7ae9
+ VCVTTPD2QQ 99(R15)(R15*8), K7, Y13 // 6211fd2f7aacff63000000
+ VCVTTPD2QQ 7(AX)(CX*8), K7, Y13 // 6271fd2f7aacc807000000
+ VCVTTPD2QQ Y14, K7, Y20 // 62c1fd2f7ae6
+ VCVTTPD2QQ Y21, K7, Y20 // 62a1fd2f7ae5
+ VCVTTPD2QQ Y1, K7, Y20 // 62e1fd2f7ae1
+ VCVTTPD2QQ 99(R15)(R15*8), K7, Y20 // 6281fd2f7aa4ff63000000
+ VCVTTPD2QQ 7(AX)(CX*8), K7, Y20 // 62e1fd2f7aa4c807000000
+ VCVTTPD2QQ Z6, K7, Z22 // 62e1fd4f7af6
+ VCVTTPD2QQ Z8, K7, Z22 // 62c1fd4f7af0
+ VCVTTPD2QQ Z6, K7, Z11 // 6271fd4f7ade
+ VCVTTPD2QQ Z8, K7, Z11 // 6251fd4f7ad8
+ VCVTTPD2QQ Z12, K6, Z25 // 6241fd4e7acc
+ VCVTTPD2QQ Z17, K6, Z25 // 6221fd4e7ac9
+ VCVTTPD2QQ 99(R15)(R15*1), K6, Z25 // 6201fd4e7a8c3f63000000
+ VCVTTPD2QQ (DX), K6, Z25 // 6261fd4e7a0a
+ VCVTTPD2QQ Z12, K6, Z12 // 6251fd4e7ae4
+ VCVTTPD2QQ Z17, K6, Z12 // 6231fd4e7ae1
+ VCVTTPD2QQ 99(R15)(R15*1), K6, Z12 // 6211fd4e7aa43f63000000
+ VCVTTPD2QQ (DX), K6, Z12 // 6271fd4e7a22
+ VCVTTPD2UQQ X15, K7, X16 // 62c1fd0f78c7
+ VCVTTPD2UQQ X11, K7, X16 // 62c1fd0f78c3
+ VCVTTPD2UQQ X1, K7, X16 // 62e1fd0f78c1
+ VCVTTPD2UQQ (CX), K7, X16 // 62e1fd0f7801
+ VCVTTPD2UQQ 99(R15), K7, X16 // 62c1fd0f788763000000
+ VCVTTPD2UQQ X15, K7, X28 // 6241fd0f78e7
+ VCVTTPD2UQQ X11, K7, X28 // 6241fd0f78e3
+ VCVTTPD2UQQ X1, K7, X28 // 6261fd0f78e1
+ VCVTTPD2UQQ (CX), K7, X28 // 6261fd0f7821
+ VCVTTPD2UQQ 99(R15), K7, X28 // 6241fd0f78a763000000
+ VCVTTPD2UQQ X15, K7, X8 // 6251fd0f78c7
+ VCVTTPD2UQQ X11, K7, X8 // 6251fd0f78c3
+ VCVTTPD2UQQ X1, K7, X8 // 6271fd0f78c1
+ VCVTTPD2UQQ (CX), K7, X8 // 6271fd0f7801
+ VCVTTPD2UQQ 99(R15), K7, X8 // 6251fd0f788763000000
+ VCVTTPD2UQQ Y21, K2, Y5 // 62b1fd2a78ed
+ VCVTTPD2UQQ Y7, K2, Y5 // 62f1fd2a78ef
+ VCVTTPD2UQQ Y30, K2, Y5 // 6291fd2a78ee
+ VCVTTPD2UQQ (BX), K2, Y5 // 62f1fd2a782b
+ VCVTTPD2UQQ -17(BP)(SI*1), K2, Y5 // 62f1fd2a78ac35efffffff
+ VCVTTPD2UQQ Y21, K2, Y17 // 62a1fd2a78cd
+ VCVTTPD2UQQ Y7, K2, Y17 // 62e1fd2a78cf
+ VCVTTPD2UQQ Y30, K2, Y17 // 6281fd2a78ce
+ VCVTTPD2UQQ (BX), K2, Y17 // 62e1fd2a780b
+ VCVTTPD2UQQ -17(BP)(SI*1), K2, Y17 // 62e1fd2a788c35efffffff
+ VCVTTPD2UQQ Y21, K2, Y13 // 6231fd2a78ed
+ VCVTTPD2UQQ Y7, K2, Y13 // 6271fd2a78ef
+ VCVTTPD2UQQ Y30, K2, Y13 // 6211fd2a78ee
+ VCVTTPD2UQQ (BX), K2, Y13 // 6271fd2a782b
+ VCVTTPD2UQQ -17(BP)(SI*1), K2, Y13 // 6271fd2a78ac35efffffff
+ VCVTTPD2UQQ Z8, K5, Z3 // 62d1fd4d78d8
+ VCVTTPD2UQQ Z2, K5, Z3 // 62f1fd4d78da
+ VCVTTPD2UQQ Z8, K5, Z21 // 62c1fd4d78e8
+ VCVTTPD2UQQ Z2, K5, Z21 // 62e1fd4d78ea
+ VCVTTPD2UQQ Z7, K3, Z3 // 62f1fd4b78df
+ VCVTTPD2UQQ Z9, K3, Z3 // 62d1fd4b78d9
+ VCVTTPD2UQQ 7(SI)(DI*8), K3, Z3 // 62f1fd4b789cfe07000000
+ VCVTTPD2UQQ -15(R14), K3, Z3 // 62d1fd4b789ef1ffffff
+ VCVTTPD2UQQ Z7, K3, Z27 // 6261fd4b78df
+ VCVTTPD2UQQ Z9, K3, Z27 // 6241fd4b78d9
+ VCVTTPD2UQQ 7(SI)(DI*8), K3, Z27 // 6261fd4b789cfe07000000
+ VCVTTPD2UQQ -15(R14), K3, Z27 // 6241fd4b789ef1ffffff
+ VCVTTPS2QQ X18, K3, X25 // 62217d0b7aca
+ VCVTTPS2QQ X11, K3, X25 // 62417d0b7acb
+ VCVTTPS2QQ X9, K3, X25 // 62417d0b7ac9
+ VCVTTPS2QQ -7(CX)(DX*1), K3, X25 // 62617d0b7a8c11f9ffffff
+ VCVTTPS2QQ -15(R14)(R15*4), K3, X25 // 62017d0b7a8cbef1ffffff
+ VCVTTPS2QQ X18, K3, X11 // 62317d0b7ada
+ VCVTTPS2QQ X11, K3, X11 // 62517d0b7adb
+ VCVTTPS2QQ X9, K3, X11 // 62517d0b7ad9
+ VCVTTPS2QQ -7(CX)(DX*1), K3, X11 // 62717d0b7a9c11f9ffffff
+ VCVTTPS2QQ -15(R14)(R15*4), K3, X11 // 62117d0b7a9cbef1ffffff
+ VCVTTPS2QQ X18, K3, X17 // 62a17d0b7aca
+ VCVTTPS2QQ X11, K3, X17 // 62c17d0b7acb
+ VCVTTPS2QQ X9, K3, X17 // 62c17d0b7ac9
+ VCVTTPS2QQ -7(CX)(DX*1), K3, X17 // 62e17d0b7a8c11f9ffffff
+ VCVTTPS2QQ -15(R14)(R15*4), K3, X17 // 62817d0b7a8cbef1ffffff
+ VCVTTPS2QQ X2, K3, Y5 // 62f17d2b7aea
+ VCVTTPS2QQ X24, K3, Y5 // 62917d2b7ae8
+ VCVTTPS2QQ (R8), K3, Y5 // 62d17d2b7a28
+ VCVTTPS2QQ 15(DX)(BX*2), K3, Y5 // 62f17d2b7aac5a0f000000
+ VCVTTPS2QQ X2, K3, Y24 // 62617d2b7ac2
+ VCVTTPS2QQ X24, K3, Y24 // 62017d2b7ac0
+ VCVTTPS2QQ (R8), K3, Y24 // 62417d2b7a00
+ VCVTTPS2QQ 15(DX)(BX*2), K3, Y24 // 62617d2b7a845a0f000000
+ VCVTTPS2QQ X2, K3, Y21 // 62e17d2b7aea
+ VCVTTPS2QQ X24, K3, Y21 // 62817d2b7ae8
+ VCVTTPS2QQ (R8), K3, Y21 // 62c17d2b7a28
+ VCVTTPS2QQ 15(DX)(BX*2), K3, Y21 // 62e17d2b7aac5a0f000000
+ VCVTTPS2QQ Y16, K2, Z12 // 62317d4a7ae0
+ VCVTTPS2QQ Y9, K2, Z12 // 62517d4a7ae1
+ VCVTTPS2QQ Y13, K2, Z12 // 62517d4a7ae5
+ VCVTTPS2QQ Y16, K2, Z22 // 62a17d4a7af0
+ VCVTTPS2QQ Y9, K2, Z22 // 62c17d4a7af1
+ VCVTTPS2QQ Y13, K2, Z22 // 62c17d4a7af5
+ VCVTTPS2QQ Y9, K1, Z11 // 62517d497ad9
+ VCVTTPS2QQ Y6, K1, Z11 // 62717d497ade
+ VCVTTPS2QQ Y3, K1, Z11 // 62717d497adb
+ VCVTTPS2QQ -7(DI)(R8*1), K1, Z11 // 62317d497a9c07f9ffffff
+ VCVTTPS2QQ (SP), K1, Z11 // 62717d497a1c24
+ VCVTTPS2QQ Y9, K1, Z5 // 62d17d497ae9
+ VCVTTPS2QQ Y6, K1, Z5 // 62f17d497aee
+ VCVTTPS2QQ Y3, K1, Z5 // 62f17d497aeb
+ VCVTTPS2QQ -7(DI)(R8*1), K1, Z5 // 62b17d497aac07f9ffffff
+ VCVTTPS2QQ (SP), K1, Z5 // 62f17d497a2c24
+ VCVTTPS2UQQ X13, K1, X11 // 62517d0978dd
+ VCVTTPS2UQQ X6, K1, X11 // 62717d0978de
+ VCVTTPS2UQQ X12, K1, X11 // 62517d0978dc
+ VCVTTPS2UQQ -17(BP), K1, X11 // 62717d09789defffffff
+ VCVTTPS2UQQ -15(R14)(R15*8), K1, X11 // 62117d09789cfef1ffffff
+ VCVTTPS2UQQ X13, K1, X15 // 62517d0978fd
+ VCVTTPS2UQQ X6, K1, X15 // 62717d0978fe
+ VCVTTPS2UQQ X12, K1, X15 // 62517d0978fc
+ VCVTTPS2UQQ -17(BP), K1, X15 // 62717d0978bdefffffff
+ VCVTTPS2UQQ -15(R14)(R15*8), K1, X15 // 62117d0978bcfef1ffffff
+ VCVTTPS2UQQ X13, K1, X30 // 62417d0978f5
+ VCVTTPS2UQQ X6, K1, X30 // 62617d0978f6
+ VCVTTPS2UQQ X12, K1, X30 // 62417d0978f4
+ VCVTTPS2UQQ -17(BP), K1, X30 // 62617d0978b5efffffff
+ VCVTTPS2UQQ -15(R14)(R15*8), K1, X30 // 62017d0978b4fef1ffffff
+ VCVTTPS2UQQ X23, K1, Y14 // 62317d2978f7
+ VCVTTPS2UQQ X30, K1, Y14 // 62117d2978f6
+ VCVTTPS2UQQ X8, K1, Y14 // 62517d2978f0
+ VCVTTPS2UQQ -17(BP)(SI*2), K1, Y14 // 62717d2978b475efffffff
+ VCVTTPS2UQQ 7(AX)(CX*2), K1, Y14 // 62717d2978b44807000000
+ VCVTTPS2UQQ X23, K1, Y18 // 62a17d2978d7
+ VCVTTPS2UQQ X30, K1, Y18 // 62817d2978d6
+ VCVTTPS2UQQ X8, K1, Y18 // 62c17d2978d0
+ VCVTTPS2UQQ -17(BP)(SI*2), K1, Y18 // 62e17d29789475efffffff
+ VCVTTPS2UQQ 7(AX)(CX*2), K1, Y18 // 62e17d2978944807000000
+ VCVTTPS2UQQ X23, K1, Y31 // 62217d2978ff
+ VCVTTPS2UQQ X30, K1, Y31 // 62017d2978fe
+ VCVTTPS2UQQ X8, K1, Y31 // 62417d2978f8
+ VCVTTPS2UQQ -17(BP)(SI*2), K1, Y31 // 62617d2978bc75efffffff
+ VCVTTPS2UQQ 7(AX)(CX*2), K1, Y31 // 62617d2978bc4807000000
+ VCVTTPS2UQQ Y18, K7, Z6 // 62b17d4f78f2
+ VCVTTPS2UQQ Y3, K7, Z6 // 62f17d4f78f3
+ VCVTTPS2UQQ Y24, K7, Z6 // 62917d4f78f0
+ VCVTTPS2UQQ Y18, K7, Z22 // 62a17d4f78f2
+ VCVTTPS2UQQ Y3, K7, Z22 // 62e17d4f78f3
+ VCVTTPS2UQQ Y24, K7, Z22 // 62817d4f78f0
+ VCVTTPS2UQQ Y2, K2, Z1 // 62f17d4a78ca
+ VCVTTPS2UQQ Y7, K2, Z1 // 62f17d4a78cf
+ VCVTTPS2UQQ Y21, K2, Z1 // 62b17d4a78cd
+ VCVTTPS2UQQ 99(R15)(R15*8), K2, Z1 // 62917d4a788cff63000000
+ VCVTTPS2UQQ 7(AX)(CX*8), K2, Z1 // 62f17d4a788cc807000000
+ VCVTTPS2UQQ Y2, K2, Z15 // 62717d4a78fa
+ VCVTTPS2UQQ Y7, K2, Z15 // 62717d4a78ff
+ VCVTTPS2UQQ Y21, K2, Z15 // 62317d4a78fd
+ VCVTTPS2UQQ 99(R15)(R15*8), K2, Z15 // 62117d4a78bcff63000000
+ VCVTTPS2UQQ 7(AX)(CX*8), K2, Z15 // 62717d4a78bcc807000000
+ VCVTUQQ2PD X13, K6, X21 // 62c1fe0e7aed
+ VCVTUQQ2PD X0, K6, X21 // 62e1fe0e7ae8
+ VCVTUQQ2PD X30, K6, X21 // 6281fe0e7aee
+ VCVTUQQ2PD 15(R8)(R14*8), K6, X21 // 6281fe0e7aacf00f000000
+ VCVTUQQ2PD -15(R14)(R15*2), K6, X21 // 6281fe0e7aac7ef1ffffff
+ VCVTUQQ2PD X13, K6, X1 // 62d1fe0e7acd
+ VCVTUQQ2PD X0, K6, X1 // 62f1fe0e7ac8
+ VCVTUQQ2PD X30, K6, X1 // 6291fe0e7ace
+ VCVTUQQ2PD 15(R8)(R14*8), K6, X1 // 6291fe0e7a8cf00f000000
+ VCVTUQQ2PD -15(R14)(R15*2), K6, X1 // 6291fe0e7a8c7ef1ffffff
+ VCVTUQQ2PD X13, K6, X11 // 6251fe0e7add
+ VCVTUQQ2PD X0, K6, X11 // 6271fe0e7ad8
+ VCVTUQQ2PD X30, K6, X11 // 6211fe0e7ade
+ VCVTUQQ2PD 15(R8)(R14*8), K6, X11 // 6211fe0e7a9cf00f000000
+ VCVTUQQ2PD -15(R14)(R15*2), K6, X11 // 6211fe0e7a9c7ef1ffffff
+ VCVTUQQ2PD Y11, K3, Y28 // 6241fe2b7ae3
+ VCVTUQQ2PD Y27, K3, Y28 // 6201fe2b7ae3
+ VCVTUQQ2PD Y17, K3, Y28 // 6221fe2b7ae1
+ VCVTUQQ2PD 99(R15)(R15*4), K3, Y28 // 6201fe2b7aa4bf63000000
+ VCVTUQQ2PD 15(DX), K3, Y28 // 6261fe2b7aa20f000000
+ VCVTUQQ2PD Y11, K3, Y1 // 62d1fe2b7acb
+ VCVTUQQ2PD Y27, K3, Y1 // 6291fe2b7acb
+ VCVTUQQ2PD Y17, K3, Y1 // 62b1fe2b7ac9
+ VCVTUQQ2PD 99(R15)(R15*4), K3, Y1 // 6291fe2b7a8cbf63000000
+ VCVTUQQ2PD 15(DX), K3, Y1 // 62f1fe2b7a8a0f000000
+ VCVTUQQ2PD Y11, K3, Y8 // 6251fe2b7ac3
+ VCVTUQQ2PD Y27, K3, Y8 // 6211fe2b7ac3
+ VCVTUQQ2PD Y17, K3, Y8 // 6231fe2b7ac1
+ VCVTUQQ2PD 99(R15)(R15*4), K3, Y8 // 6211fe2b7a84bf63000000
+ VCVTUQQ2PD 15(DX), K3, Y8 // 6271fe2b7a820f000000
+ VCVTUQQ2PD Z12, K7, Z1 // 62d1fe4f7acc
+ VCVTUQQ2PD Z16, K7, Z1 // 62b1fe4f7ac8
+ VCVTUQQ2PD Z12, K7, Z3 // 62d1fe4f7adc
+ VCVTUQQ2PD Z16, K7, Z3 // 62b1fe4f7ad8
+ VCVTUQQ2PD Z14, K4, Z28 // 6241fe4c7ae6
+ VCVTUQQ2PD Z28, K4, Z28 // 6201fe4c7ae4
+ VCVTUQQ2PD 15(R8)(R14*4), K4, Z28 // 6201fe4c7aa4b00f000000
+ VCVTUQQ2PD -7(CX)(DX*4), K4, Z28 // 6261fe4c7aa491f9ffffff
+ VCVTUQQ2PD Z14, K4, Z13 // 6251fe4c7aee
+ VCVTUQQ2PD Z28, K4, Z13 // 6211fe4c7aec
+ VCVTUQQ2PD 15(R8)(R14*4), K4, Z13 // 6211fe4c7aacb00f000000
+ VCVTUQQ2PD -7(CX)(DX*4), K4, Z13 // 6271fe4c7aac91f9ffffff
+ VCVTUQQ2PS Z3, K4, Y16 // 62e1ff4c7ac3
+ VCVTUQQ2PS Z12, K4, Y16 // 62c1ff4c7ac4
+ VCVTUQQ2PS Z3, K4, Y12 // 6271ff4c7ae3
+ VCVTUQQ2PS Z12, K4, Y12 // 6251ff4c7ae4
+ VCVTUQQ2PS Z3, K4, Y6 // 62f1ff4c7af3
+ VCVTUQQ2PS Z12, K4, Y6 // 62d1ff4c7af4
+ VCVTUQQ2PS Z15, K7, Y26 // 6241ff4f7ad7
+ VCVTUQQ2PS Z30, K7, Y26 // 6201ff4f7ad6
+ VCVTUQQ2PS (R8), K7, Y26 // 6241ff4f7a10
+ VCVTUQQ2PS 15(DX)(BX*2), K7, Y26 // 6261ff4f7a945a0f000000
+ VCVTUQQ2PS Z15, K7, Y3 // 62d1ff4f7adf
+ VCVTUQQ2PS Z30, K7, Y3 // 6291ff4f7ade
+ VCVTUQQ2PS (R8), K7, Y3 // 62d1ff4f7a18
+ VCVTUQQ2PS 15(DX)(BX*2), K7, Y3 // 62f1ff4f7a9c5a0f000000
+ VCVTUQQ2PS Z15, K7, Y8 // 6251ff4f7ac7
+ VCVTUQQ2PS Z30, K7, Y8 // 6211ff4f7ac6
+ VCVTUQQ2PS (R8), K7, Y8 // 6251ff4f7a00
+ VCVTUQQ2PS 15(DX)(BX*2), K7, Y8 // 6271ff4f7a845a0f000000
+ VCVTUQQ2PSX X14, K2, X16 // 62c1ff0a7ac6
+ VCVTUQQ2PSX X19, K2, X16 // 62a1ff0a7ac3
+ VCVTUQQ2PSX X8, K2, X16 // 62c1ff0a7ac0
+ VCVTUQQ2PSX -15(R14)(R15*1), K2, X16 // 6281ff0a7a843ef1ffffff
+ VCVTUQQ2PSX -15(BX), K2, X16 // 62e1ff0a7a83f1ffffff
+ VCVTUQQ2PSX X14, K2, X14 // 6251ff0a7af6
+ VCVTUQQ2PSX X19, K2, X14 // 6231ff0a7af3
+ VCVTUQQ2PSX X8, K2, X14 // 6251ff0a7af0
+ VCVTUQQ2PSX -15(R14)(R15*1), K2, X14 // 6211ff0a7ab43ef1ffffff
+ VCVTUQQ2PSX -15(BX), K2, X14 // 6271ff0a7ab3f1ffffff
+ VCVTUQQ2PSX X14, K2, X11 // 6251ff0a7ade
+ VCVTUQQ2PSX X19, K2, X11 // 6231ff0a7adb
+ VCVTUQQ2PSX X8, K2, X11 // 6251ff0a7ad8
+ VCVTUQQ2PSX -15(R14)(R15*1), K2, X11 // 6211ff0a7a9c3ef1ffffff
+ VCVTUQQ2PSX -15(BX), K2, X11 // 6271ff0a7a9bf1ffffff
+ VCVTUQQ2PSY Y28, K5, X8 // 6211ff2d7ac4
+ VCVTUQQ2PSY Y1, K5, X8 // 6271ff2d7ac1
+ VCVTUQQ2PSY Y23, K5, X8 // 6231ff2d7ac7
+ VCVTUQQ2PSY (CX), K5, X8 // 6271ff2d7a01
+ VCVTUQQ2PSY 99(R15), K5, X8 // 6251ff2d7a8763000000
+ VCVTUQQ2PSY Y28, K5, X26 // 6201ff2d7ad4
+ VCVTUQQ2PSY Y1, K5, X26 // 6261ff2d7ad1
+ VCVTUQQ2PSY Y23, K5, X26 // 6221ff2d7ad7
+ VCVTUQQ2PSY (CX), K5, X26 // 6261ff2d7a11
+ VCVTUQQ2PSY 99(R15), K5, X26 // 6241ff2d7a9763000000
+ VCVTUQQ2PSY Y28, K5, X23 // 6281ff2d7afc
+ VCVTUQQ2PSY Y1, K5, X23 // 62e1ff2d7af9
+ VCVTUQQ2PSY Y23, K5, X23 // 62a1ff2d7aff
+ VCVTUQQ2PSY (CX), K5, X23 // 62e1ff2d7a39
+ VCVTUQQ2PSY 99(R15), K5, X23 // 62c1ff2d7abf63000000
+ VEXTRACTF32X8 $0, Z12, K4, Y18 // 62337d4c1be200
+ VEXTRACTF32X8 $0, Z13, K4, Y18 // 62337d4c1bea00
+ VEXTRACTF32X8 $0, Z12, K4, Y24 // 62137d4c1be000
+ VEXTRACTF32X8 $0, Z13, K4, Y24 // 62137d4c1be800
+ VEXTRACTF32X8 $0, Z12, K4, Y9 // 62537d4c1be100
+ VEXTRACTF32X8 $0, Z13, K4, Y9 // 62537d4c1be900
+ VEXTRACTF32X8 $0, Z12, K4, 15(R8) // 62537d4c1ba00f00000000
+ VEXTRACTF32X8 $0, Z13, K4, 15(R8) // 62537d4c1ba80f00000000
+ VEXTRACTF32X8 $0, Z12, K4, (BP) // 62737d4c1b650000
+ VEXTRACTF32X8 $0, Z13, K4, (BP) // 62737d4c1b6d0000
+ VEXTRACTF64X2 $1, Y3, K4, X8 // 62d3fd2c19d801
+ VEXTRACTF64X2 $1, Y19, K4, X8 // 62c3fd2c19d801
+ VEXTRACTF64X2 $1, Y23, K4, X8 // 62c3fd2c19f801
+ VEXTRACTF64X2 $1, Y3, K4, X1 // 62f3fd2c19d901
+ VEXTRACTF64X2 $1, Y19, K4, X1 // 62e3fd2c19d901
+ VEXTRACTF64X2 $1, Y23, K4, X1 // 62e3fd2c19f901
+ VEXTRACTF64X2 $1, Y3, K4, X0 // 62f3fd2c19d801
+ VEXTRACTF64X2 $1, Y19, K4, X0 // 62e3fd2c19d801
+ VEXTRACTF64X2 $1, Y23, K4, X0 // 62e3fd2c19f801
+ VEXTRACTF64X2 $1, Y3, K4, -17(BP)(SI*8) // 62f3fd2c199cf5efffffff01
+ VEXTRACTF64X2 $1, Y19, K4, -17(BP)(SI*8) // 62e3fd2c199cf5efffffff01
+ VEXTRACTF64X2 $1, Y23, K4, -17(BP)(SI*8) // 62e3fd2c19bcf5efffffff01
+ VEXTRACTF64X2 $1, Y3, K4, (R15) // 62d3fd2c191f01
+ VEXTRACTF64X2 $1, Y19, K4, (R15) // 62c3fd2c191f01
+ VEXTRACTF64X2 $1, Y23, K4, (R15) // 62c3fd2c193f01
+ VEXTRACTF64X2 $0, Z21, K7, X15 // 62c3fd4f19ef00
+ VEXTRACTF64X2 $0, Z9, K7, X15 // 6253fd4f19cf00
+ VEXTRACTF64X2 $0, Z21, K7, X0 // 62e3fd4f19e800
+ VEXTRACTF64X2 $0, Z9, K7, X0 // 6273fd4f19c800
+ VEXTRACTF64X2 $0, Z21, K7, X16 // 62a3fd4f19e800
+ VEXTRACTF64X2 $0, Z9, K7, X16 // 6233fd4f19c800
+ VEXTRACTF64X2 $0, Z21, K7, 7(SI)(DI*8) // 62e3fd4f19acfe0700000000
+ VEXTRACTF64X2 $0, Z9, K7, 7(SI)(DI*8) // 6273fd4f198cfe0700000000
+ VEXTRACTF64X2 $0, Z21, K7, -15(R14) // 62c3fd4f19aef1ffffff00
+ VEXTRACTF64X2 $0, Z9, K7, -15(R14) // 6253fd4f198ef1ffffff00
+ VEXTRACTI32X8 $1, Z23, K4, Y21 // 62a37d4c3bfd01
+ VEXTRACTI32X8 $1, Z9, K4, Y21 // 62337d4c3bcd01
+ VEXTRACTI32X8 $1, Z23, K4, Y20 // 62a37d4c3bfc01
+ VEXTRACTI32X8 $1, Z9, K4, Y20 // 62337d4c3bcc01
+ VEXTRACTI32X8 $1, Z23, K4, Y6 // 62e37d4c3bfe01
+ VEXTRACTI32X8 $1, Z9, K4, Y6 // 62737d4c3bce01
+ VEXTRACTI32X8 $1, Z23, K4, -15(R14)(R15*1) // 62837d4c3bbc3ef1ffffff01
+ VEXTRACTI32X8 $1, Z9, K4, -15(R14)(R15*1) // 62137d4c3b8c3ef1ffffff01
+ VEXTRACTI32X8 $1, Z23, K4, -15(BX) // 62e37d4c3bbbf1ffffff01
+ VEXTRACTI32X8 $1, Z9, K4, -15(BX) // 62737d4c3b8bf1ffffff01
+ VEXTRACTI64X2 $0, Y31, K2, X7 // 6263fd2a39ff00
+ VEXTRACTI64X2 $0, Y6, K2, X7 // 62f3fd2a39f700
+ VEXTRACTI64X2 $0, Y11, K2, X7 // 6273fd2a39df00
+ VEXTRACTI64X2 $0, Y31, K2, X16 // 6223fd2a39f800
+ VEXTRACTI64X2 $0, Y6, K2, X16 // 62b3fd2a39f000
+ VEXTRACTI64X2 $0, Y11, K2, X16 // 6233fd2a39d800
+ VEXTRACTI64X2 $0, Y31, K2, X31 // 6203fd2a39ff00
+ VEXTRACTI64X2 $0, Y6, K2, X31 // 6293fd2a39f700
+ VEXTRACTI64X2 $0, Y11, K2, X31 // 6213fd2a39df00
+ VEXTRACTI64X2 $0, Y31, K2, -7(CX) // 6263fd2a39b9f9ffffff00
+ VEXTRACTI64X2 $0, Y6, K2, -7(CX) // 62f3fd2a39b1f9ffffff00
+ VEXTRACTI64X2 $0, Y11, K2, -7(CX) // 6273fd2a3999f9ffffff00
+ VEXTRACTI64X2 $0, Y31, K2, 15(DX)(BX*4) // 6263fd2a39bc9a0f00000000
+ VEXTRACTI64X2 $0, Y6, K2, 15(DX)(BX*4) // 62f3fd2a39b49a0f00000000
+ VEXTRACTI64X2 $0, Y11, K2, 15(DX)(BX*4) // 6273fd2a399c9a0f00000000
+ VEXTRACTI64X2 $2, Z27, K2, X1 // 6263fd4a39d902
+ VEXTRACTI64X2 $2, Z14, K2, X1 // 6273fd4a39f102
+ VEXTRACTI64X2 $2, Z27, K2, X7 // 6263fd4a39df02
+ VEXTRACTI64X2 $2, Z14, K2, X7 // 6273fd4a39f702
+ VEXTRACTI64X2 $2, Z27, K2, X9 // 6243fd4a39d902
+ VEXTRACTI64X2 $2, Z14, K2, X9 // 6253fd4a39f102
+ VEXTRACTI64X2 $2, Z27, K2, 99(R15)(R15*8) // 6203fd4a399cff6300000002
+ VEXTRACTI64X2 $2, Z14, K2, 99(R15)(R15*8) // 6213fd4a39b4ff6300000002
+ VEXTRACTI64X2 $2, Z27, K2, 7(AX)(CX*8) // 6263fd4a399cc80700000002
+ VEXTRACTI64X2 $2, Z14, K2, 7(AX)(CX*8) // 6273fd4a39b4c80700000002
+ VFPCLASSPDX $65, X14, K4, K1 // 62d3fd0c66ce41
+ VFPCLASSPDX $65, X19, K4, K1 // 62b3fd0c66cb41
+ VFPCLASSPDX $65, X8, K4, K1 // 62d3fd0c66c841
+ VFPCLASSPDX $65, (R14), K4, K1 // 62d3fd0c660e41
+ VFPCLASSPDX $65, -7(DI)(R8*8), K4, K1 // 62b3fd0c668cc7f9ffffff41
+ VFPCLASSPDX $65, X14, K4, K3 // 62d3fd0c66de41
+ VFPCLASSPDX $65, X19, K4, K3 // 62b3fd0c66db41
+ VFPCLASSPDX $65, X8, K4, K3 // 62d3fd0c66d841
+ VFPCLASSPDX $65, (R14), K4, K3 // 62d3fd0c661e41
+ VFPCLASSPDX $65, -7(DI)(R8*8), K4, K3 // 62b3fd0c669cc7f9ffffff41
+ VFPCLASSPDY $67, Y31, K1, K6 // 6293fd2966f743
+ VFPCLASSPDY $67, Y5, K1, K6 // 62f3fd2966f543
+ VFPCLASSPDY $67, Y0, K1, K6 // 62f3fd2966f043
+ VFPCLASSPDY $67, 7(SI)(DI*8), K1, K6 // 62f3fd2966b4fe0700000043
+ VFPCLASSPDY $67, -15(R14), K1, K6 // 62d3fd2966b6f1ffffff43
+ VFPCLASSPDY $67, Y31, K1, K7 // 6293fd2966ff43
+ VFPCLASSPDY $67, Y5, K1, K7 // 62f3fd2966fd43
+ VFPCLASSPDY $67, Y0, K1, K7 // 62f3fd2966f843
+ VFPCLASSPDY $67, 7(SI)(DI*8), K1, K7 // 62f3fd2966bcfe0700000043
+ VFPCLASSPDY $67, -15(R14), K1, K7 // 62d3fd2966bef1ffffff43
+ VFPCLASSPDZ $127, Z3, K3, K6 // 62f3fd4b66f37f
+ VFPCLASSPDZ $127, Z27, K3, K6 // 6293fd4b66f37f
+ VFPCLASSPDZ $127, 7(AX)(CX*4), K3, K6 // 62f3fd4b66b488070000007f
+ VFPCLASSPDZ $127, 7(AX)(CX*1), K3, K6 // 62f3fd4b66b408070000007f
+ VFPCLASSPDZ $127, Z3, K3, K4 // 62f3fd4b66e37f
+ VFPCLASSPDZ $127, Z27, K3, K4 // 6293fd4b66e37f
+ VFPCLASSPDZ $127, 7(AX)(CX*4), K3, K4 // 62f3fd4b66a488070000007f
+ VFPCLASSPDZ $127, 7(AX)(CX*1), K3, K4 // 62f3fd4b66a408070000007f
+ VFPCLASSPSX $0, X8, K4, K4 // 62d37d0c66e000
+ VFPCLASSPSX $0, X26, K4, K4 // 62937d0c66e200
+ VFPCLASSPSX $0, X23, K4, K4 // 62b37d0c66e700
+ VFPCLASSPSX $0, 99(R15)(R15*4), K4, K4 // 62937d0c66a4bf6300000000
+ VFPCLASSPSX $0, 15(DX), K4, K4 // 62f37d0c66a20f00000000
+ VFPCLASSPSX $0, X8, K4, K6 // 62d37d0c66f000
+ VFPCLASSPSX $0, X26, K4, K6 // 62937d0c66f200
+ VFPCLASSPSX $0, X23, K4, K6 // 62b37d0c66f700
+ VFPCLASSPSX $0, 99(R15)(R15*4), K4, K6 // 62937d0c66b4bf6300000000
+ VFPCLASSPSX $0, 15(DX), K4, K6 // 62f37d0c66b20f00000000
+ VFPCLASSPSY $97, Y5, K5, K4 // 62f37d2d66e561
+ VFPCLASSPSY $97, Y19, K5, K4 // 62b37d2d66e361
+ VFPCLASSPSY $97, Y31, K5, K4 // 62937d2d66e761
+ VFPCLASSPSY $97, 7(SI)(DI*1), K5, K4 // 62f37d2d66a43e0700000061
+ VFPCLASSPSY $97, 15(DX)(BX*8), K5, K4 // 62f37d2d66a4da0f00000061
+ VFPCLASSPSY $97, Y5, K5, K5 // 62f37d2d66ed61
+ VFPCLASSPSY $97, Y19, K5, K5 // 62b37d2d66eb61
+ VFPCLASSPSY $97, Y31, K5, K5 // 62937d2d66ef61
+ VFPCLASSPSY $97, 7(SI)(DI*1), K5, K5 // 62f37d2d66ac3e0700000061
+ VFPCLASSPSY $97, 15(DX)(BX*8), K5, K5 // 62f37d2d66acda0f00000061
+ VFPCLASSPSZ $81, Z7, K7, K2 // 62f37d4f66d751
+ VFPCLASSPSZ $81, Z9, K7, K2 // 62d37d4f66d151
+ VFPCLASSPSZ $81, (SI), K7, K2 // 62f37d4f661651
+ VFPCLASSPSZ $81, 7(SI)(DI*2), K7, K2 // 62f37d4f66947e0700000051
+ VFPCLASSPSZ $81, Z7, K7, K7 // 62f37d4f66ff51
+ VFPCLASSPSZ $81, Z9, K7, K7 // 62d37d4f66f951
+ VFPCLASSPSZ $81, (SI), K7, K7 // 62f37d4f663e51
+ VFPCLASSPSZ $81, 7(SI)(DI*2), K7, K7 // 62f37d4f66bc7e0700000051
+ VFPCLASSSD $42, X12, K7, K0 // 62d3fd0f67c42a or 62d3fd2f67c42a or 62d3fd4f67c42a
+ VFPCLASSSD $42, X16, K7, K0 // 62b3fd0f67c02a or 62b3fd2f67c02a or 62b3fd4f67c02a
+ VFPCLASSSD $42, X23, K7, K0 // 62b3fd0f67c72a or 62b3fd2f67c72a or 62b3fd4f67c72a
+ VFPCLASSSD $42, (BX), K7, K0 // 62f3fd0f67032a or 62f3fd2f67032a or 62f3fd4f67032a
+ VFPCLASSSD $42, -17(BP)(SI*1), K7, K0 // 62f3fd0f678435efffffff2a or 62f3fd2f678435efffffff2a or 62f3fd4f678435efffffff2a
+ VFPCLASSSD $42, X12, K7, K5 // 62d3fd0f67ec2a or 62d3fd2f67ec2a or 62d3fd4f67ec2a
+ VFPCLASSSD $42, X16, K7, K5 // 62b3fd0f67e82a or 62b3fd2f67e82a or 62b3fd4f67e82a
+ VFPCLASSSD $42, X23, K7, K5 // 62b3fd0f67ef2a or 62b3fd2f67ef2a or 62b3fd4f67ef2a
+ VFPCLASSSD $42, (BX), K7, K5 // 62f3fd0f672b2a or 62f3fd2f672b2a or 62f3fd4f672b2a
+ VFPCLASSSD $42, -17(BP)(SI*1), K7, K5 // 62f3fd0f67ac35efffffff2a or 62f3fd2f67ac35efffffff2a or 62f3fd4f67ac35efffffff2a
+ VFPCLASSSS $79, X23, K6, K6 // 62b37d0e67f74f or 62b37d2e67f74f or 62b37d4e67f74f
+ VFPCLASSSS $79, X11, K6, K6 // 62d37d0e67f34f or 62d37d2e67f34f or 62d37d4e67f34f
+ VFPCLASSSS $79, X31, K6, K6 // 62937d0e67f74f or 62937d2e67f74f or 62937d4e67f74f
+ VFPCLASSSS $79, 7(SI)(DI*1), K6, K6 // 62f37d0e67b43e070000004f or 62f37d2e67b43e070000004f or 62f37d4e67b43e070000004f
+ VFPCLASSSS $79, 15(DX)(BX*8), K6, K6 // 62f37d0e67b4da0f0000004f or 62f37d2e67b4da0f0000004f or 62f37d4e67b4da0f0000004f
+ VFPCLASSSS $79, X23, K6, K5 // 62b37d0e67ef4f or 62b37d2e67ef4f or 62b37d4e67ef4f
+ VFPCLASSSS $79, X11, K6, K5 // 62d37d0e67eb4f or 62d37d2e67eb4f or 62d37d4e67eb4f
+ VFPCLASSSS $79, X31, K6, K5 // 62937d0e67ef4f or 62937d2e67ef4f or 62937d4e67ef4f
+ VFPCLASSSS $79, 7(SI)(DI*1), K6, K5 // 62f37d0e67ac3e070000004f or 62f37d2e67ac3e070000004f or 62f37d4e67ac3e070000004f
+ VFPCLASSSS $79, 15(DX)(BX*8), K6, K5 // 62f37d0e67acda0f0000004f or 62f37d2e67acda0f0000004f or 62f37d4e67acda0f0000004f
+ VINSERTF32X8 $1, Y12, Z0, K2, Z23 // 62c37d4a1afc01
+ VINSERTF32X8 $1, Y21, Z0, K2, Z23 // 62a37d4a1afd01
+ VINSERTF32X8 $1, Y14, Z0, K2, Z23 // 62c37d4a1afe01
+ VINSERTF32X8 $1, 17(SP)(BP*1), Z0, K2, Z23 // 62e37d4a1abc2c1100000001
+ VINSERTF32X8 $1, -7(CX)(DX*8), Z0, K2, Z23 // 62e37d4a1abcd1f9ffffff01
+ VINSERTF32X8 $1, Y12, Z11, K2, Z23 // 62c3254a1afc01
+ VINSERTF32X8 $1, Y21, Z11, K2, Z23 // 62a3254a1afd01
+ VINSERTF32X8 $1, Y14, Z11, K2, Z23 // 62c3254a1afe01
+ VINSERTF32X8 $1, 17(SP)(BP*1), Z11, K2, Z23 // 62e3254a1abc2c1100000001
+ VINSERTF32X8 $1, -7(CX)(DX*8), Z11, K2, Z23 // 62e3254a1abcd1f9ffffff01
+ VINSERTF32X8 $1, Y12, Z0, K2, Z19 // 62c37d4a1adc01
+ VINSERTF32X8 $1, Y21, Z0, K2, Z19 // 62a37d4a1add01
+ VINSERTF32X8 $1, Y14, Z0, K2, Z19 // 62c37d4a1ade01
+ VINSERTF32X8 $1, 17(SP)(BP*1), Z0, K2, Z19 // 62e37d4a1a9c2c1100000001
+ VINSERTF32X8 $1, -7(CX)(DX*8), Z0, K2, Z19 // 62e37d4a1a9cd1f9ffffff01
+ VINSERTF32X8 $1, Y12, Z11, K2, Z19 // 62c3254a1adc01
+ VINSERTF32X8 $1, Y21, Z11, K2, Z19 // 62a3254a1add01
+ VINSERTF32X8 $1, Y14, Z11, K2, Z19 // 62c3254a1ade01
+ VINSERTF32X8 $1, 17(SP)(BP*1), Z11, K2, Z19 // 62e3254a1a9c2c1100000001
+ VINSERTF32X8 $1, -7(CX)(DX*8), Z11, K2, Z19 // 62e3254a1a9cd1f9ffffff01
+ VINSERTF64X2 $0, X3, Y16, K4, Y30 // 6263fd2418f300
+ VINSERTF64X2 $0, X26, Y16, K4, Y30 // 6203fd2418f200
+ VINSERTF64X2 $0, X23, Y16, K4, Y30 // 6223fd2418f700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y16, K4, Y30 // 6263fd2418b4880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y16, K4, Y30 // 6263fd2418b4080700000000
+ VINSERTF64X2 $0, X3, Y1, K4, Y30 // 6263f52c18f300
+ VINSERTF64X2 $0, X26, Y1, K4, Y30 // 6203f52c18f200
+ VINSERTF64X2 $0, X23, Y1, K4, Y30 // 6223f52c18f700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y1, K4, Y30 // 6263f52c18b4880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y1, K4, Y30 // 6263f52c18b4080700000000
+ VINSERTF64X2 $0, X3, Y30, K4, Y30 // 62638d2418f300
+ VINSERTF64X2 $0, X26, Y30, K4, Y30 // 62038d2418f200
+ VINSERTF64X2 $0, X23, Y30, K4, Y30 // 62238d2418f700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y30, K4, Y30 // 62638d2418b4880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y30, K4, Y30 // 62638d2418b4080700000000
+ VINSERTF64X2 $0, X3, Y16, K4, Y26 // 6263fd2418d300
+ VINSERTF64X2 $0, X26, Y16, K4, Y26 // 6203fd2418d200
+ VINSERTF64X2 $0, X23, Y16, K4, Y26 // 6223fd2418d700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y16, K4, Y26 // 6263fd241894880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y16, K4, Y26 // 6263fd241894080700000000
+ VINSERTF64X2 $0, X3, Y1, K4, Y26 // 6263f52c18d300
+ VINSERTF64X2 $0, X26, Y1, K4, Y26 // 6203f52c18d200
+ VINSERTF64X2 $0, X23, Y1, K4, Y26 // 6223f52c18d700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y1, K4, Y26 // 6263f52c1894880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y1, K4, Y26 // 6263f52c1894080700000000
+ VINSERTF64X2 $0, X3, Y30, K4, Y26 // 62638d2418d300
+ VINSERTF64X2 $0, X26, Y30, K4, Y26 // 62038d2418d200
+ VINSERTF64X2 $0, X23, Y30, K4, Y26 // 62238d2418d700
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y30, K4, Y26 // 62638d241894880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y30, K4, Y26 // 62638d241894080700000000
+ VINSERTF64X2 $0, X3, Y16, K4, Y7 // 62f3fd2418fb00
+ VINSERTF64X2 $0, X26, Y16, K4, Y7 // 6293fd2418fa00
+ VINSERTF64X2 $0, X23, Y16, K4, Y7 // 62b3fd2418ff00
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y16, K4, Y7 // 62f3fd2418bc880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y16, K4, Y7 // 62f3fd2418bc080700000000
+ VINSERTF64X2 $0, X3, Y1, K4, Y7 // 62f3f52c18fb00
+ VINSERTF64X2 $0, X26, Y1, K4, Y7 // 6293f52c18fa00
+ VINSERTF64X2 $0, X23, Y1, K4, Y7 // 62b3f52c18ff00
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y1, K4, Y7 // 62f3f52c18bc880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y1, K4, Y7 // 62f3f52c18bc080700000000
+ VINSERTF64X2 $0, X3, Y30, K4, Y7 // 62f38d2418fb00
+ VINSERTF64X2 $0, X26, Y30, K4, Y7 // 62938d2418fa00
+ VINSERTF64X2 $0, X23, Y30, K4, Y7 // 62b38d2418ff00
+ VINSERTF64X2 $0, 7(AX)(CX*4), Y30, K4, Y7 // 62f38d2418bc880700000000
+ VINSERTF64X2 $0, 7(AX)(CX*1), Y30, K4, Y7 // 62f38d2418bc080700000000
+ VINSERTF64X2 $1, X13, Z24, K1, Z0 // 62d3bd4118c501
+ VINSERTF64X2 $1, X28, Z24, K1, Z0 // 6293bd4118c401
+ VINSERTF64X2 $1, X24, Z24, K1, Z0 // 6293bd4118c001
+ VINSERTF64X2 $1, (SI), Z24, K1, Z0 // 62f3bd41180601
+ VINSERTF64X2 $1, 7(SI)(DI*2), Z24, K1, Z0 // 62f3bd4118847e0700000001
+ VINSERTF64X2 $1, X13, Z12, K1, Z0 // 62d39d4918c501
+ VINSERTF64X2 $1, X28, Z12, K1, Z0 // 62939d4918c401
+ VINSERTF64X2 $1, X24, Z12, K1, Z0 // 62939d4918c001
+ VINSERTF64X2 $1, (SI), Z12, K1, Z0 // 62f39d49180601
+ VINSERTF64X2 $1, 7(SI)(DI*2), Z12, K1, Z0 // 62f39d4918847e0700000001
+ VINSERTF64X2 $1, X13, Z24, K1, Z25 // 6243bd4118cd01
+ VINSERTF64X2 $1, X28, Z24, K1, Z25 // 6203bd4118cc01
+ VINSERTF64X2 $1, X24, Z24, K1, Z25 // 6203bd4118c801
+ VINSERTF64X2 $1, (SI), Z24, K1, Z25 // 6263bd41180e01
+ VINSERTF64X2 $1, 7(SI)(DI*2), Z24, K1, Z25 // 6263bd41188c7e0700000001
+ VINSERTF64X2 $1, X13, Z12, K1, Z25 // 62439d4918cd01
+ VINSERTF64X2 $1, X28, Z12, K1, Z25 // 62039d4918cc01
+ VINSERTF64X2 $1, X24, Z12, K1, Z25 // 62039d4918c801
+ VINSERTF64X2 $1, (SI), Z12, K1, Z25 // 62639d49180e01
+ VINSERTF64X2 $1, 7(SI)(DI*2), Z12, K1, Z25 // 62639d49188c7e0700000001
+ VINSERTI32X8 $1, Y24, Z17, K7, Z20 // 628375473ae001
+ VINSERTI32X8 $1, Y13, Z17, K7, Z20 // 62c375473ae501
+ VINSERTI32X8 $1, Y20, Z17, K7, Z20 // 62a375473ae401
+ VINSERTI32X8 $1, 15(R8)(R14*1), Z17, K7, Z20 // 628375473aa4300f00000001
+ VINSERTI32X8 $1, 15(R8)(R14*2), Z17, K7, Z20 // 628375473aa4700f00000001
+ VINSERTI32X8 $1, Y24, Z0, K7, Z20 // 62837d4f3ae001
+ VINSERTI32X8 $1, Y13, Z0, K7, Z20 // 62c37d4f3ae501
+ VINSERTI32X8 $1, Y20, Z0, K7, Z20 // 62a37d4f3ae401
+ VINSERTI32X8 $1, 15(R8)(R14*1), Z0, K7, Z20 // 62837d4f3aa4300f00000001
+ VINSERTI32X8 $1, 15(R8)(R14*2), Z0, K7, Z20 // 62837d4f3aa4700f00000001
+ VINSERTI32X8 $1, Y24, Z17, K7, Z0 // 629375473ac001
+ VINSERTI32X8 $1, Y13, Z17, K7, Z0 // 62d375473ac501
+ VINSERTI32X8 $1, Y20, Z17, K7, Z0 // 62b375473ac401
+ VINSERTI32X8 $1, 15(R8)(R14*1), Z17, K7, Z0 // 629375473a84300f00000001
+ VINSERTI32X8 $1, 15(R8)(R14*2), Z17, K7, Z0 // 629375473a84700f00000001
+ VINSERTI32X8 $1, Y24, Z0, K7, Z0 // 62937d4f3ac001
+ VINSERTI32X8 $1, Y13, Z0, K7, Z0 // 62d37d4f3ac501
+ VINSERTI32X8 $1, Y20, Z0, K7, Z0 // 62b37d4f3ac401
+ VINSERTI32X8 $1, 15(R8)(R14*1), Z0, K7, Z0 // 62937d4f3a84300f00000001
+ VINSERTI32X8 $1, 15(R8)(R14*2), Z0, K7, Z0 // 62937d4f3a84700f00000001
+ VINSERTI64X2 $0, X11, Y26, K7, Y14 // 6253ad2738f300
+ VINSERTI64X2 $0, X31, Y26, K7, Y14 // 6213ad2738f700
+ VINSERTI64X2 $0, X3, Y26, K7, Y14 // 6273ad2738f300
+ VINSERTI64X2 $0, 17(SP), Y26, K7, Y14 // 6273ad2738b4241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y26, K7, Y14 // 6273ad2738b4b5efffffff00
+ VINSERTI64X2 $0, X11, Y30, K7, Y14 // 62538d2738f300
+ VINSERTI64X2 $0, X31, Y30, K7, Y14 // 62138d2738f700
+ VINSERTI64X2 $0, X3, Y30, K7, Y14 // 62738d2738f300
+ VINSERTI64X2 $0, 17(SP), Y30, K7, Y14 // 62738d2738b4241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y30, K7, Y14 // 62738d2738b4b5efffffff00
+ VINSERTI64X2 $0, X11, Y12, K7, Y14 // 62539d2f38f300
+ VINSERTI64X2 $0, X31, Y12, K7, Y14 // 62139d2f38f700
+ VINSERTI64X2 $0, X3, Y12, K7, Y14 // 62739d2f38f300
+ VINSERTI64X2 $0, 17(SP), Y12, K7, Y14 // 62739d2f38b4241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y12, K7, Y14 // 62739d2f38b4b5efffffff00
+ VINSERTI64X2 $0, X11, Y26, K7, Y21 // 62c3ad2738eb00
+ VINSERTI64X2 $0, X31, Y26, K7, Y21 // 6283ad2738ef00
+ VINSERTI64X2 $0, X3, Y26, K7, Y21 // 62e3ad2738eb00
+ VINSERTI64X2 $0, 17(SP), Y26, K7, Y21 // 62e3ad2738ac241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y26, K7, Y21 // 62e3ad2738acb5efffffff00
+ VINSERTI64X2 $0, X11, Y30, K7, Y21 // 62c38d2738eb00
+ VINSERTI64X2 $0, X31, Y30, K7, Y21 // 62838d2738ef00
+ VINSERTI64X2 $0, X3, Y30, K7, Y21 // 62e38d2738eb00
+ VINSERTI64X2 $0, 17(SP), Y30, K7, Y21 // 62e38d2738ac241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y30, K7, Y21 // 62e38d2738acb5efffffff00
+ VINSERTI64X2 $0, X11, Y12, K7, Y21 // 62c39d2f38eb00
+ VINSERTI64X2 $0, X31, Y12, K7, Y21 // 62839d2f38ef00
+ VINSERTI64X2 $0, X3, Y12, K7, Y21 // 62e39d2f38eb00
+ VINSERTI64X2 $0, 17(SP), Y12, K7, Y21 // 62e39d2f38ac241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y12, K7, Y21 // 62e39d2f38acb5efffffff00
+ VINSERTI64X2 $0, X11, Y26, K7, Y1 // 62d3ad2738cb00
+ VINSERTI64X2 $0, X31, Y26, K7, Y1 // 6293ad2738cf00
+ VINSERTI64X2 $0, X3, Y26, K7, Y1 // 62f3ad2738cb00
+ VINSERTI64X2 $0, 17(SP), Y26, K7, Y1 // 62f3ad27388c241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y26, K7, Y1 // 62f3ad27388cb5efffffff00
+ VINSERTI64X2 $0, X11, Y30, K7, Y1 // 62d38d2738cb00
+ VINSERTI64X2 $0, X31, Y30, K7, Y1 // 62938d2738cf00
+ VINSERTI64X2 $0, X3, Y30, K7, Y1 // 62f38d2738cb00
+ VINSERTI64X2 $0, 17(SP), Y30, K7, Y1 // 62f38d27388c241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y30, K7, Y1 // 62f38d27388cb5efffffff00
+ VINSERTI64X2 $0, X11, Y12, K7, Y1 // 62d39d2f38cb00
+ VINSERTI64X2 $0, X31, Y12, K7, Y1 // 62939d2f38cf00
+ VINSERTI64X2 $0, X3, Y12, K7, Y1 // 62f39d2f38cb00
+ VINSERTI64X2 $0, 17(SP), Y12, K7, Y1 // 62f39d2f388c241100000000
+ VINSERTI64X2 $0, -17(BP)(SI*4), Y12, K7, Y1 // 62f39d2f388cb5efffffff00
+ VINSERTI64X2 $3, X7, Z31, K6, Z17 // 62e3854638cf03
+ VINSERTI64X2 $3, X0, Z31, K6, Z17 // 62e3854638c803
+ VINSERTI64X2 $3, 7(AX), Z31, K6, Z17 // 62e3854638880700000003
+ VINSERTI64X2 $3, (DI), Z31, K6, Z17 // 62e38546380f03
+ VINSERTI64X2 $3, X7, Z0, K6, Z17 // 62e3fd4e38cf03
+ VINSERTI64X2 $3, X0, Z0, K6, Z17 // 62e3fd4e38c803
+ VINSERTI64X2 $3, 7(AX), Z0, K6, Z17 // 62e3fd4e38880700000003
+ VINSERTI64X2 $3, (DI), Z0, K6, Z17 // 62e3fd4e380f03
+ VINSERTI64X2 $3, X7, Z31, K6, Z23 // 62e3854638ff03
+ VINSERTI64X2 $3, X0, Z31, K6, Z23 // 62e3854638f803
+ VINSERTI64X2 $3, 7(AX), Z31, K6, Z23 // 62e3854638b80700000003
+ VINSERTI64X2 $3, (DI), Z31, K6, Z23 // 62e38546383f03
+ VINSERTI64X2 $3, X7, Z0, K6, Z23 // 62e3fd4e38ff03
+ VINSERTI64X2 $3, X0, Z0, K6, Z23 // 62e3fd4e38f803
+ VINSERTI64X2 $3, 7(AX), Z0, K6, Z23 // 62e3fd4e38b80700000003
+ VINSERTI64X2 $3, (DI), Z0, K6, Z23 // 62e3fd4e383f03
+ VORPD X11, X24, K7, X23 // 62c1bd0756fb
+ VORPD X23, X24, K7, X23 // 62a1bd0756ff
+ VORPD X2, X24, K7, X23 // 62e1bd0756fa
+ VORPD -17(BP)(SI*8), X24, K7, X23 // 62e1bd0756bcf5efffffff
+ VORPD (R15), X24, K7, X23 // 62c1bd07563f
+ VORPD X11, X14, K7, X23 // 62c18d0f56fb
+ VORPD X23, X14, K7, X23 // 62a18d0f56ff
+ VORPD X2, X14, K7, X23 // 62e18d0f56fa
+ VORPD -17(BP)(SI*8), X14, K7, X23 // 62e18d0f56bcf5efffffff
+ VORPD (R15), X14, K7, X23 // 62c18d0f563f
+ VORPD X11, X0, K7, X23 // 62c1fd0f56fb
+ VORPD X23, X0, K7, X23 // 62a1fd0f56ff
+ VORPD X2, X0, K7, X23 // 62e1fd0f56fa
+ VORPD -17(BP)(SI*8), X0, K7, X23 // 62e1fd0f56bcf5efffffff
+ VORPD (R15), X0, K7, X23 // 62c1fd0f563f
+ VORPD X11, X24, K7, X11 // 6251bd0756db
+ VORPD X23, X24, K7, X11 // 6231bd0756df
+ VORPD X2, X24, K7, X11 // 6271bd0756da
+ VORPD -17(BP)(SI*8), X24, K7, X11 // 6271bd07569cf5efffffff
+ VORPD (R15), X24, K7, X11 // 6251bd07561f
+ VORPD X11, X14, K7, X11 // 62518d0f56db
+ VORPD X23, X14, K7, X11 // 62318d0f56df
+ VORPD X2, X14, K7, X11 // 62718d0f56da
+ VORPD -17(BP)(SI*8), X14, K7, X11 // 62718d0f569cf5efffffff
+ VORPD (R15), X14, K7, X11 // 62518d0f561f
+ VORPD X11, X0, K7, X11 // 6251fd0f56db
+ VORPD X23, X0, K7, X11 // 6231fd0f56df
+ VORPD X2, X0, K7, X11 // 6271fd0f56da
+ VORPD -17(BP)(SI*8), X0, K7, X11 // 6271fd0f569cf5efffffff
+ VORPD (R15), X0, K7, X11 // 6251fd0f561f
+ VORPD X11, X24, K7, X31 // 6241bd0756fb
+ VORPD X23, X24, K7, X31 // 6221bd0756ff
+ VORPD X2, X24, K7, X31 // 6261bd0756fa
+ VORPD -17(BP)(SI*8), X24, K7, X31 // 6261bd0756bcf5efffffff
+ VORPD (R15), X24, K7, X31 // 6241bd07563f
+ VORPD X11, X14, K7, X31 // 62418d0f56fb
+ VORPD X23, X14, K7, X31 // 62218d0f56ff
+ VORPD X2, X14, K7, X31 // 62618d0f56fa
+ VORPD -17(BP)(SI*8), X14, K7, X31 // 62618d0f56bcf5efffffff
+ VORPD (R15), X14, K7, X31 // 62418d0f563f
+ VORPD X11, X0, K7, X31 // 6241fd0f56fb
+ VORPD X23, X0, K7, X31 // 6221fd0f56ff
+ VORPD X2, X0, K7, X31 // 6261fd0f56fa
+ VORPD -17(BP)(SI*8), X0, K7, X31 // 6261fd0f56bcf5efffffff
+ VORPD (R15), X0, K7, X31 // 6241fd0f563f
+ VORPD Y16, Y5, K1, Y8 // 6231d52956c0
+ VORPD Y9, Y5, K1, Y8 // 6251d52956c1
+ VORPD Y13, Y5, K1, Y8 // 6251d52956c5
+ VORPD 99(R15)(R15*2), Y5, K1, Y8 // 6211d52956847f63000000
+ VORPD -7(DI), Y5, K1, Y8 // 6271d5295687f9ffffff
+ VORPD Y16, Y24, K1, Y8 // 6231bd2156c0
+ VORPD Y9, Y24, K1, Y8 // 6251bd2156c1
+ VORPD Y13, Y24, K1, Y8 // 6251bd2156c5
+ VORPD 99(R15)(R15*2), Y24, K1, Y8 // 6211bd2156847f63000000
+ VORPD -7(DI), Y24, K1, Y8 // 6271bd215687f9ffffff
+ VORPD Y16, Y21, K1, Y8 // 6231d52156c0
+ VORPD Y9, Y21, K1, Y8 // 6251d52156c1
+ VORPD Y13, Y21, K1, Y8 // 6251d52156c5
+ VORPD 99(R15)(R15*2), Y21, K1, Y8 // 6211d52156847f63000000
+ VORPD -7(DI), Y21, K1, Y8 // 6271d5215687f9ffffff
+ VORPD Y16, Y5, K1, Y11 // 6231d52956d8
+ VORPD Y9, Y5, K1, Y11 // 6251d52956d9
+ VORPD Y13, Y5, K1, Y11 // 6251d52956dd
+ VORPD 99(R15)(R15*2), Y5, K1, Y11 // 6211d529569c7f63000000
+ VORPD -7(DI), Y5, K1, Y11 // 6271d529569ff9ffffff
+ VORPD Y16, Y24, K1, Y11 // 6231bd2156d8
+ VORPD Y9, Y24, K1, Y11 // 6251bd2156d9
+ VORPD Y13, Y24, K1, Y11 // 6251bd2156dd
+ VORPD 99(R15)(R15*2), Y24, K1, Y11 // 6211bd21569c7f63000000
+ VORPD -7(DI), Y24, K1, Y11 // 6271bd21569ff9ffffff
+ VORPD Y16, Y21, K1, Y11 // 6231d52156d8
+ VORPD Y9, Y21, K1, Y11 // 6251d52156d9
+ VORPD Y13, Y21, K1, Y11 // 6251d52156dd
+ VORPD 99(R15)(R15*2), Y21, K1, Y11 // 6211d521569c7f63000000
+ VORPD -7(DI), Y21, K1, Y11 // 6271d521569ff9ffffff
+ VORPD Y16, Y5, K1, Y24 // 6221d52956c0
+ VORPD Y9, Y5, K1, Y24 // 6241d52956c1
+ VORPD Y13, Y5, K1, Y24 // 6241d52956c5
+ VORPD 99(R15)(R15*2), Y5, K1, Y24 // 6201d52956847f63000000
+ VORPD -7(DI), Y5, K1, Y24 // 6261d5295687f9ffffff
+ VORPD Y16, Y24, K1, Y24 // 6221bd2156c0
+ VORPD Y9, Y24, K1, Y24 // 6241bd2156c1
+ VORPD Y13, Y24, K1, Y24 // 6241bd2156c5
+ VORPD 99(R15)(R15*2), Y24, K1, Y24 // 6201bd2156847f63000000
+ VORPD -7(DI), Y24, K1, Y24 // 6261bd215687f9ffffff
+ VORPD Y16, Y21, K1, Y24 // 6221d52156c0
+ VORPD Y9, Y21, K1, Y24 // 6241d52156c1
+ VORPD Y13, Y21, K1, Y24 // 6241d52156c5
+ VORPD 99(R15)(R15*2), Y21, K1, Y24 // 6201d52156847f63000000
+ VORPD -7(DI), Y21, K1, Y24 // 6261d5215687f9ffffff
+ VORPD Z9, Z9, K1, Z0 // 62d1b54956c1
+ VORPD Z25, Z9, K1, Z0 // 6291b54956c1
+ VORPD -7(CX), Z9, K1, Z0 // 62f1b5495681f9ffffff
+ VORPD 15(DX)(BX*4), Z9, K1, Z0 // 62f1b54956849a0f000000
+ VORPD Z9, Z3, K1, Z0 // 62d1e54956c1
+ VORPD Z25, Z3, K1, Z0 // 6291e54956c1
+ VORPD -7(CX), Z3, K1, Z0 // 62f1e5495681f9ffffff
+ VORPD 15(DX)(BX*4), Z3, K1, Z0 // 62f1e54956849a0f000000
+ VORPD Z9, Z9, K1, Z26 // 6241b54956d1
+ VORPD Z25, Z9, K1, Z26 // 6201b54956d1
+ VORPD -7(CX), Z9, K1, Z26 // 6261b5495691f9ffffff
+ VORPD 15(DX)(BX*4), Z9, K1, Z26 // 6261b54956949a0f000000
+ VORPD Z9, Z3, K1, Z26 // 6241e54956d1
+ VORPD Z25, Z3, K1, Z26 // 6201e54956d1
+ VORPD -7(CX), Z3, K1, Z26 // 6261e5495691f9ffffff
+ VORPD 15(DX)(BX*4), Z3, K1, Z26 // 6261e54956949a0f000000
+ VORPS X2, X0, K1, X20 // 62e17c0956e2
+ VORPS X8, X0, K1, X20 // 62c17c0956e0
+ VORPS X9, X0, K1, X20 // 62c17c0956e1
+ VORPS 7(SI)(DI*8), X0, K1, X20 // 62e17c0956a4fe07000000
+ VORPS -15(R14), X0, K1, X20 // 62c17c0956a6f1ffffff
+ VORPS X2, X9, K1, X20 // 62e1340956e2
+ VORPS X8, X9, K1, X20 // 62c1340956e0
+ VORPS X9, X9, K1, X20 // 62c1340956e1
+ VORPS 7(SI)(DI*8), X9, K1, X20 // 62e1340956a4fe07000000
+ VORPS -15(R14), X9, K1, X20 // 62c1340956a6f1ffffff
+ VORPS X2, X13, K1, X20 // 62e1140956e2
+ VORPS X8, X13, K1, X20 // 62c1140956e0
+ VORPS X9, X13, K1, X20 // 62c1140956e1
+ VORPS 7(SI)(DI*8), X13, K1, X20 // 62e1140956a4fe07000000
+ VORPS -15(R14), X13, K1, X20 // 62c1140956a6f1ffffff
+ VORPS X2, X0, K1, X5 // 62f17c0956ea
+ VORPS X8, X0, K1, X5 // 62d17c0956e8
+ VORPS X9, X0, K1, X5 // 62d17c0956e9
+ VORPS 7(SI)(DI*8), X0, K1, X5 // 62f17c0956acfe07000000
+ VORPS -15(R14), X0, K1, X5 // 62d17c0956aef1ffffff
+ VORPS X2, X9, K1, X5 // 62f1340956ea
+ VORPS X8, X9, K1, X5 // 62d1340956e8
+ VORPS X9, X9, K1, X5 // 62d1340956e9
+ VORPS 7(SI)(DI*8), X9, K1, X5 // 62f1340956acfe07000000
+ VORPS -15(R14), X9, K1, X5 // 62d1340956aef1ffffff
+ VORPS X2, X13, K1, X5 // 62f1140956ea
+ VORPS X8, X13, K1, X5 // 62d1140956e8
+ VORPS X9, X13, K1, X5 // 62d1140956e9
+ VORPS 7(SI)(DI*8), X13, K1, X5 // 62f1140956acfe07000000
+ VORPS -15(R14), X13, K1, X5 // 62d1140956aef1ffffff
+ VORPS X2, X0, K1, X25 // 62617c0956ca
+ VORPS X8, X0, K1, X25 // 62417c0956c8
+ VORPS X9, X0, K1, X25 // 62417c0956c9
+ VORPS 7(SI)(DI*8), X0, K1, X25 // 62617c09568cfe07000000
+ VORPS -15(R14), X0, K1, X25 // 62417c09568ef1ffffff
+ VORPS X2, X9, K1, X25 // 6261340956ca
+ VORPS X8, X9, K1, X25 // 6241340956c8
+ VORPS X9, X9, K1, X25 // 6241340956c9
+ VORPS 7(SI)(DI*8), X9, K1, X25 // 62613409568cfe07000000
+ VORPS -15(R14), X9, K1, X25 // 62413409568ef1ffffff
+ VORPS X2, X13, K1, X25 // 6261140956ca
+ VORPS X8, X13, K1, X25 // 6241140956c8
+ VORPS X9, X13, K1, X25 // 6241140956c9
+ VORPS 7(SI)(DI*8), X13, K1, X25 // 62611409568cfe07000000
+ VORPS -15(R14), X13, K1, X25 // 62411409568ef1ffffff
+ VORPS Y11, Y7, K7, Y9 // 6251442f56cb
+ VORPS Y26, Y7, K7, Y9 // 6211442f56ca
+ VORPS Y12, Y7, K7, Y9 // 6251442f56cc
+ VORPS -7(CX)(DX*1), Y7, K7, Y9 // 6271442f568c11f9ffffff
+ VORPS -15(R14)(R15*4), Y7, K7, Y9 // 6211442f568cbef1ffffff
+ VORPS Y11, Y6, K7, Y9 // 62514c2f56cb
+ VORPS Y26, Y6, K7, Y9 // 62114c2f56ca
+ VORPS Y12, Y6, K7, Y9 // 62514c2f56cc
+ VORPS -7(CX)(DX*1), Y6, K7, Y9 // 62714c2f568c11f9ffffff
+ VORPS -15(R14)(R15*4), Y6, K7, Y9 // 62114c2f568cbef1ffffff
+ VORPS Y11, Y26, K7, Y9 // 62512c2756cb
+ VORPS Y26, Y26, K7, Y9 // 62112c2756ca
+ VORPS Y12, Y26, K7, Y9 // 62512c2756cc
+ VORPS -7(CX)(DX*1), Y26, K7, Y9 // 62712c27568c11f9ffffff
+ VORPS -15(R14)(R15*4), Y26, K7, Y9 // 62112c27568cbef1ffffff
+ VORPS Y11, Y7, K7, Y6 // 62d1442f56f3
+ VORPS Y26, Y7, K7, Y6 // 6291442f56f2
+ VORPS Y12, Y7, K7, Y6 // 62d1442f56f4
+ VORPS -7(CX)(DX*1), Y7, K7, Y6 // 62f1442f56b411f9ffffff
+ VORPS -15(R14)(R15*4), Y7, K7, Y6 // 6291442f56b4bef1ffffff
+ VORPS Y11, Y6, K7, Y6 // 62d14c2f56f3
+ VORPS Y26, Y6, K7, Y6 // 62914c2f56f2
+ VORPS Y12, Y6, K7, Y6 // 62d14c2f56f4
+ VORPS -7(CX)(DX*1), Y6, K7, Y6 // 62f14c2f56b411f9ffffff
+ VORPS -15(R14)(R15*4), Y6, K7, Y6 // 62914c2f56b4bef1ffffff
+ VORPS Y11, Y26, K7, Y6 // 62d12c2756f3
+ VORPS Y26, Y26, K7, Y6 // 62912c2756f2
+ VORPS Y12, Y26, K7, Y6 // 62d12c2756f4
+ VORPS -7(CX)(DX*1), Y26, K7, Y6 // 62f12c2756b411f9ffffff
+ VORPS -15(R14)(R15*4), Y26, K7, Y6 // 62912c2756b4bef1ffffff
+ VORPS Y11, Y7, K7, Y3 // 62d1442f56db
+ VORPS Y26, Y7, K7, Y3 // 6291442f56da
+ VORPS Y12, Y7, K7, Y3 // 62d1442f56dc
+ VORPS -7(CX)(DX*1), Y7, K7, Y3 // 62f1442f569c11f9ffffff
+ VORPS -15(R14)(R15*4), Y7, K7, Y3 // 6291442f569cbef1ffffff
+ VORPS Y11, Y6, K7, Y3 // 62d14c2f56db
+ VORPS Y26, Y6, K7, Y3 // 62914c2f56da
+ VORPS Y12, Y6, K7, Y3 // 62d14c2f56dc
+ VORPS -7(CX)(DX*1), Y6, K7, Y3 // 62f14c2f569c11f9ffffff
+ VORPS -15(R14)(R15*4), Y6, K7, Y3 // 62914c2f569cbef1ffffff
+ VORPS Y11, Y26, K7, Y3 // 62d12c2756db
+ VORPS Y26, Y26, K7, Y3 // 62912c2756da
+ VORPS Y12, Y26, K7, Y3 // 62d12c2756dc
+ VORPS -7(CX)(DX*1), Y26, K7, Y3 // 62f12c27569c11f9ffffff
+ VORPS -15(R14)(R15*4), Y26, K7, Y3 // 62912c27569cbef1ffffff
+ VORPS Z17, Z20, K2, Z9 // 62315c4256c9
+ VORPS Z0, Z20, K2, Z9 // 62715c4256c8
+ VORPS 99(R15)(R15*8), Z20, K2, Z9 // 62115c42568cff63000000
+ VORPS 7(AX)(CX*8), Z20, K2, Z9 // 62715c42568cc807000000
+ VORPS Z17, Z0, K2, Z9 // 62317c4a56c9
+ VORPS Z0, Z0, K2, Z9 // 62717c4a56c8
+ VORPS 99(R15)(R15*8), Z0, K2, Z9 // 62117c4a568cff63000000
+ VORPS 7(AX)(CX*8), Z0, K2, Z9 // 62717c4a568cc807000000
+ VORPS Z17, Z20, K2, Z28 // 62215c4256e1
+ VORPS Z0, Z20, K2, Z28 // 62615c4256e0
+ VORPS 99(R15)(R15*8), Z20, K2, Z28 // 62015c4256a4ff63000000
+ VORPS 7(AX)(CX*8), Z20, K2, Z28 // 62615c4256a4c807000000
+ VORPS Z17, Z0, K2, Z28 // 62217c4a56e1
+ VORPS Z0, Z0, K2, Z28 // 62617c4a56e0
+ VORPS 99(R15)(R15*8), Z0, K2, Z28 // 62017c4a56a4ff63000000
+ VORPS 7(AX)(CX*8), Z0, K2, Z28 // 62617c4a56a4c807000000
+ VPEXTRD $64, X22, CX // 62e37d0816f140
+ VPEXTRD $64, X30, CX // 62637d0816f140
+ VPEXTRD $64, X22, SP // 62e37d0816f440
+ VPEXTRD $64, X30, SP // 62637d0816f440
+ VPEXTRD $64, X22, 99(R15)(R15*2) // 62837d0816b47f6300000040
+ VPEXTRD $64, X30, 99(R15)(R15*2) // 62037d0816b47f6300000040
+ VPEXTRD $64, X22, -7(DI) // 62e37d0816b7f9ffffff40
+ VPEXTRD $64, X30, -7(DI) // 62637d0816b7f9ffffff40
+ VPEXTRQ $27, X30, R9 // 6243fd0816f11b
+ VPEXTRQ $27, X30, R13 // 6243fd0816f51b
+ VPEXTRQ $27, X30, -15(R14)(R15*1) // 6203fd0816b43ef1ffffff1b
+ VPEXTRQ $27, X30, -15(BX) // 6263fd0816b3f1ffffff1b
+ VPINSRD $82, R9, X22, X21 // 62c34d0022e952
+ VPINSRD $82, CX, X22, X21 // 62e34d0022e952
+ VPINSRD $82, -7(CX)(DX*1), X22, X21 // 62e34d0022ac11f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X22, X21 // 62834d0022acbef1ffffff52
+ VPINSRD $82, R9, X7, X21 // 62c3450822e952
+ VPINSRD $82, CX, X7, X21 // 62e3450822e952
+ VPINSRD $82, -7(CX)(DX*1), X7, X21 // 62e3450822ac11f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X7, X21 // 6283450822acbef1ffffff52
+ VPINSRD $82, R9, X19, X21 // 62c3650022e952
+ VPINSRD $82, CX, X19, X21 // 62e3650022e952
+ VPINSRD $82, -7(CX)(DX*1), X19, X21 // 62e3650022ac11f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X19, X21 // 6283650022acbef1ffffff52
+ VPINSRD $82, R9, X22, X0 // 62d34d0022c152
+ VPINSRD $82, CX, X22, X0 // 62f34d0022c152
+ VPINSRD $82, -7(CX)(DX*1), X22, X0 // 62f34d00228411f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X22, X0 // 62934d002284bef1ffffff52
+ VPINSRD $82, R9, X19, X0 // 62d3650022c152
+ VPINSRD $82, CX, X19, X0 // 62f3650022c152
+ VPINSRD $82, -7(CX)(DX*1), X19, X0 // 62f36500228411f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X19, X0 // 629365002284bef1ffffff52
+ VPINSRD $82, R9, X22, X28 // 62434d0022e152
+ VPINSRD $82, CX, X22, X28 // 62634d0022e152
+ VPINSRD $82, -7(CX)(DX*1), X22, X28 // 62634d0022a411f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X22, X28 // 62034d0022a4bef1ffffff52
+ VPINSRD $82, R9, X7, X28 // 6243450822e152
+ VPINSRD $82, CX, X7, X28 // 6263450822e152
+ VPINSRD $82, -7(CX)(DX*1), X7, X28 // 6263450822a411f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X7, X28 // 6203450822a4bef1ffffff52
+ VPINSRD $82, R9, X19, X28 // 6243650022e152
+ VPINSRD $82, CX, X19, X28 // 6263650022e152
+ VPINSRD $82, -7(CX)(DX*1), X19, X28 // 6263650022a411f9ffffff52
+ VPINSRD $82, -15(R14)(R15*4), X19, X28 // 6203650022a4bef1ffffff52
+ VPINSRQ $126, DX, X1, X16 // 62e3f50822c27e
+ VPINSRQ $126, BP, X1, X16 // 62e3f50822c57e
+ VPINSRQ $126, 7(AX)(CX*4), X1, X16 // 62e3f508228488070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X1, X16 // 62e3f508228408070000007e
+ VPINSRQ $126, DX, X7, X16 // 62e3c50822c27e
+ VPINSRQ $126, BP, X7, X16 // 62e3c50822c57e
+ VPINSRQ $126, 7(AX)(CX*4), X7, X16 // 62e3c508228488070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X7, X16 // 62e3c508228408070000007e
+ VPINSRQ $126, DX, X9, X16 // 62e3b50822c27e
+ VPINSRQ $126, BP, X9, X16 // 62e3b50822c57e
+ VPINSRQ $126, 7(AX)(CX*4), X9, X16 // 62e3b508228488070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X9, X16 // 62e3b508228408070000007e
+ VPINSRQ $126, DX, X1, X31 // 6263f50822fa7e
+ VPINSRQ $126, BP, X1, X31 // 6263f50822fd7e
+ VPINSRQ $126, 7(AX)(CX*4), X1, X31 // 6263f50822bc88070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X1, X31 // 6263f50822bc08070000007e
+ VPINSRQ $126, DX, X7, X31 // 6263c50822fa7e
+ VPINSRQ $126, BP, X7, X31 // 6263c50822fd7e
+ VPINSRQ $126, 7(AX)(CX*4), X7, X31 // 6263c50822bc88070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X7, X31 // 6263c50822bc08070000007e
+ VPINSRQ $126, DX, X9, X31 // 6263b50822fa7e
+ VPINSRQ $126, BP, X9, X31 // 6263b50822fd7e
+ VPINSRQ $126, 7(AX)(CX*4), X9, X31 // 6263b50822bc88070000007e
+ VPINSRQ $126, 7(AX)(CX*1), X9, X31 // 6263b50822bc08070000007e
+ VPMOVD2M X3, K6 // 62f27e0839f3
+ VPMOVD2M X26, K6 // 62927e0839f2
+ VPMOVD2M X23, K6 // 62b27e0839f7
+ VPMOVD2M X3, K7 // 62f27e0839fb
+ VPMOVD2M X26, K7 // 62927e0839fa
+ VPMOVD2M X23, K7 // 62b27e0839ff
+ VPMOVD2M Y5, K6 // 62f27e2839f5
+ VPMOVD2M Y28, K6 // 62927e2839f4
+ VPMOVD2M Y7, K6 // 62f27e2839f7
+ VPMOVD2M Y5, K4 // 62f27e2839e5
+ VPMOVD2M Y28, K4 // 62927e2839e4
+ VPMOVD2M Y7, K4 // 62f27e2839e7
+ VPMOVD2M Z1, K4 // 62f27e4839e1
+ VPMOVD2M Z9, K4 // 62d27e4839e1
+ VPMOVD2M Z1, K6 // 62f27e4839f1
+ VPMOVD2M Z9, K6 // 62d27e4839f1
+ VPMOVM2D K6, X21 // 62e27e0838ee
+ VPMOVM2D K5, X21 // 62e27e0838ed
+ VPMOVM2D K6, X1 // 62f27e0838ce
+ VPMOVM2D K5, X1 // 62f27e0838cd
+ VPMOVM2D K6, X11 // 62727e0838de
+ VPMOVM2D K5, X11 // 62727e0838dd
+ VPMOVM2D K1, Y28 // 62627e2838e1
+ VPMOVM2D K5, Y28 // 62627e2838e5
+ VPMOVM2D K1, Y13 // 62727e2838e9
+ VPMOVM2D K5, Y13 // 62727e2838ed
+ VPMOVM2D K1, Y7 // 62f27e2838f9
+ VPMOVM2D K5, Y7 // 62f27e2838fd
+ VPMOVM2D K3, Z7 // 62f27e4838fb
+ VPMOVM2D K1, Z7 // 62f27e4838f9
+ VPMOVM2D K3, Z21 // 62e27e4838eb
+ VPMOVM2D K1, Z21 // 62e27e4838e9
+ VPMOVM2Q K5, X13 // 6272fe0838ed
+ VPMOVM2Q K4, X13 // 6272fe0838ec
+ VPMOVM2Q K5, X0 // 62f2fe0838c5
+ VPMOVM2Q K4, X0 // 62f2fe0838c4
+ VPMOVM2Q K5, X30 // 6262fe0838f5
+ VPMOVM2Q K4, X30 // 6262fe0838f4
+ VPMOVM2Q K7, Y2 // 62f2fe2838d7
+ VPMOVM2Q K6, Y2 // 62f2fe2838d6
+ VPMOVM2Q K7, Y21 // 62e2fe2838ef
+ VPMOVM2Q K6, Y21 // 62e2fe2838ee
+ VPMOVM2Q K7, Y12 // 6272fe2838e7
+ VPMOVM2Q K6, Y12 // 6272fe2838e6
+ VPMOVM2Q K4, Z16 // 62e2fe4838c4
+ VPMOVM2Q K6, Z16 // 62e2fe4838c6
+ VPMOVM2Q K4, Z25 // 6262fe4838cc
+ VPMOVM2Q K6, Z25 // 6262fe4838ce
+ VPMOVQ2M X14, K1 // 62d2fe0839ce
+ VPMOVQ2M X19, K1 // 62b2fe0839cb
+ VPMOVQ2M X8, K1 // 62d2fe0839c8
+ VPMOVQ2M X14, K3 // 62d2fe0839de
+ VPMOVQ2M X19, K3 // 62b2fe0839db
+ VPMOVQ2M X8, K3 // 62d2fe0839d8
+ VPMOVQ2M Y3, K6 // 62f2fe2839f3
+ VPMOVQ2M Y2, K6 // 62f2fe2839f2
+ VPMOVQ2M Y9, K6 // 62d2fe2839f1
+ VPMOVQ2M Y3, K7 // 62f2fe2839fb
+ VPMOVQ2M Y2, K7 // 62f2fe2839fa
+ VPMOVQ2M Y9, K7 // 62d2fe2839f9
+ VPMOVQ2M Z12, K6 // 62d2fe4839f4
+ VPMOVQ2M Z13, K6 // 62d2fe4839f5
+ VPMOVQ2M Z12, K4 // 62d2fe4839e4
+ VPMOVQ2M Z13, K4 // 62d2fe4839e5
+ VPMULLQ X13, X3, K7, X17 // 62c2e50f40cd
+ VPMULLQ X28, X3, K7, X17 // 6282e50f40cc
+ VPMULLQ X24, X3, K7, X17 // 6282e50f40c8
+ VPMULLQ 15(R8)(R14*4), X3, K7, X17 // 6282e50f408cb00f000000
+ VPMULLQ -7(CX)(DX*4), X3, K7, X17 // 62e2e50f408c91f9ffffff
+ VPMULLQ X13, X26, K7, X17 // 62c2ad0740cd
+ VPMULLQ X28, X26, K7, X17 // 6282ad0740cc
+ VPMULLQ X24, X26, K7, X17 // 6282ad0740c8
+ VPMULLQ 15(R8)(R14*4), X26, K7, X17 // 6282ad07408cb00f000000
+ VPMULLQ -7(CX)(DX*4), X26, K7, X17 // 62e2ad07408c91f9ffffff
+ VPMULLQ X13, X23, K7, X17 // 62c2c50740cd
+ VPMULLQ X28, X23, K7, X17 // 6282c50740cc
+ VPMULLQ X24, X23, K7, X17 // 6282c50740c8
+ VPMULLQ 15(R8)(R14*4), X23, K7, X17 // 6282c507408cb00f000000
+ VPMULLQ -7(CX)(DX*4), X23, K7, X17 // 62e2c507408c91f9ffffff
+ VPMULLQ X13, X3, K7, X15 // 6252e50f40fd
+ VPMULLQ X28, X3, K7, X15 // 6212e50f40fc
+ VPMULLQ X24, X3, K7, X15 // 6212e50f40f8
+ VPMULLQ 15(R8)(R14*4), X3, K7, X15 // 6212e50f40bcb00f000000
+ VPMULLQ -7(CX)(DX*4), X3, K7, X15 // 6272e50f40bc91f9ffffff
+ VPMULLQ X13, X26, K7, X15 // 6252ad0740fd
+ VPMULLQ X28, X26, K7, X15 // 6212ad0740fc
+ VPMULLQ X24, X26, K7, X15 // 6212ad0740f8
+ VPMULLQ 15(R8)(R14*4), X26, K7, X15 // 6212ad0740bcb00f000000
+ VPMULLQ -7(CX)(DX*4), X26, K7, X15 // 6272ad0740bc91f9ffffff
+ VPMULLQ X13, X23, K7, X15 // 6252c50740fd
+ VPMULLQ X28, X23, K7, X15 // 6212c50740fc
+ VPMULLQ X24, X23, K7, X15 // 6212c50740f8
+ VPMULLQ 15(R8)(R14*4), X23, K7, X15 // 6212c50740bcb00f000000
+ VPMULLQ -7(CX)(DX*4), X23, K7, X15 // 6272c50740bc91f9ffffff
+ VPMULLQ X13, X3, K7, X8 // 6252e50f40c5
+ VPMULLQ X28, X3, K7, X8 // 6212e50f40c4
+ VPMULLQ X24, X3, K7, X8 // 6212e50f40c0
+ VPMULLQ 15(R8)(R14*4), X3, K7, X8 // 6212e50f4084b00f000000
+ VPMULLQ -7(CX)(DX*4), X3, K7, X8 // 6272e50f408491f9ffffff
+ VPMULLQ X13, X26, K7, X8 // 6252ad0740c5
+ VPMULLQ X28, X26, K7, X8 // 6212ad0740c4
+ VPMULLQ X24, X26, K7, X8 // 6212ad0740c0
+ VPMULLQ 15(R8)(R14*4), X26, K7, X8 // 6212ad074084b00f000000
+ VPMULLQ -7(CX)(DX*4), X26, K7, X8 // 6272ad07408491f9ffffff
+ VPMULLQ X13, X23, K7, X8 // 6252c50740c5
+ VPMULLQ X28, X23, K7, X8 // 6212c50740c4
+ VPMULLQ X24, X23, K7, X8 // 6212c50740c0
+ VPMULLQ 15(R8)(R14*4), X23, K7, X8 // 6212c5074084b00f000000
+ VPMULLQ -7(CX)(DX*4), X23, K7, X8 // 6272c507408491f9ffffff
+ VPMULLQ Y28, Y31, K2, Y17 // 6282852240cc
+ VPMULLQ Y13, Y31, K2, Y17 // 62c2852240cd
+ VPMULLQ Y7, Y31, K2, Y17 // 62e2852240cf
+ VPMULLQ 15(DX)(BX*1), Y31, K2, Y17 // 62e28522408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y31, K2, Y17 // 62e28522408c51f9ffffff
+ VPMULLQ Y28, Y8, K2, Y17 // 6282bd2a40cc
+ VPMULLQ Y13, Y8, K2, Y17 // 62c2bd2a40cd
+ VPMULLQ Y7, Y8, K2, Y17 // 62e2bd2a40cf
+ VPMULLQ 15(DX)(BX*1), Y8, K2, Y17 // 62e2bd2a408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y8, K2, Y17 // 62e2bd2a408c51f9ffffff
+ VPMULLQ Y28, Y1, K2, Y17 // 6282f52a40cc
+ VPMULLQ Y13, Y1, K2, Y17 // 62c2f52a40cd
+ VPMULLQ Y7, Y1, K2, Y17 // 62e2f52a40cf
+ VPMULLQ 15(DX)(BX*1), Y1, K2, Y17 // 62e2f52a408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y1, K2, Y17 // 62e2f52a408c51f9ffffff
+ VPMULLQ Y28, Y31, K2, Y7 // 6292852240fc
+ VPMULLQ Y13, Y31, K2, Y7 // 62d2852240fd
+ VPMULLQ Y7, Y31, K2, Y7 // 62f2852240ff
+ VPMULLQ 15(DX)(BX*1), Y31, K2, Y7 // 62f2852240bc1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y31, K2, Y7 // 62f2852240bc51f9ffffff
+ VPMULLQ Y28, Y8, K2, Y7 // 6292bd2a40fc
+ VPMULLQ Y13, Y8, K2, Y7 // 62d2bd2a40fd
+ VPMULLQ Y7, Y8, K2, Y7 // 62f2bd2a40ff
+ VPMULLQ 15(DX)(BX*1), Y8, K2, Y7 // 62f2bd2a40bc1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y8, K2, Y7 // 62f2bd2a40bc51f9ffffff
+ VPMULLQ Y28, Y1, K2, Y7 // 6292f52a40fc
+ VPMULLQ Y13, Y1, K2, Y7 // 62d2f52a40fd
+ VPMULLQ Y7, Y1, K2, Y7 // 62f2f52a40ff
+ VPMULLQ 15(DX)(BX*1), Y1, K2, Y7 // 62f2f52a40bc1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y1, K2, Y7 // 62f2f52a40bc51f9ffffff
+ VPMULLQ Y28, Y31, K2, Y9 // 6212852240cc
+ VPMULLQ Y13, Y31, K2, Y9 // 6252852240cd
+ VPMULLQ Y7, Y31, K2, Y9 // 6272852240cf
+ VPMULLQ 15(DX)(BX*1), Y31, K2, Y9 // 62728522408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y31, K2, Y9 // 62728522408c51f9ffffff
+ VPMULLQ Y28, Y8, K2, Y9 // 6212bd2a40cc
+ VPMULLQ Y13, Y8, K2, Y9 // 6252bd2a40cd
+ VPMULLQ Y7, Y8, K2, Y9 // 6272bd2a40cf
+ VPMULLQ 15(DX)(BX*1), Y8, K2, Y9 // 6272bd2a408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y8, K2, Y9 // 6272bd2a408c51f9ffffff
+ VPMULLQ Y28, Y1, K2, Y9 // 6212f52a40cc
+ VPMULLQ Y13, Y1, K2, Y9 // 6252f52a40cd
+ VPMULLQ Y7, Y1, K2, Y9 // 6272f52a40cf
+ VPMULLQ 15(DX)(BX*1), Y1, K2, Y9 // 6272f52a408c1a0f000000
+ VPMULLQ -7(CX)(DX*2), Y1, K2, Y9 // 6272f52a408c51f9ffffff
+ VPMULLQ Z3, Z20, K4, Z0 // 62f2dd4440c3
+ VPMULLQ Z30, Z20, K4, Z0 // 6292dd4440c6
+ VPMULLQ 15(R8)(R14*8), Z20, K4, Z0 // 6292dd444084f00f000000
+ VPMULLQ -15(R14)(R15*2), Z20, K4, Z0 // 6292dd4440847ef1ffffff
+ VPMULLQ Z3, Z28, K4, Z0 // 62f29d4440c3
+ VPMULLQ Z30, Z28, K4, Z0 // 62929d4440c6
+ VPMULLQ 15(R8)(R14*8), Z28, K4, Z0 // 62929d444084f00f000000
+ VPMULLQ -15(R14)(R15*2), Z28, K4, Z0 // 62929d4440847ef1ffffff
+ VPMULLQ Z3, Z20, K4, Z6 // 62f2dd4440f3
+ VPMULLQ Z30, Z20, K4, Z6 // 6292dd4440f6
+ VPMULLQ 15(R8)(R14*8), Z20, K4, Z6 // 6292dd4440b4f00f000000
+ VPMULLQ -15(R14)(R15*2), Z20, K4, Z6 // 6292dd4440b47ef1ffffff
+ VPMULLQ Z3, Z28, K4, Z6 // 62f29d4440f3
+ VPMULLQ Z30, Z28, K4, Z6 // 62929d4440f6
+ VPMULLQ 15(R8)(R14*8), Z28, K4, Z6 // 62929d4440b4f00f000000
+ VPMULLQ -15(R14)(R15*2), Z28, K4, Z6 // 62929d4440b47ef1ffffff
+ VRANGEPD $11, X24, X23, K2, X12 // 6213c50250e00b
+ VRANGEPD $11, X14, X23, K2, X12 // 6253c50250e60b
+ VRANGEPD $11, X0, X23, K2, X12 // 6273c50250e00b
+ VRANGEPD $11, 17(SP)(BP*8), X23, K2, X12 // 6273c50250a4ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X23, K2, X12 // 6273c50250a4ac110000000b
+ VRANGEPD $11, X24, X11, K2, X12 // 6213a50a50e00b
+ VRANGEPD $11, X14, X11, K2, X12 // 6253a50a50e60b
+ VRANGEPD $11, X0, X11, K2, X12 // 6273a50a50e00b
+ VRANGEPD $11, 17(SP)(BP*8), X11, K2, X12 // 6273a50a50a4ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X11, K2, X12 // 6273a50a50a4ac110000000b
+ VRANGEPD $11, X24, X31, K2, X12 // 6213850250e00b
+ VRANGEPD $11, X14, X31, K2, X12 // 6253850250e60b
+ VRANGEPD $11, X0, X31, K2, X12 // 6273850250e00b
+ VRANGEPD $11, 17(SP)(BP*8), X31, K2, X12 // 6273850250a4ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X31, K2, X12 // 6273850250a4ac110000000b
+ VRANGEPD $11, X24, X23, K2, X16 // 6283c50250c00b
+ VRANGEPD $11, X14, X23, K2, X16 // 62c3c50250c60b
+ VRANGEPD $11, X0, X23, K2, X16 // 62e3c50250c00b
+ VRANGEPD $11, 17(SP)(BP*8), X23, K2, X16 // 62e3c5025084ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X23, K2, X16 // 62e3c5025084ac110000000b
+ VRANGEPD $11, X24, X11, K2, X16 // 6283a50a50c00b
+ VRANGEPD $11, X14, X11, K2, X16 // 62c3a50a50c60b
+ VRANGEPD $11, X0, X11, K2, X16 // 62e3a50a50c00b
+ VRANGEPD $11, 17(SP)(BP*8), X11, K2, X16 // 62e3a50a5084ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X11, K2, X16 // 62e3a50a5084ac110000000b
+ VRANGEPD $11, X24, X31, K2, X16 // 6283850250c00b
+ VRANGEPD $11, X14, X31, K2, X16 // 62c3850250c60b
+ VRANGEPD $11, X0, X31, K2, X16 // 62e3850250c00b
+ VRANGEPD $11, 17(SP)(BP*8), X31, K2, X16 // 62e385025084ec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X31, K2, X16 // 62e385025084ac110000000b
+ VRANGEPD $11, X24, X23, K2, X23 // 6283c50250f80b
+ VRANGEPD $11, X14, X23, K2, X23 // 62c3c50250fe0b
+ VRANGEPD $11, X0, X23, K2, X23 // 62e3c50250f80b
+ VRANGEPD $11, 17(SP)(BP*8), X23, K2, X23 // 62e3c50250bcec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X23, K2, X23 // 62e3c50250bcac110000000b
+ VRANGEPD $11, X24, X11, K2, X23 // 6283a50a50f80b
+ VRANGEPD $11, X14, X11, K2, X23 // 62c3a50a50fe0b
+ VRANGEPD $11, X0, X11, K2, X23 // 62e3a50a50f80b
+ VRANGEPD $11, 17(SP)(BP*8), X11, K2, X23 // 62e3a50a50bcec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X11, K2, X23 // 62e3a50a50bcac110000000b
+ VRANGEPD $11, X24, X31, K2, X23 // 6283850250f80b
+ VRANGEPD $11, X14, X31, K2, X23 // 62c3850250fe0b
+ VRANGEPD $11, X0, X31, K2, X23 // 62e3850250f80b
+ VRANGEPD $11, 17(SP)(BP*8), X31, K2, X23 // 62e3850250bcec110000000b
+ VRANGEPD $11, 17(SP)(BP*4), X31, K2, X23 // 62e3850250bcac110000000b
+ VRANGEPD $12, Y3, Y18, K1, Y15 // 6273ed2150fb0c
+ VRANGEPD $12, Y19, Y18, K1, Y15 // 6233ed2150fb0c
+ VRANGEPD $12, Y23, Y18, K1, Y15 // 6233ed2150ff0c
+ VRANGEPD $12, (R8), Y18, K1, Y15 // 6253ed2150380c
+ VRANGEPD $12, 15(DX)(BX*2), Y18, K1, Y15 // 6273ed2150bc5a0f0000000c
+ VRANGEPD $12, Y3, Y24, K1, Y15 // 6273bd2150fb0c
+ VRANGEPD $12, Y19, Y24, K1, Y15 // 6233bd2150fb0c
+ VRANGEPD $12, Y23, Y24, K1, Y15 // 6233bd2150ff0c
+ VRANGEPD $12, (R8), Y24, K1, Y15 // 6253bd2150380c
+ VRANGEPD $12, 15(DX)(BX*2), Y24, K1, Y15 // 6273bd2150bc5a0f0000000c
+ VRANGEPD $12, Y3, Y9, K1, Y15 // 6273b52950fb0c
+ VRANGEPD $12, Y19, Y9, K1, Y15 // 6233b52950fb0c
+ VRANGEPD $12, Y23, Y9, K1, Y15 // 6233b52950ff0c
+ VRANGEPD $12, (R8), Y9, K1, Y15 // 6253b52950380c
+ VRANGEPD $12, 15(DX)(BX*2), Y9, K1, Y15 // 6273b52950bc5a0f0000000c
+ VRANGEPD $12, Y3, Y18, K1, Y22 // 62e3ed2150f30c
+ VRANGEPD $12, Y19, Y18, K1, Y22 // 62a3ed2150f30c
+ VRANGEPD $12, Y23, Y18, K1, Y22 // 62a3ed2150f70c
+ VRANGEPD $12, (R8), Y18, K1, Y22 // 62c3ed2150300c
+ VRANGEPD $12, 15(DX)(BX*2), Y18, K1, Y22 // 62e3ed2150b45a0f0000000c
+ VRANGEPD $12, Y3, Y24, K1, Y22 // 62e3bd2150f30c
+ VRANGEPD $12, Y19, Y24, K1, Y22 // 62a3bd2150f30c
+ VRANGEPD $12, Y23, Y24, K1, Y22 // 62a3bd2150f70c
+ VRANGEPD $12, (R8), Y24, K1, Y22 // 62c3bd2150300c
+ VRANGEPD $12, 15(DX)(BX*2), Y24, K1, Y22 // 62e3bd2150b45a0f0000000c
+ VRANGEPD $12, Y3, Y9, K1, Y22 // 62e3b52950f30c
+ VRANGEPD $12, Y19, Y9, K1, Y22 // 62a3b52950f30c
+ VRANGEPD $12, Y23, Y9, K1, Y22 // 62a3b52950f70c
+ VRANGEPD $12, (R8), Y9, K1, Y22 // 62c3b52950300c
+ VRANGEPD $12, 15(DX)(BX*2), Y9, K1, Y22 // 62e3b52950b45a0f0000000c
+ VRANGEPD $12, Y3, Y18, K1, Y20 // 62e3ed2150e30c
+ VRANGEPD $12, Y19, Y18, K1, Y20 // 62a3ed2150e30c
+ VRANGEPD $12, Y23, Y18, K1, Y20 // 62a3ed2150e70c
+ VRANGEPD $12, (R8), Y18, K1, Y20 // 62c3ed2150200c
+ VRANGEPD $12, 15(DX)(BX*2), Y18, K1, Y20 // 62e3ed2150a45a0f0000000c
+ VRANGEPD $12, Y3, Y24, K1, Y20 // 62e3bd2150e30c
+ VRANGEPD $12, Y19, Y24, K1, Y20 // 62a3bd2150e30c
+ VRANGEPD $12, Y23, Y24, K1, Y20 // 62a3bd2150e70c
+ VRANGEPD $12, (R8), Y24, K1, Y20 // 62c3bd2150200c
+ VRANGEPD $12, 15(DX)(BX*2), Y24, K1, Y20 // 62e3bd2150a45a0f0000000c
+ VRANGEPD $12, Y3, Y9, K1, Y20 // 62e3b52950e30c
+ VRANGEPD $12, Y19, Y9, K1, Y20 // 62a3b52950e30c
+ VRANGEPD $12, Y23, Y9, K1, Y20 // 62a3b52950e70c
+ VRANGEPD $12, (R8), Y9, K1, Y20 // 62c3b52950200c
+ VRANGEPD $12, 15(DX)(BX*2), Y9, K1, Y20 // 62e3b52950a45a0f0000000c
+ VRANGEPD $13, Z21, Z12, K7, Z14 // 62339d4f50f50d
+ VRANGEPD $13, Z9, Z12, K7, Z14 // 62539d4f50f10d
+ VRANGEPD $13, Z21, Z13, K7, Z14 // 6233954f50f50d
+ VRANGEPD $13, Z9, Z13, K7, Z14 // 6253954f50f10d
+ VRANGEPD $13, Z21, Z12, K7, Z13 // 62339d4f50ed0d
+ VRANGEPD $13, Z9, Z12, K7, Z13 // 62539d4f50e90d
+ VRANGEPD $13, Z21, Z13, K7, Z13 // 6233954f50ed0d
+ VRANGEPD $13, Z9, Z13, K7, Z13 // 6253954f50e90d
+ VRANGEPD $14, Z23, Z27, K1, Z2 // 62b3a54150d70e
+ VRANGEPD $14, Z9, Z27, K1, Z2 // 62d3a54150d10e
+ VRANGEPD $14, (R14), Z27, K1, Z2 // 62d3a54150160e
+ VRANGEPD $14, -7(DI)(R8*8), Z27, K1, Z2 // 62b3a5415094c7f9ffffff0e
+ VRANGEPD $14, Z23, Z25, K1, Z2 // 62b3b54150d70e
+ VRANGEPD $14, Z9, Z25, K1, Z2 // 62d3b54150d10e
+ VRANGEPD $14, (R14), Z25, K1, Z2 // 62d3b54150160e
+ VRANGEPD $14, -7(DI)(R8*8), Z25, K1, Z2 // 62b3b5415094c7f9ffffff0e
+ VRANGEPD $14, Z23, Z27, K1, Z7 // 62b3a54150ff0e
+ VRANGEPD $14, Z9, Z27, K1, Z7 // 62d3a54150f90e
+ VRANGEPD $14, (R14), Z27, K1, Z7 // 62d3a541503e0e
+ VRANGEPD $14, -7(DI)(R8*8), Z27, K1, Z7 // 62b3a54150bcc7f9ffffff0e
+ VRANGEPD $14, Z23, Z25, K1, Z7 // 62b3b54150ff0e
+ VRANGEPD $14, Z9, Z25, K1, Z7 // 62d3b54150f90e
+ VRANGEPD $14, (R14), Z25, K1, Z7 // 62d3b541503e0e
+ VRANGEPD $14, -7(DI)(R8*8), Z25, K1, Z7 // 62b3b54150bcc7f9ffffff0e
+ VRANGEPS $15, X0, X20, K1, X11 // 62735d0150d80f
+ VRANGEPS $15, X9, X20, K1, X11 // 62535d0150d90f
+ VRANGEPS $15, X13, X20, K1, X11 // 62535d0150dd0f
+ VRANGEPS $15, 7(SI)(DI*4), X20, K1, X11 // 62735d01509cbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X20, K1, X11 // 62335d01509c47f9ffffff0f
+ VRANGEPS $15, X0, X5, K1, X11 // 6273550950d80f
+ VRANGEPS $15, X9, X5, K1, X11 // 6253550950d90f
+ VRANGEPS $15, X13, X5, K1, X11 // 6253550950dd0f
+ VRANGEPS $15, 7(SI)(DI*4), X5, K1, X11 // 62735509509cbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X5, K1, X11 // 62335509509c47f9ffffff0f
+ VRANGEPS $15, X0, X25, K1, X11 // 6273350150d80f
+ VRANGEPS $15, X9, X25, K1, X11 // 6253350150d90f
+ VRANGEPS $15, X13, X25, K1, X11 // 6253350150dd0f
+ VRANGEPS $15, 7(SI)(DI*4), X25, K1, X11 // 62733501509cbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X25, K1, X11 // 62333501509c47f9ffffff0f
+ VRANGEPS $15, X0, X20, K1, X23 // 62e35d0150f80f
+ VRANGEPS $15, X9, X20, K1, X23 // 62c35d0150f90f
+ VRANGEPS $15, X13, X20, K1, X23 // 62c35d0150fd0f
+ VRANGEPS $15, 7(SI)(DI*4), X20, K1, X23 // 62e35d0150bcbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X20, K1, X23 // 62a35d0150bc47f9ffffff0f
+ VRANGEPS $15, X0, X5, K1, X23 // 62e3550950f80f
+ VRANGEPS $15, X9, X5, K1, X23 // 62c3550950f90f
+ VRANGEPS $15, X13, X5, K1, X23 // 62c3550950fd0f
+ VRANGEPS $15, 7(SI)(DI*4), X5, K1, X23 // 62e3550950bcbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X5, K1, X23 // 62a3550950bc47f9ffffff0f
+ VRANGEPS $15, X0, X25, K1, X23 // 62e3350150f80f
+ VRANGEPS $15, X9, X25, K1, X23 // 62c3350150f90f
+ VRANGEPS $15, X13, X25, K1, X23 // 62c3350150fd0f
+ VRANGEPS $15, 7(SI)(DI*4), X25, K1, X23 // 62e3350150bcbe070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X25, K1, X23 // 62a3350150bc47f9ffffff0f
+ VRANGEPS $15, X0, X20, K1, X2 // 62f35d0150d00f
+ VRANGEPS $15, X9, X20, K1, X2 // 62d35d0150d10f
+ VRANGEPS $15, X13, X20, K1, X2 // 62d35d0150d50f
+ VRANGEPS $15, 7(SI)(DI*4), X20, K1, X2 // 62f35d015094be070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X20, K1, X2 // 62b35d01509447f9ffffff0f
+ VRANGEPS $15, X0, X5, K1, X2 // 62f3550950d00f
+ VRANGEPS $15, X9, X5, K1, X2 // 62d3550950d10f
+ VRANGEPS $15, X13, X5, K1, X2 // 62d3550950d50f
+ VRANGEPS $15, 7(SI)(DI*4), X5, K1, X2 // 62f355095094be070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X5, K1, X2 // 62b35509509447f9ffffff0f
+ VRANGEPS $15, X0, X25, K1, X2 // 62f3350150d00f
+ VRANGEPS $15, X9, X25, K1, X2 // 62d3350150d10f
+ VRANGEPS $15, X13, X25, K1, X2 // 62d3350150d50f
+ VRANGEPS $15, 7(SI)(DI*4), X25, K1, X2 // 62f335015094be070000000f
+ VRANGEPS $15, -7(DI)(R8*2), X25, K1, X2 // 62b33501509447f9ffffff0f
+ VRANGEPS $0, Y21, Y5, K1, Y19 // 62a3552950dd00
+ VRANGEPS $0, Y20, Y5, K1, Y19 // 62a3552950dc00
+ VRANGEPS $0, Y6, Y5, K1, Y19 // 62e3552950de00
+ VRANGEPS $0, 17(SP)(BP*1), Y5, K1, Y19 // 62e35529509c2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y5, K1, Y19 // 62e35529509cd1f9ffffff00
+ VRANGEPS $0, Y21, Y16, K1, Y19 // 62a37d2150dd00
+ VRANGEPS $0, Y20, Y16, K1, Y19 // 62a37d2150dc00
+ VRANGEPS $0, Y6, Y16, K1, Y19 // 62e37d2150de00
+ VRANGEPS $0, 17(SP)(BP*1), Y16, K1, Y19 // 62e37d21509c2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y16, K1, Y19 // 62e37d21509cd1f9ffffff00
+ VRANGEPS $0, Y21, Y2, K1, Y19 // 62a36d2950dd00
+ VRANGEPS $0, Y20, Y2, K1, Y19 // 62a36d2950dc00
+ VRANGEPS $0, Y6, Y2, K1, Y19 // 62e36d2950de00
+ VRANGEPS $0, 17(SP)(BP*1), Y2, K1, Y19 // 62e36d29509c2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y2, K1, Y19 // 62e36d29509cd1f9ffffff00
+ VRANGEPS $0, Y21, Y5, K1, Y14 // 6233552950f500
+ VRANGEPS $0, Y20, Y5, K1, Y14 // 6233552950f400
+ VRANGEPS $0, Y6, Y5, K1, Y14 // 6273552950f600
+ VRANGEPS $0, 17(SP)(BP*1), Y5, K1, Y14 // 6273552950b42c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y5, K1, Y14 // 6273552950b4d1f9ffffff00
+ VRANGEPS $0, Y21, Y16, K1, Y14 // 62337d2150f500
+ VRANGEPS $0, Y20, Y16, K1, Y14 // 62337d2150f400
+ VRANGEPS $0, Y6, Y16, K1, Y14 // 62737d2150f600
+ VRANGEPS $0, 17(SP)(BP*1), Y16, K1, Y14 // 62737d2150b42c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y16, K1, Y14 // 62737d2150b4d1f9ffffff00
+ VRANGEPS $0, Y21, Y2, K1, Y14 // 62336d2950f500
+ VRANGEPS $0, Y20, Y2, K1, Y14 // 62336d2950f400
+ VRANGEPS $0, Y6, Y2, K1, Y14 // 62736d2950f600
+ VRANGEPS $0, 17(SP)(BP*1), Y2, K1, Y14 // 62736d2950b42c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y2, K1, Y14 // 62736d2950b4d1f9ffffff00
+ VRANGEPS $0, Y21, Y5, K1, Y21 // 62a3552950ed00
+ VRANGEPS $0, Y20, Y5, K1, Y21 // 62a3552950ec00
+ VRANGEPS $0, Y6, Y5, K1, Y21 // 62e3552950ee00
+ VRANGEPS $0, 17(SP)(BP*1), Y5, K1, Y21 // 62e3552950ac2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y5, K1, Y21 // 62e3552950acd1f9ffffff00
+ VRANGEPS $0, Y21, Y16, K1, Y21 // 62a37d2150ed00
+ VRANGEPS $0, Y20, Y16, K1, Y21 // 62a37d2150ec00
+ VRANGEPS $0, Y6, Y16, K1, Y21 // 62e37d2150ee00
+ VRANGEPS $0, 17(SP)(BP*1), Y16, K1, Y21 // 62e37d2150ac2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y16, K1, Y21 // 62e37d2150acd1f9ffffff00
+ VRANGEPS $0, Y21, Y2, K1, Y21 // 62a36d2950ed00
+ VRANGEPS $0, Y20, Y2, K1, Y21 // 62a36d2950ec00
+ VRANGEPS $0, Y6, Y2, K1, Y21 // 62e36d2950ee00
+ VRANGEPS $0, 17(SP)(BP*1), Y2, K1, Y21 // 62e36d2950ac2c1100000000
+ VRANGEPS $0, -7(CX)(DX*8), Y2, K1, Y21 // 62e36d2950acd1f9ffffff00
+ VRANGEPS $1, Z14, Z3, K7, Z27 // 6243654f50de01
+ VRANGEPS $1, Z7, Z3, K7, Z27 // 6263654f50df01
+ VRANGEPS $1, Z14, Z0, K7, Z27 // 62437d4f50de01
+ VRANGEPS $1, Z7, Z0, K7, Z27 // 62637d4f50df01
+ VRANGEPS $1, Z14, Z3, K7, Z14 // 6253654f50f601
+ VRANGEPS $1, Z7, Z3, K7, Z14 // 6273654f50f701
+ VRANGEPS $1, Z14, Z0, K7, Z14 // 62537d4f50f601
+ VRANGEPS $1, Z7, Z0, K7, Z14 // 62737d4f50f701
+ VRANGEPS $2, Z1, Z22, K2, Z8 // 62734d4250c102
+ VRANGEPS $2, Z16, Z22, K2, Z8 // 62334d4250c002
+ VRANGEPS $2, 99(R15)(R15*4), Z22, K2, Z8 // 62134d425084bf6300000002
+ VRANGEPS $2, 15(DX), Z22, K2, Z8 // 62734d4250820f00000002
+ VRANGEPS $2, Z1, Z25, K2, Z8 // 6273354250c102
+ VRANGEPS $2, Z16, Z25, K2, Z8 // 6233354250c002
+ VRANGEPS $2, 99(R15)(R15*4), Z25, K2, Z8 // 621335425084bf6300000002
+ VRANGEPS $2, 15(DX), Z25, K2, Z8 // 6273354250820f00000002
+ VRANGEPS $2, Z1, Z22, K2, Z24 // 62634d4250c102
+ VRANGEPS $2, Z16, Z22, K2, Z24 // 62234d4250c002
+ VRANGEPS $2, 99(R15)(R15*4), Z22, K2, Z24 // 62034d425084bf6300000002
+ VRANGEPS $2, 15(DX), Z22, K2, Z24 // 62634d4250820f00000002
+ VRANGEPS $2, Z1, Z25, K2, Z24 // 6263354250c102
+ VRANGEPS $2, Z16, Z25, K2, Z24 // 6223354250c002
+ VRANGEPS $2, 99(R15)(R15*4), Z25, K2, Z24 // 620335425084bf6300000002
+ VRANGEPS $2, 15(DX), Z25, K2, Z24 // 6263354250820f00000002
+ VRANGESD $3, X22, X2, K4, X2 // 62b3ed0c51d603
+ VRANGESD $3, X5, X2, K4, X2 // 62f3ed0c51d503
+ VRANGESD $3, X14, X2, K4, X2 // 62d3ed0c51d603
+ VRANGESD $3, X22, X31, K4, X2 // 62b3850451d603
+ VRANGESD $3, X5, X31, K4, X2 // 62f3850451d503
+ VRANGESD $3, X14, X31, K4, X2 // 62d3850451d603
+ VRANGESD $3, X22, X11, K4, X2 // 62b3a50c51d603
+ VRANGESD $3, X5, X11, K4, X2 // 62f3a50c51d503
+ VRANGESD $3, X14, X11, K4, X2 // 62d3a50c51d603
+ VRANGESD $3, X22, X2, K4, X8 // 6233ed0c51c603
+ VRANGESD $3, X5, X2, K4, X8 // 6273ed0c51c503
+ VRANGESD $3, X14, X2, K4, X8 // 6253ed0c51c603
+ VRANGESD $3, X22, X31, K4, X8 // 6233850451c603
+ VRANGESD $3, X5, X31, K4, X8 // 6273850451c503
+ VRANGESD $3, X14, X31, K4, X8 // 6253850451c603
+ VRANGESD $3, X22, X11, K4, X8 // 6233a50c51c603
+ VRANGESD $3, X5, X11, K4, X8 // 6273a50c51c503
+ VRANGESD $3, X14, X11, K4, X8 // 6253a50c51c603
+ VRANGESD $3, X22, X2, K4, X9 // 6233ed0c51ce03
+ VRANGESD $3, X5, X2, K4, X9 // 6273ed0c51cd03
+ VRANGESD $3, X14, X2, K4, X9 // 6253ed0c51ce03
+ VRANGESD $3, X22, X31, K4, X9 // 6233850451ce03
+ VRANGESD $3, X5, X31, K4, X9 // 6273850451cd03
+ VRANGESD $3, X14, X31, K4, X9 // 6253850451ce03
+ VRANGESD $3, X22, X11, K4, X9 // 6233a50c51ce03
+ VRANGESD $3, X5, X11, K4, X9 // 6273a50c51cd03
+ VRANGESD $3, X14, X11, K4, X9 // 6253a50c51ce03
+ VRANGESD $4, X18, X15, K1, X0 // 62b3850951c204 or 62b3852951c204 or 62b3854951c204
+ VRANGESD $4, X8, X15, K1, X0 // 62d3850951c004 or 62d3852951c004 or 62d3854951c004
+ VRANGESD $4, X27, X15, K1, X0 // 6293850951c304 or 6293852951c304 or 6293854951c304
+ VRANGESD $4, 7(AX)(CX*4), X15, K1, X0 // 62f385095184880700000004 or 62f385295184880700000004 or 62f385495184880700000004
+ VRANGESD $4, 7(AX)(CX*1), X15, K1, X0 // 62f385095184080700000004 or 62f385295184080700000004 or 62f385495184080700000004
+ VRANGESD $4, X18, X11, K1, X0 // 62b3a50951c204 or 62b3a52951c204 or 62b3a54951c204
+ VRANGESD $4, X8, X11, K1, X0 // 62d3a50951c004 or 62d3a52951c004 or 62d3a54951c004
+ VRANGESD $4, X27, X11, K1, X0 // 6293a50951c304 or 6293a52951c304 or 6293a54951c304
+ VRANGESD $4, 7(AX)(CX*4), X11, K1, X0 // 62f3a5095184880700000004 or 62f3a5295184880700000004 or 62f3a5495184880700000004
+ VRANGESD $4, 7(AX)(CX*1), X11, K1, X0 // 62f3a5095184080700000004 or 62f3a5295184080700000004 or 62f3a5495184080700000004
+ VRANGESD $4, X18, X0, K1, X0 // 62b3fd0951c204 or 62b3fd2951c204 or 62b3fd4951c204
+ VRANGESD $4, X8, X0, K1, X0 // 62d3fd0951c004 or 62d3fd2951c004 or 62d3fd4951c004
+ VRANGESD $4, X27, X0, K1, X0 // 6293fd0951c304 or 6293fd2951c304 or 6293fd4951c304
+ VRANGESD $4, 7(AX)(CX*4), X0, K1, X0 // 62f3fd095184880700000004 or 62f3fd295184880700000004 or 62f3fd495184880700000004
+ VRANGESD $4, 7(AX)(CX*1), X0, K1, X0 // 62f3fd095184080700000004 or 62f3fd295184080700000004 or 62f3fd495184080700000004
+ VRANGESD $4, X18, X15, K1, X17 // 62a3850951ca04 or 62a3852951ca04 or 62a3854951ca04
+ VRANGESD $4, X8, X15, K1, X17 // 62c3850951c804 or 62c3852951c804 or 62c3854951c804
+ VRANGESD $4, X27, X15, K1, X17 // 6283850951cb04 or 6283852951cb04 or 6283854951cb04
+ VRANGESD $4, 7(AX)(CX*4), X15, K1, X17 // 62e38509518c880700000004 or 62e38529518c880700000004 or 62e38549518c880700000004
+ VRANGESD $4, 7(AX)(CX*1), X15, K1, X17 // 62e38509518c080700000004 or 62e38529518c080700000004 or 62e38549518c080700000004
+ VRANGESD $4, X18, X11, K1, X17 // 62a3a50951ca04 or 62a3a52951ca04 or 62a3a54951ca04
+ VRANGESD $4, X8, X11, K1, X17 // 62c3a50951c804 or 62c3a52951c804 or 62c3a54951c804
+ VRANGESD $4, X27, X11, K1, X17 // 6283a50951cb04 or 6283a52951cb04 or 6283a54951cb04
+ VRANGESD $4, 7(AX)(CX*4), X11, K1, X17 // 62e3a509518c880700000004 or 62e3a529518c880700000004 or 62e3a549518c880700000004
+ VRANGESD $4, 7(AX)(CX*1), X11, K1, X17 // 62e3a509518c080700000004 or 62e3a529518c080700000004 or 62e3a549518c080700000004
+ VRANGESD $4, X18, X0, K1, X17 // 62a3fd0951ca04 or 62a3fd2951ca04 or 62a3fd4951ca04
+ VRANGESD $4, X8, X0, K1, X17 // 62c3fd0951c804 or 62c3fd2951c804 or 62c3fd4951c804
+ VRANGESD $4, X27, X0, K1, X17 // 6283fd0951cb04 or 6283fd2951cb04 or 6283fd4951cb04
+ VRANGESD $4, 7(AX)(CX*4), X0, K1, X17 // 62e3fd09518c880700000004 or 62e3fd29518c880700000004 or 62e3fd49518c880700000004
+ VRANGESD $4, 7(AX)(CX*1), X0, K1, X17 // 62e3fd09518c080700000004 or 62e3fd29518c080700000004 or 62e3fd49518c080700000004
+ VRANGESD $4, X18, X15, K1, X7 // 62b3850951fa04 or 62b3852951fa04 or 62b3854951fa04
+ VRANGESD $4, X8, X15, K1, X7 // 62d3850951f804 or 62d3852951f804 or 62d3854951f804
+ VRANGESD $4, X27, X15, K1, X7 // 6293850951fb04 or 6293852951fb04 or 6293854951fb04
+ VRANGESD $4, 7(AX)(CX*4), X15, K1, X7 // 62f3850951bc880700000004 or 62f3852951bc880700000004 or 62f3854951bc880700000004
+ VRANGESD $4, 7(AX)(CX*1), X15, K1, X7 // 62f3850951bc080700000004 or 62f3852951bc080700000004 or 62f3854951bc080700000004
+ VRANGESD $4, X18, X11, K1, X7 // 62b3a50951fa04 or 62b3a52951fa04 or 62b3a54951fa04
+ VRANGESD $4, X8, X11, K1, X7 // 62d3a50951f804 or 62d3a52951f804 or 62d3a54951f804
+ VRANGESD $4, X27, X11, K1, X7 // 6293a50951fb04 or 6293a52951fb04 or 6293a54951fb04
+ VRANGESD $4, 7(AX)(CX*4), X11, K1, X7 // 62f3a50951bc880700000004 or 62f3a52951bc880700000004 or 62f3a54951bc880700000004
+ VRANGESD $4, 7(AX)(CX*1), X11, K1, X7 // 62f3a50951bc080700000004 or 62f3a52951bc080700000004 or 62f3a54951bc080700000004
+ VRANGESD $4, X18, X0, K1, X7 // 62b3fd0951fa04 or 62b3fd2951fa04 or 62b3fd4951fa04
+ VRANGESD $4, X8, X0, K1, X7 // 62d3fd0951f804 or 62d3fd2951f804 or 62d3fd4951f804
+ VRANGESD $4, X27, X0, K1, X7 // 6293fd0951fb04 or 6293fd2951fb04 or 6293fd4951fb04
+ VRANGESD $4, 7(AX)(CX*4), X0, K1, X7 // 62f3fd0951bc880700000004 or 62f3fd2951bc880700000004 or 62f3fd4951bc880700000004
+ VRANGESD $4, 7(AX)(CX*1), X0, K1, X7 // 62f3fd0951bc080700000004 or 62f3fd2951bc080700000004 or 62f3fd4951bc080700000004
+ VRANGESS $5, X7, X15, K3, X25 // 6263050b51cf05
+ VRANGESS $5, X13, X15, K3, X25 // 6243050b51cd05
+ VRANGESS $5, X8, X15, K3, X25 // 6243050b51c805
+ VRANGESS $5, X7, X28, K3, X25 // 62631d0351cf05
+ VRANGESS $5, X13, X28, K3, X25 // 62431d0351cd05
+ VRANGESS $5, X8, X28, K3, X25 // 62431d0351c805
+ VRANGESS $5, X7, X15, K3, X3 // 62f3050b51df05
+ VRANGESS $5, X13, X15, K3, X3 // 62d3050b51dd05
+ VRANGESS $5, X8, X15, K3, X3 // 62d3050b51d805
+ VRANGESS $5, X7, X28, K3, X3 // 62f31d0351df05
+ VRANGESS $5, X13, X28, K3, X3 // 62d31d0351dd05
+ VRANGESS $5, X8, X28, K3, X3 // 62d31d0351d805
+ VRANGESS $5, X7, X15, K3, X18 // 62e3050b51d705
+ VRANGESS $5, X13, X15, K3, X18 // 62c3050b51d505
+ VRANGESS $5, X8, X15, K3, X18 // 62c3050b51d005
+ VRANGESS $5, X7, X28, K3, X18 // 62e31d0351d705
+ VRANGESS $5, X13, X28, K3, X18 // 62c31d0351d505
+ VRANGESS $5, X8, X28, K3, X18 // 62c31d0351d005
+ VRANGESS $6, X6, X22, K4, X24 // 62634d0451c606 or 62634d2451c606 or 62634d4451c606
+ VRANGESS $6, X7, X22, K4, X24 // 62634d0451c706 or 62634d2451c706 or 62634d4451c706
+ VRANGESS $6, X8, X22, K4, X24 // 62434d0451c006 or 62434d2451c006 or 62434d4451c006
+ VRANGESS $6, 7(SI)(DI*1), X22, K4, X24 // 62634d0451843e0700000006 or 62634d2451843e0700000006 or 62634d4451843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X22, K4, X24 // 62634d045184da0f00000006 or 62634d245184da0f00000006 or 62634d445184da0f00000006
+ VRANGESS $6, X6, X1, K4, X24 // 6263750c51c606 or 6263752c51c606 or 6263754c51c606
+ VRANGESS $6, X7, X1, K4, X24 // 6263750c51c706 or 6263752c51c706 or 6263754c51c706
+ VRANGESS $6, X8, X1, K4, X24 // 6243750c51c006 or 6243752c51c006 or 6243754c51c006
+ VRANGESS $6, 7(SI)(DI*1), X1, K4, X24 // 6263750c51843e0700000006 or 6263752c51843e0700000006 or 6263754c51843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X1, K4, X24 // 6263750c5184da0f00000006 or 6263752c5184da0f00000006 or 6263754c5184da0f00000006
+ VRANGESS $6, X6, X11, K4, X24 // 6263250c51c606 or 6263252c51c606 or 6263254c51c606
+ VRANGESS $6, X7, X11, K4, X24 // 6263250c51c706 or 6263252c51c706 or 6263254c51c706
+ VRANGESS $6, X8, X11, K4, X24 // 6243250c51c006 or 6243252c51c006 or 6243254c51c006
+ VRANGESS $6, 7(SI)(DI*1), X11, K4, X24 // 6263250c51843e0700000006 or 6263252c51843e0700000006 or 6263254c51843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X11, K4, X24 // 6263250c5184da0f00000006 or 6263252c5184da0f00000006 or 6263254c5184da0f00000006
+ VRANGESS $6, X6, X22, K4, X7 // 62f34d0451fe06 or 62f34d2451fe06 or 62f34d4451fe06
+ VRANGESS $6, X7, X22, K4, X7 // 62f34d0451ff06 or 62f34d2451ff06 or 62f34d4451ff06
+ VRANGESS $6, X8, X22, K4, X7 // 62d34d0451f806 or 62d34d2451f806 or 62d34d4451f806
+ VRANGESS $6, 7(SI)(DI*1), X22, K4, X7 // 62f34d0451bc3e0700000006 or 62f34d2451bc3e0700000006 or 62f34d4451bc3e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X22, K4, X7 // 62f34d0451bcda0f00000006 or 62f34d2451bcda0f00000006 or 62f34d4451bcda0f00000006
+ VRANGESS $6, X6, X1, K4, X7 // 62f3750c51fe06 or 62f3752c51fe06 or 62f3754c51fe06
+ VRANGESS $6, X7, X1, K4, X7 // 62f3750c51ff06 or 62f3752c51ff06 or 62f3754c51ff06
+ VRANGESS $6, X8, X1, K4, X7 // 62d3750c51f806 or 62d3752c51f806 or 62d3754c51f806
+ VRANGESS $6, 7(SI)(DI*1), X1, K4, X7 // 62f3750c51bc3e0700000006 or 62f3752c51bc3e0700000006 or 62f3754c51bc3e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X1, K4, X7 // 62f3750c51bcda0f00000006 or 62f3752c51bcda0f00000006 or 62f3754c51bcda0f00000006
+ VRANGESS $6, X6, X11, K4, X7 // 62f3250c51fe06 or 62f3252c51fe06 or 62f3254c51fe06
+ VRANGESS $6, X7, X11, K4, X7 // 62f3250c51ff06 or 62f3252c51ff06 or 62f3254c51ff06
+ VRANGESS $6, X8, X11, K4, X7 // 62d3250c51f806 or 62d3252c51f806 or 62d3254c51f806
+ VRANGESS $6, 7(SI)(DI*1), X11, K4, X7 // 62f3250c51bc3e0700000006 or 62f3252c51bc3e0700000006 or 62f3254c51bc3e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X11, K4, X7 // 62f3250c51bcda0f00000006 or 62f3252c51bcda0f00000006 or 62f3254c51bcda0f00000006
+ VRANGESS $6, X6, X22, K4, X0 // 62f34d0451c606 or 62f34d2451c606 or 62f34d4451c606
+ VRANGESS $6, X7, X22, K4, X0 // 62f34d0451c706 or 62f34d2451c706 or 62f34d4451c706
+ VRANGESS $6, X8, X22, K4, X0 // 62d34d0451c006 or 62d34d2451c006 or 62d34d4451c006
+ VRANGESS $6, 7(SI)(DI*1), X22, K4, X0 // 62f34d0451843e0700000006 or 62f34d2451843e0700000006 or 62f34d4451843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X22, K4, X0 // 62f34d045184da0f00000006 or 62f34d245184da0f00000006 or 62f34d445184da0f00000006
+ VRANGESS $6, X6, X1, K4, X0 // 62f3750c51c606 or 62f3752c51c606 or 62f3754c51c606
+ VRANGESS $6, X7, X1, K4, X0 // 62f3750c51c706 or 62f3752c51c706 or 62f3754c51c706
+ VRANGESS $6, X8, X1, K4, X0 // 62d3750c51c006 or 62d3752c51c006 or 62d3754c51c006
+ VRANGESS $6, 7(SI)(DI*1), X1, K4, X0 // 62f3750c51843e0700000006 or 62f3752c51843e0700000006 or 62f3754c51843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X1, K4, X0 // 62f3750c5184da0f00000006 or 62f3752c5184da0f00000006 or 62f3754c5184da0f00000006
+ VRANGESS $6, X6, X11, K4, X0 // 62f3250c51c606 or 62f3252c51c606 or 62f3254c51c606
+ VRANGESS $6, X7, X11, K4, X0 // 62f3250c51c706 or 62f3252c51c706 or 62f3254c51c706
+ VRANGESS $6, X8, X11, K4, X0 // 62d3250c51c006 or 62d3252c51c006 or 62d3254c51c006
+ VRANGESS $6, 7(SI)(DI*1), X11, K4, X0 // 62f3250c51843e0700000006 or 62f3252c51843e0700000006 or 62f3254c51843e0700000006
+ VRANGESS $6, 15(DX)(BX*8), X11, K4, X0 // 62f3250c5184da0f00000006 or 62f3252c5184da0f00000006 or 62f3254c5184da0f00000006
+ VREDUCEPD $126, X8, K3, X31 // 6243fd0b56f87e
+ VREDUCEPD $126, X1, K3, X31 // 6263fd0b56f97e
+ VREDUCEPD $126, X0, K3, X31 // 6263fd0b56f87e
+ VREDUCEPD $126, 99(R15)(R15*1), K3, X31 // 6203fd0b56bc3f630000007e
+ VREDUCEPD $126, (DX), K3, X31 // 6263fd0b563a7e
+ VREDUCEPD $126, X8, K3, X16 // 62c3fd0b56c07e
+ VREDUCEPD $126, X1, K3, X16 // 62e3fd0b56c17e
+ VREDUCEPD $126, X0, K3, X16 // 62e3fd0b56c07e
+ VREDUCEPD $126, 99(R15)(R15*1), K3, X16 // 6283fd0b56843f630000007e
+ VREDUCEPD $126, (DX), K3, X16 // 62e3fd0b56027e
+ VREDUCEPD $126, X8, K3, X7 // 62d3fd0b56f87e
+ VREDUCEPD $126, X1, K3, X7 // 62f3fd0b56f97e
+ VREDUCEPD $126, X0, K3, X7 // 62f3fd0b56f87e
+ VREDUCEPD $126, 99(R15)(R15*1), K3, X7 // 6293fd0b56bc3f630000007e
+ VREDUCEPD $126, (DX), K3, X7 // 62f3fd0b563a7e
+ VREDUCEPD $94, Y0, K3, Y5 // 62f3fd2b56e85e
+ VREDUCEPD $94, Y22, K3, Y5 // 62b3fd2b56ee5e
+ VREDUCEPD $94, Y13, K3, Y5 // 62d3fd2b56ed5e
+ VREDUCEPD $94, (R14), K3, Y5 // 62d3fd2b562e5e
+ VREDUCEPD $94, -7(DI)(R8*8), K3, Y5 // 62b3fd2b56acc7f9ffffff5e
+ VREDUCEPD $94, Y0, K3, Y28 // 6263fd2b56e05e
+ VREDUCEPD $94, Y22, K3, Y28 // 6223fd2b56e65e
+ VREDUCEPD $94, Y13, K3, Y28 // 6243fd2b56e55e
+ VREDUCEPD $94, (R14), K3, Y28 // 6243fd2b56265e
+ VREDUCEPD $94, -7(DI)(R8*8), K3, Y28 // 6223fd2b56a4c7f9ffffff5e
+ VREDUCEPD $94, Y0, K3, Y7 // 62f3fd2b56f85e
+ VREDUCEPD $94, Y22, K3, Y7 // 62b3fd2b56fe5e
+ VREDUCEPD $94, Y13, K3, Y7 // 62d3fd2b56fd5e
+ VREDUCEPD $94, (R14), K3, Y7 // 62d3fd2b563e5e
+ VREDUCEPD $94, -7(DI)(R8*8), K3, Y7 // 62b3fd2b56bcc7f9ffffff5e
+ VREDUCEPD $121, Z3, K2, Z26 // 6263fd4a56d379
+ VREDUCEPD $121, Z0, K2, Z26 // 6263fd4a56d079
+ VREDUCEPD $121, Z3, K2, Z3 // 62f3fd4a56db79
+ VREDUCEPD $121, Z0, K2, Z3 // 62f3fd4a56d879
+ VREDUCEPD $13, Z11, K1, Z21 // 62c3fd4956eb0d
+ VREDUCEPD $13, Z25, K1, Z21 // 6283fd4956e90d
+ VREDUCEPD $13, -17(BP), K1, Z21 // 62e3fd4956adefffffff0d
+ VREDUCEPD $13, -15(R14)(R15*8), K1, Z21 // 6283fd4956acfef1ffffff0d
+ VREDUCEPD $13, Z11, K1, Z13 // 6253fd4956eb0d
+ VREDUCEPD $13, Z25, K1, Z13 // 6213fd4956e90d
+ VREDUCEPD $13, -17(BP), K1, Z13 // 6273fd4956adefffffff0d
+ VREDUCEPD $13, -15(R14)(R15*8), K1, Z13 // 6213fd4956acfef1ffffff0d
+ VREDUCEPS $65, X21, K2, X15 // 62337d0a56fd41
+ VREDUCEPS $65, X0, K2, X15 // 62737d0a56f841
+ VREDUCEPS $65, X28, K2, X15 // 62137d0a56fc41
+ VREDUCEPS $65, -17(BP)(SI*8), K2, X15 // 62737d0a56bcf5efffffff41
+ VREDUCEPS $65, (R15), K2, X15 // 62537d0a563f41
+ VREDUCEPS $65, X21, K2, X0 // 62b37d0a56c541
+ VREDUCEPS $65, X0, K2, X0 // 62f37d0a56c041
+ VREDUCEPS $65, X28, K2, X0 // 62937d0a56c441
+ VREDUCEPS $65, -17(BP)(SI*8), K2, X0 // 62f37d0a5684f5efffffff41
+ VREDUCEPS $65, (R15), K2, X0 // 62d37d0a560741
+ VREDUCEPS $65, X21, K2, X16 // 62a37d0a56c541
+ VREDUCEPS $65, X0, K2, X16 // 62e37d0a56c041
+ VREDUCEPS $65, X28, K2, X16 // 62837d0a56c441
+ VREDUCEPS $65, -17(BP)(SI*8), K2, X16 // 62e37d0a5684f5efffffff41
+ VREDUCEPS $65, (R15), K2, X16 // 62c37d0a560741
+ VREDUCEPS $67, Y17, K1, Y12 // 62337d2956e143
+ VREDUCEPS $67, Y7, K1, Y12 // 62737d2956e743
+ VREDUCEPS $67, Y9, K1, Y12 // 62537d2956e143
+ VREDUCEPS $67, 99(R15)(R15*4), K1, Y12 // 62137d2956a4bf6300000043
+ VREDUCEPS $67, 15(DX), K1, Y12 // 62737d2956a20f00000043
+ VREDUCEPS $67, Y17, K1, Y1 // 62b37d2956c943
+ VREDUCEPS $67, Y7, K1, Y1 // 62f37d2956cf43
+ VREDUCEPS $67, Y9, K1, Y1 // 62d37d2956c943
+ VREDUCEPS $67, 99(R15)(R15*4), K1, Y1 // 62937d29568cbf6300000043
+ VREDUCEPS $67, 15(DX), K1, Y1 // 62f37d29568a0f00000043
+ VREDUCEPS $67, Y17, K1, Y14 // 62337d2956f143
+ VREDUCEPS $67, Y7, K1, Y14 // 62737d2956f743
+ VREDUCEPS $67, Y9, K1, Y14 // 62537d2956f143
+ VREDUCEPS $67, 99(R15)(R15*4), K1, Y14 // 62137d2956b4bf6300000043
+ VREDUCEPS $67, 15(DX), K1, Y14 // 62737d2956b20f00000043
+ VREDUCEPS $127, Z27, K7, Z3 // 62937d4f56db7f
+ VREDUCEPS $127, Z15, K7, Z3 // 62d37d4f56df7f
+ VREDUCEPS $127, Z27, K7, Z12 // 62137d4f56e37f
+ VREDUCEPS $127, Z15, K7, Z12 // 62537d4f56e77f
+ VREDUCEPS $0, Z23, K1, Z23 // 62a37d4956ff00
+ VREDUCEPS $0, Z6, K1, Z23 // 62e37d4956fe00
+ VREDUCEPS $0, 17(SP)(BP*2), K1, Z23 // 62e37d4956bc6c1100000000
+ VREDUCEPS $0, -7(DI)(R8*4), K1, Z23 // 62a37d4956bc87f9ffffff00
+ VREDUCEPS $0, Z23, K1, Z5 // 62b37d4956ef00
+ VREDUCEPS $0, Z6, K1, Z5 // 62f37d4956ee00
+ VREDUCEPS $0, 17(SP)(BP*2), K1, Z5 // 62f37d4956ac6c1100000000
+ VREDUCEPS $0, -7(DI)(R8*4), K1, Z5 // 62b37d4956ac87f9ffffff00
+ VREDUCESD $97, X1, X7, K1, X22 // 62e3c50957f161
+ VREDUCESD $97, X7, X7, K1, X22 // 62e3c50957f761
+ VREDUCESD $97, X9, X7, K1, X22 // 62c3c50957f161
+ VREDUCESD $97, X1, X16, K1, X22 // 62e3fd0157f161
+ VREDUCESD $97, X7, X16, K1, X22 // 62e3fd0157f761
+ VREDUCESD $97, X9, X16, K1, X22 // 62c3fd0157f161
+ VREDUCESD $97, X1, X31, K1, X22 // 62e3850157f161
+ VREDUCESD $97, X7, X31, K1, X22 // 62e3850157f761
+ VREDUCESD $97, X9, X31, K1, X22 // 62c3850157f161
+ VREDUCESD $97, X1, X7, K1, X7 // 62f3c50957f961
+ VREDUCESD $97, X7, X7, K1, X7 // 62f3c50957ff61
+ VREDUCESD $97, X9, X7, K1, X7 // 62d3c50957f961
+ VREDUCESD $97, X1, X16, K1, X7 // 62f3fd0157f961
+ VREDUCESD $97, X7, X16, K1, X7 // 62f3fd0157ff61
+ VREDUCESD $97, X9, X16, K1, X7 // 62d3fd0157f961
+ VREDUCESD $97, X1, X31, K1, X7 // 62f3850157f961
+ VREDUCESD $97, X7, X31, K1, X7 // 62f3850157ff61
+ VREDUCESD $97, X9, X31, K1, X7 // 62d3850157f961
+ VREDUCESD $97, X1, X7, K1, X19 // 62e3c50957d961
+ VREDUCESD $97, X7, X7, K1, X19 // 62e3c50957df61
+ VREDUCESD $97, X9, X7, K1, X19 // 62c3c50957d961
+ VREDUCESD $97, X1, X16, K1, X19 // 62e3fd0157d961
+ VREDUCESD $97, X7, X16, K1, X19 // 62e3fd0157df61
+ VREDUCESD $97, X9, X16, K1, X19 // 62c3fd0157d961
+ VREDUCESD $97, X1, X31, K1, X19 // 62e3850157d961
+ VREDUCESD $97, X7, X31, K1, X19 // 62e3850157df61
+ VREDUCESD $97, X9, X31, K1, X19 // 62c3850157d961
+ VREDUCESD $81, X17, X12, K1, X15 // 62339d0957f951 or 62339d2957f951 or 62339d4957f951
+ VREDUCESD $81, X15, X12, K1, X15 // 62539d0957ff51 or 62539d2957ff51 or 62539d4957ff51
+ VREDUCESD $81, X8, X12, K1, X15 // 62539d0957f851 or 62539d2957f851 or 62539d4957f851
+ VREDUCESD $81, 7(SI)(DI*4), X12, K1, X15 // 62739d0957bcbe0700000051 or 62739d2957bcbe0700000051 or 62739d4957bcbe0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X12, K1, X15 // 62339d0957bc47f9ffffff51 or 62339d2957bc47f9ffffff51 or 62339d4957bc47f9ffffff51
+ VREDUCESD $81, X17, X14, K1, X15 // 62338d0957f951 or 62338d2957f951 or 62338d4957f951
+ VREDUCESD $81, X15, X14, K1, X15 // 62538d0957ff51 or 62538d2957ff51 or 62538d4957ff51
+ VREDUCESD $81, X8, X14, K1, X15 // 62538d0957f851 or 62538d2957f851 or 62538d4957f851
+ VREDUCESD $81, 7(SI)(DI*4), X14, K1, X15 // 62738d0957bcbe0700000051 or 62738d2957bcbe0700000051 or 62738d4957bcbe0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X14, K1, X15 // 62338d0957bc47f9ffffff51 or 62338d2957bc47f9ffffff51 or 62338d4957bc47f9ffffff51
+ VREDUCESD $81, X17, X5, K1, X15 // 6233d50957f951 or 6233d52957f951 or 6233d54957f951
+ VREDUCESD $81, X15, X5, K1, X15 // 6253d50957ff51 or 6253d52957ff51 or 6253d54957ff51
+ VREDUCESD $81, X8, X5, K1, X15 // 6253d50957f851 or 6253d52957f851 or 6253d54957f851
+ VREDUCESD $81, 7(SI)(DI*4), X5, K1, X15 // 6273d50957bcbe0700000051 or 6273d52957bcbe0700000051 or 6273d54957bcbe0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X5, K1, X15 // 6233d50957bc47f9ffffff51 or 6233d52957bc47f9ffffff51 or 6233d54957bc47f9ffffff51
+ VREDUCESD $81, X17, X12, K1, X12 // 62339d0957e151 or 62339d2957e151 or 62339d4957e151
+ VREDUCESD $81, X15, X12, K1, X12 // 62539d0957e751 or 62539d2957e751 or 62539d4957e751
+ VREDUCESD $81, X8, X12, K1, X12 // 62539d0957e051 or 62539d2957e051 or 62539d4957e051
+ VREDUCESD $81, 7(SI)(DI*4), X12, K1, X12 // 62739d0957a4be0700000051 or 62739d2957a4be0700000051 or 62739d4957a4be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X12, K1, X12 // 62339d0957a447f9ffffff51 or 62339d2957a447f9ffffff51 or 62339d4957a447f9ffffff51
+ VREDUCESD $81, X17, X14, K1, X12 // 62338d0957e151 or 62338d2957e151 or 62338d4957e151
+ VREDUCESD $81, X15, X14, K1, X12 // 62538d0957e751 or 62538d2957e751 or 62538d4957e751
+ VREDUCESD $81, X8, X14, K1, X12 // 62538d0957e051 or 62538d2957e051 or 62538d4957e051
+ VREDUCESD $81, 7(SI)(DI*4), X14, K1, X12 // 62738d0957a4be0700000051 or 62738d2957a4be0700000051 or 62738d4957a4be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X14, K1, X12 // 62338d0957a447f9ffffff51 or 62338d2957a447f9ffffff51 or 62338d4957a447f9ffffff51
+ VREDUCESD $81, X17, X5, K1, X12 // 6233d50957e151 or 6233d52957e151 or 6233d54957e151
+ VREDUCESD $81, X15, X5, K1, X12 // 6253d50957e751 or 6253d52957e751 or 6253d54957e751
+ VREDUCESD $81, X8, X5, K1, X12 // 6253d50957e051 or 6253d52957e051 or 6253d54957e051
+ VREDUCESD $81, 7(SI)(DI*4), X5, K1, X12 // 6273d50957a4be0700000051 or 6273d52957a4be0700000051 or 6273d54957a4be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X5, K1, X12 // 6233d50957a447f9ffffff51 or 6233d52957a447f9ffffff51 or 6233d54957a447f9ffffff51
+ VREDUCESD $81, X17, X12, K1, X0 // 62b39d0957c151 or 62b39d2957c151 or 62b39d4957c151
+ VREDUCESD $81, X15, X12, K1, X0 // 62d39d0957c751 or 62d39d2957c751 or 62d39d4957c751
+ VREDUCESD $81, X8, X12, K1, X0 // 62d39d0957c051 or 62d39d2957c051 or 62d39d4957c051
+ VREDUCESD $81, 7(SI)(DI*4), X12, K1, X0 // 62f39d095784be0700000051 or 62f39d295784be0700000051 or 62f39d495784be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X12, K1, X0 // 62b39d09578447f9ffffff51 or 62b39d29578447f9ffffff51 or 62b39d49578447f9ffffff51
+ VREDUCESD $81, X17, X14, K1, X0 // 62b38d0957c151 or 62b38d2957c151 or 62b38d4957c151
+ VREDUCESD $81, X15, X14, K1, X0 // 62d38d0957c751 or 62d38d2957c751 or 62d38d4957c751
+ VREDUCESD $81, X8, X14, K1, X0 // 62d38d0957c051 or 62d38d2957c051 or 62d38d4957c051
+ VREDUCESD $81, 7(SI)(DI*4), X14, K1, X0 // 62f38d095784be0700000051 or 62f38d295784be0700000051 or 62f38d495784be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X14, K1, X0 // 62b38d09578447f9ffffff51 or 62b38d29578447f9ffffff51 or 62b38d49578447f9ffffff51
+ VREDUCESD $81, X17, X5, K1, X0 // 62b3d50957c151 or 62b3d52957c151 or 62b3d54957c151
+ VREDUCESD $81, X15, X5, K1, X0 // 62d3d50957c751 or 62d3d52957c751 or 62d3d54957c751
+ VREDUCESD $81, X8, X5, K1, X0 // 62d3d50957c051 or 62d3d52957c051 or 62d3d54957c051
+ VREDUCESD $81, 7(SI)(DI*4), X5, K1, X0 // 62f3d5095784be0700000051 or 62f3d5295784be0700000051 or 62f3d5495784be0700000051
+ VREDUCESD $81, -7(DI)(R8*2), X5, K1, X0 // 62b3d509578447f9ffffff51 or 62b3d529578447f9ffffff51 or 62b3d549578447f9ffffff51
+ VREDUCESS $42, X9, X13, K7, X3 // 62d3150f57d92a
+ VREDUCESS $42, X15, X13, K7, X3 // 62d3150f57df2a
+ VREDUCESS $42, X26, X13, K7, X3 // 6293150f57da2a
+ VREDUCESS $42, X9, X28, K7, X3 // 62d31d0757d92a
+ VREDUCESS $42, X15, X28, K7, X3 // 62d31d0757df2a
+ VREDUCESS $42, X26, X28, K7, X3 // 62931d0757da2a
+ VREDUCESS $42, X9, X24, K7, X3 // 62d33d0757d92a
+ VREDUCESS $42, X15, X24, K7, X3 // 62d33d0757df2a
+ VREDUCESS $42, X26, X24, K7, X3 // 62933d0757da2a
+ VREDUCESS $42, X9, X13, K7, X26 // 6243150f57d12a
+ VREDUCESS $42, X15, X13, K7, X26 // 6243150f57d72a
+ VREDUCESS $42, X26, X13, K7, X26 // 6203150f57d22a
+ VREDUCESS $42, X9, X28, K7, X26 // 62431d0757d12a
+ VREDUCESS $42, X15, X28, K7, X26 // 62431d0757d72a
+ VREDUCESS $42, X26, X28, K7, X26 // 62031d0757d22a
+ VREDUCESS $42, X9, X24, K7, X26 // 62433d0757d12a
+ VREDUCESS $42, X15, X24, K7, X26 // 62433d0757d72a
+ VREDUCESS $42, X26, X24, K7, X26 // 62033d0757d22a
+ VREDUCESS $42, X9, X13, K7, X23 // 62c3150f57f92a
+ VREDUCESS $42, X15, X13, K7, X23 // 62c3150f57ff2a
+ VREDUCESS $42, X26, X13, K7, X23 // 6283150f57fa2a
+ VREDUCESS $42, X9, X28, K7, X23 // 62c31d0757f92a
+ VREDUCESS $42, X15, X28, K7, X23 // 62c31d0757ff2a
+ VREDUCESS $42, X26, X28, K7, X23 // 62831d0757fa2a
+ VREDUCESS $42, X9, X24, K7, X23 // 62c33d0757f92a
+ VREDUCESS $42, X15, X24, K7, X23 // 62c33d0757ff2a
+ VREDUCESS $42, X26, X24, K7, X23 // 62833d0757fa2a
+ VREDUCESS $79, X7, X11, K2, X18 // 62e3250a57d74f or 62e3252a57d74f or 62e3254a57d74f
+ VREDUCESS $79, X0, X11, K2, X18 // 62e3250a57d04f or 62e3252a57d04f or 62e3254a57d04f
+ VREDUCESS $79, 99(R15)(R15*8), X11, K2, X18 // 6283250a5794ff630000004f or 6283252a5794ff630000004f or 6283254a5794ff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X11, K2, X18 // 62e3250a5794c8070000004f or 62e3252a5794c8070000004f or 62e3254a5794c8070000004f
+ VREDUCESS $79, X7, X31, K2, X18 // 62e3050257d74f or 62e3052257d74f or 62e3054257d74f
+ VREDUCESS $79, X0, X31, K2, X18 // 62e3050257d04f or 62e3052257d04f or 62e3054257d04f
+ VREDUCESS $79, 99(R15)(R15*8), X31, K2, X18 // 628305025794ff630000004f or 628305225794ff630000004f or 628305425794ff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X31, K2, X18 // 62e305025794c8070000004f or 62e305225794c8070000004f or 62e305425794c8070000004f
+ VREDUCESS $79, X7, X3, K2, X18 // 62e3650a57d74f or 62e3652a57d74f or 62e3654a57d74f
+ VREDUCESS $79, X0, X3, K2, X18 // 62e3650a57d04f or 62e3652a57d04f or 62e3654a57d04f
+ VREDUCESS $79, 99(R15)(R15*8), X3, K2, X18 // 6283650a5794ff630000004f or 6283652a5794ff630000004f or 6283654a5794ff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X3, K2, X18 // 62e3650a5794c8070000004f or 62e3652a5794c8070000004f or 62e3654a5794c8070000004f
+ VREDUCESS $79, X7, X11, K2, X21 // 62e3250a57ef4f or 62e3252a57ef4f or 62e3254a57ef4f
+ VREDUCESS $79, X0, X11, K2, X21 // 62e3250a57e84f or 62e3252a57e84f or 62e3254a57e84f
+ VREDUCESS $79, 99(R15)(R15*8), X11, K2, X21 // 6283250a57acff630000004f or 6283252a57acff630000004f or 6283254a57acff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X11, K2, X21 // 62e3250a57acc8070000004f or 62e3252a57acc8070000004f or 62e3254a57acc8070000004f
+ VREDUCESS $79, X7, X31, K2, X21 // 62e3050257ef4f or 62e3052257ef4f or 62e3054257ef4f
+ VREDUCESS $79, X0, X31, K2, X21 // 62e3050257e84f or 62e3052257e84f or 62e3054257e84f
+ VREDUCESS $79, 99(R15)(R15*8), X31, K2, X21 // 6283050257acff630000004f or 6283052257acff630000004f or 6283054257acff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X31, K2, X21 // 62e3050257acc8070000004f or 62e3052257acc8070000004f or 62e3054257acc8070000004f
+ VREDUCESS $79, X7, X3, K2, X21 // 62e3650a57ef4f or 62e3652a57ef4f or 62e3654a57ef4f
+ VREDUCESS $79, X0, X3, K2, X21 // 62e3650a57e84f or 62e3652a57e84f or 62e3654a57e84f
+ VREDUCESS $79, 99(R15)(R15*8), X3, K2, X21 // 6283650a57acff630000004f or 6283652a57acff630000004f or 6283654a57acff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X3, K2, X21 // 62e3650a57acc8070000004f or 62e3652a57acc8070000004f or 62e3654a57acc8070000004f
+ VREDUCESS $79, X7, X11, K2, X1 // 62f3250a57cf4f or 62f3252a57cf4f or 62f3254a57cf4f
+ VREDUCESS $79, X0, X11, K2, X1 // 62f3250a57c84f or 62f3252a57c84f or 62f3254a57c84f
+ VREDUCESS $79, 99(R15)(R15*8), X11, K2, X1 // 6293250a578cff630000004f or 6293252a578cff630000004f or 6293254a578cff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X11, K2, X1 // 62f3250a578cc8070000004f or 62f3252a578cc8070000004f or 62f3254a578cc8070000004f
+ VREDUCESS $79, X7, X31, K2, X1 // 62f3050257cf4f or 62f3052257cf4f or 62f3054257cf4f
+ VREDUCESS $79, X0, X31, K2, X1 // 62f3050257c84f or 62f3052257c84f or 62f3054257c84f
+ VREDUCESS $79, 99(R15)(R15*8), X31, K2, X1 // 62930502578cff630000004f or 62930522578cff630000004f or 62930542578cff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X31, K2, X1 // 62f30502578cc8070000004f or 62f30522578cc8070000004f or 62f30542578cc8070000004f
+ VREDUCESS $79, X7, X3, K2, X1 // 62f3650a57cf4f or 62f3652a57cf4f or 62f3654a57cf4f
+ VREDUCESS $79, X0, X3, K2, X1 // 62f3650a57c84f or 62f3652a57c84f or 62f3654a57c84f
+ VREDUCESS $79, 99(R15)(R15*8), X3, K2, X1 // 6293650a578cff630000004f or 6293652a578cff630000004f or 6293654a578cff630000004f
+ VREDUCESS $79, 7(AX)(CX*8), X3, K2, X1 // 62f3650a578cc8070000004f or 62f3652a578cc8070000004f or 62f3654a578cc8070000004f
+ VXORPD X13, X3, K5, X17 // 62c1e50d57cd
+ VXORPD X28, X3, K5, X17 // 6281e50d57cc
+ VXORPD X24, X3, K5, X17 // 6281e50d57c8
+ VXORPD -7(CX)(DX*1), X3, K5, X17 // 62e1e50d578c11f9ffffff
+ VXORPD -15(R14)(R15*4), X3, K5, X17 // 6281e50d578cbef1ffffff
+ VXORPD X13, X26, K5, X17 // 62c1ad0557cd
+ VXORPD X28, X26, K5, X17 // 6281ad0557cc
+ VXORPD X24, X26, K5, X17 // 6281ad0557c8
+ VXORPD -7(CX)(DX*1), X26, K5, X17 // 62e1ad05578c11f9ffffff
+ VXORPD -15(R14)(R15*4), X26, K5, X17 // 6281ad05578cbef1ffffff
+ VXORPD X13, X23, K5, X17 // 62c1c50557cd
+ VXORPD X28, X23, K5, X17 // 6281c50557cc
+ VXORPD X24, X23, K5, X17 // 6281c50557c8
+ VXORPD -7(CX)(DX*1), X23, K5, X17 // 62e1c505578c11f9ffffff
+ VXORPD -15(R14)(R15*4), X23, K5, X17 // 6281c505578cbef1ffffff
+ VXORPD X13, X3, K5, X15 // 6251e50d57fd
+ VXORPD X28, X3, K5, X15 // 6211e50d57fc
+ VXORPD X24, X3, K5, X15 // 6211e50d57f8
+ VXORPD -7(CX)(DX*1), X3, K5, X15 // 6271e50d57bc11f9ffffff
+ VXORPD -15(R14)(R15*4), X3, K5, X15 // 6211e50d57bcbef1ffffff
+ VXORPD X13, X26, K5, X15 // 6251ad0557fd
+ VXORPD X28, X26, K5, X15 // 6211ad0557fc
+ VXORPD X24, X26, K5, X15 // 6211ad0557f8
+ VXORPD -7(CX)(DX*1), X26, K5, X15 // 6271ad0557bc11f9ffffff
+ VXORPD -15(R14)(R15*4), X26, K5, X15 // 6211ad0557bcbef1ffffff
+ VXORPD X13, X23, K5, X15 // 6251c50557fd
+ VXORPD X28, X23, K5, X15 // 6211c50557fc
+ VXORPD X24, X23, K5, X15 // 6211c50557f8
+ VXORPD -7(CX)(DX*1), X23, K5, X15 // 6271c50557bc11f9ffffff
+ VXORPD -15(R14)(R15*4), X23, K5, X15 // 6211c50557bcbef1ffffff
+ VXORPD X13, X3, K5, X8 // 6251e50d57c5
+ VXORPD X28, X3, K5, X8 // 6211e50d57c4
+ VXORPD X24, X3, K5, X8 // 6211e50d57c0
+ VXORPD -7(CX)(DX*1), X3, K5, X8 // 6271e50d578411f9ffffff
+ VXORPD -15(R14)(R15*4), X3, K5, X8 // 6211e50d5784bef1ffffff
+ VXORPD X13, X26, K5, X8 // 6251ad0557c5
+ VXORPD X28, X26, K5, X8 // 6211ad0557c4
+ VXORPD X24, X26, K5, X8 // 6211ad0557c0
+ VXORPD -7(CX)(DX*1), X26, K5, X8 // 6271ad05578411f9ffffff
+ VXORPD -15(R14)(R15*4), X26, K5, X8 // 6211ad055784bef1ffffff
+ VXORPD X13, X23, K5, X8 // 6251c50557c5
+ VXORPD X28, X23, K5, X8 // 6211c50557c4
+ VXORPD X24, X23, K5, X8 // 6211c50557c0
+ VXORPD -7(CX)(DX*1), X23, K5, X8 // 6271c505578411f9ffffff
+ VXORPD -15(R14)(R15*4), X23, K5, X8 // 6211c5055784bef1ffffff
+ VXORPD Y5, Y20, K3, Y0 // 62f1dd2357c5
+ VXORPD Y28, Y20, K3, Y0 // 6291dd2357c4
+ VXORPD Y7, Y20, K3, Y0 // 62f1dd2357c7
+ VXORPD -7(CX), Y20, K3, Y0 // 62f1dd235781f9ffffff
+ VXORPD 15(DX)(BX*4), Y20, K3, Y0 // 62f1dd2357849a0f000000
+ VXORPD Y5, Y12, K3, Y0 // 62f19d2b57c5
+ VXORPD Y28, Y12, K3, Y0 // 62919d2b57c4
+ VXORPD Y7, Y12, K3, Y0 // 62f19d2b57c7
+ VXORPD -7(CX), Y12, K3, Y0 // 62f19d2b5781f9ffffff
+ VXORPD 15(DX)(BX*4), Y12, K3, Y0 // 62f19d2b57849a0f000000
+ VXORPD Y5, Y3, K3, Y0 // 62f1e52b57c5
+ VXORPD Y28, Y3, K3, Y0 // 6291e52b57c4
+ VXORPD Y7, Y3, K3, Y0 // 62f1e52b57c7
+ VXORPD -7(CX), Y3, K3, Y0 // 62f1e52b5781f9ffffff
+ VXORPD 15(DX)(BX*4), Y3, K3, Y0 // 62f1e52b57849a0f000000
+ VXORPD Y5, Y20, K3, Y3 // 62f1dd2357dd
+ VXORPD Y28, Y20, K3, Y3 // 6291dd2357dc
+ VXORPD Y7, Y20, K3, Y3 // 62f1dd2357df
+ VXORPD -7(CX), Y20, K3, Y3 // 62f1dd235799f9ffffff
+ VXORPD 15(DX)(BX*4), Y20, K3, Y3 // 62f1dd23579c9a0f000000
+ VXORPD Y5, Y12, K3, Y3 // 62f19d2b57dd
+ VXORPD Y28, Y12, K3, Y3 // 62919d2b57dc
+ VXORPD Y7, Y12, K3, Y3 // 62f19d2b57df
+ VXORPD -7(CX), Y12, K3, Y3 // 62f19d2b5799f9ffffff
+ VXORPD 15(DX)(BX*4), Y12, K3, Y3 // 62f19d2b579c9a0f000000
+ VXORPD Y5, Y3, K3, Y3 // 62f1e52b57dd
+ VXORPD Y28, Y3, K3, Y3 // 6291e52b57dc
+ VXORPD Y7, Y3, K3, Y3 // 62f1e52b57df
+ VXORPD -7(CX), Y3, K3, Y3 // 62f1e52b5799f9ffffff
+ VXORPD 15(DX)(BX*4), Y3, K3, Y3 // 62f1e52b579c9a0f000000
+ VXORPD Y5, Y20, K3, Y5 // 62f1dd2357ed
+ VXORPD Y28, Y20, K3, Y5 // 6291dd2357ec
+ VXORPD Y7, Y20, K3, Y5 // 62f1dd2357ef
+ VXORPD -7(CX), Y20, K3, Y5 // 62f1dd2357a9f9ffffff
+ VXORPD 15(DX)(BX*4), Y20, K3, Y5 // 62f1dd2357ac9a0f000000
+ VXORPD Y5, Y12, K3, Y5 // 62f19d2b57ed
+ VXORPD Y28, Y12, K3, Y5 // 62919d2b57ec
+ VXORPD Y7, Y12, K3, Y5 // 62f19d2b57ef
+ VXORPD -7(CX), Y12, K3, Y5 // 62f19d2b57a9f9ffffff
+ VXORPD 15(DX)(BX*4), Y12, K3, Y5 // 62f19d2b57ac9a0f000000
+ VXORPD Y5, Y3, K3, Y5 // 62f1e52b57ed
+ VXORPD Y28, Y3, K3, Y5 // 6291e52b57ec
+ VXORPD Y7, Y3, K3, Y5 // 62f1e52b57ef
+ VXORPD -7(CX), Y3, K3, Y5 // 62f1e52b57a9f9ffffff
+ VXORPD 15(DX)(BX*4), Y3, K3, Y5 // 62f1e52b57ac9a0f000000
+ VXORPD Z13, Z28, K4, Z26 // 62419d4457d5
+ VXORPD Z21, Z28, K4, Z26 // 62219d4457d5
+ VXORPD 15(R8)(R14*1), Z28, K4, Z26 // 62019d445794300f000000
+ VXORPD 15(R8)(R14*2), Z28, K4, Z26 // 62019d445794700f000000
+ VXORPD Z13, Z6, K4, Z26 // 6241cd4c57d5
+ VXORPD Z21, Z6, K4, Z26 // 6221cd4c57d5
+ VXORPD 15(R8)(R14*1), Z6, K4, Z26 // 6201cd4c5794300f000000
+ VXORPD 15(R8)(R14*2), Z6, K4, Z26 // 6201cd4c5794700f000000
+ VXORPD Z13, Z28, K4, Z14 // 62519d4457f5
+ VXORPD Z21, Z28, K4, Z14 // 62319d4457f5
+ VXORPD 15(R8)(R14*1), Z28, K4, Z14 // 62119d4457b4300f000000
+ VXORPD 15(R8)(R14*2), Z28, K4, Z14 // 62119d4457b4700f000000
+ VXORPD Z13, Z6, K4, Z14 // 6251cd4c57f5
+ VXORPD Z21, Z6, K4, Z14 // 6231cd4c57f5
+ VXORPD 15(R8)(R14*1), Z6, K4, Z14 // 6211cd4c57b4300f000000
+ VXORPD 15(R8)(R14*2), Z6, K4, Z14 // 6211cd4c57b4700f000000
+ VXORPS X11, X18, K2, X9 // 62516c0257cb
+ VXORPS X31, X18, K2, X9 // 62116c0257cf
+ VXORPS X3, X18, K2, X9 // 62716c0257cb
+ VXORPS 15(DX)(BX*1), X18, K2, X9 // 62716c02578c1a0f000000
+ VXORPS -7(CX)(DX*2), X18, K2, X9 // 62716c02578c51f9ffffff
+ VXORPS X11, X21, K2, X9 // 6251540257cb
+ VXORPS X31, X21, K2, X9 // 6211540257cf
+ VXORPS X3, X21, K2, X9 // 6271540257cb
+ VXORPS 15(DX)(BX*1), X21, K2, X9 // 62715402578c1a0f000000
+ VXORPS -7(CX)(DX*2), X21, K2, X9 // 62715402578c51f9ffffff
+ VXORPS X11, X1, K2, X9 // 6251740a57cb
+ VXORPS X31, X1, K2, X9 // 6211740a57cf
+ VXORPS X3, X1, K2, X9 // 6271740a57cb
+ VXORPS 15(DX)(BX*1), X1, K2, X9 // 6271740a578c1a0f000000
+ VXORPS -7(CX)(DX*2), X1, K2, X9 // 6271740a578c51f9ffffff
+ VXORPS X11, X18, K2, X15 // 62516c0257fb
+ VXORPS X31, X18, K2, X15 // 62116c0257ff
+ VXORPS X3, X18, K2, X15 // 62716c0257fb
+ VXORPS 15(DX)(BX*1), X18, K2, X15 // 62716c0257bc1a0f000000
+ VXORPS -7(CX)(DX*2), X18, K2, X15 // 62716c0257bc51f9ffffff
+ VXORPS X11, X21, K2, X15 // 6251540257fb
+ VXORPS X31, X21, K2, X15 // 6211540257ff
+ VXORPS X3, X21, K2, X15 // 6271540257fb
+ VXORPS 15(DX)(BX*1), X21, K2, X15 // 6271540257bc1a0f000000
+ VXORPS -7(CX)(DX*2), X21, K2, X15 // 6271540257bc51f9ffffff
+ VXORPS X11, X1, K2, X15 // 6251740a57fb
+ VXORPS X31, X1, K2, X15 // 6211740a57ff
+ VXORPS X3, X1, K2, X15 // 6271740a57fb
+ VXORPS 15(DX)(BX*1), X1, K2, X15 // 6271740a57bc1a0f000000
+ VXORPS -7(CX)(DX*2), X1, K2, X15 // 6271740a57bc51f9ffffff
+ VXORPS X11, X18, K2, X26 // 62416c0257d3
+ VXORPS X31, X18, K2, X26 // 62016c0257d7
+ VXORPS X3, X18, K2, X26 // 62616c0257d3
+ VXORPS 15(DX)(BX*1), X18, K2, X26 // 62616c0257941a0f000000
+ VXORPS -7(CX)(DX*2), X18, K2, X26 // 62616c02579451f9ffffff
+ VXORPS X11, X21, K2, X26 // 6241540257d3
+ VXORPS X31, X21, K2, X26 // 6201540257d7
+ VXORPS X3, X21, K2, X26 // 6261540257d3
+ VXORPS 15(DX)(BX*1), X21, K2, X26 // 6261540257941a0f000000
+ VXORPS -7(CX)(DX*2), X21, K2, X26 // 62615402579451f9ffffff
+ VXORPS X11, X1, K2, X26 // 6241740a57d3
+ VXORPS X31, X1, K2, X26 // 6201740a57d7
+ VXORPS X3, X1, K2, X26 // 6261740a57d3
+ VXORPS 15(DX)(BX*1), X1, K2, X26 // 6261740a57941a0f000000
+ VXORPS -7(CX)(DX*2), X1, K2, X26 // 6261740a579451f9ffffff
+ VXORPS Y17, Y12, K2, Y0 // 62b11c2a57c1
+ VXORPS Y7, Y12, K2, Y0 // 62f11c2a57c7
+ VXORPS Y9, Y12, K2, Y0 // 62d11c2a57c1
+ VXORPS 99(R15)(R15*8), Y12, K2, Y0 // 62911c2a5784ff63000000
+ VXORPS 7(AX)(CX*8), Y12, K2, Y0 // 62f11c2a5784c807000000
+ VXORPS Y17, Y1, K2, Y0 // 62b1742a57c1
+ VXORPS Y7, Y1, K2, Y0 // 62f1742a57c7
+ VXORPS Y9, Y1, K2, Y0 // 62d1742a57c1
+ VXORPS 99(R15)(R15*8), Y1, K2, Y0 // 6291742a5784ff63000000
+ VXORPS 7(AX)(CX*8), Y1, K2, Y0 // 62f1742a5784c807000000
+ VXORPS Y17, Y14, K2, Y0 // 62b10c2a57c1
+ VXORPS Y7, Y14, K2, Y0 // 62f10c2a57c7
+ VXORPS Y9, Y14, K2, Y0 // 62d10c2a57c1
+ VXORPS 99(R15)(R15*8), Y14, K2, Y0 // 62910c2a5784ff63000000
+ VXORPS 7(AX)(CX*8), Y14, K2, Y0 // 62f10c2a5784c807000000
+ VXORPS Y17, Y12, K2, Y22 // 62a11c2a57f1
+ VXORPS Y7, Y12, K2, Y22 // 62e11c2a57f7
+ VXORPS Y9, Y12, K2, Y22 // 62c11c2a57f1
+ VXORPS 99(R15)(R15*8), Y12, K2, Y22 // 62811c2a57b4ff63000000
+ VXORPS 7(AX)(CX*8), Y12, K2, Y22 // 62e11c2a57b4c807000000
+ VXORPS Y17, Y1, K2, Y22 // 62a1742a57f1
+ VXORPS Y7, Y1, K2, Y22 // 62e1742a57f7
+ VXORPS Y9, Y1, K2, Y22 // 62c1742a57f1
+ VXORPS 99(R15)(R15*8), Y1, K2, Y22 // 6281742a57b4ff63000000
+ VXORPS 7(AX)(CX*8), Y1, K2, Y22 // 62e1742a57b4c807000000
+ VXORPS Y17, Y14, K2, Y22 // 62a10c2a57f1
+ VXORPS Y7, Y14, K2, Y22 // 62e10c2a57f7
+ VXORPS Y9, Y14, K2, Y22 // 62c10c2a57f1
+ VXORPS 99(R15)(R15*8), Y14, K2, Y22 // 62810c2a57b4ff63000000
+ VXORPS 7(AX)(CX*8), Y14, K2, Y22 // 62e10c2a57b4c807000000
+ VXORPS Y17, Y12, K2, Y13 // 62311c2a57e9
+ VXORPS Y7, Y12, K2, Y13 // 62711c2a57ef
+ VXORPS Y9, Y12, K2, Y13 // 62511c2a57e9
+ VXORPS 99(R15)(R15*8), Y12, K2, Y13 // 62111c2a57acff63000000
+ VXORPS 7(AX)(CX*8), Y12, K2, Y13 // 62711c2a57acc807000000
+ VXORPS Y17, Y1, K2, Y13 // 6231742a57e9
+ VXORPS Y7, Y1, K2, Y13 // 6271742a57ef
+ VXORPS Y9, Y1, K2, Y13 // 6251742a57e9
+ VXORPS 99(R15)(R15*8), Y1, K2, Y13 // 6211742a57acff63000000
+ VXORPS 7(AX)(CX*8), Y1, K2, Y13 // 6271742a57acc807000000
+ VXORPS Y17, Y14, K2, Y13 // 62310c2a57e9
+ VXORPS Y7, Y14, K2, Y13 // 62710c2a57ef
+ VXORPS Y9, Y14, K2, Y13 // 62510c2a57e9
+ VXORPS 99(R15)(R15*8), Y14, K2, Y13 // 62110c2a57acff63000000
+ VXORPS 7(AX)(CX*8), Y14, K2, Y13 // 62710c2a57acc807000000
+ VXORPS Z21, Z3, K3, Z26 // 6221644b57d5
+ VXORPS Z13, Z3, K3, Z26 // 6241644b57d5
+ VXORPS (R14), Z3, K3, Z26 // 6241644b5716
+ VXORPS -7(DI)(R8*8), Z3, K3, Z26 // 6221644b5794c7f9ffffff
+ VXORPS Z21, Z0, K3, Z26 // 62217c4b57d5
+ VXORPS Z13, Z0, K3, Z26 // 62417c4b57d5
+ VXORPS (R14), Z0, K3, Z26 // 62417c4b5716
+ VXORPS -7(DI)(R8*8), Z0, K3, Z26 // 62217c4b5794c7f9ffffff
+ VXORPS Z21, Z3, K3, Z3 // 62b1644b57dd
+ VXORPS Z13, Z3, K3, Z3 // 62d1644b57dd
+ VXORPS (R14), Z3, K3, Z3 // 62d1644b571e
+ VXORPS -7(DI)(R8*8), Z3, K3, Z3 // 62b1644b579cc7f9ffffff
+ VXORPS Z21, Z0, K3, Z3 // 62b17c4b57dd
+ VXORPS Z13, Z0, K3, Z3 // 62d17c4b57dd
+ VXORPS (R14), Z0, K3, Z3 // 62d17c4b571e
+ VXORPS -7(DI)(R8*8), Z0, K3, Z3 // 62b17c4b579cc7f9ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512er.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512er.s
new file mode 100644
index 0000000..855a8d9
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512er.s
@@ -0,0 +1,331 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512er(SB), NOSPLIT, $0
+ VEXP2PD Z17, K7, Z20 // 62a2fd4fc8e1
+ VEXP2PD Z0, K7, Z20 // 62e2fd4fc8e0
+ VEXP2PD Z17, K7, Z0 // 62b2fd4fc8c1
+ VEXP2PD Z0, K7, Z0 // 62f2fd4fc8c0
+ VEXP2PD Z31, K2, Z17 // 6282fd4ac8cf
+ VEXP2PD Z0, K2, Z17 // 62e2fd4ac8c8
+ VEXP2PD (R14), K2, Z17 // 62c2fd4ac80e
+ VEXP2PD -7(DI)(R8*8), K2, Z17 // 62a2fd4ac88cc7f9ffffff
+ VEXP2PD Z31, K2, Z23 // 6282fd4ac8ff
+ VEXP2PD Z0, K2, Z23 // 62e2fd4ac8f8
+ VEXP2PD (R14), K2, Z23 // 62c2fd4ac83e
+ VEXP2PD -7(DI)(R8*8), K2, Z23 // 62a2fd4ac8bcc7f9ffffff
+ VEXP2PS Z6, K4, Z21 // 62e27d4cc8ee
+ VEXP2PS Z9, K4, Z21 // 62c27d4cc8e9
+ VEXP2PS Z6, K4, Z9 // 62727d4cc8ce
+ VEXP2PS Z9, K4, Z9 // 62527d4cc8c9
+ VEXP2PS Z20, K1, Z1 // 62b27d49c8cc
+ VEXP2PS Z9, K1, Z1 // 62d27d49c8c9
+ VEXP2PS 99(R15)(R15*4), K1, Z1 // 62927d49c88cbf63000000
+ VEXP2PS 15(DX), K1, Z1 // 62f27d49c88a0f000000
+ VEXP2PS Z20, K1, Z9 // 62327d49c8cc
+ VEXP2PS Z9, K1, Z9 // 62527d49c8c9
+ VEXP2PS 99(R15)(R15*4), K1, Z9 // 62127d49c88cbf63000000
+ VEXP2PS 15(DX), K1, Z9 // 62727d49c88a0f000000
+ VRCP28PD Z13, K7, Z11 // 6252fd4fcadd
+ VRCP28PD Z14, K7, Z11 // 6252fd4fcade
+ VRCP28PD Z13, K7, Z5 // 62d2fd4fcaed
+ VRCP28PD Z14, K7, Z5 // 62d2fd4fcaee
+ VRCP28PD Z2, K2, Z5 // 62f2fd4acaea
+ VRCP28PD -7(CX)(DX*1), K2, Z5 // 62f2fd4acaac11f9ffffff
+ VRCP28PD -15(R14)(R15*4), K2, Z5 // 6292fd4acaacbef1ffffff
+ VRCP28PD Z2, K2, Z23 // 62e2fd4acafa
+ VRCP28PD -7(CX)(DX*1), K2, Z23 // 62e2fd4acabc11f9ffffff
+ VRCP28PD -15(R14)(R15*4), K2, Z23 // 6282fd4acabcbef1ffffff
+ VRCP28PS Z26, K5, Z6 // 62927d4dcaf2
+ VRCP28PS Z14, K5, Z6 // 62d27d4dcaf6
+ VRCP28PS Z26, K5, Z14 // 62127d4dcaf2
+ VRCP28PS Z14, K5, Z14 // 62527d4dcaf6
+ VRCP28PS Z13, K3, Z28 // 62427d4bcae5
+ VRCP28PS Z21, K3, Z28 // 62227d4bcae5
+ VRCP28PS 15(DX)(BX*1), K3, Z28 // 62627d4bcaa41a0f000000
+ VRCP28PS -7(CX)(DX*2), K3, Z28 // 62627d4bcaa451f9ffffff
+ VRCP28PS Z13, K3, Z6 // 62d27d4bcaf5
+ VRCP28PS Z21, K3, Z6 // 62b27d4bcaf5
+ VRCP28PS 15(DX)(BX*1), K3, Z6 // 62f27d4bcab41a0f000000
+ VRCP28PS -7(CX)(DX*2), K3, Z6 // 62f27d4bcab451f9ffffff
+ VRCP28SD X25, X14, K4, X19 // 62828d0ccbd9
+ VRCP28SD X11, X14, K4, X19 // 62c28d0ccbdb
+ VRCP28SD X17, X14, K4, X19 // 62a28d0ccbd9
+ VRCP28SD X25, X0, K4, X19 // 6282fd0ccbd9
+ VRCP28SD X11, X0, K4, X19 // 62c2fd0ccbdb
+ VRCP28SD X17, X0, K4, X19 // 62a2fd0ccbd9
+ VRCP28SD X25, X14, K4, X13 // 62128d0ccbe9
+ VRCP28SD X11, X14, K4, X13 // 62528d0ccbeb
+ VRCP28SD X17, X14, K4, X13 // 62328d0ccbe9
+ VRCP28SD X25, X0, K4, X13 // 6212fd0ccbe9
+ VRCP28SD X11, X0, K4, X13 // 6252fd0ccbeb
+ VRCP28SD X17, X0, K4, X13 // 6232fd0ccbe9
+ VRCP28SD X25, X14, K4, X2 // 62928d0ccbd1
+ VRCP28SD X11, X14, K4, X2 // 62d28d0ccbd3
+ VRCP28SD X17, X14, K4, X2 // 62b28d0ccbd1
+ VRCP28SD X25, X0, K4, X2 // 6292fd0ccbd1
+ VRCP28SD X11, X0, K4, X2 // 62d2fd0ccbd3
+ VRCP28SD X17, X0, K4, X2 // 62b2fd0ccbd1
+ VRCP28SD X2, X2, K2, X18 // 62e2ed0acbd2 or 62e2ed2acbd2 or 62e2ed4acbd2
+ VRCP28SD X27, X2, K2, X18 // 6282ed0acbd3 or 6282ed2acbd3 or 6282ed4acbd3
+ VRCP28SD X26, X2, K2, X18 // 6282ed0acbd2 or 6282ed2acbd2 or 6282ed4acbd2
+ VRCP28SD 17(SP)(BP*8), X2, K2, X18 // 62e2ed0acb94ec11000000 or 62e2ed2acb94ec11000000 or 62e2ed4acb94ec11000000
+ VRCP28SD 17(SP)(BP*4), X2, K2, X18 // 62e2ed0acb94ac11000000 or 62e2ed2acb94ac11000000 or 62e2ed4acb94ac11000000
+ VRCP28SD X2, X24, K2, X18 // 62e2bd02cbd2 or 62e2bd22cbd2 or 62e2bd42cbd2
+ VRCP28SD X27, X24, K2, X18 // 6282bd02cbd3 or 6282bd22cbd3 or 6282bd42cbd3
+ VRCP28SD X26, X24, K2, X18 // 6282bd02cbd2 or 6282bd22cbd2 or 6282bd42cbd2
+ VRCP28SD 17(SP)(BP*8), X24, K2, X18 // 62e2bd02cb94ec11000000 or 62e2bd22cb94ec11000000 or 62e2bd42cb94ec11000000
+ VRCP28SD 17(SP)(BP*4), X24, K2, X18 // 62e2bd02cb94ac11000000 or 62e2bd22cb94ac11000000 or 62e2bd42cb94ac11000000
+ VRCP28SD X2, X2, K2, X11 // 6272ed0acbda or 6272ed2acbda or 6272ed4acbda
+ VRCP28SD X27, X2, K2, X11 // 6212ed0acbdb or 6212ed2acbdb or 6212ed4acbdb
+ VRCP28SD X26, X2, K2, X11 // 6212ed0acbda or 6212ed2acbda or 6212ed4acbda
+ VRCP28SD 17(SP)(BP*8), X2, K2, X11 // 6272ed0acb9cec11000000 or 6272ed2acb9cec11000000 or 6272ed4acb9cec11000000
+ VRCP28SD 17(SP)(BP*4), X2, K2, X11 // 6272ed0acb9cac11000000 or 6272ed2acb9cac11000000 or 6272ed4acb9cac11000000
+ VRCP28SD X2, X24, K2, X11 // 6272bd02cbda or 6272bd22cbda or 6272bd42cbda
+ VRCP28SD X27, X24, K2, X11 // 6212bd02cbdb or 6212bd22cbdb or 6212bd42cbdb
+ VRCP28SD X26, X24, K2, X11 // 6212bd02cbda or 6212bd22cbda or 6212bd42cbda
+ VRCP28SD 17(SP)(BP*8), X24, K2, X11 // 6272bd02cb9cec11000000 or 6272bd22cb9cec11000000 or 6272bd42cb9cec11000000
+ VRCP28SD 17(SP)(BP*4), X24, K2, X11 // 6272bd02cb9cac11000000 or 6272bd22cb9cac11000000 or 6272bd42cb9cac11000000
+ VRCP28SD X2, X2, K2, X9 // 6272ed0acbca or 6272ed2acbca or 6272ed4acbca
+ VRCP28SD X27, X2, K2, X9 // 6212ed0acbcb or 6212ed2acbcb or 6212ed4acbcb
+ VRCP28SD X26, X2, K2, X9 // 6212ed0acbca or 6212ed2acbca or 6212ed4acbca
+ VRCP28SD 17(SP)(BP*8), X2, K2, X9 // 6272ed0acb8cec11000000 or 6272ed2acb8cec11000000 or 6272ed4acb8cec11000000
+ VRCP28SD 17(SP)(BP*4), X2, K2, X9 // 6272ed0acb8cac11000000 or 6272ed2acb8cac11000000 or 6272ed4acb8cac11000000
+ VRCP28SD X2, X24, K2, X9 // 6272bd02cbca or 6272bd22cbca or 6272bd42cbca
+ VRCP28SD X27, X24, K2, X9 // 6212bd02cbcb or 6212bd22cbcb or 6212bd42cbcb
+ VRCP28SD X26, X24, K2, X9 // 6212bd02cbca or 6212bd22cbca or 6212bd42cbca
+ VRCP28SD 17(SP)(BP*8), X24, K2, X9 // 6272bd02cb8cec11000000 or 6272bd22cb8cec11000000 or 6272bd42cb8cec11000000
+ VRCP28SD 17(SP)(BP*4), X24, K2, X9 // 6272bd02cb8cac11000000 or 6272bd22cb8cac11000000 or 6272bd42cb8cac11000000
+ VRCP28SS X13, X11, K2, X22 // 62c2250acbf5
+ VRCP28SS X6, X11, K2, X22 // 62e2250acbf6
+ VRCP28SS X12, X11, K2, X22 // 62c2250acbf4
+ VRCP28SS X13, X15, K2, X22 // 62c2050acbf5
+ VRCP28SS X6, X15, K2, X22 // 62e2050acbf6
+ VRCP28SS X12, X15, K2, X22 // 62c2050acbf4
+ VRCP28SS X13, X30, K2, X22 // 62c20d02cbf5
+ VRCP28SS X6, X30, K2, X22 // 62e20d02cbf6
+ VRCP28SS X12, X30, K2, X22 // 62c20d02cbf4
+ VRCP28SS X13, X11, K2, X30 // 6242250acbf5
+ VRCP28SS X6, X11, K2, X30 // 6262250acbf6
+ VRCP28SS X12, X11, K2, X30 // 6242250acbf4
+ VRCP28SS X13, X15, K2, X30 // 6242050acbf5
+ VRCP28SS X6, X15, K2, X30 // 6262050acbf6
+ VRCP28SS X12, X15, K2, X30 // 6242050acbf4
+ VRCP28SS X13, X30, K2, X30 // 62420d02cbf5
+ VRCP28SS X6, X30, K2, X30 // 62620d02cbf6
+ VRCP28SS X12, X30, K2, X30 // 62420d02cbf4
+ VRCP28SS X13, X11, K2, X3 // 62d2250acbdd
+ VRCP28SS X6, X11, K2, X3 // 62f2250acbde
+ VRCP28SS X12, X11, K2, X3 // 62d2250acbdc
+ VRCP28SS X13, X15, K2, X3 // 62d2050acbdd
+ VRCP28SS X6, X15, K2, X3 // 62f2050acbde
+ VRCP28SS X12, X15, K2, X3 // 62d2050acbdc
+ VRCP28SS X13, X30, K2, X3 // 62d20d02cbdd
+ VRCP28SS X6, X30, K2, X3 // 62f20d02cbde
+ VRCP28SS X12, X30, K2, X3 // 62d20d02cbdc
+ VRCP28SS X26, X20, K3, X23 // 62825d03cbfa or 62825d23cbfa or 62825d43cbfa
+ VRCP28SS X19, X20, K3, X23 // 62a25d03cbfb or 62a25d23cbfb or 62a25d43cbfb
+ VRCP28SS X0, X20, K3, X23 // 62e25d03cbf8 or 62e25d23cbf8 or 62e25d43cbf8
+ VRCP28SS -7(CX), X20, K3, X23 // 62e25d03cbb9f9ffffff or 62e25d23cbb9f9ffffff or 62e25d43cbb9f9ffffff
+ VRCP28SS 15(DX)(BX*4), X20, K3, X23 // 62e25d03cbbc9a0f000000 or 62e25d23cbbc9a0f000000 or 62e25d43cbbc9a0f000000
+ VRCP28SS X26, X2, K3, X23 // 62826d0bcbfa or 62826d2bcbfa or 62826d4bcbfa
+ VRCP28SS X19, X2, K3, X23 // 62a26d0bcbfb or 62a26d2bcbfb or 62a26d4bcbfb
+ VRCP28SS X0, X2, K3, X23 // 62e26d0bcbf8 or 62e26d2bcbf8 or 62e26d4bcbf8
+ VRCP28SS -7(CX), X2, K3, X23 // 62e26d0bcbb9f9ffffff or 62e26d2bcbb9f9ffffff or 62e26d4bcbb9f9ffffff
+ VRCP28SS 15(DX)(BX*4), X2, K3, X23 // 62e26d0bcbbc9a0f000000 or 62e26d2bcbbc9a0f000000 or 62e26d4bcbbc9a0f000000
+ VRCP28SS X26, X9, K3, X23 // 6282350bcbfa or 6282352bcbfa or 6282354bcbfa
+ VRCP28SS X19, X9, K3, X23 // 62a2350bcbfb or 62a2352bcbfb or 62a2354bcbfb
+ VRCP28SS X0, X9, K3, X23 // 62e2350bcbf8 or 62e2352bcbf8 or 62e2354bcbf8
+ VRCP28SS -7(CX), X9, K3, X23 // 62e2350bcbb9f9ffffff or 62e2352bcbb9f9ffffff or 62e2354bcbb9f9ffffff
+ VRCP28SS 15(DX)(BX*4), X9, K3, X23 // 62e2350bcbbc9a0f000000 or 62e2352bcbbc9a0f000000 or 62e2354bcbbc9a0f000000
+ VRCP28SS X26, X20, K3, X30 // 62025d03cbf2 or 62025d23cbf2 or 62025d43cbf2
+ VRCP28SS X19, X20, K3, X30 // 62225d03cbf3 or 62225d23cbf3 or 62225d43cbf3
+ VRCP28SS X0, X20, K3, X30 // 62625d03cbf0 or 62625d23cbf0 or 62625d43cbf0
+ VRCP28SS -7(CX), X20, K3, X30 // 62625d03cbb1f9ffffff or 62625d23cbb1f9ffffff or 62625d43cbb1f9ffffff
+ VRCP28SS 15(DX)(BX*4), X20, K3, X30 // 62625d03cbb49a0f000000 or 62625d23cbb49a0f000000 or 62625d43cbb49a0f000000
+ VRCP28SS X26, X2, K3, X30 // 62026d0bcbf2 or 62026d2bcbf2 or 62026d4bcbf2
+ VRCP28SS X19, X2, K3, X30 // 62226d0bcbf3 or 62226d2bcbf3 or 62226d4bcbf3
+ VRCP28SS X0, X2, K3, X30 // 62626d0bcbf0 or 62626d2bcbf0 or 62626d4bcbf0
+ VRCP28SS -7(CX), X2, K3, X30 // 62626d0bcbb1f9ffffff or 62626d2bcbb1f9ffffff or 62626d4bcbb1f9ffffff
+ VRCP28SS 15(DX)(BX*4), X2, K3, X30 // 62626d0bcbb49a0f000000 or 62626d2bcbb49a0f000000 or 62626d4bcbb49a0f000000
+ VRCP28SS X26, X9, K3, X30 // 6202350bcbf2 or 6202352bcbf2 or 6202354bcbf2
+ VRCP28SS X19, X9, K3, X30 // 6222350bcbf3 or 6222352bcbf3 or 6222354bcbf3
+ VRCP28SS X0, X9, K3, X30 // 6262350bcbf0 or 6262352bcbf0 or 6262354bcbf0
+ VRCP28SS -7(CX), X9, K3, X30 // 6262350bcbb1f9ffffff or 6262352bcbb1f9ffffff or 6262354bcbb1f9ffffff
+ VRCP28SS 15(DX)(BX*4), X9, K3, X30 // 6262350bcbb49a0f000000 or 6262352bcbb49a0f000000 or 6262354bcbb49a0f000000
+ VRCP28SS X26, X20, K3, X8 // 62125d03cbc2 or 62125d23cbc2 or 62125d43cbc2
+ VRCP28SS X19, X20, K3, X8 // 62325d03cbc3 or 62325d23cbc3 or 62325d43cbc3
+ VRCP28SS X0, X20, K3, X8 // 62725d03cbc0 or 62725d23cbc0 or 62725d43cbc0
+ VRCP28SS -7(CX), X20, K3, X8 // 62725d03cb81f9ffffff or 62725d23cb81f9ffffff or 62725d43cb81f9ffffff
+ VRCP28SS 15(DX)(BX*4), X20, K3, X8 // 62725d03cb849a0f000000 or 62725d23cb849a0f000000 or 62725d43cb849a0f000000
+ VRCP28SS X26, X2, K3, X8 // 62126d0bcbc2 or 62126d2bcbc2 or 62126d4bcbc2
+ VRCP28SS X19, X2, K3, X8 // 62326d0bcbc3 or 62326d2bcbc3 or 62326d4bcbc3
+ VRCP28SS X0, X2, K3, X8 // 62726d0bcbc0 or 62726d2bcbc0 or 62726d4bcbc0
+ VRCP28SS -7(CX), X2, K3, X8 // 62726d0bcb81f9ffffff or 62726d2bcb81f9ffffff or 62726d4bcb81f9ffffff
+ VRCP28SS 15(DX)(BX*4), X2, K3, X8 // 62726d0bcb849a0f000000 or 62726d2bcb849a0f000000 or 62726d4bcb849a0f000000
+ VRCP28SS X26, X9, K3, X8 // 6212350bcbc2 or 6212352bcbc2 or 6212354bcbc2
+ VRCP28SS X19, X9, K3, X8 // 6232350bcbc3 or 6232352bcbc3 or 6232354bcbc3
+ VRCP28SS X0, X9, K3, X8 // 6272350bcbc0 or 6272352bcbc0 or 6272354bcbc0
+ VRCP28SS -7(CX), X9, K3, X8 // 6272350bcb81f9ffffff or 6272352bcb81f9ffffff or 6272354bcb81f9ffffff
+ VRCP28SS 15(DX)(BX*4), X9, K3, X8 // 6272350bcb849a0f000000 or 6272352bcb849a0f000000 or 6272354bcb849a0f000000
+ VRSQRT28PD Z7, K3, Z3 // 62f2fd4bccdf
+ VRSQRT28PD Z9, K3, Z3 // 62d2fd4bccd9
+ VRSQRT28PD Z7, K3, Z27 // 6262fd4bccdf
+ VRSQRT28PD Z9, K3, Z27 // 6242fd4bccd9
+ VRSQRT28PD Z20, K3, Z0 // 62b2fd4bccc4
+ VRSQRT28PD Z28, K3, Z0 // 6292fd4bccc4
+ VRSQRT28PD (SI), K3, Z0 // 62f2fd4bcc06
+ VRSQRT28PD 7(SI)(DI*2), K3, Z0 // 62f2fd4bcc847e07000000
+ VRSQRT28PD Z20, K3, Z6 // 62b2fd4bccf4
+ VRSQRT28PD Z28, K3, Z6 // 6292fd4bccf4
+ VRSQRT28PD (SI), K3, Z6 // 62f2fd4bcc36
+ VRSQRT28PD 7(SI)(DI*2), K3, Z6 // 62f2fd4bccb47e07000000
+ VRSQRT28PS Z9, K2, Z3 // 62d27d4accd9
+ VRSQRT28PS Z19, K2, Z3 // 62b27d4accdb
+ VRSQRT28PS Z9, K2, Z30 // 62427d4accf1
+ VRSQRT28PS Z19, K2, Z30 // 62227d4accf3
+ VRSQRT28PS Z11, K1, Z12 // 62527d49cce3
+ VRSQRT28PS Z5, K1, Z12 // 62727d49cce5
+ VRSQRT28PS 17(SP)(BP*8), K1, Z12 // 62727d49cca4ec11000000
+ VRSQRT28PS 17(SP)(BP*4), K1, Z12 // 62727d49cca4ac11000000
+ VRSQRT28PS Z11, K1, Z22 // 62c27d49ccf3
+ VRSQRT28PS Z5, K1, Z22 // 62e27d49ccf5
+ VRSQRT28PS 17(SP)(BP*8), K1, Z22 // 62e27d49ccb4ec11000000
+ VRSQRT28PS 17(SP)(BP*4), K1, Z22 // 62e27d49ccb4ac11000000
+ VRSQRT28SD X20, X20, K2, X31 // 6222dd02cdfc
+ VRSQRT28SD X16, X20, K2, X31 // 6222dd02cdf8
+ VRSQRT28SD X12, X20, K2, X31 // 6242dd02cdfc
+ VRSQRT28SD X20, X24, K2, X31 // 6222bd02cdfc
+ VRSQRT28SD X16, X24, K2, X31 // 6222bd02cdf8
+ VRSQRT28SD X12, X24, K2, X31 // 6242bd02cdfc
+ VRSQRT28SD X20, X7, K2, X31 // 6222c50acdfc
+ VRSQRT28SD X16, X7, K2, X31 // 6222c50acdf8
+ VRSQRT28SD X12, X7, K2, X31 // 6242c50acdfc
+ VRSQRT28SD X20, X20, K2, X3 // 62b2dd02cddc
+ VRSQRT28SD X16, X20, K2, X3 // 62b2dd02cdd8
+ VRSQRT28SD X12, X20, K2, X3 // 62d2dd02cddc
+ VRSQRT28SD X20, X24, K2, X3 // 62b2bd02cddc
+ VRSQRT28SD X16, X24, K2, X3 // 62b2bd02cdd8
+ VRSQRT28SD X12, X24, K2, X3 // 62d2bd02cddc
+ VRSQRT28SD X20, X7, K2, X3 // 62b2c50acddc
+ VRSQRT28SD X16, X7, K2, X3 // 62b2c50acdd8
+ VRSQRT28SD X12, X7, K2, X3 // 62d2c50acddc
+ VRSQRT28SD X20, X20, K2, X28 // 6222dd02cde4
+ VRSQRT28SD X16, X20, K2, X28 // 6222dd02cde0
+ VRSQRT28SD X12, X20, K2, X28 // 6242dd02cde4
+ VRSQRT28SD X20, X24, K2, X28 // 6222bd02cde4
+ VRSQRT28SD X16, X24, K2, X28 // 6222bd02cde0
+ VRSQRT28SD X12, X24, K2, X28 // 6242bd02cde4
+ VRSQRT28SD X20, X7, K2, X28 // 6222c50acde4
+ VRSQRT28SD X16, X7, K2, X28 // 6222c50acde0
+ VRSQRT28SD X12, X7, K2, X28 // 6242c50acde4
+ VRSQRT28SD X8, X6, K1, X6 // 62d2cd09cdf0 or 62d2cd29cdf0 or 62d2cd49cdf0
+ VRSQRT28SD X6, X6, K1, X6 // 62f2cd09cdf6 or 62f2cd29cdf6 or 62f2cd49cdf6
+ VRSQRT28SD X0, X6, K1, X6 // 62f2cd09cdf0 or 62f2cd29cdf0 or 62f2cd49cdf0
+ VRSQRT28SD 99(R15)(R15*1), X6, K1, X6 // 6292cd09cdb43f63000000 or 6292cd29cdb43f63000000 or 6292cd49cdb43f63000000
+ VRSQRT28SD (DX), X6, K1, X6 // 62f2cd09cd32 or 62f2cd29cd32 or 62f2cd49cd32
+ VRSQRT28SD X8, X1, K1, X6 // 62d2f509cdf0 or 62d2f529cdf0 or 62d2f549cdf0
+ VRSQRT28SD X6, X1, K1, X6 // 62f2f509cdf6 or 62f2f529cdf6 or 62f2f549cdf6
+ VRSQRT28SD X0, X1, K1, X6 // 62f2f509cdf0 or 62f2f529cdf0 or 62f2f549cdf0
+ VRSQRT28SD 99(R15)(R15*1), X1, K1, X6 // 6292f509cdb43f63000000 or 6292f529cdb43f63000000 or 6292f549cdb43f63000000
+ VRSQRT28SD (DX), X1, K1, X6 // 62f2f509cd32 or 62f2f529cd32 or 62f2f549cd32
+ VRSQRT28SD X8, X8, K1, X6 // 62d2bd09cdf0 or 62d2bd29cdf0 or 62d2bd49cdf0
+ VRSQRT28SD X6, X8, K1, X6 // 62f2bd09cdf6 or 62f2bd29cdf6 or 62f2bd49cdf6
+ VRSQRT28SD X0, X8, K1, X6 // 62f2bd09cdf0 or 62f2bd29cdf0 or 62f2bd49cdf0
+ VRSQRT28SD 99(R15)(R15*1), X8, K1, X6 // 6292bd09cdb43f63000000 or 6292bd29cdb43f63000000 or 6292bd49cdb43f63000000
+ VRSQRT28SD (DX), X8, K1, X6 // 62f2bd09cd32 or 62f2bd29cd32 or 62f2bd49cd32
+ VRSQRT28SD X8, X6, K1, X17 // 62c2cd09cdc8 or 62c2cd29cdc8 or 62c2cd49cdc8
+ VRSQRT28SD X6, X6, K1, X17 // 62e2cd09cdce or 62e2cd29cdce or 62e2cd49cdce
+ VRSQRT28SD X0, X6, K1, X17 // 62e2cd09cdc8 or 62e2cd29cdc8 or 62e2cd49cdc8
+ VRSQRT28SD 99(R15)(R15*1), X6, K1, X17 // 6282cd09cd8c3f63000000 or 6282cd29cd8c3f63000000 or 6282cd49cd8c3f63000000
+ VRSQRT28SD (DX), X6, K1, X17 // 62e2cd09cd0a or 62e2cd29cd0a or 62e2cd49cd0a
+ VRSQRT28SD X8, X1, K1, X17 // 62c2f509cdc8 or 62c2f529cdc8 or 62c2f549cdc8
+ VRSQRT28SD X6, X1, K1, X17 // 62e2f509cdce or 62e2f529cdce or 62e2f549cdce
+ VRSQRT28SD X0, X1, K1, X17 // 62e2f509cdc8 or 62e2f529cdc8 or 62e2f549cdc8
+ VRSQRT28SD 99(R15)(R15*1), X1, K1, X17 // 6282f509cd8c3f63000000 or 6282f529cd8c3f63000000 or 6282f549cd8c3f63000000
+ VRSQRT28SD (DX), X1, K1, X17 // 62e2f509cd0a or 62e2f529cd0a or 62e2f549cd0a
+ VRSQRT28SD X8, X8, K1, X17 // 62c2bd09cdc8 or 62c2bd29cdc8 or 62c2bd49cdc8
+ VRSQRT28SD X6, X8, K1, X17 // 62e2bd09cdce or 62e2bd29cdce or 62e2bd49cdce
+ VRSQRT28SD X0, X8, K1, X17 // 62e2bd09cdc8 or 62e2bd29cdc8 or 62e2bd49cdc8
+ VRSQRT28SD 99(R15)(R15*1), X8, K1, X17 // 6282bd09cd8c3f63000000 or 6282bd29cd8c3f63000000 or 6282bd49cd8c3f63000000
+ VRSQRT28SD (DX), X8, K1, X17 // 62e2bd09cd0a or 62e2bd29cd0a or 62e2bd49cd0a
+ VRSQRT28SD X8, X6, K1, X28 // 6242cd09cde0 or 6242cd29cde0 or 6242cd49cde0
+ VRSQRT28SD X6, X6, K1, X28 // 6262cd09cde6 or 6262cd29cde6 or 6262cd49cde6
+ VRSQRT28SD X0, X6, K1, X28 // 6262cd09cde0 or 6262cd29cde0 or 6262cd49cde0
+ VRSQRT28SD 99(R15)(R15*1), X6, K1, X28 // 6202cd09cda43f63000000 or 6202cd29cda43f63000000 or 6202cd49cda43f63000000
+ VRSQRT28SD (DX), X6, K1, X28 // 6262cd09cd22 or 6262cd29cd22 or 6262cd49cd22
+ VRSQRT28SD X8, X1, K1, X28 // 6242f509cde0 or 6242f529cde0 or 6242f549cde0
+ VRSQRT28SD X6, X1, K1, X28 // 6262f509cde6 or 6262f529cde6 or 6262f549cde6
+ VRSQRT28SD X0, X1, K1, X28 // 6262f509cde0 or 6262f529cde0 or 6262f549cde0
+ VRSQRT28SD 99(R15)(R15*1), X1, K1, X28 // 6202f509cda43f63000000 or 6202f529cda43f63000000 or 6202f549cda43f63000000
+ VRSQRT28SD (DX), X1, K1, X28 // 6262f509cd22 or 6262f529cd22 or 6262f549cd22
+ VRSQRT28SD X8, X8, K1, X28 // 6242bd09cde0 or 6242bd29cde0 or 6242bd49cde0
+ VRSQRT28SD X6, X8, K1, X28 // 6262bd09cde6 or 6262bd29cde6 or 6262bd49cde6
+ VRSQRT28SD X0, X8, K1, X28 // 6262bd09cde0 or 6262bd29cde0 or 6262bd49cde0
+ VRSQRT28SD 99(R15)(R15*1), X8, K1, X28 // 6202bd09cda43f63000000 or 6202bd29cda43f63000000 or 6202bd49cda43f63000000
+ VRSQRT28SD (DX), X8, K1, X28 // 6262bd09cd22 or 6262bd29cd22 or 6262bd49cd22
+ VRSQRT28SS X16, X6, K7, X11 // 62324d0fcdd8
+ VRSQRT28SS X28, X6, K7, X11 // 62124d0fcddc
+ VRSQRT28SS X8, X6, K7, X11 // 62524d0fcdd8
+ VRSQRT28SS X16, X22, K7, X11 // 62324d07cdd8
+ VRSQRT28SS X28, X22, K7, X11 // 62124d07cddc
+ VRSQRT28SS X8, X22, K7, X11 // 62524d07cdd8
+ VRSQRT28SS X16, X12, K7, X11 // 62321d0fcdd8
+ VRSQRT28SS X28, X12, K7, X11 // 62121d0fcddc
+ VRSQRT28SS X8, X12, K7, X11 // 62521d0fcdd8
+ VRSQRT28SS X16, X6, K7, X16 // 62a24d0fcdc0
+ VRSQRT28SS X28, X6, K7, X16 // 62824d0fcdc4
+ VRSQRT28SS X8, X6, K7, X16 // 62c24d0fcdc0
+ VRSQRT28SS X16, X22, K7, X16 // 62a24d07cdc0
+ VRSQRT28SS X28, X22, K7, X16 // 62824d07cdc4
+ VRSQRT28SS X8, X22, K7, X16 // 62c24d07cdc0
+ VRSQRT28SS X16, X12, K7, X16 // 62a21d0fcdc0
+ VRSQRT28SS X28, X12, K7, X16 // 62821d0fcdc4
+ VRSQRT28SS X8, X12, K7, X16 // 62c21d0fcdc0
+ VRSQRT28SS X16, X6, K7, X6 // 62b24d0fcdf0
+ VRSQRT28SS X28, X6, K7, X6 // 62924d0fcdf4
+ VRSQRT28SS X8, X6, K7, X6 // 62d24d0fcdf0
+ VRSQRT28SS X16, X22, K7, X6 // 62b24d07cdf0
+ VRSQRT28SS X28, X22, K7, X6 // 62924d07cdf4
+ VRSQRT28SS X8, X22, K7, X6 // 62d24d07cdf0
+ VRSQRT28SS X16, X12, K7, X6 // 62b21d0fcdf0
+ VRSQRT28SS X28, X12, K7, X6 // 62921d0fcdf4
+ VRSQRT28SS X8, X12, K7, X6 // 62d21d0fcdf0
+ VRSQRT28SS X14, X19, K1, X15 // 62526501cdfe or 62526521cdfe or 62526541cdfe
+ VRSQRT28SS X0, X19, K1, X15 // 62726501cdf8 or 62726521cdf8 or 62726541cdf8
+ VRSQRT28SS 15(R8)(R14*4), X19, K1, X15 // 62126501cdbcb00f000000 or 62126521cdbcb00f000000 or 62126541cdbcb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X19, K1, X15 // 62726501cdbc91f9ffffff or 62726521cdbc91f9ffffff or 62726541cdbc91f9ffffff
+ VRSQRT28SS X14, X13, K1, X15 // 62521509cdfe or 62521529cdfe or 62521549cdfe
+ VRSQRT28SS X0, X13, K1, X15 // 62721509cdf8 or 62721529cdf8 or 62721549cdf8
+ VRSQRT28SS 15(R8)(R14*4), X13, K1, X15 // 62121509cdbcb00f000000 or 62121529cdbcb00f000000 or 62121549cdbcb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X13, K1, X15 // 62721509cdbc91f9ffffff or 62721529cdbc91f9ffffff or 62721549cdbc91f9ffffff
+ VRSQRT28SS X14, X2, K1, X15 // 62526d09cdfe or 62526d29cdfe or 62526d49cdfe
+ VRSQRT28SS X0, X2, K1, X15 // 62726d09cdf8 or 62726d29cdf8 or 62726d49cdf8
+ VRSQRT28SS 15(R8)(R14*4), X2, K1, X15 // 62126d09cdbcb00f000000 or 62126d29cdbcb00f000000 or 62126d49cdbcb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X2, K1, X15 // 62726d09cdbc91f9ffffff or 62726d29cdbc91f9ffffff or 62726d49cdbc91f9ffffff
+ VRSQRT28SS X14, X19, K1, X11 // 62526501cdde or 62526521cdde or 62526541cdde
+ VRSQRT28SS X0, X19, K1, X11 // 62726501cdd8 or 62726521cdd8 or 62726541cdd8
+ VRSQRT28SS 15(R8)(R14*4), X19, K1, X11 // 62126501cd9cb00f000000 or 62126521cd9cb00f000000 or 62126541cd9cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X19, K1, X11 // 62726501cd9c91f9ffffff or 62726521cd9c91f9ffffff or 62726541cd9c91f9ffffff
+ VRSQRT28SS X14, X13, K1, X11 // 62521509cdde or 62521529cdde or 62521549cdde
+ VRSQRT28SS X0, X13, K1, X11 // 62721509cdd8 or 62721529cdd8 or 62721549cdd8
+ VRSQRT28SS 15(R8)(R14*4), X13, K1, X11 // 62121509cd9cb00f000000 or 62121529cd9cb00f000000 or 62121549cd9cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X13, K1, X11 // 62721509cd9c91f9ffffff or 62721529cd9c91f9ffffff or 62721549cd9c91f9ffffff
+ VRSQRT28SS X14, X2, K1, X11 // 62526d09cdde or 62526d29cdde or 62526d49cdde
+ VRSQRT28SS X0, X2, K1, X11 // 62726d09cdd8 or 62726d29cdd8 or 62726d49cdd8
+ VRSQRT28SS 15(R8)(R14*4), X2, K1, X11 // 62126d09cd9cb00f000000 or 62126d29cd9cb00f000000 or 62126d49cd9cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X2, K1, X11 // 62726d09cd9c91f9ffffff or 62726d29cd9c91f9ffffff or 62726d49cd9c91f9ffffff
+ VRSQRT28SS X14, X19, K1, X1 // 62d26501cdce or 62d26521cdce or 62d26541cdce
+ VRSQRT28SS X0, X19, K1, X1 // 62f26501cdc8 or 62f26521cdc8 or 62f26541cdc8
+ VRSQRT28SS 15(R8)(R14*4), X19, K1, X1 // 62926501cd8cb00f000000 or 62926521cd8cb00f000000 or 62926541cd8cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X19, K1, X1 // 62f26501cd8c91f9ffffff or 62f26521cd8c91f9ffffff or 62f26541cd8c91f9ffffff
+ VRSQRT28SS X14, X13, K1, X1 // 62d21509cdce or 62d21529cdce or 62d21549cdce
+ VRSQRT28SS X0, X13, K1, X1 // 62f21509cdc8 or 62f21529cdc8 or 62f21549cdc8
+ VRSQRT28SS 15(R8)(R14*4), X13, K1, X1 // 62921509cd8cb00f000000 or 62921529cd8cb00f000000 or 62921549cd8cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X13, K1, X1 // 62f21509cd8c91f9ffffff or 62f21529cd8c91f9ffffff or 62f21549cd8c91f9ffffff
+ VRSQRT28SS X14, X2, K1, X1 // 62d26d09cdce or 62d26d29cdce or 62d26d49cdce
+ VRSQRT28SS X0, X2, K1, X1 // 62f26d09cdc8 or 62f26d29cdc8 or 62f26d49cdc8
+ VRSQRT28SS 15(R8)(R14*4), X2, K1, X1 // 62926d09cd8cb00f000000 or 62926d29cd8cb00f000000 or 62926d49cd8cb00f000000
+ VRSQRT28SS -7(CX)(DX*4), X2, K1, X1 // 62f26d09cd8c91f9ffffff or 62f26d29cd8c91f9ffffff or 62f26d49cd8c91f9ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512f.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512f.s
new file mode 100644
index 0000000..71b5764
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512f.s
@@ -0,0 +1,5639 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512f(SB), NOSPLIT, $0
+ KANDNW K4, K4, K6 // c5dc42f4
+ KANDNW K5, K4, K6 // c5dc42f5
+ KANDNW K4, K6, K6 // c5cc42f4
+ KANDNW K5, K6, K6 // c5cc42f5
+ KANDNW K4, K4, K4 // c5dc42e4
+ KANDNW K5, K4, K4 // c5dc42e5
+ KANDNW K4, K6, K4 // c5cc42e4
+ KANDNW K5, K6, K4 // c5cc42e5
+ KANDW K5, K3, K1 // c5e441cd
+ KANDW K4, K3, K1 // c5e441cc
+ KANDW K5, K1, K1 // c5f441cd
+ KANDW K4, K1, K1 // c5f441cc
+ KANDW K5, K3, K5 // c5e441ed
+ KANDW K4, K3, K5 // c5e441ec
+ KANDW K5, K1, K5 // c5f441ed
+ KANDW K4, K1, K5 // c5f441ec
+ KMOVW K5, 17(SP) // c5f8916c2411
+ KMOVW K4, 17(SP) // c5f891642411
+ KMOVW K5, -17(BP)(SI*4) // c5f8916cb5ef
+ KMOVW K4, -17(BP)(SI*4) // c5f89164b5ef
+ KMOVW K7, SP // c5f893e7
+ KMOVW K6, SP // c5f893e6
+ KMOVW K7, R14 // c57893f7
+ KMOVW K6, R14 // c57893f6
+ KMOVW K0, K4 // c5f890e0
+ KMOVW K7, K4 // c5f890e7
+ KMOVW 7(AX), K4 // c5f8906007
+ KMOVW (DI), K4 // c5f89027
+ KMOVW K0, K6 // c5f890f0
+ KMOVW K7, K6 // c5f890f7
+ KMOVW 7(AX), K6 // c5f8907007
+ KMOVW (DI), K6 // c5f89037
+ KMOVW AX, K5 // c5f892e8
+ KMOVW R9, K5 // c4c17892e9
+ KMOVW AX, K4 // c5f892e0
+ KMOVW R9, K4 // c4c17892e1
+ KNOTW K0, K2 // c5f844d0
+ KNOTW K5, K2 // c5f844d5
+ KNOTW K0, K7 // c5f844f8
+ KNOTW K5, K7 // c5f844fd
+ KORTESTW K6, K0 // c5f898c6
+ KORTESTW K5, K0 // c5f898c5
+ KORTESTW K6, K5 // c5f898ee
+ KORTESTW K5, K5 // c5f898ed
+ KORW K5, K3, K1 // c5e445cd
+ KORW K4, K3, K1 // c5e445cc
+ KORW K5, K1, K1 // c5f445cd
+ KORW K4, K1, K1 // c5f445cc
+ KORW K5, K3, K5 // c5e445ed
+ KORW K4, K3, K5 // c5e445ec
+ KORW K5, K1, K5 // c5f445ed
+ KORW K4, K1, K5 // c5f445ec
+ KSHIFTLW $81, K6, K6 // c4e3f932f651
+ KSHIFTLW $81, K4, K6 // c4e3f932f451
+ KSHIFTLW $81, K6, K7 // c4e3f932fe51
+ KSHIFTLW $81, K4, K7 // c4e3f932fc51
+ KSHIFTRW $27, K5, K3 // c4e3f930dd1b
+ KSHIFTRW $27, K4, K3 // c4e3f930dc1b
+ KSHIFTRW $27, K5, K1 // c4e3f930cd1b
+ KSHIFTRW $27, K4, K1 // c4e3f930cc1b
+ KUNPCKBW K2, K4, K4 // c5dd4be2
+ KUNPCKBW K7, K4, K4 // c5dd4be7
+ KUNPCKBW K2, K5, K4 // c5d54be2
+ KUNPCKBW K7, K5, K4 // c5d54be7
+ KUNPCKBW K2, K4, K6 // c5dd4bf2
+ KUNPCKBW K7, K4, K6 // c5dd4bf7
+ KUNPCKBW K2, K5, K6 // c5d54bf2
+ KUNPCKBW K7, K5, K6 // c5d54bf7
+ KXNORW K6, K0, K2 // c5fc46d6
+ KXNORW K5, K0, K2 // c5fc46d5
+ KXNORW K6, K5, K2 // c5d446d6
+ KXNORW K5, K5, K2 // c5d446d5
+ KXNORW K6, K0, K7 // c5fc46fe
+ KXNORW K5, K0, K7 // c5fc46fd
+ KXNORW K6, K5, K7 // c5d446fe
+ KXNORW K5, K5, K7 // c5d446fd
+ KXORW K4, K6, K6 // c5cc47f4
+ KXORW K6, K6, K6 // c5cc47f6
+ KXORW K4, K4, K6 // c5dc47f4
+ KXORW K6, K4, K6 // c5dc47f6
+ KXORW K4, K6, K7 // c5cc47fc
+ KXORW K6, K6, K7 // c5cc47fe
+ KXORW K4, K4, K7 // c5dc47fc
+ KXORW K6, K4, K7 // c5dc47fe
+ VADDPD X15, X11, K2, X3 // 62d1a50a58df
+ VADDPD 7(SI)(DI*8), X11, K2, X3 // 62f1a50a589cfe07000000
+ VADDPD -15(R14), X11, K2, X3 // 62d1a50a589ef1ffffff
+ VADDPD Y25, Y31, K2, Y14 // 6211852258f1
+ VADDPD 17(SP), Y31, K2, Y14 // 6271852258b42411000000
+ VADDPD -17(BP)(SI*4), Y31, K2, Y14 // 6271852258b4b5efffffff
+ VADDPD Z13, Z11, K3, Z14 // 6251a54b58f5
+ VADDPD Z14, Z11, K3, Z14 // 6251a54b58f6
+ VADDPD Z13, Z5, K3, Z14 // 6251d54b58f5
+ VADDPD Z14, Z5, K3, Z14 // 6251d54b58f6
+ VADDPD Z13, Z11, K3, Z27 // 6241a54b58dd
+ VADDPD Z14, Z11, K3, Z27 // 6241a54b58de
+ VADDPD Z13, Z5, K3, Z27 // 6241d54b58dd
+ VADDPD Z14, Z5, K3, Z27 // 6241d54b58de
+ VADDPD Z6, Z2, K3, Z5 // 62f1ed4b58ee
+ VADDPD Z14, Z2, K3, Z5 // 62d1ed4b58ee
+ VADDPD 17(SP), Z2, K3, Z5 // 62f1ed4b58ac2411000000
+ VADDPD -17(BP)(SI*4), Z2, K3, Z5 // 62f1ed4b58acb5efffffff
+ VADDPD Z6, Z2, K3, Z23 // 62e1ed4b58fe
+ VADDPD Z14, Z2, K3, Z23 // 62c1ed4b58fe
+ VADDPD 17(SP), Z2, K3, Z23 // 62e1ed4b58bc2411000000
+ VADDPD -17(BP)(SI*4), Z2, K3, Z23 // 62e1ed4b58bcb5efffffff
+ VADDPS X6, X13, K3, X30 // 6261140b58f6
+ VADDPS 7(SI)(DI*1), X13, K3, X30 // 6261140b58b43e07000000
+ VADDPS 15(DX)(BX*8), X13, K3, X30 // 6261140b58b4da0f000000
+ VADDPS Y27, Y22, K2, Y2 // 62914c2258d3
+ VADDPS 7(AX), Y22, K2, Y2 // 62f14c22589007000000
+ VADDPS (DI), Y22, K2, Y2 // 62f14c225817
+ VADDPS Z13, Z28, K1, Z26 // 62411c4158d5
+ VADDPS Z21, Z28, K1, Z26 // 62211c4158d5
+ VADDPS Z13, Z6, K1, Z26 // 62414c4958d5
+ VADDPS Z21, Z6, K1, Z26 // 62214c4958d5
+ VADDPS Z13, Z28, K1, Z14 // 62511c4158f5
+ VADDPS Z21, Z28, K1, Z14 // 62311c4158f5
+ VADDPS Z13, Z6, K1, Z14 // 62514c4958f5
+ VADDPS Z21, Z6, K1, Z14 // 62314c4958f5
+ VADDPS Z21, Z3, K2, Z26 // 6221644a58d5
+ VADDPS Z13, Z3, K2, Z26 // 6241644a58d5
+ VADDPS 7(AX), Z3, K2, Z26 // 6261644a589007000000
+ VADDPS (DI), Z3, K2, Z26 // 6261644a5817
+ VADDPS Z21, Z0, K2, Z26 // 62217c4a58d5
+ VADDPS Z13, Z0, K2, Z26 // 62417c4a58d5
+ VADDPS 7(AX), Z0, K2, Z26 // 62617c4a589007000000
+ VADDPS (DI), Z0, K2, Z26 // 62617c4a5817
+ VADDPS Z21, Z3, K2, Z3 // 62b1644a58dd
+ VADDPS Z13, Z3, K2, Z3 // 62d1644a58dd
+ VADDPS 7(AX), Z3, K2, Z3 // 62f1644a589807000000
+ VADDPS (DI), Z3, K2, Z3 // 62f1644a581f
+ VADDPS Z21, Z0, K2, Z3 // 62b17c4a58dd
+ VADDPS Z13, Z0, K2, Z3 // 62d17c4a58dd
+ VADDPS 7(AX), Z0, K2, Z3 // 62f17c4a589807000000
+ VADDPS (DI), Z0, K2, Z3 // 62f17c4a581f
+ VADDSD X30, X23, K1, X12 // 6211c70158e6
+ VADDSD X2, X20, K7, X8 // 6271df0758c2 or 6271df2758c2 or 6271df4758c2
+ VADDSD 99(R15)(R15*1), X20, K7, X8 // 6211df0758843f63000000 or 6211df2758843f63000000 or 6211df4758843f63000000
+ VADDSD (DX), X20, K7, X8 // 6271df075802 or 6271df275802 or 6271df475802
+ VADDSS X19, X26, K1, X9 // 62312e0158cb
+ VADDSS X16, X31, K1, X0 // 62b1060158c0 or 62b1062158c0 or 62b1064158c0
+ VADDSS 99(R15)(R15*1), X31, K1, X0 // 6291060158843f63000000 or 6291062158843f63000000 or 6291064158843f63000000
+ VADDSS (DX), X31, K1, X0 // 62f106015802 or 62f106215802 or 62f106415802
+ VALIGND $47, X16, X7, K1, X19 // 62a3450903d82f
+ VALIGND $47, (BX), X7, K1, X19 // 62e34509031b2f
+ VALIGND $47, -17(BP)(SI*1), X7, K1, X19 // 62e34509039c35efffffff2f
+ VALIGND $82, Y23, Y9, K7, Y22 // 62a3352f03f752
+ VALIGND $82, -7(DI)(R8*1), Y9, K7, Y22 // 62a3352f03b407f9ffffff52
+ VALIGND $82, (SP), Y9, K7, Y22 // 62e3352f03342452
+ VALIGND $126, Z6, Z9, K2, Z12 // 6273354a03e67e
+ VALIGND $126, Z25, Z9, K2, Z12 // 6213354a03e17e
+ VALIGND $126, -7(DI)(R8*1), Z9, K2, Z12 // 6233354a03a407f9ffffff7e
+ VALIGND $126, (SP), Z9, K2, Z12 // 6273354a0324247e
+ VALIGND $126, Z6, Z12, K2, Z12 // 62731d4a03e67e
+ VALIGND $126, Z25, Z12, K2, Z12 // 62131d4a03e17e
+ VALIGND $126, -7(DI)(R8*1), Z12, K2, Z12 // 62331d4a03a407f9ffffff7e
+ VALIGND $126, (SP), Z12, K2, Z12 // 62731d4a0324247e
+ VALIGND $126, Z6, Z9, K2, Z17 // 62e3354a03ce7e
+ VALIGND $126, Z25, Z9, K2, Z17 // 6283354a03c97e
+ VALIGND $126, -7(DI)(R8*1), Z9, K2, Z17 // 62a3354a038c07f9ffffff7e
+ VALIGND $126, (SP), Z9, K2, Z17 // 62e3354a030c247e
+ VALIGND $126, Z6, Z12, K2, Z17 // 62e31d4a03ce7e
+ VALIGND $126, Z25, Z12, K2, Z17 // 62831d4a03c97e
+ VALIGND $126, -7(DI)(R8*1), Z12, K2, Z17 // 62a31d4a038c07f9ffffff7e
+ VALIGND $126, (SP), Z12, K2, Z17 // 62e31d4a030c247e
+ VALIGNQ $94, X7, X1, K4, X31 // 6263f50c03ff5e
+ VALIGNQ $94, 15(R8)(R14*4), X1, K4, X31 // 6203f50c03bcb00f0000005e
+ VALIGNQ $94, -7(CX)(DX*4), X1, K4, X31 // 6263f50c03bc91f9ffffff5e
+ VALIGNQ $121, Y0, Y5, K1, Y31 // 6263d52903f879
+ VALIGNQ $121, -7(CX), Y5, K1, Y31 // 6263d52903b9f9ffffff79
+ VALIGNQ $121, 15(DX)(BX*4), Y5, K1, Y31 // 6263d52903bc9a0f00000079
+ VALIGNQ $13, Z3, Z8, K3, Z3 // 62f3bd4b03db0d
+ VALIGNQ $13, Z27, Z8, K3, Z3 // 6293bd4b03db0d
+ VALIGNQ $13, -7(CX), Z8, K3, Z3 // 62f3bd4b0399f9ffffff0d
+ VALIGNQ $13, 15(DX)(BX*4), Z8, K3, Z3 // 62f3bd4b039c9a0f0000000d
+ VALIGNQ $13, Z3, Z2, K3, Z3 // 62f3ed4b03db0d
+ VALIGNQ $13, Z27, Z2, K3, Z3 // 6293ed4b03db0d
+ VALIGNQ $13, -7(CX), Z2, K3, Z3 // 62f3ed4b0399f9ffffff0d
+ VALIGNQ $13, 15(DX)(BX*4), Z2, K3, Z3 // 62f3ed4b039c9a0f0000000d
+ VALIGNQ $13, Z3, Z8, K3, Z21 // 62e3bd4b03eb0d
+ VALIGNQ $13, Z27, Z8, K3, Z21 // 6283bd4b03eb0d
+ VALIGNQ $13, -7(CX), Z8, K3, Z21 // 62e3bd4b03a9f9ffffff0d
+ VALIGNQ $13, 15(DX)(BX*4), Z8, K3, Z21 // 62e3bd4b03ac9a0f0000000d
+ VALIGNQ $13, Z3, Z2, K3, Z21 // 62e3ed4b03eb0d
+ VALIGNQ $13, Z27, Z2, K3, Z21 // 6283ed4b03eb0d
+ VALIGNQ $13, -7(CX), Z2, K3, Z21 // 62e3ed4b03a9f9ffffff0d
+ VALIGNQ $13, 15(DX)(BX*4), Z2, K3, Z21 // 62e3ed4b03ac9a0f0000000d
+ VBLENDMPD X28, X13, K3, X23 // 6282950b65fc
+ VBLENDMPD (R14), X13, K3, X23 // 62c2950b653e
+ VBLENDMPD -7(DI)(R8*8), X13, K3, X23 // 62a2950b65bcc7f9ffffff
+ VBLENDMPD Y27, Y13, K4, Y2 // 6292952c65d3
+ VBLENDMPD (R8), Y13, K4, Y2 // 62d2952c6510
+ VBLENDMPD 15(DX)(BX*2), Y13, K4, Y2 // 62f2952c65945a0f000000
+ VBLENDMPD Z18, Z13, K2, Z1 // 62b2954a65ca
+ VBLENDMPD Z8, Z13, K2, Z1 // 62d2954a65c8
+ VBLENDMPD (R8), Z13, K2, Z1 // 62d2954a6508
+ VBLENDMPD 15(DX)(BX*2), Z13, K2, Z1 // 62f2954a658c5a0f000000
+ VBLENDMPD Z18, Z13, K2, Z15 // 6232954a65fa
+ VBLENDMPD Z8, Z13, K2, Z15 // 6252954a65f8
+ VBLENDMPD (R8), Z13, K2, Z15 // 6252954a6538
+ VBLENDMPD 15(DX)(BX*2), Z13, K2, Z15 // 6272954a65bc5a0f000000
+ VBLENDMPS X15, X9, K2, X24 // 6242350a65c7
+ VBLENDMPS 99(R15)(R15*4), X9, K2, X24 // 6202350a6584bf63000000
+ VBLENDMPS 15(DX), X9, K2, X24 // 6262350a65820f000000
+ VBLENDMPS Y20, Y22, K3, Y15 // 62324d2365fc
+ VBLENDMPS 17(SP)(BP*1), Y22, K3, Y15 // 62724d2365bc2c11000000
+ VBLENDMPS -7(CX)(DX*8), Y22, K3, Y15 // 62724d2365bcd1f9ffffff
+ VBLENDMPS Z20, Z2, K3, Z22 // 62a26d4b65f4
+ VBLENDMPS Z9, Z2, K3, Z22 // 62c26d4b65f1
+ VBLENDMPS 17(SP)(BP*1), Z2, K3, Z22 // 62e26d4b65b42c11000000
+ VBLENDMPS -7(CX)(DX*8), Z2, K3, Z22 // 62e26d4b65b4d1f9ffffff
+ VBLENDMPS Z20, Z31, K3, Z22 // 62a2054365f4
+ VBLENDMPS Z9, Z31, K3, Z22 // 62c2054365f1
+ VBLENDMPS 17(SP)(BP*1), Z31, K3, Z22 // 62e2054365b42c11000000
+ VBLENDMPS -7(CX)(DX*8), Z31, K3, Z22 // 62e2054365b4d1f9ffffff
+ VBLENDMPS Z20, Z2, K3, Z7 // 62b26d4b65fc
+ VBLENDMPS Z9, Z2, K3, Z7 // 62d26d4b65f9
+ VBLENDMPS 17(SP)(BP*1), Z2, K3, Z7 // 62f26d4b65bc2c11000000
+ VBLENDMPS -7(CX)(DX*8), Z2, K3, Z7 // 62f26d4b65bcd1f9ffffff
+ VBLENDMPS Z20, Z31, K3, Z7 // 62b2054365fc
+ VBLENDMPS Z9, Z31, K3, Z7 // 62d2054365f9
+ VBLENDMPS 17(SP)(BP*1), Z31, K3, Z7 // 62f2054365bc2c11000000
+ VBLENDMPS -7(CX)(DX*8), Z31, K3, Z7 // 62f2054365bcd1f9ffffff
+ VBROADCASTF32X4 (CX), K1, Y24 // 62627d291a01
+ VBROADCASTF32X4 99(R15), K1, Y24 // 62427d291a8763000000
+ VBROADCASTF32X4 99(R15)(R15*2), K2, Z12 // 62127d4a1aa47f63000000
+ VBROADCASTF32X4 -7(DI), K2, Z12 // 62727d4a1aa7f9ffffff
+ VBROADCASTF32X4 99(R15)(R15*2), K2, Z16 // 62827d4a1a847f63000000
+ VBROADCASTF32X4 -7(DI), K2, Z16 // 62e27d4a1a87f9ffffff
+ VBROADCASTF64X4 15(R8)(R14*1), K1, Z3 // 6292fd491b9c300f000000
+ VBROADCASTF64X4 15(R8)(R14*2), K1, Z3 // 6292fd491b9c700f000000
+ VBROADCASTF64X4 15(R8)(R14*1), K1, Z12 // 6212fd491ba4300f000000
+ VBROADCASTF64X4 15(R8)(R14*2), K1, Z12 // 6212fd491ba4700f000000
+ VBROADCASTI32X4 -17(BP), K4, Y19 // 62e27d2c5a9defffffff
+ VBROADCASTI32X4 -15(R14)(R15*8), K4, Y19 // 62827d2c5a9cfef1ffffff
+ VBROADCASTI32X4 17(SP)(BP*2), K1, Z19 // 62e27d495a9c6c11000000
+ VBROADCASTI32X4 -7(DI)(R8*4), K1, Z19 // 62a27d495a9c87f9ffffff
+ VBROADCASTI32X4 17(SP)(BP*2), K1, Z15 // 62727d495abc6c11000000
+ VBROADCASTI32X4 -7(DI)(R8*4), K1, Z15 // 62327d495abc87f9ffffff
+ VBROADCASTI64X4 99(R15)(R15*4), K7, Z14 // 6212fd4f5bb4bf63000000
+ VBROADCASTI64X4 15(DX), K7, Z14 // 6272fd4f5bb20f000000
+ VBROADCASTI64X4 99(R15)(R15*4), K7, Z15 // 6212fd4f5bbcbf63000000
+ VBROADCASTI64X4 15(DX), K7, Z15 // 6272fd4f5bba0f000000
+ VBROADCASTSD X3, K7, Y19 // 62e2fd2f19db
+ VBROADCASTSD 99(R15)(R15*8), K7, Y19 // 6282fd2f199cff63000000
+ VBROADCASTSD 7(AX)(CX*8), K7, Y19 // 62e2fd2f199cc807000000
+ VBROADCASTSD X7, K6, Z21 // 62e2fd4e19ef
+ VBROADCASTSD (AX), K6, Z21 // 62e2fd4e1928
+ VBROADCASTSD 7(SI), K6, Z21 // 62e2fd4e19ae07000000
+ VBROADCASTSD X7, K6, Z8 // 6272fd4e19c7
+ VBROADCASTSD (AX), K6, Z8 // 6272fd4e1900
+ VBROADCASTSD 7(SI), K6, Z8 // 6272fd4e198607000000
+ VBROADCASTSS X0, K3, X0 // 62f27d0b18c0
+ VBROADCASTSS -17(BP)(SI*8), K3, X0 // 62f27d0b1884f5efffffff
+ VBROADCASTSS (R15), K3, X0 // 62d27d0b1807
+ VBROADCASTSS X24, K7, Y14 // 62127d2f18f0
+ VBROADCASTSS 7(SI)(DI*8), K7, Y14 // 62727d2f18b4fe07000000
+ VBROADCASTSS -15(R14), K7, Y14 // 62527d2f18b6f1ffffff
+ VBROADCASTSS X20, K4, Z16 // 62a27d4c18c4
+ VBROADCASTSS 7(SI)(DI*1), K4, Z16 // 62e27d4c18843e07000000
+ VBROADCASTSS 15(DX)(BX*8), K4, Z16 // 62e27d4c1884da0f000000
+ VBROADCASTSS X20, K4, Z9 // 62327d4c18cc
+ VBROADCASTSS 7(SI)(DI*1), K4, Z9 // 62727d4c188c3e07000000
+ VBROADCASTSS 15(DX)(BX*8), K4, Z9 // 62727d4c188cda0f000000
+ VCMPPD $65, X9, X7, K4, K4 // 62d1c50cc2e141
+ VCMPPD $65, -15(R14)(R15*1), X7, K4, K4 // 6291c50cc2a43ef1ffffff41
+ VCMPPD $65, -15(BX), X7, K4, K4 // 62f1c50cc2a3f1ffffff41
+ VCMPPD $65, X9, X7, K4, K5 // 62d1c50cc2e941
+ VCMPPD $65, -15(R14)(R15*1), X7, K4, K5 // 6291c50cc2ac3ef1ffffff41
+ VCMPPD $65, -15(BX), X7, K4, K5 // 62f1c50cc2abf1ffffff41
+ VCMPPD $67, Y5, Y21, K7, K2 // 62f1d527c2d543
+ VCMPPD $67, (CX), Y21, K7, K2 // 62f1d527c21143
+ VCMPPD $67, 99(R15), Y21, K7, K2 // 62d1d527c2976300000043
+ VCMPPD $67, Y5, Y21, K7, K7 // 62f1d527c2fd43
+ VCMPPD $67, (CX), Y21, K7, K7 // 62f1d527c23943
+ VCMPPD $67, 99(R15), Y21, K7, K7 // 62d1d527c2bf6300000043
+ VCMPPD $127, Z23, Z20, K2, K0 // 62b1dd42c2c77f
+ VCMPPD $127, Z19, Z20, K2, K0 // 62b1dd42c2c37f
+ VCMPPD $127, Z23, Z0, K2, K0 // 62b1fd4ac2c77f
+ VCMPPD $127, Z19, Z0, K2, K0 // 62b1fd4ac2c37f
+ VCMPPD $127, Z23, Z20, K2, K5 // 62b1dd42c2ef7f
+ VCMPPD $127, Z19, Z20, K2, K5 // 62b1dd42c2eb7f
+ VCMPPD $127, Z23, Z0, K2, K5 // 62b1fd4ac2ef7f
+ VCMPPD $127, Z19, Z0, K2, K5 // 62b1fd4ac2eb7f
+ VCMPPD $0, Z0, Z0, K5, K6 // 62f1fd4dc2f000
+ VCMPPD $0, Z25, Z0, K5, K6 // 6291fd4dc2f100
+ VCMPPD $0, -17(BP)(SI*2), Z0, K5, K6 // 62f1fd4dc2b475efffffff00
+ VCMPPD $0, 7(AX)(CX*2), Z0, K5, K6 // 62f1fd4dc2b4480700000000
+ VCMPPD $0, Z0, Z11, K5, K6 // 62f1a54dc2f000
+ VCMPPD $0, Z25, Z11, K5, K6 // 6291a54dc2f100
+ VCMPPD $0, -17(BP)(SI*2), Z11, K5, K6 // 62f1a54dc2b475efffffff00
+ VCMPPD $0, 7(AX)(CX*2), Z11, K5, K6 // 62f1a54dc2b4480700000000
+ VCMPPD $0, Z0, Z0, K5, K5 // 62f1fd4dc2e800
+ VCMPPD $0, Z25, Z0, K5, K5 // 6291fd4dc2e900
+ VCMPPD $0, -17(BP)(SI*2), Z0, K5, K5 // 62f1fd4dc2ac75efffffff00
+ VCMPPD $0, 7(AX)(CX*2), Z0, K5, K5 // 62f1fd4dc2ac480700000000
+ VCMPPD $0, Z0, Z11, K5, K5 // 62f1a54dc2e800
+ VCMPPD $0, Z25, Z11, K5, K5 // 6291a54dc2e900
+ VCMPPD $0, -17(BP)(SI*2), Z11, K5, K5 // 62f1a54dc2ac75efffffff00
+ VCMPPD $0, 7(AX)(CX*2), Z11, K5, K5 // 62f1a54dc2ac480700000000
+ VCMPPS $97, X14, X7, K3, K1 // 62d1440bc2ce61
+ VCMPPS $97, 7(AX)(CX*4), X7, K3, K1 // 62f1440bc28c880700000061
+ VCMPPS $97, 7(AX)(CX*1), X7, K3, K1 // 62f1440bc28c080700000061
+ VCMPPS $97, X14, X7, K3, K5 // 62d1440bc2ee61
+ VCMPPS $97, 7(AX)(CX*4), X7, K3, K5 // 62f1440bc2ac880700000061
+ VCMPPS $97, 7(AX)(CX*1), X7, K3, K5 // 62f1440bc2ac080700000061
+ VCMPPS $81, Y2, Y16, K4, K3 // 62f17c24c2da51
+ VCMPPS $81, 99(R15)(R15*2), Y16, K4, K3 // 62917c24c29c7f6300000051
+ VCMPPS $81, -7(DI), Y16, K4, K3 // 62f17c24c29ff9ffffff51
+ VCMPPS $81, Y2, Y16, K4, K1 // 62f17c24c2ca51
+ VCMPPS $81, 99(R15)(R15*2), Y16, K4, K1 // 62917c24c28c7f6300000051
+ VCMPPS $81, -7(DI), Y16, K4, K1 // 62f17c24c28ff9ffffff51
+ VCMPPS $42, Z0, Z24, K2, K5 // 62f13c42c2e82a
+ VCMPPS $42, Z26, Z24, K2, K5 // 62913c42c2ea2a
+ VCMPPS $42, Z0, Z12, K2, K5 // 62f11c4ac2e82a
+ VCMPPS $42, Z26, Z12, K2, K5 // 62911c4ac2ea2a
+ VCMPPS $42, Z0, Z24, K2, K4 // 62f13c42c2e02a
+ VCMPPS $42, Z26, Z24, K2, K4 // 62913c42c2e22a
+ VCMPPS $42, Z0, Z12, K2, K4 // 62f11c4ac2e02a
+ VCMPPS $42, Z26, Z12, K2, K4 // 62911c4ac2e22a
+ VCMPPS $79, Z9, Z9, K2, K7 // 62d1344ac2f94f
+ VCMPPS $79, Z25, Z9, K2, K7 // 6291344ac2f94f
+ VCMPPS $79, 15(R8)(R14*1), Z9, K2, K7 // 6291344ac2bc300f0000004f
+ VCMPPS $79, 15(R8)(R14*2), Z9, K2, K7 // 6291344ac2bc700f0000004f
+ VCMPPS $79, Z9, Z3, K2, K7 // 62d1644ac2f94f
+ VCMPPS $79, Z25, Z3, K2, K7 // 6291644ac2f94f
+ VCMPPS $79, 15(R8)(R14*1), Z3, K2, K7 // 6291644ac2bc300f0000004f
+ VCMPPS $79, 15(R8)(R14*2), Z3, K2, K7 // 6291644ac2bc700f0000004f
+ VCMPPS $79, Z9, Z9, K2, K6 // 62d1344ac2f14f
+ VCMPPS $79, Z25, Z9, K2, K6 // 6291344ac2f14f
+ VCMPPS $79, 15(R8)(R14*1), Z9, K2, K6 // 6291344ac2b4300f0000004f
+ VCMPPS $79, 15(R8)(R14*2), Z9, K2, K6 // 6291344ac2b4700f0000004f
+ VCMPPS $79, Z9, Z3, K2, K6 // 62d1644ac2f14f
+ VCMPPS $79, Z25, Z3, K2, K6 // 6291644ac2f14f
+ VCMPPS $79, 15(R8)(R14*1), Z3, K2, K6 // 6291644ac2b4300f0000004f
+ VCMPPS $79, 15(R8)(R14*2), Z3, K2, K6 // 6291644ac2b4700f0000004f
+ VCMPSD $64, X31, X5, K3, K4 // 6291d70bc2e740
+ VCMPSD $64, X31, X5, K3, K6 // 6291d70bc2f740
+ VCMPSD $27, X21, X3, K3, K0 // 62b1e70bc2c51b or 62b1e72bc2c51b or 62b1e74bc2c51b
+ VCMPSD $27, (BX), X3, K3, K0 // 62f1e70bc2031b or 62f1e72bc2031b or 62f1e74bc2031b
+ VCMPSD $27, -17(BP)(SI*1), X3, K3, K0 // 62f1e70bc28435efffffff1b or 62f1e72bc28435efffffff1b or 62f1e74bc28435efffffff1b
+ VCMPSD $27, X21, X3, K3, K7 // 62b1e70bc2fd1b or 62b1e72bc2fd1b or 62b1e74bc2fd1b
+ VCMPSD $27, (BX), X3, K3, K7 // 62f1e70bc23b1b or 62f1e72bc23b1b or 62f1e74bc23b1b
+ VCMPSD $27, -17(BP)(SI*1), X3, K3, K7 // 62f1e70bc2bc35efffffff1b or 62f1e72bc2bc35efffffff1b or 62f1e74bc2bc35efffffff1b
+ VCMPSS $47, X11, X1, K3, K5 // 62d1760bc2eb2f
+ VCMPSS $47, X11, X1, K3, K4 // 62d1760bc2e32f
+ VCMPSS $82, X0, X13, K2, K4 // 62f1160ac2e052 or 62f1162ac2e052 or 62f1164ac2e052
+ VCMPSS $82, -7(DI)(R8*1), X13, K2, K4 // 62b1160ac2a407f9ffffff52 or 62b1162ac2a407f9ffffff52 or 62b1164ac2a407f9ffffff52
+ VCMPSS $82, (SP), X13, K2, K4 // 62f1160ac2242452 or 62f1162ac2242452 or 62f1164ac2242452
+ VCMPSS $82, X0, X13, K2, K6 // 62f1160ac2f052 or 62f1162ac2f052 or 62f1164ac2f052
+ VCMPSS $82, -7(DI)(R8*1), X13, K2, K6 // 62b1160ac2b407f9ffffff52 or 62b1162ac2b407f9ffffff52 or 62b1164ac2b407f9ffffff52
+ VCMPSS $82, (SP), X13, K2, K6 // 62f1160ac2342452 or 62f1162ac2342452 or 62f1164ac2342452
+ VCOMISD X16, X30 // 6221fd082ff0
+ VCOMISS X19, X14 // 62317c082ff3
+ VCOMPRESSPD X23, K1, X26 // 6282fd098afa
+ VCOMPRESSPD X23, K1, (SI) // 62e2fd098a3e
+ VCOMPRESSPD X23, K1, 7(SI)(DI*2) // 62e2fd098abc7e07000000
+ VCOMPRESSPD Y20, K2, Y21 // 62a2fd2a8ae5
+ VCOMPRESSPD Y20, K2, -7(CX)(DX*1) // 62e2fd2a8aa411f9ffffff
+ VCOMPRESSPD Y20, K2, -15(R14)(R15*4) // 6282fd2a8aa4bef1ffffff
+ VCOMPRESSPD Z20, K1, Z9 // 62c2fd498ae1
+ VCOMPRESSPD Z0, K1, Z9 // 62d2fd498ac1
+ VCOMPRESSPD Z20, K1, Z28 // 6282fd498ae4
+ VCOMPRESSPD Z0, K1, Z28 // 6292fd498ac4
+ VCOMPRESSPD Z20, K1, (R14) // 62c2fd498a26
+ VCOMPRESSPD Z0, K1, (R14) // 62d2fd498a06
+ VCOMPRESSPD Z20, K1, -7(DI)(R8*8) // 62a2fd498aa4c7f9ffffff
+ VCOMPRESSPD Z0, K1, -7(DI)(R8*8) // 62b2fd498a84c7f9ffffff
+ VCOMPRESSPS X16, K7, X12 // 62c27d0f8ac4
+ VCOMPRESSPS X16, K7, 17(SP)(BP*8) // 62e27d0f8a84ec11000000
+ VCOMPRESSPS X16, K7, 17(SP)(BP*4) // 62e27d0f8a84ac11000000
+ VCOMPRESSPS Y31, K1, Y6 // 62627d298afe
+ VCOMPRESSPS Y31, K1, 15(DX)(BX*1) // 62627d298abc1a0f000000
+ VCOMPRESSPS Y31, K1, -7(CX)(DX*2) // 62627d298abc51f9ffffff
+ VCOMPRESSPS Z17, K1, Z17 // 62a27d498ac9
+ VCOMPRESSPS Z23, K1, Z17 // 62a27d498af9
+ VCOMPRESSPS Z17, K1, Z0 // 62e27d498ac8
+ VCOMPRESSPS Z23, K1, Z0 // 62e27d498af8
+ VCOMPRESSPS Z17, K1, 99(R15)(R15*4) // 62827d498a8cbf63000000
+ VCOMPRESSPS Z23, K1, 99(R15)(R15*4) // 62827d498abcbf63000000
+ VCOMPRESSPS Z17, K1, 15(DX) // 62e27d498a8a0f000000
+ VCOMPRESSPS Z23, K1, 15(DX) // 62e27d498aba0f000000
+ VCVTDQ2PD X23, K1, X23 // 62a17e09e6ff
+ VCVTDQ2PD 7(SI)(DI*4), K1, X23 // 62e17e09e6bcbe07000000
+ VCVTDQ2PD -7(DI)(R8*2), K1, X23 // 62a17e09e6bc47f9ffffff
+ VCVTDQ2PD X11, K7, Y6 // 62d17e2fe6f3
+ VCVTDQ2PD -17(BP), K7, Y6 // 62f17e2fe6b5efffffff
+ VCVTDQ2PD -15(R14)(R15*8), K7, Y6 // 62917e2fe6b4fef1ffffff
+ VCVTDQ2PD Y11, K2, Z31 // 62417e4ae6fb
+ VCVTDQ2PD (CX), K2, Z31 // 62617e4ae639
+ VCVTDQ2PD 99(R15), K2, Z31 // 62417e4ae6bf63000000
+ VCVTDQ2PD Y11, K2, Z0 // 62d17e4ae6c3
+ VCVTDQ2PD (CX), K2, Z0 // 62f17e4ae601
+ VCVTDQ2PD 99(R15), K2, Z0 // 62d17e4ae68763000000
+ VCVTDQ2PS X24, K4, X31 // 62017c0c5bf8
+ VCVTDQ2PS 17(SP), K4, X31 // 62617c0c5bbc2411000000
+ VCVTDQ2PS -17(BP)(SI*4), K4, X31 // 62617c0c5bbcb5efffffff
+ VCVTDQ2PS Y7, K1, Y19 // 62e17c295bdf
+ VCVTDQ2PS 17(SP)(BP*2), K1, Y19 // 62e17c295b9c6c11000000
+ VCVTDQ2PS -7(DI)(R8*4), K1, Y19 // 62a17c295b9c87f9ffffff
+ VCVTDQ2PS Z6, K3, Z21 // 62e17c4b5bee
+ VCVTDQ2PS Z9, K3, Z21 // 62c17c4b5be9
+ VCVTDQ2PS Z6, K3, Z9 // 62717c4b5bce
+ VCVTDQ2PS Z9, K3, Z9 // 62517c4b5bc9
+ VCVTDQ2PS Z20, K4, Z1 // 62b17c4c5bcc
+ VCVTDQ2PS Z9, K4, Z1 // 62d17c4c5bc9
+ VCVTDQ2PS 99(R15)(R15*2), K4, Z1 // 62917c4c5b8c7f63000000
+ VCVTDQ2PS -7(DI), K4, Z1 // 62f17c4c5b8ff9ffffff
+ VCVTDQ2PS Z20, K4, Z9 // 62317c4c5bcc
+ VCVTDQ2PS Z9, K4, Z9 // 62517c4c5bc9
+ VCVTDQ2PS 99(R15)(R15*2), K4, Z9 // 62117c4c5b8c7f63000000
+ VCVTDQ2PS -7(DI), K4, Z9 // 62717c4c5b8ff9ffffff
+ VCVTPD2DQ Z30, K5, Y6 // 6291ff4de6f6
+ VCVTPD2DQ Z5, K5, Y6 // 62f1ff4de6f5
+ VCVTPD2DQ Z26, K7, Y0 // 6291ff4fe6c2
+ VCVTPD2DQ Z22, K7, Y0 // 62b1ff4fe6c6
+ VCVTPD2DQ -7(CX)(DX*1), K7, Y0 // 62f1ff4fe68411f9ffffff
+ VCVTPD2DQ -15(R14)(R15*4), K7, Y0 // 6291ff4fe684bef1ffffff
+ VCVTPD2DQX X0, K7, X14 // 6271ff0fe6f0
+ VCVTPD2DQX 7(AX), K7, X14 // 6271ff0fe6b007000000
+ VCVTPD2DQX (DI), K7, X14 // 6271ff0fe637
+ VCVTPD2DQY Y3, K6, X11 // 6271ff2ee6db
+ VCVTPD2DQY 15(R8), K6, X11 // 6251ff2ee6980f000000
+ VCVTPD2DQY (BP), K6, X11 // 6271ff2ee65d00
+ VCVTPD2PS Z7, K3, Y5 // 62f1fd4b5aef
+ VCVTPD2PS Z21, K3, Y5 // 62b1fd4b5aed
+ VCVTPD2PS Z16, K7, Y20 // 62a1fd4f5ae0
+ VCVTPD2PS Z25, K7, Y20 // 6281fd4f5ae1
+ VCVTPD2PS 15(DX)(BX*1), K7, Y20 // 62e1fd4f5aa41a0f000000
+ VCVTPD2PS -7(CX)(DX*2), K7, Y20 // 62e1fd4f5aa451f9ffffff
+ VCVTPD2PSX X2, K4, X23 // 62e1fd0c5afa
+ VCVTPD2PSX 99(R15)(R15*1), K4, X23 // 6281fd0c5abc3f63000000
+ VCVTPD2PSX (DX), K4, X23 // 62e1fd0c5a3a
+ VCVTPD2PSY Y12, K4, X20 // 62c1fd2c5ae4
+ VCVTPD2PSY 15(R8)(R14*8), K4, X20 // 6281fd2c5aa4f00f000000
+ VCVTPD2PSY -15(R14)(R15*2), K4, X20 // 6281fd2c5aa47ef1ffffff
+ VCVTPD2UDQ Z27, K4, Y28 // 6201fc4c79e3
+ VCVTPD2UDQ Z25, K4, Y28 // 6201fc4c79e1
+ VCVTPD2UDQ Z23, K2, Y7 // 62b1fc4a79ff
+ VCVTPD2UDQ Z9, K2, Y7 // 62d1fc4a79f9
+ VCVTPD2UDQ 17(SP)(BP*2), K2, Y7 // 62f1fc4a79bc6c11000000
+ VCVTPD2UDQ -7(DI)(R8*4), K2, Y7 // 62b1fc4a79bc87f9ffffff
+ VCVTPD2UDQX X9, K2, X0 // 62d1fc0a79c1
+ VCVTPD2UDQX 7(SI)(DI*8), K2, X0 // 62f1fc0a7984fe07000000
+ VCVTPD2UDQX -15(R14), K2, X0 // 62d1fc0a7986f1ffffff
+ VCVTPD2UDQY Y0, K3, X13 // 6271fc2b79e8
+ VCVTPD2UDQY 7(AX)(CX*4), K3, X13 // 6271fc2b79ac8807000000
+ VCVTPD2UDQY 7(AX)(CX*1), K3, X13 // 6271fc2b79ac0807000000
+ VCVTPH2PS X9, K2, Y12 // 62527d2a13e1
+ VCVTPH2PS -7(DI)(R8*1), K2, Y12 // 62327d2a13a407f9ffffff
+ VCVTPH2PS (SP), K2, Y12 // 62727d2a132424
+ VCVTPH2PS X31, K1, X2 // 62927d0913d7
+ VCVTPH2PS (R8), K1, X2 // 62d27d091310
+ VCVTPH2PS 15(DX)(BX*2), K1, X2 // 62f27d0913945a0f000000
+ VCVTPH2PS Y1, K7, Z22 // 62e27d4f13f1
+ VCVTPH2PS Y1, K7, Z25 // 62627d4f13c9
+ VCVTPH2PS Y14, K1, Z1 // 62d27d4913ce
+ VCVTPH2PS 17(SP)(BP*8), K1, Z1 // 62f27d49138cec11000000
+ VCVTPH2PS 17(SP)(BP*4), K1, Z1 // 62f27d49138cac11000000
+ VCVTPH2PS Y14, K1, Z16 // 62c27d4913c6
+ VCVTPH2PS 17(SP)(BP*8), K1, Z16 // 62e27d491384ec11000000
+ VCVTPH2PS 17(SP)(BP*4), K1, Z16 // 62e27d491384ac11000000
+ VCVTPS2DQ X22, K1, X11 // 62317d095bde
+ VCVTPS2DQ -7(CX), K1, X11 // 62717d095b99f9ffffff
+ VCVTPS2DQ 15(DX)(BX*4), K1, X11 // 62717d095b9c9a0f000000
+ VCVTPS2DQ Y7, K1, Y17 // 62e17d295bcf
+ VCVTPS2DQ 7(SI)(DI*4), K1, Y17 // 62e17d295b8cbe07000000
+ VCVTPS2DQ -7(DI)(R8*2), K1, Y17 // 62a17d295b8c47f9ffffff
+ VCVTPS2DQ Z0, K7, Z6 // 62f17d4f5bf0
+ VCVTPS2DQ Z8, K7, Z6 // 62d17d4f5bf0
+ VCVTPS2DQ Z0, K7, Z2 // 62f17d4f5bd0
+ VCVTPS2DQ Z8, K7, Z2 // 62d17d4f5bd0
+ VCVTPS2DQ Z14, K2, Z15 // 62517d4a5bfe
+ VCVTPS2DQ Z27, K2, Z15 // 62117d4a5bfb
+ VCVTPS2DQ 15(R8)(R14*8), K2, Z15 // 62117d4a5bbcf00f000000
+ VCVTPS2DQ -15(R14)(R15*2), K2, Z15 // 62117d4a5bbc7ef1ffffff
+ VCVTPS2DQ Z14, K2, Z12 // 62517d4a5be6
+ VCVTPS2DQ Z27, K2, Z12 // 62117d4a5be3
+ VCVTPS2DQ 15(R8)(R14*8), K2, Z12 // 62117d4a5ba4f00f000000
+ VCVTPS2DQ -15(R14)(R15*2), K2, Z12 // 62117d4a5ba47ef1ffffff
+ VCVTPS2PD X14, K4, X5 // 62d17c0c5aee
+ VCVTPS2PD 99(R15)(R15*8), K4, X5 // 62917c0c5aacff63000000
+ VCVTPS2PD 7(AX)(CX*8), K4, X5 // 62f17c0c5aacc807000000
+ VCVTPS2PD X0, K1, Y9 // 62717c295ac8
+ VCVTPS2PD 17(SP), K1, Y9 // 62717c295a8c2411000000
+ VCVTPS2PD -17(BP)(SI*4), K1, Y9 // 62717c295a8cb5efffffff
+ VCVTPS2PD Y31, K3, Z11 // 62117c4b5adf
+ VCVTPS2PD Y31, K3, Z5 // 62917c4b5aef
+ VCVTPS2PD Y8, K4, Z13 // 62517c4c5ae8
+ VCVTPS2PD -15(R14)(R15*1), K4, Z13 // 62117c4c5aac3ef1ffffff
+ VCVTPS2PD -15(BX), K4, Z13 // 62717c4c5aabf1ffffff
+ VCVTPS2PD Y8, K4, Z14 // 62517c4c5af0
+ VCVTPS2PD -15(R14)(R15*1), K4, Z14 // 62117c4c5ab43ef1ffffff
+ VCVTPS2PD -15(BX), K4, Z14 // 62717c4c5ab3f1ffffff
+ VCVTPS2PH $126, X7, K5, X17 // 62b37d0d1df97e
+ VCVTPS2PH $126, X7, K5, 17(SP)(BP*1) // 62f37d0d1dbc2c110000007e
+ VCVTPS2PH $126, X7, K5, -7(CX)(DX*8) // 62f37d0d1dbcd1f9ffffff7e
+ VCVTPS2PH $94, Y1, K7, X15 // 62d37d2f1dcf5e
+ VCVTPS2PH $94, Y1, K7, (AX) // 62f37d2f1d085e
+ VCVTPS2PH $94, Y1, K7, 7(SI) // 62f37d2f1d8e070000005e
+ VCVTPS2PH $121, Z5, K7, Y28 // 62937d4f1dec79
+ VCVTPS2PH $121, Z23, K7, Y28 // 62837d4f1dfc79
+ VCVTPS2PH $121, Z5, K7, 7(AX) // 62f37d4f1da80700000079
+ VCVTPS2PH $121, Z23, K7, 7(AX) // 62e37d4f1db80700000079
+ VCVTPS2PH $121, Z5, K7, (DI) // 62f37d4f1d2f79
+ VCVTPS2PH $121, Z23, K7, (DI) // 62e37d4f1d3f79
+ VCVTPS2PH $13, Z2, K6, Y13 // 62d37d4e1dd50d
+ VCVTPS2UDQ X27, K7, X8 // 62117c0f79c3
+ VCVTPS2UDQ 15(R8)(R14*4), K7, X8 // 62117c0f7984b00f000000
+ VCVTPS2UDQ -7(CX)(DX*4), K7, X8 // 62717c0f798491f9ffffff
+ VCVTPS2UDQ Y9, K2, Y12 // 62517c2a79e1
+ VCVTPS2UDQ -17(BP)(SI*8), K2, Y12 // 62717c2a79a4f5efffffff
+ VCVTPS2UDQ (R15), K2, Y12 // 62517c2a7927
+ VCVTPS2UDQ Z13, K5, Z28 // 62417c4d79e5
+ VCVTPS2UDQ Z21, K5, Z28 // 62217c4d79e5
+ VCVTPS2UDQ Z13, K5, Z6 // 62d17c4d79f5
+ VCVTPS2UDQ Z21, K5, Z6 // 62b17c4d79f5
+ VCVTPS2UDQ Z3, K3, Z26 // 62617c4b79d3
+ VCVTPS2UDQ Z0, K3, Z26 // 62617c4b79d0
+ VCVTPS2UDQ (SI), K3, Z26 // 62617c4b7916
+ VCVTPS2UDQ 7(SI)(DI*2), K3, Z26 // 62617c4b79947e07000000
+ VCVTPS2UDQ Z3, K3, Z3 // 62f17c4b79db
+ VCVTPS2UDQ Z0, K3, Z3 // 62f17c4b79d8
+ VCVTPS2UDQ (SI), K3, Z3 // 62f17c4b791e
+ VCVTPS2UDQ 7(SI)(DI*2), K3, Z3 // 62f17c4b799c7e07000000
+ VCVTSD2SI X24, R14 // 62117f082df0 or 62117f282df0 or 62117f482df0
+ VCVTSD2SI X24, AX // 62917f082dc0 or 62917f282dc0 or 62917f482dc0
+ VCVTSD2SS X11, X1, K1, X22 // 62c1f7095af3
+ VCVTSD2SS X8, X7, K1, X6 // 62d1c7095af0 or 62d1c7295af0 or 62d1c7495af0
+ VCVTSD2SS (R14), X7, K1, X6 // 62d1c7095a36 or 62d1c7295a36 or 62d1c7495a36
+ VCVTSD2SS -7(DI)(R8*8), X7, K1, X6 // 62b1c7095ab4c7f9ffffff or 62b1c7295ab4c7f9ffffff or 62b1c7495ab4c7f9ffffff
+ VCVTSD2USIL X31, R9 // 62117f0879cf
+ VCVTSD2USIL X31, CX // 62917f0879cf
+ VCVTSD2USIL X3, SP // 62f17f0879e3 or 62f17f2879e3 or 62f17f4879e3
+ VCVTSD2USIL 99(R15)(R15*4), SP // 62917f0879a4bf63000000 or 62917f2879a4bf63000000 or 62917f4879a4bf63000000
+ VCVTSD2USIL 15(DX), SP // 62f17f0879a20f000000 or 62f17f2879a20f000000 or 62f17f4879a20f000000
+ VCVTSD2USIL X3, R14 // 62717f0879f3 or 62717f2879f3 or 62717f4879f3
+ VCVTSD2USIL 99(R15)(R15*4), R14 // 62117f0879b4bf63000000 or 62117f2879b4bf63000000 or 62117f4879b4bf63000000
+ VCVTSD2USIL 15(DX), R14 // 62717f0879b20f000000 or 62717f2879b20f000000 or 62717f4879b20f000000
+ VCVTSD2USIQ X28, R10 // 6211ff0879d4
+ VCVTSD2USIQ X28, CX // 6291ff0879cc
+ VCVTSD2USIQ X20, R9 // 6231ff0879cc or 6231ff2879cc or 6231ff4879cc
+ VCVTSD2USIQ (CX), R9 // 6271ff087909 or 6271ff287909 or 6271ff487909
+ VCVTSD2USIQ 99(R15), R9 // 6251ff08798f63000000 or 6251ff28798f63000000 or 6251ff48798f63000000
+ VCVTSD2USIQ X20, R13 // 6231ff0879ec or 6231ff2879ec or 6231ff4879ec
+ VCVTSD2USIQ (CX), R13 // 6271ff087929 or 6271ff287929 or 6271ff487929
+ VCVTSD2USIQ 99(R15), R13 // 6251ff0879af63000000 or 6251ff2879af63000000 or 6251ff4879af63000000
+ VCVTSI2SDL AX, X7, X24 // 626147082ac0 or 626147282ac0 or 626147482ac0
+ VCVTSI2SDL R9, X7, X24 // 624147082ac1 or 624147282ac1 or 624147482ac1
+ VCVTSI2SDL 99(R15)(R15*8), X7, X24 // 620147082a84ff63000000 or 620147282a84ff63000000 or 620147482a84ff63000000
+ VCVTSI2SDL 7(AX)(CX*8), X7, X24 // 626147082a84c807000000 or 626147282a84c807000000 or 626147482a84c807000000
+ VCVTSI2SDQ DX, X16, X20 // 62e1ff002ae2 or 62e1ff202ae2 or 62e1ff402ae2
+ VCVTSI2SDQ BP, X16, X20 // 62e1ff002ae5 or 62e1ff202ae5 or 62e1ff402ae5
+ VCVTSI2SDQ 99(R15)(R15*2), X16, X20 // 6281ff002aa47f63000000 or 6281ff202aa47f63000000 or 6281ff402aa47f63000000
+ VCVTSI2SDQ -7(DI), X16, X20 // 62e1ff002aa7f9ffffff or 62e1ff202aa7f9ffffff or 62e1ff402aa7f9ffffff
+ VCVTSI2SSL CX, X28, X17 // 62e11e002ac9 or 62e11e202ac9 or 62e11e402ac9
+ VCVTSI2SSL SP, X28, X17 // 62e11e002acc or 62e11e202acc or 62e11e402acc
+ VCVTSI2SSL (AX), X28, X17 // 62e11e002a08 or 62e11e202a08 or 62e11e402a08
+ VCVTSI2SSL 7(SI), X28, X17 // 62e11e002a8e07000000 or 62e11e202a8e07000000 or 62e11e402a8e07000000
+ VCVTSS2SD X6, X16, K7, X11 // 62717e075ade
+ VCVTSS2SD X12, X22, K2, X6 // 62d14e025af4 or 62d14e225af4 or 62d14e425af4
+ VCVTSS2SD (BX), X22, K2, X6 // 62f14e025a33 or 62f14e225a33 or 62f14e425a33
+ VCVTSS2SD -17(BP)(SI*1), X22, K2, X6 // 62f14e025ab435efffffff or 62f14e225ab435efffffff or 62f14e425ab435efffffff
+ VCVTSS2SI X16, R9 // 62317e082dc8
+ VCVTSS2SI X16, CX // 62b17e082dc8
+ VCVTSS2SI X28, SP // 62917e082de4 or 62917e282de4 or 62917e482de4
+ VCVTSS2SI X28, R14 // 62117e082df4 or 62117e282df4 or 62117e482df4
+ VCVTSS2USIL X11, AX // 62d17e0879c3
+ VCVTSS2USIL X11, R9 // 62517e0879cb
+ VCVTSS2USIL X1, CX // 62f17e0879c9 or 62f17e2879c9 or 62f17e4879c9
+ VCVTSS2USIL 17(SP)(BP*1), CX // 62f17e08798c2c11000000 or 62f17e28798c2c11000000 or 62f17e48798c2c11000000
+ VCVTSS2USIL -7(CX)(DX*8), CX // 62f17e08798cd1f9ffffff or 62f17e28798cd1f9ffffff or 62f17e48798cd1f9ffffff
+ VCVTSS2USIL X1, SP // 62f17e0879e1 or 62f17e2879e1 or 62f17e4879e1
+ VCVTSS2USIL 17(SP)(BP*1), SP // 62f17e0879a42c11000000 or 62f17e2879a42c11000000 or 62f17e4879a42c11000000
+ VCVTSS2USIL -7(CX)(DX*8), SP // 62f17e0879a4d1f9ffffff or 62f17e2879a4d1f9ffffff or 62f17e4879a4d1f9ffffff
+ VCVTSS2USIQ X19, DX // 62b1fe0879d3
+ VCVTSS2USIQ X19, BP // 62b1fe0879eb
+ VCVTSS2USIQ X13, R10 // 6251fe0879d5 or 6251fe2879d5 or 6251fe4879d5
+ VCVTSS2USIQ -17(BP)(SI*2), R10 // 6271fe08799475efffffff or 6271fe28799475efffffff or 6271fe48799475efffffff
+ VCVTSS2USIQ 7(AX)(CX*2), R10 // 6271fe0879944807000000 or 6271fe2879944807000000 or 6271fe4879944807000000
+ VCVTSS2USIQ X13, CX // 62d1fe0879cd or 62d1fe2879cd or 62d1fe4879cd
+ VCVTSS2USIQ -17(BP)(SI*2), CX // 62f1fe08798c75efffffff or 62f1fe28798c75efffffff or 62f1fe48798c75efffffff
+ VCVTSS2USIQ 7(AX)(CX*2), CX // 62f1fe08798c4807000000 or 62f1fe28798c4807000000 or 62f1fe48798c4807000000
+ VCVTTPD2DQ Z16, K4, Y30 // 6221fd4ce6f0
+ VCVTTPD2DQ Z13, K4, Y30 // 6241fd4ce6f5
+ VCVTTPD2DQ Z12, K1, Y26 // 6241fd49e6d4
+ VCVTTPD2DQ Z27, K1, Y26 // 6201fd49e6d3
+ VCVTTPD2DQ 7(AX), K1, Y26 // 6261fd49e69007000000
+ VCVTTPD2DQ (DI), K1, Y26 // 6261fd49e617
+ VCVTTPD2DQX X14, K3, X2 // 62d1fd0be6d6
+ VCVTTPD2DQX 15(R8)(R14*1), K3, X2 // 6291fd0be694300f000000
+ VCVTTPD2DQX 15(R8)(R14*2), K3, X2 // 6291fd0be694700f000000
+ VCVTTPD2DQY Y7, K4, X0 // 62f1fd2ce6c7
+ VCVTTPD2DQY -7(CX), K4, X0 // 62f1fd2ce681f9ffffff
+ VCVTTPD2DQY 15(DX)(BX*4), K4, X0 // 62f1fd2ce6849a0f000000
+ VCVTTPD2UDQ Z9, K3, Y30 // 6241fc4b78f1
+ VCVTTPD2UDQ Z12, K3, Y30 // 6241fc4b78f4
+ VCVTTPD2UDQ Z6, K7, Y31 // 6261fc4f78fe
+ VCVTTPD2UDQ Z25, K7, Y31 // 6201fc4f78f9
+ VCVTTPD2UDQ -17(BP)(SI*8), K7, Y31 // 6261fc4f78bcf5efffffff
+ VCVTTPD2UDQ (R15), K7, Y31 // 6241fc4f783f
+ VCVTTPD2UDQX X17, K4, X11 // 6231fc0c78d9
+ VCVTTPD2UDQX 99(R15)(R15*4), K4, X11 // 6211fc0c789cbf63000000
+ VCVTTPD2UDQX 15(DX), K4, X11 // 6271fc0c789a0f000000
+ VCVTTPD2UDQY Y22, K4, X18 // 62a1fc2c78d6
+ VCVTTPD2UDQY (AX), K4, X18 // 62e1fc2c7810
+ VCVTTPD2UDQY 7(SI), K4, X18 // 62e1fc2c789607000000
+ VCVTTPS2DQ X24, K4, X2 // 62917e0c5bd0
+ VCVTTPS2DQ 99(R15)(R15*2), K4, X2 // 62917e0c5b947f63000000
+ VCVTTPS2DQ -7(DI), K4, X2 // 62f17e0c5b97f9ffffff
+ VCVTTPS2DQ Y0, K2, Y7 // 62f17e2a5bf8
+ VCVTTPS2DQ 15(R8)(R14*4), K2, Y7 // 62917e2a5bbcb00f000000
+ VCVTTPS2DQ -7(CX)(DX*4), K2, Y7 // 62f17e2a5bbc91f9ffffff
+ VCVTTPS2DQ Z20, K2, Z0 // 62b17e4a5bc4
+ VCVTTPS2DQ Z28, K2, Z0 // 62917e4a5bc4
+ VCVTTPS2DQ Z20, K2, Z6 // 62b17e4a5bf4
+ VCVTTPS2DQ Z28, K2, Z6 // 62917e4a5bf4
+ VCVTTPS2DQ Z9, K3, Z3 // 62d17e4b5bd9
+ VCVTTPS2DQ Z19, K3, Z3 // 62b17e4b5bdb
+ VCVTTPS2DQ 7(SI)(DI*1), K3, Z3 // 62f17e4b5b9c3e07000000
+ VCVTTPS2DQ 15(DX)(BX*8), K3, Z3 // 62f17e4b5b9cda0f000000
+ VCVTTPS2DQ Z9, K3, Z30 // 62417e4b5bf1
+ VCVTTPS2DQ Z19, K3, Z30 // 62217e4b5bf3
+ VCVTTPS2DQ 7(SI)(DI*1), K3, Z30 // 62617e4b5bb43e07000000
+ VCVTTPS2DQ 15(DX)(BX*8), K3, Z30 // 62617e4b5bb4da0f000000
+ VCVTTPS2UDQ X22, K2, X26 // 62217c0a78d6
+ VCVTTPS2UDQ 15(DX)(BX*1), K2, X26 // 62617c0a78941a0f000000
+ VCVTTPS2UDQ -7(CX)(DX*2), K2, X26 // 62617c0a789451f9ffffff
+ VCVTTPS2UDQ Y13, K1, Y24 // 62417c2978c5
+ VCVTTPS2UDQ 17(SP)(BP*1), K1, Y24 // 62617c2978842c11000000
+ VCVTTPS2UDQ -7(CX)(DX*8), K1, Y24 // 62617c297884d1f9ffffff
+ VCVTTPS2UDQ Z2, K7, Z18 // 62e17c4f78d2
+ VCVTTPS2UDQ Z21, K7, Z18 // 62a17c4f78d5
+ VCVTTPS2UDQ Z2, K7, Z24 // 62617c4f78c2
+ VCVTTPS2UDQ Z21, K7, Z24 // 62217c4f78c5
+ VCVTTPS2UDQ Z6, K1, Z7 // 62f17c4978fe
+ VCVTTPS2UDQ Z16, K1, Z7 // 62b17c4978f8
+ VCVTTPS2UDQ -7(CX), K1, Z7 // 62f17c4978b9f9ffffff
+ VCVTTPS2UDQ 15(DX)(BX*4), K1, Z7 // 62f17c4978bc9a0f000000
+ VCVTTPS2UDQ Z6, K1, Z13 // 62717c4978ee
+ VCVTTPS2UDQ Z16, K1, Z13 // 62317c4978e8
+ VCVTTPS2UDQ -7(CX), K1, Z13 // 62717c4978a9f9ffffff
+ VCVTTPS2UDQ 15(DX)(BX*4), K1, Z13 // 62717c4978ac9a0f000000
+ VCVTTSD2SI X30, R9 // 62117f082cce or 62117f282cce or 62117f482cce
+ VCVTTSD2SI X30, CX // 62917f082cce or 62917f282cce or 62917f482cce
+ VCVTTSD2USIL X12, SP // 62d17f0878e4
+ VCVTTSD2USIL X12, R14 // 62517f0878f4
+ VCVTTSD2USIL X23, AX // 62b17f0878c7 or 62b17f2878c7 or 62b17f4878c7
+ VCVTTSD2USIL 17(SP)(BP*2), AX // 62f17f0878846c11000000 or 62f17f2878846c11000000 or 62f17f4878846c11000000
+ VCVTTSD2USIL -7(DI)(R8*4), AX // 62b17f08788487f9ffffff or 62b17f28788487f9ffffff or 62b17f48788487f9ffffff
+ VCVTTSD2USIL X23, R9 // 62317f0878cf or 62317f2878cf or 62317f4878cf
+ VCVTTSD2USIL 17(SP)(BP*2), R9 // 62717f08788c6c11000000 or 62717f28788c6c11000000 or 62717f48788c6c11000000
+ VCVTTSD2USIL -7(DI)(R8*4), R9 // 62317f08788c87f9ffffff or 62317f28788c87f9ffffff or 62317f48788c87f9ffffff
+ VCVTTSD2USIQ X30, R10 // 6211ff0878d6
+ VCVTTSD2USIQ X30, CX // 6291ff0878ce
+ VCVTTSD2USIQ X8, R9 // 6251ff0878c8 or 6251ff2878c8 or 6251ff4878c8
+ VCVTTSD2USIQ 15(R8), R9 // 6251ff0878880f000000 or 6251ff2878880f000000 or 6251ff4878880f000000
+ VCVTTSD2USIQ (BP), R9 // 6271ff08784d00 or 6271ff28784d00 or 6271ff48784d00
+ VCVTTSD2USIQ X8, R13 // 6251ff0878e8 or 6251ff2878e8 or 6251ff4878e8
+ VCVTTSD2USIQ 15(R8), R13 // 6251ff0878a80f000000 or 6251ff2878a80f000000 or 6251ff4878a80f000000
+ VCVTTSD2USIQ (BP), R13 // 6271ff08786d00 or 6271ff28786d00 or 6271ff48786d00
+ VCVTTSS2SI X20, CX // 62b17e082ccc
+ VCVTTSS2SI X20, SP // 62b17e082ce4
+ VCVTTSS2SIQ X26, R10 // 6211fe082cd2 or 6211fe282cd2 or 6211fe482cd2
+ VCVTTSS2SIQ X26, CX // 6291fe082cca or 6291fe282cca or 6291fe482cca
+ VCVTTSS2USIL X19, R9 // 62317e0878cb
+ VCVTTSS2USIL X19, CX // 62b17e0878cb
+ VCVTTSS2USIL X0, SP // 62f17e0878e0 or 62f17e2878e0 or 62f17e4878e0
+ VCVTTSS2USIL 99(R15)(R15*4), SP // 62917e0878a4bf63000000 or 62917e2878a4bf63000000 or 62917e4878a4bf63000000
+ VCVTTSS2USIL 15(DX), SP // 62f17e0878a20f000000 or 62f17e2878a20f000000 or 62f17e4878a20f000000
+ VCVTTSS2USIL X0, R14 // 62717e0878f0 or 62717e2878f0 or 62717e4878f0
+ VCVTTSS2USIL 99(R15)(R15*4), R14 // 62117e0878b4bf63000000 or 62117e2878b4bf63000000 or 62117e4878b4bf63000000
+ VCVTTSS2USIL 15(DX), R14 // 62717e0878b20f000000 or 62717e2878b20f000000 or 62717e4878b20f000000
+ VCVTTSS2USIQ X31, R9 // 6211fe0878cf
+ VCVTTSS2USIQ X31, R13 // 6211fe0878ef
+ VCVTTSS2USIQ X16, DX // 62b1fe0878d0 or 62b1fe2878d0 or 62b1fe4878d0
+ VCVTTSS2USIQ (CX), DX // 62f1fe087811 or 62f1fe287811 or 62f1fe487811
+ VCVTTSS2USIQ 99(R15), DX // 62d1fe08789763000000 or 62d1fe28789763000000 or 62d1fe48789763000000
+ VCVTTSS2USIQ X16, BP // 62b1fe0878e8 or 62b1fe2878e8 or 62b1fe4878e8
+ VCVTTSS2USIQ (CX), BP // 62f1fe087829 or 62f1fe287829 or 62f1fe487829
+ VCVTTSS2USIQ 99(R15), BP // 62d1fe0878af63000000 or 62d1fe2878af63000000 or 62d1fe4878af63000000
+ VCVTUDQ2PD X8, K4, X7 // 62d17e0c7af8
+ VCVTUDQ2PD 17(SP)(BP*2), K4, X7 // 62f17e0c7abc6c11000000
+ VCVTUDQ2PD -7(DI)(R8*4), K4, X7 // 62b17e0c7abc87f9ffffff
+ VCVTUDQ2PD X1, K1, Y1 // 62f17e297ac9
+ VCVTUDQ2PD 15(R8)(R14*1), K1, Y1 // 62917e297a8c300f000000
+ VCVTUDQ2PD 15(R8)(R14*2), K1, Y1 // 62917e297a8c700f000000
+ VCVTUDQ2PD Y26, K3, Z13 // 62117e4b7aea
+ VCVTUDQ2PD (AX), K3, Z13 // 62717e4b7a28
+ VCVTUDQ2PD 7(SI), K3, Z13 // 62717e4b7aae07000000
+ VCVTUDQ2PS X15, K4, X0 // 62d17f0c7ac7
+ VCVTUDQ2PS 15(R8), K4, X0 // 62d17f0c7a800f000000
+ VCVTUDQ2PS (BP), K4, X0 // 62f17f0c7a4500
+ VCVTUDQ2PS Y12, K5, Y30 // 62417f2d7af4
+ VCVTUDQ2PS (R14), K5, Y30 // 62417f2d7a36
+ VCVTUDQ2PS -7(DI)(R8*8), K5, Y30 // 62217f2d7ab4c7f9ffffff
+ VCVTUDQ2PS Z22, K7, Z18 // 62a17f4f7ad6
+ VCVTUDQ2PS Z7, K7, Z18 // 62e17f4f7ad7
+ VCVTUDQ2PS Z22, K7, Z8 // 62317f4f7ac6
+ VCVTUDQ2PS Z7, K7, Z8 // 62717f4f7ac7
+ VCVTUDQ2PS Z20, K7, Z2 // 62b17f4f7ad4
+ VCVTUDQ2PS Z9, K7, Z2 // 62d17f4f7ad1
+ VCVTUDQ2PS (BX), K7, Z2 // 62f17f4f7a13
+ VCVTUDQ2PS -17(BP)(SI*1), K7, Z2 // 62f17f4f7a9435efffffff
+ VCVTUDQ2PS Z20, K7, Z31 // 62217f4f7afc
+ VCVTUDQ2PS Z9, K7, Z31 // 62417f4f7af9
+ VCVTUDQ2PS (BX), K7, Z31 // 62617f4f7a3b
+ VCVTUDQ2PS -17(BP)(SI*1), K7, Z31 // 62617f4f7abc35efffffff
+ VCVTUSI2SDL AX, X7, X22 // 62e147087bf0 or 62e147287bf0 or 62e147487bf0
+ VCVTUSI2SDL R9, X7, X22 // 62c147087bf1 or 62c147287bf1 or 62c147487bf1
+ VCVTUSI2SDL 99(R15)(R15*2), X7, X22 // 628147087bb47f63000000 or 628147287bb47f63000000 or 628147487bb47f63000000
+ VCVTUSI2SDL -7(DI), X7, X22 // 62e147087bb7f9ffffff or 62e147287bb7f9ffffff or 62e147487bb7f9ffffff
+ VCVTUSI2SDQ R10, X7, X19 // 62c1c7087bda or 62c1c7287bda or 62c1c7487bda
+ VCVTUSI2SDQ CX, X7, X19 // 62e1c7087bd9 or 62e1c7287bd9 or 62e1c7487bd9
+ VCVTUSI2SDQ 15(R8)(R14*8), X7, X19 // 6281c7087b9cf00f000000 or 6281c7287b9cf00f000000 or 6281c7487b9cf00f000000
+ VCVTUSI2SDQ -15(R14)(R15*2), X7, X19 // 6281c7087b9c7ef1ffffff or 6281c7287b9c7ef1ffffff or 6281c7487b9c7ef1ffffff
+ VCVTUSI2SDQ R9, X31, X16 // 62c187007bc1
+ VCVTUSI2SDQ R13, X31, X16 // 62c187007bc5
+ VCVTUSI2SSL CX, X7, X1 // 62f146087bc9 or 62f146287bc9 or 62f146487bc9
+ VCVTUSI2SSL SP, X7, X1 // 62f146087bcc or 62f146287bcc or 62f146487bcc
+ VCVTUSI2SSL -7(CX)(DX*1), X7, X1 // 62f146087b8c11f9ffffff or 62f146287b8c11f9ffffff or 62f146487b8c11f9ffffff
+ VCVTUSI2SSL -15(R14)(R15*4), X7, X1 // 629146087b8cbef1ffffff or 629146287b8cbef1ffffff or 629146487b8cbef1ffffff
+ VCVTUSI2SSL R14, X15, X9 // 625106087bce
+ VCVTUSI2SSL AX, X15, X9 // 627106087bc8
+ VCVTUSI2SSQ DX, X0, X12 // 6271fe087be2 or 6271fe287be2 or 6271fe487be2
+ VCVTUSI2SSQ BP, X0, X12 // 6271fe087be5 or 6271fe287be5 or 6271fe487be5
+ VCVTUSI2SSQ -15(R14)(R15*1), X0, X12 // 6211fe087ba43ef1ffffff or 6211fe287ba43ef1ffffff or 6211fe487ba43ef1ffffff
+ VCVTUSI2SSQ -15(BX), X0, X12 // 6271fe087ba3f1ffffff or 6271fe287ba3f1ffffff or 6271fe487ba3f1ffffff
+ VCVTUSI2SSQ R10, X14, X12 // 62518e087be2
+ VCVTUSI2SSQ CX, X14, X12 // 62718e087be1
+ VDIVPD X26, X3, K2, X8 // 6211e50a5ec2
+ VDIVPD (SI), X3, K2, X8 // 6271e50a5e06
+ VDIVPD 7(SI)(DI*2), X3, K2, X8 // 6271e50a5e847e07000000
+ VDIVPD Y7, Y21, K3, Y13 // 6271d5235eef
+ VDIVPD -7(CX)(DX*1), Y21, K3, Y13 // 6271d5235eac11f9ffffff
+ VDIVPD -15(R14)(R15*4), Y21, K3, Y13 // 6211d5235eacbef1ffffff
+ VDIVPD Z16, Z21, K3, Z14 // 6231d5435ef0
+ VDIVPD Z9, Z21, K3, Z14 // 6251d5435ef1
+ VDIVPD Z16, Z8, K3, Z14 // 6231bd4b5ef0
+ VDIVPD Z9, Z8, K3, Z14 // 6251bd4b5ef1
+ VDIVPD Z16, Z21, K3, Z15 // 6231d5435ef8
+ VDIVPD Z9, Z21, K3, Z15 // 6251d5435ef9
+ VDIVPD Z16, Z8, K3, Z15 // 6231bd4b5ef8
+ VDIVPD Z9, Z8, K3, Z15 // 6251bd4b5ef9
+ VDIVPD Z0, Z23, K3, Z20 // 62e1c5435ee0
+ VDIVPD Z11, Z23, K3, Z20 // 62c1c5435ee3
+ VDIVPD -17(BP)(SI*2), Z23, K3, Z20 // 62e1c5435ea475efffffff
+ VDIVPD 7(AX)(CX*2), Z23, K3, Z20 // 62e1c5435ea44807000000
+ VDIVPD Z0, Z19, K3, Z20 // 62e1e5435ee0
+ VDIVPD Z11, Z19, K3, Z20 // 62c1e5435ee3
+ VDIVPD -17(BP)(SI*2), Z19, K3, Z20 // 62e1e5435ea475efffffff
+ VDIVPD 7(AX)(CX*2), Z19, K3, Z20 // 62e1e5435ea44807000000
+ VDIVPD Z0, Z23, K3, Z0 // 62f1c5435ec0
+ VDIVPD Z11, Z23, K3, Z0 // 62d1c5435ec3
+ VDIVPD -17(BP)(SI*2), Z23, K3, Z0 // 62f1c5435e8475efffffff
+ VDIVPD 7(AX)(CX*2), Z23, K3, Z0 // 62f1c5435e844807000000
+ VDIVPD Z0, Z19, K3, Z0 // 62f1e5435ec0
+ VDIVPD Z11, Z19, K3, Z0 // 62d1e5435ec3
+ VDIVPD -17(BP)(SI*2), Z19, K3, Z0 // 62f1e5435e8475efffffff
+ VDIVPD 7(AX)(CX*2), Z19, K3, Z0 // 62f1e5435e844807000000
+ VDIVPS X28, X13, K2, X23 // 6281140a5efc
+ VDIVPS 17(SP)(BP*8), X13, K2, X23 // 62e1140a5ebcec11000000
+ VDIVPS 17(SP)(BP*4), X13, K2, X23 // 62e1140a5ebcac11000000
+ VDIVPS Y18, Y13, K1, Y30 // 622114295ef2
+ VDIVPS 15(DX)(BX*1), Y13, K1, Y30 // 626114295eb41a0f000000
+ VDIVPS -7(CX)(DX*2), Y13, K1, Y30 // 626114295eb451f9ffffff
+ VDIVPS Z0, Z24, K2, Z0 // 62f13c425ec0
+ VDIVPS Z26, Z24, K2, Z0 // 62913c425ec2
+ VDIVPS Z0, Z12, K2, Z0 // 62f11c4a5ec0
+ VDIVPS Z26, Z12, K2, Z0 // 62911c4a5ec2
+ VDIVPS Z0, Z24, K2, Z25 // 62613c425ec8
+ VDIVPS Z26, Z24, K2, Z25 // 62013c425eca
+ VDIVPS Z0, Z12, K2, Z25 // 62611c4a5ec8
+ VDIVPS Z26, Z12, K2, Z25 // 62011c4a5eca
+ VDIVPS Z9, Z9, K1, Z9 // 625134495ec9
+ VDIVPS Z28, Z9, K1, Z9 // 621134495ecc
+ VDIVPS 15(R8)(R14*1), Z9, K1, Z9 // 621134495e8c300f000000
+ VDIVPS 15(R8)(R14*2), Z9, K1, Z9 // 621134495e8c700f000000
+ VDIVPS Z9, Z25, K1, Z9 // 625134415ec9
+ VDIVPS Z28, Z25, K1, Z9 // 621134415ecc
+ VDIVPS 15(R8)(R14*1), Z25, K1, Z9 // 621134415e8c300f000000
+ VDIVPS 15(R8)(R14*2), Z25, K1, Z9 // 621134415e8c700f000000
+ VDIVPS Z9, Z9, K1, Z3 // 62d134495ed9
+ VDIVPS Z28, Z9, K1, Z3 // 629134495edc
+ VDIVPS 15(R8)(R14*1), Z9, K1, Z3 // 629134495e9c300f000000
+ VDIVPS 15(R8)(R14*2), Z9, K1, Z3 // 629134495e9c700f000000
+ VDIVPS Z9, Z25, K1, Z3 // 62d134415ed9
+ VDIVPS Z28, Z25, K1, Z3 // 629134415edc
+ VDIVPS 15(R8)(R14*1), Z25, K1, Z3 // 629134415e9c300f000000
+ VDIVPS 15(R8)(R14*2), Z25, K1, Z3 // 629134415e9c700f000000
+ VDIVSD X15, X9, K7, X24 // 6241b70f5ec7
+ VDIVSD X21, X18, K1, X26 // 6221ef015ed5 or 6221ef215ed5 or 6221ef415ed5
+ VDIVSD 7(AX)(CX*4), X18, K1, X26 // 6261ef015e948807000000 or 6261ef215e948807000000 or 6261ef415e948807000000
+ VDIVSD 7(AX)(CX*1), X18, K1, X26 // 6261ef015e940807000000 or 6261ef215e940807000000 or 6261ef415e940807000000
+ VDIVSS X31, X11, K1, X1 // 629126095ecf
+ VDIVSS X0, X7, K1, X3 // 62f146095ed8 or 62f146295ed8 or 62f146495ed8
+ VDIVSS 15(DX)(BX*1), X7, K1, X3 // 62f146095e9c1a0f000000 or 62f146295e9c1a0f000000 or 62f146495e9c1a0f000000
+ VDIVSS -7(CX)(DX*2), X7, K1, X3 // 62f146095e9c51f9ffffff or 62f146295e9c51f9ffffff or 62f146495e9c51f9ffffff
+ VEXPANDPD X24, K3, X0 // 6292fd0b88c0
+ VEXPANDPD 7(SI)(DI*4), K3, X0 // 62f2fd0b8884be07000000
+ VEXPANDPD -7(DI)(R8*2), K3, X0 // 62b2fd0b888447f9ffffff
+ VEXPANDPD Y8, K4, Y24 // 6242fd2c88c0
+ VEXPANDPD -17(BP), K4, Y24 // 6262fd2c8885efffffff
+ VEXPANDPD -15(R14)(R15*8), K4, Y24 // 6202fd2c8884fef1ffffff
+ VEXPANDPD Z26, K5, Z30 // 6202fd4d88f2
+ VEXPANDPD Z22, K5, Z30 // 6222fd4d88f6
+ VEXPANDPD (CX), K5, Z30 // 6262fd4d8831
+ VEXPANDPD 99(R15), K5, Z30 // 6242fd4d88b763000000
+ VEXPANDPD Z26, K5, Z5 // 6292fd4d88ea
+ VEXPANDPD Z22, K5, Z5 // 62b2fd4d88ee
+ VEXPANDPD (CX), K5, Z5 // 62f2fd4d8829
+ VEXPANDPD 99(R15), K5, Z5 // 62d2fd4d88af63000000
+ VEXPANDPS X7, K7, X20 // 62e27d0f88e7
+ VEXPANDPS 17(SP), K7, X20 // 62e27d0f88a42411000000
+ VEXPANDPS -17(BP)(SI*4), K7, X20 // 62e27d0f88a4b5efffffff
+ VEXPANDPS Y24, K7, Y11 // 62127d2f88d8
+ VEXPANDPS 17(SP)(BP*2), K7, Y11 // 62727d2f889c6c11000000
+ VEXPANDPS -7(DI)(R8*4), K7, Y11 // 62327d2f889c87f9ffffff
+ VEXPANDPS Z16, K6, Z7 // 62b27d4e88f8
+ VEXPANDPS Z25, K6, Z7 // 62927d4e88f9
+ VEXPANDPS 99(R15)(R15*2), K6, Z7 // 62927d4e88bc7f63000000
+ VEXPANDPS -7(DI), K6, Z7 // 62f27d4e88bff9ffffff
+ VEXPANDPS Z16, K6, Z21 // 62a27d4e88e8
+ VEXPANDPS Z25, K6, Z21 // 62827d4e88e9
+ VEXPANDPS 99(R15)(R15*2), K6, Z21 // 62827d4e88ac7f63000000
+ VEXPANDPS -7(DI), K6, Z21 // 62e27d4e88aff9ffffff
+ VEXTRACTF32X4 $1, Y5, K3, X9 // 62d37d2b19e901
+ VEXTRACTF32X4 $1, Y5, K3, 7(AX) // 62f37d2b19a80700000001
+ VEXTRACTF32X4 $1, Y5, K3, (DI) // 62f37d2b192f01
+ VEXTRACTF32X4 $3, Z14, K7, X7 // 62737d4f19f703
+ VEXTRACTF32X4 $3, Z13, K7, X7 // 62737d4f19ef03
+ VEXTRACTF32X4 $3, Z14, K7, 99(R15)(R15*1) // 62137d4f19b43f6300000003
+ VEXTRACTF32X4 $3, Z13, K7, 99(R15)(R15*1) // 62137d4f19ac3f6300000003
+ VEXTRACTF32X4 $3, Z14, K7, (DX) // 62737d4f193203
+ VEXTRACTF32X4 $3, Z13, K7, (DX) // 62737d4f192a03
+ VEXTRACTF64X4 $0, Z2, K2, Y16 // 62b3fd4a1bd000
+ VEXTRACTF64X4 $0, Z7, K2, Y16 // 62b3fd4a1bf800
+ VEXTRACTF64X4 $0, Z2, K2, 15(R8)(R14*8) // 6293fd4a1b94f00f00000000
+ VEXTRACTF64X4 $0, Z7, K2, 15(R8)(R14*8) // 6293fd4a1bbcf00f00000000
+ VEXTRACTF64X4 $0, Z2, K2, -15(R14)(R15*2) // 6293fd4a1b947ef1ffffff00
+ VEXTRACTF64X4 $0, Z7, K2, -15(R14)(R15*2) // 6293fd4a1bbc7ef1ffffff00
+ VEXTRACTI32X4 $0, Y9, K5, X31 // 62137d2d39cf00
+ VEXTRACTI32X4 $0, Y9, K5, 7(SI)(DI*1) // 62737d2d398c3e0700000000
+ VEXTRACTI32X4 $0, Y9, K5, 15(DX)(BX*8) // 62737d2d398cda0f00000000
+ VEXTRACTI32X4 $1, Z27, K3, X3 // 62637d4b39db01
+ VEXTRACTI32X4 $1, Z25, K3, X3 // 62637d4b39cb01
+ VEXTRACTI32X4 $1, Z27, K3, -7(DI)(R8*1) // 62237d4b399c07f9ffffff01
+ VEXTRACTI32X4 $1, Z25, K3, -7(DI)(R8*1) // 62237d4b398c07f9ffffff01
+ VEXTRACTI32X4 $1, Z27, K3, (SP) // 62637d4b391c2401
+ VEXTRACTI32X4 $1, Z25, K3, (SP) // 62637d4b390c2401
+ VEXTRACTI64X4 $1, Z3, K3, Y6 // 62f3fd4b3bde01
+ VEXTRACTI64X4 $1, Z0, K3, Y6 // 62f3fd4b3bc601
+ VEXTRACTI64X4 $1, Z3, K3, 7(AX)(CX*4) // 62f3fd4b3b9c880700000001
+ VEXTRACTI64X4 $1, Z0, K3, 7(AX)(CX*4) // 62f3fd4b3b84880700000001
+ VEXTRACTI64X4 $1, Z3, K3, 7(AX)(CX*1) // 62f3fd4b3b9c080700000001
+ VEXTRACTI64X4 $1, Z0, K3, 7(AX)(CX*1) // 62f3fd4b3b84080700000001
+ VFIXUPIMMPD $97, X30, X0, K3, X13 // 6213fd0b54ee61
+ VFIXUPIMMPD $97, (AX), X0, K3, X13 // 6273fd0b542861
+ VFIXUPIMMPD $97, 7(SI), X0, K3, X13 // 6273fd0b54ae0700000061
+ VFIXUPIMMPD $81, Y6, Y7, K3, Y3 // 62f3c52b54de51
+ VFIXUPIMMPD $81, (SI), Y7, K3, Y3 // 62f3c52b541e51
+ VFIXUPIMMPD $81, 7(SI)(DI*2), Y7, K3, Y3 // 62f3c52b549c7e0700000051
+ VFIXUPIMMPD $42, Z22, Z8, K2, Z14 // 6233bd4a54f62a
+ VFIXUPIMMPD $42, Z25, Z8, K2, Z14 // 6213bd4a54f12a
+ VFIXUPIMMPD $42, Z22, Z24, K2, Z14 // 6233bd4254f62a
+ VFIXUPIMMPD $42, Z25, Z24, K2, Z14 // 6213bd4254f12a
+ VFIXUPIMMPD $42, Z22, Z8, K2, Z7 // 62b3bd4a54fe2a
+ VFIXUPIMMPD $42, Z25, Z8, K2, Z7 // 6293bd4a54f92a
+ VFIXUPIMMPD $42, Z22, Z24, K2, Z7 // 62b3bd4254fe2a
+ VFIXUPIMMPD $42, Z25, Z24, K2, Z7 // 6293bd4254f92a
+ VFIXUPIMMPD $79, Z0, Z6, K1, Z1 // 62f3cd4954c84f
+ VFIXUPIMMPD $79, Z8, Z6, K1, Z1 // 62d3cd4954c84f
+ VFIXUPIMMPD $79, -7(CX)(DX*1), Z6, K1, Z1 // 62f3cd49548c11f9ffffff4f
+ VFIXUPIMMPD $79, -15(R14)(R15*4), Z6, K1, Z1 // 6293cd49548cbef1ffffff4f
+ VFIXUPIMMPD $79, Z0, Z2, K1, Z1 // 62f3ed4954c84f
+ VFIXUPIMMPD $79, Z8, Z2, K1, Z1 // 62d3ed4954c84f
+ VFIXUPIMMPD $79, -7(CX)(DX*1), Z2, K1, Z1 // 62f3ed49548c11f9ffffff4f
+ VFIXUPIMMPD $79, -15(R14)(R15*4), Z2, K1, Z1 // 6293ed49548cbef1ffffff4f
+ VFIXUPIMMPD $79, Z0, Z6, K1, Z16 // 62e3cd4954c04f
+ VFIXUPIMMPD $79, Z8, Z6, K1, Z16 // 62c3cd4954c04f
+ VFIXUPIMMPD $79, -7(CX)(DX*1), Z6, K1, Z16 // 62e3cd49548411f9ffffff4f
+ VFIXUPIMMPD $79, -15(R14)(R15*4), Z6, K1, Z16 // 6283cd495484bef1ffffff4f
+ VFIXUPIMMPD $79, Z0, Z2, K1, Z16 // 62e3ed4954c04f
+ VFIXUPIMMPD $79, Z8, Z2, K1, Z16 // 62c3ed4954c04f
+ VFIXUPIMMPD $79, -7(CX)(DX*1), Z2, K1, Z16 // 62e3ed49548411f9ffffff4f
+ VFIXUPIMMPD $79, -15(R14)(R15*4), Z2, K1, Z16 // 6283ed495484bef1ffffff4f
+ VFIXUPIMMPS $64, X11, X14, K2, X16 // 62c30d0a54c340
+ VFIXUPIMMPS $64, (BX), X14, K2, X16 // 62e30d0a540340
+ VFIXUPIMMPS $64, -17(BP)(SI*1), X14, K2, X16 // 62e30d0a548435efffffff40
+ VFIXUPIMMPS $27, Y26, Y11, K1, Y26 // 6203252954d21b
+ VFIXUPIMMPS $27, 17(SP)(BP*8), Y11, K1, Y26 // 626325295494ec110000001b
+ VFIXUPIMMPS $27, 17(SP)(BP*4), Y11, K1, Y26 // 626325295494ac110000001b
+ VFIXUPIMMPS $47, Z11, Z14, K7, Z15 // 62530d4f54fb2f
+ VFIXUPIMMPS $47, Z5, Z14, K7, Z15 // 62730d4f54fd2f
+ VFIXUPIMMPS $47, Z11, Z27, K7, Z15 // 6253254754fb2f
+ VFIXUPIMMPS $47, Z5, Z27, K7, Z15 // 6273254754fd2f
+ VFIXUPIMMPS $47, Z11, Z14, K7, Z12 // 62530d4f54e32f
+ VFIXUPIMMPS $47, Z5, Z14, K7, Z12 // 62730d4f54e52f
+ VFIXUPIMMPS $47, Z11, Z27, K7, Z12 // 6253254754e32f
+ VFIXUPIMMPS $47, Z5, Z27, K7, Z12 // 6273254754e52f
+ VFIXUPIMMPS $82, Z2, Z5, K1, Z13 // 6273554954ea52
+ VFIXUPIMMPS $82, 15(DX)(BX*1), Z5, K1, Z13 // 6273554954ac1a0f00000052
+ VFIXUPIMMPS $82, -7(CX)(DX*2), Z5, K1, Z13 // 6273554954ac51f9ffffff52
+ VFIXUPIMMPS $82, Z2, Z23, K1, Z13 // 6273454154ea52
+ VFIXUPIMMPS $82, 15(DX)(BX*1), Z23, K1, Z13 // 6273454154ac1a0f00000052
+ VFIXUPIMMPS $82, -7(CX)(DX*2), Z23, K1, Z13 // 6273454154ac51f9ffffff52
+ VFIXUPIMMPS $82, Z2, Z5, K1, Z14 // 6273554954f252
+ VFIXUPIMMPS $82, 15(DX)(BX*1), Z5, K1, Z14 // 6273554954b41a0f00000052
+ VFIXUPIMMPS $82, -7(CX)(DX*2), Z5, K1, Z14 // 6273554954b451f9ffffff52
+ VFIXUPIMMPS $82, Z2, Z23, K1, Z14 // 6273454154f252
+ VFIXUPIMMPS $82, 15(DX)(BX*1), Z23, K1, Z14 // 6273454154b41a0f00000052
+ VFIXUPIMMPS $82, -7(CX)(DX*2), Z23, K1, Z14 // 6273454154b451f9ffffff52
+ VFIXUPIMMSD $126, X8, X19, K1, X14 // 6253e50155f07e
+ VFIXUPIMMSD $94, X23, X26, K1, X8 // 6233ad0155c75e or 6233ad2155c75e or 6233ad4155c75e
+ VFIXUPIMMSD $94, (SI), X26, K1, X8 // 6273ad0155065e or 6273ad2155065e or 6273ad4155065e
+ VFIXUPIMMSD $94, 7(SI)(DI*2), X26, K1, X8 // 6273ad0155847e070000005e or 6273ad2155847e070000005e or 6273ad4155847e070000005e
+ VFIXUPIMMSS $121, X23, X16, K7, X12 // 62337d0755e779
+ VFIXUPIMMSS $13, X31, X11, K2, X23 // 6283250a55ff0d or 6283252a55ff0d or 6283254a55ff0d
+ VFIXUPIMMSS $13, 17(SP)(BP*2), X11, K2, X23 // 62e3250a55bc6c110000000d or 62e3252a55bc6c110000000d or 62e3254a55bc6c110000000d
+ VFIXUPIMMSS $13, -7(DI)(R8*4), X11, K2, X23 // 62a3250a55bc87f9ffffff0d or 62a3252a55bc87f9ffffff0d or 62a3254a55bc87f9ffffff0d
+ VFMADD132PD X0, X14, K4, X24 // 62628d0c98c0
+ VFMADD132PD 15(R8)(R14*4), X14, K4, X24 // 62028d0c9884b00f000000
+ VFMADD132PD -7(CX)(DX*4), X14, K4, X24 // 62628d0c988491f9ffffff
+ VFMADD132PD Y18, Y14, K1, Y12 // 62328d2998e2
+ VFMADD132PD 7(SI)(DI*4), Y14, K1, Y12 // 62728d2998a4be07000000
+ VFMADD132PD -7(DI)(R8*2), Y14, K1, Y12 // 62328d2998a447f9ffffff
+ VFMADD132PD Z28, Z26, K3, Z6 // 6292ad4398f4
+ VFMADD132PD Z6, Z26, K3, Z6 // 62f2ad4398f6
+ VFMADD132PD Z28, Z14, K3, Z6 // 62928d4b98f4
+ VFMADD132PD Z6, Z14, K3, Z6 // 62f28d4b98f6
+ VFMADD132PD Z28, Z26, K3, Z14 // 6212ad4398f4
+ VFMADD132PD Z6, Z26, K3, Z14 // 6272ad4398f6
+ VFMADD132PD Z28, Z14, K3, Z14 // 62128d4b98f4
+ VFMADD132PD Z6, Z14, K3, Z14 // 62728d4b98f6
+ VFMADD132PD Z3, Z26, K4, Z13 // 6272ad4498eb
+ VFMADD132PD Z0, Z26, K4, Z13 // 6272ad4498e8
+ VFMADD132PD -17(BP), Z26, K4, Z13 // 6272ad4498adefffffff
+ VFMADD132PD -15(R14)(R15*8), Z26, K4, Z13 // 6212ad4498acfef1ffffff
+ VFMADD132PD Z3, Z3, K4, Z13 // 6272e54c98eb
+ VFMADD132PD Z0, Z3, K4, Z13 // 6272e54c98e8
+ VFMADD132PD -17(BP), Z3, K4, Z13 // 6272e54c98adefffffff
+ VFMADD132PD -15(R14)(R15*8), Z3, K4, Z13 // 6212e54c98acfef1ffffff
+ VFMADD132PD Z3, Z26, K4, Z21 // 62e2ad4498eb
+ VFMADD132PD Z0, Z26, K4, Z21 // 62e2ad4498e8
+ VFMADD132PD -17(BP), Z26, K4, Z21 // 62e2ad4498adefffffff
+ VFMADD132PD -15(R14)(R15*8), Z26, K4, Z21 // 6282ad4498acfef1ffffff
+ VFMADD132PD Z3, Z3, K4, Z21 // 62e2e54c98eb
+ VFMADD132PD Z0, Z3, K4, Z21 // 62e2e54c98e8
+ VFMADD132PD -17(BP), Z3, K4, Z21 // 62e2e54c98adefffffff
+ VFMADD132PD -15(R14)(R15*8), Z3, K4, Z21 // 6282e54c98acfef1ffffff
+ VFMADD132PS X2, X23, K5, X11 // 6272450598da
+ VFMADD132PS (R8), X23, K5, X11 // 625245059818
+ VFMADD132PS 15(DX)(BX*2), X23, K5, X11 // 62724505989c5a0f000000
+ VFMADD132PS Y3, Y18, K7, Y31 // 62626d2798fb
+ VFMADD132PS 17(SP), Y18, K7, Y31 // 62626d2798bc2411000000
+ VFMADD132PS -17(BP)(SI*4), Y18, K7, Y31 // 62626d2798bcb5efffffff
+ VFMADD132PS Z3, Z11, K7, Z21 // 62e2254f98eb
+ VFMADD132PS Z12, Z11, K7, Z21 // 62c2254f98ec
+ VFMADD132PS Z3, Z25, K7, Z21 // 62e2354798eb
+ VFMADD132PS Z12, Z25, K7, Z21 // 62c2354798ec
+ VFMADD132PS Z3, Z11, K7, Z13 // 6272254f98eb
+ VFMADD132PS Z12, Z11, K7, Z13 // 6252254f98ec
+ VFMADD132PS Z3, Z25, K7, Z13 // 6272354798eb
+ VFMADD132PS Z12, Z25, K7, Z13 // 6252354798ec
+ VFMADD132PS Z23, Z23, K6, Z27 // 6222454698df
+ VFMADD132PS Z6, Z23, K6, Z27 // 6262454698de
+ VFMADD132PS 17(SP)(BP*2), Z23, K6, Z27 // 62624546989c6c11000000
+ VFMADD132PS -7(DI)(R8*4), Z23, K6, Z27 // 62224546989c87f9ffffff
+ VFMADD132PS Z23, Z5, K6, Z27 // 6222554e98df
+ VFMADD132PS Z6, Z5, K6, Z27 // 6262554e98de
+ VFMADD132PS 17(SP)(BP*2), Z5, K6, Z27 // 6262554e989c6c11000000
+ VFMADD132PS -7(DI)(R8*4), Z5, K6, Z27 // 6222554e989c87f9ffffff
+ VFMADD132PS Z23, Z23, K6, Z15 // 6232454698ff
+ VFMADD132PS Z6, Z23, K6, Z15 // 6272454698fe
+ VFMADD132PS 17(SP)(BP*2), Z23, K6, Z15 // 6272454698bc6c11000000
+ VFMADD132PS -7(DI)(R8*4), Z23, K6, Z15 // 6232454698bc87f9ffffff
+ VFMADD132PS Z23, Z5, K6, Z15 // 6232554e98ff
+ VFMADD132PS Z6, Z5, K6, Z15 // 6272554e98fe
+ VFMADD132PS 17(SP)(BP*2), Z5, K6, Z15 // 6272554e98bc6c11000000
+ VFMADD132PS -7(DI)(R8*4), Z5, K6, Z15 // 6232554e98bc87f9ffffff
+ VFMADD132SD X25, X5, K3, X20 // 6282d50b99e1
+ VFMADD132SD X13, X9, K7, X0 // 62d2b50f99c5 or 62d2b52f99c5 or 62d2b54f99c5
+ VFMADD132SD 17(SP)(BP*8), X9, K7, X0 // 62f2b50f9984ec11000000 or 62f2b52f9984ec11000000 or 62f2b54f9984ec11000000
+ VFMADD132SD 17(SP)(BP*4), X9, K7, X0 // 62f2b50f9984ac11000000 or 62f2b52f9984ac11000000 or 62f2b54f9984ac11000000
+ VFMADD132SS X9, X8, K4, X2 // 62d23d0c99d1
+ VFMADD132SS X11, X31, K4, X2 // 62d2050499d3 or 62d2052499d3 or 62d2054499d3
+ VFMADD132SS 15(R8), X31, K4, X2 // 62d2050499900f000000 or 62d2052499900f000000 or 62d2054499900f000000
+ VFMADD132SS (BP), X31, K4, X2 // 62f20504995500 or 62f20524995500 or 62f20544995500
+ VFMADD213PD X14, X5, K7, X22 // 62c2d50fa8f6
+ VFMADD213PD 17(SP)(BP*1), X5, K7, X22 // 62e2d50fa8b42c11000000
+ VFMADD213PD -7(CX)(DX*8), X5, K7, X22 // 62e2d50fa8b4d1f9ffffff
+ VFMADD213PD Y7, Y2, K2, Y24 // 6262ed2aa8c7
+ VFMADD213PD 7(AX), Y2, K2, Y24 // 6262ed2aa88007000000
+ VFMADD213PD (DI), Y2, K2, Y24 // 6262ed2aa807
+ VFMADD213PD Z16, Z21, K5, Z8 // 6232d545a8c0
+ VFMADD213PD Z13, Z21, K5, Z8 // 6252d545a8c5
+ VFMADD213PD Z16, Z5, K5, Z8 // 6232d54da8c0
+ VFMADD213PD Z13, Z5, K5, Z8 // 6252d54da8c5
+ VFMADD213PD Z16, Z21, K5, Z28 // 6222d545a8e0
+ VFMADD213PD Z13, Z21, K5, Z28 // 6242d545a8e5
+ VFMADD213PD Z16, Z5, K5, Z28 // 6222d54da8e0
+ VFMADD213PD Z13, Z5, K5, Z28 // 6242d54da8e5
+ VFMADD213PD Z6, Z22, K3, Z12 // 6272cd43a8e6
+ VFMADD213PD Z8, Z22, K3, Z12 // 6252cd43a8e0
+ VFMADD213PD 15(R8), Z22, K3, Z12 // 6252cd43a8a00f000000
+ VFMADD213PD (BP), Z22, K3, Z12 // 6272cd43a86500
+ VFMADD213PD Z6, Z11, K3, Z12 // 6272a54ba8e6
+ VFMADD213PD Z8, Z11, K3, Z12 // 6252a54ba8e0
+ VFMADD213PD 15(R8), Z11, K3, Z12 // 6252a54ba8a00f000000
+ VFMADD213PD (BP), Z11, K3, Z12 // 6272a54ba86500
+ VFMADD213PD Z6, Z22, K3, Z27 // 6262cd43a8de
+ VFMADD213PD Z8, Z22, K3, Z27 // 6242cd43a8d8
+ VFMADD213PD 15(R8), Z22, K3, Z27 // 6242cd43a8980f000000
+ VFMADD213PD (BP), Z22, K3, Z27 // 6262cd43a85d00
+ VFMADD213PD Z6, Z11, K3, Z27 // 6262a54ba8de
+ VFMADD213PD Z8, Z11, K3, Z27 // 6242a54ba8d8
+ VFMADD213PD 15(R8), Z11, K3, Z27 // 6242a54ba8980f000000
+ VFMADD213PD (BP), Z11, K3, Z27 // 6262a54ba85d00
+ VFMADD213PS X7, X17, K4, X0 // 62f27504a8c7
+ VFMADD213PS -17(BP)(SI*2), X17, K4, X0 // 62f27504a88475efffffff
+ VFMADD213PS 7(AX)(CX*2), X17, K4, X0 // 62f27504a8844807000000
+ VFMADD213PS Y8, Y14, K2, Y21 // 62c20d2aa8e8
+ VFMADD213PS 99(R15)(R15*1), Y14, K2, Y21 // 62820d2aa8ac3f63000000
+ VFMADD213PS (DX), Y14, K2, Y21 // 62e20d2aa82a
+ VFMADD213PS Z9, Z12, K2, Z25 // 62421d4aa8c9
+ VFMADD213PS Z12, Z12, K2, Z25 // 62421d4aa8cc
+ VFMADD213PS Z9, Z17, K2, Z25 // 62427542a8c9
+ VFMADD213PS Z12, Z17, K2, Z25 // 62427542a8cc
+ VFMADD213PS Z9, Z12, K2, Z12 // 62521d4aa8e1
+ VFMADD213PS Z12, Z12, K2, Z12 // 62521d4aa8e4
+ VFMADD213PS Z9, Z17, K2, Z12 // 62527542a8e1
+ VFMADD213PS Z12, Z17, K2, Z12 // 62527542a8e4
+ VFMADD213PS Z8, Z3, K3, Z6 // 62d2654ba8f0
+ VFMADD213PS Z2, Z3, K3, Z6 // 62f2654ba8f2
+ VFMADD213PS 15(R8)(R14*8), Z3, K3, Z6 // 6292654ba8b4f00f000000
+ VFMADD213PS -15(R14)(R15*2), Z3, K3, Z6 // 6292654ba8b47ef1ffffff
+ VFMADD213PS Z8, Z21, K3, Z6 // 62d25543a8f0
+ VFMADD213PS Z2, Z21, K3, Z6 // 62f25543a8f2
+ VFMADD213PS 15(R8)(R14*8), Z21, K3, Z6 // 62925543a8b4f00f000000
+ VFMADD213PS -15(R14)(R15*2), Z21, K3, Z6 // 62925543a8b47ef1ffffff
+ VFMADD213PS Z8, Z3, K3, Z25 // 6242654ba8c8
+ VFMADD213PS Z2, Z3, K3, Z25 // 6262654ba8ca
+ VFMADD213PS 15(R8)(R14*8), Z3, K3, Z25 // 6202654ba88cf00f000000
+ VFMADD213PS -15(R14)(R15*2), Z3, K3, Z25 // 6202654ba88c7ef1ffffff
+ VFMADD213PS Z8, Z21, K3, Z25 // 62425543a8c8
+ VFMADD213PS Z2, Z21, K3, Z25 // 62625543a8ca
+ VFMADD213PS 15(R8)(R14*8), Z21, K3, Z25 // 62025543a88cf00f000000
+ VFMADD213PS -15(R14)(R15*2), Z21, K3, Z25 // 62025543a88c7ef1ffffff
+ VFMADD213SD X0, X11, K3, X15 // 6272a50ba9f8
+ VFMADD213SD X27, X8, K3, X18 // 6282bd0ba9d3 or 6282bd2ba9d3 or 6282bd4ba9d3
+ VFMADD213SD 7(SI)(DI*4), X8, K3, X18 // 62e2bd0ba994be07000000 or 62e2bd2ba994be07000000 or 62e2bd4ba994be07000000
+ VFMADD213SD -7(DI)(R8*2), X8, K3, X18 // 62a2bd0ba99447f9ffffff or 62a2bd2ba99447f9ffffff or 62a2bd4ba99447f9ffffff
+ VFMADD213SS X18, X3, K2, X25 // 6222650aa9ca
+ VFMADD213SS X15, X28, K1, X15 // 62521d01a9ff or 62521d21a9ff or 62521d41a9ff
+ VFMADD213SS 15(R8)(R14*8), X28, K1, X15 // 62121d01a9bcf00f000000 or 62121d21a9bcf00f000000 or 62121d41a9bcf00f000000
+ VFMADD213SS -15(R14)(R15*2), X28, K1, X15 // 62121d01a9bc7ef1ffffff or 62121d21a9bc7ef1ffffff or 62121d41a9bc7ef1ffffff
+ VFMADD231PD X8, X13, K2, X7 // 62d2950ab8f8
+ VFMADD231PD 15(R8)(R14*1), X13, K2, X7 // 6292950ab8bc300f000000
+ VFMADD231PD 15(R8)(R14*2), X13, K2, X7 // 6292950ab8bc700f000000
+ VFMADD231PD Y24, Y11, K1, Y20 // 6282a529b8e0
+ VFMADD231PD -17(BP)(SI*8), Y11, K1, Y20 // 62e2a529b8a4f5efffffff
+ VFMADD231PD (R15), Y11, K1, Y20 // 62c2a529b827
+ VFMADD231PD Z0, Z7, K7, Z3 // 62f2c54fb8d8
+ VFMADD231PD Z6, Z7, K7, Z3 // 62f2c54fb8de
+ VFMADD231PD Z0, Z9, K7, Z3 // 62f2b54fb8d8
+ VFMADD231PD Z6, Z9, K7, Z3 // 62f2b54fb8de
+ VFMADD231PD Z0, Z7, K7, Z27 // 6262c54fb8d8
+ VFMADD231PD Z6, Z7, K7, Z27 // 6262c54fb8de
+ VFMADD231PD Z0, Z9, K7, Z27 // 6262b54fb8d8
+ VFMADD231PD Z6, Z9, K7, Z27 // 6262b54fb8de
+ VFMADD231PD Z9, Z3, K1, Z20 // 62c2e549b8e1
+ VFMADD231PD Z19, Z3, K1, Z20 // 62a2e549b8e3
+ VFMADD231PD -15(R14)(R15*1), Z3, K1, Z20 // 6282e549b8a43ef1ffffff
+ VFMADD231PD -15(BX), Z3, K1, Z20 // 62e2e549b8a3f1ffffff
+ VFMADD231PD Z9, Z30, K1, Z20 // 62c28d41b8e1
+ VFMADD231PD Z19, Z30, K1, Z20 // 62a28d41b8e3
+ VFMADD231PD -15(R14)(R15*1), Z30, K1, Z20 // 62828d41b8a43ef1ffffff
+ VFMADD231PD -15(BX), Z30, K1, Z20 // 62e28d41b8a3f1ffffff
+ VFMADD231PD Z9, Z3, K1, Z28 // 6242e549b8e1
+ VFMADD231PD Z19, Z3, K1, Z28 // 6222e549b8e3
+ VFMADD231PD -15(R14)(R15*1), Z3, K1, Z28 // 6202e549b8a43ef1ffffff
+ VFMADD231PD -15(BX), Z3, K1, Z28 // 6262e549b8a3f1ffffff
+ VFMADD231PD Z9, Z30, K1, Z28 // 62428d41b8e1
+ VFMADD231PD Z19, Z30, K1, Z28 // 62228d41b8e3
+ VFMADD231PD -15(R14)(R15*1), Z30, K1, Z28 // 62028d41b8a43ef1ffffff
+ VFMADD231PD -15(BX), Z30, K1, Z28 // 62628d41b8a3f1ffffff
+ VFMADD231PS X0, X7, K1, X24 // 62624509b8c0
+ VFMADD231PS (R14), X7, K1, X24 // 62424509b806
+ VFMADD231PS -7(DI)(R8*8), X7, K1, X24 // 62224509b884c7f9ffffff
+ VFMADD231PS Y18, Y5, K1, Y1 // 62b25529b8ca
+ VFMADD231PS 7(SI)(DI*8), Y5, K1, Y1 // 62f25529b88cfe07000000
+ VFMADD231PS -15(R14), Y5, K1, Y1 // 62d25529b88ef1ffffff
+ VFMADD231PS Z18, Z11, K7, Z12 // 6232254fb8e2
+ VFMADD231PS Z24, Z11, K7, Z12 // 6212254fb8e0
+ VFMADD231PS Z18, Z5, K7, Z12 // 6232554fb8e2
+ VFMADD231PS Z24, Z5, K7, Z12 // 6212554fb8e0
+ VFMADD231PS Z18, Z11, K7, Z22 // 62a2254fb8f2
+ VFMADD231PS Z24, Z11, K7, Z22 // 6282254fb8f0
+ VFMADD231PS Z18, Z5, K7, Z22 // 62a2554fb8f2
+ VFMADD231PS Z24, Z5, K7, Z22 // 6282554fb8f0
+ VFMADD231PS Z6, Z7, K2, Z2 // 62f2454ab8d6
+ VFMADD231PS Z16, Z7, K2, Z2 // 62b2454ab8d0
+ VFMADD231PS 7(AX)(CX*4), Z7, K2, Z2 // 62f2454ab8948807000000
+ VFMADD231PS 7(AX)(CX*1), Z7, K2, Z2 // 62f2454ab8940807000000
+ VFMADD231PS Z6, Z13, K2, Z2 // 62f2154ab8d6
+ VFMADD231PS Z16, Z13, K2, Z2 // 62b2154ab8d0
+ VFMADD231PS 7(AX)(CX*4), Z13, K2, Z2 // 62f2154ab8948807000000
+ VFMADD231PS 7(AX)(CX*1), Z13, K2, Z2 // 62f2154ab8940807000000
+ VFMADD231PS Z6, Z7, K2, Z21 // 62e2454ab8ee
+ VFMADD231PS Z16, Z7, K2, Z21 // 62a2454ab8e8
+ VFMADD231PS 7(AX)(CX*4), Z7, K2, Z21 // 62e2454ab8ac8807000000
+ VFMADD231PS 7(AX)(CX*1), Z7, K2, Z21 // 62e2454ab8ac0807000000
+ VFMADD231PS Z6, Z13, K2, Z21 // 62e2154ab8ee
+ VFMADD231PS Z16, Z13, K2, Z21 // 62a2154ab8e8
+ VFMADD231PS 7(AX)(CX*4), Z13, K2, Z21 // 62e2154ab8ac8807000000
+ VFMADD231PS 7(AX)(CX*1), Z13, K2, Z21 // 62e2154ab8ac0807000000
+ VFMADD231SD X11, X1, K4, X22 // 62c2f50cb9f3
+ VFMADD231SD X8, X7, K1, X6 // 62d2c509b9f0 or 62d2c529b9f0 or 62d2c549b9f0
+ VFMADD231SD 17(SP), X7, K1, X6 // 62f2c509b9b42411000000 or 62f2c529b9b42411000000 or 62f2c549b9b42411000000
+ VFMADD231SD -17(BP)(SI*4), X7, K1, X6 // 62f2c509b9b4b5efffffff or 62f2c529b9b4b5efffffff or 62f2c549b9b4b5efffffff
+ VFMADD231SS X28, X3, K3, X31 // 6202650bb9fc
+ VFMADD231SS X7, X24, K4, X20 // 62e23d04b9e7 or 62e23d24b9e7 or 62e23d44b9e7
+ VFMADD231SS -15(R14)(R15*1), X24, K4, X20 // 62823d04b9a43ef1ffffff or 62823d24b9a43ef1ffffff or 62823d44b9a43ef1ffffff
+ VFMADD231SS -15(BX), X24, K4, X20 // 62e23d04b9a3f1ffffff or 62e23d24b9a3f1ffffff or 62e23d44b9a3f1ffffff
+ VFMADDSUB132PD X12, X16, K5, X20 // 62c2fd0596e4
+ VFMADDSUB132PD 99(R15)(R15*4), X16, K5, X20 // 6282fd0596a4bf63000000
+ VFMADDSUB132PD 15(DX), X16, K5, X20 // 62e2fd0596a20f000000
+ VFMADDSUB132PD Y9, Y20, K7, Y20 // 62c2dd2796e1
+ VFMADDSUB132PD 7(SI)(DI*1), Y20, K7, Y20 // 62e2dd2796a43e07000000
+ VFMADDSUB132PD 15(DX)(BX*8), Y20, K7, Y20 // 62e2dd2796a4da0f000000
+ VFMADDSUB132PD Z13, Z1, K7, Z6 // 62d2f54f96f5
+ VFMADDSUB132PD Z13, Z15, K7, Z6 // 62d2854f96f5
+ VFMADDSUB132PD Z13, Z1, K7, Z22 // 62c2f54f96f5
+ VFMADDSUB132PD Z13, Z15, K7, Z22 // 62c2854f96f5
+ VFMADDSUB132PD Z2, Z22, K6, Z18 // 62e2cd4696d2
+ VFMADDSUB132PD Z31, Z22, K6, Z18 // 6282cd4696d7
+ VFMADDSUB132PD (SI), Z22, K6, Z18 // 62e2cd469616
+ VFMADDSUB132PD 7(SI)(DI*2), Z22, K6, Z18 // 62e2cd4696947e07000000
+ VFMADDSUB132PD Z2, Z7, K6, Z18 // 62e2c54e96d2
+ VFMADDSUB132PD Z31, Z7, K6, Z18 // 6282c54e96d7
+ VFMADDSUB132PD (SI), Z7, K6, Z18 // 62e2c54e9616
+ VFMADDSUB132PD 7(SI)(DI*2), Z7, K6, Z18 // 62e2c54e96947e07000000
+ VFMADDSUB132PD Z2, Z22, K6, Z8 // 6272cd4696c2
+ VFMADDSUB132PD Z31, Z22, K6, Z8 // 6212cd4696c7
+ VFMADDSUB132PD (SI), Z22, K6, Z8 // 6272cd469606
+ VFMADDSUB132PD 7(SI)(DI*2), Z22, K6, Z8 // 6272cd4696847e07000000
+ VFMADDSUB132PD Z2, Z7, K6, Z8 // 6272c54e96c2
+ VFMADDSUB132PD Z31, Z7, K6, Z8 // 6212c54e96c7
+ VFMADDSUB132PD (SI), Z7, K6, Z8 // 6272c54e9606
+ VFMADDSUB132PD 7(SI)(DI*2), Z7, K6, Z8 // 6272c54e96847e07000000
+ VFMADDSUB132PS X28, X17, K3, X6 // 6292750396f4
+ VFMADDSUB132PS (CX), X17, K3, X6 // 62f275039631
+ VFMADDSUB132PS 99(R15), X17, K3, X6 // 62d2750396b763000000
+ VFMADDSUB132PS Y1, Y28, K7, Y28 // 62621d2796e1
+ VFMADDSUB132PS -7(DI)(R8*1), Y28, K7, Y28 // 62221d2796a407f9ffffff
+ VFMADDSUB132PS (SP), Y28, K7, Y28 // 62621d27962424
+ VFMADDSUB132PS Z12, Z1, K4, Z20 // 62c2754c96e4
+ VFMADDSUB132PS Z16, Z1, K4, Z20 // 62a2754c96e0
+ VFMADDSUB132PS Z12, Z3, K4, Z20 // 62c2654c96e4
+ VFMADDSUB132PS Z16, Z3, K4, Z20 // 62a2654c96e0
+ VFMADDSUB132PS Z12, Z1, K4, Z9 // 6252754c96cc
+ VFMADDSUB132PS Z16, Z1, K4, Z9 // 6232754c96c8
+ VFMADDSUB132PS Z12, Z3, K4, Z9 // 6252654c96cc
+ VFMADDSUB132PS Z16, Z3, K4, Z9 // 6232654c96c8
+ VFMADDSUB132PS Z3, Z14, K4, Z28 // 62620d4c96e3
+ VFMADDSUB132PS Z12, Z14, K4, Z28 // 62420d4c96e4
+ VFMADDSUB132PS 17(SP)(BP*8), Z14, K4, Z28 // 62620d4c96a4ec11000000
+ VFMADDSUB132PS 17(SP)(BP*4), Z14, K4, Z28 // 62620d4c96a4ac11000000
+ VFMADDSUB132PS Z3, Z28, K4, Z28 // 62621d4496e3
+ VFMADDSUB132PS Z12, Z28, K4, Z28 // 62421d4496e4
+ VFMADDSUB132PS 17(SP)(BP*8), Z28, K4, Z28 // 62621d4496a4ec11000000
+ VFMADDSUB132PS 17(SP)(BP*4), Z28, K4, Z28 // 62621d4496a4ac11000000
+ VFMADDSUB132PS Z3, Z14, K4, Z13 // 62720d4c96eb
+ VFMADDSUB132PS Z12, Z14, K4, Z13 // 62520d4c96ec
+ VFMADDSUB132PS 17(SP)(BP*8), Z14, K4, Z13 // 62720d4c96acec11000000
+ VFMADDSUB132PS 17(SP)(BP*4), Z14, K4, Z13 // 62720d4c96acac11000000
+ VFMADDSUB132PS Z3, Z28, K4, Z13 // 62721d4496eb
+ VFMADDSUB132PS Z12, Z28, K4, Z13 // 62521d4496ec
+ VFMADDSUB132PS 17(SP)(BP*8), Z28, K4, Z13 // 62721d4496acec11000000
+ VFMADDSUB132PS 17(SP)(BP*4), Z28, K4, Z13 // 62721d4496acac11000000
+ VFMADDSUB213PD X8, X1, K7, X6 // 62d2f50fa6f0
+ VFMADDSUB213PD 99(R15)(R15*2), X1, K7, X6 // 6292f50fa6b47f63000000
+ VFMADDSUB213PD -7(DI), X1, K7, X6 // 62f2f50fa6b7f9ffffff
+ VFMADDSUB213PD Y27, Y11, K2, Y8 // 6212a52aa6c3
+ VFMADDSUB213PD -7(CX), Y11, K2, Y8 // 6272a52aa681f9ffffff
+ VFMADDSUB213PD 15(DX)(BX*4), Y11, K2, Y8 // 6272a52aa6849a0f000000
+ VFMADDSUB213PD Z5, Z19, K5, Z15 // 6272e545a6fd
+ VFMADDSUB213PD Z1, Z19, K5, Z15 // 6272e545a6f9
+ VFMADDSUB213PD Z5, Z15, K5, Z15 // 6272854da6fd
+ VFMADDSUB213PD Z1, Z15, K5, Z15 // 6272854da6f9
+ VFMADDSUB213PD Z5, Z19, K5, Z30 // 6262e545a6f5
+ VFMADDSUB213PD Z1, Z19, K5, Z30 // 6262e545a6f1
+ VFMADDSUB213PD Z5, Z15, K5, Z30 // 6262854da6f5
+ VFMADDSUB213PD Z1, Z15, K5, Z30 // 6262854da6f1
+ VFMADDSUB213PD Z21, Z14, K3, Z3 // 62b28d4ba6dd
+ VFMADDSUB213PD Z8, Z14, K3, Z3 // 62d28d4ba6d8
+ VFMADDSUB213PD 7(SI)(DI*4), Z14, K3, Z3 // 62f28d4ba69cbe07000000
+ VFMADDSUB213PD -7(DI)(R8*2), Z14, K3, Z3 // 62b28d4ba69c47f9ffffff
+ VFMADDSUB213PD Z21, Z15, K3, Z3 // 62b2854ba6dd
+ VFMADDSUB213PD Z8, Z15, K3, Z3 // 62d2854ba6d8
+ VFMADDSUB213PD 7(SI)(DI*4), Z15, K3, Z3 // 62f2854ba69cbe07000000
+ VFMADDSUB213PD -7(DI)(R8*2), Z15, K3, Z3 // 62b2854ba69c47f9ffffff
+ VFMADDSUB213PD Z21, Z14, K3, Z5 // 62b28d4ba6ed
+ VFMADDSUB213PD Z8, Z14, K3, Z5 // 62d28d4ba6e8
+ VFMADDSUB213PD 7(SI)(DI*4), Z14, K3, Z5 // 62f28d4ba6acbe07000000
+ VFMADDSUB213PD -7(DI)(R8*2), Z14, K3, Z5 // 62b28d4ba6ac47f9ffffff
+ VFMADDSUB213PD Z21, Z15, K3, Z5 // 62b2854ba6ed
+ VFMADDSUB213PD Z8, Z15, K3, Z5 // 62d2854ba6e8
+ VFMADDSUB213PD 7(SI)(DI*4), Z15, K3, Z5 // 62f2854ba6acbe07000000
+ VFMADDSUB213PD -7(DI)(R8*2), Z15, K3, Z5 // 62b2854ba6ac47f9ffffff
+ VFMADDSUB213PS X0, X6, K4, X8 // 62724d0ca6c0
+ VFMADDSUB213PS -7(CX)(DX*1), X6, K4, X8 // 62724d0ca68411f9ffffff
+ VFMADDSUB213PS -15(R14)(R15*4), X6, K4, X8 // 62124d0ca684bef1ffffff
+ VFMADDSUB213PS Y12, Y16, K2, Y17 // 62c27d22a6cc
+ VFMADDSUB213PS 99(R15)(R15*8), Y16, K2, Y17 // 62827d22a68cff63000000
+ VFMADDSUB213PS 7(AX)(CX*8), Y16, K2, Y17 // 62e27d22a68cc807000000
+ VFMADDSUB213PS Z23, Z20, K2, Z16 // 62a25d42a6c7
+ VFMADDSUB213PS Z19, Z20, K2, Z16 // 62a25d42a6c3
+ VFMADDSUB213PS Z23, Z0, K2, Z16 // 62a27d4aa6c7
+ VFMADDSUB213PS Z19, Z0, K2, Z16 // 62a27d4aa6c3
+ VFMADDSUB213PS Z23, Z20, K2, Z9 // 62325d42a6cf
+ VFMADDSUB213PS Z19, Z20, K2, Z9 // 62325d42a6cb
+ VFMADDSUB213PS Z23, Z0, K2, Z9 // 62327d4aa6cf
+ VFMADDSUB213PS Z19, Z0, K2, Z9 // 62327d4aa6cb
+ VFMADDSUB213PS Z24, Z0, K3, Z0 // 62927d4ba6c0
+ VFMADDSUB213PS Z12, Z0, K3, Z0 // 62d27d4ba6c4
+ VFMADDSUB213PS 17(SP), Z0, K3, Z0 // 62f27d4ba6842411000000
+ VFMADDSUB213PS -17(BP)(SI*4), Z0, K3, Z0 // 62f27d4ba684b5efffffff
+ VFMADDSUB213PS Z24, Z25, K3, Z0 // 62923543a6c0
+ VFMADDSUB213PS Z12, Z25, K3, Z0 // 62d23543a6c4
+ VFMADDSUB213PS 17(SP), Z25, K3, Z0 // 62f23543a6842411000000
+ VFMADDSUB213PS -17(BP)(SI*4), Z25, K3, Z0 // 62f23543a684b5efffffff
+ VFMADDSUB213PS Z24, Z0, K3, Z11 // 62127d4ba6d8
+ VFMADDSUB213PS Z12, Z0, K3, Z11 // 62527d4ba6dc
+ VFMADDSUB213PS 17(SP), Z0, K3, Z11 // 62727d4ba69c2411000000
+ VFMADDSUB213PS -17(BP)(SI*4), Z0, K3, Z11 // 62727d4ba69cb5efffffff
+ VFMADDSUB213PS Z24, Z25, K3, Z11 // 62123543a6d8
+ VFMADDSUB213PS Z12, Z25, K3, Z11 // 62523543a6dc
+ VFMADDSUB213PS 17(SP), Z25, K3, Z11 // 62723543a69c2411000000
+ VFMADDSUB213PS -17(BP)(SI*4), Z25, K3, Z11 // 62723543a69cb5efffffff
+ VFMADDSUB231PD X6, X16, K3, X11 // 6272fd03b6de
+ VFMADDSUB231PD 15(DX)(BX*1), X16, K3, X11 // 6272fd03b69c1a0f000000
+ VFMADDSUB231PD -7(CX)(DX*2), X16, K3, X11 // 6272fd03b69c51f9ffffff
+ VFMADDSUB231PD Y3, Y26, K3, Y6 // 62f2ad23b6f3
+ VFMADDSUB231PD (AX), Y26, K3, Y6 // 62f2ad23b630
+ VFMADDSUB231PD 7(SI), Y26, K3, Y6 // 62f2ad23b6b607000000
+ VFMADDSUB231PD Z9, Z9, K2, Z0 // 62d2b54ab6c1
+ VFMADDSUB231PD Z25, Z9, K2, Z0 // 6292b54ab6c1
+ VFMADDSUB231PD Z9, Z3, K2, Z0 // 62d2e54ab6c1
+ VFMADDSUB231PD Z25, Z3, K2, Z0 // 6292e54ab6c1
+ VFMADDSUB231PD Z9, Z9, K2, Z26 // 6242b54ab6d1
+ VFMADDSUB231PD Z25, Z9, K2, Z26 // 6202b54ab6d1
+ VFMADDSUB231PD Z9, Z3, K2, Z26 // 6242e54ab6d1
+ VFMADDSUB231PD Z25, Z3, K2, Z26 // 6202e54ab6d1
+ VFMADDSUB231PD Z17, Z20, K1, Z9 // 6232dd41b6c9
+ VFMADDSUB231PD Z0, Z20, K1, Z9 // 6272dd41b6c8
+ VFMADDSUB231PD 7(AX), Z20, K1, Z9 // 6272dd41b68807000000
+ VFMADDSUB231PD (DI), Z20, K1, Z9 // 6272dd41b60f
+ VFMADDSUB231PD Z17, Z0, K1, Z9 // 6232fd49b6c9
+ VFMADDSUB231PD Z0, Z0, K1, Z9 // 6272fd49b6c8
+ VFMADDSUB231PD 7(AX), Z0, K1, Z9 // 6272fd49b68807000000
+ VFMADDSUB231PD (DI), Z0, K1, Z9 // 6272fd49b60f
+ VFMADDSUB231PD Z17, Z20, K1, Z28 // 6222dd41b6e1
+ VFMADDSUB231PD Z0, Z20, K1, Z28 // 6262dd41b6e0
+ VFMADDSUB231PD 7(AX), Z20, K1, Z28 // 6262dd41b6a007000000
+ VFMADDSUB231PD (DI), Z20, K1, Z28 // 6262dd41b627
+ VFMADDSUB231PD Z17, Z0, K1, Z28 // 6222fd49b6e1
+ VFMADDSUB231PD Z0, Z0, K1, Z28 // 6262fd49b6e0
+ VFMADDSUB231PD 7(AX), Z0, K1, Z28 // 6262fd49b6a007000000
+ VFMADDSUB231PD (DI), Z0, K1, Z28 // 6262fd49b627
+ VFMADDSUB231PS X12, X22, K2, X6 // 62d24d02b6f4
+ VFMADDSUB231PS -17(BP), X22, K2, X6 // 62f24d02b6b5efffffff
+ VFMADDSUB231PS -15(R14)(R15*8), X22, K2, X6 // 62924d02b6b4fef1ffffff
+ VFMADDSUB231PS Y1, Y28, K1, Y8 // 62721d21b6c1
+ VFMADDSUB231PS (BX), Y28, K1, Y8 // 62721d21b603
+ VFMADDSUB231PS -17(BP)(SI*1), Y28, K1, Y8 // 62721d21b68435efffffff
+ VFMADDSUB231PS Z21, Z31, K7, Z17 // 62a20547b6cd
+ VFMADDSUB231PS Z9, Z31, K7, Z17 // 62c20547b6c9
+ VFMADDSUB231PS Z21, Z0, K7, Z17 // 62a27d4fb6cd
+ VFMADDSUB231PS Z9, Z0, K7, Z17 // 62c27d4fb6c9
+ VFMADDSUB231PS Z21, Z31, K7, Z23 // 62a20547b6fd
+ VFMADDSUB231PS Z9, Z31, K7, Z23 // 62c20547b6f9
+ VFMADDSUB231PS Z21, Z0, K7, Z23 // 62a27d4fb6fd
+ VFMADDSUB231PS Z9, Z0, K7, Z23 // 62c27d4fb6f9
+ VFMADDSUB231PS Z20, Z1, K1, Z6 // 62b27549b6f4
+ VFMADDSUB231PS Z9, Z1, K1, Z6 // 62d27549b6f1
+ VFMADDSUB231PS 99(R15)(R15*1), Z1, K1, Z6 // 62927549b6b43f63000000
+ VFMADDSUB231PS (DX), Z1, K1, Z6 // 62f27549b632
+ VFMADDSUB231PS Z20, Z9, K1, Z6 // 62b23549b6f4
+ VFMADDSUB231PS Z9, Z9, K1, Z6 // 62d23549b6f1
+ VFMADDSUB231PS 99(R15)(R15*1), Z9, K1, Z6 // 62923549b6b43f63000000
+ VFMADDSUB231PS (DX), Z9, K1, Z6 // 62f23549b632
+ VFMADDSUB231PS Z20, Z1, K1, Z9 // 62327549b6cc
+ VFMADDSUB231PS Z9, Z1, K1, Z9 // 62527549b6c9
+ VFMADDSUB231PS 99(R15)(R15*1), Z1, K1, Z9 // 62127549b68c3f63000000
+ VFMADDSUB231PS (DX), Z1, K1, Z9 // 62727549b60a
+ VFMADDSUB231PS Z20, Z9, K1, Z9 // 62323549b6cc
+ VFMADDSUB231PS Z9, Z9, K1, Z9 // 62523549b6c9
+ VFMADDSUB231PS 99(R15)(R15*1), Z9, K1, Z9 // 62123549b68c3f63000000
+ VFMADDSUB231PS (DX), Z9, K1, Z9 // 62723549b60a
+ VFMSUB132PD X8, X28, K1, X16 // 62c29d019ac0
+ VFMSUB132PD 17(SP)(BP*2), X28, K1, X16 // 62e29d019a846c11000000
+ VFMSUB132PD -7(DI)(R8*4), X28, K1, X16 // 62a29d019a8487f9ffffff
+ VFMSUB132PD Y31, Y14, K1, Y23 // 62828d299aff
+ VFMSUB132PD 15(R8)(R14*4), Y14, K1, Y23 // 62828d299abcb00f000000
+ VFMSUB132PD -7(CX)(DX*4), Y14, K1, Y23 // 62e28d299abc91f9ffffff
+ VFMSUB132PD Z7, Z26, K7, Z30 // 6262ad479af7
+ VFMSUB132PD Z21, Z26, K7, Z30 // 6222ad479af5
+ VFMSUB132PD Z7, Z22, K7, Z30 // 6262cd479af7
+ VFMSUB132PD Z21, Z22, K7, Z30 // 6222cd479af5
+ VFMSUB132PD Z7, Z26, K7, Z5 // 62f2ad479aef
+ VFMSUB132PD Z21, Z26, K7, Z5 // 62b2ad479aed
+ VFMSUB132PD Z7, Z22, K7, Z5 // 62f2cd479aef
+ VFMSUB132PD Z21, Z22, K7, Z5 // 62b2cd479aed
+ VFMSUB132PD Z12, Z14, K2, Z16 // 62c28d4a9ac4
+ VFMSUB132PD Z13, Z14, K2, Z16 // 62c28d4a9ac5
+ VFMSUB132PD -17(BP)(SI*8), Z14, K2, Z16 // 62e28d4a9a84f5efffffff
+ VFMSUB132PD (R15), Z14, K2, Z16 // 62c28d4a9a07
+ VFMSUB132PD Z12, Z13, K2, Z16 // 62c2954a9ac4
+ VFMSUB132PD Z13, Z13, K2, Z16 // 62c2954a9ac5
+ VFMSUB132PD -17(BP)(SI*8), Z13, K2, Z16 // 62e2954a9a84f5efffffff
+ VFMSUB132PD (R15), Z13, K2, Z16 // 62c2954a9a07
+ VFMSUB132PD Z12, Z14, K2, Z25 // 62428d4a9acc
+ VFMSUB132PD Z13, Z14, K2, Z25 // 62428d4a9acd
+ VFMSUB132PD -17(BP)(SI*8), Z14, K2, Z25 // 62628d4a9a8cf5efffffff
+ VFMSUB132PD (R15), Z14, K2, Z25 // 62428d4a9a0f
+ VFMSUB132PD Z12, Z13, K2, Z25 // 6242954a9acc
+ VFMSUB132PD Z13, Z13, K2, Z25 // 6242954a9acd
+ VFMSUB132PD -17(BP)(SI*8), Z13, K2, Z25 // 6262954a9a8cf5efffffff
+ VFMSUB132PD (R15), Z13, K2, Z25 // 6242954a9a0f
+ VFMSUB132PS X1, X11, K4, X15 // 6272250c9af9
+ VFMSUB132PS 15(R8), X11, K4, X15 // 6252250c9ab80f000000
+ VFMSUB132PS (BP), X11, K4, X15 // 6272250c9a7d00
+ VFMSUB132PS Y22, Y2, K1, Y25 // 62226d299ace
+ VFMSUB132PS (R8), Y2, K1, Y25 // 62426d299a08
+ VFMSUB132PS 15(DX)(BX*2), Y2, K1, Y25 // 62626d299a8c5a0f000000
+ VFMSUB132PS Z27, Z2, K3, Z21 // 62826d4b9aeb
+ VFMSUB132PS Z25, Z2, K3, Z21 // 62826d4b9ae9
+ VFMSUB132PS Z27, Z7, K3, Z21 // 6282454b9aeb
+ VFMSUB132PS Z25, Z7, K3, Z21 // 6282454b9ae9
+ VFMSUB132PS Z27, Z2, K3, Z9 // 62126d4b9acb
+ VFMSUB132PS Z25, Z2, K3, Z9 // 62126d4b9ac9
+ VFMSUB132PS Z27, Z7, K3, Z9 // 6212454b9acb
+ VFMSUB132PS Z25, Z7, K3, Z9 // 6212454b9ac9
+ VFMSUB132PS Z3, Z27, K4, Z23 // 62e225449afb
+ VFMSUB132PS Z0, Z27, K4, Z23 // 62e225449af8
+ VFMSUB132PS 7(SI)(DI*8), Z27, K4, Z23 // 62e225449abcfe07000000
+ VFMSUB132PS -15(R14), Z27, K4, Z23 // 62c225449abef1ffffff
+ VFMSUB132PS Z3, Z14, K4, Z23 // 62e20d4c9afb
+ VFMSUB132PS Z0, Z14, K4, Z23 // 62e20d4c9af8
+ VFMSUB132PS 7(SI)(DI*8), Z14, K4, Z23 // 62e20d4c9abcfe07000000
+ VFMSUB132PS -15(R14), Z14, K4, Z23 // 62c20d4c9abef1ffffff
+ VFMSUB132PS Z3, Z27, K4, Z9 // 627225449acb
+ VFMSUB132PS Z0, Z27, K4, Z9 // 627225449ac8
+ VFMSUB132PS 7(SI)(DI*8), Z27, K4, Z9 // 627225449a8cfe07000000
+ VFMSUB132PS -15(R14), Z27, K4, Z9 // 625225449a8ef1ffffff
+ VFMSUB132PS Z3, Z14, K4, Z9 // 62720d4c9acb
+ VFMSUB132PS Z0, Z14, K4, Z9 // 62720d4c9ac8
+ VFMSUB132PS 7(SI)(DI*8), Z14, K4, Z9 // 62720d4c9a8cfe07000000
+ VFMSUB132PS -15(R14), Z14, K4, Z9 // 62520d4c9a8ef1ffffff
+ VFMSUB132SD X2, X13, K5, X19 // 62e2950d9bda
+ VFMSUB132SD X0, X0, K7, X14 // 6272fd0f9bf0 or 6272fd2f9bf0 or 6272fd4f9bf0
+ VFMSUB132SD 7(AX), X0, K7, X14 // 6272fd0f9bb007000000 or 6272fd2f9bb007000000 or 6272fd4f9bb007000000
+ VFMSUB132SD (DI), X0, K7, X14 // 6272fd0f9b37 or 6272fd2f9b37 or 6272fd4f9b37
+ VFMSUB132SS X17, X11, K7, X25 // 6222250f9bc9
+ VFMSUB132SS X9, X11, K6, X18 // 62c2250e9bd1 or 62c2252e9bd1 or 62c2254e9bd1
+ VFMSUB132SS 7(AX)(CX*4), X11, K6, X18 // 62e2250e9b948807000000 or 62e2252e9b948807000000 or 62e2254e9b948807000000
+ VFMSUB132SS 7(AX)(CX*1), X11, K6, X18 // 62e2250e9b940807000000 or 62e2252e9b940807000000 or 62e2254e9b940807000000
+ VFMSUB213PD X2, X24, K3, X2 // 62f2bd03aad2
+ VFMSUB213PD 15(R8)(R14*8), X24, K3, X2 // 6292bd03aa94f00f000000
+ VFMSUB213PD -15(R14)(R15*2), X24, K3, X2 // 6292bd03aa947ef1ffffff
+ VFMSUB213PD Y9, Y8, K7, Y27 // 6242bd2faad9
+ VFMSUB213PD 17(SP)(BP*1), Y8, K7, Y27 // 6262bd2faa9c2c11000000
+ VFMSUB213PD -7(CX)(DX*8), Y8, K7, Y27 // 6262bd2faa9cd1f9ffffff
+ VFMSUB213PD Z22, Z8, K4, Z14 // 6232bd4caaf6
+ VFMSUB213PD Z25, Z8, K4, Z14 // 6212bd4caaf1
+ VFMSUB213PD Z22, Z24, K4, Z14 // 6232bd44aaf6
+ VFMSUB213PD Z25, Z24, K4, Z14 // 6212bd44aaf1
+ VFMSUB213PD Z22, Z8, K4, Z7 // 62b2bd4caafe
+ VFMSUB213PD Z25, Z8, K4, Z7 // 6292bd4caaf9
+ VFMSUB213PD Z22, Z24, K4, Z7 // 62b2bd44aafe
+ VFMSUB213PD Z25, Z24, K4, Z7 // 6292bd44aaf9
+ VFMSUB213PD Z0, Z6, K4, Z1 // 62f2cd4caac8
+ VFMSUB213PD Z8, Z6, K4, Z1 // 62d2cd4caac8
+ VFMSUB213PD 7(SI)(DI*1), Z6, K4, Z1 // 62f2cd4caa8c3e07000000
+ VFMSUB213PD 15(DX)(BX*8), Z6, K4, Z1 // 62f2cd4caa8cda0f000000
+ VFMSUB213PD Z0, Z2, K4, Z1 // 62f2ed4caac8
+ VFMSUB213PD Z8, Z2, K4, Z1 // 62d2ed4caac8
+ VFMSUB213PD 7(SI)(DI*1), Z2, K4, Z1 // 62f2ed4caa8c3e07000000
+ VFMSUB213PD 15(DX)(BX*8), Z2, K4, Z1 // 62f2ed4caa8cda0f000000
+ VFMSUB213PD Z0, Z6, K4, Z16 // 62e2cd4caac0
+ VFMSUB213PD Z8, Z6, K4, Z16 // 62c2cd4caac0
+ VFMSUB213PD 7(SI)(DI*1), Z6, K4, Z16 // 62e2cd4caa843e07000000
+ VFMSUB213PD 15(DX)(BX*8), Z6, K4, Z16 // 62e2cd4caa84da0f000000
+ VFMSUB213PD Z0, Z2, K4, Z16 // 62e2ed4caac0
+ VFMSUB213PD Z8, Z2, K4, Z16 // 62c2ed4caac0
+ VFMSUB213PD 7(SI)(DI*1), Z2, K4, Z16 // 62e2ed4caa843e07000000
+ VFMSUB213PD 15(DX)(BX*8), Z2, K4, Z16 // 62e2ed4caa84da0f000000
+ VFMSUB213PS X26, X27, K7, X2 // 62922507aad2
+ VFMSUB213PS -15(R14)(R15*1), X27, K7, X2 // 62922507aa943ef1ffffff
+ VFMSUB213PS -15(BX), X27, K7, X2 // 62f22507aa93f1ffffff
+ VFMSUB213PS Y14, Y9, K2, Y22 // 62c2352aaaf6
+ VFMSUB213PS -17(BP)(SI*2), Y9, K2, Y22 // 62e2352aaab475efffffff
+ VFMSUB213PS 7(AX)(CX*2), Y9, K2, Y22 // 62e2352aaab44807000000
+ VFMSUB213PS Z11, Z14, K5, Z15 // 62520d4daafb
+ VFMSUB213PS Z5, Z14, K5, Z15 // 62720d4daafd
+ VFMSUB213PS Z11, Z27, K5, Z15 // 62522545aafb
+ VFMSUB213PS Z5, Z27, K5, Z15 // 62722545aafd
+ VFMSUB213PS Z11, Z14, K5, Z12 // 62520d4daae3
+ VFMSUB213PS Z5, Z14, K5, Z12 // 62720d4daae5
+ VFMSUB213PS Z11, Z27, K5, Z12 // 62522545aae3
+ VFMSUB213PS Z5, Z27, K5, Z12 // 62722545aae5
+ VFMSUB213PS Z2, Z5, K3, Z13 // 6272554baaea
+ VFMSUB213PS -7(DI)(R8*1), Z5, K3, Z13 // 6232554baaac07f9ffffff
+ VFMSUB213PS (SP), Z5, K3, Z13 // 6272554baa2c24
+ VFMSUB213PS Z2, Z23, K3, Z13 // 62724543aaea
+ VFMSUB213PS -7(DI)(R8*1), Z23, K3, Z13 // 62324543aaac07f9ffffff
+ VFMSUB213PS (SP), Z23, K3, Z13 // 62724543aa2c24
+ VFMSUB213PS Z2, Z5, K3, Z14 // 6272554baaf2
+ VFMSUB213PS -7(DI)(R8*1), Z5, K3, Z14 // 6232554baab407f9ffffff
+ VFMSUB213PS (SP), Z5, K3, Z14 // 6272554baa3424
+ VFMSUB213PS Z2, Z23, K3, Z14 // 62724543aaf2
+ VFMSUB213PS -7(DI)(R8*1), Z23, K3, Z14 // 62324543aab407f9ffffff
+ VFMSUB213PS (SP), Z23, K3, Z14 // 62724543aa3424
+ VFMSUB213SD X3, X30, K4, X22 // 62e28d04abf3
+ VFMSUB213SD X30, X15, K2, X11 // 6212850aabde or 6212852aabde or 6212854aabde
+ VFMSUB213SD 99(R15)(R15*1), X15, K2, X11 // 6212850aab9c3f63000000 or 6212852aab9c3f63000000 or 6212854aab9c3f63000000
+ VFMSUB213SD (DX), X15, K2, X11 // 6272850aab1a or 6272852aab1a or 6272854aab1a
+ VFMSUB213SS X12, X6, K2, X13 // 62524d0aabec
+ VFMSUB213SS X8, X30, K3, X23 // 62c20d03abf8 or 62c20d23abf8 or 62c20d43abf8
+ VFMSUB213SS (SI), X30, K3, X23 // 62e20d03ab3e or 62e20d23ab3e or 62e20d43ab3e
+ VFMSUB213SS 7(SI)(DI*2), X30, K3, X23 // 62e20d03abbc7e07000000 or 62e20d23abbc7e07000000 or 62e20d43abbc7e07000000
+ VFMSUB231PD X9, X2, K3, X20 // 62c2ed0bbae1
+ VFMSUB231PD 7(AX)(CX*4), X2, K3, X20 // 62e2ed0bbaa48807000000
+ VFMSUB231PD 7(AX)(CX*1), X2, K3, X20 // 62e2ed0bbaa40807000000
+ VFMSUB231PD Y1, Y6, K3, Y1 // 62f2cd2bbac9
+ VFMSUB231PD 15(R8)(R14*1), Y6, K3, Y1 // 6292cd2bba8c300f000000
+ VFMSUB231PD 15(R8)(R14*2), Y6, K3, Y1 // 6292cd2bba8c700f000000
+ VFMSUB231PD Z28, Z26, K2, Z6 // 6292ad42baf4
+ VFMSUB231PD Z6, Z26, K2, Z6 // 62f2ad42baf6
+ VFMSUB231PD Z28, Z14, K2, Z6 // 62928d4abaf4
+ VFMSUB231PD Z6, Z14, K2, Z6 // 62f28d4abaf6
+ VFMSUB231PD Z28, Z26, K2, Z14 // 6212ad42baf4
+ VFMSUB231PD Z6, Z26, K2, Z14 // 6272ad42baf6
+ VFMSUB231PD Z28, Z14, K2, Z14 // 62128d4abaf4
+ VFMSUB231PD Z6, Z14, K2, Z14 // 62728d4abaf6
+ VFMSUB231PD Z3, Z26, K1, Z13 // 6272ad41baeb
+ VFMSUB231PD Z0, Z26, K1, Z13 // 6272ad41bae8
+ VFMSUB231PD -7(CX), Z26, K1, Z13 // 6272ad41baa9f9ffffff
+ VFMSUB231PD 15(DX)(BX*4), Z26, K1, Z13 // 6272ad41baac9a0f000000
+ VFMSUB231PD Z3, Z3, K1, Z13 // 6272e549baeb
+ VFMSUB231PD Z0, Z3, K1, Z13 // 6272e549bae8
+ VFMSUB231PD -7(CX), Z3, K1, Z13 // 6272e549baa9f9ffffff
+ VFMSUB231PD 15(DX)(BX*4), Z3, K1, Z13 // 6272e549baac9a0f000000
+ VFMSUB231PD Z3, Z26, K1, Z21 // 62e2ad41baeb
+ VFMSUB231PD Z0, Z26, K1, Z21 // 62e2ad41bae8
+ VFMSUB231PD -7(CX), Z26, K1, Z21 // 62e2ad41baa9f9ffffff
+ VFMSUB231PD 15(DX)(BX*4), Z26, K1, Z21 // 62e2ad41baac9a0f000000
+ VFMSUB231PD Z3, Z3, K1, Z21 // 62e2e549baeb
+ VFMSUB231PD Z0, Z3, K1, Z21 // 62e2e549bae8
+ VFMSUB231PD -7(CX), Z3, K1, Z21 // 62e2e549baa9f9ffffff
+ VFMSUB231PD 15(DX)(BX*4), Z3, K1, Z21 // 62e2e549baac9a0f000000
+ VFMSUB231PS X0, X19, K2, X26 // 62626502bad0
+ VFMSUB231PS (SI), X19, K2, X26 // 62626502ba16
+ VFMSUB231PS 7(SI)(DI*2), X19, K2, X26 // 62626502ba947e07000000
+ VFMSUB231PS Y19, Y0, K1, Y9 // 62327d29bacb
+ VFMSUB231PS (R14), Y0, K1, Y9 // 62527d29ba0e
+ VFMSUB231PS -7(DI)(R8*8), Y0, K1, Y9 // 62327d29ba8cc7f9ffffff
+ VFMSUB231PS Z3, Z11, K7, Z21 // 62e2254fbaeb
+ VFMSUB231PS Z12, Z11, K7, Z21 // 62c2254fbaec
+ VFMSUB231PS Z3, Z25, K7, Z21 // 62e23547baeb
+ VFMSUB231PS Z12, Z25, K7, Z21 // 62c23547baec
+ VFMSUB231PS Z3, Z11, K7, Z13 // 6272254fbaeb
+ VFMSUB231PS Z12, Z11, K7, Z13 // 6252254fbaec
+ VFMSUB231PS Z3, Z25, K7, Z13 // 62723547baeb
+ VFMSUB231PS Z12, Z25, K7, Z13 // 62523547baec
+ VFMSUB231PS Z23, Z23, K1, Z27 // 62224541badf
+ VFMSUB231PS Z6, Z23, K1, Z27 // 62624541bade
+ VFMSUB231PS 99(R15)(R15*8), Z23, K1, Z27 // 62024541ba9cff63000000
+ VFMSUB231PS 7(AX)(CX*8), Z23, K1, Z27 // 62624541ba9cc807000000
+ VFMSUB231PS Z23, Z5, K1, Z27 // 62225549badf
+ VFMSUB231PS Z6, Z5, K1, Z27 // 62625549bade
+ VFMSUB231PS 99(R15)(R15*8), Z5, K1, Z27 // 62025549ba9cff63000000
+ VFMSUB231PS 7(AX)(CX*8), Z5, K1, Z27 // 62625549ba9cc807000000
+ VFMSUB231PS Z23, Z23, K1, Z15 // 62324541baff
+ VFMSUB231PS Z6, Z23, K1, Z15 // 62724541bafe
+ VFMSUB231PS 99(R15)(R15*8), Z23, K1, Z15 // 62124541babcff63000000
+ VFMSUB231PS 7(AX)(CX*8), Z23, K1, Z15 // 62724541babcc807000000
+ VFMSUB231PS Z23, Z5, K1, Z15 // 62325549baff
+ VFMSUB231PS Z6, Z5, K1, Z15 // 62725549bafe
+ VFMSUB231PS 99(R15)(R15*8), Z5, K1, Z15 // 62125549babcff63000000
+ VFMSUB231PS 7(AX)(CX*8), Z5, K1, Z15 // 62725549babcc807000000
+ VFMSUB231SD X7, X16, K1, X31 // 6262fd01bbff
+ VFMSUB231SD X0, X1, K1, X8 // 6272f509bbc0 or 6272f529bbc0 or 6272f549bbc0
+ VFMSUB231SD -17(BP)(SI*8), X1, K1, X8 // 6272f509bb84f5efffffff or 6272f529bb84f5efffffff or 6272f549bb84f5efffffff
+ VFMSUB231SD (R15), X1, K1, X8 // 6252f509bb07 or 6252f529bb07 or 6252f549bb07
+ VFMSUB231SS X16, X0, K7, X15 // 62327d0fbbf8
+ VFMSUB231SS X28, X0, K2, X21 // 62827d0abbec or 62827d2abbec or 62827d4abbec
+ VFMSUB231SS 17(SP)(BP*8), X0, K2, X21 // 62e27d0abbacec11000000 or 62e27d2abbacec11000000 or 62e27d4abbacec11000000
+ VFMSUB231SS 17(SP)(BP*4), X0, K2, X21 // 62e27d0abbacac11000000 or 62e27d2abbacac11000000 or 62e27d4abbacac11000000
+ VFMSUBADD132PD X19, X7, K4, X22 // 62a2c50c97f3
+ VFMSUBADD132PD 17(SP)(BP*8), X7, K4, X22 // 62e2c50c97b4ec11000000
+ VFMSUBADD132PD 17(SP)(BP*4), X7, K4, X22 // 62e2c50c97b4ac11000000
+ VFMSUBADD132PD Y9, Y22, K1, Y31 // 6242cd2197f9
+ VFMSUBADD132PD 99(R15)(R15*4), Y22, K1, Y31 // 6202cd2197bcbf63000000
+ VFMSUBADD132PD 15(DX), Y22, K1, Y31 // 6262cd2197ba0f000000
+ VFMSUBADD132PD Z16, Z21, K3, Z8 // 6232d54397c0
+ VFMSUBADD132PD Z13, Z21, K3, Z8 // 6252d54397c5
+ VFMSUBADD132PD Z16, Z5, K3, Z8 // 6232d54b97c0
+ VFMSUBADD132PD Z13, Z5, K3, Z8 // 6252d54b97c5
+ VFMSUBADD132PD Z16, Z21, K3, Z28 // 6222d54397e0
+ VFMSUBADD132PD Z13, Z21, K3, Z28 // 6242d54397e5
+ VFMSUBADD132PD Z16, Z5, K3, Z28 // 6222d54b97e0
+ VFMSUBADD132PD Z13, Z5, K3, Z28 // 6242d54b97e5
+ VFMSUBADD132PD Z6, Z22, K4, Z12 // 6272cd4497e6
+ VFMSUBADD132PD Z8, Z22, K4, Z12 // 6252cd4497e0
+ VFMSUBADD132PD (AX), Z22, K4, Z12 // 6272cd449720
+ VFMSUBADD132PD 7(SI), Z22, K4, Z12 // 6272cd4497a607000000
+ VFMSUBADD132PD Z6, Z11, K4, Z12 // 6272a54c97e6
+ VFMSUBADD132PD Z8, Z11, K4, Z12 // 6252a54c97e0
+ VFMSUBADD132PD (AX), Z11, K4, Z12 // 6272a54c9720
+ VFMSUBADD132PD 7(SI), Z11, K4, Z12 // 6272a54c97a607000000
+ VFMSUBADD132PD Z6, Z22, K4, Z27 // 6262cd4497de
+ VFMSUBADD132PD Z8, Z22, K4, Z27 // 6242cd4497d8
+ VFMSUBADD132PD (AX), Z22, K4, Z27 // 6262cd449718
+ VFMSUBADD132PD 7(SI), Z22, K4, Z27 // 6262cd44979e07000000
+ VFMSUBADD132PD Z6, Z11, K4, Z27 // 6262a54c97de
+ VFMSUBADD132PD Z8, Z11, K4, Z27 // 6242a54c97d8
+ VFMSUBADD132PD (AX), Z11, K4, Z27 // 6262a54c9718
+ VFMSUBADD132PD 7(SI), Z11, K4, Z27 // 6262a54c979e07000000
+ VFMSUBADD132PS X31, X16, K5, X7 // 62927d0597ff
+ VFMSUBADD132PS 7(SI)(DI*4), X16, K5, X7 // 62f27d0597bcbe07000000
+ VFMSUBADD132PS -7(DI)(R8*2), X16, K5, X7 // 62b27d0597bc47f9ffffff
+ VFMSUBADD132PS Y5, Y31, K7, Y23 // 62e2052797fd
+ VFMSUBADD132PS (CX), Y31, K7, Y23 // 62e205279739
+ VFMSUBADD132PS 99(R15), Y31, K7, Y23 // 62c2052797bf63000000
+ VFMSUBADD132PS Z9, Z12, K7, Z25 // 62421d4f97c9
+ VFMSUBADD132PS Z12, Z12, K7, Z25 // 62421d4f97cc
+ VFMSUBADD132PS Z9, Z17, K7, Z25 // 6242754797c9
+ VFMSUBADD132PS Z12, Z17, K7, Z25 // 6242754797cc
+ VFMSUBADD132PS Z9, Z12, K7, Z12 // 62521d4f97e1
+ VFMSUBADD132PS Z12, Z12, K7, Z12 // 62521d4f97e4
+ VFMSUBADD132PS Z9, Z17, K7, Z12 // 6252754797e1
+ VFMSUBADD132PS Z12, Z17, K7, Z12 // 6252754797e4
+ VFMSUBADD132PS Z8, Z3, K6, Z6 // 62d2654e97f0
+ VFMSUBADD132PS Z2, Z3, K6, Z6 // 62f2654e97f2
+ VFMSUBADD132PS (BX), Z3, K6, Z6 // 62f2654e9733
+ VFMSUBADD132PS -17(BP)(SI*1), Z3, K6, Z6 // 62f2654e97b435efffffff
+ VFMSUBADD132PS Z8, Z21, K6, Z6 // 62d2554697f0
+ VFMSUBADD132PS Z2, Z21, K6, Z6 // 62f2554697f2
+ VFMSUBADD132PS (BX), Z21, K6, Z6 // 62f255469733
+ VFMSUBADD132PS -17(BP)(SI*1), Z21, K6, Z6 // 62f2554697b435efffffff
+ VFMSUBADD132PS Z8, Z3, K6, Z25 // 6242654e97c8
+ VFMSUBADD132PS Z2, Z3, K6, Z25 // 6262654e97ca
+ VFMSUBADD132PS (BX), Z3, K6, Z25 // 6262654e970b
+ VFMSUBADD132PS -17(BP)(SI*1), Z3, K6, Z25 // 6262654e978c35efffffff
+ VFMSUBADD132PS Z8, Z21, K6, Z25 // 6242554697c8
+ VFMSUBADD132PS Z2, Z21, K6, Z25 // 6262554697ca
+ VFMSUBADD132PS (BX), Z21, K6, Z25 // 62625546970b
+ VFMSUBADD132PS -17(BP)(SI*1), Z21, K6, Z25 // 62625546978c35efffffff
+ VFMSUBADD213PD X9, X7, K3, X1 // 62d2c50ba7c9
+ VFMSUBADD213PD 17(SP), X7, K3, X1 // 62f2c50ba78c2411000000
+ VFMSUBADD213PD -17(BP)(SI*4), X7, K3, X1 // 62f2c50ba78cb5efffffff
+ VFMSUBADD213PD Y19, Y5, K7, Y0 // 62b2d52fa7c3
+ VFMSUBADD213PD 99(R15)(R15*2), Y5, K7, Y0 // 6292d52fa7847f63000000
+ VFMSUBADD213PD -7(DI), Y5, K7, Y0 // 62f2d52fa787f9ffffff
+ VFMSUBADD213PD Z0, Z7, K4, Z3 // 62f2c54ca7d8
+ VFMSUBADD213PD Z6, Z7, K4, Z3 // 62f2c54ca7de
+ VFMSUBADD213PD Z0, Z9, K4, Z3 // 62f2b54ca7d8
+ VFMSUBADD213PD Z6, Z9, K4, Z3 // 62f2b54ca7de
+ VFMSUBADD213PD Z0, Z7, K4, Z27 // 6262c54ca7d8
+ VFMSUBADD213PD Z6, Z7, K4, Z27 // 6262c54ca7de
+ VFMSUBADD213PD Z0, Z9, K4, Z27 // 6262b54ca7d8
+ VFMSUBADD213PD Z6, Z9, K4, Z27 // 6262b54ca7de
+ VFMSUBADD213PD Z9, Z3, K4, Z20 // 62c2e54ca7e1
+ VFMSUBADD213PD Z19, Z3, K4, Z20 // 62a2e54ca7e3
+ VFMSUBADD213PD 15(R8)(R14*4), Z3, K4, Z20 // 6282e54ca7a4b00f000000
+ VFMSUBADD213PD -7(CX)(DX*4), Z3, K4, Z20 // 62e2e54ca7a491f9ffffff
+ VFMSUBADD213PD Z9, Z30, K4, Z20 // 62c28d44a7e1
+ VFMSUBADD213PD Z19, Z30, K4, Z20 // 62a28d44a7e3
+ VFMSUBADD213PD 15(R8)(R14*4), Z30, K4, Z20 // 62828d44a7a4b00f000000
+ VFMSUBADD213PD -7(CX)(DX*4), Z30, K4, Z20 // 62e28d44a7a491f9ffffff
+ VFMSUBADD213PD Z9, Z3, K4, Z28 // 6242e54ca7e1
+ VFMSUBADD213PD Z19, Z3, K4, Z28 // 6222e54ca7e3
+ VFMSUBADD213PD 15(R8)(R14*4), Z3, K4, Z28 // 6202e54ca7a4b00f000000
+ VFMSUBADD213PD -7(CX)(DX*4), Z3, K4, Z28 // 6262e54ca7a491f9ffffff
+ VFMSUBADD213PD Z9, Z30, K4, Z28 // 62428d44a7e1
+ VFMSUBADD213PD Z19, Z30, K4, Z28 // 62228d44a7e3
+ VFMSUBADD213PD 15(R8)(R14*4), Z30, K4, Z28 // 62028d44a7a4b00f000000
+ VFMSUBADD213PD -7(CX)(DX*4), Z30, K4, Z28 // 62628d44a7a491f9ffffff
+ VFMSUBADD213PS X0, X12, K7, X15 // 62721d0fa7f8
+ VFMSUBADD213PS 7(AX), X12, K7, X15 // 62721d0fa7b807000000
+ VFMSUBADD213PS (DI), X12, K7, X15 // 62721d0fa73f
+ VFMSUBADD213PS Y2, Y28, K2, Y31 // 62621d22a7fa
+ VFMSUBADD213PS -7(CX)(DX*1), Y28, K2, Y31 // 62621d22a7bc11f9ffffff
+ VFMSUBADD213PS -15(R14)(R15*4), Y28, K2, Y31 // 62021d22a7bcbef1ffffff
+ VFMSUBADD213PS Z18, Z11, K5, Z12 // 6232254da7e2
+ VFMSUBADD213PS Z24, Z11, K5, Z12 // 6212254da7e0
+ VFMSUBADD213PS Z18, Z5, K5, Z12 // 6232554da7e2
+ VFMSUBADD213PS Z24, Z5, K5, Z12 // 6212554da7e0
+ VFMSUBADD213PS Z18, Z11, K5, Z22 // 62a2254da7f2
+ VFMSUBADD213PS Z24, Z11, K5, Z22 // 6282254da7f0
+ VFMSUBADD213PS Z18, Z5, K5, Z22 // 62a2554da7f2
+ VFMSUBADD213PS Z24, Z5, K5, Z22 // 6282554da7f0
+ VFMSUBADD213PS Z6, Z7, K3, Z2 // 62f2454ba7d6
+ VFMSUBADD213PS Z16, Z7, K3, Z2 // 62b2454ba7d0
+ VFMSUBADD213PS (R8), Z7, K3, Z2 // 62d2454ba710
+ VFMSUBADD213PS 15(DX)(BX*2), Z7, K3, Z2 // 62f2454ba7945a0f000000
+ VFMSUBADD213PS Z6, Z13, K3, Z2 // 62f2154ba7d6
+ VFMSUBADD213PS Z16, Z13, K3, Z2 // 62b2154ba7d0
+ VFMSUBADD213PS (R8), Z13, K3, Z2 // 62d2154ba710
+ VFMSUBADD213PS 15(DX)(BX*2), Z13, K3, Z2 // 62f2154ba7945a0f000000
+ VFMSUBADD213PS Z6, Z7, K3, Z21 // 62e2454ba7ee
+ VFMSUBADD213PS Z16, Z7, K3, Z21 // 62a2454ba7e8
+ VFMSUBADD213PS (R8), Z7, K3, Z21 // 62c2454ba728
+ VFMSUBADD213PS 15(DX)(BX*2), Z7, K3, Z21 // 62e2454ba7ac5a0f000000
+ VFMSUBADD213PS Z6, Z13, K3, Z21 // 62e2154ba7ee
+ VFMSUBADD213PS Z16, Z13, K3, Z21 // 62a2154ba7e8
+ VFMSUBADD213PS (R8), Z13, K3, Z21 // 62c2154ba728
+ VFMSUBADD213PS 15(DX)(BX*2), Z13, K3, Z21 // 62e2154ba7ac5a0f000000
+ VFMSUBADD231PD X5, X14, K4, X12 // 62728d0cb7e5
+ VFMSUBADD231PD 99(R15)(R15*1), X14, K4, X12 // 62128d0cb7a43f63000000
+ VFMSUBADD231PD (DX), X14, K4, X12 // 62728d0cb722
+ VFMSUBADD231PD Y0, Y27, K2, Y24 // 6262a522b7c0
+ VFMSUBADD231PD 15(DX)(BX*1), Y27, K2, Y24 // 6262a522b7841a0f000000
+ VFMSUBADD231PD -7(CX)(DX*2), Y27, K2, Y24 // 6262a522b78451f9ffffff
+ VFMSUBADD231PD Z13, Z1, K2, Z6 // 62d2f54ab7f5
+ VFMSUBADD231PD Z13, Z15, K2, Z6 // 62d2854ab7f5
+ VFMSUBADD231PD Z13, Z1, K2, Z22 // 62c2f54ab7f5
+ VFMSUBADD231PD Z13, Z15, K2, Z22 // 62c2854ab7f5
+ VFMSUBADD231PD Z2, Z22, K3, Z18 // 62e2cd43b7d2
+ VFMSUBADD231PD Z31, Z22, K3, Z18 // 6282cd43b7d7
+ VFMSUBADD231PD 17(SP)(BP*1), Z22, K3, Z18 // 62e2cd43b7942c11000000
+ VFMSUBADD231PD -7(CX)(DX*8), Z22, K3, Z18 // 62e2cd43b794d1f9ffffff
+ VFMSUBADD231PD Z2, Z7, K3, Z18 // 62e2c54bb7d2
+ VFMSUBADD231PD Z31, Z7, K3, Z18 // 6282c54bb7d7
+ VFMSUBADD231PD 17(SP)(BP*1), Z7, K3, Z18 // 62e2c54bb7942c11000000
+ VFMSUBADD231PD -7(CX)(DX*8), Z7, K3, Z18 // 62e2c54bb794d1f9ffffff
+ VFMSUBADD231PD Z2, Z22, K3, Z8 // 6272cd43b7c2
+ VFMSUBADD231PD Z31, Z22, K3, Z8 // 6212cd43b7c7
+ VFMSUBADD231PD 17(SP)(BP*1), Z22, K3, Z8 // 6272cd43b7842c11000000
+ VFMSUBADD231PD -7(CX)(DX*8), Z22, K3, Z8 // 6272cd43b784d1f9ffffff
+ VFMSUBADD231PD Z2, Z7, K3, Z8 // 6272c54bb7c2
+ VFMSUBADD231PD Z31, Z7, K3, Z8 // 6212c54bb7c7
+ VFMSUBADD231PD 17(SP)(BP*1), Z7, K3, Z8 // 6272c54bb7842c11000000
+ VFMSUBADD231PD -7(CX)(DX*8), Z7, K3, Z8 // 6272c54bb784d1f9ffffff
+ VFMSUBADD231PS X8, X15, K3, X17 // 62c2050bb7c8
+ VFMSUBADD231PS -17(BP)(SI*8), X15, K3, X17 // 62e2050bb78cf5efffffff
+ VFMSUBADD231PS (R15), X15, K3, X17 // 62c2050bb70f
+ VFMSUBADD231PS Y3, Y31, K3, Y11 // 62720523b7db
+ VFMSUBADD231PS -17(BP), Y31, K3, Y11 // 62720523b79defffffff
+ VFMSUBADD231PS -15(R14)(R15*8), Y31, K3, Y11 // 62120523b79cfef1ffffff
+ VFMSUBADD231PS Z12, Z1, K2, Z20 // 62c2754ab7e4
+ VFMSUBADD231PS Z16, Z1, K2, Z20 // 62a2754ab7e0
+ VFMSUBADD231PS Z12, Z3, K2, Z20 // 62c2654ab7e4
+ VFMSUBADD231PS Z16, Z3, K2, Z20 // 62a2654ab7e0
+ VFMSUBADD231PS Z12, Z1, K2, Z9 // 6252754ab7cc
+ VFMSUBADD231PS Z16, Z1, K2, Z9 // 6232754ab7c8
+ VFMSUBADD231PS Z12, Z3, K2, Z9 // 6252654ab7cc
+ VFMSUBADD231PS Z16, Z3, K2, Z9 // 6232654ab7c8
+ VFMSUBADD231PS Z3, Z14, K1, Z28 // 62620d49b7e3
+ VFMSUBADD231PS Z12, Z14, K1, Z28 // 62420d49b7e4
+ VFMSUBADD231PS -17(BP)(SI*2), Z14, K1, Z28 // 62620d49b7a475efffffff
+ VFMSUBADD231PS 7(AX)(CX*2), Z14, K1, Z28 // 62620d49b7a44807000000
+ VFMSUBADD231PS Z3, Z28, K1, Z28 // 62621d41b7e3
+ VFMSUBADD231PS Z12, Z28, K1, Z28 // 62421d41b7e4
+ VFMSUBADD231PS -17(BP)(SI*2), Z28, K1, Z28 // 62621d41b7a475efffffff
+ VFMSUBADD231PS 7(AX)(CX*2), Z28, K1, Z28 // 62621d41b7a44807000000
+ VFMSUBADD231PS Z3, Z14, K1, Z13 // 62720d49b7eb
+ VFMSUBADD231PS Z12, Z14, K1, Z13 // 62520d49b7ec
+ VFMSUBADD231PS -17(BP)(SI*2), Z14, K1, Z13 // 62720d49b7ac75efffffff
+ VFMSUBADD231PS 7(AX)(CX*2), Z14, K1, Z13 // 62720d49b7ac4807000000
+ VFMSUBADD231PS Z3, Z28, K1, Z13 // 62721d41b7eb
+ VFMSUBADD231PS Z12, Z28, K1, Z13 // 62521d41b7ec
+ VFMSUBADD231PS -17(BP)(SI*2), Z28, K1, Z13 // 62721d41b7ac75efffffff
+ VFMSUBADD231PS 7(AX)(CX*2), Z28, K1, Z13 // 62721d41b7ac4807000000
+ VFNMADD132PD X23, X26, K2, X3 // 62b2ad029cdf
+ VFNMADD132PD 7(SI)(DI*8), X26, K2, X3 // 62f2ad029c9cfe07000000
+ VFNMADD132PD -15(R14), X26, K2, X3 // 62d2ad029c9ef1ffffff
+ VFNMADD132PD Y13, Y2, K1, Y14 // 6252ed299cf5
+ VFNMADD132PD 17(SP)(BP*2), Y2, K1, Y14 // 6272ed299cb46c11000000
+ VFNMADD132PD -7(DI)(R8*4), Y2, K1, Y14 // 6232ed299cb487f9ffffff
+ VFNMADD132PD Z5, Z19, K7, Z15 // 6272e5479cfd
+ VFNMADD132PD Z1, Z19, K7, Z15 // 6272e5479cf9
+ VFNMADD132PD Z5, Z15, K7, Z15 // 6272854f9cfd
+ VFNMADD132PD Z1, Z15, K7, Z15 // 6272854f9cf9
+ VFNMADD132PD Z5, Z19, K7, Z30 // 6262e5479cf5
+ VFNMADD132PD Z1, Z19, K7, Z30 // 6262e5479cf1
+ VFNMADD132PD Z5, Z15, K7, Z30 // 6262854f9cf5
+ VFNMADD132PD Z1, Z15, K7, Z30 // 6262854f9cf1
+ VFNMADD132PD Z21, Z14, K1, Z3 // 62b28d499cdd
+ VFNMADD132PD Z8, Z14, K1, Z3 // 62d28d499cd8
+ VFNMADD132PD 15(R8)(R14*1), Z14, K1, Z3 // 62928d499c9c300f000000
+ VFNMADD132PD 15(R8)(R14*2), Z14, K1, Z3 // 62928d499c9c700f000000
+ VFNMADD132PD Z21, Z15, K1, Z3 // 62b285499cdd
+ VFNMADD132PD Z8, Z15, K1, Z3 // 62d285499cd8
+ VFNMADD132PD 15(R8)(R14*1), Z15, K1, Z3 // 629285499c9c300f000000
+ VFNMADD132PD 15(R8)(R14*2), Z15, K1, Z3 // 629285499c9c700f000000
+ VFNMADD132PD Z21, Z14, K1, Z5 // 62b28d499ced
+ VFNMADD132PD Z8, Z14, K1, Z5 // 62d28d499ce8
+ VFNMADD132PD 15(R8)(R14*1), Z14, K1, Z5 // 62928d499cac300f000000
+ VFNMADD132PD 15(R8)(R14*2), Z14, K1, Z5 // 62928d499cac700f000000
+ VFNMADD132PD Z21, Z15, K1, Z5 // 62b285499ced
+ VFNMADD132PD Z8, Z15, K1, Z5 // 62d285499ce8
+ VFNMADD132PD 15(R8)(R14*1), Z15, K1, Z5 // 629285499cac300f000000
+ VFNMADD132PD 15(R8)(R14*2), Z15, K1, Z5 // 629285499cac700f000000
+ VFNMADD132PS X24, X28, K1, X13 // 62121d019ce8
+ VFNMADD132PS 7(SI)(DI*1), X28, K1, X13 // 62721d019cac3e07000000
+ VFNMADD132PS 15(DX)(BX*8), X28, K1, X13 // 62721d019cacda0f000000
+ VFNMADD132PS Y22, Y15, K1, Y27 // 622205299cde
+ VFNMADD132PS 15(R8), Y15, K1, Y27 // 624205299c980f000000
+ VFNMADD132PS (BP), Y15, K1, Y27 // 626205299c5d00
+ VFNMADD132PS Z23, Z20, K7, Z16 // 62a25d479cc7
+ VFNMADD132PS Z19, Z20, K7, Z16 // 62a25d479cc3
+ VFNMADD132PS Z23, Z0, K7, Z16 // 62a27d4f9cc7
+ VFNMADD132PS Z19, Z0, K7, Z16 // 62a27d4f9cc3
+ VFNMADD132PS Z23, Z20, K7, Z9 // 62325d479ccf
+ VFNMADD132PS Z19, Z20, K7, Z9 // 62325d479ccb
+ VFNMADD132PS Z23, Z0, K7, Z9 // 62327d4f9ccf
+ VFNMADD132PS Z19, Z0, K7, Z9 // 62327d4f9ccb
+ VFNMADD132PS Z24, Z0, K2, Z0 // 62927d4a9cc0
+ VFNMADD132PS Z12, Z0, K2, Z0 // 62d27d4a9cc4
+ VFNMADD132PS (R14), Z0, K2, Z0 // 62d27d4a9c06
+ VFNMADD132PS -7(DI)(R8*8), Z0, K2, Z0 // 62b27d4a9c84c7f9ffffff
+ VFNMADD132PS Z24, Z25, K2, Z0 // 629235429cc0
+ VFNMADD132PS Z12, Z25, K2, Z0 // 62d235429cc4
+ VFNMADD132PS (R14), Z25, K2, Z0 // 62d235429c06
+ VFNMADD132PS -7(DI)(R8*8), Z25, K2, Z0 // 62b235429c84c7f9ffffff
+ VFNMADD132PS Z24, Z0, K2, Z11 // 62127d4a9cd8
+ VFNMADD132PS Z12, Z0, K2, Z11 // 62527d4a9cdc
+ VFNMADD132PS (R14), Z0, K2, Z11 // 62527d4a9c1e
+ VFNMADD132PS -7(DI)(R8*8), Z0, K2, Z11 // 62327d4a9c9cc7f9ffffff
+ VFNMADD132PS Z24, Z25, K2, Z11 // 621235429cd8
+ VFNMADD132PS Z12, Z25, K2, Z11 // 625235429cdc
+ VFNMADD132PS (R14), Z25, K2, Z11 // 625235429c1e
+ VFNMADD132PS -7(DI)(R8*8), Z25, K2, Z11 // 623235429c9cc7f9ffffff
+ VFNMADD132SD X26, X15, K4, X9 // 6212850c9dca
+ VFNMADD132SD X1, X21, K1, X18 // 62e2d5019dd1 or 62e2d5219dd1 or 62e2d5419dd1
+ VFNMADD132SD 7(SI)(DI*8), X21, K1, X18 // 62e2d5019d94fe07000000 or 62e2d5219d94fe07000000 or 62e2d5419d94fe07000000
+ VFNMADD132SD -15(R14), X21, K1, X18 // 62c2d5019d96f1ffffff or 62c2d5219d96f1ffffff or 62c2d5419d96f1ffffff
+ VFNMADD132SS X3, X31, K3, X11 // 627205039ddb
+ VFNMADD132SS X0, X0, K4, X7 // 62f27d0c9df8 or 62f27d2c9df8 or 62f27d4c9df8
+ VFNMADD132SS 7(SI)(DI*4), X0, K4, X7 // 62f27d0c9dbcbe07000000 or 62f27d2c9dbcbe07000000 or 62f27d4c9dbcbe07000000
+ VFNMADD132SS -7(DI)(R8*2), X0, K4, X7 // 62b27d0c9dbc47f9ffffff or 62b27d2c9dbc47f9ffffff or 62b27d4c9dbc47f9ffffff
+ VFNMADD213PD X7, X20, K5, X24 // 6262dd05acc7
+ VFNMADD213PD -7(DI)(R8*1), X20, K5, X24 // 6222dd05ac8407f9ffffff
+ VFNMADD213PD (SP), X20, K5, X24 // 6262dd05ac0424
+ VFNMADD213PD Y24, Y18, K7, Y20 // 6282ed27ace0
+ VFNMADD213PD 15(R8)(R14*8), Y18, K7, Y20 // 6282ed27aca4f00f000000
+ VFNMADD213PD -15(R14)(R15*2), Y18, K7, Y20 // 6282ed27aca47ef1ffffff
+ VFNMADD213PD Z9, Z9, K7, Z0 // 62d2b54facc1
+ VFNMADD213PD Z25, Z9, K7, Z0 // 6292b54facc1
+ VFNMADD213PD Z9, Z3, K7, Z0 // 62d2e54facc1
+ VFNMADD213PD Z25, Z3, K7, Z0 // 6292e54facc1
+ VFNMADD213PD Z9, Z9, K7, Z26 // 6242b54facd1
+ VFNMADD213PD Z25, Z9, K7, Z26 // 6202b54facd1
+ VFNMADD213PD Z9, Z3, K7, Z26 // 6242e54facd1
+ VFNMADD213PD Z25, Z3, K7, Z26 // 6202e54facd1
+ VFNMADD213PD Z17, Z20, K6, Z9 // 6232dd46acc9
+ VFNMADD213PD Z0, Z20, K6, Z9 // 6272dd46acc8
+ VFNMADD213PD 99(R15)(R15*4), Z20, K6, Z9 // 6212dd46ac8cbf63000000
+ VFNMADD213PD 15(DX), Z20, K6, Z9 // 6272dd46ac8a0f000000
+ VFNMADD213PD Z17, Z0, K6, Z9 // 6232fd4eacc9
+ VFNMADD213PD Z0, Z0, K6, Z9 // 6272fd4eacc8
+ VFNMADD213PD 99(R15)(R15*4), Z0, K6, Z9 // 6212fd4eac8cbf63000000
+ VFNMADD213PD 15(DX), Z0, K6, Z9 // 6272fd4eac8a0f000000
+ VFNMADD213PD Z17, Z20, K6, Z28 // 6222dd46ace1
+ VFNMADD213PD Z0, Z20, K6, Z28 // 6262dd46ace0
+ VFNMADD213PD 99(R15)(R15*4), Z20, K6, Z28 // 6202dd46aca4bf63000000
+ VFNMADD213PD 15(DX), Z20, K6, Z28 // 6262dd46aca20f000000
+ VFNMADD213PD Z17, Z0, K6, Z28 // 6222fd4eace1
+ VFNMADD213PD Z0, Z0, K6, Z28 // 6262fd4eace0
+ VFNMADD213PD 99(R15)(R15*4), Z0, K6, Z28 // 6202fd4eaca4bf63000000
+ VFNMADD213PD 15(DX), Z0, K6, Z28 // 6262fd4eaca20f000000
+ VFNMADD213PS X14, X7, K3, X9 // 6252450bacce
+ VFNMADD213PS -7(CX), X7, K3, X9 // 6272450bac89f9ffffff
+ VFNMADD213PS 15(DX)(BX*4), X7, K3, X9 // 6272450bac8c9a0f000000
+ VFNMADD213PS Y19, Y3, K7, Y9 // 6232652faccb
+ VFNMADD213PS -15(R14)(R15*1), Y3, K7, Y9 // 6212652fac8c3ef1ffffff
+ VFNMADD213PS -15(BX), Y3, K7, Y9 // 6272652fac8bf1ffffff
+ VFNMADD213PS Z21, Z31, K4, Z17 // 62a20544accd
+ VFNMADD213PS Z9, Z31, K4, Z17 // 62c20544acc9
+ VFNMADD213PS Z21, Z0, K4, Z17 // 62a27d4caccd
+ VFNMADD213PS Z9, Z0, K4, Z17 // 62c27d4cacc9
+ VFNMADD213PS Z21, Z31, K4, Z23 // 62a20544acfd
+ VFNMADD213PS Z9, Z31, K4, Z23 // 62c20544acf9
+ VFNMADD213PS Z21, Z0, K4, Z23 // 62a27d4cacfd
+ VFNMADD213PS Z9, Z0, K4, Z23 // 62c27d4cacf9
+ VFNMADD213PS Z20, Z1, K4, Z6 // 62b2754cacf4
+ VFNMADD213PS Z9, Z1, K4, Z6 // 62d2754cacf1
+ VFNMADD213PS (CX), Z1, K4, Z6 // 62f2754cac31
+ VFNMADD213PS 99(R15), Z1, K4, Z6 // 62d2754cacb763000000
+ VFNMADD213PS Z20, Z9, K4, Z6 // 62b2354cacf4
+ VFNMADD213PS Z9, Z9, K4, Z6 // 62d2354cacf1
+ VFNMADD213PS (CX), Z9, K4, Z6 // 62f2354cac31
+ VFNMADD213PS 99(R15), Z9, K4, Z6 // 62d2354cacb763000000
+ VFNMADD213PS Z20, Z1, K4, Z9 // 6232754caccc
+ VFNMADD213PS Z9, Z1, K4, Z9 // 6252754cacc9
+ VFNMADD213PS (CX), Z1, K4, Z9 // 6272754cac09
+ VFNMADD213PS 99(R15), Z1, K4, Z9 // 6252754cac8f63000000
+ VFNMADD213PS Z20, Z9, K4, Z9 // 6232354caccc
+ VFNMADD213PS Z9, Z9, K4, Z9 // 6252354cacc9
+ VFNMADD213PS (CX), Z9, K4, Z9 // 6272354cac09
+ VFNMADD213PS 99(R15), Z9, K4, Z9 // 6252354cac8f63000000
+ VFNMADD213SD X3, X31, K7, X5 // 62f28507adeb
+ VFNMADD213SD X11, X1, K2, X21 // 62c2f50aadeb or 62c2f52aadeb or 62c2f54aadeb
+ VFNMADD213SD 7(SI)(DI*1), X1, K2, X21 // 62e2f50aadac3e07000000 or 62e2f52aadac3e07000000 or 62e2f54aadac3e07000000
+ VFNMADD213SD 15(DX)(BX*8), X1, K2, X21 // 62e2f50aadacda0f000000 or 62e2f52aadacda0f000000 or 62e2f54aadacda0f000000
+ VFNMADD213SS X30, X0, K5, X13 // 62127d0dadee
+ VFNMADD213SS X11, X14, K3, X16 // 62c20d0badc3 or 62c20d2badc3 or 62c20d4badc3
+ VFNMADD213SS 17(SP), X14, K3, X16 // 62e20d0bad842411000000 or 62e20d2bad842411000000 or 62e20d4bad842411000000
+ VFNMADD213SS -17(BP)(SI*4), X14, K3, X16 // 62e20d0bad84b5efffffff or 62e20d2bad84b5efffffff or 62e20d4bad84b5efffffff
+ VFNMADD231PD X8, X19, K4, X14 // 6252e504bcf0
+ VFNMADD231PD 99(R15)(R15*8), X19, K4, X14 // 6212e504bcb4ff63000000
+ VFNMADD231PD 7(AX)(CX*8), X19, K4, X14 // 6272e504bcb4c807000000
+ VFNMADD231PD Y14, Y19, K2, Y23 // 62c2e522bcfe
+ VFNMADD231PD 7(AX)(CX*4), Y19, K2, Y23 // 62e2e522bcbc8807000000
+ VFNMADD231PD 7(AX)(CX*1), Y19, K2, Y23 // 62e2e522bcbc0807000000
+ VFNMADD231PD Z7, Z26, K2, Z30 // 6262ad42bcf7
+ VFNMADD231PD Z21, Z26, K2, Z30 // 6222ad42bcf5
+ VFNMADD231PD Z7, Z22, K2, Z30 // 6262cd42bcf7
+ VFNMADD231PD Z21, Z22, K2, Z30 // 6222cd42bcf5
+ VFNMADD231PD Z7, Z26, K2, Z5 // 62f2ad42bcef
+ VFNMADD231PD Z21, Z26, K2, Z5 // 62b2ad42bced
+ VFNMADD231PD Z7, Z22, K2, Z5 // 62f2cd42bcef
+ VFNMADD231PD Z21, Z22, K2, Z5 // 62b2cd42bced
+ VFNMADD231PD Z12, Z14, K3, Z16 // 62c28d4bbcc4
+ VFNMADD231PD Z13, Z14, K3, Z16 // 62c28d4bbcc5
+ VFNMADD231PD 99(R15)(R15*2), Z14, K3, Z16 // 62828d4bbc847f63000000
+ VFNMADD231PD -7(DI), Z14, K3, Z16 // 62e28d4bbc87f9ffffff
+ VFNMADD231PD Z12, Z13, K3, Z16 // 62c2954bbcc4
+ VFNMADD231PD Z13, Z13, K3, Z16 // 62c2954bbcc5
+ VFNMADD231PD 99(R15)(R15*2), Z13, K3, Z16 // 6282954bbc847f63000000
+ VFNMADD231PD -7(DI), Z13, K3, Z16 // 62e2954bbc87f9ffffff
+ VFNMADD231PD Z12, Z14, K3, Z25 // 62428d4bbccc
+ VFNMADD231PD Z13, Z14, K3, Z25 // 62428d4bbccd
+ VFNMADD231PD 99(R15)(R15*2), Z14, K3, Z25 // 62028d4bbc8c7f63000000
+ VFNMADD231PD -7(DI), Z14, K3, Z25 // 62628d4bbc8ff9ffffff
+ VFNMADD231PD Z12, Z13, K3, Z25 // 6242954bbccc
+ VFNMADD231PD Z13, Z13, K3, Z25 // 6242954bbccd
+ VFNMADD231PD 99(R15)(R15*2), Z13, K3, Z25 // 6202954bbc8c7f63000000
+ VFNMADD231PD -7(DI), Z13, K3, Z25 // 6262954bbc8ff9ffffff
+ VFNMADD231PS X23, X26, K3, X8 // 62322d03bcc7
+ VFNMADD231PS (AX), X26, K3, X8 // 62722d03bc00
+ VFNMADD231PS 7(SI), X26, K3, X8 // 62722d03bc8607000000
+ VFNMADD231PS Y16, Y5, K3, Y21 // 62a2552bbce8
+ VFNMADD231PS (SI), Y5, K3, Y21 // 62e2552bbc2e
+ VFNMADD231PS 7(SI)(DI*2), Y5, K3, Y21 // 62e2552bbcac7e07000000
+ VFNMADD231PS Z27, Z2, K2, Z21 // 62826d4abceb
+ VFNMADD231PS Z25, Z2, K2, Z21 // 62826d4abce9
+ VFNMADD231PS Z27, Z7, K2, Z21 // 6282454abceb
+ VFNMADD231PS Z25, Z7, K2, Z21 // 6282454abce9
+ VFNMADD231PS Z27, Z2, K2, Z9 // 62126d4abccb
+ VFNMADD231PS Z25, Z2, K2, Z9 // 62126d4abcc9
+ VFNMADD231PS Z27, Z7, K2, Z9 // 6212454abccb
+ VFNMADD231PS Z25, Z7, K2, Z9 // 6212454abcc9
+ VFNMADD231PS Z3, Z27, K1, Z23 // 62e22541bcfb
+ VFNMADD231PS Z0, Z27, K1, Z23 // 62e22541bcf8
+ VFNMADD231PS -7(CX)(DX*1), Z27, K1, Z23 // 62e22541bcbc11f9ffffff
+ VFNMADD231PS -15(R14)(R15*4), Z27, K1, Z23 // 62822541bcbcbef1ffffff
+ VFNMADD231PS Z3, Z14, K1, Z23 // 62e20d49bcfb
+ VFNMADD231PS Z0, Z14, K1, Z23 // 62e20d49bcf8
+ VFNMADD231PS -7(CX)(DX*1), Z14, K1, Z23 // 62e20d49bcbc11f9ffffff
+ VFNMADD231PS -15(R14)(R15*4), Z14, K1, Z23 // 62820d49bcbcbef1ffffff
+ VFNMADD231PS Z3, Z27, K1, Z9 // 62722541bccb
+ VFNMADD231PS Z0, Z27, K1, Z9 // 62722541bcc8
+ VFNMADD231PS -7(CX)(DX*1), Z27, K1, Z9 // 62722541bc8c11f9ffffff
+ VFNMADD231PS -15(R14)(R15*4), Z27, K1, Z9 // 62122541bc8cbef1ffffff
+ VFNMADD231PS Z3, Z14, K1, Z9 // 62720d49bccb
+ VFNMADD231PS Z0, Z14, K1, Z9 // 62720d49bcc8
+ VFNMADD231PS -7(CX)(DX*1), Z14, K1, Z9 // 62720d49bc8c11f9ffffff
+ VFNMADD231PS -15(R14)(R15*4), Z14, K1, Z9 // 62120d49bc8cbef1ffffff
+ VFNMADD231SD X23, X16, K2, X12 // 6232fd02bde7
+ VFNMADD231SD X31, X11, K1, X23 // 6282a509bdff or 6282a529bdff or 6282a549bdff
+ VFNMADD231SD -7(DI)(R8*1), X11, K1, X23 // 62a2a509bdbc07f9ffffff or 62a2a529bdbc07f9ffffff or 62a2a549bdbc07f9ffffff
+ VFNMADD231SD (SP), X11, K1, X23 // 62e2a509bd3c24 or 62e2a529bd3c24 or 62e2a549bd3c24
+ VFNMADD231SS X0, X14, K7, X24 // 62620d0fbdc0
+ VFNMADD231SS X2, X23, K1, X11 // 62724501bdda or 62724521bdda or 62724541bdda
+ VFNMADD231SS 7(AX), X23, K1, X11 // 62724501bd9807000000 or 62724521bd9807000000 or 62724541bd9807000000
+ VFNMADD231SS (DI), X23, K1, X11 // 62724501bd1f or 62724521bd1f or 62724541bd1f
+ VFNMSUB132PD X25, X5, K1, X20 // 6282d5099ee1
+ VFNMSUB132PD (BX), X5, K1, X20 // 62e2d5099e23
+ VFNMSUB132PD -17(BP)(SI*1), X5, K1, X20 // 62e2d5099ea435efffffff
+ VFNMSUB132PD Y20, Y21, K1, Y2 // 62b2d5219ed4
+ VFNMSUB132PD 17(SP)(BP*8), Y21, K1, Y2 // 62f2d5219e94ec11000000
+ VFNMSUB132PD 17(SP)(BP*4), Y21, K1, Y2 // 62f2d5219e94ac11000000
+ VFNMSUB132PD Z22, Z8, K7, Z14 // 6232bd4f9ef6
+ VFNMSUB132PD Z25, Z8, K7, Z14 // 6212bd4f9ef1
+ VFNMSUB132PD Z22, Z24, K7, Z14 // 6232bd479ef6
+ VFNMSUB132PD Z25, Z24, K7, Z14 // 6212bd479ef1
+ VFNMSUB132PD Z22, Z8, K7, Z7 // 62b2bd4f9efe
+ VFNMSUB132PD Z25, Z8, K7, Z7 // 6292bd4f9ef9
+ VFNMSUB132PD Z22, Z24, K7, Z7 // 62b2bd479efe
+ VFNMSUB132PD Z25, Z24, K7, Z7 // 6292bd479ef9
+ VFNMSUB132PD Z0, Z6, K2, Z1 // 62f2cd4a9ec8
+ VFNMSUB132PD Z8, Z6, K2, Z1 // 62d2cd4a9ec8
+ VFNMSUB132PD 15(DX)(BX*1), Z6, K2, Z1 // 62f2cd4a9e8c1a0f000000
+ VFNMSUB132PD -7(CX)(DX*2), Z6, K2, Z1 // 62f2cd4a9e8c51f9ffffff
+ VFNMSUB132PD Z0, Z2, K2, Z1 // 62f2ed4a9ec8
+ VFNMSUB132PD Z8, Z2, K2, Z1 // 62d2ed4a9ec8
+ VFNMSUB132PD 15(DX)(BX*1), Z2, K2, Z1 // 62f2ed4a9e8c1a0f000000
+ VFNMSUB132PD -7(CX)(DX*2), Z2, K2, Z1 // 62f2ed4a9e8c51f9ffffff
+ VFNMSUB132PD Z0, Z6, K2, Z16 // 62e2cd4a9ec0
+ VFNMSUB132PD Z8, Z6, K2, Z16 // 62c2cd4a9ec0
+ VFNMSUB132PD 15(DX)(BX*1), Z6, K2, Z16 // 62e2cd4a9e841a0f000000
+ VFNMSUB132PD -7(CX)(DX*2), Z6, K2, Z16 // 62e2cd4a9e8451f9ffffff
+ VFNMSUB132PD Z0, Z2, K2, Z16 // 62e2ed4a9ec0
+ VFNMSUB132PD Z8, Z2, K2, Z16 // 62c2ed4a9ec0
+ VFNMSUB132PD 15(DX)(BX*1), Z2, K2, Z16 // 62e2ed4a9e841a0f000000
+ VFNMSUB132PD -7(CX)(DX*2), Z2, K2, Z16 // 62e2ed4a9e8451f9ffffff
+ VFNMSUB132PS X13, X9, K4, X0 // 62d2350c9ec5
+ VFNMSUB132PS 15(R8)(R14*4), X9, K4, X0 // 6292350c9e84b00f000000
+ VFNMSUB132PS -7(CX)(DX*4), X9, K4, X0 // 62f2350c9e8491f9ffffff
+ VFNMSUB132PS Y6, Y31, K1, Y6 // 62f205219ef6
+ VFNMSUB132PS 7(SI)(DI*4), Y31, K1, Y6 // 62f205219eb4be07000000
+ VFNMSUB132PS -7(DI)(R8*2), Y31, K1, Y6 // 62b205219eb447f9ffffff
+ VFNMSUB132PS Z11, Z14, K3, Z15 // 62520d4b9efb
+ VFNMSUB132PS Z5, Z14, K3, Z15 // 62720d4b9efd
+ VFNMSUB132PS Z11, Z27, K3, Z15 // 625225439efb
+ VFNMSUB132PS Z5, Z27, K3, Z15 // 627225439efd
+ VFNMSUB132PS Z11, Z14, K3, Z12 // 62520d4b9ee3
+ VFNMSUB132PS Z5, Z14, K3, Z12 // 62720d4b9ee5
+ VFNMSUB132PS Z11, Z27, K3, Z12 // 625225439ee3
+ VFNMSUB132PS Z5, Z27, K3, Z12 // 627225439ee5
+ VFNMSUB132PS Z2, Z5, K4, Z13 // 6272554c9eea
+ VFNMSUB132PS -17(BP), Z5, K4, Z13 // 6272554c9eadefffffff
+ VFNMSUB132PS -15(R14)(R15*8), Z5, K4, Z13 // 6212554c9eacfef1ffffff
+ VFNMSUB132PS Z2, Z23, K4, Z13 // 627245449eea
+ VFNMSUB132PS -17(BP), Z23, K4, Z13 // 627245449eadefffffff
+ VFNMSUB132PS -15(R14)(R15*8), Z23, K4, Z13 // 621245449eacfef1ffffff
+ VFNMSUB132PS Z2, Z5, K4, Z14 // 6272554c9ef2
+ VFNMSUB132PS -17(BP), Z5, K4, Z14 // 6272554c9eb5efffffff
+ VFNMSUB132PS -15(R14)(R15*8), Z5, K4, Z14 // 6212554c9eb4fef1ffffff
+ VFNMSUB132PS Z2, Z23, K4, Z14 // 627245449ef2
+ VFNMSUB132PS -17(BP), Z23, K4, Z14 // 627245449eb5efffffff
+ VFNMSUB132PS -15(R14)(R15*8), Z23, K4, Z14 // 621245449eb4fef1ffffff
+ VFNMSUB132SD X9, X8, K5, X2 // 62d2bd0d9fd1
+ VFNMSUB132SD X11, X31, K7, X2 // 62d285079fd3 or 62d285279fd3 or 62d285479fd3
+ VFNMSUB132SD -7(CX), X31, K7, X2 // 62f285079f91f9ffffff or 62f285279f91f9ffffff or 62f285479f91f9ffffff
+ VFNMSUB132SD 15(DX)(BX*4), X31, K7, X2 // 62f285079f949a0f000000 or 62f285279f949a0f000000 or 62f285479f949a0f000000
+ VFNMSUB132SS X14, X5, K7, X22 // 62c2550f9ff6
+ VFNMSUB132SS X7, X17, K6, X0 // 62f275069fc7 or 62f275269fc7 or 62f275469fc7
+ VFNMSUB132SS 99(R15)(R15*1), X17, K6, X0 // 629275069f843f63000000 or 629275269f843f63000000 or 629275469f843f63000000
+ VFNMSUB132SS (DX), X17, K6, X0 // 62f275069f02 or 62f275269f02 or 62f275469f02
+ VFNMSUB213PD X0, X11, K3, X15 // 6272a50baef8
+ VFNMSUB213PD (R8), X11, K3, X15 // 6252a50bae38
+ VFNMSUB213PD 15(DX)(BX*2), X11, K3, X15 // 6272a50baebc5a0f000000
+ VFNMSUB213PD Y7, Y19, K7, Y11 // 6272e527aedf
+ VFNMSUB213PD 17(SP), Y19, K7, Y11 // 6272e527ae9c2411000000
+ VFNMSUB213PD -17(BP)(SI*4), Y19, K7, Y11 // 6272e527ae9cb5efffffff
+ VFNMSUB213PD Z28, Z26, K4, Z6 // 6292ad44aef4
+ VFNMSUB213PD Z6, Z26, K4, Z6 // 62f2ad44aef6
+ VFNMSUB213PD Z28, Z14, K4, Z6 // 62928d4caef4
+ VFNMSUB213PD Z6, Z14, K4, Z6 // 62f28d4caef6
+ VFNMSUB213PD Z28, Z26, K4, Z14 // 6212ad44aef4
+ VFNMSUB213PD Z6, Z26, K4, Z14 // 6272ad44aef6
+ VFNMSUB213PD Z28, Z14, K4, Z14 // 62128d4caef4
+ VFNMSUB213PD Z6, Z14, K4, Z14 // 62728d4caef6
+ VFNMSUB213PD Z3, Z26, K4, Z13 // 6272ad44aeeb
+ VFNMSUB213PD Z0, Z26, K4, Z13 // 6272ad44aee8
+ VFNMSUB213PD 17(SP)(BP*2), Z26, K4, Z13 // 6272ad44aeac6c11000000
+ VFNMSUB213PD -7(DI)(R8*4), Z26, K4, Z13 // 6232ad44aeac87f9ffffff
+ VFNMSUB213PD Z3, Z3, K4, Z13 // 6272e54caeeb
+ VFNMSUB213PD Z0, Z3, K4, Z13 // 6272e54caee8
+ VFNMSUB213PD 17(SP)(BP*2), Z3, K4, Z13 // 6272e54caeac6c11000000
+ VFNMSUB213PD -7(DI)(R8*4), Z3, K4, Z13 // 6232e54caeac87f9ffffff
+ VFNMSUB213PD Z3, Z26, K4, Z21 // 62e2ad44aeeb
+ VFNMSUB213PD Z0, Z26, K4, Z21 // 62e2ad44aee8
+ VFNMSUB213PD 17(SP)(BP*2), Z26, K4, Z21 // 62e2ad44aeac6c11000000
+ VFNMSUB213PD -7(DI)(R8*4), Z26, K4, Z21 // 62a2ad44aeac87f9ffffff
+ VFNMSUB213PD Z3, Z3, K4, Z21 // 62e2e54caeeb
+ VFNMSUB213PD Z0, Z3, K4, Z21 // 62e2e54caee8
+ VFNMSUB213PD 17(SP)(BP*2), Z3, K4, Z21 // 62e2e54caeac6c11000000
+ VFNMSUB213PD -7(DI)(R8*4), Z3, K4, Z21 // 62a2e54caeac87f9ffffff
+ VFNMSUB213PS X27, X8, K7, X18 // 62823d0faed3
+ VFNMSUB213PS 17(SP)(BP*1), X8, K7, X18 // 62e23d0fae942c11000000
+ VFNMSUB213PS -7(CX)(DX*8), X8, K7, X18 // 62e23d0fae94d1f9ffffff
+ VFNMSUB213PS Y3, Y0, K2, Y6 // 62f27d2aaef3
+ VFNMSUB213PS 7(AX), Y0, K2, Y6 // 62f27d2aaeb007000000
+ VFNMSUB213PS (DI), Y0, K2, Y6 // 62f27d2aae37
+ VFNMSUB213PS Z3, Z11, K5, Z21 // 62e2254daeeb
+ VFNMSUB213PS Z12, Z11, K5, Z21 // 62c2254daeec
+ VFNMSUB213PS Z3, Z25, K5, Z21 // 62e23545aeeb
+ VFNMSUB213PS Z12, Z25, K5, Z21 // 62c23545aeec
+ VFNMSUB213PS Z3, Z11, K5, Z13 // 6272254daeeb
+ VFNMSUB213PS Z12, Z11, K5, Z13 // 6252254daeec
+ VFNMSUB213PS Z3, Z25, K5, Z13 // 62723545aeeb
+ VFNMSUB213PS Z12, Z25, K5, Z13 // 62523545aeec
+ VFNMSUB213PS Z23, Z23, K3, Z27 // 62224543aedf
+ VFNMSUB213PS Z6, Z23, K3, Z27 // 62624543aede
+ VFNMSUB213PS 15(R8), Z23, K3, Z27 // 62424543ae980f000000
+ VFNMSUB213PS (BP), Z23, K3, Z27 // 62624543ae5d00
+ VFNMSUB213PS Z23, Z5, K3, Z27 // 6222554baedf
+ VFNMSUB213PS Z6, Z5, K3, Z27 // 6262554baede
+ VFNMSUB213PS 15(R8), Z5, K3, Z27 // 6242554bae980f000000
+ VFNMSUB213PS (BP), Z5, K3, Z27 // 6262554bae5d00
+ VFNMSUB213PS Z23, Z23, K3, Z15 // 62324543aeff
+ VFNMSUB213PS Z6, Z23, K3, Z15 // 62724543aefe
+ VFNMSUB213PS 15(R8), Z23, K3, Z15 // 62524543aeb80f000000
+ VFNMSUB213PS (BP), Z23, K3, Z15 // 62724543ae7d00
+ VFNMSUB213PS Z23, Z5, K3, Z15 // 6232554baeff
+ VFNMSUB213PS Z6, Z5, K3, Z15 // 6272554baefe
+ VFNMSUB213PS 15(R8), Z5, K3, Z15 // 6252554baeb80f000000
+ VFNMSUB213PS (BP), Z5, K3, Z15 // 6272554bae7d00
+ VFNMSUB213SD X18, X3, K4, X25 // 6222e50cafca
+ VFNMSUB213SD X15, X28, K2, X15 // 62529d02afff or 62529d22afff or 62529d42afff
+ VFNMSUB213SD 99(R15)(R15*8), X28, K2, X15 // 62129d02afbcff63000000 or 62129d22afbcff63000000 or 62129d42afbcff63000000
+ VFNMSUB213SD 7(AX)(CX*8), X28, K2, X15 // 62729d02afbcc807000000 or 62729d22afbcc807000000 or 62729d42afbcc807000000
+ VFNMSUB213SS X8, X13, K2, X7 // 62d2150aaff8
+ VFNMSUB213SS X0, X7, K3, X24 // 6262450bafc0 or 6262452bafc0 or 6262454bafc0
+ VFNMSUB213SS -17(BP)(SI*8), X7, K3, X24 // 6262450baf84f5efffffff or 6262452baf84f5efffffff or 6262454baf84f5efffffff
+ VFNMSUB213SS (R15), X7, K3, X24 // 6242450baf07 or 6242452baf07 or 6242454baf07
+ VFNMSUB231PD X11, X1, K3, X22 // 62c2f50bbef3
+ VFNMSUB231PD -17(BP)(SI*2), X1, K3, X22 // 62e2f50bbeb475efffffff
+ VFNMSUB231PD 7(AX)(CX*2), X1, K3, X22 // 62e2f50bbeb44807000000
+ VFNMSUB231PD Y12, Y20, K3, Y5 // 62d2dd23beec
+ VFNMSUB231PD 99(R15)(R15*1), Y20, K3, Y5 // 6292dd23beac3f63000000
+ VFNMSUB231PD (DX), Y20, K3, Y5 // 62f2dd23be2a
+ VFNMSUB231PD Z16, Z21, K2, Z8 // 6232d542bec0
+ VFNMSUB231PD Z13, Z21, K2, Z8 // 6252d542bec5
+ VFNMSUB231PD Z16, Z5, K2, Z8 // 6232d54abec0
+ VFNMSUB231PD Z13, Z5, K2, Z8 // 6252d54abec5
+ VFNMSUB231PD Z16, Z21, K2, Z28 // 6222d542bee0
+ VFNMSUB231PD Z13, Z21, K2, Z28 // 6242d542bee5
+ VFNMSUB231PD Z16, Z5, K2, Z28 // 6222d54abee0
+ VFNMSUB231PD Z13, Z5, K2, Z28 // 6242d54abee5
+ VFNMSUB231PD Z6, Z22, K1, Z12 // 6272cd41bee6
+ VFNMSUB231PD Z8, Z22, K1, Z12 // 6252cd41bee0
+ VFNMSUB231PD 15(R8)(R14*8), Z22, K1, Z12 // 6212cd41bea4f00f000000
+ VFNMSUB231PD -15(R14)(R15*2), Z22, K1, Z12 // 6212cd41bea47ef1ffffff
+ VFNMSUB231PD Z6, Z11, K1, Z12 // 6272a549bee6
+ VFNMSUB231PD Z8, Z11, K1, Z12 // 6252a549bee0
+ VFNMSUB231PD 15(R8)(R14*8), Z11, K1, Z12 // 6212a549bea4f00f000000
+ VFNMSUB231PD -15(R14)(R15*2), Z11, K1, Z12 // 6212a549bea47ef1ffffff
+ VFNMSUB231PD Z6, Z22, K1, Z27 // 6262cd41bede
+ VFNMSUB231PD Z8, Z22, K1, Z27 // 6242cd41bed8
+ VFNMSUB231PD 15(R8)(R14*8), Z22, K1, Z27 // 6202cd41be9cf00f000000
+ VFNMSUB231PD -15(R14)(R15*2), Z22, K1, Z27 // 6202cd41be9c7ef1ffffff
+ VFNMSUB231PD Z6, Z11, K1, Z27 // 6262a549bede
+ VFNMSUB231PD Z8, Z11, K1, Z27 // 6242a549bed8
+ VFNMSUB231PD 15(R8)(R14*8), Z11, K1, Z27 // 6202a549be9cf00f000000
+ VFNMSUB231PD -15(R14)(R15*2), Z11, K1, Z27 // 6202a549be9c7ef1ffffff
+ VFNMSUB231PS X8, X7, K2, X6 // 62d2450abef0
+ VFNMSUB231PS 15(R8)(R14*1), X7, K2, X6 // 6292450abeb4300f000000
+ VFNMSUB231PS 15(R8)(R14*2), X7, K2, X6 // 6292450abeb4700f000000
+ VFNMSUB231PS Y28, Y5, K1, Y3 // 62925529bedc
+ VFNMSUB231PS -17(BP)(SI*8), Y5, K1, Y3 // 62f25529be9cf5efffffff
+ VFNMSUB231PS (R15), Y5, K1, Y3 // 62d25529be1f
+ VFNMSUB231PS Z9, Z12, K7, Z25 // 62421d4fbec9
+ VFNMSUB231PS Z12, Z12, K7, Z25 // 62421d4fbecc
+ VFNMSUB231PS Z9, Z17, K7, Z25 // 62427547bec9
+ VFNMSUB231PS Z12, Z17, K7, Z25 // 62427547becc
+ VFNMSUB231PS Z9, Z12, K7, Z12 // 62521d4fbee1
+ VFNMSUB231PS Z12, Z12, K7, Z12 // 62521d4fbee4
+ VFNMSUB231PS Z9, Z17, K7, Z12 // 62527547bee1
+ VFNMSUB231PS Z12, Z17, K7, Z12 // 62527547bee4
+ VFNMSUB231PS Z8, Z3, K1, Z6 // 62d26549bef0
+ VFNMSUB231PS Z2, Z3, K1, Z6 // 62f26549bef2
+ VFNMSUB231PS -15(R14)(R15*1), Z3, K1, Z6 // 62926549beb43ef1ffffff
+ VFNMSUB231PS -15(BX), Z3, K1, Z6 // 62f26549beb3f1ffffff
+ VFNMSUB231PS Z8, Z21, K1, Z6 // 62d25541bef0
+ VFNMSUB231PS Z2, Z21, K1, Z6 // 62f25541bef2
+ VFNMSUB231PS -15(R14)(R15*1), Z21, K1, Z6 // 62925541beb43ef1ffffff
+ VFNMSUB231PS -15(BX), Z21, K1, Z6 // 62f25541beb3f1ffffff
+ VFNMSUB231PS Z8, Z3, K1, Z25 // 62426549bec8
+ VFNMSUB231PS Z2, Z3, K1, Z25 // 62626549beca
+ VFNMSUB231PS -15(R14)(R15*1), Z3, K1, Z25 // 62026549be8c3ef1ffffff
+ VFNMSUB231PS -15(BX), Z3, K1, Z25 // 62626549be8bf1ffffff
+ VFNMSUB231PS Z8, Z21, K1, Z25 // 62425541bec8
+ VFNMSUB231PS Z2, Z21, K1, Z25 // 62625541beca
+ VFNMSUB231PS -15(R14)(R15*1), Z21, K1, Z25 // 62025541be8c3ef1ffffff
+ VFNMSUB231PS -15(BX), Z21, K1, Z25 // 62625541be8bf1ffffff
+ VFNMSUB231SD X28, X3, K1, X31 // 6202e509bffc
+ VFNMSUB231SD X7, X24, K1, X20 // 62e2bd01bfe7 or 62e2bd21bfe7 or 62e2bd41bfe7
+ VFNMSUB231SD (AX), X24, K1, X20 // 62e2bd01bf20 or 62e2bd21bf20 or 62e2bd41bf20
+ VFNMSUB231SD 7(SI), X24, K1, X20 // 62e2bd01bfa607000000 or 62e2bd21bfa607000000 or 62e2bd41bfa607000000
+ VFNMSUB231SS X12, X16, K7, X20 // 62c27d07bfe4
+ VFNMSUB231SS X28, X17, K2, X6 // 62927502bff4 or 62927522bff4 or 62927542bff4
+ VFNMSUB231SS 7(SI)(DI*8), X17, K2, X6 // 62f27502bfb4fe07000000 or 62f27522bfb4fe07000000 or 62f27542bfb4fe07000000
+ VFNMSUB231SS -15(R14), X17, K2, X6 // 62d27502bfb6f1ffffff or 62d27522bfb6f1ffffff or 62d27542bfb6f1ffffff
+ VGATHERDPD (AX)(X4*1), K3, X6 // 62f2fd0b923420
+ VGATHERDPD (BP)(X10*2), K3, X6 // 62b2fd0b92745500
+ VGATHERDPD (R10)(X29*8), K3, X6 // 6292fd039234ea
+ VGATHERDPD (DX)(X10*4), K7, Y22 // 62a2fd2f923492
+ VGATHERDPD (SP)(X4*2), K7, Y22 // 62e2fd2f923464
+ VGATHERDPD (R14)(X29*8), K7, Y22 // 6282fd279234ee
+ VGATHERDPD (R10)(Y29*8), K4, Z0 // 6292fd449204ea
+ VGATHERDPD (SP)(Y4*2), K4, Z0 // 62f2fd4c920464
+ VGATHERDPD (DX)(Y10*4), K4, Z0 // 62b2fd4c920492
+ VGATHERDPD (R10)(Y29*8), K4, Z6 // 6292fd449234ea
+ VGATHERDPD (SP)(Y4*2), K4, Z6 // 62f2fd4c923464
+ VGATHERDPD (DX)(Y10*4), K4, Z6 // 62b2fd4c923492
+ VGATHERDPS (AX)(X4*1), K4, X0 // 62f27d0c920420
+ VGATHERDPS (BP)(X10*2), K4, X0 // 62b27d0c92445500
+ VGATHERDPS (R10)(X29*8), K4, X0 // 62927d049204ea
+ VGATHERDPS (R14)(Y29*8), K7, Y13 // 62127d27922cee
+ VGATHERDPS (AX)(Y4*1), K7, Y13 // 62727d2f922c20
+ VGATHERDPS (BP)(Y10*2), K7, Y13 // 62327d2f926c5500
+ VGATHERDPS (DX)(Z10*4), K2, Z20 // 62a27d4a922492
+ VGATHERDPS (AX)(Z4*1), K2, Z20 // 62e27d4a922420
+ VGATHERDPS (SP)(Z4*2), K2, Z20 // 62e27d4a922464
+ VGATHERDPS (DX)(Z10*4), K2, Z28 // 62227d4a922492
+ VGATHERDPS (AX)(Z4*1), K2, Z28 // 62627d4a922420
+ VGATHERDPS (SP)(Z4*2), K2, Z28 // 62627d4a922464
+ VGATHERQPD (AX)(X4*1), K2, X11 // 6272fd0a931c20
+ VGATHERQPD (BP)(X10*2), K2, X11 // 6232fd0a935c5500
+ VGATHERQPD (R10)(X29*8), K2, X11 // 6212fd02931cea
+ VGATHERQPD (R10)(Y29*8), K1, Y12 // 6212fd219324ea
+ VGATHERQPD (SP)(Y4*2), K1, Y12 // 6272fd29932464
+ VGATHERQPD (DX)(Y10*4), K1, Y12 // 6232fd29932492
+ VGATHERQPD (DX)(Z10*4), K2, Z3 // 62b2fd4a931c92
+ VGATHERQPD (AX)(Z4*1), K2, Z3 // 62f2fd4a931c20
+ VGATHERQPD (SP)(Z4*2), K2, Z3 // 62f2fd4a931c64
+ VGATHERQPD (DX)(Z10*4), K2, Z30 // 6222fd4a933492
+ VGATHERQPD (AX)(Z4*1), K2, Z30 // 6262fd4a933420
+ VGATHERQPD (SP)(Z4*2), K2, Z30 // 6262fd4a933464
+ VGATHERQPS (DX)(X10*4), K1, X16 // 62a27d09930492
+ VGATHERQPS (SP)(X4*2), K1, X16 // 62e27d09930464
+ VGATHERQPS (R14)(X29*8), K1, X16 // 62827d019304ee
+ VGATHERQPS (R14)(Y29*8), K7, X6 // 62927d279334ee
+ VGATHERQPS (AX)(Y4*1), K7, X6 // 62f27d2f933420
+ VGATHERQPS (BP)(Y10*2), K7, X6 // 62b27d2f93745500
+ VGATHERQPS (BP)(Z10*2), K1, Y1 // 62b27d49934c5500
+ VGATHERQPS (R10)(Z29*8), K1, Y1 // 62927d41930cea
+ VGATHERQPS (R14)(Z29*8), K1, Y1 // 62927d41930cee
+ VGETEXPPD X22, K1, X6 // 62b2fd0942f6
+ VGETEXPPD (CX), K1, X6 // 62f2fd094231
+ VGETEXPPD 99(R15), K1, X6 // 62d2fd0942b763000000
+ VGETEXPPD Y17, K1, Y14 // 6232fd2942f1
+ VGETEXPPD -7(DI)(R8*1), K1, Y14 // 6232fd2942b407f9ffffff
+ VGETEXPPD (SP), K1, Y14 // 6272fd29423424
+ VGETEXPPD Z12, K7, Z9 // 6252fd4f42cc
+ VGETEXPPD Z22, K7, Z9 // 6232fd4f42ce
+ VGETEXPPD Z12, K7, Z19 // 62c2fd4f42dc
+ VGETEXPPD Z22, K7, Z19 // 62a2fd4f42de
+ VGETEXPPD Z18, K2, Z11 // 6232fd4a42da
+ VGETEXPPD Z24, K2, Z11 // 6212fd4a42d8
+ VGETEXPPD 17(SP)(BP*8), K2, Z11 // 6272fd4a429cec11000000
+ VGETEXPPD 17(SP)(BP*4), K2, Z11 // 6272fd4a429cac11000000
+ VGETEXPPD Z18, K2, Z5 // 62b2fd4a42ea
+ VGETEXPPD Z24, K2, Z5 // 6292fd4a42e8
+ VGETEXPPD 17(SP)(BP*8), K2, Z5 // 62f2fd4a42acec11000000
+ VGETEXPPD 17(SP)(BP*4), K2, Z5 // 62f2fd4a42acac11000000
+ VGETEXPPS X16, K4, X12 // 62327d0c42e0
+ VGETEXPPS 99(R15)(R15*2), K4, X12 // 62127d0c42a47f63000000
+ VGETEXPPS -7(DI), K4, X12 // 62727d0c42a7f9ffffff
+ VGETEXPPS Y9, K1, Y7 // 62d27d2942f9
+ VGETEXPPS -7(CX), K1, Y7 // 62f27d2942b9f9ffffff
+ VGETEXPPS 15(DX)(BX*4), K1, Y7 // 62f27d2942bc9a0f000000
+ VGETEXPPS Z7, K3, Z2 // 62f27d4b42d7
+ VGETEXPPS Z13, K3, Z2 // 62d27d4b42d5
+ VGETEXPPS Z7, K3, Z21 // 62e27d4b42ef
+ VGETEXPPS Z13, K3, Z21 // 62c27d4b42ed
+ VGETEXPPS Z6, K4, Z6 // 62f27d4c42f6
+ VGETEXPPS Z22, K4, Z6 // 62b27d4c42f6
+ VGETEXPPS 7(SI)(DI*4), K4, Z6 // 62f27d4c42b4be07000000
+ VGETEXPPS -7(DI)(R8*2), K4, Z6 // 62b27d4c42b447f9ffffff
+ VGETEXPPS Z6, K4, Z16 // 62e27d4c42c6
+ VGETEXPPS Z22, K4, Z16 // 62a27d4c42c6
+ VGETEXPPS 7(SI)(DI*4), K4, Z16 // 62e27d4c4284be07000000
+ VGETEXPPS -7(DI)(R8*2), K4, Z16 // 62a27d4c428447f9ffffff
+ VGETEXPSD X15, X8, K5, X28 // 6242bd0d43e7
+ VGETEXPSD X19, X1, K7, X11 // 6232f50f43db or 6232f52f43db or 6232f54f43db
+ VGETEXPSD 15(R8)(R14*4), X1, K7, X11 // 6212f50f439cb00f000000 or 6212f52f439cb00f000000 or 6212f54f439cb00f000000
+ VGETEXPSD -7(CX)(DX*4), X1, K7, X11 // 6272f50f439c91f9ffffff or 6272f52f439c91f9ffffff or 6272f54f439c91f9ffffff
+ VGETEXPSS X14, X2, K7, X13 // 62526d0f43ee
+ VGETEXPSS X25, X0, K6, X0 // 62927d0e43c1 or 62927d2e43c1 or 62927d4e43c1
+ VGETEXPSS -7(DI)(R8*1), X0, K6, X0 // 62b27d0e438407f9ffffff or 62b27d2e438407f9ffffff or 62b27d4e438407f9ffffff
+ VGETEXPSS (SP), X0, K6, X0 // 62f27d0e430424 or 62f27d2e430424 or 62f27d4e430424
+ VGETMANTPD $15, X17, K3, X11 // 6233fd0b26d90f
+ VGETMANTPD $15, -7(CX)(DX*1), K3, X11 // 6273fd0b269c11f9ffffff0f
+ VGETMANTPD $15, -15(R14)(R15*4), K3, X11 // 6213fd0b269cbef1ffffff0f
+ VGETMANTPD $0, Y8, K7, Y31 // 6243fd2f26f800
+ VGETMANTPD $0, 99(R15)(R15*8), K7, Y31 // 6203fd2f26bcff6300000000
+ VGETMANTPD $0, 7(AX)(CX*8), K7, Y31 // 6263fd2f26bcc80700000000
+ VGETMANTPD $1, Z13, K4, Z1 // 62d3fd4c26cd01
+ VGETMANTPD $1, Z13, K4, Z15 // 6253fd4c26fd01
+ VGETMANTPD $2, Z22, K4, Z18 // 62a3fd4c26d602
+ VGETMANTPD $2, Z7, K4, Z18 // 62e3fd4c26d702
+ VGETMANTPD $2, 17(SP), K4, Z18 // 62e3fd4c2694241100000002
+ VGETMANTPD $2, -17(BP)(SI*4), K4, Z18 // 62e3fd4c2694b5efffffff02
+ VGETMANTPD $2, Z22, K4, Z8 // 6233fd4c26c602
+ VGETMANTPD $2, Z7, K4, Z8 // 6273fd4c26c702
+ VGETMANTPD $2, 17(SP), K4, Z8 // 6273fd4c2684241100000002
+ VGETMANTPD $2, -17(BP)(SI*4), K4, Z8 // 6273fd4c2684b5efffffff02
+ VGETMANTPS $3, X11, K7, X18 // 62c37d0f26d303
+ VGETMANTPS $3, 15(DX)(BX*1), K7, X18 // 62e37d0f26941a0f00000003
+ VGETMANTPS $3, -7(CX)(DX*2), K7, X18 // 62e37d0f269451f9ffffff03
+ VGETMANTPS $4, Y28, K2, Y1 // 62937d2a26cc04
+ VGETMANTPS $4, (AX), K2, Y1 // 62f37d2a260804
+ VGETMANTPS $4, 7(SI), K2, Y1 // 62f37d2a268e0700000004
+ VGETMANTPS $5, Z20, K5, Z2 // 62b37d4d26d405
+ VGETMANTPS $5, Z9, K5, Z2 // 62d37d4d26d105
+ VGETMANTPS $5, Z20, K5, Z31 // 62237d4d26fc05
+ VGETMANTPS $5, Z9, K5, Z31 // 62437d4d26f905
+ VGETMANTPS $6, Z12, K3, Z1 // 62d37d4b26cc06
+ VGETMANTPS $6, Z16, K3, Z1 // 62b37d4b26c806
+ VGETMANTPS $6, 7(AX), K3, Z1 // 62f37d4b26880700000006
+ VGETMANTPS $6, (DI), K3, Z1 // 62f37d4b260f06
+ VGETMANTPS $6, Z12, K3, Z3 // 62d37d4b26dc06
+ VGETMANTPS $6, Z16, K3, Z3 // 62b37d4b26d806
+ VGETMANTPS $6, 7(AX), K3, Z3 // 62f37d4b26980700000006
+ VGETMANTPS $6, (DI), K3, Z3 // 62f37d4b261f06
+ VGETMANTSD $7, X24, X2, K4, X9 // 6213ed0c27c807
+ VGETMANTSD $8, X27, X2, K2, X2 // 6293ed0a27d308 or 6293ed2a27d308 or 6293ed4a27d308
+ VGETMANTSD $8, (R8), X2, K2, X2 // 62d3ed0a271008 or 62d3ed2a271008 or 62d3ed4a271008
+ VGETMANTSD $8, 15(DX)(BX*2), X2, K2, X2 // 62f3ed0a27945a0f00000008 or 62f3ed2a27945a0f00000008 or 62f3ed4a27945a0f00000008
+ VGETMANTSS $9, X30, X22, K2, X26 // 62034d0227d609
+ VGETMANTSS $10, X15, X11, K3, X3 // 62d3250b27df0a or 62d3252b27df0a or 62d3254b27df0a
+ VGETMANTSS $10, -7(CX), X11, K3, X3 // 62f3250b2799f9ffffff0a or 62f3252b2799f9ffffff0a or 62f3254b2799f9ffffff0a
+ VGETMANTSS $10, 15(DX)(BX*4), X11, K3, X3 // 62f3250b279c9a0f0000000a or 62f3252b279c9a0f0000000a or 62f3254b279c9a0f0000000a
+ VINSERTF32X4 $0, X9, Y9, K1, Y2 // 62d3352918d100
+ VINSERTF32X4 $0, 15(R8)(R14*8), Y9, K1, Y2 // 629335291894f00f00000000
+ VINSERTF32X4 $0, -15(R14)(R15*2), Y9, K1, Y2 // 6293352918947ef1ffffff00
+ VINSERTF32X4 $0, X26, Z20, K7, Z16 // 62835d4718c200
+ VINSERTF32X4 $0, -15(R14)(R15*1), Z20, K7, Z16 // 62835d4718843ef1ffffff00
+ VINSERTF32X4 $0, -15(BX), Z20, K7, Z16 // 62e35d471883f1ffffff00
+ VINSERTF32X4 $0, X26, Z0, K7, Z16 // 62837d4f18c200
+ VINSERTF32X4 $0, -15(R14)(R15*1), Z0, K7, Z16 // 62837d4f18843ef1ffffff00
+ VINSERTF32X4 $0, -15(BX), Z0, K7, Z16 // 62e37d4f1883f1ffffff00
+ VINSERTF32X4 $0, X26, Z20, K7, Z9 // 62135d4718ca00
+ VINSERTF32X4 $0, -15(R14)(R15*1), Z20, K7, Z9 // 62135d47188c3ef1ffffff00
+ VINSERTF32X4 $0, -15(BX), Z20, K7, Z9 // 62735d47188bf1ffffff00
+ VINSERTF32X4 $0, X26, Z0, K7, Z9 // 62137d4f18ca00
+ VINSERTF32X4 $0, -15(R14)(R15*1), Z0, K7, Z9 // 62137d4f188c3ef1ffffff00
+ VINSERTF32X4 $0, -15(BX), Z0, K7, Z9 // 62737d4f188bf1ffffff00
+ VINSERTF64X4 $1, Y30, Z9, K3, Z0 // 6293b54b1ac601
+ VINSERTF64X4 $1, -17(BP)(SI*2), Z9, K3, Z0 // 62f3b54b1a8475efffffff01
+ VINSERTF64X4 $1, 7(AX)(CX*2), Z9, K3, Z0 // 62f3b54b1a84480700000001
+ VINSERTF64X4 $1, Y30, Z3, K3, Z0 // 6293e54b1ac601
+ VINSERTF64X4 $1, -17(BP)(SI*2), Z3, K3, Z0 // 62f3e54b1a8475efffffff01
+ VINSERTF64X4 $1, 7(AX)(CX*2), Z3, K3, Z0 // 62f3e54b1a84480700000001
+ VINSERTF64X4 $1, Y30, Z9, K3, Z26 // 6203b54b1ad601
+ VINSERTF64X4 $1, -17(BP)(SI*2), Z9, K3, Z26 // 6263b54b1a9475efffffff01
+ VINSERTF64X4 $1, 7(AX)(CX*2), Z9, K3, Z26 // 6263b54b1a94480700000001
+ VINSERTF64X4 $1, Y30, Z3, K3, Z26 // 6203e54b1ad601
+ VINSERTF64X4 $1, -17(BP)(SI*2), Z3, K3, Z26 // 6263e54b1a9475efffffff01
+ VINSERTF64X4 $1, 7(AX)(CX*2), Z3, K3, Z26 // 6263e54b1a94480700000001
+ VINSERTI32X4 $0, X31, Y7, K4, Y26 // 6203452c38d700
+ VINSERTI32X4 $0, 17(SP)(BP*8), Y7, K4, Y26 // 6263452c3894ec1100000000
+ VINSERTI32X4 $0, 17(SP)(BP*4), Y7, K4, Y26 // 6263452c3894ac1100000000
+ VINSERTI32X4 $2, X16, Z9, K5, Z9 // 6233354d38c802
+ VINSERTI32X4 $2, 7(SI)(DI*4), Z9, K5, Z9 // 6273354d388cbe0700000002
+ VINSERTI32X4 $2, -7(DI)(R8*2), Z9, K5, Z9 // 6233354d388c47f9ffffff02
+ VINSERTI32X4 $2, X16, Z28, K5, Z9 // 62331d4538c802
+ VINSERTI32X4 $2, 7(SI)(DI*4), Z28, K5, Z9 // 62731d45388cbe0700000002
+ VINSERTI32X4 $2, -7(DI)(R8*2), Z28, K5, Z9 // 62331d45388c47f9ffffff02
+ VINSERTI32X4 $2, X16, Z9, K5, Z25 // 6223354d38c802
+ VINSERTI32X4 $2, 7(SI)(DI*4), Z9, K5, Z25 // 6263354d388cbe0700000002
+ VINSERTI32X4 $2, -7(DI)(R8*2), Z9, K5, Z25 // 6223354d388c47f9ffffff02
+ VINSERTI32X4 $2, X16, Z28, K5, Z25 // 62231d4538c802
+ VINSERTI32X4 $2, 7(SI)(DI*4), Z28, K5, Z25 // 62631d45388cbe0700000002
+ VINSERTI32X4 $2, -7(DI)(R8*2), Z28, K5, Z25 // 62231d45388c47f9ffffff02
+ VINSERTI64X4 $1, Y31, Z6, K3, Z21 // 6283cd4b3aef01
+ VINSERTI64X4 $1, (R14), Z6, K3, Z21 // 62c3cd4b3a2e01
+ VINSERTI64X4 $1, -7(DI)(R8*8), Z6, K3, Z21 // 62a3cd4b3aacc7f9ffffff01
+ VINSERTI64X4 $1, Y31, Z9, K3, Z21 // 6283b54b3aef01
+ VINSERTI64X4 $1, (R14), Z9, K3, Z21 // 62c3b54b3a2e01
+ VINSERTI64X4 $1, -7(DI)(R8*8), Z9, K3, Z21 // 62a3b54b3aacc7f9ffffff01
+ VINSERTI64X4 $1, Y31, Z6, K3, Z9 // 6213cd4b3acf01
+ VINSERTI64X4 $1, (R14), Z6, K3, Z9 // 6253cd4b3a0e01
+ VINSERTI64X4 $1, -7(DI)(R8*8), Z6, K3, Z9 // 6233cd4b3a8cc7f9ffffff01
+ VINSERTI64X4 $1, Y31, Z9, K3, Z9 // 6213b54b3acf01
+ VINSERTI64X4 $1, (R14), Z9, K3, Z9 // 6253b54b3a0e01
+ VINSERTI64X4 $1, -7(DI)(R8*8), Z9, K3, Z9 // 6233b54b3a8cc7f9ffffff01
+ VMAXPD X21, X16, K7, X0 // 62b1fd075fc5
+ VMAXPD 99(R15)(R15*1), X16, K7, X0 // 6291fd075f843f63000000
+ VMAXPD (DX), X16, K7, X0 // 62f1fd075f02
+ VMAXPD Y21, Y6, K4, Y22 // 62a1cd2c5ff5
+ VMAXPD 99(R15)(R15*4), Y6, K4, Y22 // 6281cd2c5fb4bf63000000
+ VMAXPD 15(DX), Y6, K4, Y22 // 62e1cd2c5fb20f000000
+ VMAXPD Z30, Z20, K4, Z1 // 6291dd445fce
+ VMAXPD Z5, Z20, K4, Z1 // 62f1dd445fcd
+ VMAXPD Z30, Z9, K4, Z1 // 6291b54c5fce
+ VMAXPD Z5, Z9, K4, Z1 // 62f1b54c5fcd
+ VMAXPD Z30, Z20, K4, Z9 // 6211dd445fce
+ VMAXPD Z5, Z20, K4, Z9 // 6271dd445fcd
+ VMAXPD Z30, Z9, K4, Z9 // 6211b54c5fce
+ VMAXPD Z5, Z9, K4, Z9 // 6271b54c5fcd
+ VMAXPD Z16, Z7, K7, Z26 // 6221c54f5fd0
+ VMAXPD Z25, Z7, K7, Z26 // 6201c54f5fd1
+ VMAXPD 7(SI)(DI*1), Z7, K7, Z26 // 6261c54f5f943e07000000
+ VMAXPD 15(DX)(BX*8), Z7, K7, Z26 // 6261c54f5f94da0f000000
+ VMAXPD Z16, Z21, K7, Z26 // 6221d5475fd0
+ VMAXPD Z25, Z21, K7, Z26 // 6201d5475fd1
+ VMAXPD 7(SI)(DI*1), Z21, K7, Z26 // 6261d5475f943e07000000
+ VMAXPD 15(DX)(BX*8), Z21, K7, Z26 // 6261d5475f94da0f000000
+ VMAXPD Z16, Z7, K7, Z22 // 62a1c54f5ff0
+ VMAXPD Z25, Z7, K7, Z22 // 6281c54f5ff1
+ VMAXPD 7(SI)(DI*1), Z7, K7, Z22 // 62e1c54f5fb43e07000000
+ VMAXPD 15(DX)(BX*8), Z7, K7, Z22 // 62e1c54f5fb4da0f000000
+ VMAXPD Z16, Z21, K7, Z22 // 62a1d5475ff0
+ VMAXPD Z25, Z21, K7, Z22 // 6281d5475ff1
+ VMAXPD 7(SI)(DI*1), Z21, K7, Z22 // 62e1d5475fb43e07000000
+ VMAXPD 15(DX)(BX*8), Z21, K7, Z22 // 62e1d5475fb4da0f000000
+ VMAXPS X22, X28, K2, X0 // 62b11c025fc6
+ VMAXPS -17(BP)(SI*8), X28, K2, X0 // 62f11c025f84f5efffffff
+ VMAXPS (R15), X28, K2, X0 // 62d11c025f07
+ VMAXPS Y28, Y0, K5, Y7 // 62917c2d5ffc
+ VMAXPS (CX), Y0, K5, Y7 // 62f17c2d5f39
+ VMAXPS 99(R15), Y0, K5, Y7 // 62d17c2d5fbf63000000
+ VMAXPS Z21, Z12, K3, Z14 // 62311c4b5ff5
+ VMAXPS Z9, Z12, K3, Z14 // 62511c4b5ff1
+ VMAXPS Z21, Z13, K3, Z14 // 6231144b5ff5
+ VMAXPS Z9, Z13, K3, Z14 // 6251144b5ff1
+ VMAXPS Z21, Z12, K3, Z13 // 62311c4b5fed
+ VMAXPS Z9, Z12, K3, Z13 // 62511c4b5fe9
+ VMAXPS Z21, Z13, K3, Z13 // 6231144b5fed
+ VMAXPS Z9, Z13, K3, Z13 // 6251144b5fe9
+ VMAXPS Z23, Z27, K4, Z2 // 62b124445fd7
+ VMAXPS Z9, Z27, K4, Z2 // 62d124445fd1
+ VMAXPS -7(DI)(R8*1), Z27, K4, Z2 // 62b124445f9407f9ffffff
+ VMAXPS (SP), Z27, K4, Z2 // 62f124445f1424
+ VMAXPS Z23, Z25, K4, Z2 // 62b134445fd7
+ VMAXPS Z9, Z25, K4, Z2 // 62d134445fd1
+ VMAXPS -7(DI)(R8*1), Z25, K4, Z2 // 62b134445f9407f9ffffff
+ VMAXPS (SP), Z25, K4, Z2 // 62f134445f1424
+ VMAXPS Z23, Z27, K4, Z7 // 62b124445fff
+ VMAXPS Z9, Z27, K4, Z7 // 62d124445ff9
+ VMAXPS -7(DI)(R8*1), Z27, K4, Z7 // 62b124445fbc07f9ffffff
+ VMAXPS (SP), Z27, K4, Z7 // 62f124445f3c24
+ VMAXPS Z23, Z25, K4, Z7 // 62b134445fff
+ VMAXPS Z9, Z25, K4, Z7 // 62d134445ff9
+ VMAXPS -7(DI)(R8*1), Z25, K4, Z7 // 62b134445fbc07f9ffffff
+ VMAXPS (SP), Z25, K4, Z7 // 62f134445f3c24
+ VMAXSD X7, X19, K2, X7 // 62f1e7025fff
+ VMAXSD X1, X31, K2, X16 // 62e187025fc1 or 62e187225fc1 or 62e187425fc1
+ VMAXSD 17(SP)(BP*1), X31, K2, X16 // 62e187025f842c11000000 or 62e187225f842c11000000 or 62e187425f842c11000000
+ VMAXSD -7(CX)(DX*8), X31, K2, X16 // 62e187025f84d1f9ffffff or 62e187225f84d1f9ffffff or 62e187425f84d1f9ffffff
+ VMAXSS X15, X9, K3, X7 // 62d1360b5fff
+ VMAXSS X12, X0, K3, X12 // 62517e0b5fe4 or 62517e2b5fe4 or 62517e4b5fe4
+ VMAXSS (AX), X0, K3, X12 // 62717e0b5f20 or 62717e2b5f20 or 62717e4b5f20
+ VMAXSS 7(SI), X0, K3, X12 // 62717e0b5fa607000000 or 62717e2b5fa607000000 or 62717e4b5fa607000000
+ VMINPD X17, X5, K3, X14 // 6231d50b5df1
+ VMINPD 7(SI)(DI*8), X5, K3, X14 // 6271d50b5db4fe07000000
+ VMINPD -15(R14), X5, K3, X14 // 6251d50b5db6f1ffffff
+ VMINPD Y24, Y14, K2, Y20 // 62818d2a5de0
+ VMINPD 99(R15)(R15*2), Y14, K2, Y20 // 62818d2a5da47f63000000
+ VMINPD -7(DI), Y14, K2, Y20 // 62e18d2a5da7f9ffffff
+ VMINPD Z14, Z3, K1, Z27 // 6241e5495dde
+ VMINPD Z7, Z3, K1, Z27 // 6261e5495ddf
+ VMINPD Z14, Z0, K1, Z27 // 6241fd495dde
+ VMINPD Z7, Z0, K1, Z27 // 6261fd495ddf
+ VMINPD Z14, Z3, K1, Z14 // 6251e5495df6
+ VMINPD Z7, Z3, K1, Z14 // 6271e5495df7
+ VMINPD Z14, Z0, K1, Z14 // 6251fd495df6
+ VMINPD Z7, Z0, K1, Z14 // 6271fd495df7
+ VMINPD Z1, Z22, K2, Z8 // 6271cd425dc1
+ VMINPD Z16, Z22, K2, Z8 // 6231cd425dc0
+ VMINPD -7(CX), Z22, K2, Z8 // 6271cd425d81f9ffffff
+ VMINPD 15(DX)(BX*4), Z22, K2, Z8 // 6271cd425d849a0f000000
+ VMINPD Z1, Z25, K2, Z8 // 6271b5425dc1
+ VMINPD Z16, Z25, K2, Z8 // 6231b5425dc0
+ VMINPD -7(CX), Z25, K2, Z8 // 6271b5425d81f9ffffff
+ VMINPD 15(DX)(BX*4), Z25, K2, Z8 // 6271b5425d849a0f000000
+ VMINPD Z1, Z22, K2, Z24 // 6261cd425dc1
+ VMINPD Z16, Z22, K2, Z24 // 6221cd425dc0
+ VMINPD -7(CX), Z22, K2, Z24 // 6261cd425d81f9ffffff
+ VMINPD 15(DX)(BX*4), Z22, K2, Z24 // 6261cd425d849a0f000000
+ VMINPD Z1, Z25, K2, Z24 // 6261b5425dc1
+ VMINPD Z16, Z25, K2, Z24 // 6221b5425dc0
+ VMINPD -7(CX), Z25, K2, Z24 // 6261b5425d81f9ffffff
+ VMINPD 15(DX)(BX*4), Z25, K2, Z24 // 6261b5425d849a0f000000
+ VMINPS X3, X8, K1, X15 // 62713c095dfb
+ VMINPS 7(SI)(DI*1), X8, K1, X15 // 62713c095dbc3e07000000
+ VMINPS 15(DX)(BX*8), X8, K1, X15 // 62713c095dbcda0f000000
+ VMINPS Y14, Y20, K7, Y13 // 62515c275dee
+ VMINPS -7(CX)(DX*1), Y20, K7, Y13 // 62715c275dac11f9ffffff
+ VMINPS -15(R14)(R15*4), Y20, K7, Y13 // 62115c275dacbef1ffffff
+ VMINPS Z15, Z0, K1, Z6 // 62d17c495df7
+ VMINPS Z12, Z0, K1, Z6 // 62d17c495df4
+ VMINPS Z15, Z8, K1, Z6 // 62d13c495df7
+ VMINPS Z12, Z8, K1, Z6 // 62d13c495df4
+ VMINPS Z15, Z0, K1, Z2 // 62d17c495dd7
+ VMINPS Z12, Z0, K1, Z2 // 62d17c495dd4
+ VMINPS Z15, Z8, K1, Z2 // 62d13c495dd7
+ VMINPS Z12, Z8, K1, Z2 // 62d13c495dd4
+ VMINPS Z13, Z11, K1, Z14 // 625124495df5
+ VMINPS Z14, Z11, K1, Z14 // 625124495df6
+ VMINPS 99(R15)(R15*8), Z11, K1, Z14 // 621124495db4ff63000000
+ VMINPS 7(AX)(CX*8), Z11, K1, Z14 // 627124495db4c807000000
+ VMINPS Z13, Z5, K1, Z14 // 625154495df5
+ VMINPS Z14, Z5, K1, Z14 // 625154495df6
+ VMINPS 99(R15)(R15*8), Z5, K1, Z14 // 621154495db4ff63000000
+ VMINPS 7(AX)(CX*8), Z5, K1, Z14 // 627154495db4c807000000
+ VMINPS Z13, Z11, K1, Z27 // 624124495ddd
+ VMINPS Z14, Z11, K1, Z27 // 624124495dde
+ VMINPS 99(R15)(R15*8), Z11, K1, Z27 // 620124495d9cff63000000
+ VMINPS 7(AX)(CX*8), Z11, K1, Z27 // 626124495d9cc807000000
+ VMINPS Z13, Z5, K1, Z27 // 624154495ddd
+ VMINPS Z14, Z5, K1, Z27 // 624154495dde
+ VMINPS 99(R15)(R15*8), Z5, K1, Z27 // 620154495d9cff63000000
+ VMINPS 7(AX)(CX*8), Z5, K1, Z27 // 626154495d9cc807000000
+ VMINSD X13, X23, K1, X26 // 6241c7015dd5
+ VMINSD X9, X24, K7, X28 // 6241bf075de1 or 6241bf275de1 or 6241bf475de1
+ VMINSD -17(BP)(SI*2), X24, K7, X28 // 6261bf075da475efffffff or 6261bf275da475efffffff or 6261bf475da475efffffff
+ VMINSD 7(AX)(CX*2), X24, K7, X28 // 6261bf075da44807000000 or 6261bf275da44807000000 or 6261bf475da44807000000
+ VMINSS X18, X26, K2, X15 // 62312e025dfa
+ VMINSS X11, X1, K4, X21 // 62c1760c5deb or 62c1762c5deb or 62c1764c5deb
+ VMINSS (BX), X1, K4, X21 // 62e1760c5d2b or 62e1762c5d2b or 62e1764c5d2b
+ VMINSS -17(BP)(SI*1), X1, K4, X21 // 62e1760c5dac35efffffff or 62e1762c5dac35efffffff or 62e1764c5dac35efffffff
+ VMOVAPD X3, K1, X31 // 6291fd0929df
+ VMOVAPD X3, K1, -7(DI)(R8*1) // 62b1fd09299c07f9ffffff
+ VMOVAPD X3, K1, (SP) // 62f1fd09291c24
+ VMOVAPD X0, K3, X7 // 62f1fd0b29c7
+ VMOVAPD -7(CX), K3, X7 // 62f1fd0b28b9f9ffffff
+ VMOVAPD 15(DX)(BX*4), K3, X7 // 62f1fd0b28bc9a0f000000
+ VMOVAPD Y1, K4, Y21 // 62b1fd2c29cd
+ VMOVAPD Y1, K4, 15(DX)(BX*1) // 62f1fd2c298c1a0f000000
+ VMOVAPD Y1, K4, -7(CX)(DX*2) // 62f1fd2c298c51f9ffffff
+ VMOVAPD Y30, K5, Y26 // 6201fd2d29f2
+ VMOVAPD -17(BP), K5, Y26 // 6261fd2d2895efffffff
+ VMOVAPD -15(R14)(R15*8), K5, Y26 // 6201fd2d2894fef1ffffff
+ VMOVAPD Z2, K7, Z5 // 62f1fd4f29d5
+ VMOVAPD Z2, K7, Z23 // 62b1fd4f29d7
+ VMOVAPD Z2, K7, (AX) // 62f1fd4f2910
+ VMOVAPD Z2, K7, 7(SI) // 62f1fd4f299607000000
+ VMOVAPD Z26, K7, Z6 // 6261fd4f29d6
+ VMOVAPD Z14, K7, Z6 // 6271fd4f29f6
+ VMOVAPD (BX), K7, Z6 // 62f1fd4f2833
+ VMOVAPD -17(BP)(SI*1), K7, Z6 // 62f1fd4f28b435efffffff
+ VMOVAPD Z26, K7, Z14 // 6241fd4f29d6
+ VMOVAPD Z14, K7, Z14 // 6251fd4f29f6
+ VMOVAPD (BX), K7, Z14 // 6271fd4f2833
+ VMOVAPD -17(BP)(SI*1), K7, Z14 // 6271fd4f28b435efffffff
+ VMOVAPS X24, K6, X0 // 62617c0e29c0
+ VMOVAPS X24, K6, 99(R15)(R15*8) // 62017c0e2984ff63000000
+ VMOVAPS X24, K6, 7(AX)(CX*8) // 62617c0e2984c807000000
+ VMOVAPS X7, K3, X20 // 62b17c0b29fc
+ VMOVAPS (AX), K3, X20 // 62e17c0b2820
+ VMOVAPS 7(SI), K3, X20 // 62e17c0b28a607000000
+ VMOVAPS Y22, K7, Y12 // 62c17c2f29f4
+ VMOVAPS Y22, K7, 17(SP)(BP*2) // 62e17c2f29b46c11000000
+ VMOVAPS Y22, K7, -7(DI)(R8*4) // 62a17c2f29b487f9ffffff
+ VMOVAPS Y15, K4, Y3 // 62717c2c29fb
+ VMOVAPS 15(R8), K4, Y3 // 62d17c2c28980f000000
+ VMOVAPS (BP), K4, Y3 // 62f17c2c285d00
+ VMOVAPS Z13, K4, Z28 // 62117c4c29ec
+ VMOVAPS Z21, K4, Z28 // 62817c4c29ec
+ VMOVAPS Z13, K4, Z6 // 62717c4c29ee
+ VMOVAPS Z21, K4, Z6 // 62e17c4c29ee
+ VMOVAPS Z13, K4, 15(R8)(R14*4) // 62117c4c29acb00f000000
+ VMOVAPS Z21, K4, 15(R8)(R14*4) // 62817c4c29acb00f000000
+ VMOVAPS Z13, K4, -7(CX)(DX*4) // 62717c4c29ac91f9ffffff
+ VMOVAPS Z21, K4, -7(CX)(DX*4) // 62e17c4c29ac91f9ffffff
+ VMOVAPS Z3, K7, Z26 // 62917c4f29da
+ VMOVAPS Z0, K7, Z26 // 62917c4f29c2
+ VMOVAPS (R8), K7, Z26 // 62417c4f2810
+ VMOVAPS 15(DX)(BX*2), K7, Z26 // 62617c4f28945a0f000000
+ VMOVAPS Z3, K7, Z3 // 62f17c4f29db
+ VMOVAPS Z0, K7, Z3 // 62f17c4f29c3
+ VMOVAPS (R8), K7, Z3 // 62d17c4f2818
+ VMOVAPS 15(DX)(BX*2), K7, Z3 // 62f17c4f289c5a0f000000
+ VMOVDDUP X5, K2, X14 // 6271ff0a12f5
+ VMOVDDUP 15(R8)(R14*1), K2, X14 // 6211ff0a12b4300f000000
+ VMOVDDUP 15(R8)(R14*2), K2, X14 // 6211ff0a12b4700f000000
+ VMOVDDUP Y27, K5, Y1 // 6291ff2d12cb
+ VMOVDDUP 15(R8)(R14*8), K5, Y1 // 6291ff2d128cf00f000000
+ VMOVDDUP -15(R14)(R15*2), K5, Y1 // 6291ff2d128c7ef1ffffff
+ VMOVDDUP Z11, K3, Z21 // 62c1ff4b12eb
+ VMOVDDUP Z25, K3, Z21 // 6281ff4b12e9
+ VMOVDDUP 17(SP)(BP*1), K3, Z21 // 62e1ff4b12ac2c11000000
+ VMOVDDUP -7(CX)(DX*8), K3, Z21 // 62e1ff4b12acd1f9ffffff
+ VMOVDDUP Z11, K3, Z13 // 6251ff4b12eb
+ VMOVDDUP Z25, K3, Z13 // 6211ff4b12e9
+ VMOVDDUP 17(SP)(BP*1), K3, Z13 // 6271ff4b12ac2c11000000
+ VMOVDDUP -7(CX)(DX*8), K3, Z13 // 6271ff4b12acd1f9ffffff
+ VMOVDQA32 X3, K4, X31 // 62917d0c7fdf
+ VMOVDQA32 X3, K4, (BX) // 62f17d0c7f1b
+ VMOVDQA32 X3, K4, -17(BP)(SI*1) // 62f17d0c7f9c35efffffff
+ VMOVDQA32 X1, K2, X21 // 62b17d0a7fcd
+ VMOVDQA32 15(R8)(R14*4), K2, X21 // 62817d0a6facb00f000000
+ VMOVDQA32 -7(CX)(DX*4), K2, X21 // 62e17d0a6fac91f9ffffff
+ VMOVDQA32 Y5, K2, Y19 // 62b17d2a7feb
+ VMOVDQA32 Y5, K2, -15(R14)(R15*1) // 62917d2a7fac3ef1ffffff
+ VMOVDQA32 Y5, K2, -15(BX) // 62f17d2a7fabf1ffffff
+ VMOVDQA32 Y13, K3, Y17 // 62317d2b7fe9
+ VMOVDQA32 7(AX)(CX*4), K3, Y17 // 62e17d2b6f8c8807000000
+ VMOVDQA32 7(AX)(CX*1), K3, Y17 // 62e17d2b6f8c0807000000
+ VMOVDQA32 Z27, K3, Z3 // 62617d4b7fdb
+ VMOVDQA32 Z15, K3, Z3 // 62717d4b7ffb
+ VMOVDQA32 Z27, K3, Z12 // 62417d4b7fdc
+ VMOVDQA32 Z15, K3, Z12 // 62517d4b7ffc
+ VMOVDQA32 Z27, K3, -17(BP)(SI*2) // 62617d4b7f9c75efffffff
+ VMOVDQA32 Z15, K3, -17(BP)(SI*2) // 62717d4b7fbc75efffffff
+ VMOVDQA32 Z27, K3, 7(AX)(CX*2) // 62617d4b7f9c4807000000
+ VMOVDQA32 Z15, K3, 7(AX)(CX*2) // 62717d4b7fbc4807000000
+ VMOVDQA32 Z23, K3, Z23 // 62a17d4b7fff
+ VMOVDQA32 Z6, K3, Z23 // 62b17d4b7ff7
+ VMOVDQA32 15(R8)(R14*1), K3, Z23 // 62817d4b6fbc300f000000
+ VMOVDQA32 15(R8)(R14*2), K3, Z23 // 62817d4b6fbc700f000000
+ VMOVDQA32 Z23, K3, Z5 // 62e17d4b7ffd
+ VMOVDQA32 Z6, K3, Z5 // 62f17d4b7ff5
+ VMOVDQA32 15(R8)(R14*1), K3, Z5 // 62917d4b6fac300f000000
+ VMOVDQA32 15(R8)(R14*2), K3, Z5 // 62917d4b6fac700f000000
+ VMOVDQA64 X13, K2, X11 // 6251fd0a7feb
+ VMOVDQA64 X13, K2, (R8) // 6251fd0a7f28
+ VMOVDQA64 X13, K2, 15(DX)(BX*2) // 6271fd0a7fac5a0f000000
+ VMOVDQA64 X30, K1, X0 // 6261fd097ff0
+ VMOVDQA64 17(SP)(BP*1), K1, X0 // 62f1fd096f842c11000000
+ VMOVDQA64 -7(CX)(DX*8), K1, X0 // 62f1fd096f84d1f9ffffff
+ VMOVDQA64 Y7, K2, Y21 // 62b1fd2a7ffd
+ VMOVDQA64 Y7, K2, (SI) // 62f1fd2a7f3e
+ VMOVDQA64 Y7, K2, 7(SI)(DI*2) // 62f1fd2a7fbc7e07000000
+ VMOVDQA64 Y13, K1, Y30 // 6211fd297fee
+ VMOVDQA64 17(SP)(BP*8), K1, Y30 // 6261fd296fb4ec11000000
+ VMOVDQA64 17(SP)(BP*4), K1, Y30 // 6261fd296fb4ac11000000
+ VMOVDQA64 Z21, K7, Z8 // 62c1fd4f7fe8
+ VMOVDQA64 Z5, K7, Z8 // 62d1fd4f7fe8
+ VMOVDQA64 Z21, K7, Z28 // 6281fd4f7fec
+ VMOVDQA64 Z5, K7, Z28 // 6291fd4f7fec
+ VMOVDQA64 Z21, K7, (R14) // 62c1fd4f7f2e
+ VMOVDQA64 Z5, K7, (R14) // 62d1fd4f7f2e
+ VMOVDQA64 Z21, K7, -7(DI)(R8*8) // 62a1fd4f7facc7f9ffffff
+ VMOVDQA64 Z5, K7, -7(DI)(R8*8) // 62b1fd4f7facc7f9ffffff
+ VMOVDQA64 Z12, K1, Z16 // 6231fd497fe0
+ VMOVDQA64 Z27, K1, Z16 // 6221fd497fd8
+ VMOVDQA64 99(R15)(R15*4), K1, Z16 // 6281fd496f84bf63000000
+ VMOVDQA64 15(DX), K1, Z16 // 62e1fd496f820f000000
+ VMOVDQA64 Z12, K1, Z13 // 6251fd497fe5
+ VMOVDQA64 Z27, K1, Z13 // 6241fd497fdd
+ VMOVDQA64 99(R15)(R15*4), K1, Z13 // 6211fd496facbf63000000
+ VMOVDQA64 15(DX), K1, Z13 // 6271fd496faa0f000000
+ VMOVDQU32 X8, K3, X19 // 62317e0b7fc3
+ VMOVDQU32 X8, K3, (R14) // 62517e0b7f06
+ VMOVDQU32 X8, K3, -7(DI)(R8*8) // 62317e0b7f84c7f9ffffff
+ VMOVDQU32 X26, K4, X8 // 62417e0c7fd0
+ VMOVDQU32 99(R15)(R15*4), K4, X8 // 62117e0c6f84bf63000000
+ VMOVDQU32 15(DX), K4, X8 // 62717e0c6f820f000000
+ VMOVDQU32 Y5, K5, Y24 // 62917e2d7fe8
+ VMOVDQU32 Y5, K5, 7(AX) // 62f17e2d7fa807000000
+ VMOVDQU32 Y5, K5, (DI) // 62f17e2d7f2f
+ VMOVDQU32 Y21, K7, Y24 // 62817e2f7fe8
+ VMOVDQU32 99(R15)(R15*1), K7, Y24 // 62017e2f6f843f63000000
+ VMOVDQU32 (DX), K7, Y24 // 62617e2f6f02
+ VMOVDQU32 Z6, K7, Z9 // 62d17e4f7ff1
+ VMOVDQU32 Z25, K7, Z9 // 62417e4f7fc9
+ VMOVDQU32 Z6, K7, Z12 // 62d17e4f7ff4
+ VMOVDQU32 Z25, K7, Z12 // 62417e4f7fcc
+ VMOVDQU32 Z6, K7, -7(CX)(DX*1) // 62f17e4f7fb411f9ffffff
+ VMOVDQU32 Z25, K7, -7(CX)(DX*1) // 62617e4f7f8c11f9ffffff
+ VMOVDQU32 Z6, K7, -15(R14)(R15*4) // 62917e4f7fb4bef1ffffff
+ VMOVDQU32 Z25, K7, -15(R14)(R15*4) // 62017e4f7f8cbef1ffffff
+ VMOVDQU32 Z8, K6, Z3 // 62717e4e7fc3
+ VMOVDQU32 Z2, K6, Z3 // 62f17e4e7fd3
+ VMOVDQU32 15(DX)(BX*1), K6, Z3 // 62f17e4e6f9c1a0f000000
+ VMOVDQU32 -7(CX)(DX*2), K6, Z3 // 62f17e4e6f9c51f9ffffff
+ VMOVDQU32 Z8, K6, Z21 // 62317e4e7fc5
+ VMOVDQU32 Z2, K6, Z21 // 62b17e4e7fd5
+ VMOVDQU32 15(DX)(BX*1), K6, Z21 // 62e17e4e6fac1a0f000000
+ VMOVDQU32 -7(CX)(DX*2), K6, Z21 // 62e17e4e6fac51f9ffffff
+ VMOVDQU64 X12, K3, X23 // 6231fe0b7fe7
+ VMOVDQU64 X12, K3, (CX) // 6271fe0b7f21
+ VMOVDQU64 X12, K3, 99(R15) // 6251fe0b7fa763000000
+ VMOVDQU64 X23, K7, X16 // 62a1fe0f7ff8
+ VMOVDQU64 99(R15)(R15*2), K7, X16 // 6281fe0f6f847f63000000
+ VMOVDQU64 -7(DI), K7, X16 // 62e1fe0f6f87f9ffffff
+ VMOVDQU64 Y9, K4, Y16 // 6231fe2c7fc8
+ VMOVDQU64 Y9, K4, -17(BP)(SI*8) // 6271fe2c7f8cf5efffffff
+ VMOVDQU64 Y9, K4, (R15) // 6251fe2c7f0f
+ VMOVDQU64 Y9, K4, Y13 // 6251fe2c7fcd
+ VMOVDQU64 7(SI)(DI*8), K4, Y13 // 6271fe2c6facfe07000000
+ VMOVDQU64 -15(R14), K4, Y13 // 6251fe2c6faef1ffffff
+ VMOVDQU64 Z7, K7, Z3 // 62f1fe4f7ffb
+ VMOVDQU64 Z9, K7, Z3 // 6271fe4f7fcb
+ VMOVDQU64 Z7, K7, Z27 // 6291fe4f7ffb
+ VMOVDQU64 Z9, K7, Z27 // 6211fe4f7fcb
+ VMOVDQU64 Z7, K7, -17(BP) // 62f1fe4f7fbdefffffff
+ VMOVDQU64 Z9, K7, -17(BP) // 6271fe4f7f8defffffff
+ VMOVDQU64 Z7, K7, -15(R14)(R15*8) // 6291fe4f7fbcfef1ffffff
+ VMOVDQU64 Z9, K7, -15(R14)(R15*8) // 6211fe4f7f8cfef1ffffff
+ VMOVDQU64 Z20, K2, Z0 // 62e1fe4a7fe0
+ VMOVDQU64 Z28, K2, Z0 // 6261fe4a7fe0
+ VMOVDQU64 17(SP)(BP*2), K2, Z0 // 62f1fe4a6f846c11000000
+ VMOVDQU64 -7(DI)(R8*4), K2, Z0 // 62b1fe4a6f8487f9ffffff
+ VMOVDQU64 Z20, K2, Z6 // 62e1fe4a7fe6
+ VMOVDQU64 Z28, K2, Z6 // 6261fe4a7fe6
+ VMOVDQU64 17(SP)(BP*2), K2, Z6 // 62f1fe4a6fb46c11000000
+ VMOVDQU64 -7(DI)(R8*4), K2, Z6 // 62b1fe4a6fb487f9ffffff
+ VMOVHPS (R14), X2, X23 // 62c16c08163e
+ VMOVHPS -7(DI)(R8*8), X2, X23 // 62a16c0816bcc7f9ffffff
+ VMOVHPS X20, 99(R15)(R15*4) // 62817c0817a4bf63000000
+ VMOVHPS X20, 15(DX) // 62e17c0817a20f000000
+ VMOVLHPS X0, X25, X5 // 62f1340016e8
+ VMOVNTDQ Y26, -7(CX) // 62617d28e791f9ffffff
+ VMOVNTDQ Y26, 15(DX)(BX*4) // 62617d28e7949a0f000000
+ VMOVNTDQ Z18, -15(R14)(R15*1) // 62817d48e7943ef1ffffff
+ VMOVNTDQ Z24, -15(R14)(R15*1) // 62017d48e7843ef1ffffff
+ VMOVNTDQ Z18, -15(BX) // 62e17d48e793f1ffffff
+ VMOVNTDQ Z24, -15(BX) // 62617d48e783f1ffffff
+ VMOVNTDQA 7(AX)(CX*4), Z2 // 62f27d482a948807000000
+ VMOVNTDQA 7(AX)(CX*1), Z2 // 62f27d482a940807000000
+ VMOVNTDQA 7(AX)(CX*4), Z21 // 62e27d482aac8807000000
+ VMOVNTDQA 7(AX)(CX*1), Z21 // 62e27d482aac0807000000
+ VMOVNTPD Y26, (AX) // 6261fd282b10
+ VMOVNTPD Y26, 7(SI) // 6261fd282b9607000000
+ VMOVNTPD Z7, (SI) // 62f1fd482b3e
+ VMOVNTPD Z13, (SI) // 6271fd482b2e
+ VMOVNTPD Z7, 7(SI)(DI*2) // 62f1fd482bbc7e07000000
+ VMOVNTPD Z13, 7(SI)(DI*2) // 6271fd482bac7e07000000
+ VMOVNTPS X31, 15(R8)(R14*8) // 62017c082bbcf00f000000
+ VMOVNTPS X31, -15(R14)(R15*2) // 62017c082bbc7ef1ffffff
+ VMOVNTPS Z6, 17(SP)(BP*8) // 62f17c482bb4ec11000000
+ VMOVNTPS Z16, 17(SP)(BP*8) // 62e17c482b84ec11000000
+ VMOVNTPS Z6, 17(SP)(BP*4) // 62f17c482bb4ac11000000
+ VMOVNTPS Z16, 17(SP)(BP*4) // 62e17c482b84ac11000000
+ VMOVSD -7(CX)(DX*1), K3, X11 // 6271ff0b109c11f9ffffff or 6271ff2b109c11f9ffffff or 6271ff4b109c11f9ffffff
+ VMOVSD -15(R14)(R15*4), K3, X11 // 6211ff0b109cbef1ffffff or 6211ff2b109cbef1ffffff or 6211ff4b109cbef1ffffff
+ VMOVSD X14, X5, K3, X22 // 6231d70b11f6 or 6231d72b11f6 or 6231d74b11f6
+ VMOVSD X0, K2, 15(DX)(BX*1) // 62f1ff0a11841a0f000000 or 62f1ff2a11841a0f000000 or 62f1ff4a11841a0f000000
+ VMOVSD X0, K2, -7(CX)(DX*2) // 62f1ff0a118451f9ffffff or 62f1ff2a118451f9ffffff or 62f1ff4a118451f9ffffff
+ VMOVSD X15, X7, K1, X17 // 6231c70911f9 or 6231c72911f9 or 6231c74911f9
+ VMOVSHDUP X0, K2, X11 // 62717e0a16d8
+ VMOVSHDUP -15(R14)(R15*1), K2, X11 // 62117e0a169c3ef1ffffff
+ VMOVSHDUP -15(BX), K2, X11 // 62717e0a169bf1ffffff
+ VMOVSHDUP Y18, K1, Y14 // 62317e2916f2
+ VMOVSHDUP 15(R8)(R14*4), K1, Y14 // 62117e2916b4b00f000000
+ VMOVSHDUP -7(CX)(DX*4), K1, Y14 // 62717e2916b491f9ffffff
+ VMOVSHDUP Z1, K7, Z6 // 62f17e4f16f1
+ VMOVSHDUP Z15, K7, Z6 // 62d17e4f16f7
+ VMOVSHDUP 7(SI)(DI*4), K7, Z6 // 62f17e4f16b4be07000000
+ VMOVSHDUP -7(DI)(R8*2), K7, Z6 // 62b17e4f16b447f9ffffff
+ VMOVSHDUP Z1, K7, Z22 // 62e17e4f16f1
+ VMOVSHDUP Z15, K7, Z22 // 62c17e4f16f7
+ VMOVSHDUP 7(SI)(DI*4), K7, Z22 // 62e17e4f16b4be07000000
+ VMOVSHDUP -7(DI)(R8*2), K7, Z22 // 62a17e4f16b447f9ffffff
+ VMOVSLDUP X8, K1, X18 // 62c17e0912d0
+ VMOVSLDUP 7(AX)(CX*4), K1, X18 // 62e17e0912948807000000
+ VMOVSLDUP 7(AX)(CX*1), K1, X18 // 62e17e0912940807000000
+ VMOVSLDUP Y18, K1, Y31 // 62217e2912fa
+ VMOVSLDUP (R8), K1, Y31 // 62417e291238
+ VMOVSLDUP 15(DX)(BX*2), K1, Y31 // 62617e2912bc5a0f000000
+ VMOVSLDUP Z18, K1, Z13 // 62317e4912ea
+ VMOVSLDUP Z8, K1, Z13 // 62517e4912e8
+ VMOVSLDUP 17(SP), K1, Z13 // 62717e4912ac2411000000
+ VMOVSLDUP -17(BP)(SI*4), K1, Z13 // 62717e4912acb5efffffff
+ VMOVSS 17(SP)(BP*1), K7, X27 // 62617e0f109c2c11000000 or 62617e2f109c2c11000000 or 62617e4f109c2c11000000
+ VMOVSS -7(CX)(DX*8), K7, X27 // 62617e0f109cd1f9ffffff or 62617e2f109cd1f9ffffff or 62617e4f109cd1f9ffffff
+ VMOVSS X18, X3, K2, X25 // 6281660a11d1 or 6281662a11d1 or 6281664a11d1
+ VMOVSS X15, K4, -17(BP)(SI*2) // 62717e0c11bc75efffffff or 62717e2c11bc75efffffff or 62717e4c11bc75efffffff
+ VMOVSS X15, K4, 7(AX)(CX*2) // 62717e0c11bc4807000000 or 62717e2c11bc4807000000 or 62717e4c11bc4807000000
+ VMOVSS X7, X15, K1, X28 // 6291060911fc or 6291062911fc or 6291064911fc
+ VMOVUPD X8, K3, X13 // 6251fd0b11c5
+ VMOVUPD X8, K3, (SI) // 6271fd0b1106
+ VMOVUPD X8, K3, 7(SI)(DI*2) // 6271fd0b11847e07000000
+ VMOVUPD X7, K4, X24 // 6291fd0c11f8
+ VMOVUPD 17(SP)(BP*8), K4, X24 // 6261fd0c1084ec11000000
+ VMOVUPD 17(SP)(BP*4), K4, X24 // 6261fd0c1084ac11000000
+ VMOVUPD Y24, K5, Y3 // 6261fd2d11c3
+ VMOVUPD Y24, K5, 17(SP)(BP*1) // 6261fd2d11842c11000000
+ VMOVUPD Y24, K5, -7(CX)(DX*8) // 6261fd2d1184d1f9ffffff
+ VMOVUPD Y7, K7, Y2 // 62f1fd2f11fa
+ VMOVUPD -17(BP)(SI*2), K7, Y2 // 62f1fd2f109475efffffff
+ VMOVUPD 7(AX)(CX*2), K7, Y2 // 62f1fd2f10944807000000
+ VMOVUPD Z2, K7, Z22 // 62b1fd4f11d6
+ VMOVUPD Z31, K7, Z22 // 6221fd4f11fe
+ VMOVUPD Z2, K7, Z7 // 62f1fd4f11d7
+ VMOVUPD Z31, K7, Z7 // 6261fd4f11ff
+ VMOVUPD Z2, K7, 7(AX) // 62f1fd4f119007000000
+ VMOVUPD Z31, K7, 7(AX) // 6261fd4f11b807000000
+ VMOVUPD Z2, K7, (DI) // 62f1fd4f1117
+ VMOVUPD Z31, K7, (DI) // 6261fd4f113f
+ VMOVUPD Z1, K6, Z20 // 62b1fd4e11cc
+ VMOVUPD Z3, K6, Z20 // 62b1fd4e11dc
+ VMOVUPD 99(R15)(R15*1), K6, Z20 // 6281fd4e10a43f63000000
+ VMOVUPD (DX), K6, Z20 // 62e1fd4e1022
+ VMOVUPD Z1, K6, Z9 // 62d1fd4e11c9
+ VMOVUPD Z3, K6, Z9 // 62d1fd4e11d9
+ VMOVUPD 99(R15)(R15*1), K6, Z9 // 6211fd4e108c3f63000000
+ VMOVUPD (DX), K6, Z9 // 6271fd4e100a
+ VMOVUPS X22, K3, X0 // 62e17c0b11f0
+ VMOVUPS X22, K3, 7(SI)(DI*4) // 62e17c0b11b4be07000000
+ VMOVUPS X22, K3, -7(DI)(R8*2) // 62a17c0b11b447f9ffffff
+ VMOVUPS X11, K7, X1 // 62717c0f11d9
+ VMOVUPS 17(SP), K7, X1 // 62f17c0f108c2411000000
+ VMOVUPS -17(BP)(SI*4), K7, X1 // 62f17c0f108cb5efffffff
+ VMOVUPS Y14, K4, Y21 // 62317c2c11f5
+ VMOVUPS Y14, K4, 15(R8)(R14*1) // 62117c2c11b4300f000000
+ VMOVUPS Y14, K4, 15(R8)(R14*2) // 62117c2c11b4700f000000
+ VMOVUPS Y20, K4, Y8 // 62c17c2c11e0
+ VMOVUPS (R14), K4, Y8 // 62517c2c1006
+ VMOVUPS -7(DI)(R8*8), K4, Y8 // 62317c2c1084c7f9ffffff
+ VMOVUPS Z28, K7, Z12 // 62417c4f11e4
+ VMOVUPS Z13, K7, Z12 // 62517c4f11ec
+ VMOVUPS Z28, K7, Z16 // 62217c4f11e0
+ VMOVUPS Z13, K7, Z16 // 62317c4f11e8
+ VMOVUPS Z28, K7, -17(BP)(SI*8) // 62617c4f11a4f5efffffff
+ VMOVUPS Z13, K7, -17(BP)(SI*8) // 62717c4f11acf5efffffff
+ VMOVUPS Z28, K7, (R15) // 62417c4f1127
+ VMOVUPS Z13, K7, (R15) // 62517c4f112f
+ VMOVUPS Z3, K2, Z14 // 62d17c4a11de
+ VMOVUPS Z12, K2, Z14 // 62517c4a11e6
+ VMOVUPS 7(SI)(DI*8), K2, Z14 // 62717c4a10b4fe07000000
+ VMOVUPS -15(R14), K2, Z14 // 62517c4a10b6f1ffffff
+ VMOVUPS Z3, K2, Z28 // 62917c4a11dc
+ VMOVUPS Z12, K2, Z28 // 62117c4a11e4
+ VMOVUPS 7(SI)(DI*8), K2, Z28 // 62617c4a10a4fe07000000
+ VMOVUPS -15(R14), K2, Z28 // 62417c4a10a6f1ffffff
+ VMULPD X8, X7, K5, X6 // 62d1c50d59f0
+ VMULPD 7(AX), X7, K5, X6 // 62f1c50d59b007000000
+ VMULPD (DI), X7, K5, X6 // 62f1c50d5937
+ VMULPD Y1, Y24, K3, Y11 // 6271bd2359d9
+ VMULPD 99(R15)(R15*4), Y24, K3, Y11 // 6211bd23599cbf63000000
+ VMULPD 15(DX), Y24, K3, Y11 // 6271bd23599a0f000000
+ VMULPD Z5, Z19, K4, Z15 // 6271e54459fd
+ VMULPD Z1, Z19, K4, Z15 // 6271e54459f9
+ VMULPD Z5, Z15, K4, Z15 // 6271854c59fd
+ VMULPD Z1, Z15, K4, Z15 // 6271854c59f9
+ VMULPD Z5, Z19, K4, Z30 // 6261e54459f5
+ VMULPD Z1, Z19, K4, Z30 // 6261e54459f1
+ VMULPD Z5, Z15, K4, Z30 // 6261854c59f5
+ VMULPD Z1, Z15, K4, Z30 // 6261854c59f1
+ VMULPD Z21, Z14, K2, Z3 // 62b18d4a59dd
+ VMULPD Z8, Z14, K2, Z3 // 62d18d4a59d8
+ VMULPD 7(SI)(DI*1), Z14, K2, Z3 // 62f18d4a599c3e07000000
+ VMULPD 15(DX)(BX*8), Z14, K2, Z3 // 62f18d4a599cda0f000000
+ VMULPD Z21, Z15, K2, Z3 // 62b1854a59dd
+ VMULPD Z8, Z15, K2, Z3 // 62d1854a59d8
+ VMULPD 7(SI)(DI*1), Z15, K2, Z3 // 62f1854a599c3e07000000
+ VMULPD 15(DX)(BX*8), Z15, K2, Z3 // 62f1854a599cda0f000000
+ VMULPD Z21, Z14, K2, Z5 // 62b18d4a59ed
+ VMULPD Z8, Z14, K2, Z5 // 62d18d4a59e8
+ VMULPD 7(SI)(DI*1), Z14, K2, Z5 // 62f18d4a59ac3e07000000
+ VMULPD 15(DX)(BX*8), Z14, K2, Z5 // 62f18d4a59acda0f000000
+ VMULPD Z21, Z15, K2, Z5 // 62b1854a59ed
+ VMULPD Z8, Z15, K2, Z5 // 62d1854a59e8
+ VMULPD 7(SI)(DI*1), Z15, K2, Z5 // 62f1854a59ac3e07000000
+ VMULPD 15(DX)(BX*8), Z15, K2, Z5 // 62f1854a59acda0f000000
+ VMULPS X28, X3, K2, X31 // 6201640a59fc
+ VMULPS 99(R15)(R15*1), X3, K2, X31 // 6201640a59bc3f63000000
+ VMULPS (DX), X3, K2, X31 // 6261640a593a
+ VMULPS Y20, Y18, K3, Y5 // 62b16c2359ec
+ VMULPS (CX), Y18, K3, Y5 // 62f16c235929
+ VMULPS 99(R15), Y18, K3, Y5 // 62d16c2359af63000000
+ VMULPS Z23, Z20, K3, Z16 // 62a15c4359c7
+ VMULPS Z19, Z20, K3, Z16 // 62a15c4359c3
+ VMULPS Z23, Z0, K3, Z16 // 62a17c4b59c7
+ VMULPS Z19, Z0, K3, Z16 // 62a17c4b59c3
+ VMULPS Z23, Z20, K3, Z9 // 62315c4359cf
+ VMULPS Z19, Z20, K3, Z9 // 62315c4359cb
+ VMULPS Z23, Z0, K3, Z9 // 62317c4b59cf
+ VMULPS Z19, Z0, K3, Z9 // 62317c4b59cb
+ VMULPS Z24, Z0, K3, Z0 // 62917c4b59c0
+ VMULPS Z12, Z0, K3, Z0 // 62d17c4b59c4
+ VMULPS -7(DI)(R8*1), Z0, K3, Z0 // 62b17c4b598407f9ffffff
+ VMULPS (SP), Z0, K3, Z0 // 62f17c4b590424
+ VMULPS Z24, Z25, K3, Z0 // 6291344359c0
+ VMULPS Z12, Z25, K3, Z0 // 62d1344359c4
+ VMULPS -7(DI)(R8*1), Z25, K3, Z0 // 62b13443598407f9ffffff
+ VMULPS (SP), Z25, K3, Z0 // 62f13443590424
+ VMULPS Z24, Z0, K3, Z11 // 62117c4b59d8
+ VMULPS Z12, Z0, K3, Z11 // 62517c4b59dc
+ VMULPS -7(DI)(R8*1), Z0, K3, Z11 // 62317c4b599c07f9ffffff
+ VMULPS (SP), Z0, K3, Z11 // 62717c4b591c24
+ VMULPS Z24, Z25, K3, Z11 // 6211344359d8
+ VMULPS Z12, Z25, K3, Z11 // 6251344359dc
+ VMULPS -7(DI)(R8*1), Z25, K3, Z11 // 62313443599c07f9ffffff
+ VMULPS (SP), Z25, K3, Z11 // 62713443591c24
+ VMULSD X7, X24, K2, X20 // 62e1bf0259e7
+ VMULSD X12, X16, K1, X20 // 62c1ff0159e4 or 62c1ff2159e4 or 62c1ff4159e4
+ VMULSD -17(BP), X16, K1, X20 // 62e1ff0159a5efffffff or 62e1ff2159a5efffffff or 62e1ff4159a5efffffff
+ VMULSD -15(R14)(R15*8), X16, K1, X20 // 6281ff0159a4fef1ffffff or 6281ff2159a4fef1ffffff or 6281ff4159a4fef1ffffff
+ VMULSS X28, X17, K2, X6 // 6291760259f4
+ VMULSS X8, X1, K1, X6 // 62d1760959f0 or 62d1762959f0 or 62d1764959f0
+ VMULSS 15(R8)(R14*1), X1, K1, X6 // 6291760959b4300f000000 or 6291762959b4300f000000 or 6291764959b4300f000000
+ VMULSS 15(R8)(R14*2), X1, K1, X6 // 6291760959b4700f000000 or 6291762959b4700f000000 or 6291764959b4700f000000
+ VPABSD X16, K7, X12 // 62327d0f1ee0
+ VPABSD 99(R15)(R15*8), K7, X12 // 62127d0f1ea4ff63000000
+ VPABSD 7(AX)(CX*8), K7, X12 // 62727d0f1ea4c807000000
+ VPABSD Y16, K7, Y17 // 62a27d2f1ec8
+ VPABSD -17(BP), K7, Y17 // 62e27d2f1e8defffffff
+ VPABSD -15(R14)(R15*8), K7, Y17 // 62827d2f1e8cfef1ffffff
+ VPABSD Z20, K6, Z1 // 62b27d4e1ecc
+ VPABSD Z9, K6, Z1 // 62d27d4e1ec9
+ VPABSD (BX), K6, Z1 // 62f27d4e1e0b
+ VPABSD -17(BP)(SI*1), K6, Z1 // 62f27d4e1e8c35efffffff
+ VPABSD Z20, K6, Z9 // 62327d4e1ecc
+ VPABSD Z9, K6, Z9 // 62527d4e1ec9
+ VPABSD (BX), K6, Z9 // 62727d4e1e0b
+ VPABSD -17(BP)(SI*1), K6, Z9 // 62727d4e1e8c35efffffff
+ VPABSQ X8, K3, X28 // 6242fd0b1fe0
+ VPABSQ (AX), K3, X28 // 6262fd0b1f20
+ VPABSQ 7(SI), K3, X28 // 6262fd0b1fa607000000
+ VPABSQ Y6, K7, Y12 // 6272fd2f1fe6
+ VPABSQ 17(SP)(BP*2), K7, Y12 // 6272fd2f1fa46c11000000
+ VPABSQ -7(DI)(R8*4), K7, Y12 // 6232fd2f1fa487f9ffffff
+ VPABSQ Z26, K4, Z30 // 6202fd4c1ff2
+ VPABSQ Z22, K4, Z30 // 6222fd4c1ff6
+ VPABSQ 15(R8)(R14*4), K4, Z30 // 6202fd4c1fb4b00f000000
+ VPABSQ -7(CX)(DX*4), K4, Z30 // 6262fd4c1fb491f9ffffff
+ VPABSQ Z26, K4, Z5 // 6292fd4c1fea
+ VPABSQ Z22, K4, Z5 // 62b2fd4c1fee
+ VPABSQ 15(R8)(R14*4), K4, Z5 // 6292fd4c1facb00f000000
+ VPABSQ -7(CX)(DX*4), K4, Z5 // 62f2fd4c1fac91f9ffffff
+ VPADDD X27, X2, K1, X2 // 62916d09fed3
+ VPADDD (R14), X2, K1, X2 // 62d16d09fe16
+ VPADDD -7(DI)(R8*8), X2, K1, X2 // 62b16d09fe94c7f9ffffff
+ VPADDD Y1, Y6, K7, Y1 // 62f14d2ffec9
+ VPADDD 7(SI)(DI*4), Y6, K7, Y1 // 62f14d2ffe8cbe07000000
+ VPADDD -7(DI)(R8*2), Y6, K7, Y1 // 62b14d2ffe8c47f9ffffff
+ VPADDD Z13, Z11, K2, Z14 // 6251254afef5
+ VPADDD Z14, Z11, K2, Z14 // 6251254afef6
+ VPADDD (CX), Z11, K2, Z14 // 6271254afe31
+ VPADDD 99(R15), Z11, K2, Z14 // 6251254afeb763000000
+ VPADDD Z13, Z5, K2, Z14 // 6251554afef5
+ VPADDD Z14, Z5, K2, Z14 // 6251554afef6
+ VPADDD (CX), Z5, K2, Z14 // 6271554afe31
+ VPADDD 99(R15), Z5, K2, Z14 // 6251554afeb763000000
+ VPADDD Z13, Z11, K2, Z27 // 6241254afedd
+ VPADDD Z14, Z11, K2, Z27 // 6241254afede
+ VPADDD (CX), Z11, K2, Z27 // 6261254afe19
+ VPADDD 99(R15), Z11, K2, Z27 // 6241254afe9f63000000
+ VPADDD Z13, Z5, K2, Z27 // 6241554afedd
+ VPADDD Z14, Z5, K2, Z27 // 6241554afede
+ VPADDD (CX), Z5, K2, Z27 // 6261554afe19
+ VPADDD 99(R15), Z5, K2, Z27 // 6241554afe9f63000000
+ VPADDQ X30, X22, K4, X26 // 6201cd04d4d6
+ VPADDQ 99(R15)(R15*4), X22, K4, X26 // 6201cd04d494bf63000000
+ VPADDQ 15(DX), X22, K4, X26 // 6261cd04d4920f000000
+ VPADDQ Y19, Y0, K1, Y9 // 6231fd29d4cb
+ VPADDQ 17(SP), Y0, K1, Y9 // 6271fd29d48c2411000000
+ VPADDQ -17(BP)(SI*4), Y0, K1, Y9 // 6271fd29d48cb5efffffff
+ VPADDQ Z6, Z2, K3, Z5 // 62f1ed4bd4ee
+ VPADDQ Z14, Z2, K3, Z5 // 62d1ed4bd4ee
+ VPADDQ 99(R15)(R15*2), Z2, K3, Z5 // 6291ed4bd4ac7f63000000
+ VPADDQ -7(DI), Z2, K3, Z5 // 62f1ed4bd4aff9ffffff
+ VPADDQ Z6, Z2, K3, Z23 // 62e1ed4bd4fe
+ VPADDQ Z14, Z2, K3, Z23 // 62c1ed4bd4fe
+ VPADDQ 99(R15)(R15*2), Z2, K3, Z23 // 6281ed4bd4bc7f63000000
+ VPADDQ -7(DI), Z2, K3, Z23 // 62e1ed4bd4bff9ffffff
+ VPANDD X1, X8, K3, X7 // 62f13d0bdbf9
+ VPANDD 15(R8), X8, K3, X7 // 62d13d0bdbb80f000000
+ VPANDD (BP), X8, K3, X7 // 62f13d0bdb7d00
+ VPANDD Y13, Y2, K2, Y14 // 62516d2adbf5
+ VPANDD -7(CX), Y2, K2, Y14 // 62716d2adbb1f9ffffff
+ VPANDD 15(DX)(BX*4), Y2, K2, Y14 // 62716d2adbb49a0f000000
+ VPANDD Z6, Z9, K1, Z12 // 62713549dbe6
+ VPANDD Z25, Z9, K1, Z12 // 62113549dbe1
+ VPANDD -15(R14)(R15*1), Z9, K1, Z12 // 62113549dba43ef1ffffff
+ VPANDD -15(BX), Z9, K1, Z12 // 62713549dba3f1ffffff
+ VPANDD Z6, Z12, K1, Z12 // 62711d49dbe6
+ VPANDD Z25, Z12, K1, Z12 // 62111d49dbe1
+ VPANDD -15(R14)(R15*1), Z12, K1, Z12 // 62111d49dba43ef1ffffff
+ VPANDD -15(BX), Z12, K1, Z12 // 62711d49dba3f1ffffff
+ VPANDD Z6, Z9, K1, Z17 // 62e13549dbce
+ VPANDD Z25, Z9, K1, Z17 // 62813549dbc9
+ VPANDD -15(R14)(R15*1), Z9, K1, Z17 // 62813549db8c3ef1ffffff
+ VPANDD -15(BX), Z9, K1, Z17 // 62e13549db8bf1ffffff
+ VPANDD Z6, Z12, K1, Z17 // 62e11d49dbce
+ VPANDD Z25, Z12, K1, Z17 // 62811d49dbc9
+ VPANDD -15(R14)(R15*1), Z12, K1, Z17 // 62811d49db8c3ef1ffffff
+ VPANDD -15(BX), Z12, K1, Z17 // 62e11d49db8bf1ffffff
+ VPANDND X0, X15, K2, X0 // 62f1050adfc0
+ VPANDND 15(R8)(R14*8), X15, K2, X0 // 6291050adf84f00f000000
+ VPANDND -15(R14)(R15*2), X15, K2, X0 // 6291050adf847ef1ffffff
+ VPANDND Y22, Y15, K1, Y27 // 62210529dfde
+ VPANDND 99(R15)(R15*8), Y15, K1, Y27 // 62010529df9cff63000000
+ VPANDND 7(AX)(CX*8), Y15, K1, Y27 // 62610529df9cc807000000
+ VPANDND Z3, Z8, K7, Z3 // 62f13d4fdfdb
+ VPANDND Z27, Z8, K7, Z3 // 62913d4fdfdb
+ VPANDND 7(AX)(CX*4), Z8, K7, Z3 // 62f13d4fdf9c8807000000
+ VPANDND 7(AX)(CX*1), Z8, K7, Z3 // 62f13d4fdf9c0807000000
+ VPANDND Z3, Z2, K7, Z3 // 62f16d4fdfdb
+ VPANDND Z27, Z2, K7, Z3 // 62916d4fdfdb
+ VPANDND 7(AX)(CX*4), Z2, K7, Z3 // 62f16d4fdf9c8807000000
+ VPANDND 7(AX)(CX*1), Z2, K7, Z3 // 62f16d4fdf9c0807000000
+ VPANDND Z3, Z8, K7, Z21 // 62e13d4fdfeb
+ VPANDND Z27, Z8, K7, Z21 // 62813d4fdfeb
+ VPANDND 7(AX)(CX*4), Z8, K7, Z21 // 62e13d4fdfac8807000000
+ VPANDND 7(AX)(CX*1), Z8, K7, Z21 // 62e13d4fdfac0807000000
+ VPANDND Z3, Z2, K7, Z21 // 62e16d4fdfeb
+ VPANDND Z27, Z2, K7, Z21 // 62816d4fdfeb
+ VPANDND 7(AX)(CX*4), Z2, K7, Z21 // 62e16d4fdfac8807000000
+ VPANDND 7(AX)(CX*1), Z2, K7, Z21 // 62e16d4fdfac0807000000
+ VPANDNQ X0, X21, K1, X16 // 62e1d501dfc0
+ VPANDNQ -15(R14)(R15*1), X21, K1, X16 // 6281d501df843ef1ffffff
+ VPANDNQ -15(BX), X21, K1, X16 // 62e1d501df83f1ffffff
+ VPANDNQ Y24, Y18, K1, Y20 // 6281ed21dfe0
+ VPANDNQ (AX), Y18, K1, Y20 // 62e1ed21df20
+ VPANDNQ 7(SI), Y18, K1, Y20 // 62e1ed21dfa607000000
+ VPANDNQ Z20, Z0, K1, Z7 // 62b1fd49dffc
+ VPANDNQ Z28, Z0, K1, Z7 // 6291fd49dffc
+ VPANDNQ (SI), Z0, K1, Z7 // 62f1fd49df3e
+ VPANDNQ 7(SI)(DI*2), Z0, K1, Z7 // 62f1fd49dfbc7e07000000
+ VPANDNQ Z20, Z6, K1, Z7 // 62b1cd49dffc
+ VPANDNQ Z28, Z6, K1, Z7 // 6291cd49dffc
+ VPANDNQ (SI), Z6, K1, Z7 // 62f1cd49df3e
+ VPANDNQ 7(SI)(DI*2), Z6, K1, Z7 // 62f1cd49dfbc7e07000000
+ VPANDNQ Z20, Z0, K1, Z9 // 6231fd49dfcc
+ VPANDNQ Z28, Z0, K1, Z9 // 6211fd49dfcc
+ VPANDNQ (SI), Z0, K1, Z9 // 6271fd49df0e
+ VPANDNQ 7(SI)(DI*2), Z0, K1, Z9 // 6271fd49df8c7e07000000
+ VPANDNQ Z20, Z6, K1, Z9 // 6231cd49dfcc
+ VPANDNQ Z28, Z6, K1, Z9 // 6211cd49dfcc
+ VPANDNQ (SI), Z6, K1, Z9 // 6271cd49df0e
+ VPANDNQ 7(SI)(DI*2), Z6, K1, Z9 // 6271cd49df8c7e07000000
+ VPANDQ X7, X22, K7, X28 // 6261cd07dbe7
+ VPANDQ 7(AX)(CX*4), X22, K7, X28 // 6261cd07dba48807000000
+ VPANDQ 7(AX)(CX*1), X22, K7, X28 // 6261cd07dba40807000000
+ VPANDQ Y19, Y3, K2, Y9 // 6231e52adbcb
+ VPANDQ (BX), Y3, K2, Y9 // 6271e52adb0b
+ VPANDQ -17(BP)(SI*1), Y3, K2, Y9 // 6271e52adb8c35efffffff
+ VPANDQ Z12, Z9, K4, Z3 // 62d1b54cdbdc
+ VPANDQ Z22, Z9, K4, Z3 // 62b1b54cdbde
+ VPANDQ 17(SP)(BP*8), Z9, K4, Z3 // 62f1b54cdb9cec11000000
+ VPANDQ 17(SP)(BP*4), Z9, K4, Z3 // 62f1b54cdb9cac11000000
+ VPANDQ Z12, Z19, K4, Z3 // 62d1e544dbdc
+ VPANDQ Z22, Z19, K4, Z3 // 62b1e544dbde
+ VPANDQ 17(SP)(BP*8), Z19, K4, Z3 // 62f1e544db9cec11000000
+ VPANDQ 17(SP)(BP*4), Z19, K4, Z3 // 62f1e544db9cac11000000
+ VPANDQ Z12, Z9, K4, Z30 // 6241b54cdbf4
+ VPANDQ Z22, Z9, K4, Z30 // 6221b54cdbf6
+ VPANDQ 17(SP)(BP*8), Z9, K4, Z30 // 6261b54cdbb4ec11000000
+ VPANDQ 17(SP)(BP*4), Z9, K4, Z30 // 6261b54cdbb4ac11000000
+ VPANDQ Z12, Z19, K4, Z30 // 6241e544dbf4
+ VPANDQ Z22, Z19, K4, Z30 // 6221e544dbf6
+ VPANDQ 17(SP)(BP*8), Z19, K4, Z30 // 6261e544dbb4ec11000000
+ VPANDQ 17(SP)(BP*4), Z19, K4, Z30 // 6261e544dbb4ac11000000
+ VPBLENDMD X14, X12, K4, X0 // 62d21d0c64c6
+ VPBLENDMD 17(SP), X12, K4, X0 // 62f21d0c64842411000000
+ VPBLENDMD -17(BP)(SI*4), X12, K4, X0 // 62f21d0c6484b5efffffff
+ VPBLENDMD Y6, Y31, K4, Y6 // 62f2052464f6
+ VPBLENDMD -17(BP)(SI*2), Y31, K4, Y6 // 62f2052464b475efffffff
+ VPBLENDMD 7(AX)(CX*2), Y31, K4, Y6 // 62f2052464b44807000000
+ VPBLENDMD Z20, Z2, K7, Z22 // 62a26d4f64f4
+ VPBLENDMD Z9, Z2, K7, Z22 // 62c26d4f64f1
+ VPBLENDMD 99(R15)(R15*1), Z2, K7, Z22 // 62826d4f64b43f63000000
+ VPBLENDMD (DX), Z2, K7, Z22 // 62e26d4f6432
+ VPBLENDMD Z20, Z31, K7, Z22 // 62a2054764f4
+ VPBLENDMD Z9, Z31, K7, Z22 // 62c2054764f1
+ VPBLENDMD 99(R15)(R15*1), Z31, K7, Z22 // 6282054764b43f63000000
+ VPBLENDMD (DX), Z31, K7, Z22 // 62e205476432
+ VPBLENDMD Z20, Z2, K7, Z7 // 62b26d4f64fc
+ VPBLENDMD Z9, Z2, K7, Z7 // 62d26d4f64f9
+ VPBLENDMD 99(R15)(R15*1), Z2, K7, Z7 // 62926d4f64bc3f63000000
+ VPBLENDMD (DX), Z2, K7, Z7 // 62f26d4f643a
+ VPBLENDMD Z20, Z31, K7, Z7 // 62b2054764fc
+ VPBLENDMD Z9, Z31, K7, Z7 // 62d2054764f9
+ VPBLENDMD 99(R15)(R15*1), Z31, K7, Z7 // 6292054764bc3f63000000
+ VPBLENDMD (DX), Z31, K7, Z7 // 62f20547643a
+ VPBLENDMQ X15, X17, K2, X5 // 62d2f50264ef
+ VPBLENDMQ 7(AX), X17, K2, X5 // 62f2f50264a807000000
+ VPBLENDMQ (DI), X17, K2, X5 // 62f2f502642f
+ VPBLENDMQ Y7, Y19, K5, Y11 // 6272e52564df
+ VPBLENDMQ 15(R8)(R14*1), Y19, K5, Y11 // 6212e525649c300f000000
+ VPBLENDMQ 15(R8)(R14*2), Y19, K5, Y11 // 6212e525649c700f000000
+ VPBLENDMQ Z28, Z12, K3, Z1 // 62929d4b64cc
+ VPBLENDMQ Z13, Z12, K3, Z1 // 62d29d4b64cd
+ VPBLENDMQ -17(BP)(SI*8), Z12, K3, Z1 // 62f29d4b648cf5efffffff
+ VPBLENDMQ (R15), Z12, K3, Z1 // 62d29d4b640f
+ VPBLENDMQ Z28, Z16, K3, Z1 // 6292fd4364cc
+ VPBLENDMQ Z13, Z16, K3, Z1 // 62d2fd4364cd
+ VPBLENDMQ -17(BP)(SI*8), Z16, K3, Z1 // 62f2fd43648cf5efffffff
+ VPBLENDMQ (R15), Z16, K3, Z1 // 62d2fd43640f
+ VPBLENDMQ Z28, Z12, K3, Z3 // 62929d4b64dc
+ VPBLENDMQ Z13, Z12, K3, Z3 // 62d29d4b64dd
+ VPBLENDMQ -17(BP)(SI*8), Z12, K3, Z3 // 62f29d4b649cf5efffffff
+ VPBLENDMQ (R15), Z12, K3, Z3 // 62d29d4b641f
+ VPBLENDMQ Z28, Z16, K3, Z3 // 6292fd4364dc
+ VPBLENDMQ Z13, Z16, K3, Z3 // 62d2fd4364dd
+ VPBLENDMQ -17(BP)(SI*8), Z16, K3, Z3 // 62f2fd43649cf5efffffff
+ VPBLENDMQ (R15), Z16, K3, Z3 // 62d2fd43641f
+ VPBROADCASTD SP, K1, X15 // 62727d097cfc
+ VPBROADCASTD R14, K1, X15 // 62527d097cfe
+ VPBROADCASTD AX, K7, Y12 // 62727d2f7ce0
+ VPBROADCASTD R9, K7, Y12 // 62527d2f7ce1
+ VPBROADCASTD CX, K1, Z3 // 62f27d497cd9
+ VPBROADCASTD SP, K1, Z3 // 62f27d497cdc
+ VPBROADCASTD CX, K1, Z5 // 62f27d497ce9
+ VPBROADCASTD SP, K1, Z5 // 62f27d497cec
+ VPBROADCASTD X18, K1, X26 // 62227d0958d2
+ VPBROADCASTD (R14), K1, X26 // 62427d095816
+ VPBROADCASTD -7(DI)(R8*8), K1, X26 // 62227d095894c7f9ffffff
+ VPBROADCASTD X21, K1, Y3 // 62b27d2958dd
+ VPBROADCASTD 99(R15)(R15*4), K1, Y3 // 62927d29589cbf63000000
+ VPBROADCASTD 15(DX), K1, Y3 // 62f27d29589a0f000000
+ VPBROADCASTD X1, K7, Z14 // 62727d4f58f1
+ VPBROADCASTD (CX), K7, Z14 // 62727d4f5831
+ VPBROADCASTD 99(R15), K7, Z14 // 62527d4f58b763000000
+ VPBROADCASTD X1, K7, Z15 // 62727d4f58f9
+ VPBROADCASTD (CX), K7, Z15 // 62727d4f5839
+ VPBROADCASTD 99(R15), K7, Z15 // 62527d4f58bf63000000
+ VPBROADCASTQ R9, K2, X3 // 62d2fd0a7cd9
+ VPBROADCASTQ R13, K2, X3 // 62d2fd0a7cdd
+ VPBROADCASTQ DX, K4, Y7 // 62f2fd2c7cfa
+ VPBROADCASTQ BP, K4, Y7 // 62f2fd2c7cfd
+ VPBROADCASTQ R10, K1, Z20 // 62c2fd497ce2
+ VPBROADCASTQ CX, K1, Z20 // 62e2fd497ce1
+ VPBROADCASTQ R10, K1, Z0 // 62d2fd497cc2
+ VPBROADCASTQ CX, K1, Z0 // 62f2fd497cc1
+ VPBROADCASTQ X0, K3, X7 // 62f2fd0b59f8
+ VPBROADCASTQ 17(SP)(BP*2), K3, X7 // 62f2fd0b59bc6c11000000
+ VPBROADCASTQ -7(DI)(R8*4), K3, X7 // 62b2fd0b59bc87f9ffffff
+ VPBROADCASTQ X0, K4, Y0 // 62f2fd2c59c0
+ VPBROADCASTQ 15(R8), K4, Y0 // 62d2fd2c59800f000000
+ VPBROADCASTQ (BP), K4, Y0 // 62f2fd2c594500
+ VPBROADCASTQ X24, K5, Z23 // 6282fd4d59f8
+ VPBROADCASTQ 15(R8)(R14*8), K5, Z23 // 6282fd4d59bcf00f000000
+ VPBROADCASTQ -15(R14)(R15*2), K5, Z23 // 6282fd4d59bc7ef1ffffff
+ VPBROADCASTQ X24, K5, Z19 // 6282fd4d59d8
+ VPBROADCASTQ 15(R8)(R14*8), K5, Z19 // 6282fd4d599cf00f000000
+ VPBROADCASTQ -15(R14)(R15*2), K5, Z19 // 6282fd4d599c7ef1ffffff
+ VPCMPD $64, X13, X11, K5, K6 // 62d3250d1ff540
+ VPCMPD $64, 7(SI)(DI*1), X11, K5, K6 // 62f3250d1fb43e0700000040
+ VPCMPD $64, 15(DX)(BX*8), X11, K5, K6 // 62f3250d1fb4da0f00000040
+ VPCMPD $64, X13, X11, K5, K7 // 62d3250d1ffd40
+ VPCMPD $64, 7(SI)(DI*1), X11, K5, K7 // 62f3250d1fbc3e0700000040
+ VPCMPD $64, 15(DX)(BX*8), X11, K5, K7 // 62f3250d1fbcda0f00000040
+ VPCMPD $27, Y31, Y9, K3, K6 // 6293352b1ff71b
+ VPCMPD $27, 99(R15)(R15*2), Y9, K3, K6 // 6293352b1fb47f630000001b
+ VPCMPD $27, -7(DI), Y9, K3, K6 // 62f3352b1fb7f9ffffff1b
+ VPCMPD $27, Y31, Y9, K3, K4 // 6293352b1fe71b
+ VPCMPD $27, 99(R15)(R15*2), Y9, K3, K4 // 6293352b1fa47f630000001b
+ VPCMPD $27, -7(DI), Y9, K3, K4 // 62f3352b1fa7f9ffffff1b
+ VPCMPD $47, Z17, Z20, K4, K4 // 62b35d441fe12f
+ VPCMPD $47, Z0, Z20, K4, K4 // 62f35d441fe02f
+ VPCMPD $47, -7(CX), Z20, K4, K4 // 62f35d441fa1f9ffffff2f
+ VPCMPD $47, 15(DX)(BX*4), Z20, K4, K4 // 62f35d441fa49a0f0000002f
+ VPCMPD $47, Z17, Z0, K4, K4 // 62b37d4c1fe12f
+ VPCMPD $47, Z0, Z0, K4, K4 // 62f37d4c1fe02f
+ VPCMPD $47, -7(CX), Z0, K4, K4 // 62f37d4c1fa1f9ffffff2f
+ VPCMPD $47, 15(DX)(BX*4), Z0, K4, K4 // 62f37d4c1fa49a0f0000002f
+ VPCMPD $47, Z17, Z20, K4, K6 // 62b35d441ff12f
+ VPCMPD $47, Z0, Z20, K4, K6 // 62f35d441ff02f
+ VPCMPD $47, -7(CX), Z20, K4, K6 // 62f35d441fb1f9ffffff2f
+ VPCMPD $47, 15(DX)(BX*4), Z20, K4, K6 // 62f35d441fb49a0f0000002f
+ VPCMPD $47, Z17, Z0, K4, K6 // 62b37d4c1ff12f
+ VPCMPD $47, Z0, Z0, K4, K6 // 62f37d4c1ff02f
+ VPCMPD $47, -7(CX), Z0, K4, K6 // 62f37d4c1fb1f9ffffff2f
+ VPCMPD $47, 15(DX)(BX*4), Z0, K4, K6 // 62f37d4c1fb49a0f0000002f
+ VPCMPEQD X14, X16, K3, K6 // 62d17d0376f6
+ VPCMPEQD -7(CX), X16, K3, K6 // 62f17d0376b1f9ffffff
+ VPCMPEQD 15(DX)(BX*4), X16, K3, K6 // 62f17d0376b49a0f000000
+ VPCMPEQD X14, X16, K3, K5 // 62d17d0376ee
+ VPCMPEQD -7(CX), X16, K3, K5 // 62f17d0376a9f9ffffff
+ VPCMPEQD 15(DX)(BX*4), X16, K3, K5 // 62f17d0376ac9a0f000000
+ VPCMPEQD Y13, Y28, K3, K1 // 62d11d2376cd
+ VPCMPEQD 15(DX)(BX*1), Y28, K3, K1 // 62f11d23768c1a0f000000
+ VPCMPEQD -7(CX)(DX*2), Y28, K3, K1 // 62f11d23768c51f9ffffff
+ VPCMPEQD Y13, Y28, K3, K5 // 62d11d2376ed
+ VPCMPEQD 15(DX)(BX*1), Y28, K3, K5 // 62f11d2376ac1a0f000000
+ VPCMPEQD -7(CX)(DX*2), Y28, K3, K5 // 62f11d2376ac51f9ffffff
+ VPCMPEQD Z6, Z21, K2, K3 // 62f1554276de
+ VPCMPEQD Z9, Z21, K2, K3 // 62d1554276d9
+ VPCMPEQD (AX), Z21, K2, K3 // 62f155427618
+ VPCMPEQD 7(SI), Z21, K2, K3 // 62f15542769e07000000
+ VPCMPEQD Z6, Z9, K2, K3 // 62f1354a76de
+ VPCMPEQD Z9, Z9, K2, K3 // 62d1354a76d9
+ VPCMPEQD (AX), Z9, K2, K3 // 62f1354a7618
+ VPCMPEQD 7(SI), Z9, K2, K3 // 62f1354a769e07000000
+ VPCMPEQD Z6, Z21, K2, K1 // 62f1554276ce
+ VPCMPEQD Z9, Z21, K2, K1 // 62d1554276c9
+ VPCMPEQD (AX), Z21, K2, K1 // 62f155427608
+ VPCMPEQD 7(SI), Z21, K2, K1 // 62f15542768e07000000
+ VPCMPEQD Z6, Z9, K2, K1 // 62f1354a76ce
+ VPCMPEQD Z9, Z9, K2, K1 // 62d1354a76c9
+ VPCMPEQD (AX), Z9, K2, K1 // 62f1354a7608
+ VPCMPEQD 7(SI), Z9, K2, K1 // 62f1354a768e07000000
+ VPCMPEQQ X14, X11, K1, K5 // 62d2a50929ee
+ VPCMPEQQ 99(R15)(R15*8), X11, K1, K5 // 6292a50929acff63000000
+ VPCMPEQQ 7(AX)(CX*8), X11, K1, K5 // 62f2a50929acc807000000
+ VPCMPEQQ X14, X11, K1, K4 // 62d2a50929e6
+ VPCMPEQQ 99(R15)(R15*8), X11, K1, K4 // 6292a50929a4ff63000000
+ VPCMPEQQ 7(AX)(CX*8), X11, K1, K4 // 62f2a50929a4c807000000
+ VPCMPEQQ Y2, Y7, K2, K7 // 62f2c52a29fa
+ VPCMPEQQ -17(BP), Y7, K2, K7 // 62f2c52a29bdefffffff
+ VPCMPEQQ -15(R14)(R15*8), Y7, K2, K7 // 6292c52a29bcfef1ffffff
+ VPCMPEQQ Y2, Y7, K2, K6 // 62f2c52a29f2
+ VPCMPEQQ -17(BP), Y7, K2, K6 // 62f2c52a29b5efffffff
+ VPCMPEQQ -15(R14)(R15*8), Y7, K2, K6 // 6292c52a29b4fef1ffffff
+ VPCMPEQQ Z20, Z1, K1, K4 // 62b2f54929e4
+ VPCMPEQQ Z9, Z1, K1, K4 // 62d2f54929e1
+ VPCMPEQQ (BX), Z1, K1, K4 // 62f2f5492923
+ VPCMPEQQ -17(BP)(SI*1), Z1, K1, K4 // 62f2f54929a435efffffff
+ VPCMPEQQ Z20, Z9, K1, K4 // 62b2b54929e4
+ VPCMPEQQ Z9, Z9, K1, K4 // 62d2b54929e1
+ VPCMPEQQ (BX), Z9, K1, K4 // 62f2b5492923
+ VPCMPEQQ -17(BP)(SI*1), Z9, K1, K4 // 62f2b54929a435efffffff
+ VPCMPEQQ Z20, Z1, K1, K6 // 62b2f54929f4
+ VPCMPEQQ Z9, Z1, K1, K6 // 62d2f54929f1
+ VPCMPEQQ (BX), Z1, K1, K6 // 62f2f5492933
+ VPCMPEQQ -17(BP)(SI*1), Z1, K1, K6 // 62f2f54929b435efffffff
+ VPCMPEQQ Z20, Z9, K1, K6 // 62b2b54929f4
+ VPCMPEQQ Z9, Z9, K1, K6 // 62d2b54929f1
+ VPCMPEQQ (BX), Z9, K1, K6 // 62f2b5492933
+ VPCMPEQQ -17(BP)(SI*1), Z9, K1, K6 // 62f2b54929b435efffffff
+ VPCMPGTD X12, X23, K4, K4 // 62d1450466e4
+ VPCMPGTD 15(R8)(R14*4), X23, K4, K4 // 6291450466a4b00f000000
+ VPCMPGTD -7(CX)(DX*4), X23, K4, K4 // 62f1450466a491f9ffffff
+ VPCMPGTD X12, X23, K4, K6 // 62d1450466f4
+ VPCMPGTD 15(R8)(R14*4), X23, K4, K6 // 6291450466b4b00f000000
+ VPCMPGTD -7(CX)(DX*4), X23, K4, K6 // 62f1450466b491f9ffffff
+ VPCMPGTD Y3, Y9, K1, K4 // 62f1352966e3
+ VPCMPGTD 15(R8)(R14*8), Y9, K1, K4 // 6291352966a4f00f000000
+ VPCMPGTD -15(R14)(R15*2), Y9, K1, K4 // 6291352966a47ef1ffffff
+ VPCMPGTD Y3, Y9, K1, K5 // 62f1352966eb
+ VPCMPGTD 15(R8)(R14*8), Y9, K1, K5 // 6291352966acf00f000000
+ VPCMPGTD -15(R14)(R15*2), Y9, K1, K5 // 6291352966ac7ef1ffffff
+ VPCMPGTD Z12, Z14, K3, K2 // 62d10d4b66d4
+ VPCMPGTD Z13, Z14, K3, K2 // 62d10d4b66d5
+ VPCMPGTD 17(SP)(BP*1), Z14, K3, K2 // 62f10d4b66942c11000000
+ VPCMPGTD -7(CX)(DX*8), Z14, K3, K2 // 62f10d4b6694d1f9ffffff
+ VPCMPGTD Z12, Z13, K3, K2 // 62d1154b66d4
+ VPCMPGTD Z13, Z13, K3, K2 // 62d1154b66d5
+ VPCMPGTD 17(SP)(BP*1), Z13, K3, K2 // 62f1154b66942c11000000
+ VPCMPGTD -7(CX)(DX*8), Z13, K3, K2 // 62f1154b6694d1f9ffffff
+ VPCMPGTD Z12, Z14, K3, K7 // 62d10d4b66fc
+ VPCMPGTD Z13, Z14, K3, K7 // 62d10d4b66fd
+ VPCMPGTD 17(SP)(BP*1), Z14, K3, K7 // 62f10d4b66bc2c11000000
+ VPCMPGTD -7(CX)(DX*8), Z14, K3, K7 // 62f10d4b66bcd1f9ffffff
+ VPCMPGTD Z12, Z13, K3, K7 // 62d1154b66fc
+ VPCMPGTD Z13, Z13, K3, K7 // 62d1154b66fd
+ VPCMPGTD 17(SP)(BP*1), Z13, K3, K7 // 62f1154b66bc2c11000000
+ VPCMPGTD -7(CX)(DX*8), Z13, K3, K7 // 62f1154b66bcd1f9ffffff
+ VPCMPGTQ X23, X16, K4, K0 // 62b2fd0437c7
+ VPCMPGTQ (R8), X16, K4, K0 // 62d2fd043700
+ VPCMPGTQ 15(DX)(BX*2), X16, K4, K0 // 62f2fd0437845a0f000000
+ VPCMPGTQ X23, X16, K4, K5 // 62b2fd0437ef
+ VPCMPGTQ (R8), X16, K4, K5 // 62d2fd043728
+ VPCMPGTQ 15(DX)(BX*2), X16, K4, K5 // 62f2fd0437ac5a0f000000
+ VPCMPGTQ Y9, Y2, K5, K6 // 62d2ed2d37f1
+ VPCMPGTQ -15(R14)(R15*1), Y2, K5, K6 // 6292ed2d37b43ef1ffffff
+ VPCMPGTQ -15(BX), Y2, K5, K6 // 62f2ed2d37b3f1ffffff
+ VPCMPGTQ Y9, Y2, K5, K5 // 62d2ed2d37e9
+ VPCMPGTQ -15(R14)(R15*1), Y2, K5, K5 // 6292ed2d37ac3ef1ffffff
+ VPCMPGTQ -15(BX), Y2, K5, K5 // 62f2ed2d37abf1ffffff
+ VPCMPGTQ Z2, Z21, K7, K1 // 62f2d54737ca
+ VPCMPGTQ Z7, Z21, K7, K1 // 62f2d54737cf
+ VPCMPGTQ -17(BP)(SI*2), Z21, K7, K1 // 62f2d547378c75efffffff
+ VPCMPGTQ 7(AX)(CX*2), Z21, K7, K1 // 62f2d547378c4807000000
+ VPCMPGTQ Z2, Z9, K7, K1 // 62f2b54f37ca
+ VPCMPGTQ Z7, Z9, K7, K1 // 62f2b54f37cf
+ VPCMPGTQ -17(BP)(SI*2), Z9, K7, K1 // 62f2b54f378c75efffffff
+ VPCMPGTQ 7(AX)(CX*2), Z9, K7, K1 // 62f2b54f378c4807000000
+ VPCMPGTQ Z2, Z21, K7, K5 // 62f2d54737ea
+ VPCMPGTQ Z7, Z21, K7, K5 // 62f2d54737ef
+ VPCMPGTQ -17(BP)(SI*2), Z21, K7, K5 // 62f2d54737ac75efffffff
+ VPCMPGTQ 7(AX)(CX*2), Z21, K7, K5 // 62f2d54737ac4807000000
+ VPCMPGTQ Z2, Z9, K7, K5 // 62f2b54f37ea
+ VPCMPGTQ Z7, Z9, K7, K5 // 62f2b54f37ef
+ VPCMPGTQ -17(BP)(SI*2), Z9, K7, K5 // 62f2b54f37ac75efffffff
+ VPCMPGTQ 7(AX)(CX*2), Z9, K7, K5 // 62f2b54f37ac4807000000
+ VPCMPQ $82, X24, X31, K7, K4 // 629385071fe052
+ VPCMPQ $82, -17(BP)(SI*2), X31, K7, K4 // 62f385071fa475efffffff52
+ VPCMPQ $82, 7(AX)(CX*2), X31, K7, K4 // 62f385071fa4480700000052
+ VPCMPQ $82, X24, X31, K7, K6 // 629385071ff052
+ VPCMPQ $82, -17(BP)(SI*2), X31, K7, K6 // 62f385071fb475efffffff52
+ VPCMPQ $82, 7(AX)(CX*2), X31, K7, K6 // 62f385071fb4480700000052
+ VPCMPQ $126, Y30, Y14, K4, K0 // 62938d2c1fc67e
+ VPCMPQ $126, (SI), Y14, K4, K0 // 62f38d2c1f067e
+ VPCMPQ $126, 7(SI)(DI*2), Y14, K4, K0 // 62f38d2c1f847e070000007e
+ VPCMPQ $126, Y30, Y14, K4, K7 // 62938d2c1ffe7e
+ VPCMPQ $126, (SI), Y14, K4, K7 // 62f38d2c1f3e7e
+ VPCMPQ $126, 7(SI)(DI*2), Y14, K4, K7 // 62f38d2c1fbc7e070000007e
+ VPCMPQ $94, Z3, Z27, K4, K5 // 62f3a5441feb5e
+ VPCMPQ $94, Z0, Z27, K4, K5 // 62f3a5441fe85e
+ VPCMPQ $94, (R14), Z27, K4, K5 // 62d3a5441f2e5e
+ VPCMPQ $94, -7(DI)(R8*8), Z27, K4, K5 // 62b3a5441facc7f9ffffff5e
+ VPCMPQ $94, Z3, Z14, K4, K5 // 62f38d4c1feb5e
+ VPCMPQ $94, Z0, Z14, K4, K5 // 62f38d4c1fe85e
+ VPCMPQ $94, (R14), Z14, K4, K5 // 62d38d4c1f2e5e
+ VPCMPQ $94, -7(DI)(R8*8), Z14, K4, K5 // 62b38d4c1facc7f9ffffff5e
+ VPCMPQ $94, Z3, Z27, K4, K4 // 62f3a5441fe35e
+ VPCMPQ $94, Z0, Z27, K4, K4 // 62f3a5441fe05e
+ VPCMPQ $94, (R14), Z27, K4, K4 // 62d3a5441f265e
+ VPCMPQ $94, -7(DI)(R8*8), Z27, K4, K4 // 62b3a5441fa4c7f9ffffff5e
+ VPCMPQ $94, Z3, Z14, K4, K4 // 62f38d4c1fe35e
+ VPCMPQ $94, Z0, Z14, K4, K4 // 62f38d4c1fe05e
+ VPCMPQ $94, (R14), Z14, K4, K4 // 62d38d4c1f265e
+ VPCMPQ $94, -7(DI)(R8*8), Z14, K4, K4 // 62b38d4c1fa4c7f9ffffff5e
+ VPCMPUD $67, X23, X11, K3, K6 // 62b3250b1ef743
+ VPCMPUD $67, (R14), X11, K3, K6 // 62d3250b1e3643
+ VPCMPUD $67, -7(DI)(R8*8), X11, K3, K6 // 62b3250b1eb4c7f9ffffff43
+ VPCMPUD $67, X23, X11, K3, K4 // 62b3250b1ee743
+ VPCMPUD $67, (R14), X11, K3, K4 // 62d3250b1e2643
+ VPCMPUD $67, -7(DI)(R8*8), X11, K3, K4 // 62b3250b1ea4c7f9ffffff43
+ VPCMPUD $127, Y1, Y16, K4, K4 // 62f37d241ee17f
+ VPCMPUD $127, 7(SI)(DI*4), Y16, K4, K4 // 62f37d241ea4be070000007f
+ VPCMPUD $127, -7(DI)(R8*2), Y16, K4, K4 // 62b37d241ea447f9ffffff7f
+ VPCMPUD $127, Y1, Y16, K4, K6 // 62f37d241ef17f
+ VPCMPUD $127, 7(SI)(DI*4), Y16, K4, K6 // 62f37d241eb4be070000007f
+ VPCMPUD $127, -7(DI)(R8*2), Y16, K4, K6 // 62b37d241eb447f9ffffff7f
+ VPCMPUD $0, Z1, Z22, K2, K4 // 62f34d421ee100
+ VPCMPUD $0, Z16, Z22, K2, K4 // 62b34d421ee000
+ VPCMPUD $0, (CX), Z22, K2, K4 // 62f34d421e2100
+ VPCMPUD $0, 99(R15), Z22, K2, K4 // 62d34d421ea76300000000
+ VPCMPUD $0, Z1, Z25, K2, K4 // 62f335421ee100
+ VPCMPUD $0, Z16, Z25, K2, K4 // 62b335421ee000
+ VPCMPUD $0, (CX), Z25, K2, K4 // 62f335421e2100
+ VPCMPUD $0, 99(R15), Z25, K2, K4 // 62d335421ea76300000000
+ VPCMPUD $0, Z1, Z22, K2, K5 // 62f34d421ee900
+ VPCMPUD $0, Z16, Z22, K2, K5 // 62b34d421ee800
+ VPCMPUD $0, (CX), Z22, K2, K5 // 62f34d421e2900
+ VPCMPUD $0, 99(R15), Z22, K2, K5 // 62d34d421eaf6300000000
+ VPCMPUD $0, Z1, Z25, K2, K5 // 62f335421ee900
+ VPCMPUD $0, Z16, Z25, K2, K5 // 62b335421ee800
+ VPCMPUD $0, (CX), Z25, K2, K5 // 62f335421e2900
+ VPCMPUD $0, 99(R15), Z25, K2, K5 // 62d335421eaf6300000000
+ VPCMPUQ $97, X20, X2, K2, K2 // 62b3ed0a1ed461
+ VPCMPUQ $97, 99(R15)(R15*4), X2, K2, K2 // 6293ed0a1e94bf6300000061
+ VPCMPUQ $97, 15(DX), X2, K2, K2 // 62f3ed0a1e920f00000061
+ VPCMPUQ $97, X20, X2, K2, K7 // 62b3ed0a1efc61
+ VPCMPUQ $97, 99(R15)(R15*4), X2, K2, K7 // 6293ed0a1ebcbf6300000061
+ VPCMPUQ $97, 15(DX), X2, K2, K7 // 62f3ed0a1eba0f00000061
+ VPCMPUQ $81, Y31, Y30, K3, K0 // 62938d231ec751
+ VPCMPUQ $81, 17(SP), Y30, K3, K0 // 62f38d231e84241100000051
+ VPCMPUQ $81, -17(BP)(SI*4), Y30, K3, K0 // 62f38d231e84b5efffffff51
+ VPCMPUQ $81, Y31, Y30, K3, K5 // 62938d231eef51
+ VPCMPUQ $81, 17(SP), Y30, K3, K5 // 62f38d231eac241100000051
+ VPCMPUQ $81, -17(BP)(SI*4), Y30, K3, K5 // 62f38d231eacb5efffffff51
+ VPCMPUQ $42, Z0, Z6, K3, K6 // 62f3cd4b1ef02a
+ VPCMPUQ $42, Z8, Z6, K3, K6 // 62d3cd4b1ef02a
+ VPCMPUQ $42, 99(R15)(R15*2), Z6, K3, K6 // 6293cd4b1eb47f630000002a
+ VPCMPUQ $42, -7(DI), Z6, K3, K6 // 62f3cd4b1eb7f9ffffff2a
+ VPCMPUQ $42, Z0, Z2, K3, K6 // 62f3ed4b1ef02a
+ VPCMPUQ $42, Z8, Z2, K3, K6 // 62d3ed4b1ef02a
+ VPCMPUQ $42, 99(R15)(R15*2), Z2, K3, K6 // 6293ed4b1eb47f630000002a
+ VPCMPUQ $42, -7(DI), Z2, K3, K6 // 62f3ed4b1eb7f9ffffff2a
+ VPCMPUQ $42, Z0, Z6, K3, K5 // 62f3cd4b1ee82a
+ VPCMPUQ $42, Z8, Z6, K3, K5 // 62d3cd4b1ee82a
+ VPCMPUQ $42, 99(R15)(R15*2), Z6, K3, K5 // 6293cd4b1eac7f630000002a
+ VPCMPUQ $42, -7(DI), Z6, K3, K5 // 62f3cd4b1eaff9ffffff2a
+ VPCMPUQ $42, Z0, Z2, K3, K5 // 62f3ed4b1ee82a
+ VPCMPUQ $42, Z8, Z2, K3, K5 // 62d3ed4b1ee82a
+ VPCMPUQ $42, 99(R15)(R15*2), Z2, K3, K5 // 6293ed4b1eac7f630000002a
+ VPCMPUQ $42, -7(DI), Z2, K3, K5 // 62f3ed4b1eaff9ffffff2a
+ VPCOMPRESSD X9, K7, X8 // 62527d0f8bc8
+ VPCOMPRESSD X9, K7, 15(DX)(BX*1) // 62727d0f8b8c1a0f000000
+ VPCOMPRESSD X9, K7, -7(CX)(DX*2) // 62727d0f8b8c51f9ffffff
+ VPCOMPRESSD Y14, K2, Y20 // 62327d2a8bf4
+ VPCOMPRESSD Y14, K2, 7(SI)(DI*8) // 62727d2a8bb4fe07000000
+ VPCOMPRESSD Y14, K2, -15(R14) // 62527d2a8bb6f1ffffff
+ VPCOMPRESSD Z26, K4, Z6 // 62627d4c8bd6
+ VPCOMPRESSD Z14, K4, Z6 // 62727d4c8bf6
+ VPCOMPRESSD Z26, K4, Z14 // 62427d4c8bd6
+ VPCOMPRESSD Z14, K4, Z14 // 62527d4c8bf6
+ VPCOMPRESSD Z26, K4, 17(SP)(BP*2) // 62627d4c8b946c11000000
+ VPCOMPRESSD Z14, K4, 17(SP)(BP*2) // 62727d4c8bb46c11000000
+ VPCOMPRESSD Z26, K4, -7(DI)(R8*4) // 62227d4c8b9487f9ffffff
+ VPCOMPRESSD Z14, K4, -7(DI)(R8*4) // 62327d4c8bb487f9ffffff
+ VPCOMPRESSQ X31, K1, X2 // 6262fd098bfa
+ VPCOMPRESSQ X31, K1, -17(BP) // 6262fd098bbdefffffff
+ VPCOMPRESSQ X31, K1, -15(R14)(R15*8) // 6202fd098bbcfef1ffffff
+ VPCOMPRESSQ Y13, K3, Y24 // 6212fd2b8be8
+ VPCOMPRESSQ Y13, K3, 7(SI)(DI*1) // 6272fd2b8bac3e07000000
+ VPCOMPRESSQ Y13, K3, 15(DX)(BX*8) // 6272fd2b8bacda0f000000
+ VPCOMPRESSQ Z13, K4, Z28 // 6212fd4c8bec
+ VPCOMPRESSQ Z21, K4, Z28 // 6282fd4c8bec
+ VPCOMPRESSQ Z13, K4, Z6 // 6272fd4c8bee
+ VPCOMPRESSQ Z21, K4, Z6 // 62e2fd4c8bee
+ VPCOMPRESSQ Z13, K4, 15(R8) // 6252fd4c8ba80f000000
+ VPCOMPRESSQ Z21, K4, 15(R8) // 62c2fd4c8ba80f000000
+ VPCOMPRESSQ Z13, K4, (BP) // 6272fd4c8b6d00
+ VPCOMPRESSQ Z21, K4, (BP) // 62e2fd4c8b6d00
+ VPERMD Y11, Y8, K1, Y24 // 62423d2936c3
+ VPERMD -17(BP)(SI*2), Y8, K1, Y24 // 62623d29368475efffffff
+ VPERMD 7(AX)(CX*2), Y8, K1, Y24 // 62623d2936844807000000
+ VPERMD Z20, Z0, K1, Z7 // 62b27d4936fc
+ VPERMD Z28, Z0, K1, Z7 // 62927d4936fc
+ VPERMD 99(R15)(R15*1), Z0, K1, Z7 // 62927d4936bc3f63000000
+ VPERMD (DX), Z0, K1, Z7 // 62f27d49363a
+ VPERMD Z20, Z6, K1, Z7 // 62b24d4936fc
+ VPERMD Z28, Z6, K1, Z7 // 62924d4936fc
+ VPERMD 99(R15)(R15*1), Z6, K1, Z7 // 62924d4936bc3f63000000
+ VPERMD (DX), Z6, K1, Z7 // 62f24d49363a
+ VPERMD Z20, Z0, K1, Z9 // 62327d4936cc
+ VPERMD Z28, Z0, K1, Z9 // 62127d4936cc
+ VPERMD 99(R15)(R15*1), Z0, K1, Z9 // 62127d49368c3f63000000
+ VPERMD (DX), Z0, K1, Z9 // 62727d49360a
+ VPERMD Z20, Z6, K1, Z9 // 62324d4936cc
+ VPERMD Z28, Z6, K1, Z9 // 62124d4936cc
+ VPERMD 99(R15)(R15*1), Z6, K1, Z9 // 62124d49368c3f63000000
+ VPERMD (DX), Z6, K1, Z9 // 62724d49360a
+ VPERMI2D X1, X22, K1, X0 // 62f24d0176c1
+ VPERMI2D 7(AX), X22, K1, X0 // 62f24d01768007000000
+ VPERMI2D (DI), X22, K1, X0 // 62f24d017607
+ VPERMI2D Y9, Y16, K3, Y21 // 62c27d2376e9
+ VPERMI2D (R14), Y16, K3, Y21 // 62c27d23762e
+ VPERMI2D -7(DI)(R8*8), Y16, K3, Y21 // 62a27d2376acc7f9ffffff
+ VPERMI2D Z2, Z18, K4, Z11 // 62726d4476da
+ VPERMI2D Z21, Z18, K4, Z11 // 62326d4476dd
+ VPERMI2D 7(SI)(DI*8), Z18, K4, Z11 // 62726d44769cfe07000000
+ VPERMI2D -15(R14), Z18, K4, Z11 // 62526d44769ef1ffffff
+ VPERMI2D Z2, Z24, K4, Z11 // 62723d4476da
+ VPERMI2D Z21, Z24, K4, Z11 // 62323d4476dd
+ VPERMI2D 7(SI)(DI*8), Z24, K4, Z11 // 62723d44769cfe07000000
+ VPERMI2D -15(R14), Z24, K4, Z11 // 62523d44769ef1ffffff
+ VPERMI2D Z2, Z18, K4, Z5 // 62f26d4476ea
+ VPERMI2D Z21, Z18, K4, Z5 // 62b26d4476ed
+ VPERMI2D 7(SI)(DI*8), Z18, K4, Z5 // 62f26d4476acfe07000000
+ VPERMI2D -15(R14), Z18, K4, Z5 // 62d26d4476aef1ffffff
+ VPERMI2D Z2, Z24, K4, Z5 // 62f23d4476ea
+ VPERMI2D Z21, Z24, K4, Z5 // 62b23d4476ed
+ VPERMI2D 7(SI)(DI*8), Z24, K4, Z5 // 62f23d4476acfe07000000
+ VPERMI2D -15(R14), Z24, K4, Z5 // 62d23d4476aef1ffffff
+ VPERMI2PD X7, X6, K5, X11 // 6272cd0d77df
+ VPERMI2PD 99(R15)(R15*1), X6, K5, X11 // 6212cd0d779c3f63000000
+ VPERMI2PD (DX), X6, K5, X11 // 6272cd0d771a
+ VPERMI2PD Y6, Y9, K7, Y13 // 6272b52f77ee
+ VPERMI2PD 99(R15)(R15*4), Y9, K7, Y13 // 6212b52f77acbf63000000
+ VPERMI2PD 15(DX), Y9, K7, Y13 // 6272b52f77aa0f000000
+ VPERMI2PD Z6, Z6, K7, Z7 // 62f2cd4f77fe
+ VPERMI2PD Z22, Z6, K7, Z7 // 62b2cd4f77fe
+ VPERMI2PD 7(SI)(DI*1), Z6, K7, Z7 // 62f2cd4f77bc3e07000000
+ VPERMI2PD 15(DX)(BX*8), Z6, K7, Z7 // 62f2cd4f77bcda0f000000
+ VPERMI2PD Z6, Z16, K7, Z7 // 62f2fd4777fe
+ VPERMI2PD Z22, Z16, K7, Z7 // 62b2fd4777fe
+ VPERMI2PD 7(SI)(DI*1), Z16, K7, Z7 // 62f2fd4777bc3e07000000
+ VPERMI2PD 15(DX)(BX*8), Z16, K7, Z7 // 62f2fd4777bcda0f000000
+ VPERMI2PD Z6, Z6, K7, Z13 // 6272cd4f77ee
+ VPERMI2PD Z22, Z6, K7, Z13 // 6232cd4f77ee
+ VPERMI2PD 7(SI)(DI*1), Z6, K7, Z13 // 6272cd4f77ac3e07000000
+ VPERMI2PD 15(DX)(BX*8), Z6, K7, Z13 // 6272cd4f77acda0f000000
+ VPERMI2PD Z6, Z16, K7, Z13 // 6272fd4777ee
+ VPERMI2PD Z22, Z16, K7, Z13 // 6232fd4777ee
+ VPERMI2PD 7(SI)(DI*1), Z16, K7, Z13 // 6272fd4777ac3e07000000
+ VPERMI2PD 15(DX)(BX*8), Z16, K7, Z13 // 6272fd4777acda0f000000
+ VPERMI2PS X3, X31, K6, X8 // 6272050677c3
+ VPERMI2PS -17(BP)(SI*8), X31, K6, X8 // 627205067784f5efffffff
+ VPERMI2PS (R15), X31, K6, X8 // 625205067707
+ VPERMI2PS Y6, Y7, K3, Y3 // 62f2452b77de
+ VPERMI2PS (CX), Y7, K3, Y3 // 62f2452b7719
+ VPERMI2PS 99(R15), Y7, K3, Y3 // 62d2452b779f63000000
+ VPERMI2PS Z18, Z13, K7, Z1 // 62b2154f77ca
+ VPERMI2PS Z8, Z13, K7, Z1 // 62d2154f77c8
+ VPERMI2PS -7(DI)(R8*1), Z13, K7, Z1 // 62b2154f778c07f9ffffff
+ VPERMI2PS (SP), Z13, K7, Z1 // 62f2154f770c24
+ VPERMI2PS Z18, Z13, K7, Z15 // 6232154f77fa
+ VPERMI2PS Z8, Z13, K7, Z15 // 6252154f77f8
+ VPERMI2PS -7(DI)(R8*1), Z13, K7, Z15 // 6232154f77bc07f9ffffff
+ VPERMI2PS (SP), Z13, K7, Z15 // 6272154f773c24
+ VPERMI2Q X24, X20, K4, X28 // 6202dd0476e0
+ VPERMI2Q 7(SI)(DI*8), X20, K4, X28 // 6262dd0476a4fe07000000
+ VPERMI2Q -15(R14), X20, K4, X28 // 6242dd0476a6f1ffffff
+ VPERMI2Q Y26, Y11, K4, Y26 // 6202a52c76d2
+ VPERMI2Q 99(R15)(R15*2), Y11, K4, Y26 // 6202a52c76947f63000000
+ VPERMI2Q -7(DI), Y11, K4, Y26 // 6262a52c7697f9ffffff
+ VPERMI2Q Z20, Z2, K7, Z22 // 62a2ed4f76f4
+ VPERMI2Q Z9, Z2, K7, Z22 // 62c2ed4f76f1
+ VPERMI2Q -7(CX), Z2, K7, Z22 // 62e2ed4f76b1f9ffffff
+ VPERMI2Q 15(DX)(BX*4), Z2, K7, Z22 // 62e2ed4f76b49a0f000000
+ VPERMI2Q Z20, Z31, K7, Z22 // 62a2854776f4
+ VPERMI2Q Z9, Z31, K7, Z22 // 62c2854776f1
+ VPERMI2Q -7(CX), Z31, K7, Z22 // 62e2854776b1f9ffffff
+ VPERMI2Q 15(DX)(BX*4), Z31, K7, Z22 // 62e2854776b49a0f000000
+ VPERMI2Q Z20, Z2, K7, Z7 // 62b2ed4f76fc
+ VPERMI2Q Z9, Z2, K7, Z7 // 62d2ed4f76f9
+ VPERMI2Q -7(CX), Z2, K7, Z7 // 62f2ed4f76b9f9ffffff
+ VPERMI2Q 15(DX)(BX*4), Z2, K7, Z7 // 62f2ed4f76bc9a0f000000
+ VPERMI2Q Z20, Z31, K7, Z7 // 62b2854776fc
+ VPERMI2Q Z9, Z31, K7, Z7 // 62d2854776f9
+ VPERMI2Q -7(CX), Z31, K7, Z7 // 62f2854776b9f9ffffff
+ VPERMI2Q 15(DX)(BX*4), Z31, K7, Z7 // 62f2854776bc9a0f000000
+ VPERMILPD $94, X6, K4, X12 // 6273fd0c05e65e
+ VPERMILPD $94, -7(DI)(R8*1), K4, X12 // 6233fd0c05a407f9ffffff5e
+ VPERMILPD $94, (SP), K4, X12 // 6273fd0c0524245e
+ VPERMILPD $121, Y18, K2, Y31 // 6223fd2a05fa79
+ VPERMILPD $121, 15(DX)(BX*1), K2, Y31 // 6263fd2a05bc1a0f00000079
+ VPERMILPD $121, -7(CX)(DX*2), K2, Y31 // 6263fd2a05bc51f9ffffff79
+ VPERMILPD $13, Z3, K2, Z14 // 6273fd4a05f30d
+ VPERMILPD $13, Z12, K2, Z14 // 6253fd4a05f40d
+ VPERMILPD $13, (AX), K2, Z14 // 6273fd4a05300d
+ VPERMILPD $13, 7(SI), K2, Z14 // 6273fd4a05b6070000000d
+ VPERMILPD $13, Z3, K2, Z28 // 6263fd4a05e30d
+ VPERMILPD $13, Z12, K2, Z28 // 6243fd4a05e40d
+ VPERMILPD $13, (AX), K2, Z28 // 6263fd4a05200d
+ VPERMILPD $13, 7(SI), K2, Z28 // 6263fd4a05a6070000000d
+ VPERMILPD X6, X28, K3, X17 // 62e29d030dce
+ VPERMILPD -7(CX), X28, K3, X17 // 62e29d030d89f9ffffff
+ VPERMILPD 15(DX)(BX*4), X28, K3, X17 // 62e29d030d8c9a0f000000
+ VPERMILPD Y2, Y24, K3, Y3 // 62f2bd230dda
+ VPERMILPD -17(BP), Y24, K3, Y3 // 62f2bd230d9defffffff
+ VPERMILPD -15(R14)(R15*8), Y24, K3, Y3 // 6292bd230d9cfef1ffffff
+ VPERMILPD Z5, Z19, K3, Z15 // 6272e5430dfd
+ VPERMILPD Z1, Z19, K3, Z15 // 6272e5430df9
+ VPERMILPD (BX), Z19, K3, Z15 // 6272e5430d3b
+ VPERMILPD -17(BP)(SI*1), Z19, K3, Z15 // 6272e5430dbc35efffffff
+ VPERMILPD Z5, Z15, K3, Z15 // 6272854b0dfd
+ VPERMILPD Z1, Z15, K3, Z15 // 6272854b0df9
+ VPERMILPD (BX), Z15, K3, Z15 // 6272854b0d3b
+ VPERMILPD -17(BP)(SI*1), Z15, K3, Z15 // 6272854b0dbc35efffffff
+ VPERMILPD Z5, Z19, K3, Z30 // 6262e5430df5
+ VPERMILPD Z1, Z19, K3, Z30 // 6262e5430df1
+ VPERMILPD (BX), Z19, K3, Z30 // 6262e5430d33
+ VPERMILPD -17(BP)(SI*1), Z19, K3, Z30 // 6262e5430db435efffffff
+ VPERMILPD Z5, Z15, K3, Z30 // 6262854b0df5
+ VPERMILPD Z1, Z15, K3, Z30 // 6262854b0df1
+ VPERMILPD (BX), Z15, K3, Z30 // 6262854b0d33
+ VPERMILPD -17(BP)(SI*1), Z15, K3, Z30 // 6262854b0db435efffffff
+ VPERMILPS $65, X8, K2, X1 // 62d37d0a04c841
+ VPERMILPS $65, 99(R15)(R15*8), K2, X1 // 62937d0a048cff6300000041
+ VPERMILPS $65, 7(AX)(CX*8), K2, X1 // 62f37d0a048cc80700000041
+ VPERMILPS $67, Y21, K1, Y7 // 62b37d2904fd43
+ VPERMILPS $67, 17(SP)(BP*2), K1, Y7 // 62f37d2904bc6c1100000043
+ VPERMILPS $67, -7(DI)(R8*4), K1, Y7 // 62b37d2904bc87f9ffffff43
+ VPERMILPS $127, Z14, K2, Z3 // 62d37d4a04de7f
+ VPERMILPS $127, Z15, K2, Z3 // 62d37d4a04df7f
+ VPERMILPS $127, 15(R8)(R14*4), K2, Z3 // 62937d4a049cb00f0000007f
+ VPERMILPS $127, -7(CX)(DX*4), K2, Z3 // 62f37d4a049c91f9ffffff7f
+ VPERMILPS $127, Z14, K2, Z5 // 62d37d4a04ee7f
+ VPERMILPS $127, Z15, K2, Z5 // 62d37d4a04ef7f
+ VPERMILPS $127, 15(R8)(R14*4), K2, Z5 // 62937d4a04acb00f0000007f
+ VPERMILPS $127, -7(CX)(DX*4), K2, Z5 // 62f37d4a04ac91f9ffffff7f
+ VPERMILPS X0, X6, K1, X8 // 62724d090cc0
+ VPERMILPS (AX), X6, K1, X8 // 62724d090c00
+ VPERMILPS 7(SI), X6, K1, X8 // 62724d090c8607000000
+ VPERMILPS Y20, Y8, K7, Y14 // 62323d2f0cf4
+ VPERMILPS 15(R8), Y8, K7, Y14 // 62523d2f0cb00f000000
+ VPERMILPS (BP), Y8, K7, Y14 // 62723d2f0c7500
+ VPERMILPS Z20, Z16, K1, Z21 // 62a27d410cec
+ VPERMILPS Z0, Z16, K1, Z21 // 62e27d410ce8
+ VPERMILPS (R8), Z16, K1, Z21 // 62c27d410c28
+ VPERMILPS 15(DX)(BX*2), Z16, K1, Z21 // 62e27d410cac5a0f000000
+ VPERMILPS Z20, Z9, K1, Z21 // 62a235490cec
+ VPERMILPS Z0, Z9, K1, Z21 // 62e235490ce8
+ VPERMILPS (R8), Z9, K1, Z21 // 62c235490c28
+ VPERMILPS 15(DX)(BX*2), Z9, K1, Z21 // 62e235490cac5a0f000000
+ VPERMILPS Z20, Z16, K1, Z8 // 62327d410cc4
+ VPERMILPS Z0, Z16, K1, Z8 // 62727d410cc0
+ VPERMILPS (R8), Z16, K1, Z8 // 62527d410c00
+ VPERMILPS 15(DX)(BX*2), Z16, K1, Z8 // 62727d410c845a0f000000
+ VPERMILPS Z20, Z9, K1, Z8 // 623235490cc4
+ VPERMILPS Z0, Z9, K1, Z8 // 627235490cc0
+ VPERMILPS (R8), Z9, K1, Z8 // 625235490c00
+ VPERMILPS 15(DX)(BX*2), Z9, K1, Z8 // 627235490c845a0f000000
+ VPERMPD $0, Y24, K1, Y11 // 6213fd2901d800
+ VPERMPD $0, 15(R8)(R14*8), K1, Y11 // 6213fd29019cf00f00000000
+ VPERMPD $0, -15(R14)(R15*2), K1, Y11 // 6213fd29019c7ef1ffffff00
+ VPERMPD $97, Z0, K1, Z23 // 62e3fd4901f861
+ VPERMPD $97, Z11, K1, Z23 // 62c3fd4901fb61
+ VPERMPD $97, 17(SP)(BP*1), K1, Z23 // 62e3fd4901bc2c1100000061
+ VPERMPD $97, -7(CX)(DX*8), K1, Z23 // 62e3fd4901bcd1f9ffffff61
+ VPERMPD $97, Z0, K1, Z19 // 62e3fd4901d861
+ VPERMPD $97, Z11, K1, Z19 // 62c3fd4901db61
+ VPERMPD $97, 17(SP)(BP*1), K1, Z19 // 62e3fd49019c2c1100000061
+ VPERMPD $97, -7(CX)(DX*8), K1, Z19 // 62e3fd49019cd1f9ffffff61
+ VPERMPD Y18, Y5, K7, Y1 // 62b2d52f16ca
+ VPERMPD -15(R14)(R15*1), Y5, K7, Y1 // 6292d52f168c3ef1ffffff
+ VPERMPD -15(BX), Y5, K7, Y1 // 62f2d52f168bf1ffffff
+ VPERMPD Z0, Z24, K2, Z0 // 62f2bd4216c0
+ VPERMPD Z26, Z24, K2, Z0 // 6292bd4216c2
+ VPERMPD -17(BP)(SI*2), Z24, K2, Z0 // 62f2bd42168475efffffff
+ VPERMPD 7(AX)(CX*2), Z24, K2, Z0 // 62f2bd4216844807000000
+ VPERMPD Z0, Z12, K2, Z0 // 62f29d4a16c0
+ VPERMPD Z26, Z12, K2, Z0 // 62929d4a16c2
+ VPERMPD -17(BP)(SI*2), Z12, K2, Z0 // 62f29d4a168475efffffff
+ VPERMPD 7(AX)(CX*2), Z12, K2, Z0 // 62f29d4a16844807000000
+ VPERMPD Z0, Z24, K2, Z25 // 6262bd4216c8
+ VPERMPD Z26, Z24, K2, Z25 // 6202bd4216ca
+ VPERMPD -17(BP)(SI*2), Z24, K2, Z25 // 6262bd42168c75efffffff
+ VPERMPD 7(AX)(CX*2), Z24, K2, Z25 // 6262bd42168c4807000000
+ VPERMPD Z0, Z12, K2, Z25 // 62629d4a16c8
+ VPERMPD Z26, Z12, K2, Z25 // 62029d4a16ca
+ VPERMPD -17(BP)(SI*2), Z12, K2, Z25 // 62629d4a168c75efffffff
+ VPERMPD 7(AX)(CX*2), Z12, K2, Z25 // 62629d4a168c4807000000
+ VPERMPS Y9, Y20, K4, Y20 // 62c25d2416e1
+ VPERMPS 7(AX)(CX*4), Y20, K4, Y20 // 62e25d2416a48807000000
+ VPERMPS 7(AX)(CX*1), Y20, K4, Y20 // 62e25d2416a40807000000
+ VPERMPS Z9, Z9, K1, Z9 // 6252354916c9
+ VPERMPS Z28, Z9, K1, Z9 // 6212354916cc
+ VPERMPS 15(R8)(R14*1), Z9, K1, Z9 // 62123549168c300f000000
+ VPERMPS 15(R8)(R14*2), Z9, K1, Z9 // 62123549168c700f000000
+ VPERMPS Z9, Z25, K1, Z9 // 6252354116c9
+ VPERMPS Z28, Z25, K1, Z9 // 6212354116cc
+ VPERMPS 15(R8)(R14*1), Z25, K1, Z9 // 62123541168c300f000000
+ VPERMPS 15(R8)(R14*2), Z25, K1, Z9 // 62123541168c700f000000
+ VPERMPS Z9, Z9, K1, Z3 // 62d2354916d9
+ VPERMPS Z28, Z9, K1, Z3 // 6292354916dc
+ VPERMPS 15(R8)(R14*1), Z9, K1, Z3 // 62923549169c300f000000
+ VPERMPS 15(R8)(R14*2), Z9, K1, Z3 // 62923549169c700f000000
+ VPERMPS Z9, Z25, K1, Z3 // 62d2354116d9
+ VPERMPS Z28, Z25, K1, Z3 // 6292354116dc
+ VPERMPS 15(R8)(R14*1), Z25, K1, Z3 // 62923541169c300f000000
+ VPERMPS 15(R8)(R14*2), Z25, K1, Z3 // 62923541169c700f000000
+ VPERMQ $81, Y28, K3, Y28 // 6203fd2b00e451
+ VPERMQ $81, (SI), K3, Y28 // 6263fd2b002651
+ VPERMQ $81, 7(SI)(DI*2), K3, Y28 // 6263fd2b00a47e0700000051
+ VPERMQ $42, Z17, K4, Z20 // 62a3fd4c00e12a
+ VPERMQ $42, Z0, K4, Z20 // 62e3fd4c00e02a
+ VPERMQ $42, (R14), K4, Z20 // 62c3fd4c00262a
+ VPERMQ $42, -7(DI)(R8*8), K4, Z20 // 62a3fd4c00a4c7f9ffffff2a
+ VPERMQ $42, Z17, K4, Z0 // 62b3fd4c00c12a
+ VPERMQ $42, Z0, K4, Z0 // 62f3fd4c00c02a
+ VPERMQ $42, (R14), K4, Z0 // 62d3fd4c00062a
+ VPERMQ $42, -7(DI)(R8*8), K4, Z0 // 62b3fd4c0084c7f9ffffff2a
+ VPERMQ Y11, Y8, K5, Y1 // 62d2bd2d36cb
+ VPERMQ 17(SP)(BP*8), Y8, K5, Y1 // 62f2bd2d368cec11000000
+ VPERMQ 17(SP)(BP*4), Y8, K5, Y1 // 62f2bd2d368cac11000000
+ VPERMQ Z21, Z31, K7, Z17 // 62a2854736cd
+ VPERMQ Z9, Z31, K7, Z17 // 62c2854736c9
+ VPERMQ 99(R15)(R15*4), Z31, K7, Z17 // 62828547368cbf63000000
+ VPERMQ 15(DX), Z31, K7, Z17 // 62e28547368a0f000000
+ VPERMQ Z21, Z0, K7, Z17 // 62a2fd4f36cd
+ VPERMQ Z9, Z0, K7, Z17 // 62c2fd4f36c9
+ VPERMQ 99(R15)(R15*4), Z0, K7, Z17 // 6282fd4f368cbf63000000
+ VPERMQ 15(DX), Z0, K7, Z17 // 62e2fd4f368a0f000000
+ VPERMQ Z21, Z31, K7, Z23 // 62a2854736fd
+ VPERMQ Z9, Z31, K7, Z23 // 62c2854736f9
+ VPERMQ 99(R15)(R15*4), Z31, K7, Z23 // 6282854736bcbf63000000
+ VPERMQ 15(DX), Z31, K7, Z23 // 62e2854736ba0f000000
+ VPERMQ Z21, Z0, K7, Z23 // 62a2fd4f36fd
+ VPERMQ Z9, Z0, K7, Z23 // 62c2fd4f36f9
+ VPERMQ 99(R15)(R15*4), Z0, K7, Z23 // 6282fd4f36bcbf63000000
+ VPERMQ 15(DX), Z0, K7, Z23 // 62e2fd4f36ba0f000000
+ VPERMT2D X12, X22, K7, X6 // 62d24d077ef4
+ VPERMT2D 15(R8)(R14*4), X22, K7, X6 // 62924d077eb4b00f000000
+ VPERMT2D -7(CX)(DX*4), X22, K7, X6 // 62f24d077eb491f9ffffff
+ VPERMT2D Y26, Y6, K4, Y12 // 62124d2c7ee2
+ VPERMT2D 17(SP), Y6, K4, Y12 // 62724d2c7ea42411000000
+ VPERMT2D -17(BP)(SI*4), Y6, K4, Y12 // 62724d2c7ea4b5efffffff
+ VPERMT2D Z7, Z26, K4, Z30 // 62622d447ef7
+ VPERMT2D Z21, Z26, K4, Z30 // 62222d447ef5
+ VPERMT2D 99(R15)(R15*2), Z26, K4, Z30 // 62022d447eb47f63000000
+ VPERMT2D -7(DI), Z26, K4, Z30 // 62622d447eb7f9ffffff
+ VPERMT2D Z7, Z22, K4, Z30 // 62624d447ef7
+ VPERMT2D Z21, Z22, K4, Z30 // 62224d447ef5
+ VPERMT2D 99(R15)(R15*2), Z22, K4, Z30 // 62024d447eb47f63000000
+ VPERMT2D -7(DI), Z22, K4, Z30 // 62624d447eb7f9ffffff
+ VPERMT2D Z7, Z26, K4, Z5 // 62f22d447eef
+ VPERMT2D Z21, Z26, K4, Z5 // 62b22d447eed
+ VPERMT2D 99(R15)(R15*2), Z26, K4, Z5 // 62922d447eac7f63000000
+ VPERMT2D -7(DI), Z26, K4, Z5 // 62f22d447eaff9ffffff
+ VPERMT2D Z7, Z22, K4, Z5 // 62f24d447eef
+ VPERMT2D Z21, Z22, K4, Z5 // 62b24d447eed
+ VPERMT2D 99(R15)(R15*2), Z22, K4, Z5 // 62924d447eac7f63000000
+ VPERMT2D -7(DI), Z22, K4, Z5 // 62f24d447eaff9ffffff
+ VPERMT2PD X8, X28, K7, X16 // 62c29d077fc0
+ VPERMT2PD (R8), X28, K7, X16 // 62c29d077f00
+ VPERMT2PD 15(DX)(BX*2), X28, K7, X16 // 62e29d077f845a0f000000
+ VPERMT2PD Y28, Y8, K2, Y3 // 6292bd2a7fdc
+ VPERMT2PD 7(AX), Y8, K2, Y3 // 62f2bd2a7f9807000000
+ VPERMT2PD (DI), Y8, K2, Y3 // 62f2bd2a7f1f
+ VPERMT2PD Z12, Z14, K5, Z16 // 62c28d4d7fc4
+ VPERMT2PD Z13, Z14, K5, Z16 // 62c28d4d7fc5
+ VPERMT2PD -7(CX)(DX*1), Z14, K5, Z16 // 62e28d4d7f8411f9ffffff
+ VPERMT2PD -15(R14)(R15*4), Z14, K5, Z16 // 62828d4d7f84bef1ffffff
+ VPERMT2PD Z12, Z13, K5, Z16 // 62c2954d7fc4
+ VPERMT2PD Z13, Z13, K5, Z16 // 62c2954d7fc5
+ VPERMT2PD -7(CX)(DX*1), Z13, K5, Z16 // 62e2954d7f8411f9ffffff
+ VPERMT2PD -15(R14)(R15*4), Z13, K5, Z16 // 6282954d7f84bef1ffffff
+ VPERMT2PD Z12, Z14, K5, Z25 // 62428d4d7fcc
+ VPERMT2PD Z13, Z14, K5, Z25 // 62428d4d7fcd
+ VPERMT2PD -7(CX)(DX*1), Z14, K5, Z25 // 62628d4d7f8c11f9ffffff
+ VPERMT2PD -15(R14)(R15*4), Z14, K5, Z25 // 62028d4d7f8cbef1ffffff
+ VPERMT2PD Z12, Z13, K5, Z25 // 6242954d7fcc
+ VPERMT2PD Z13, Z13, K5, Z25 // 6242954d7fcd
+ VPERMT2PD -7(CX)(DX*1), Z13, K5, Z25 // 6262954d7f8c11f9ffffff
+ VPERMT2PD -15(R14)(R15*4), Z13, K5, Z25 // 6202954d7f8cbef1ffffff
+ VPERMT2PS X1, X11, K3, X15 // 6272250b7ff9
+ VPERMT2PS 17(SP)(BP*1), X11, K3, X15 // 6272250b7fbc2c11000000
+ VPERMT2PS -7(CX)(DX*8), X11, K3, X15 // 6272250b7fbcd1f9ffffff
+ VPERMT2PS Y14, Y23, K4, Y1 // 62d245247fce
+ VPERMT2PS 99(R15)(R15*1), Y23, K4, Y1 // 629245247f8c3f63000000
+ VPERMT2PS (DX), Y23, K4, Y1 // 62f245247f0a
+ VPERMT2PS Z27, Z2, K2, Z21 // 62826d4a7feb
+ VPERMT2PS Z25, Z2, K2, Z21 // 62826d4a7fe9
+ VPERMT2PS 15(DX)(BX*1), Z2, K2, Z21 // 62e26d4a7fac1a0f000000
+ VPERMT2PS -7(CX)(DX*2), Z2, K2, Z21 // 62e26d4a7fac51f9ffffff
+ VPERMT2PS Z27, Z7, K2, Z21 // 6282454a7feb
+ VPERMT2PS Z25, Z7, K2, Z21 // 6282454a7fe9
+ VPERMT2PS 15(DX)(BX*1), Z7, K2, Z21 // 62e2454a7fac1a0f000000
+ VPERMT2PS -7(CX)(DX*2), Z7, K2, Z21 // 62e2454a7fac51f9ffffff
+ VPERMT2PS Z27, Z2, K2, Z9 // 62126d4a7fcb
+ VPERMT2PS Z25, Z2, K2, Z9 // 62126d4a7fc9
+ VPERMT2PS 15(DX)(BX*1), Z2, K2, Z9 // 62726d4a7f8c1a0f000000
+ VPERMT2PS -7(CX)(DX*2), Z2, K2, Z9 // 62726d4a7f8c51f9ffffff
+ VPERMT2PS Z27, Z7, K2, Z9 // 6212454a7fcb
+ VPERMT2PS Z25, Z7, K2, Z9 // 6212454a7fc9
+ VPERMT2PS 15(DX)(BX*1), Z7, K2, Z9 // 6272454a7f8c1a0f000000
+ VPERMT2PS -7(CX)(DX*2), Z7, K2, Z9 // 6272454a7f8c51f9ffffff
+ VPERMT2Q X2, X13, K2, X19 // 62e2950a7eda
+ VPERMT2Q -17(BP)(SI*2), X13, K2, X19 // 62e2950a7e9c75efffffff
+ VPERMT2Q 7(AX)(CX*2), X13, K2, X19 // 62e2950a7e9c4807000000
+ VPERMT2Q Y2, Y25, K3, Y31 // 6262b5237efa
+ VPERMT2Q -17(BP)(SI*8), Y25, K3, Y31 // 6262b5237ebcf5efffffff
+ VPERMT2Q (R15), Y25, K3, Y31 // 6242b5237e3f
+ VPERMT2Q Z3, Z27, K3, Z23 // 62e2a5437efb
+ VPERMT2Q Z0, Z27, K3, Z23 // 62e2a5437ef8
+ VPERMT2Q -17(BP), Z27, K3, Z23 // 62e2a5437ebdefffffff
+ VPERMT2Q -15(R14)(R15*8), Z27, K3, Z23 // 6282a5437ebcfef1ffffff
+ VPERMT2Q Z3, Z14, K3, Z23 // 62e28d4b7efb
+ VPERMT2Q Z0, Z14, K3, Z23 // 62e28d4b7ef8
+ VPERMT2Q -17(BP), Z14, K3, Z23 // 62e28d4b7ebdefffffff
+ VPERMT2Q -15(R14)(R15*8), Z14, K3, Z23 // 62828d4b7ebcfef1ffffff
+ VPERMT2Q Z3, Z27, K3, Z9 // 6272a5437ecb
+ VPERMT2Q Z0, Z27, K3, Z9 // 6272a5437ec8
+ VPERMT2Q -17(BP), Z27, K3, Z9 // 6272a5437e8defffffff
+ VPERMT2Q -15(R14)(R15*8), Z27, K3, Z9 // 6212a5437e8cfef1ffffff
+ VPERMT2Q Z3, Z14, K3, Z9 // 62728d4b7ecb
+ VPERMT2Q Z0, Z14, K3, Z9 // 62728d4b7ec8
+ VPERMT2Q -17(BP), Z14, K3, Z9 // 62728d4b7e8defffffff
+ VPERMT2Q -15(R14)(R15*8), Z14, K3, Z9 // 62128d4b7e8cfef1ffffff
+ VPEXPANDD X2, K7, X9 // 62727d0f89ca
+ VPEXPANDD (CX), K7, X9 // 62727d0f8909
+ VPEXPANDD 99(R15), K7, X9 // 62527d0f898f63000000
+ VPEXPANDD Y1, K2, Y6 // 62f27d2a89f1
+ VPEXPANDD -7(CX), K2, Y6 // 62f27d2a89b1f9ffffff
+ VPEXPANDD 15(DX)(BX*4), K2, Y6 // 62f27d2a89b49a0f000000
+ VPEXPANDD Z13, K4, Z11 // 62527d4c89dd
+ VPEXPANDD Z14, K4, Z11 // 62527d4c89de
+ VPEXPANDD -15(R14)(R15*1), K4, Z11 // 62127d4c899c3ef1ffffff
+ VPEXPANDD -15(BX), K4, Z11 // 62727d4c899bf1ffffff
+ VPEXPANDD Z13, K4, Z5 // 62d27d4c89ed
+ VPEXPANDD Z14, K4, Z5 // 62d27d4c89ee
+ VPEXPANDD -15(R14)(R15*1), K4, Z5 // 62927d4c89ac3ef1ffffff
+ VPEXPANDD -15(BX), K4, Z5 // 62f27d4c89abf1ffffff
+ VPEXPANDQ X2, K1, X24 // 6262fd0989c2
+ VPEXPANDQ 99(R15)(R15*2), K1, X24 // 6202fd0989847f63000000
+ VPEXPANDQ -7(DI), K1, X24 // 6262fd098987f9ffffff
+ VPEXPANDQ Y0, K3, Y9 // 6272fd2b89c8
+ VPEXPANDQ 99(R15)(R15*8), K3, Y9 // 6212fd2b898cff63000000
+ VPEXPANDQ 7(AX)(CX*8), K3, Y9 // 6272fd2b898cc807000000
+ VPEXPANDQ Z2, K4, Z5 // 62f2fd4c89ea
+ VPEXPANDQ 7(AX)(CX*4), K4, Z5 // 62f2fd4c89ac8807000000
+ VPEXPANDQ 7(AX)(CX*1), K4, Z5 // 62f2fd4c89ac0807000000
+ VPEXPANDQ Z2, K4, Z23 // 62e2fd4c89fa
+ VPEXPANDQ 7(AX)(CX*4), K4, Z23 // 62e2fd4c89bc8807000000
+ VPEXPANDQ 7(AX)(CX*1), K4, Z23 // 62e2fd4c89bc0807000000
+ VPGATHERDD (DX)(X10*4), K6, X3 // 62b27d0e901c92
+ VPGATHERDD (SP)(X4*2), K6, X3 // 62f27d0e901c64
+ VPGATHERDD (R14)(X29*8), K6, X3 // 62927d06901cee
+ VPGATHERDD (R10)(Y29*8), K3, Y22 // 62827d239034ea
+ VPGATHERDD (SP)(Y4*2), K3, Y22 // 62e27d2b903464
+ VPGATHERDD (DX)(Y10*4), K3, Y22 // 62a27d2b903492
+ VPGATHERDD (BP)(Z10*2), K7, Z28 // 62227d4f90645500
+ VPGATHERDD (R10)(Z29*8), K7, Z28 // 62027d479024ea
+ VPGATHERDD (R14)(Z29*8), K7, Z28 // 62027d479024ee
+ VPGATHERDD (BP)(Z10*2), K7, Z6 // 62b27d4f90745500
+ VPGATHERDD (R10)(Z29*8), K7, Z6 // 62927d479034ea
+ VPGATHERDD (R14)(Z29*8), K7, Z6 // 62927d479034ee
+ VPGATHERDQ (AX)(X4*1), K4, X11 // 6272fd0c901c20
+ VPGATHERDQ (BP)(X10*2), K4, X11 // 6232fd0c905c5500
+ VPGATHERDQ (R10)(X29*8), K4, X11 // 6212fd04901cea
+ VPGATHERDQ (DX)(X10*4), K4, Y9 // 6232fd2c900c92
+ VPGATHERDQ (SP)(X4*2), K4, Y9 // 6272fd2c900c64
+ VPGATHERDQ (R14)(X29*8), K4, Y9 // 6212fd24900cee
+ VPGATHERDQ (R14)(Y29*8), K7, Z13 // 6212fd47902cee
+ VPGATHERDQ (AX)(Y4*1), K7, Z13 // 6272fd4f902c20
+ VPGATHERDQ (BP)(Y10*2), K7, Z13 // 6232fd4f906c5500
+ VPGATHERDQ (R14)(Y29*8), K7, Z21 // 6282fd47902cee
+ VPGATHERDQ (AX)(Y4*1), K7, Z21 // 62e2fd4f902c20
+ VPGATHERDQ (BP)(Y10*2), K7, Z21 // 62a2fd4f906c5500
+ VPGATHERQD (AX)(X4*1), K2, X15 // 62727d0a913c20
+ VPGATHERQD (BP)(X10*2), K2, X15 // 62327d0a917c5500
+ VPGATHERQD (R10)(X29*8), K2, X15 // 62127d02913cea
+ VPGATHERQD (R10)(Y29*8), K5, X30 // 62027d259134ea
+ VPGATHERQD (SP)(Y4*2), K5, X30 // 62627d2d913464
+ VPGATHERQD (DX)(Y10*4), K5, X30 // 62227d2d913492
+ VPGATHERQD (DX)(Z10*4), K3, Y23 // 62a27d4b913c92
+ VPGATHERQD (AX)(Z4*1), K3, Y23 // 62e27d4b913c20
+ VPGATHERQD (SP)(Z4*2), K3, Y23 // 62e27d4b913c64
+ VPGATHERQQ (DX)(X10*4), K4, X13 // 6232fd0c912c92
+ VPGATHERQQ (SP)(X4*2), K4, X13 // 6272fd0c912c64
+ VPGATHERQQ (R14)(X29*8), K4, X13 // 6212fd04912cee
+ VPGATHERQQ (R14)(Y29*8), K2, Y31 // 6202fd22913cee
+ VPGATHERQQ (AX)(Y4*1), K2, Y31 // 6262fd2a913c20
+ VPGATHERQQ (BP)(Y10*2), K2, Y31 // 6222fd2a917c5500
+ VPGATHERQQ (BP)(Z10*2), K2, Z26 // 6222fd4a91545500
+ VPGATHERQQ (R10)(Z29*8), K2, Z26 // 6202fd429114ea
+ VPGATHERQQ (R14)(Z29*8), K2, Z26 // 6202fd429114ee
+ VPGATHERQQ (BP)(Z10*2), K2, Z3 // 62b2fd4a915c5500
+ VPGATHERQQ (R10)(Z29*8), K2, Z3 // 6292fd42911cea
+ VPGATHERQQ (R14)(Z29*8), K2, Z3 // 6292fd42911cee
+ VPMAXSD X1, X31, K3, X16 // 62e205033dc1
+ VPMAXSD (SI), X31, K3, X16 // 62e205033d06
+ VPMAXSD 7(SI)(DI*2), X31, K3, X16 // 62e205033d847e07000000
+ VPMAXSD Y24, Y18, K7, Y20 // 62826d273de0
+ VPMAXSD 99(R15)(R15*4), Y18, K7, Y20 // 62826d273da4bf63000000
+ VPMAXSD 15(DX), Y18, K7, Y20 // 62e26d273da20f000000
+ VPMAXSD Z0, Z7, K4, Z3 // 62f2454c3dd8
+ VPMAXSD Z6, Z7, K4, Z3 // 62f2454c3dde
+ VPMAXSD 7(SI)(DI*1), Z7, K4, Z3 // 62f2454c3d9c3e07000000
+ VPMAXSD 15(DX)(BX*8), Z7, K4, Z3 // 62f2454c3d9cda0f000000
+ VPMAXSD Z0, Z9, K4, Z3 // 62f2354c3dd8
+ VPMAXSD Z6, Z9, K4, Z3 // 62f2354c3dde
+ VPMAXSD 7(SI)(DI*1), Z9, K4, Z3 // 62f2354c3d9c3e07000000
+ VPMAXSD 15(DX)(BX*8), Z9, K4, Z3 // 62f2354c3d9cda0f000000
+ VPMAXSD Z0, Z7, K4, Z27 // 6262454c3dd8
+ VPMAXSD Z6, Z7, K4, Z27 // 6262454c3dde
+ VPMAXSD 7(SI)(DI*1), Z7, K4, Z27 // 6262454c3d9c3e07000000
+ VPMAXSD 15(DX)(BX*8), Z7, K4, Z27 // 6262454c3d9cda0f000000
+ VPMAXSD Z0, Z9, K4, Z27 // 6262354c3dd8
+ VPMAXSD Z6, Z9, K4, Z27 // 6262354c3dde
+ VPMAXSD 7(SI)(DI*1), Z9, K4, Z27 // 6262354c3d9c3e07000000
+ VPMAXSD 15(DX)(BX*8), Z9, K4, Z27 // 6262354c3d9cda0f000000
+ VPMAXSQ X15, X9, K4, X7 // 62d2b50c3dff
+ VPMAXSQ 17(SP)(BP*8), X9, K4, X7 // 62f2b50c3dbcec11000000
+ VPMAXSQ 17(SP)(BP*4), X9, K4, X7 // 62f2b50c3dbcac11000000
+ VPMAXSQ Y19, Y3, K7, Y9 // 6232e52f3dcb
+ VPMAXSQ (CX), Y3, K7, Y9 // 6272e52f3d09
+ VPMAXSQ 99(R15), Y3, K7, Y9 // 6252e52f3d8f63000000
+ VPMAXSQ Z9, Z3, K2, Z20 // 62c2e54a3de1
+ VPMAXSQ Z19, Z3, K2, Z20 // 62a2e54a3de3
+ VPMAXSQ -7(DI)(R8*1), Z3, K2, Z20 // 62a2e54a3da407f9ffffff
+ VPMAXSQ (SP), Z3, K2, Z20 // 62e2e54a3d2424
+ VPMAXSQ Z9, Z30, K2, Z20 // 62c28d423de1
+ VPMAXSQ Z19, Z30, K2, Z20 // 62a28d423de3
+ VPMAXSQ -7(DI)(R8*1), Z30, K2, Z20 // 62a28d423da407f9ffffff
+ VPMAXSQ (SP), Z30, K2, Z20 // 62e28d423d2424
+ VPMAXSQ Z9, Z3, K2, Z28 // 6242e54a3de1
+ VPMAXSQ Z19, Z3, K2, Z28 // 6222e54a3de3
+ VPMAXSQ -7(DI)(R8*1), Z3, K2, Z28 // 6222e54a3da407f9ffffff
+ VPMAXSQ (SP), Z3, K2, Z28 // 6262e54a3d2424
+ VPMAXSQ Z9, Z30, K2, Z28 // 62428d423de1
+ VPMAXSQ Z19, Z30, K2, Z28 // 62228d423de3
+ VPMAXSQ -7(DI)(R8*1), Z30, K2, Z28 // 62228d423da407f9ffffff
+ VPMAXSQ (SP), Z30, K2, Z28 // 62628d423d2424
+ VPMAXUD X3, X8, K3, X15 // 62723d0b3ffb
+ VPMAXUD 7(AX), X8, K3, X15 // 62723d0b3fb807000000
+ VPMAXUD (DI), X8, K3, X15 // 62723d0b3f3f
+ VPMAXUD Y20, Y21, K3, Y2 // 62b255233fd4
+ VPMAXUD 15(DX)(BX*1), Y21, K3, Y2 // 62f255233f941a0f000000
+ VPMAXUD -7(CX)(DX*2), Y21, K3, Y2 // 62f255233f9451f9ffffff
+ VPMAXUD Z13, Z1, K2, Z6 // 62d2754a3ff5
+ VPMAXUD (AX), Z1, K2, Z6 // 62f2754a3f30
+ VPMAXUD 7(SI), Z1, K2, Z6 // 62f2754a3fb607000000
+ VPMAXUD Z13, Z15, K2, Z6 // 62d2054a3ff5
+ VPMAXUD (AX), Z15, K2, Z6 // 62f2054a3f30
+ VPMAXUD 7(SI), Z15, K2, Z6 // 62f2054a3fb607000000
+ VPMAXUD Z13, Z1, K2, Z22 // 62c2754a3ff5
+ VPMAXUD (AX), Z1, K2, Z22 // 62e2754a3f30
+ VPMAXUD 7(SI), Z1, K2, Z22 // 62e2754a3fb607000000
+ VPMAXUD Z13, Z15, K2, Z22 // 62c2054a3ff5
+ VPMAXUD (AX), Z15, K2, Z22 // 62e2054a3f30
+ VPMAXUD 7(SI), Z15, K2, Z22 // 62e2054a3fb607000000
+ VPMAXUQ X13, X23, K1, X26 // 6242c5013fd5
+ VPMAXUQ 99(R15)(R15*1), X23, K1, X26 // 6202c5013f943f63000000
+ VPMAXUQ (DX), X23, K1, X26 // 6262c5013f12
+ VPMAXUQ Y6, Y31, K2, Y6 // 62f285223ff6
+ VPMAXUQ -17(BP), Y31, K2, Y6 // 62f285223fb5efffffff
+ VPMAXUQ -15(R14)(R15*8), Y31, K2, Y6 // 629285223fb4fef1ffffff
+ VPMAXUQ Z2, Z22, K1, Z18 // 62e2cd413fd2
+ VPMAXUQ Z31, Z22, K1, Z18 // 6282cd413fd7
+ VPMAXUQ (BX), Z22, K1, Z18 // 62e2cd413f13
+ VPMAXUQ -17(BP)(SI*1), Z22, K1, Z18 // 62e2cd413f9435efffffff
+ VPMAXUQ Z2, Z7, K1, Z18 // 62e2c5493fd2
+ VPMAXUQ Z31, Z7, K1, Z18 // 6282c5493fd7
+ VPMAXUQ (BX), Z7, K1, Z18 // 62e2c5493f13
+ VPMAXUQ -17(BP)(SI*1), Z7, K1, Z18 // 62e2c5493f9435efffffff
+ VPMAXUQ Z2, Z22, K1, Z8 // 6272cd413fc2
+ VPMAXUQ Z31, Z22, K1, Z8 // 6212cd413fc7
+ VPMAXUQ (BX), Z22, K1, Z8 // 6272cd413f03
+ VPMAXUQ -17(BP)(SI*1), Z22, K1, Z8 // 6272cd413f8435efffffff
+ VPMAXUQ Z2, Z7, K1, Z8 // 6272c5493fc2
+ VPMAXUQ Z31, Z7, K1, Z8 // 6212c5493fc7
+ VPMAXUQ (BX), Z7, K1, Z8 // 6272c5493f03
+ VPMAXUQ -17(BP)(SI*1), Z7, K1, Z8 // 6272c5493f8435efffffff
+ VPMINSD X11, X1, K4, X21 // 62c2750c39eb
+ VPMINSD 7(SI)(DI*1), X1, K4, X21 // 62e2750c39ac3e07000000
+ VPMINSD 15(DX)(BX*8), X1, K4, X21 // 62e2750c39acda0f000000
+ VPMINSD Y12, Y20, K1, Y5 // 62d25d2139ec
+ VPMINSD 15(R8)(R14*8), Y20, K1, Y5 // 62925d2139acf00f000000
+ VPMINSD -15(R14)(R15*2), Y20, K1, Y5 // 62925d2139ac7ef1ffffff
+ VPMINSD Z5, Z19, K3, Z15 // 6272654339fd
+ VPMINSD Z1, Z19, K3, Z15 // 6272654339f9
+ VPMINSD 17(SP)(BP*1), Z19, K3, Z15 // 6272654339bc2c11000000
+ VPMINSD -7(CX)(DX*8), Z19, K3, Z15 // 6272654339bcd1f9ffffff
+ VPMINSD Z5, Z15, K3, Z15 // 6272054b39fd
+ VPMINSD Z1, Z15, K3, Z15 // 6272054b39f9
+ VPMINSD 17(SP)(BP*1), Z15, K3, Z15 // 6272054b39bc2c11000000
+ VPMINSD -7(CX)(DX*8), Z15, K3, Z15 // 6272054b39bcd1f9ffffff
+ VPMINSD Z5, Z19, K3, Z30 // 6262654339f5
+ VPMINSD Z1, Z19, K3, Z30 // 6262654339f1
+ VPMINSD 17(SP)(BP*1), Z19, K3, Z30 // 6262654339b42c11000000
+ VPMINSD -7(CX)(DX*8), Z19, K3, Z30 // 6262654339b4d1f9ffffff
+ VPMINSD Z5, Z15, K3, Z30 // 6262054b39f5
+ VPMINSD Z1, Z15, K3, Z30 // 6262054b39f1
+ VPMINSD 17(SP)(BP*1), Z15, K3, Z30 // 6262054b39b42c11000000
+ VPMINSD -7(CX)(DX*8), Z15, K3, Z30 // 6262054b39b4d1f9ffffff
+ VPMINSQ X7, X3, K4, X31 // 6262e50c39ff
+ VPMINSQ -7(DI)(R8*1), X3, K4, X31 // 6222e50c39bc07f9ffffff
+ VPMINSQ (SP), X3, K4, X31 // 6262e50c393c24
+ VPMINSQ Y28, Y5, K5, Y3 // 6292d52d39dc
+ VPMINSQ -15(R14)(R15*1), Y5, K5, Y3 // 6292d52d399c3ef1ffffff
+ VPMINSQ -15(BX), Y5, K5, Y3 // 62f2d52d399bf1ffffff
+ VPMINSQ Z21, Z14, K7, Z3 // 62b28d4f39dd
+ VPMINSQ Z8, Z14, K7, Z3 // 62d28d4f39d8
+ VPMINSQ -17(BP)(SI*2), Z14, K7, Z3 // 62f28d4f399c75efffffff
+ VPMINSQ 7(AX)(CX*2), Z14, K7, Z3 // 62f28d4f399c4807000000
+ VPMINSQ Z21, Z15, K7, Z3 // 62b2854f39dd
+ VPMINSQ Z8, Z15, K7, Z3 // 62d2854f39d8
+ VPMINSQ -17(BP)(SI*2), Z15, K7, Z3 // 62f2854f399c75efffffff
+ VPMINSQ 7(AX)(CX*2), Z15, K7, Z3 // 62f2854f399c4807000000
+ VPMINSQ Z21, Z14, K7, Z5 // 62b28d4f39ed
+ VPMINSQ Z8, Z14, K7, Z5 // 62d28d4f39e8
+ VPMINSQ -17(BP)(SI*2), Z14, K7, Z5 // 62f28d4f39ac75efffffff
+ VPMINSQ 7(AX)(CX*2), Z14, K7, Z5 // 62f28d4f39ac4807000000
+ VPMINSQ Z21, Z15, K7, Z5 // 62b2854f39ed
+ VPMINSQ Z8, Z15, K7, Z5 // 62d2854f39e8
+ VPMINSQ -17(BP)(SI*2), Z15, K7, Z5 // 62f2854f39ac75efffffff
+ VPMINSQ 7(AX)(CX*2), Z15, K7, Z5 // 62f2854f39ac4807000000
+ VPMINUD X5, X14, K7, X7 // 62f20d0f3bfd
+ VPMINUD (AX), X14, K7, X7 // 62f20d0f3b38
+ VPMINUD 7(SI), X14, K7, X7 // 62f20d0f3bbe07000000
+ VPMINUD Y7, Y17, K2, Y14 // 627275223bf7
+ VPMINUD 17(SP)(BP*8), Y17, K2, Y14 // 627275223bb4ec11000000
+ VPMINUD 17(SP)(BP*4), Y17, K2, Y14 // 627275223bb4ac11000000
+ VPMINUD Z9, Z9, K5, Z0 // 62d2354d3bc1
+ VPMINUD Z25, Z9, K5, Z0 // 6292354d3bc1
+ VPMINUD 99(R15)(R15*4), Z9, K5, Z0 // 6292354d3b84bf63000000
+ VPMINUD 15(DX), Z9, K5, Z0 // 62f2354d3b820f000000
+ VPMINUD Z9, Z3, K5, Z0 // 62d2654d3bc1
+ VPMINUD Z25, Z3, K5, Z0 // 6292654d3bc1
+ VPMINUD 99(R15)(R15*4), Z3, K5, Z0 // 6292654d3b84bf63000000
+ VPMINUD 15(DX), Z3, K5, Z0 // 62f2654d3b820f000000
+ VPMINUD Z9, Z9, K5, Z26 // 6242354d3bd1
+ VPMINUD Z25, Z9, K5, Z26 // 6202354d3bd1
+ VPMINUD 99(R15)(R15*4), Z9, K5, Z26 // 6202354d3b94bf63000000
+ VPMINUD 15(DX), Z9, K5, Z26 // 6262354d3b920f000000
+ VPMINUD Z9, Z3, K5, Z26 // 6242654d3bd1
+ VPMINUD Z25, Z3, K5, Z26 // 6202654d3bd1
+ VPMINUD 99(R15)(R15*4), Z3, K5, Z26 // 6202654d3b94bf63000000
+ VPMINUD 15(DX), Z3, K5, Z26 // 6262654d3b920f000000
+ VPMINUQ X21, X3, K3, X31 // 6222e50b3bfd
+ VPMINUQ (BX), X3, K3, X31 // 6262e50b3b3b
+ VPMINUQ -17(BP)(SI*1), X3, K3, X31 // 6262e50b3bbc35efffffff
+ VPMINUQ Y8, Y31, K4, Y9 // 625285243bc8
+ VPMINUQ 7(SI)(DI*4), Y31, K4, Y9 // 627285243b8cbe07000000
+ VPMINUQ -7(DI)(R8*2), Y31, K4, Y9 // 623285243b8c47f9ffffff
+ VPMINUQ Z17, Z20, K2, Z9 // 6232dd423bc9
+ VPMINUQ Z0, Z20, K2, Z9 // 6272dd423bc8
+ VPMINUQ (CX), Z20, K2, Z9 // 6272dd423b09
+ VPMINUQ 99(R15), Z20, K2, Z9 // 6252dd423b8f63000000
+ VPMINUQ Z17, Z0, K2, Z9 // 6232fd4a3bc9
+ VPMINUQ Z0, Z0, K2, Z9 // 6272fd4a3bc8
+ VPMINUQ (CX), Z0, K2, Z9 // 6272fd4a3b09
+ VPMINUQ 99(R15), Z0, K2, Z9 // 6252fd4a3b8f63000000
+ VPMINUQ Z17, Z20, K2, Z28 // 6222dd423be1
+ VPMINUQ Z0, Z20, K2, Z28 // 6262dd423be0
+ VPMINUQ (CX), Z20, K2, Z28 // 6262dd423b21
+ VPMINUQ 99(R15), Z20, K2, Z28 // 6242dd423ba763000000
+ VPMINUQ Z17, Z0, K2, Z28 // 6222fd4a3be1
+ VPMINUQ Z0, Z0, K2, Z28 // 6262fd4a3be0
+ VPMINUQ (CX), Z0, K2, Z28 // 6262fd4a3b21
+ VPMINUQ 99(R15), Z0, K2, Z28 // 6242fd4a3ba763000000
+ VPMOVDB X14, K3, X16 // 62327e0b31f0
+ VPMOVDB X14, K3, 15(DX)(BX*1) // 62727e0b31b41a0f000000
+ VPMOVDB X14, K3, -7(CX)(DX*2) // 62727e0b31b451f9ffffff
+ VPMOVDB Y21, K2, X11 // 62c27e2a31eb
+ VPMOVDB Y21, K2, (SI) // 62e27e2a312e
+ VPMOVDB Y21, K2, 7(SI)(DI*2) // 62e27e2a31ac7e07000000
+ VPMOVDB Z20, K1, X14 // 62c27e4931e6
+ VPMOVDB Z9, K1, X14 // 62527e4931ce
+ VPMOVDB Z20, K1, (R8) // 62c27e493120
+ VPMOVDB Z9, K1, (R8) // 62527e493108
+ VPMOVDB Z20, K1, 15(DX)(BX*2) // 62e27e4931a45a0f000000
+ VPMOVDB Z9, K1, 15(DX)(BX*2) // 62727e49318c5a0f000000
+ VPMOVDW X8, K2, X19 // 62327e0a33c3
+ VPMOVDW X8, K2, 17(SP)(BP*8) // 62727e0a3384ec11000000
+ VPMOVDW X8, K2, 17(SP)(BP*4) // 62727e0a3384ac11000000
+ VPMOVDW Y12, K1, X8 // 62527e2933e0
+ VPMOVDW Y12, K1, 17(SP)(BP*1) // 62727e2933a42c11000000
+ VPMOVDW Y12, K1, -7(CX)(DX*8) // 62727e2933a4d1f9ffffff
+ VPMOVDW Z30, K7, Y9 // 62427e4f33f1
+ VPMOVDW Z5, K7, Y9 // 62d27e4f33e9
+ VPMOVDW Z30, K7, 7(AX) // 62627e4f33b007000000
+ VPMOVDW Z5, K7, 7(AX) // 62f27e4f33a807000000
+ VPMOVDW Z30, K7, (DI) // 62627e4f3337
+ VPMOVDW Z5, K7, (DI) // 62f27e4f332f
+ VPMOVQB X11, K1, X23 // 62327e0932df
+ VPMOVQB X11, K1, -7(DI)(R8*1) // 62327e09329c07f9ffffff
+ VPMOVQB X11, K1, (SP) // 62727e09321c24
+ VPMOVQB Y12, K1, X31 // 62127e2932e7
+ VPMOVQB Y12, K1, -17(BP) // 62727e2932a5efffffff
+ VPMOVQB Y12, K1, -15(R14)(R15*8) // 62127e2932a4fef1ffffff
+ VPMOVQB Z21, K1, X24 // 62827e4932e8
+ VPMOVQB Z9, K1, X24 // 62127e4932c8
+ VPMOVQB Z21, K1, 7(SI)(DI*4) // 62e27e4932acbe07000000
+ VPMOVQB Z9, K1, 7(SI)(DI*4) // 62727e49328cbe07000000
+ VPMOVQB Z21, K1, -7(DI)(R8*2) // 62a27e4932ac47f9ffffff
+ VPMOVQB Z9, K1, -7(DI)(R8*2) // 62327e49328c47f9ffffff
+ VPMOVQD X0, K7, X14 // 62d27e0f35c6
+ VPMOVQD X0, K7, 17(SP) // 62f27e0f35842411000000
+ VPMOVQD X0, K7, -17(BP)(SI*4) // 62f27e0f3584b5efffffff
+ VPMOVQD Y21, K2, X11 // 62c27e2a35eb
+ VPMOVQD Y21, K2, -17(BP)(SI*2) // 62e27e2a35ac75efffffff
+ VPMOVQD Y21, K2, 7(AX)(CX*2) // 62e27e2a35ac4807000000
+ VPMOVQD Z2, K4, Y14 // 62d27e4c35d6
+ VPMOVQD Z7, K4, Y14 // 62d27e4c35fe
+ VPMOVQD Z2, K4, 99(R15)(R15*1) // 62927e4c35943f63000000
+ VPMOVQD Z7, K4, 99(R15)(R15*1) // 62927e4c35bc3f63000000
+ VPMOVQD Z2, K4, (DX) // 62f27e4c3512
+ VPMOVQD Z7, K4, (DX) // 62f27e4c353a
+ VPMOVQW X2, K1, X23 // 62b27e0934d7
+ VPMOVQW X2, K1, 17(SP)(BP*2) // 62f27e0934946c11000000
+ VPMOVQW X2, K1, -7(DI)(R8*4) // 62b27e09349487f9ffffff
+ VPMOVQW Y30, K3, X20 // 62227e2b34f4
+ VPMOVQW Y30, K3, 7(AX) // 62627e2b34b007000000
+ VPMOVQW Y30, K3, (DI) // 62627e2b3437
+ VPMOVQW Z27, K4, X5 // 62627e4c34dd
+ VPMOVQW Z25, K4, X5 // 62627e4c34cd
+ VPMOVQW Z27, K4, 15(R8)(R14*1) // 62027e4c349c300f000000
+ VPMOVQW Z25, K4, 15(R8)(R14*1) // 62027e4c348c300f000000
+ VPMOVQW Z27, K4, 15(R8)(R14*2) // 62027e4c349c700f000000
+ VPMOVQW Z25, K4, 15(R8)(R14*2) // 62027e4c348c700f000000
+ VPMOVSDB X0, K5, X25 // 62927e0d21c1
+ VPMOVSDB X0, K5, 15(R8) // 62d27e0d21800f000000
+ VPMOVSDB X0, K5, (BP) // 62f27e0d214500
+ VPMOVSDB Y26, K7, X9 // 62427e2f21d1
+ VPMOVSDB Y26, K7, 99(R15)(R15*1) // 62027e2f21943f63000000
+ VPMOVSDB Y26, K7, (DX) // 62627e2f2112
+ VPMOVSDB Z23, K7, X13 // 62c27e4f21fd
+ VPMOVSDB Z9, K7, X13 // 62527e4f21cd
+ VPMOVSDB Z23, K7, (R14) // 62c27e4f213e
+ VPMOVSDB Z9, K7, (R14) // 62527e4f210e
+ VPMOVSDB Z23, K7, -7(DI)(R8*8) // 62a27e4f21bcc7f9ffffff
+ VPMOVSDB Z9, K7, -7(DI)(R8*8) // 62327e4f218cc7f9ffffff
+ VPMOVSDW X8, K6, X2 // 62727e0e23c2
+ VPMOVSDW X8, K6, -17(BP)(SI*8) // 62727e0e2384f5efffffff
+ VPMOVSDW X8, K6, (R15) // 62527e0e2307
+ VPMOVSDW Y7, K3, X9 // 62d27e2b23f9
+ VPMOVSDW Y7, K3, 99(R15)(R15*4) // 62927e2b23bcbf63000000
+ VPMOVSDW Y7, K3, 15(DX) // 62f27e2b23ba0f000000
+ VPMOVSDW Z27, K7, Y16 // 62227e4f23d8
+ VPMOVSDW Z14, K7, Y16 // 62327e4f23f0
+ VPMOVSDW Z27, K7, -17(BP)(SI*8) // 62627e4f239cf5efffffff
+ VPMOVSDW Z14, K7, -17(BP)(SI*8) // 62727e4f23b4f5efffffff
+ VPMOVSDW Z27, K7, (R15) // 62427e4f231f
+ VPMOVSDW Z14, K7, (R15) // 62527e4f2337
+ VPMOVSQB X31, K4, X2 // 62627e0c22fa
+ VPMOVSQB X31, K4, -7(CX) // 62627e0c22b9f9ffffff
+ VPMOVSQB X31, K4, 15(DX)(BX*4) // 62627e0c22bc9a0f000000
+ VPMOVSQB Y1, K4, X11 // 62d27e2c22cb
+ VPMOVSQB Y1, K4, 15(R8)(R14*8) // 62927e2c228cf00f000000
+ VPMOVSQB Y1, K4, -15(R14)(R15*2) // 62927e2c228c7ef1ffffff
+ VPMOVSQB Z3, K7, X22 // 62b27e4f22de
+ VPMOVSQB Z0, K7, X22 // 62b27e4f22c6
+ VPMOVSQB Z3, K7, 7(SI)(DI*8) // 62f27e4f229cfe07000000
+ VPMOVSQB Z0, K7, 7(SI)(DI*8) // 62f27e4f2284fe07000000
+ VPMOVSQB Z3, K7, -15(R14) // 62d27e4f229ef1ffffff
+ VPMOVSQB Z0, K7, -15(R14) // 62d27e4f2286f1ffffff
+ VPMOVSQD X14, K2, X5 // 62727e0a25f5
+ VPMOVSQD X14, K2, 7(SI)(DI*1) // 62727e0a25b43e07000000
+ VPMOVSQD X14, K2, 15(DX)(BX*8) // 62727e0a25b4da0f000000
+ VPMOVSQD Y30, K5, X0 // 62627e2d25f0
+ VPMOVSQD Y30, K5, (CX) // 62627e2d2531
+ VPMOVSQD Y30, K5, 99(R15) // 62427e2d25b763000000
+ VPMOVSQD Z14, K3, Y31 // 62127e4b25f7
+ VPMOVSQD Z7, K3, Y31 // 62927e4b25ff
+ VPMOVSQD Z14, K3, 7(SI)(DI*8) // 62727e4b25b4fe07000000
+ VPMOVSQD Z7, K3, 7(SI)(DI*8) // 62f27e4b25bcfe07000000
+ VPMOVSQD Z14, K3, -15(R14) // 62527e4b25b6f1ffffff
+ VPMOVSQD Z7, K3, -15(R14) // 62d27e4b25bef1ffffff
+ VPMOVSQW X7, K4, X17 // 62b27e0c24f9
+ VPMOVSQW X7, K4, -15(R14)(R15*1) // 62927e0c24bc3ef1ffffff
+ VPMOVSQW X7, K4, -15(BX) // 62f27e0c24bbf1ffffff
+ VPMOVSQW Y22, K2, X15 // 62c27e2a24f7
+ VPMOVSQW Y22, K2, -7(DI)(R8*1) // 62a27e2a24b407f9ffffff
+ VPMOVSQW Y22, K2, (SP) // 62e27e2a243424
+ VPMOVSQW Z8, K2, X11 // 62527e4a24c3
+ VPMOVSQW Z24, K2, X11 // 62427e4a24c3
+ VPMOVSQW Z8, K2, 99(R15)(R15*2) // 62127e4a24847f63000000
+ VPMOVSQW Z24, K2, 99(R15)(R15*2) // 62027e4a24847f63000000
+ VPMOVSQW Z8, K2, -7(DI) // 62727e4a2487f9ffffff
+ VPMOVSQW Z24, K2, -7(DI) // 62627e4a2487f9ffffff
+ VPMOVSXBD X27, K2, Z1 // 62927d4a21cb or 6292fd4a21cb
+ VPMOVSXBD 15(DX)(BX*1), K2, Z1 // 62f27d4a218c1a0f000000 or 62f2fd4a218c1a0f000000
+ VPMOVSXBD -7(CX)(DX*2), K2, Z1 // 62f27d4a218c51f9ffffff or 62f2fd4a218c51f9ffffff
+ VPMOVSXBD X27, K2, Z16 // 62827d4a21c3 or 6282fd4a21c3
+ VPMOVSXBD 15(DX)(BX*1), K2, Z16 // 62e27d4a21841a0f000000 or 62e2fd4a21841a0f000000
+ VPMOVSXBD -7(CX)(DX*2), K2, Z16 // 62e27d4a218451f9ffffff or 62e2fd4a218451f9ffffff
+ VPMOVSXBD X3, K1, X25 // 62627d0921cb or 6262fd0921cb
+ VPMOVSXBD 7(AX)(CX*4), K1, X25 // 62627d09218c8807000000 or 6262fd09218c8807000000
+ VPMOVSXBD 7(AX)(CX*1), K1, X25 // 62627d09218c0807000000 or 6262fd09218c0807000000
+ VPMOVSXBD X18, K2, Y7 // 62b27d2a21fa or 62b2fd2a21fa
+ VPMOVSXBD 99(R15)(R15*8), K2, Y7 // 62927d2a21bcff63000000 or 6292fd2a21bcff63000000
+ VPMOVSXBD 7(AX)(CX*8), K2, Y7 // 62f27d2a21bcc807000000 or 62f2fd2a21bcc807000000
+ VPMOVSXBQ X28, K1, X15 // 62127d0922fc or 6212fd0922fc
+ VPMOVSXBQ 99(R15)(R15*8), K1, X15 // 62127d0922bcff63000000 or 6212fd0922bcff63000000
+ VPMOVSXBQ 7(AX)(CX*8), K1, X15 // 62727d0922bcc807000000 or 6272fd0922bcc807000000
+ VPMOVSXBQ X15, K7, Y0 // 62d27d2f22c7 or 62d2fd2f22c7
+ VPMOVSXBQ (SI), K7, Y0 // 62f27d2f2206 or 62f2fd2f2206
+ VPMOVSXBQ 7(SI)(DI*2), K7, Y0 // 62f27d2f22847e07000000 or 62f2fd2f22847e07000000
+ VPMOVSXBQ X7, K1, Z6 // 62f27d4922f7 or 62f2fd4922f7
+ VPMOVSXBQ (AX), K1, Z6 // 62f27d492230 or 62f2fd492230
+ VPMOVSXBQ 7(SI), K1, Z6 // 62f27d4922b607000000 or 62f2fd4922b607000000
+ VPMOVSXBQ X7, K1, Z2 // 62f27d4922d7 or 62f2fd4922d7
+ VPMOVSXBQ (AX), K1, Z2 // 62f27d492210 or 62f2fd492210
+ VPMOVSXBQ 7(SI), K1, Z2 // 62f27d49229607000000 or 62f2fd49229607000000
+ VPMOVSXDQ X7, K2, Y14 // 62727d2a25f7
+ VPMOVSXDQ 17(SP)(BP*2), K2, Y14 // 62727d2a25b46c11000000
+ VPMOVSXDQ -7(DI)(R8*4), K2, Y14 // 62327d2a25b487f9ffffff
+ VPMOVSXDQ X22, K4, X0 // 62b27d0c25c6
+ VPMOVSXDQ 15(R8)(R14*4), K4, X0 // 62927d0c2584b00f000000
+ VPMOVSXDQ -7(CX)(DX*4), K4, X0 // 62f27d0c258491f9ffffff
+ VPMOVSXDQ Y24, K1, Z15 // 62127d4925f8
+ VPMOVSXDQ -7(CX), K1, Z15 // 62727d4925b9f9ffffff
+ VPMOVSXDQ 15(DX)(BX*4), K1, Z15 // 62727d4925bc9a0f000000
+ VPMOVSXDQ Y24, K1, Z12 // 62127d4925e0
+ VPMOVSXDQ -7(CX), K1, Z12 // 62727d4925a1f9ffffff
+ VPMOVSXDQ 15(DX)(BX*4), K1, Z12 // 62727d4925a49a0f000000
+ VPMOVSXWD X1, K3, Y13 // 62727d2b23e9 or 6272fd2b23e9
+ VPMOVSXWD 15(R8), K3, Y13 // 62527d2b23a80f000000 or 6252fd2b23a80f000000
+ VPMOVSXWD (BP), K3, Y13 // 62727d2b236d00 or 6272fd2b236d00
+ VPMOVSXWD X6, K4, X11 // 62727d0c23de or 6272fd0c23de
+ VPMOVSXWD (R8), K4, X11 // 62527d0c2318 or 6252fd0c2318
+ VPMOVSXWD 15(DX)(BX*2), K4, X11 // 62727d0c239c5a0f000000 or 6272fd0c239c5a0f000000
+ VPMOVSXWD Y20, K5, Z14 // 62327d4d23f4 or 6232fd4d23f4
+ VPMOVSXWD 99(R15)(R15*8), K5, Z14 // 62127d4d23b4ff63000000 or 6212fd4d23b4ff63000000
+ VPMOVSXWD 7(AX)(CX*8), K5, Z14 // 62727d4d23b4c807000000 or 6272fd4d23b4c807000000
+ VPMOVSXWD Y20, K5, Z27 // 62227d4d23dc or 6222fd4d23dc
+ VPMOVSXWD 99(R15)(R15*8), K5, Z27 // 62027d4d239cff63000000 or 6202fd4d239cff63000000
+ VPMOVSXWD 7(AX)(CX*8), K5, Z27 // 62627d4d239cc807000000 or 6262fd4d239cc807000000
+ VPMOVSXWQ X7, K7, Z11 // 62727d4f24df or 6272fd4f24df
+ VPMOVSXWQ 15(R8)(R14*8), K7, Z11 // 62127d4f249cf00f000000 or 6212fd4f249cf00f000000
+ VPMOVSXWQ -15(R14)(R15*2), K7, Z11 // 62127d4f249c7ef1ffffff or 6212fd4f249c7ef1ffffff
+ VPMOVSXWQ X7, K7, Z5 // 62f27d4f24ef or 62f2fd4f24ef
+ VPMOVSXWQ 15(R8)(R14*8), K7, Z5 // 62927d4f24acf00f000000 or 6292fd4f24acf00f000000
+ VPMOVSXWQ -15(R14)(R15*2), K7, Z5 // 62927d4f24ac7ef1ffffff or 6292fd4f24ac7ef1ffffff
+ VPMOVSXWQ X31, K7, X8 // 62127d0f24c7 or 6212fd0f24c7
+ VPMOVSXWQ 17(SP)(BP*8), K7, X8 // 62727d0f2484ec11000000 or 6272fd0f2484ec11000000
+ VPMOVSXWQ 17(SP)(BP*4), K7, X8 // 62727d0f2484ac11000000 or 6272fd0f2484ac11000000
+ VPMOVSXWQ X3, K6, Y14 // 62727d2e24f3 or 6272fd2e24f3
+ VPMOVSXWQ 17(SP)(BP*1), K6, Y14 // 62727d2e24b42c11000000 or 6272fd2e24b42c11000000
+ VPMOVSXWQ -7(CX)(DX*8), K6, Y14 // 62727d2e24b4d1f9ffffff or 6272fd2e24b4d1f9ffffff
+ VPMOVUSDB X20, K3, X28 // 62827e0b11e4
+ VPMOVUSDB X20, K3, 7(SI)(DI*4) // 62e27e0b11a4be07000000
+ VPMOVUSDB X20, K3, -7(DI)(R8*2) // 62a27e0b11a447f9ffffff
+ VPMOVUSDB Y21, K7, X24 // 62827e2f11e8
+ VPMOVUSDB Y21, K7, -17(BP)(SI*2) // 62e27e2f11ac75efffffff
+ VPMOVUSDB Y21, K7, 7(AX)(CX*2) // 62e27e2f11ac4807000000
+ VPMOVUSDB Z13, K4, X7 // 62727e4c11ef
+ VPMOVUSDB Z14, K4, X7 // 62727e4c11f7
+ VPMOVUSDB Z13, K4, -15(R14)(R15*1) // 62127e4c11ac3ef1ffffff
+ VPMOVUSDB Z14, K4, -15(R14)(R15*1) // 62127e4c11b43ef1ffffff
+ VPMOVUSDB Z13, K4, -15(BX) // 62727e4c11abf1ffffff
+ VPMOVUSDB Z14, K4, -15(BX) // 62727e4c11b3f1ffffff
+ VPMOVUSDW X16, K4, X20 // 62a27e0c13c4
+ VPMOVUSDW X16, K4, 15(R8)(R14*1) // 62827e0c1384300f000000
+ VPMOVUSDW X16, K4, 15(R8)(R14*2) // 62827e0c1384700f000000
+ VPMOVUSDW Y1, K7, X12 // 62d27e2f13cc
+ VPMOVUSDW Y1, K7, 7(AX)(CX*4) // 62f27e2f138c8807000000
+ VPMOVUSDW Y1, K7, 7(AX)(CX*1) // 62f27e2f138c0807000000
+ VPMOVUSDW Z5, K2, Y26 // 62927e4a13ea
+ VPMOVUSDW Z23, K2, Y26 // 62827e4a13fa
+ VPMOVUSDW Z5, K2, (AX) // 62f27e4a1328
+ VPMOVUSDW Z23, K2, (AX) // 62e27e4a1338
+ VPMOVUSDW Z5, K2, 7(SI) // 62f27e4a13ae07000000
+ VPMOVUSDW Z23, K2, 7(SI) // 62e27e4a13be07000000
+ VPMOVUSQB X17, K5, X6 // 62e27e0d12ce
+ VPMOVUSQB X17, K5, (AX) // 62e27e0d1208
+ VPMOVUSQB X17, K5, 7(SI) // 62e27e0d128e07000000
+ VPMOVUSQB Y30, K3, X28 // 62027e2b12f4
+ VPMOVUSQB Y30, K3, 17(SP) // 62627e2b12b42411000000
+ VPMOVUSQB Y30, K3, -17(BP)(SI*4) // 62627e2b12b4b5efffffff
+ VPMOVUSQB Z2, K4, X6 // 62f27e4c12d6
+ VPMOVUSQB Z2, K4, (R14) // 62d27e4c1216
+ VPMOVUSQB Z2, K4, -7(DI)(R8*8) // 62b27e4c1294c7f9ffffff
+ VPMOVUSQD X8, K2, X1 // 62727e0a15c1
+ VPMOVUSQD X8, K2, 99(R15)(R15*4) // 62127e0a1584bf63000000
+ VPMOVUSQD X8, K2, 15(DX) // 62727e0a15820f000000
+ VPMOVUSQD Y12, K2, X8 // 62527e2a15e0
+ VPMOVUSQD Y12, K2, (SI) // 62727e2a1526
+ VPMOVUSQD Y12, K2, 7(SI)(DI*2) // 62727e2a15a47e07000000
+ VPMOVUSQD Z6, K3, Y22 // 62b27e4b15f6
+ VPMOVUSQD Z14, K3, Y22 // 62327e4b15f6
+ VPMOVUSQD Z6, K3, (BX) // 62f27e4b1533
+ VPMOVUSQD Z14, K3, (BX) // 62727e4b1533
+ VPMOVUSQD Z6, K3, -17(BP)(SI*1) // 62f27e4b15b435efffffff
+ VPMOVUSQD Z14, K3, -17(BP)(SI*1) // 62727e4b15b435efffffff
+ VPMOVUSQW X0, K3, X6 // 62f27e0b14c6
+ VPMOVUSQW X0, K3, 7(AX) // 62f27e0b148007000000
+ VPMOVUSQW X0, K3, (DI) // 62f27e0b1407
+ VPMOVUSQW Y3, K3, X11 // 62d27e2b14db
+ VPMOVUSQW Y3, K3, (CX) // 62f27e2b1419
+ VPMOVUSQW Y3, K3, 99(R15) // 62d27e2b149f63000000
+ VPMOVUSQW Z26, K2, X16 // 62227e4a14d0
+ VPMOVUSQW Z14, K2, X16 // 62327e4a14f0
+ VPMOVUSQW Z26, K2, 17(SP)(BP*8) // 62627e4a1494ec11000000
+ VPMOVUSQW Z14, K2, 17(SP)(BP*8) // 62727e4a14b4ec11000000
+ VPMOVUSQW Z26, K2, 17(SP)(BP*4) // 62627e4a1494ac11000000
+ VPMOVUSQW Z14, K2, 17(SP)(BP*4) // 62727e4a14b4ac11000000
+ VPMOVZXBD X15, K1, Z3 // 62d27d4931df or 62d2fd4931df
+ VPMOVZXBD 7(AX), K1, Z3 // 62f27d49319807000000 or 62f2fd49319807000000
+ VPMOVZXBD (DI), K1, Z3 // 62f27d49311f or 62f2fd49311f
+ VPMOVZXBD X15, K1, Z0 // 62d27d4931c7 or 62d2fd4931c7
+ VPMOVZXBD 7(AX), K1, Z0 // 62f27d49318007000000 or 62f2fd49318007000000
+ VPMOVZXBD (DI), K1, Z0 // 62f27d493107 or 62f2fd493107
+ VPMOVZXBD X1, K7, X11 // 62727d0f31d9 or 6272fd0f31d9
+ VPMOVZXBD 99(R15)(R15*1), K7, X11 // 62127d0f319c3f63000000 or 6212fd0f319c3f63000000
+ VPMOVZXBD (DX), K7, X11 // 62727d0f311a or 6272fd0f311a
+ VPMOVZXBD X19, K2, Y17 // 62a27d2a31cb or 62a2fd2a31cb
+ VPMOVZXBD 15(DX)(BX*1), K2, Y17 // 62e27d2a318c1a0f000000 or 62e2fd2a318c1a0f000000
+ VPMOVZXBD -7(CX)(DX*2), K2, Y17 // 62e27d2a318c51f9ffffff or 62e2fd2a318c51f9ffffff
+ VPMOVZXBQ X2, K4, X13 // 62727d0c32ea or 6272fd0c32ea
+ VPMOVZXBQ (BX), K4, X13 // 62727d0c322b or 6272fd0c322b
+ VPMOVZXBQ -17(BP)(SI*1), K4, X13 // 62727d0c32ac35efffffff or 6272fd0c32ac35efffffff
+ VPMOVZXBQ X14, K1, Y13 // 62527d2932ee or 6252fd2932ee
+ VPMOVZXBQ -17(BP)(SI*8), K1, Y13 // 62727d2932acf5efffffff or 6272fd2932acf5efffffff
+ VPMOVZXBQ (R15), K1, Y13 // 62527d29322f or 6252fd29322f
+ VPMOVZXBQ X0, K3, Z21 // 62e27d4b32e8 or 62e2fd4b32e8
+ VPMOVZXBQ -17(BP), K3, Z21 // 62e27d4b32adefffffff or 62e2fd4b32adefffffff
+ VPMOVZXBQ -15(R14)(R15*8), K3, Z21 // 62827d4b32acfef1ffffff or 6282fd4b32acfef1ffffff
+ VPMOVZXBQ X0, K3, Z13 // 62727d4b32e8 or 6272fd4b32e8
+ VPMOVZXBQ -17(BP), K3, Z13 // 62727d4b32adefffffff or 6272fd4b32adefffffff
+ VPMOVZXBQ -15(R14)(R15*8), K3, Z13 // 62127d4b32acfef1ffffff or 6212fd4b32acfef1ffffff
+ VPMOVZXDQ X17, K7, Y30 // 62227d2f35f1
+ VPMOVZXDQ -17(BP)(SI*8), K7, Y30 // 62627d2f35b4f5efffffff
+ VPMOVZXDQ (R15), K7, Y30 // 62427d2f3537
+ VPMOVZXDQ X11, K6, X18 // 62c27d0e35d3
+ VPMOVZXDQ 15(R8), K6, X18 // 62c27d0e35900f000000
+ VPMOVZXDQ (BP), K6, X18 // 62e27d0e355500
+ VPMOVZXDQ Y13, K3, Z3 // 62d27d4b35dd
+ VPMOVZXDQ -17(BP)(SI*2), K3, Z3 // 62f27d4b359c75efffffff
+ VPMOVZXDQ 7(AX)(CX*2), K3, Z3 // 62f27d4b359c4807000000
+ VPMOVZXDQ Y13, K3, Z12 // 62527d4b35e5
+ VPMOVZXDQ -17(BP)(SI*2), K3, Z12 // 62727d4b35a475efffffff
+ VPMOVZXDQ 7(AX)(CX*2), K3, Z12 // 62727d4b35a44807000000
+ VPMOVZXWD X9, K7, Y18 // 62c27d2f33d1 or 62c2fd2f33d1
+ VPMOVZXWD 7(SI)(DI*8), K7, Y18 // 62e27d2f3394fe07000000 or 62e2fd2f3394fe07000000
+ VPMOVZXWD -15(R14), K7, Y18 // 62c27d2f3396f1ffffff or 62c2fd2f3396f1ffffff
+ VPMOVZXWD X24, K4, X2 // 62927d0c33d0 or 6292fd0c33d0
+ VPMOVZXWD 15(R8)(R14*8), K4, X2 // 62927d0c3394f00f000000 or 6292fd0c3394f00f000000
+ VPMOVZXWD -15(R14)(R15*2), K4, X2 // 62927d0c33947ef1ffffff or 6292fd0c33947ef1ffffff
+ VPMOVZXWD Y24, K4, Z27 // 62027d4c33d8 or 6202fd4c33d8
+ VPMOVZXWD 15(R8)(R14*1), K4, Z27 // 62027d4c339c300f000000 or 6202fd4c339c300f000000
+ VPMOVZXWD 15(R8)(R14*2), K4, Z27 // 62027d4c339c700f000000 or 6202fd4c339c700f000000
+ VPMOVZXWD Y24, K4, Z15 // 62127d4c33f8 or 6212fd4c33f8
+ VPMOVZXWD 15(R8)(R14*1), K4, Z15 // 62127d4c33bc300f000000 or 6212fd4c33bc300f000000
+ VPMOVZXWD 15(R8)(R14*2), K4, Z15 // 62127d4c33bc700f000000 or 6212fd4c33bc700f000000
+ VPMOVZXWQ X2, K7, Z23 // 62e27d4f34fa or 62e2fd4f34fa
+ VPMOVZXWQ 7(SI)(DI*1), K7, Z23 // 62e27d4f34bc3e07000000 or 62e2fd4f34bc3e07000000
+ VPMOVZXWQ 15(DX)(BX*8), K7, Z23 // 62e27d4f34bcda0f000000 or 62e2fd4f34bcda0f000000
+ VPMOVZXWQ X2, K7, Z5 // 62f27d4f34ea or 62f2fd4f34ea
+ VPMOVZXWQ 7(SI)(DI*1), K7, Z5 // 62f27d4f34ac3e07000000 or 62f2fd4f34ac3e07000000
+ VPMOVZXWQ 15(DX)(BX*8), K7, Z5 // 62f27d4f34acda0f000000 or 62f2fd4f34acda0f000000
+ VPMOVZXWQ X27, K2, X2 // 62927d0a34d3 or 6292fd0a34d3
+ VPMOVZXWQ 7(SI)(DI*8), K2, X2 // 62f27d0a3494fe07000000 or 62f2fd0a3494fe07000000
+ VPMOVZXWQ -15(R14), K2, X2 // 62d27d0a3496f1ffffff or 62d2fd0a3496f1ffffff
+ VPMOVZXWQ X26, K5, Y8 // 62127d2d34c2 or 6212fd2d34c2
+ VPMOVZXWQ -15(R14)(R15*1), K5, Y8 // 62127d2d34843ef1ffffff or 6212fd2d34843ef1ffffff
+ VPMOVZXWQ -15(BX), K5, Y8 // 62727d2d3483f1ffffff or 6272fd2d3483f1ffffff
+ VPMULDQ X3, X30, K3, X22 // 62e28d0328f3
+ VPMULDQ -7(DI)(R8*1), X30, K3, X22 // 62a28d0328b407f9ffffff
+ VPMULDQ (SP), X30, K3, X22 // 62e28d03283424
+ VPMULDQ Y5, Y24, K4, Y11 // 6272bd2428dd
+ VPMULDQ (R14), Y24, K4, Y11 // 6252bd24281e
+ VPMULDQ -7(DI)(R8*8), Y24, K4, Y11 // 6232bd24289cc7f9ffffff
+ VPMULDQ Z21, Z8, K2, Z23 // 62a2bd4a28fd
+ VPMULDQ Z5, Z8, K2, Z23 // 62e2bd4a28fd
+ VPMULDQ -7(CX)(DX*1), Z8, K2, Z23 // 62e2bd4a28bc11f9ffffff
+ VPMULDQ -15(R14)(R15*4), Z8, K2, Z23 // 6282bd4a28bcbef1ffffff
+ VPMULDQ Z21, Z28, K2, Z23 // 62a29d4228fd
+ VPMULDQ Z5, Z28, K2, Z23 // 62e29d4228fd
+ VPMULDQ -7(CX)(DX*1), Z28, K2, Z23 // 62e29d4228bc11f9ffffff
+ VPMULDQ -15(R14)(R15*4), Z28, K2, Z23 // 62829d4228bcbef1ffffff
+ VPMULDQ Z21, Z8, K2, Z6 // 62b2bd4a28f5
+ VPMULDQ Z5, Z8, K2, Z6 // 62f2bd4a28f5
+ VPMULDQ -7(CX)(DX*1), Z8, K2, Z6 // 62f2bd4a28b411f9ffffff
+ VPMULDQ -15(R14)(R15*4), Z8, K2, Z6 // 6292bd4a28b4bef1ffffff
+ VPMULDQ Z21, Z28, K2, Z6 // 62b29d4228f5
+ VPMULDQ Z5, Z28, K2, Z6 // 62f29d4228f5
+ VPMULDQ -7(CX)(DX*1), Z28, K2, Z6 // 62f29d4228b411f9ffffff
+ VPMULDQ -15(R14)(R15*4), Z28, K2, Z6 // 62929d4228b4bef1ffffff
+ VPMULLD X9, X2, K1, X20 // 62c26d0940e1
+ VPMULLD (BX), X2, K1, X20 // 62e26d094023
+ VPMULLD -17(BP)(SI*1), X2, K1, X20 // 62e26d0940a435efffffff
+ VPMULLD Y11, Y26, K1, Y6 // 62d22d2140f3
+ VPMULLD -7(CX)(DX*1), Y26, K1, Y6 // 62f22d2140b411f9ffffff
+ VPMULLD -15(R14)(R15*4), Y26, K1, Y6 // 62922d2140b4bef1ffffff
+ VPMULLD Z7, Z3, K1, Z8 // 6272654940c7
+ VPMULLD Z9, Z3, K1, Z8 // 6252654940c1
+ VPMULLD 15(R8), Z3, K1, Z8 // 6252654940800f000000
+ VPMULLD (BP), Z3, K1, Z8 // 62726549404500
+ VPMULLD Z7, Z27, K1, Z8 // 6272254140c7
+ VPMULLD Z9, Z27, K1, Z8 // 6252254140c1
+ VPMULLD 15(R8), Z27, K1, Z8 // 6252254140800f000000
+ VPMULLD (BP), Z27, K1, Z8 // 62722541404500
+ VPMULLD Z7, Z3, K1, Z2 // 62f2654940d7
+ VPMULLD Z9, Z3, K1, Z2 // 62d2654940d1
+ VPMULLD 15(R8), Z3, K1, Z2 // 62d2654940900f000000
+ VPMULLD (BP), Z3, K1, Z2 // 62f26549405500
+ VPMULLD Z7, Z27, K1, Z2 // 62f2254140d7
+ VPMULLD Z9, Z27, K1, Z2 // 62d2254140d1
+ VPMULLD 15(R8), Z27, K1, Z2 // 62d2254140900f000000
+ VPMULLD (BP), Z27, K1, Z2 // 62f22541405500
+ VPMULUDQ X16, X0, K6, X15 // 6231fd0ef4f8
+ VPMULUDQ -17(BP)(SI*2), X0, K6, X15 // 6271fd0ef4bc75efffffff
+ VPMULUDQ 7(AX)(CX*2), X0, K6, X15 // 6271fd0ef4bc4807000000
+ VPMULUDQ Y14, Y21, K3, Y7 // 62d1d523f4fe
+ VPMULUDQ 15(R8), Y21, K3, Y7 // 62d1d523f4b80f000000
+ VPMULUDQ (BP), Y21, K3, Y7 // 62f1d523f47d00
+ VPMULUDQ Z1, Z6, K7, Z6 // 62f1cd4ff4f1
+ VPMULUDQ Z15, Z6, K7, Z6 // 62d1cd4ff4f7
+ VPMULUDQ (SI), Z6, K7, Z6 // 62f1cd4ff436
+ VPMULUDQ 7(SI)(DI*2), Z6, K7, Z6 // 62f1cd4ff4b47e07000000
+ VPMULUDQ Z1, Z22, K7, Z6 // 62f1cd47f4f1
+ VPMULUDQ Z15, Z22, K7, Z6 // 62d1cd47f4f7
+ VPMULUDQ (SI), Z22, K7, Z6 // 62f1cd47f436
+ VPMULUDQ 7(SI)(DI*2), Z22, K7, Z6 // 62f1cd47f4b47e07000000
+ VPMULUDQ Z1, Z6, K7, Z16 // 62e1cd4ff4c1
+ VPMULUDQ Z15, Z6, K7, Z16 // 62c1cd4ff4c7
+ VPMULUDQ (SI), Z6, K7, Z16 // 62e1cd4ff406
+ VPMULUDQ 7(SI)(DI*2), Z6, K7, Z16 // 62e1cd4ff4847e07000000
+ VPMULUDQ Z1, Z22, K7, Z16 // 62e1cd47f4c1
+ VPMULUDQ Z15, Z22, K7, Z16 // 62c1cd47f4c7
+ VPMULUDQ (SI), Z22, K7, Z16 // 62e1cd47f406
+ VPMULUDQ 7(SI)(DI*2), Z22, K7, Z16 // 62e1cd47f4847e07000000
+ VPORD X7, X1, K2, X31 // 6261750aebff
+ VPORD 99(R15)(R15*2), X1, K2, X31 // 6201750aebbc7f63000000
+ VPORD -7(DI), X1, K2, X31 // 6261750aebbff9ffffff
+ VPORD Y28, Y9, K1, Y20 // 62813529ebe4
+ VPORD 17(SP)(BP*8), Y9, K1, Y20 // 62e13529eba4ec11000000
+ VPORD 17(SP)(BP*4), Y9, K1, Y20 // 62e13529eba4ac11000000
+ VPORD Z15, Z3, K2, Z14 // 6251654aebf7
+ VPORD Z30, Z3, K2, Z14 // 6211654aebf6
+ VPORD 99(R15)(R15*1), Z3, K2, Z14 // 6211654aebb43f63000000
+ VPORD (DX), Z3, K2, Z14 // 6271654aeb32
+ VPORD Z15, Z12, K2, Z14 // 62511d4aebf7
+ VPORD Z30, Z12, K2, Z14 // 62111d4aebf6
+ VPORD 99(R15)(R15*1), Z12, K2, Z14 // 62111d4aebb43f63000000
+ VPORD (DX), Z12, K2, Z14 // 62711d4aeb32
+ VPORD Z15, Z3, K2, Z28 // 6241654aebe7
+ VPORD Z30, Z3, K2, Z28 // 6201654aebe6
+ VPORD 99(R15)(R15*1), Z3, K2, Z28 // 6201654aeba43f63000000
+ VPORD (DX), Z3, K2, Z28 // 6261654aeb22
+ VPORD Z15, Z12, K2, Z28 // 62411d4aebe7
+ VPORD Z30, Z12, K2, Z28 // 62011d4aebe6
+ VPORD 99(R15)(R15*1), Z12, K2, Z28 // 62011d4aeba43f63000000
+ VPORD (DX), Z12, K2, Z28 // 62611d4aeb22
+ VPORQ X12, X15, K1, X9 // 62518509ebcc
+ VPORQ -7(CX)(DX*1), X15, K1, X9 // 62718509eb8c11f9ffffff
+ VPORQ -15(R14)(R15*4), X15, K1, X9 // 62118509eb8cbef1ffffff
+ VPORQ Y8, Y1, K7, Y28 // 6241f52febe0
+ VPORQ 7(SI)(DI*4), Y1, K7, Y28 // 6261f52feba4be07000000
+ VPORQ -7(DI)(R8*2), Y1, K7, Y28 // 6221f52feba447f9ffffff
+ VPORQ Z3, Z5, K1, Z19 // 62e1d549ebdb
+ VPORQ Z5, Z5, K1, Z19 // 62e1d549ebdd
+ VPORQ -17(BP)(SI*8), Z5, K1, Z19 // 62e1d549eb9cf5efffffff
+ VPORQ (R15), Z5, K1, Z19 // 62c1d549eb1f
+ VPORQ Z3, Z1, K1, Z19 // 62e1f549ebdb
+ VPORQ Z5, Z1, K1, Z19 // 62e1f549ebdd
+ VPORQ -17(BP)(SI*8), Z1, K1, Z19 // 62e1f549eb9cf5efffffff
+ VPORQ (R15), Z1, K1, Z19 // 62c1f549eb1f
+ VPORQ Z3, Z5, K1, Z15 // 6271d549ebfb
+ VPORQ Z5, Z5, K1, Z15 // 6271d549ebfd
+ VPORQ -17(BP)(SI*8), Z5, K1, Z15 // 6271d549ebbcf5efffffff
+ VPORQ (R15), Z5, K1, Z15 // 6251d549eb3f
+ VPORQ Z3, Z1, K1, Z15 // 6271f549ebfb
+ VPORQ Z5, Z1, K1, Z15 // 6271f549ebfd
+ VPORQ -17(BP)(SI*8), Z1, K1, Z15 // 6271f549ebbcf5efffffff
+ VPORQ (R15), Z1, K1, Z15 // 6251f549eb3f
+ VPROLD $121, X12, K1, X0 // 62d17d0972cc79
+ VPROLD $121, 15(DX)(BX*1), K1, X0 // 62f17d09728c1a0f00000079
+ VPROLD $121, -7(CX)(DX*2), K1, X0 // 62f17d09728c51f9ffffff79
+ VPROLD $13, Y27, K1, Y11 // 6291252972cb0d
+ VPROLD $13, 17(SP), K1, Y11 // 62f12529728c24110000000d
+ VPROLD $13, -17(BP)(SI*4), K1, Y11 // 62f12529728cb5efffffff0d
+ VPROLD $65, Z21, K7, Z14 // 62b10d4f72cd41
+ VPROLD $65, Z8, K7, Z14 // 62d10d4f72c841
+ VPROLD $65, 7(SI)(DI*8), K7, Z14 // 62f10d4f728cfe0700000041
+ VPROLD $65, -15(R14), K7, Z14 // 62d10d4f728ef1ffffff41
+ VPROLD $65, Z21, K7, Z15 // 62b1054f72cd41
+ VPROLD $65, Z8, K7, Z15 // 62d1054f72c841
+ VPROLD $65, 7(SI)(DI*8), K7, Z15 // 62f1054f728cfe0700000041
+ VPROLD $65, -15(R14), K7, Z15 // 62d1054f728ef1ffffff41
+ VPROLQ $67, X5, K2, X14 // 62f18d0a72cd43
+ VPROLQ $67, -17(BP), K2, X14 // 62f18d0a728defffffff43
+ VPROLQ $67, -15(R14)(R15*8), K2, X14 // 62918d0a728cfef1ffffff43
+ VPROLQ $127, Y16, K4, Y17 // 62b1f52472c87f
+ VPROLQ $127, 7(AX), K4, Y17 // 62f1f5247288070000007f
+ VPROLQ $127, (DI), K4, Y17 // 62f1f524720f7f
+ VPROLQ $0, Z20, K1, Z16 // 62b1fd4172cc00
+ VPROLQ $0, Z0, K1, Z16 // 62f1fd4172c800
+ VPROLQ $0, 7(SI)(DI*1), K1, Z16 // 62f1fd41728c3e0700000000
+ VPROLQ $0, 15(DX)(BX*8), K1, Z16 // 62f1fd41728cda0f00000000
+ VPROLQ $0, Z20, K1, Z9 // 62b1b54972cc00
+ VPROLQ $0, Z0, K1, Z9 // 62f1b54972c800
+ VPROLQ $0, 7(SI)(DI*1), K1, Z9 // 62f1b549728c3e0700000000
+ VPROLQ $0, 15(DX)(BX*8), K1, Z9 // 62f1b549728cda0f00000000
+ VPROLVD X8, X15, K3, X17 // 62c2050b15c8
+ VPROLVD 17(SP)(BP*2), X15, K3, X17 // 62e2050b158c6c11000000
+ VPROLVD -7(DI)(R8*4), X15, K3, X17 // 62a2050b158c87f9ffffff
+ VPROLVD Y26, Y6, K4, Y12 // 62124d2c15e2
+ VPROLVD 99(R15)(R15*1), Y6, K4, Y12 // 62124d2c15a43f63000000
+ VPROLVD (DX), Y6, K4, Y12 // 62724d2c1522
+ VPROLVD Z0, Z0, K5, Z23 // 62e27d4d15f8
+ VPROLVD Z25, Z0, K5, Z23 // 62827d4d15f9
+ VPROLVD -7(DI)(R8*1), Z0, K5, Z23 // 62a27d4d15bc07f9ffffff
+ VPROLVD (SP), Z0, K5, Z23 // 62e27d4d153c24
+ VPROLVD Z0, Z11, K5, Z23 // 62e2254d15f8
+ VPROLVD Z25, Z11, K5, Z23 // 6282254d15f9
+ VPROLVD -7(DI)(R8*1), Z11, K5, Z23 // 62a2254d15bc07f9ffffff
+ VPROLVD (SP), Z11, K5, Z23 // 62e2254d153c24
+ VPROLVD Z0, Z0, K5, Z19 // 62e27d4d15d8
+ VPROLVD Z25, Z0, K5, Z19 // 62827d4d15d9
+ VPROLVD -7(DI)(R8*1), Z0, K5, Z19 // 62a27d4d159c07f9ffffff
+ VPROLVD (SP), Z0, K5, Z19 // 62e27d4d151c24
+ VPROLVD Z0, Z11, K5, Z19 // 62e2254d15d8
+ VPROLVD Z25, Z11, K5, Z19 // 6282254d15d9
+ VPROLVD -7(DI)(R8*1), Z11, K5, Z19 // 62a2254d159c07f9ffffff
+ VPROLVD (SP), Z11, K5, Z19 // 62e2254d151c24
+ VPROLVQ X23, X26, K7, X3 // 62b2ad0715df
+ VPROLVQ 15(R8), X26, K7, X3 // 62d2ad0715980f000000
+ VPROLVQ (BP), X26, K7, X3 // 62f2ad07155d00
+ VPROLVQ Y28, Y8, K7, Y3 // 6292bd2f15dc
+ VPROLVQ -17(BP)(SI*8), Y8, K7, Y3 // 62f2bd2f159cf5efffffff
+ VPROLVQ (R15), Y8, K7, Y3 // 62d2bd2f151f
+ VPROLVQ Z9, Z0, K6, Z24 // 6242fd4e15c1
+ VPROLVQ Z3, Z0, K6, Z24 // 6262fd4e15c3
+ VPROLVQ -7(CX), Z0, K6, Z24 // 6262fd4e1581f9ffffff
+ VPROLVQ 15(DX)(BX*4), Z0, K6, Z24 // 6262fd4e15849a0f000000
+ VPROLVQ Z9, Z26, K6, Z24 // 6242ad4615c1
+ VPROLVQ Z3, Z26, K6, Z24 // 6262ad4615c3
+ VPROLVQ -7(CX), Z26, K6, Z24 // 6262ad461581f9ffffff
+ VPROLVQ 15(DX)(BX*4), Z26, K6, Z24 // 6262ad4615849a0f000000
+ VPROLVQ Z9, Z0, K6, Z12 // 6252fd4e15e1
+ VPROLVQ Z3, Z0, K6, Z12 // 6272fd4e15e3
+ VPROLVQ -7(CX), Z0, K6, Z12 // 6272fd4e15a1f9ffffff
+ VPROLVQ 15(DX)(BX*4), Z0, K6, Z12 // 6272fd4e15a49a0f000000
+ VPROLVQ Z9, Z26, K6, Z12 // 6252ad4615e1
+ VPROLVQ Z3, Z26, K6, Z12 // 6272ad4615e3
+ VPROLVQ -7(CX), Z26, K6, Z12 // 6272ad4615a1f9ffffff
+ VPROLVQ 15(DX)(BX*4), Z26, K6, Z12 // 6272ad4615a49a0f000000
+ VPRORD $97, X28, K3, X13 // 6291150b72c461
+ VPRORD $97, 15(R8)(R14*8), K3, X13 // 6291150b7284f00f00000061
+ VPRORD $97, -15(R14)(R15*2), K3, X13 // 6291150b72847ef1ffffff61
+ VPRORD $81, Y23, K7, Y1 // 62b1752f72c751
+ VPRORD $81, 7(SI)(DI*8), K7, Y1 // 62f1752f7284fe0700000051
+ VPRORD $81, -15(R14), K7, Y1 // 62d1752f7286f1ffffff51
+ VPRORD $42, Z9, K4, Z9 // 62d1354c72c12a
+ VPRORD $42, Z28, K4, Z9 // 6291354c72c42a
+ VPRORD $42, 99(R15)(R15*8), K4, Z9 // 6291354c7284ff630000002a
+ VPRORD $42, 7(AX)(CX*8), K4, Z9 // 62f1354c7284c8070000002a
+ VPRORD $42, Z9, K4, Z25 // 62d1354472c12a
+ VPRORD $42, Z28, K4, Z25 // 6291354472c42a
+ VPRORD $42, 99(R15)(R15*8), K4, Z25 // 629135447284ff630000002a
+ VPRORD $42, 7(AX)(CX*8), K4, Z25 // 62f135447284c8070000002a
+ VPRORQ $79, X9, K4, X24 // 62d1bd0472c14f
+ VPRORQ $79, -15(R14)(R15*1), K4, X24 // 6291bd0472843ef1ffffff4f
+ VPRORQ $79, -15(BX), K4, X24 // 62f1bd047283f1ffffff4f
+ VPRORQ $64, Y31, K7, Y14 // 62918d2f72c740
+ VPRORQ $64, 7(SI)(DI*1), K7, Y14 // 62f18d2f72843e0700000040
+ VPRORQ $64, 15(DX)(BX*8), K7, Y14 // 62f18d2f7284da0f00000040
+ VPRORQ $27, Z17, K2, Z20 // 62b1dd4272c11b
+ VPRORQ $27, Z0, K2, Z20 // 62f1dd4272c01b
+ VPRORQ $27, (AX), K2, Z20 // 62f1dd4272001b
+ VPRORQ $27, 7(SI), K2, Z20 // 62f1dd427286070000001b
+ VPRORQ $27, Z17, K2, Z0 // 62b1fd4a72c11b
+ VPRORQ $27, Z0, K2, Z0 // 62f1fd4a72c01b
+ VPRORQ $27, (AX), K2, Z0 // 62f1fd4a72001b
+ VPRORQ $27, 7(SI), K2, Z0 // 62f1fd4a7286070000001b
+ VPRORVD X18, X26, K5, X15 // 62322d0514fa
+ VPRORVD 7(AX)(CX*4), X26, K5, X15 // 62722d0514bc8807000000
+ VPRORVD 7(AX)(CX*1), X26, K5, X15 // 62722d0514bc0807000000
+ VPRORVD Y22, Y2, K3, Y25 // 62226d2b14ce
+ VPRORVD -7(DI)(R8*1), Y2, K3, Y25 // 62226d2b148c07f9ffffff
+ VPRORVD (SP), Y2, K3, Y25 // 62626d2b140c24
+ VPRORVD Z21, Z31, K4, Z17 // 62a2054414cd
+ VPRORVD Z9, Z31, K4, Z17 // 62c2054414c9
+ VPRORVD (BX), Z31, K4, Z17 // 62e20544140b
+ VPRORVD -17(BP)(SI*1), Z31, K4, Z17 // 62e20544148c35efffffff
+ VPRORVD Z21, Z0, K4, Z17 // 62a27d4c14cd
+ VPRORVD Z9, Z0, K4, Z17 // 62c27d4c14c9
+ VPRORVD (BX), Z0, K4, Z17 // 62e27d4c140b
+ VPRORVD -17(BP)(SI*1), Z0, K4, Z17 // 62e27d4c148c35efffffff
+ VPRORVD Z21, Z31, K4, Z23 // 62a2054414fd
+ VPRORVD Z9, Z31, K4, Z23 // 62c2054414f9
+ VPRORVD (BX), Z31, K4, Z23 // 62e20544143b
+ VPRORVD -17(BP)(SI*1), Z31, K4, Z23 // 62e2054414bc35efffffff
+ VPRORVD Z21, Z0, K4, Z23 // 62a27d4c14fd
+ VPRORVD Z9, Z0, K4, Z23 // 62c27d4c14f9
+ VPRORVD (BX), Z0, K4, Z23 // 62e27d4c143b
+ VPRORVD -17(BP)(SI*1), Z0, K4, Z23 // 62e27d4c14bc35efffffff
+ VPRORVQ X11, X1, K2, X21 // 62c2f50a14eb
+ VPRORVQ (SI), X1, K2, X21 // 62e2f50a142e
+ VPRORVQ 7(SI)(DI*2), X1, K2, X21 // 62e2f50a14ac7e07000000
+ VPRORVQ Y9, Y8, K2, Y27 // 6242bd2a14d9
+ VPRORVQ -7(CX), Y8, K2, Y27 // 6262bd2a1499f9ffffff
+ VPRORVQ 15(DX)(BX*4), Y8, K2, Y27 // 6262bd2a149c9a0f000000
+ VPRORVQ Z20, Z1, K3, Z6 // 62b2f54b14f4
+ VPRORVQ Z9, Z1, K3, Z6 // 62d2f54b14f1
+ VPRORVQ 15(R8)(R14*4), Z1, K3, Z6 // 6292f54b14b4b00f000000
+ VPRORVQ -7(CX)(DX*4), Z1, K3, Z6 // 62f2f54b14b491f9ffffff
+ VPRORVQ Z20, Z9, K3, Z6 // 62b2b54b14f4
+ VPRORVQ Z9, Z9, K3, Z6 // 62d2b54b14f1
+ VPRORVQ 15(R8)(R14*4), Z9, K3, Z6 // 6292b54b14b4b00f000000
+ VPRORVQ -7(CX)(DX*4), Z9, K3, Z6 // 62f2b54b14b491f9ffffff
+ VPRORVQ Z20, Z1, K3, Z9 // 6232f54b14cc
+ VPRORVQ Z9, Z1, K3, Z9 // 6252f54b14c9
+ VPRORVQ 15(R8)(R14*4), Z1, K3, Z9 // 6212f54b148cb00f000000
+ VPRORVQ -7(CX)(DX*4), Z1, K3, Z9 // 6272f54b148c91f9ffffff
+ VPRORVQ Z20, Z9, K3, Z9 // 6232b54b14cc
+ VPRORVQ Z9, Z9, K3, Z9 // 6252b54b14c9
+ VPRORVQ 15(R8)(R14*4), Z9, K3, Z9 // 6212b54b148cb00f000000
+ VPRORVQ -7(CX)(DX*4), Z9, K3, Z9 // 6272b54b148c91f9ffffff
+ VPSCATTERDD X0, K3, (AX)(X4*1) // 62f27d0ba00420
+ VPSCATTERDD X0, K3, (BP)(X10*2) // 62b27d0ba0445500
+ VPSCATTERDD X0, K3, (R10)(X29*8) // 62927d03a004ea
+ VPSCATTERDD Y1, K3, (R10)(Y29*8) // 62927d23a00cea
+ VPSCATTERDD Y1, K3, (SP)(Y4*2) // 62f27d2ba00c64
+ VPSCATTERDD Y1, K3, (DX)(Y10*4) // 62b27d2ba00c92
+ VPSCATTERDD Z16, K2, (DX)(Z10*4) // 62a27d4aa00492
+ VPSCATTERDD Z25, K2, (DX)(Z10*4) // 62227d4aa00c92
+ VPSCATTERDD Z16, K2, (AX)(Z4*1) // 62e27d4aa00420
+ VPSCATTERDD Z25, K2, (AX)(Z4*1) // 62627d4aa00c20
+ VPSCATTERDD Z16, K2, (SP)(Z4*2) // 62e27d4aa00464
+ VPSCATTERDD Z25, K2, (SP)(Z4*2) // 62627d4aa00c64
+ VPSCATTERDQ X0, K1, (DX)(X10*4) // 62b2fd09a00492
+ VPSCATTERDQ X0, K1, (SP)(X4*2) // 62f2fd09a00464
+ VPSCATTERDQ X0, K1, (R14)(X29*8) // 6292fd01a004ee
+ VPSCATTERDQ Y6, K2, (AX)(X4*1) // 62f2fd2aa03420
+ VPSCATTERDQ Y6, K2, (BP)(X10*2) // 62b2fd2aa0745500
+ VPSCATTERDQ Y6, K2, (R10)(X29*8) // 6292fd22a034ea
+ VPSCATTERDQ Z14, K1, (R14)(Y29*8) // 6212fd41a034ee
+ VPSCATTERDQ Z13, K1, (R14)(Y29*8) // 6212fd41a02cee
+ VPSCATTERDQ Z14, K1, (AX)(Y4*1) // 6272fd49a03420
+ VPSCATTERDQ Z13, K1, (AX)(Y4*1) // 6272fd49a02c20
+ VPSCATTERDQ Z14, K1, (BP)(Y10*2) // 6232fd49a0745500
+ VPSCATTERDQ Z13, K1, (BP)(Y10*2) // 6232fd49a06c5500
+ VPSCATTERQD X24, K7, (AX)(X4*1) // 62627d0fa10420
+ VPSCATTERQD X24, K7, (BP)(X10*2) // 62227d0fa1445500
+ VPSCATTERQD X24, K7, (R10)(X29*8) // 62027d07a104ea
+ VPSCATTERQD X20, K1, (R10)(Y29*8) // 62827d21a124ea
+ VPSCATTERQD X20, K1, (SP)(Y4*2) // 62e27d29a12464
+ VPSCATTERQD X20, K1, (DX)(Y10*4) // 62a27d29a12492
+ VPSCATTERQD Y1, K1, (DX)(Z10*4) // 62b27d49a10c92
+ VPSCATTERQD Y1, K1, (AX)(Z4*1) // 62f27d49a10c20
+ VPSCATTERQD Y1, K1, (SP)(Z4*2) // 62f27d49a10c64
+ VPSCATTERQQ X7, K1, (DX)(X10*4) // 62b2fd09a13c92
+ VPSCATTERQQ X7, K1, (SP)(X4*2) // 62f2fd09a13c64
+ VPSCATTERQQ X7, K1, (R14)(X29*8) // 6292fd01a13cee
+ VPSCATTERQQ Y9, K7, (R14)(Y29*8) // 6212fd27a10cee
+ VPSCATTERQQ Y9, K7, (AX)(Y4*1) // 6272fd2fa10c20
+ VPSCATTERQQ Y9, K7, (BP)(Y10*2) // 6232fd2fa14c5500
+ VPSCATTERQQ Z12, K2, (BP)(Z10*2) // 6232fd4aa1645500
+ VPSCATTERQQ Z13, K2, (BP)(Z10*2) // 6232fd4aa16c5500
+ VPSCATTERQQ Z12, K2, (R10)(Z29*8) // 6212fd42a124ea
+ VPSCATTERQQ Z13, K2, (R10)(Z29*8) // 6212fd42a12cea
+ VPSCATTERQQ Z12, K2, (R14)(Z29*8) // 6212fd42a124ee
+ VPSCATTERQQ Z13, K2, (R14)(Z29*8) // 6212fd42a12cee
+ VPSHUFD $126, X2, K4, X9 // 62717d0c70ca7e
+ VPSHUFD $126, 17(SP)(BP*1), K4, X9 // 62717d0c708c2c110000007e
+ VPSHUFD $126, -7(CX)(DX*8), K4, X9 // 62717d0c708cd1f9ffffff7e
+ VPSHUFD $94, Y31, K4, Y6 // 62917d2c70f75e
+ VPSHUFD $94, 17(SP)(BP*2), K4, Y6 // 62f17d2c70b46c110000005e
+ VPSHUFD $94, -7(DI)(R8*4), K4, Y6 // 62b17d2c70b487f9ffffff5e
+ VPSHUFD $121, Z3, K7, Z8 // 62717d4f70c379
+ VPSHUFD $121, Z27, K7, Z8 // 62117d4f70c379
+ VPSHUFD $121, 7(AX)(CX*4), K7, Z8 // 62717d4f7084880700000079
+ VPSHUFD $121, 7(AX)(CX*1), K7, Z8 // 62717d4f7084080700000079
+ VPSHUFD $121, Z3, K7, Z2 // 62f17d4f70d379
+ VPSHUFD $121, Z27, K7, Z2 // 62917d4f70d379
+ VPSHUFD $121, 7(AX)(CX*4), K7, Z2 // 62f17d4f7094880700000079
+ VPSHUFD $121, 7(AX)(CX*1), K7, Z2 // 62f17d4f7094080700000079
+ VPSLLD $81, X0, K3, X14 // 62f10d0b72f051
+ VPSLLD $81, (R14), K3, X14 // 62d10d0b723651
+ VPSLLD $81, -7(DI)(R8*8), K3, X14 // 62b10d0b72b4c7f9ffffff51
+ VPSLLD $42, Y0, K3, Y6 // 62f14d2b72f02a
+ VPSLLD $42, -15(R14)(R15*1), K3, Y6 // 62914d2b72b43ef1ffffff2a
+ VPSLLD $42, -15(BX), K3, Y6 // 62f14d2b72b3f1ffffff2a
+ VPSLLD $79, Z12, K3, Z9 // 62d1354b72f44f
+ VPSLLD $79, Z22, K3, Z9 // 62b1354b72f64f
+ VPSLLD $79, 7(SI)(DI*4), K3, Z9 // 62f1354b72b4be070000004f
+ VPSLLD $79, -7(DI)(R8*2), K3, Z9 // 62b1354b72b447f9ffffff4f
+ VPSLLD $79, Z12, K3, Z19 // 62d1654372f44f
+ VPSLLD $79, Z22, K3, Z19 // 62b1654372f64f
+ VPSLLD $79, 7(SI)(DI*4), K3, Z19 // 62f1654372b4be070000004f
+ VPSLLD $79, -7(DI)(R8*2), K3, Z19 // 62b1654372b447f9ffffff4f
+ VPSLLD X15, X7, K2, X17 // 62c1450af2cf
+ VPSLLD 99(R15)(R15*4), X7, K2, X17 // 6281450af28cbf63000000
+ VPSLLD 15(DX), X7, K2, X17 // 62e1450af28a0f000000
+ VPSLLD X11, Y5, K1, Y3 // 62d15529f2db
+ VPSLLD (CX), Y5, K1, Y3 // 62f15529f219
+ VPSLLD 99(R15), Y5, K1, Y3 // 62d15529f29f63000000
+ VPSLLD X0, Z18, K2, Z11 // 62716d42f2d8
+ VPSLLD 99(R15)(R15*2), Z18, K2, Z11 // 62116d42f29c7f63000000
+ VPSLLD -7(DI), Z18, K2, Z11 // 62716d42f29ff9ffffff
+ VPSLLD X0, Z24, K2, Z11 // 62713d42f2d8
+ VPSLLD 99(R15)(R15*2), Z24, K2, Z11 // 62113d42f29c7f63000000
+ VPSLLD -7(DI), Z24, K2, Z11 // 62713d42f29ff9ffffff
+ VPSLLD X0, Z18, K2, Z5 // 62f16d42f2e8
+ VPSLLD 99(R15)(R15*2), Z18, K2, Z5 // 62916d42f2ac7f63000000
+ VPSLLD -7(DI), Z18, K2, Z5 // 62f16d42f2aff9ffffff
+ VPSLLD X0, Z24, K2, Z5 // 62f13d42f2e8
+ VPSLLD 99(R15)(R15*2), Z24, K2, Z5 // 62913d42f2ac7f63000000
+ VPSLLD -7(DI), Z24, K2, Z5 // 62f13d42f2aff9ffffff
+ VPSLLQ $82, X25, K1, X27 // 6291a50173f152
+ VPSLLQ $82, 15(DX)(BX*1), K1, X27 // 62f1a50173b41a0f00000052
+ VPSLLQ $82, -7(CX)(DX*2), K1, X27 // 62f1a50173b451f9ffffff52
+ VPSLLQ $126, Y5, K7, Y3 // 62f1e52f73f57e
+ VPSLLQ $126, (SI), K7, Y3 // 62f1e52f73367e
+ VPSLLQ $126, 7(SI)(DI*2), K7, Y3 // 62f1e52f73b47e070000007e
+ VPSLLQ $94, Z6, K1, Z6 // 62f1cd4973f65e
+ VPSLLQ $94, Z22, K1, Z6 // 62b1cd4973f65e
+ VPSLLQ $94, 7(AX), K1, Z6 // 62f1cd4973b0070000005e
+ VPSLLQ $94, (DI), K1, Z6 // 62f1cd4973375e
+ VPSLLQ $94, Z6, K1, Z16 // 62f1fd4173f65e
+ VPSLLQ $94, Z22, K1, Z16 // 62b1fd4173f65e
+ VPSLLQ $94, 7(AX), K1, Z16 // 62f1fd4173b0070000005e
+ VPSLLQ $94, (DI), K1, Z16 // 62f1fd4173375e
+ VPSLLQ X15, X18, K1, X3 // 62d1ed01f3df
+ VPSLLQ -17(BP), X18, K1, X3 // 62f1ed01f39defffffff
+ VPSLLQ -15(R14)(R15*8), X18, K1, X3 // 6291ed01f39cfef1ffffff
+ VPSLLQ X28, Y7, K1, Y28 // 6201c529f3e4
+ VPSLLQ 17(SP)(BP*2), Y7, K1, Y28 // 6261c529f3a46c11000000
+ VPSLLQ -7(DI)(R8*4), Y7, K1, Y28 // 6221c529f3a487f9ffffff
+ VPSLLQ X15, Z13, K7, Z1 // 62d1954ff3cf
+ VPSLLQ 15(R8), Z13, K7, Z1 // 62d1954ff3880f000000
+ VPSLLQ (BP), Z13, K7, Z1 // 62f1954ff34d00
+ VPSLLQ X15, Z13, K7, Z15 // 6251954ff3ff
+ VPSLLQ 15(R8), Z13, K7, Z15 // 6251954ff3b80f000000
+ VPSLLQ (BP), Z13, K7, Z15 // 6271954ff37d00
+ VPSLLVD X8, X13, K2, X7 // 62d2150a47f8
+ VPSLLVD 15(R8)(R14*8), X13, K2, X7 // 6292150a47bcf00f000000
+ VPSLLVD -15(R14)(R15*2), X13, K2, X7 // 6292150a47bc7ef1ffffff
+ VPSLLVD Y13, Y22, K4, Y0 // 62d24d2447c5
+ VPSLLVD 17(SP)(BP*8), Y22, K4, Y0 // 62f24d244784ec11000000
+ VPSLLVD 17(SP)(BP*4), Y22, K4, Y0 // 62f24d244784ac11000000
+ VPSLLVD Z2, Z22, K1, Z18 // 62e24d4147d2
+ VPSLLVD Z31, Z22, K1, Z18 // 62824d4147d7
+ VPSLLVD 99(R15)(R15*1), Z22, K1, Z18 // 62824d4147943f63000000
+ VPSLLVD (DX), Z22, K1, Z18 // 62e24d414712
+ VPSLLVD Z2, Z7, K1, Z18 // 62e2454947d2
+ VPSLLVD Z31, Z7, K1, Z18 // 6282454947d7
+ VPSLLVD 99(R15)(R15*1), Z7, K1, Z18 // 6282454947943f63000000
+ VPSLLVD (DX), Z7, K1, Z18 // 62e245494712
+ VPSLLVD Z2, Z22, K1, Z8 // 62724d4147c2
+ VPSLLVD Z31, Z22, K1, Z8 // 62124d4147c7
+ VPSLLVD 99(R15)(R15*1), Z22, K1, Z8 // 62124d4147843f63000000
+ VPSLLVD (DX), Z22, K1, Z8 // 62724d414702
+ VPSLLVD Z2, Z7, K1, Z8 // 6272454947c2
+ VPSLLVD Z31, Z7, K1, Z8 // 6212454947c7
+ VPSLLVD 99(R15)(R15*1), Z7, K1, Z8 // 6212454947843f63000000
+ VPSLLVD (DX), Z7, K1, Z8 // 627245494702
+ VPSLLVQ X0, X7, K3, X24 // 6262c50b47c0
+ VPSLLVQ -15(R14)(R15*1), X7, K3, X24 // 6202c50b47843ef1ffffff
+ VPSLLVQ -15(BX), X7, K3, X24 // 6262c50b4783f1ffffff
+ VPSLLVQ Y14, Y1, K4, Y12 // 6252f52c47e6
+ VPSLLVQ 7(SI)(DI*4), Y1, K4, Y12 // 6272f52c47a4be07000000
+ VPSLLVQ -7(DI)(R8*2), Y1, K4, Y12 // 6232f52c47a447f9ffffff
+ VPSLLVQ Z12, Z1, K5, Z20 // 62c2f54d47e4
+ VPSLLVQ Z16, Z1, K5, Z20 // 62a2f54d47e0
+ VPSLLVQ -17(BP)(SI*8), Z1, K5, Z20 // 62e2f54d47a4f5efffffff
+ VPSLLVQ (R15), Z1, K5, Z20 // 62c2f54d4727
+ VPSLLVQ Z12, Z3, K5, Z20 // 62c2e54d47e4
+ VPSLLVQ Z16, Z3, K5, Z20 // 62a2e54d47e0
+ VPSLLVQ -17(BP)(SI*8), Z3, K5, Z20 // 62e2e54d47a4f5efffffff
+ VPSLLVQ (R15), Z3, K5, Z20 // 62c2e54d4727
+ VPSLLVQ Z12, Z1, K5, Z9 // 6252f54d47cc
+ VPSLLVQ Z16, Z1, K5, Z9 // 6232f54d47c8
+ VPSLLVQ -17(BP)(SI*8), Z1, K5, Z9 // 6272f54d478cf5efffffff
+ VPSLLVQ (R15), Z1, K5, Z9 // 6252f54d470f
+ VPSLLVQ Z12, Z3, K5, Z9 // 6252e54d47cc
+ VPSLLVQ Z16, Z3, K5, Z9 // 6232e54d47c8
+ VPSLLVQ -17(BP)(SI*8), Z3, K5, Z9 // 6272e54d478cf5efffffff
+ VPSLLVQ (R15), Z3, K5, Z9 // 6252e54d470f
+ VPSRAD $67, X7, K5, X24 // 62f13d0572e743
+ VPSRAD $67, 7(AX), K5, X24 // 62f13d0572a00700000043
+ VPSRAD $67, (DI), K5, X24 // 62f13d05722743
+ VPSRAD $127, Y7, K3, Y13 // 62f1152b72e77f
+ VPSRAD $127, 99(R15)(R15*1), K3, Y13 // 6291152b72a43f630000007f
+ VPSRAD $127, (DX), K3, Y13 // 62f1152b72227f
+ VPSRAD $0, Z21, K4, Z14 // 62b10d4c72e500
+ VPSRAD $0, Z8, K4, Z14 // 62d10d4c72e000
+ VPSRAD $0, -7(DI)(R8*1), K4, Z14 // 62b10d4c72a407f9ffffff00
+ VPSRAD $0, (SP), K4, Z14 // 62f10d4c72242400
+ VPSRAD $0, Z21, K4, Z15 // 62b1054c72e500
+ VPSRAD $0, Z8, K4, Z15 // 62d1054c72e000
+ VPSRAD $0, -7(DI)(R8*1), K4, Z15 // 62b1054c72a407f9ffffff00
+ VPSRAD $0, (SP), K4, Z15 // 62f1054c72242400
+ VPSRAD X12, X16, K2, X20 // 62c17d02e2e4
+ VPSRAD 99(R15)(R15*1), X16, K2, X20 // 62817d02e2a43f63000000
+ VPSRAD (DX), X16, K2, X20 // 62e17d02e222
+ VPSRAD X6, Y21, K2, Y2 // 62f15522e2d6
+ VPSRAD -17(BP)(SI*8), Y21, K2, Y2 // 62f15522e294f5efffffff
+ VPSRAD (R15), Y21, K2, Y2 // 62d15522e217
+ VPSRAD X17, Z20, K3, Z16 // 62a15d43e2c1
+ VPSRAD 7(SI)(DI*8), Z20, K3, Z16 // 62e15d43e284fe07000000
+ VPSRAD -15(R14), Z20, K3, Z16 // 62c15d43e286f1ffffff
+ VPSRAD X17, Z0, K3, Z16 // 62a17d4be2c1
+ VPSRAD 7(SI)(DI*8), Z0, K3, Z16 // 62e17d4be284fe07000000
+ VPSRAD -15(R14), Z0, K3, Z16 // 62c17d4be286f1ffffff
+ VPSRAD X17, Z20, K3, Z9 // 62315d43e2c9
+ VPSRAD 7(SI)(DI*8), Z20, K3, Z9 // 62715d43e28cfe07000000
+ VPSRAD -15(R14), Z20, K3, Z9 // 62515d43e28ef1ffffff
+ VPSRAD X17, Z0, K3, Z9 // 62317d4be2c9
+ VPSRAD 7(SI)(DI*8), Z0, K3, Z9 // 62717d4be28cfe07000000
+ VPSRAD -15(R14), Z0, K3, Z9 // 62517d4be28ef1ffffff
+ VPSRAQ $97, X6, K3, X28 // 62f19d0372e661
+ VPSRAQ $97, 7(SI)(DI*1), K3, X28 // 62f19d0372a43e0700000061
+ VPSRAQ $97, 15(DX)(BX*8), K3, X28 // 62f19d0372a4da0f00000061
+ VPSRAQ $81, Y9, K3, Y12 // 62d19d2b72e151
+ VPSRAQ $81, -17(BP)(SI*8), K3, Y12 // 62f19d2b72a4f5efffffff51
+ VPSRAQ $81, (R15), K3, Y12 // 62d19d2b722751
+ VPSRAQ $42, Z0, K2, Z23 // 62f1c54272e02a
+ VPSRAQ $42, Z11, K2, Z23 // 62d1c54272e32a
+ VPSRAQ $42, -7(CX), K2, Z23 // 62f1c54272a1f9ffffff2a
+ VPSRAQ $42, 15(DX)(BX*4), K2, Z23 // 62f1c54272a49a0f0000002a
+ VPSRAQ $42, Z0, K2, Z19 // 62f1e54272e02a
+ VPSRAQ $42, Z11, K2, Z19 // 62d1e54272e32a
+ VPSRAQ $42, -7(CX), K2, Z19 // 62f1e54272a1f9ffffff2a
+ VPSRAQ $42, 15(DX)(BX*4), K2, Z19 // 62f1e54272a49a0f0000002a
+ VPSRAQ X8, X8, K1, X1 // 62d1bd09e2c8
+ VPSRAQ -7(DI)(R8*1), X8, K1, X1 // 62b1bd09e28c07f9ffffff
+ VPSRAQ (SP), X8, K1, X1 // 62f1bd09e20c24
+ VPSRAQ X6, Y9, K2, Y1 // 62f1b52ae2ce
+ VPSRAQ -7(CX), Y9, K2, Y1 // 62f1b52ae289f9ffffff
+ VPSRAQ 15(DX)(BX*4), Y9, K2, Y1 // 62f1b52ae28c9a0f000000
+ VPSRAQ X0, Z24, K1, Z0 // 62f1bd41e2c0
+ VPSRAQ 99(R15)(R15*8), Z24, K1, Z0 // 6291bd41e284ff63000000
+ VPSRAQ 7(AX)(CX*8), Z24, K1, Z0 // 62f1bd41e284c807000000
+ VPSRAQ X0, Z12, K1, Z0 // 62f19d49e2c0
+ VPSRAQ 99(R15)(R15*8), Z12, K1, Z0 // 62919d49e284ff63000000
+ VPSRAQ 7(AX)(CX*8), Z12, K1, Z0 // 62f19d49e284c807000000
+ VPSRAQ X0, Z24, K1, Z25 // 6261bd41e2c8
+ VPSRAQ 99(R15)(R15*8), Z24, K1, Z25 // 6201bd41e28cff63000000
+ VPSRAQ 7(AX)(CX*8), Z24, K1, Z25 // 6261bd41e28cc807000000
+ VPSRAQ X0, Z12, K1, Z25 // 62619d49e2c8
+ VPSRAQ 99(R15)(R15*8), Z12, K1, Z25 // 62019d49e28cff63000000
+ VPSRAQ 7(AX)(CX*8), Z12, K1, Z25 // 62619d49e28cc807000000
+ VPSRAVD X6, X16, K7, X11 // 62727d0746de
+ VPSRAVD (AX), X16, K7, X11 // 62727d074618
+ VPSRAVD 7(SI), X16, K7, X11 // 62727d07469e07000000
+ VPSRAVD Y9, Y2, K1, Y3 // 62d26d2946d9
+ VPSRAVD 7(SI)(DI*8), Y2, K1, Y3 // 62f26d29469cfe07000000
+ VPSRAVD -15(R14), Y2, K1, Y3 // 62d26d29469ef1ffffff
+ VPSRAVD Z9, Z9, K1, Z0 // 62d2354946c1
+ VPSRAVD Z25, Z9, K1, Z0 // 6292354946c1
+ VPSRAVD 99(R15)(R15*8), Z9, K1, Z0 // 629235494684ff63000000
+ VPSRAVD 7(AX)(CX*8), Z9, K1, Z0 // 62f235494684c807000000
+ VPSRAVD Z9, Z3, K1, Z0 // 62d2654946c1
+ VPSRAVD Z25, Z3, K1, Z0 // 6292654946c1
+ VPSRAVD 99(R15)(R15*8), Z3, K1, Z0 // 629265494684ff63000000
+ VPSRAVD 7(AX)(CX*8), Z3, K1, Z0 // 62f265494684c807000000
+ VPSRAVD Z9, Z9, K1, Z26 // 6242354946d1
+ VPSRAVD Z25, Z9, K1, Z26 // 6202354946d1
+ VPSRAVD 99(R15)(R15*8), Z9, K1, Z26 // 620235494694ff63000000
+ VPSRAVD 7(AX)(CX*8), Z9, K1, Z26 // 626235494694c807000000
+ VPSRAVD Z9, Z3, K1, Z26 // 6242654946d1
+ VPSRAVD Z25, Z3, K1, Z26 // 6202654946d1
+ VPSRAVD 99(R15)(R15*8), Z3, K1, Z26 // 620265494694ff63000000
+ VPSRAVD 7(AX)(CX*8), Z3, K1, Z26 // 626265494694c807000000
+ VPSRAVQ X12, X22, K1, X6 // 62d2cd0146f4
+ VPSRAVQ (BX), X22, K1, X6 // 62f2cd014633
+ VPSRAVQ -17(BP)(SI*1), X22, K1, X6 // 62f2cd0146b435efffffff
+ VPSRAVQ Y14, Y21, K7, Y12 // 6252d52746e6
+ VPSRAVQ 7(SI)(DI*1), Y21, K7, Y12 // 6272d52746a43e07000000
+ VPSRAVQ 15(DX)(BX*8), Y21, K7, Y12 // 6272d52746a4da0f000000
+ VPSRAVQ Z17, Z20, K2, Z9 // 6232dd4246c9
+ VPSRAVQ Z0, Z20, K2, Z9 // 6272dd4246c8
+ VPSRAVQ (AX), Z20, K2, Z9 // 6272dd424608
+ VPSRAVQ 7(SI), Z20, K2, Z9 // 6272dd42468e07000000
+ VPSRAVQ Z17, Z0, K2, Z9 // 6232fd4a46c9
+ VPSRAVQ Z0, Z0, K2, Z9 // 6272fd4a46c8
+ VPSRAVQ (AX), Z0, K2, Z9 // 6272fd4a4608
+ VPSRAVQ 7(SI), Z0, K2, Z9 // 6272fd4a468e07000000
+ VPSRAVQ Z17, Z20, K2, Z28 // 6222dd4246e1
+ VPSRAVQ Z0, Z20, K2, Z28 // 6262dd4246e0
+ VPSRAVQ (AX), Z20, K2, Z28 // 6262dd424620
+ VPSRAVQ 7(SI), Z20, K2, Z28 // 6262dd4246a607000000
+ VPSRAVQ Z17, Z0, K2, Z28 // 6222fd4a46e1
+ VPSRAVQ Z0, Z0, K2, Z28 // 6262fd4a46e0
+ VPSRAVQ (AX), Z0, K2, Z28 // 6262fd4a4620
+ VPSRAVQ 7(SI), Z0, K2, Z28 // 6262fd4a46a607000000
+ VPSRLD $47, X0, K7, X0 // 62f17d0f72d02f
+ VPSRLD $47, (R14), K7, X0 // 62d17d0f72162f
+ VPSRLD $47, -7(DI)(R8*8), K7, X0 // 62b17d0f7294c7f9ffffff2f
+ VPSRLD $82, Y6, K4, Y22 // 62f14d2472d652
+ VPSRLD $82, 99(R15)(R15*8), K4, Y22 // 62914d247294ff6300000052
+ VPSRLD $82, 7(AX)(CX*8), K4, Y22 // 62f14d247294c80700000052
+ VPSRLD $126, Z7, K4, Z26 // 62f12d4472d77e
+ VPSRLD $126, Z21, K4, Z26 // 62b12d4472d57e
+ VPSRLD $126, (R8), K4, Z26 // 62d12d4472107e
+ VPSRLD $126, 15(DX)(BX*2), K4, Z26 // 62f12d4472945a0f0000007e
+ VPSRLD $126, Z7, K4, Z22 // 62f14d4472d77e
+ VPSRLD $126, Z21, K4, Z22 // 62b14d4472d57e
+ VPSRLD $126, (R8), K4, Z22 // 62d14d4472107e
+ VPSRLD $126, 15(DX)(BX*2), K4, Z22 // 62f14d4472945a0f0000007e
+ VPSRLD X17, X11, K7, X25 // 6221250fd2c9
+ VPSRLD 99(R15)(R15*4), X11, K7, X25 // 6201250fd28cbf63000000
+ VPSRLD 15(DX), X11, K7, X25 // 6261250fd28a0f000000
+ VPSRLD X18, Y7, K2, Y21 // 62a1452ad2ea
+ VPSRLD (CX), Y7, K2, Y21 // 62e1452ad229
+ VPSRLD 99(R15), Y7, K2, Y21 // 62c1452ad2af63000000
+ VPSRLD X11, Z14, K5, Z16 // 62c10d4dd2c3
+ VPSRLD 99(R15)(R15*2), Z14, K5, Z16 // 62810d4dd2847f63000000
+ VPSRLD -7(DI), Z14, K5, Z16 // 62e10d4dd287f9ffffff
+ VPSRLD X11, Z13, K5, Z16 // 62c1154dd2c3
+ VPSRLD 99(R15)(R15*2), Z13, K5, Z16 // 6281154dd2847f63000000
+ VPSRLD -7(DI), Z13, K5, Z16 // 62e1154dd287f9ffffff
+ VPSRLD X11, Z14, K5, Z25 // 62410d4dd2cb
+ VPSRLD 99(R15)(R15*2), Z14, K5, Z25 // 62010d4dd28c7f63000000
+ VPSRLD -7(DI), Z14, K5, Z25 // 62610d4dd28ff9ffffff
+ VPSRLD X11, Z13, K5, Z25 // 6241154dd2cb
+ VPSRLD 99(R15)(R15*2), Z13, K5, Z25 // 6201154dd28c7f63000000
+ VPSRLD -7(DI), Z13, K5, Z25 // 6261154dd28ff9ffffff
+ VPSRLQ $65, X2, K3, X24 // 62f1bd0373d241
+ VPSRLQ $65, 15(DX)(BX*1), K3, X24 // 62f1bd0373941a0f00000041
+ VPSRLQ $65, -7(CX)(DX*2), K3, X24 // 62f1bd03739451f9ffffff41
+ VPSRLQ $67, Y14, K4, Y20 // 62d1dd2473d643
+ VPSRLQ $67, (BX), K4, Y20 // 62f1dd24731343
+ VPSRLQ $67, -17(BP)(SI*1), K4, Y20 // 62f1dd24739435efffffff43
+ VPSRLQ $127, Z27, K2, Z2 // 6291ed4a73d37f
+ VPSRLQ $127, Z25, K2, Z2 // 6291ed4a73d17f
+ VPSRLQ $127, -17(BP)(SI*2), K2, Z2 // 62f1ed4a739475efffffff7f
+ VPSRLQ $127, 7(AX)(CX*2), K2, Z2 // 62f1ed4a739448070000007f
+ VPSRLQ $127, Z27, K2, Z7 // 6291c54a73d37f
+ VPSRLQ $127, Z25, K2, Z7 // 6291c54a73d17f
+ VPSRLQ $127, -17(BP)(SI*2), K2, Z7 // 62f1c54a739475efffffff7f
+ VPSRLQ $127, 7(AX)(CX*2), K2, Z7 // 62f1c54a739448070000007f
+ VPSRLQ X26, X27, K2, X2 // 6291a502d3d2
+ VPSRLQ -17(BP), X27, K2, X2 // 62f1a502d395efffffff
+ VPSRLQ -15(R14)(R15*8), X27, K2, X2 // 6291a502d394fef1ffffff
+ VPSRLQ X22, Y13, K3, Y24 // 6221952bd3c6
+ VPSRLQ 17(SP)(BP*2), Y13, K3, Y24 // 6261952bd3846c11000000
+ VPSRLQ -7(DI)(R8*4), Y13, K3, Y24 // 6221952bd38487f9ffffff
+ VPSRLQ X30, Z27, K3, Z23 // 6281a543d3fe
+ VPSRLQ 15(R8), Z27, K3, Z23 // 62c1a543d3b80f000000
+ VPSRLQ (BP), Z27, K3, Z23 // 62e1a543d37d00
+ VPSRLQ X30, Z14, K3, Z23 // 62818d4bd3fe
+ VPSRLQ 15(R8), Z14, K3, Z23 // 62c18d4bd3b80f000000
+ VPSRLQ (BP), Z14, K3, Z23 // 62e18d4bd37d00
+ VPSRLQ X30, Z27, K3, Z9 // 6211a543d3ce
+ VPSRLQ 15(R8), Z27, K3, Z9 // 6251a543d3880f000000
+ VPSRLQ (BP), Z27, K3, Z9 // 6271a543d34d00
+ VPSRLQ X30, Z14, K3, Z9 // 62118d4bd3ce
+ VPSRLQ 15(R8), Z14, K3, Z9 // 62518d4bd3880f000000
+ VPSRLQ (BP), Z14, K3, Z9 // 62718d4bd34d00
+ VPSRLVD X15, X11, K3, X3 // 62d2250b45df
+ VPSRLVD 15(R8)(R14*8), X11, K3, X3 // 6292250b459cf00f000000
+ VPSRLVD -15(R14)(R15*2), X11, K3, X3 // 6292250b459c7ef1ffffff
+ VPSRLVD Y21, Y14, K2, Y20 // 62a20d2a45e5
+ VPSRLVD 15(R8)(R14*4), Y14, K2, Y20 // 62820d2a45a4b00f000000
+ VPSRLVD -7(CX)(DX*4), Y14, K2, Y20 // 62e20d2a45a491f9ffffff
+ VPSRLVD Z8, Z14, K1, Z3 // 62d20d4945d8
+ VPSRLVD Z24, Z14, K1, Z3 // 62920d4945d8
+ VPSRLVD 15(R8)(R14*1), Z14, K1, Z3 // 62920d49459c300f000000
+ VPSRLVD 15(R8)(R14*2), Z14, K1, Z3 // 62920d49459c700f000000
+ VPSRLVD Z8, Z7, K1, Z3 // 62d2454945d8
+ VPSRLVD Z24, Z7, K1, Z3 // 6292454945d8
+ VPSRLVD 15(R8)(R14*1), Z7, K1, Z3 // 62924549459c300f000000
+ VPSRLVD 15(R8)(R14*2), Z7, K1, Z3 // 62924549459c700f000000
+ VPSRLVD Z8, Z14, K1, Z0 // 62d20d4945c0
+ VPSRLVD Z24, Z14, K1, Z0 // 62920d4945c0
+ VPSRLVD 15(R8)(R14*1), Z14, K1, Z0 // 62920d494584300f000000
+ VPSRLVD 15(R8)(R14*2), Z14, K1, Z0 // 62920d494584700f000000
+ VPSRLVD Z8, Z7, K1, Z0 // 62d2454945c0
+ VPSRLVD Z24, Z7, K1, Z0 // 6292454945c0
+ VPSRLVD 15(R8)(R14*1), Z7, K1, Z0 // 629245494584300f000000
+ VPSRLVD 15(R8)(R14*2), Z7, K1, Z0 // 629245494584700f000000
+ VPSRLVQ X6, X13, K2, X30 // 6262950a45f6
+ VPSRLVQ -15(R14)(R15*1), X13, K2, X30 // 6202950a45b43ef1ffffff
+ VPSRLVQ -15(BX), X13, K2, X30 // 6262950a45b3f1ffffff
+ VPSRLVQ Y30, Y26, K1, Y1 // 6292ad2145ce
+ VPSRLVQ (R8), Y26, K1, Y1 // 62d2ad214508
+ VPSRLVQ 15(DX)(BX*2), Y26, K1, Y1 // 62f2ad21458c5a0f000000
+ VPSRLVQ Z6, Z1, K7, Z22 // 62e2f54f45f6
+ VPSRLVQ Z2, Z1, K7, Z22 // 62e2f54f45f2
+ VPSRLVQ (R14), Z1, K7, Z22 // 62c2f54f4536
+ VPSRLVQ -7(DI)(R8*8), Z1, K7, Z22 // 62a2f54f45b4c7f9ffffff
+ VPSRLVQ Z6, Z16, K7, Z22 // 62e2fd4745f6
+ VPSRLVQ Z2, Z16, K7, Z22 // 62e2fd4745f2
+ VPSRLVQ (R14), Z16, K7, Z22 // 62c2fd474536
+ VPSRLVQ -7(DI)(R8*8), Z16, K7, Z22 // 62a2fd4745b4c7f9ffffff
+ VPSRLVQ Z6, Z1, K7, Z25 // 6262f54f45ce
+ VPSRLVQ Z2, Z1, K7, Z25 // 6262f54f45ca
+ VPSRLVQ (R14), Z1, K7, Z25 // 6242f54f450e
+ VPSRLVQ -7(DI)(R8*8), Z1, K7, Z25 // 6222f54f458cc7f9ffffff
+ VPSRLVQ Z6, Z16, K7, Z25 // 6262fd4745ce
+ VPSRLVQ Z2, Z16, K7, Z25 // 6262fd4745ca
+ VPSRLVQ (R14), Z16, K7, Z25 // 6242fd47450e
+ VPSRLVQ -7(DI)(R8*8), Z16, K7, Z25 // 6222fd47458cc7f9ffffff
+ VPSUBD X0, X1, K6, X8 // 6271750efac0
+ VPSUBD 99(R15)(R15*1), X1, K6, X8 // 6211750efa843f63000000
+ VPSUBD (DX), X1, K6, X8 // 6271750efa02
+ VPSUBD Y30, Y7, K3, Y21 // 6281452bfaee
+ VPSUBD (R14), Y7, K3, Y21 // 62c1452bfa2e
+ VPSUBD -7(DI)(R8*8), Y7, K3, Y21 // 62a1452bfaacc7f9ffffff
+ VPSUBD Z3, Z26, K7, Z13 // 62712d47faeb
+ VPSUBD Z0, Z26, K7, Z13 // 62712d47fae8
+ VPSUBD -7(CX)(DX*1), Z26, K7, Z13 // 62712d47faac11f9ffffff
+ VPSUBD -15(R14)(R15*4), Z26, K7, Z13 // 62112d47faacbef1ffffff
+ VPSUBD Z3, Z3, K7, Z13 // 6271654ffaeb
+ VPSUBD Z0, Z3, K7, Z13 // 6271654ffae8
+ VPSUBD -7(CX)(DX*1), Z3, K7, Z13 // 6271654ffaac11f9ffffff
+ VPSUBD -15(R14)(R15*4), Z3, K7, Z13 // 6211654ffaacbef1ffffff
+ VPSUBD Z3, Z26, K7, Z21 // 62e12d47faeb
+ VPSUBD Z0, Z26, K7, Z21 // 62e12d47fae8
+ VPSUBD -7(CX)(DX*1), Z26, K7, Z21 // 62e12d47faac11f9ffffff
+ VPSUBD -15(R14)(R15*4), Z26, K7, Z21 // 62812d47faacbef1ffffff
+ VPSUBD Z3, Z3, K7, Z21 // 62e1654ffaeb
+ VPSUBD Z0, Z3, K7, Z21 // 62e1654ffae8
+ VPSUBD -7(CX)(DX*1), Z3, K7, Z21 // 62e1654ffaac11f9ffffff
+ VPSUBD -15(R14)(R15*4), Z3, K7, Z21 // 6281654ffaacbef1ffffff
+ VPSUBQ X16, X0, K4, X15 // 6231fd0cfbf8
+ VPSUBQ -17(BP)(SI*8), X0, K4, X15 // 6271fd0cfbbcf5efffffff
+ VPSUBQ (R15), X0, K4, X15 // 6251fd0cfb3f
+ VPSUBQ Y24, Y18, K4, Y13 // 6211ed24fbe8
+ VPSUBQ 99(R15)(R15*4), Y18, K4, Y13 // 6211ed24fbacbf63000000
+ VPSUBQ 15(DX), Y18, K4, Y13 // 6271ed24fbaa0f000000
+ VPSUBQ Z3, Z11, K7, Z21 // 62e1a54ffbeb
+ VPSUBQ Z12, Z11, K7, Z21 // 62c1a54ffbec
+ VPSUBQ 15(DX)(BX*1), Z11, K7, Z21 // 62e1a54ffbac1a0f000000
+ VPSUBQ -7(CX)(DX*2), Z11, K7, Z21 // 62e1a54ffbac51f9ffffff
+ VPSUBQ Z3, Z25, K7, Z21 // 62e1b547fbeb
+ VPSUBQ Z12, Z25, K7, Z21 // 62c1b547fbec
+ VPSUBQ 15(DX)(BX*1), Z25, K7, Z21 // 62e1b547fbac1a0f000000
+ VPSUBQ -7(CX)(DX*2), Z25, K7, Z21 // 62e1b547fbac51f9ffffff
+ VPSUBQ Z3, Z11, K7, Z13 // 6271a54ffbeb
+ VPSUBQ Z12, Z11, K7, Z13 // 6251a54ffbec
+ VPSUBQ 15(DX)(BX*1), Z11, K7, Z13 // 6271a54ffbac1a0f000000
+ VPSUBQ -7(CX)(DX*2), Z11, K7, Z13 // 6271a54ffbac51f9ffffff
+ VPSUBQ Z3, Z25, K7, Z13 // 6271b547fbeb
+ VPSUBQ Z12, Z25, K7, Z13 // 6251b547fbec
+ VPSUBQ 15(DX)(BX*1), Z25, K7, Z13 // 6271b547fbac1a0f000000
+ VPSUBQ -7(CX)(DX*2), Z25, K7, Z13 // 6271b547fbac51f9ffffff
+ VPTERNLOGD $42, X5, X14, K1, X12 // 62730d0925e52a
+ VPTERNLOGD $42, (AX), X14, K1, X12 // 62730d0925202a
+ VPTERNLOGD $42, 7(SI), X14, K1, X12 // 62730d0925a6070000002a
+ VPTERNLOGD $79, Y12, Y26, K1, Y11 // 62532d2125dc4f
+ VPTERNLOGD $79, 17(SP)(BP*2), Y26, K1, Y11 // 62732d21259c6c110000004f
+ VPTERNLOGD $79, -7(DI)(R8*4), Y26, K1, Y11 // 62332d21259c87f9ffffff4f
+ VPTERNLOGD $64, Z0, Z7, K7, Z3 // 62f3454f25d840
+ VPTERNLOGD $64, Z6, Z7, K7, Z3 // 62f3454f25de40
+ VPTERNLOGD $64, 7(AX)(CX*4), Z7, K7, Z3 // 62f3454f259c880700000040
+ VPTERNLOGD $64, 7(AX)(CX*1), Z7, K7, Z3 // 62f3454f259c080700000040
+ VPTERNLOGD $64, Z0, Z9, K7, Z3 // 62f3354f25d840
+ VPTERNLOGD $64, Z6, Z9, K7, Z3 // 62f3354f25de40
+ VPTERNLOGD $64, 7(AX)(CX*4), Z9, K7, Z3 // 62f3354f259c880700000040
+ VPTERNLOGD $64, 7(AX)(CX*1), Z9, K7, Z3 // 62f3354f259c080700000040
+ VPTERNLOGD $64, Z0, Z7, K7, Z27 // 6263454f25d840
+ VPTERNLOGD $64, Z6, Z7, K7, Z27 // 6263454f25de40
+ VPTERNLOGD $64, 7(AX)(CX*4), Z7, K7, Z27 // 6263454f259c880700000040
+ VPTERNLOGD $64, 7(AX)(CX*1), Z7, K7, Z27 // 6263454f259c080700000040
+ VPTERNLOGD $64, Z0, Z9, K7, Z27 // 6263354f25d840
+ VPTERNLOGD $64, Z6, Z9, K7, Z27 // 6263354f25de40
+ VPTERNLOGD $64, 7(AX)(CX*4), Z9, K7, Z27 // 6263354f259c880700000040
+ VPTERNLOGD $64, 7(AX)(CX*1), Z9, K7, Z27 // 6263354f259c080700000040
+ VPTERNLOGQ $27, X8, X15, K2, X17 // 62c3850a25c81b
+ VPTERNLOGQ $27, (BX), X15, K2, X17 // 62e3850a250b1b
+ VPTERNLOGQ $27, -17(BP)(SI*1), X15, K2, X17 // 62e3850a258c35efffffff1b
+ VPTERNLOGQ $47, Y31, Y18, K4, Y14 // 6213ed2425f72f
+ VPTERNLOGQ $47, 15(R8), Y18, K4, Y14 // 6253ed2425b00f0000002f
+ VPTERNLOGQ $47, (BP), Y18, K4, Y14 // 6273ed242575002f
+ VPTERNLOGQ $82, Z9, Z3, K1, Z20 // 62c3e54925e152
+ VPTERNLOGQ $82, Z19, Z3, K1, Z20 // 62a3e54925e352
+ VPTERNLOGQ $82, (SI), Z3, K1, Z20 // 62e3e549252652
+ VPTERNLOGQ $82, 7(SI)(DI*2), Z3, K1, Z20 // 62e3e54925a47e0700000052
+ VPTERNLOGQ $82, Z9, Z30, K1, Z20 // 62c38d4125e152
+ VPTERNLOGQ $82, Z19, Z30, K1, Z20 // 62a38d4125e352
+ VPTERNLOGQ $82, (SI), Z30, K1, Z20 // 62e38d41252652
+ VPTERNLOGQ $82, 7(SI)(DI*2), Z30, K1, Z20 // 62e38d4125a47e0700000052
+ VPTERNLOGQ $82, Z9, Z3, K1, Z28 // 6243e54925e152
+ VPTERNLOGQ $82, Z19, Z3, K1, Z28 // 6223e54925e352
+ VPTERNLOGQ $82, (SI), Z3, K1, Z28 // 6263e549252652
+ VPTERNLOGQ $82, 7(SI)(DI*2), Z3, K1, Z28 // 6263e54925a47e0700000052
+ VPTERNLOGQ $82, Z9, Z30, K1, Z28 // 62438d4125e152
+ VPTERNLOGQ $82, Z19, Z30, K1, Z28 // 62238d4125e352
+ VPTERNLOGQ $82, (SI), Z30, K1, Z28 // 62638d41252652
+ VPTERNLOGQ $82, 7(SI)(DI*2), Z30, K1, Z28 // 62638d4125a47e0700000052
+ VPTESTMD X13, X23, K7, K4 // 62d2450727e5
+ VPTESTMD (R8), X23, K7, K4 // 62d245072720
+ VPTESTMD 15(DX)(BX*2), X23, K7, K4 // 62f2450727a45a0f000000
+ VPTESTMD X13, X23, K7, K6 // 62d2450727f5
+ VPTESTMD (R8), X23, K7, K6 // 62d245072730
+ VPTESTMD 15(DX)(BX*2), X23, K7, K6 // 62f2450727b45a0f000000
+ VPTESTMD Y2, Y24, K7, K0 // 62f23d2727c2
+ VPTESTMD -15(R14)(R15*1), Y24, K7, K0 // 62923d2727843ef1ffffff
+ VPTESTMD -15(BX), Y24, K7, K0 // 62f23d272783f1ffffff
+ VPTESTMD Y2, Y24, K7, K7 // 62f23d2727fa
+ VPTESTMD -15(R14)(R15*1), Y24, K7, K7 // 62923d2727bc3ef1ffffff
+ VPTESTMD -15(BX), Y24, K7, K7 // 62f23d2727bbf1ffffff
+ VPTESTMD Z2, Z18, K6, K5 // 62f26d4627ea
+ VPTESTMD Z21, Z18, K6, K5 // 62b26d4627ed
+ VPTESTMD 7(SI)(DI*4), Z18, K6, K5 // 62f26d4627acbe07000000
+ VPTESTMD -7(DI)(R8*2), Z18, K6, K5 // 62b26d4627ac47f9ffffff
+ VPTESTMD Z2, Z24, K6, K5 // 62f23d4627ea
+ VPTESTMD Z21, Z24, K6, K5 // 62b23d4627ed
+ VPTESTMD 7(SI)(DI*4), Z24, K6, K5 // 62f23d4627acbe07000000
+ VPTESTMD -7(DI)(R8*2), Z24, K6, K5 // 62b23d4627ac47f9ffffff
+ VPTESTMD Z2, Z18, K6, K4 // 62f26d4627e2
+ VPTESTMD Z21, Z18, K6, K4 // 62b26d4627e5
+ VPTESTMD 7(SI)(DI*4), Z18, K6, K4 // 62f26d4627a4be07000000
+ VPTESTMD -7(DI)(R8*2), Z18, K6, K4 // 62b26d4627a447f9ffffff
+ VPTESTMD Z2, Z24, K6, K4 // 62f23d4627e2
+ VPTESTMD Z21, Z24, K6, K4 // 62b23d4627e5
+ VPTESTMD 7(SI)(DI*4), Z24, K6, K4 // 62f23d4627a4be07000000
+ VPTESTMD -7(DI)(R8*2), Z24, K6, K4 // 62b23d4627a447f9ffffff
+ VPTESTMQ X24, X28, K3, K4 // 62929d0327e0
+ VPTESTMQ 17(SP)(BP*1), X28, K3, K4 // 62f29d0327a42c11000000
+ VPTESTMQ -7(CX)(DX*8), X28, K3, K4 // 62f29d0327a4d1f9ffffff
+ VPTESTMQ X24, X28, K3, K6 // 62929d0327f0
+ VPTESTMQ 17(SP)(BP*1), X28, K3, K6 // 62f29d0327b42c11000000
+ VPTESTMQ -7(CX)(DX*8), X28, K3, K6 // 62f29d0327b4d1f9ffffff
+ VPTESTMQ Y21, Y7, K7, K1 // 62b2c52f27cd
+ VPTESTMQ 7(AX)(CX*4), Y7, K7, K1 // 62f2c52f278c8807000000
+ VPTESTMQ 7(AX)(CX*1), Y7, K7, K1 // 62f2c52f278c0807000000
+ VPTESTMQ Y21, Y7, K7, K3 // 62b2c52f27dd
+ VPTESTMQ 7(AX)(CX*4), Y7, K7, K3 // 62f2c52f279c8807000000
+ VPTESTMQ 7(AX)(CX*1), Y7, K7, K3 // 62f2c52f279c0807000000
+ VPTESTMQ Z6, Z7, K4, K6 // 62f2c54c27f6
+ VPTESTMQ Z16, Z7, K4, K6 // 62b2c54c27f0
+ VPTESTMQ 17(SP), Z7, K4, K6 // 62f2c54c27b42411000000
+ VPTESTMQ -17(BP)(SI*4), Z7, K4, K6 // 62f2c54c27b4b5efffffff
+ VPTESTMQ Z6, Z13, K4, K6 // 62f2954c27f6
+ VPTESTMQ Z16, Z13, K4, K6 // 62b2954c27f0
+ VPTESTMQ 17(SP), Z13, K4, K6 // 62f2954c27b42411000000
+ VPTESTMQ -17(BP)(SI*4), Z13, K4, K6 // 62f2954c27b4b5efffffff
+ VPTESTMQ Z6, Z7, K4, K7 // 62f2c54c27fe
+ VPTESTMQ Z16, Z7, K4, K7 // 62b2c54c27f8
+ VPTESTMQ 17(SP), Z7, K4, K7 // 62f2c54c27bc2411000000
+ VPTESTMQ -17(BP)(SI*4), Z7, K4, K7 // 62f2c54c27bcb5efffffff
+ VPTESTMQ Z6, Z13, K4, K7 // 62f2954c27fe
+ VPTESTMQ Z16, Z13, K4, K7 // 62b2954c27f8
+ VPTESTMQ 17(SP), Z13, K4, K7 // 62f2954c27bc2411000000
+ VPTESTMQ -17(BP)(SI*4), Z13, K4, K7 // 62f2954c27bcb5efffffff
+ VPTESTNMD X1, X21, K2, K1 // 62f2560227c9
+ VPTESTNMD (R14), X21, K2, K1 // 62d25602270e
+ VPTESTNMD -7(DI)(R8*8), X21, K2, K1 // 62b25602278cc7f9ffffff
+ VPTESTNMD X1, X21, K2, K5 // 62f2560227e9
+ VPTESTNMD (R14), X21, K2, K5 // 62d25602272e
+ VPTESTNMD -7(DI)(R8*8), X21, K2, K5 // 62b2560227acc7f9ffffff
+ VPTESTNMD Y1, Y24, K2, K3 // 62f23e2227d9
+ VPTESTNMD 7(SI)(DI*4), Y24, K2, K3 // 62f23e22279cbe07000000
+ VPTESTNMD -7(DI)(R8*2), Y24, K2, K3 // 62b23e22279c47f9ffffff
+ VPTESTNMD Y1, Y24, K2, K1 // 62f23e2227c9
+ VPTESTNMD 7(SI)(DI*4), Y24, K2, K1 // 62f23e22278cbe07000000
+ VPTESTNMD -7(DI)(R8*2), Y24, K2, K1 // 62b23e22278c47f9ffffff
+ VPTESTNMD Z2, Z22, K3, K5 // 62f24e4327ea
+ VPTESTNMD Z31, Z22, K3, K5 // 62924e4327ef
+ VPTESTNMD -17(BP)(SI*8), Z22, K3, K5 // 62f24e4327acf5efffffff
+ VPTESTNMD (R15), Z22, K3, K5 // 62d24e43272f
+ VPTESTNMD Z2, Z7, K3, K5 // 62f2464b27ea
+ VPTESTNMD Z31, Z7, K3, K5 // 6292464b27ef
+ VPTESTNMD -17(BP)(SI*8), Z7, K3, K5 // 62f2464b27acf5efffffff
+ VPTESTNMD (R15), Z7, K3, K5 // 62d2464b272f
+ VPTESTNMD Z2, Z22, K3, K4 // 62f24e4327e2
+ VPTESTNMD Z31, Z22, K3, K4 // 62924e4327e7
+ VPTESTNMD -17(BP)(SI*8), Z22, K3, K4 // 62f24e4327a4f5efffffff
+ VPTESTNMD (R15), Z22, K3, K4 // 62d24e432727
+ VPTESTNMD Z2, Z7, K3, K4 // 62f2464b27e2
+ VPTESTNMD Z31, Z7, K3, K4 // 6292464b27e7
+ VPTESTNMD -17(BP)(SI*8), Z7, K3, K4 // 62f2464b27a4f5efffffff
+ VPTESTNMD (R15), Z7, K3, K4 // 62d2464b2727
+ VPTESTNMQ X31, X11, K3, K7 // 6292a60b27ff
+ VPTESTNMQ 99(R15)(R15*4), X11, K3, K7 // 6292a60b27bcbf63000000
+ VPTESTNMQ 15(DX), X11, K3, K7 // 62f2a60b27ba0f000000
+ VPTESTNMQ X31, X11, K3, K6 // 6292a60b27f7
+ VPTESTNMQ 99(R15)(R15*4), X11, K3, K6 // 6292a60b27b4bf63000000
+ VPTESTNMQ 15(DX), X11, K3, K6 // 62f2a60b27b20f000000
+ VPTESTNMQ Y18, Y5, K3, K4 // 62b2d62b27e2
+ VPTESTNMQ 17(SP), Y5, K3, K4 // 62f2d62b27a42411000000
+ VPTESTNMQ -17(BP)(SI*4), Y5, K3, K4 // 62f2d62b27a4b5efffffff
+ VPTESTNMQ Y18, Y5, K3, K6 // 62b2d62b27f2
+ VPTESTNMQ 17(SP), Y5, K3, K6 // 62f2d62b27b42411000000
+ VPTESTNMQ -17(BP)(SI*4), Y5, K3, K6 // 62f2d62b27b4b5efffffff
+ VPTESTNMQ Z1, Z20, K2, K0 // 62f2de4227c1
+ VPTESTNMQ Z3, Z20, K2, K0 // 62f2de4227c3
+ VPTESTNMQ 7(SI)(DI*8), Z20, K2, K0 // 62f2de422784fe07000000
+ VPTESTNMQ -15(R14), Z20, K2, K0 // 62d2de422786f1ffffff
+ VPTESTNMQ Z1, Z9, K2, K0 // 62f2b64a27c1
+ VPTESTNMQ Z3, Z9, K2, K0 // 62f2b64a27c3
+ VPTESTNMQ 7(SI)(DI*8), Z9, K2, K0 // 62f2b64a2784fe07000000
+ VPTESTNMQ -15(R14), Z9, K2, K0 // 62d2b64a2786f1ffffff
+ VPTESTNMQ Z1, Z20, K2, K7 // 62f2de4227f9
+ VPTESTNMQ Z3, Z20, K2, K7 // 62f2de4227fb
+ VPTESTNMQ 7(SI)(DI*8), Z20, K2, K7 // 62f2de4227bcfe07000000
+ VPTESTNMQ -15(R14), Z20, K2, K7 // 62d2de4227bef1ffffff
+ VPTESTNMQ Z1, Z9, K2, K7 // 62f2b64a27f9
+ VPTESTNMQ Z3, Z9, K2, K7 // 62f2b64a27fb
+ VPTESTNMQ 7(SI)(DI*8), Z9, K2, K7 // 62f2b64a27bcfe07000000
+ VPTESTNMQ -15(R14), Z9, K2, K7 // 62d2b64a27bef1ffffff
+ VPUNPCKHDQ X9, X7, K1, X20 // 62c145096ae1
+ VPUNPCKHDQ -7(CX)(DX*1), X7, K1, X20 // 62e145096aa411f9ffffff
+ VPUNPCKHDQ -15(R14)(R15*4), X7, K1, X20 // 628145096aa4bef1ffffff
+ VPUNPCKHDQ Y11, Y8, K7, Y1 // 62d13d2f6acb
+ VPUNPCKHDQ -17(BP)(SI*8), Y8, K7, Y1 // 62f13d2f6a8cf5efffffff
+ VPUNPCKHDQ (R15), Y8, K7, Y1 // 62d13d2f6a0f
+ VPUNPCKHDQ Z3, Z5, K2, Z19 // 62e1554a6adb
+ VPUNPCKHDQ Z5, Z5, K2, Z19 // 62e1554a6add
+ VPUNPCKHDQ -7(CX), Z5, K2, Z19 // 62e1554a6a99f9ffffff
+ VPUNPCKHDQ 15(DX)(BX*4), Z5, K2, Z19 // 62e1554a6a9c9a0f000000
+ VPUNPCKHDQ Z3, Z1, K2, Z19 // 62e1754a6adb
+ VPUNPCKHDQ Z5, Z1, K2, Z19 // 62e1754a6add
+ VPUNPCKHDQ -7(CX), Z1, K2, Z19 // 62e1754a6a99f9ffffff
+ VPUNPCKHDQ 15(DX)(BX*4), Z1, K2, Z19 // 62e1754a6a9c9a0f000000
+ VPUNPCKHDQ Z3, Z5, K2, Z15 // 6271554a6afb
+ VPUNPCKHDQ Z5, Z5, K2, Z15 // 6271554a6afd
+ VPUNPCKHDQ -7(CX), Z5, K2, Z15 // 6271554a6ab9f9ffffff
+ VPUNPCKHDQ 15(DX)(BX*4), Z5, K2, Z15 // 6271554a6abc9a0f000000
+ VPUNPCKHDQ Z3, Z1, K2, Z15 // 6271754a6afb
+ VPUNPCKHDQ Z5, Z1, K2, Z15 // 6271754a6afd
+ VPUNPCKHDQ -7(CX), Z1, K2, Z15 // 6271754a6ab9f9ffffff
+ VPUNPCKHDQ 15(DX)(BX*4), Z1, K2, Z15 // 6271754a6abc9a0f000000
+ VPUNPCKHQDQ X5, X14, K4, X7 // 62f18d0c6dfd
+ VPUNPCKHQDQ 15(DX)(BX*1), X14, K4, X7 // 62f18d0c6dbc1a0f000000
+ VPUNPCKHQDQ -7(CX)(DX*2), X14, K4, X7 // 62f18d0c6dbc51f9ffffff
+ VPUNPCKHQDQ Y16, Y17, K1, Y27 // 6221f5216dd8
+ VPUNPCKHQDQ 7(SI)(DI*8), Y17, K1, Y27 // 6261f5216d9cfe07000000
+ VPUNPCKHQDQ -15(R14), Y17, K1, Y27 // 6241f5216d9ef1ffffff
+ VPUNPCKHQDQ Z16, Z21, K3, Z14 // 6231d5436df0
+ VPUNPCKHQDQ Z9, Z21, K3, Z14 // 6251d5436df1
+ VPUNPCKHQDQ 99(R15)(R15*8), Z21, K3, Z14 // 6211d5436db4ff63000000
+ VPUNPCKHQDQ 7(AX)(CX*8), Z21, K3, Z14 // 6271d5436db4c807000000
+ VPUNPCKHQDQ Z16, Z8, K3, Z14 // 6231bd4b6df0
+ VPUNPCKHQDQ Z9, Z8, K3, Z14 // 6251bd4b6df1
+ VPUNPCKHQDQ 99(R15)(R15*8), Z8, K3, Z14 // 6211bd4b6db4ff63000000
+ VPUNPCKHQDQ 7(AX)(CX*8), Z8, K3, Z14 // 6271bd4b6db4c807000000
+ VPUNPCKHQDQ Z16, Z21, K3, Z15 // 6231d5436df8
+ VPUNPCKHQDQ Z9, Z21, K3, Z15 // 6251d5436df9
+ VPUNPCKHQDQ 99(R15)(R15*8), Z21, K3, Z15 // 6211d5436dbcff63000000
+ VPUNPCKHQDQ 7(AX)(CX*8), Z21, K3, Z15 // 6271d5436dbcc807000000
+ VPUNPCKHQDQ Z16, Z8, K3, Z15 // 6231bd4b6df8
+ VPUNPCKHQDQ Z9, Z8, K3, Z15 // 6251bd4b6df9
+ VPUNPCKHQDQ 99(R15)(R15*8), Z8, K3, Z15 // 6211bd4b6dbcff63000000
+ VPUNPCKHQDQ 7(AX)(CX*8), Z8, K3, Z15 // 6271bd4b6dbcc807000000
+ VPUNPCKLDQ X16, X30, K7, X0 // 62b10d0762c0
+ VPUNPCKLDQ 15(R8), X30, K7, X0 // 62d10d0762800f000000
+ VPUNPCKLDQ (BP), X30, K7, X0 // 62f10d07624500
+ VPUNPCKLDQ Y14, Y23, K4, Y1 // 62d1452462ce
+ VPUNPCKLDQ -7(CX), Y23, K4, Y1 // 62f145246289f9ffffff
+ VPUNPCKLDQ 15(DX)(BX*4), Y23, K4, Y1 // 62f14524628c9a0f000000
+ VPUNPCKLDQ Z9, Z9, K4, Z9 // 6251354c62c9
+ VPUNPCKLDQ Z28, Z9, K4, Z9 // 6211354c62cc
+ VPUNPCKLDQ 15(R8)(R14*4), Z9, K4, Z9 // 6211354c628cb00f000000
+ VPUNPCKLDQ -7(CX)(DX*4), Z9, K4, Z9 // 6271354c628c91f9ffffff
+ VPUNPCKLDQ Z9, Z25, K4, Z9 // 6251354462c9
+ VPUNPCKLDQ Z28, Z25, K4, Z9 // 6211354462cc
+ VPUNPCKLDQ 15(R8)(R14*4), Z25, K4, Z9 // 62113544628cb00f000000
+ VPUNPCKLDQ -7(CX)(DX*4), Z25, K4, Z9 // 62713544628c91f9ffffff
+ VPUNPCKLDQ Z9, Z9, K4, Z3 // 62d1354c62d9
+ VPUNPCKLDQ Z28, Z9, K4, Z3 // 6291354c62dc
+ VPUNPCKLDQ 15(R8)(R14*4), Z9, K4, Z3 // 6291354c629cb00f000000
+ VPUNPCKLDQ -7(CX)(DX*4), Z9, K4, Z3 // 62f1354c629c91f9ffffff
+ VPUNPCKLDQ Z9, Z25, K4, Z3 // 62d1354462d9
+ VPUNPCKLDQ Z28, Z25, K4, Z3 // 6291354462dc
+ VPUNPCKLDQ 15(R8)(R14*4), Z25, K4, Z3 // 62913544629cb00f000000
+ VPUNPCKLDQ -7(CX)(DX*4), Z25, K4, Z3 // 62f13544629c91f9ffffff
+ VPUNPCKLQDQ X14, X11, K7, X14 // 6251a50f6cf6
+ VPUNPCKLQDQ 15(R8)(R14*8), X11, K7, X14 // 6211a50f6cb4f00f000000
+ VPUNPCKLQDQ -15(R14)(R15*2), X11, K7, X14 // 6211a50f6cb47ef1ffffff
+ VPUNPCKLQDQ Y2, Y25, K2, Y31 // 6261b5226cfa
+ VPUNPCKLQDQ 99(R15)(R15*8), Y25, K2, Y31 // 6201b5226cbcff63000000
+ VPUNPCKLQDQ 7(AX)(CX*8), Y25, K2, Y31 // 6261b5226cbcc807000000
+ VPUNPCKLQDQ Z17, Z17, K5, Z20 // 62a1f5456ce1
+ VPUNPCKLQDQ Z23, Z17, K5, Z20 // 62a1f5456ce7
+ VPUNPCKLQDQ (R8), Z17, K5, Z20 // 62c1f5456c20
+ VPUNPCKLQDQ 15(DX)(BX*2), Z17, K5, Z20 // 62e1f5456ca45a0f000000
+ VPUNPCKLQDQ Z17, Z0, K5, Z20 // 62a1fd4d6ce1
+ VPUNPCKLQDQ Z23, Z0, K5, Z20 // 62a1fd4d6ce7
+ VPUNPCKLQDQ (R8), Z0, K5, Z20 // 62c1fd4d6c20
+ VPUNPCKLQDQ 15(DX)(BX*2), Z0, K5, Z20 // 62e1fd4d6ca45a0f000000
+ VPUNPCKLQDQ Z17, Z17, K5, Z0 // 62b1f5456cc1
+ VPUNPCKLQDQ Z23, Z17, K5, Z0 // 62b1f5456cc7
+ VPUNPCKLQDQ (R8), Z17, K5, Z0 // 62d1f5456c00
+ VPUNPCKLQDQ 15(DX)(BX*2), Z17, K5, Z0 // 62f1f5456c845a0f000000
+ VPUNPCKLQDQ Z17, Z0, K5, Z0 // 62b1fd4d6cc1
+ VPUNPCKLQDQ Z23, Z0, K5, Z0 // 62b1fd4d6cc7
+ VPUNPCKLQDQ (R8), Z0, K5, Z0 // 62d1fd4d6c00
+ VPUNPCKLQDQ 15(DX)(BX*2), Z0, K5, Z0 // 62f1fd4d6c845a0f000000
+ VPXORD X12, X23, K2, X26 // 62414502efd4
+ VPXORD 7(AX)(CX*4), X23, K2, X26 // 62614502ef948807000000
+ VPXORD 7(AX)(CX*1), X23, K2, X26 // 62614502ef940807000000
+ VPXORD Y9, Y22, K3, Y9 // 62514d23efc9
+ VPXORD (BX), Y22, K3, Y9 // 62714d23ef0b
+ VPXORD -17(BP)(SI*1), Y22, K3, Y9 // 62714d23ef8c35efffffff
+ VPXORD Z30, Z20, K3, Z1 // 62915d43efce
+ VPXORD Z5, Z20, K3, Z1 // 62f15d43efcd
+ VPXORD -17(BP)(SI*2), Z20, K3, Z1 // 62f15d43ef8c75efffffff
+ VPXORD 7(AX)(CX*2), Z20, K3, Z1 // 62f15d43ef8c4807000000
+ VPXORD Z30, Z9, K3, Z1 // 6291354befce
+ VPXORD Z5, Z9, K3, Z1 // 62f1354befcd
+ VPXORD -17(BP)(SI*2), Z9, K3, Z1 // 62f1354bef8c75efffffff
+ VPXORD 7(AX)(CX*2), Z9, K3, Z1 // 62f1354bef8c4807000000
+ VPXORD Z30, Z20, K3, Z9 // 62115d43efce
+ VPXORD Z5, Z20, K3, Z9 // 62715d43efcd
+ VPXORD -17(BP)(SI*2), Z20, K3, Z9 // 62715d43ef8c75efffffff
+ VPXORD 7(AX)(CX*2), Z20, K3, Z9 // 62715d43ef8c4807000000
+ VPXORD Z30, Z9, K3, Z9 // 6211354befce
+ VPXORD Z5, Z9, K3, Z9 // 6271354befcd
+ VPXORD -17(BP)(SI*2), Z9, K3, Z9 // 6271354bef8c75efffffff
+ VPXORD 7(AX)(CX*2), Z9, K3, Z9 // 6271354bef8c4807000000
+ VPXORQ X23, X23, K3, X16 // 62a1c503efc7
+ VPXORQ (SI), X23, K3, X16 // 62e1c503ef06
+ VPXORQ 7(SI)(DI*2), X23, K3, X16 // 62e1c503ef847e07000000
+ VPXORQ Y6, Y1, K2, Y14 // 6271f52aeff6
+ VPXORQ 15(R8)(R14*4), Y1, K2, Y14 // 6211f52aefb4b00f000000
+ VPXORQ -7(CX)(DX*4), Y1, K2, Y14 // 6271f52aefb491f9ffffff
+ VPXORQ Z16, Z7, K1, Z26 // 6221c549efd0
+ VPXORQ Z25, Z7, K1, Z26 // 6201c549efd1
+ VPXORQ 15(R8)(R14*1), Z7, K1, Z26 // 6201c549ef94300f000000
+ VPXORQ 15(R8)(R14*2), Z7, K1, Z26 // 6201c549ef94700f000000
+ VPXORQ Z16, Z21, K1, Z26 // 6221d541efd0
+ VPXORQ Z25, Z21, K1, Z26 // 6201d541efd1
+ VPXORQ 15(R8)(R14*1), Z21, K1, Z26 // 6201d541ef94300f000000
+ VPXORQ 15(R8)(R14*2), Z21, K1, Z26 // 6201d541ef94700f000000
+ VPXORQ Z16, Z7, K1, Z22 // 62a1c549eff0
+ VPXORQ Z25, Z7, K1, Z22 // 6281c549eff1
+ VPXORQ 15(R8)(R14*1), Z7, K1, Z22 // 6281c549efb4300f000000
+ VPXORQ 15(R8)(R14*2), Z7, K1, Z22 // 6281c549efb4700f000000
+ VPXORQ Z16, Z21, K1, Z22 // 62a1d541eff0
+ VPXORQ Z25, Z21, K1, Z22 // 6281d541eff1
+ VPXORQ 15(R8)(R14*1), Z21, K1, Z22 // 6281d541efb4300f000000
+ VPXORQ 15(R8)(R14*2), Z21, K1, Z22 // 6281d541efb4700f000000
+ VRCP14PD X11, K5, X31 // 6242fd0d4cfb
+ VRCP14PD 17(SP), K5, X31 // 6262fd0d4cbc2411000000
+ VRCP14PD -17(BP)(SI*4), K5, X31 // 6262fd0d4cbcb5efffffff
+ VRCP14PD Y23, K7, Y9 // 6232fd2f4ccf
+ VRCP14PD -17(BP)(SI*2), K7, Y9 // 6272fd2f4c8c75efffffff
+ VRCP14PD 7(AX)(CX*2), K7, Y9 // 6272fd2f4c8c4807000000
+ VRCP14PD Z0, K7, Z6 // 62f2fd4f4cf0
+ VRCP14PD Z8, K7, Z6 // 62d2fd4f4cf0
+ VRCP14PD (CX), K7, Z6 // 62f2fd4f4c31
+ VRCP14PD 99(R15), K7, Z6 // 62d2fd4f4cb763000000
+ VRCP14PD Z0, K7, Z2 // 62f2fd4f4cd0
+ VRCP14PD Z8, K7, Z2 // 62d2fd4f4cd0
+ VRCP14PD (CX), K7, Z2 // 62f2fd4f4c11
+ VRCP14PD 99(R15), K7, Z2 // 62d2fd4f4c9763000000
+ VRCP14PS X5, K6, X22 // 62e27d0e4cf5
+ VRCP14PS 7(AX), K6, X22 // 62e27d0e4cb007000000
+ VRCP14PS (DI), K6, X22 // 62e27d0e4c37
+ VRCP14PS Y5, K3, Y31 // 62627d2b4cfd
+ VRCP14PS 15(R8)(R14*1), K3, Y31 // 62027d2b4cbc300f000000
+ VRCP14PS 15(R8)(R14*2), K3, Y31 // 62027d2b4cbc700f000000
+ VRCP14PS Z14, K7, Z15 // 62527d4f4cfe
+ VRCP14PS Z27, K7, Z15 // 62127d4f4cfb
+ VRCP14PS 99(R15)(R15*2), K7, Z15 // 62127d4f4cbc7f63000000
+ VRCP14PS -7(DI), K7, Z15 // 62727d4f4cbff9ffffff
+ VRCP14PS Z14, K7, Z12 // 62527d4f4ce6
+ VRCP14PS Z27, K7, Z12 // 62127d4f4ce3
+ VRCP14PS 99(R15)(R15*2), K7, Z12 // 62127d4f4ca47f63000000
+ VRCP14PS -7(DI), K7, Z12 // 62727d4f4ca7f9ffffff
+ VRCP14SD X17, X0, K4, X14 // 6232fd0c4df1 or 6232fd2c4df1 or 6232fd4c4df1
+ VRCP14SD (SI), X0, K4, X14 // 6272fd0c4d36 or 6272fd2c4d36 or 6272fd4c4d36
+ VRCP14SD 7(SI)(DI*2), X0, K4, X14 // 6272fd0c4db47e07000000 or 6272fd2c4db47e07000000 or 6272fd4c4db47e07000000
+ VRCP14SS X11, X15, K4, X7 // 62d2050c4dfb or 62d2052c4dfb or 62d2054c4dfb
+ VRCP14SS -7(DI)(R8*1), X15, K4, X7 // 62b2050c4dbc07f9ffffff or 62b2052c4dbc07f9ffffff or 62b2054c4dbc07f9ffffff
+ VRCP14SS (SP), X15, K4, X7 // 62f2050c4d3c24 or 62f2052c4d3c24 or 62f2054c4d3c24
+ VRNDSCALEPD $64, X16, K4, X20 // 62a3fd0c09e040
+ VRNDSCALEPD $64, 7(SI)(DI*8), K4, X20 // 62e3fd0c09a4fe0700000040
+ VRNDSCALEPD $64, -15(R14), K4, X20 // 62c3fd0c09a6f1ffffff40
+ VRNDSCALEPD $27, Y2, K1, Y28 // 6263fd2909e21b
+ VRNDSCALEPD $27, (CX), K1, Y28 // 6263fd2909211b
+ VRNDSCALEPD $27, 99(R15), K1, Y28 // 6243fd2909a7630000001b
+ VRNDSCALEPD $47, Z21, K3, Z8 // 6233fd4b09c52f
+ VRNDSCALEPD $47, Z5, K3, Z8 // 6273fd4b09c52f
+ VRNDSCALEPD $47, Z21, K3, Z28 // 6223fd4b09e52f
+ VRNDSCALEPD $47, Z5, K3, Z28 // 6263fd4b09e52f
+ VRNDSCALEPD $82, Z12, K4, Z16 // 62c3fd4c09c452
+ VRNDSCALEPD $82, Z27, K4, Z16 // 6283fd4c09c352
+ VRNDSCALEPD $82, 15(R8), K4, Z16 // 62c3fd4c09800f00000052
+ VRNDSCALEPD $82, (BP), K4, Z16 // 62e3fd4c09450052
+ VRNDSCALEPD $82, Z12, K4, Z13 // 6253fd4c09ec52
+ VRNDSCALEPD $82, Z27, K4, Z13 // 6213fd4c09eb52
+ VRNDSCALEPD $82, 15(R8), K4, Z13 // 6253fd4c09a80f00000052
+ VRNDSCALEPD $82, (BP), K4, Z13 // 6273fd4c096d0052
+ VRNDSCALEPS $126, X6, K5, X12 // 62737d0d08e67e
+ VRNDSCALEPS $126, 7(SI)(DI*1), K5, X12 // 62737d0d08a43e070000007e
+ VRNDSCALEPS $126, 15(DX)(BX*8), K5, X12 // 62737d0d08a4da0f0000007e
+ VRNDSCALEPS $94, Y27, K7, Y24 // 62037d2f08c35e
+ VRNDSCALEPS $94, 99(R15)(R15*2), K7, Y24 // 62037d2f08847f630000005e
+ VRNDSCALEPS $94, -7(DI), K7, Y24 // 62637d2f0887f9ffffff5e
+ VRNDSCALEPS $121, Z6, K7, Z22 // 62e37d4f08f679
+ VRNDSCALEPS $121, Z8, K7, Z22 // 62c37d4f08f079
+ VRNDSCALEPS $121, Z6, K7, Z11 // 62737d4f08de79
+ VRNDSCALEPS $121, Z8, K7, Z11 // 62537d4f08d879
+ VRNDSCALEPS $13, Z12, K6, Z25 // 62437d4e08cc0d
+ VRNDSCALEPS $13, Z17, K6, Z25 // 62237d4e08c90d
+ VRNDSCALEPS $13, 15(R8)(R14*8), K6, Z25 // 62037d4e088cf00f0000000d
+ VRNDSCALEPS $13, -15(R14)(R15*2), K6, Z25 // 62037d4e088c7ef1ffffff0d
+ VRNDSCALEPS $13, Z12, K6, Z12 // 62537d4e08e40d
+ VRNDSCALEPS $13, Z17, K6, Z12 // 62337d4e08e10d
+ VRNDSCALEPS $13, 15(R8)(R14*8), K6, Z12 // 62137d4e08a4f00f0000000d
+ VRNDSCALEPS $13, -15(R14)(R15*2), K6, Z12 // 62137d4e08a47ef1ffffff0d
+ VRNDSCALESD $65, X6, X28, K3, X17 // 62e39d030bce41
+ VRNDSCALESD $67, X8, X8, K7, X1 // 62d3bd0f0bc843 or 62d3bd2f0bc843 or 62d3bd4f0bc843
+ VRNDSCALESD $67, 17(SP), X8, K7, X1 // 62f3bd0f0b8c241100000043 or 62f3bd2f0b8c241100000043 or 62f3bd4f0b8c241100000043
+ VRNDSCALESD $67, -17(BP)(SI*4), X8, K7, X1 // 62f3bd0f0b8cb5efffffff43 or 62f3bd2f0b8cb5efffffff43 or 62f3bd4f0b8cb5efffffff43
+ VRNDSCALESS $127, X11, X0, K4, X6 // 62d37d0c0af37f
+ VRNDSCALESS $0, X6, X6, K4, X16 // 62e34d0c0ac600 or 62e34d2c0ac600 or 62e34d4c0ac600
+ VRNDSCALESS $0, (AX), X6, K4, X16 // 62e34d0c0a0000 or 62e34d2c0a0000 or 62e34d4c0a0000
+ VRNDSCALESS $0, 7(SI), X6, K4, X16 // 62e34d0c0a860700000000 or 62e34d2c0a860700000000 or 62e34d4c0a860700000000
+ VRSQRT14PD X12, K7, X22 // 62c2fd0f4ef4
+ VRSQRT14PD -7(DI)(R8*1), K7, X22 // 62a2fd0f4eb407f9ffffff
+ VRSQRT14PD (SP), K7, X22 // 62e2fd0f4e3424
+ VRSQRT14PD Y11, K2, Y0 // 62d2fd2a4ec3
+ VRSQRT14PD -7(CX)(DX*1), K2, Y0 // 62f2fd2a4e8411f9ffffff
+ VRSQRT14PD -15(R14)(R15*4), K2, Y0 // 6292fd2a4e84bef1ffffff
+ VRSQRT14PD Z6, K5, Z9 // 6272fd4d4ece
+ VRSQRT14PD Z25, K5, Z9 // 6212fd4d4ec9
+ VRSQRT14PD -15(R14)(R15*1), K5, Z9 // 6212fd4d4e8c3ef1ffffff
+ VRSQRT14PD -15(BX), K5, Z9 // 6272fd4d4e8bf1ffffff
+ VRSQRT14PD Z6, K5, Z12 // 6272fd4d4ee6
+ VRSQRT14PD Z25, K5, Z12 // 6212fd4d4ee1
+ VRSQRT14PD -15(R14)(R15*1), K5, Z12 // 6212fd4d4ea43ef1ffffff
+ VRSQRT14PD -15(BX), K5, Z12 // 6272fd4d4ea3f1ffffff
+ VRSQRT14PS X28, K3, X16 // 62827d0b4ec4
+ VRSQRT14PS -7(CX), K3, X16 // 62e27d0b4e81f9ffffff
+ VRSQRT14PS 15(DX)(BX*4), K3, X16 // 62e27d0b4e849a0f000000
+ VRSQRT14PS Y3, K4, Y31 // 62627d2c4efb
+ VRSQRT14PS 15(DX)(BX*1), K4, Y31 // 62627d2c4ebc1a0f000000
+ VRSQRT14PS -7(CX)(DX*2), K4, Y31 // 62627d2c4ebc51f9ffffff
+ VRSQRT14PS Z8, K2, Z3 // 62d27d4a4ed8
+ VRSQRT14PS Z2, K2, Z3 // 62f27d4a4eda
+ VRSQRT14PS 7(AX)(CX*4), K2, Z3 // 62f27d4a4e9c8807000000
+ VRSQRT14PS 7(AX)(CX*1), K2, Z3 // 62f27d4a4e9c0807000000
+ VRSQRT14PS Z8, K2, Z21 // 62c27d4a4ee8
+ VRSQRT14PS Z2, K2, Z21 // 62e27d4a4eea
+ VRSQRT14PS 7(AX)(CX*4), K2, Z21 // 62e27d4a4eac8807000000
+ VRSQRT14PS 7(AX)(CX*1), K2, Z21 // 62e27d4a4eac0807000000
+ VRSQRT14SD X11, X15, K2, X8 // 6252850a4fc3 or 6252852a4fc3 or 6252854a4fc3
+ VRSQRT14SD 7(AX), X15, K2, X8 // 6272850a4f8007000000 or 6272852a4f8007000000 or 6272854a4f8007000000
+ VRSQRT14SD (DI), X15, K2, X8 // 6272850a4f07 or 6272852a4f07 or 6272854a4f07
+ VRSQRT14SS X13, X19, K3, X1 // 62d265034fcd or 62d265234fcd or 62d265434fcd
+ VRSQRT14SS (BX), X19, K3, X1 // 62f265034f0b or 62f265234f0b or 62f265434f0b
+ VRSQRT14SS -17(BP)(SI*1), X19, K3, X1 // 62f265034f8c35efffffff or 62f265234f8c35efffffff or 62f265434f8c35efffffff
+ VSCALEFPD X27, X2, K1, X2 // 6292ed092cd3
+ VSCALEFPD 99(R15)(R15*8), X2, K1, X2 // 6292ed092c94ff63000000
+ VSCALEFPD 7(AX)(CX*8), X2, K1, X2 // 62f2ed092c94c807000000
+ VSCALEFPD Y13, Y2, K1, Y14 // 6252ed292cf5
+ VSCALEFPD -17(BP), Y2, K1, Y14 // 6272ed292cb5efffffff
+ VSCALEFPD -15(R14)(R15*8), Y2, K1, Y14 // 6212ed292cb4fef1ffffff
+ VSCALEFPD Z7, Z2, K7, Z18 // 62e2ed4f2cd7
+ VSCALEFPD Z13, Z2, K7, Z18 // 62c2ed4f2cd5
+ VSCALEFPD Z7, Z21, K7, Z18 // 62e2d5472cd7
+ VSCALEFPD Z13, Z21, K7, Z18 // 62c2d5472cd5
+ VSCALEFPD Z7, Z2, K7, Z24 // 6262ed4f2cc7
+ VSCALEFPD Z13, Z2, K7, Z24 // 6242ed4f2cc5
+ VSCALEFPD Z7, Z21, K7, Z24 // 6262d5472cc7
+ VSCALEFPD Z13, Z21, K7, Z24 // 6242d5472cc5
+ VSCALEFPD Z1, Z6, K2, Z6 // 62f2cd4a2cf1
+ VSCALEFPD Z15, Z6, K2, Z6 // 62d2cd4a2cf7
+ VSCALEFPD 7(SI)(DI*4), Z6, K2, Z6 // 62f2cd4a2cb4be07000000
+ VSCALEFPD -7(DI)(R8*2), Z6, K2, Z6 // 62b2cd4a2cb447f9ffffff
+ VSCALEFPD Z1, Z22, K2, Z6 // 62f2cd422cf1
+ VSCALEFPD Z15, Z22, K2, Z6 // 62d2cd422cf7
+ VSCALEFPD 7(SI)(DI*4), Z22, K2, Z6 // 62f2cd422cb4be07000000
+ VSCALEFPD -7(DI)(R8*2), Z22, K2, Z6 // 62b2cd422cb447f9ffffff
+ VSCALEFPD Z1, Z6, K2, Z16 // 62e2cd4a2cc1
+ VSCALEFPD Z15, Z6, K2, Z16 // 62c2cd4a2cc7
+ VSCALEFPD 7(SI)(DI*4), Z6, K2, Z16 // 62e2cd4a2c84be07000000
+ VSCALEFPD -7(DI)(R8*2), Z6, K2, Z16 // 62a2cd4a2c8447f9ffffff
+ VSCALEFPD Z1, Z22, K2, Z16 // 62e2cd422cc1
+ VSCALEFPD Z15, Z22, K2, Z16 // 62c2cd422cc7
+ VSCALEFPD 7(SI)(DI*4), Z22, K2, Z16 // 62e2cd422c84be07000000
+ VSCALEFPD -7(DI)(R8*2), Z22, K2, Z16 // 62a2cd422c8447f9ffffff
+ VSCALEFPS X30, X22, K4, X26 // 62024d042cd6
+ VSCALEFPS (AX), X22, K4, X26 // 62624d042c10
+ VSCALEFPS 7(SI), X22, K4, X26 // 62624d042c9607000000
+ VSCALEFPS Y22, Y15, K1, Y27 // 622205292cde
+ VSCALEFPS 17(SP)(BP*2), Y15, K1, Y27 // 626205292c9c6c11000000
+ VSCALEFPS -7(DI)(R8*4), Y15, K1, Y27 // 622205292c9c87f9ffffff
+ VSCALEFPS Z22, Z18, K3, Z13 // 62326d432cee
+ VSCALEFPS Z7, Z18, K3, Z13 // 62726d432cef
+ VSCALEFPS Z22, Z8, K3, Z13 // 62323d4b2cee
+ VSCALEFPS Z7, Z8, K3, Z13 // 62723d4b2cef
+ VSCALEFPS Z1, Z20, K4, Z2 // 62f25d442cd1
+ VSCALEFPS Z3, Z20, K4, Z2 // 62f25d442cd3
+ VSCALEFPS 17(SP), Z20, K4, Z2 // 62f25d442c942411000000
+ VSCALEFPS -17(BP)(SI*4), Z20, K4, Z2 // 62f25d442c94b5efffffff
+ VSCALEFPS Z1, Z9, K4, Z2 // 62f2354c2cd1
+ VSCALEFPS Z3, Z9, K4, Z2 // 62f2354c2cd3
+ VSCALEFPS 17(SP), Z9, K4, Z2 // 62f2354c2c942411000000
+ VSCALEFPS -17(BP)(SI*4), Z9, K4, Z2 // 62f2354c2c94b5efffffff
+ VSCALEFPS Z1, Z20, K4, Z31 // 62625d442cf9
+ VSCALEFPS Z3, Z20, K4, Z31 // 62625d442cfb
+ VSCALEFPS 17(SP), Z20, K4, Z31 // 62625d442cbc2411000000
+ VSCALEFPS -17(BP)(SI*4), Z20, K4, Z31 // 62625d442cbcb5efffffff
+ VSCALEFPS Z1, Z9, K4, Z31 // 6262354c2cf9
+ VSCALEFPS Z3, Z9, K4, Z31 // 6262354c2cfb
+ VSCALEFPS 17(SP), Z9, K4, Z31 // 6262354c2cbc2411000000
+ VSCALEFPS -17(BP)(SI*4), Z9, K4, Z31 // 6262354c2cbcb5efffffff
+ VSCALEFSD X15, X11, K5, X3 // 62d2a50d2ddf
+ VSCALEFSD X6, X13, K7, X30 // 6262950f2df6 or 6262952f2df6 or 6262954f2df6
+ VSCALEFSD -17(BP)(SI*8), X13, K7, X30 // 6262950f2db4f5efffffff or 6262952f2db4f5efffffff or 6262954f2db4f5efffffff
+ VSCALEFSD (R15), X13, K7, X30 // 6242950f2d37 or 6242952f2d37 or 6242954f2d37
+ VSCALEFSS X30, X23, K7, X12 // 621245072de6
+ VSCALEFSS X2, X20, K6, X8 // 62725d062dc2 or 62725d262dc2 or 62725d462dc2
+ VSCALEFSS (R8), X20, K6, X8 // 62525d062d00 or 62525d262d00 or 62525d462d00
+ VSCALEFSS 15(DX)(BX*2), X20, K6, X8 // 62725d062d845a0f000000 or 62725d262d845a0f000000 or 62725d462d845a0f000000
+ VSCATTERDPD X9, K3, (DX)(X10*4) // 6232fd0ba20c92
+ VSCATTERDPD X9, K3, (SP)(X4*2) // 6272fd0ba20c64
+ VSCATTERDPD X9, K3, (R14)(X29*8) // 6212fd03a20cee
+ VSCATTERDPD Y20, K7, (AX)(X4*1) // 62e2fd2fa22420
+ VSCATTERDPD Y20, K7, (BP)(X10*2) // 62a2fd2fa2645500
+ VSCATTERDPD Y20, K7, (R10)(X29*8) // 6282fd27a224ea
+ VSCATTERDPD Z12, K4, (R10)(Y29*8) // 6212fd44a224ea
+ VSCATTERDPD Z16, K4, (R10)(Y29*8) // 6282fd44a204ea
+ VSCATTERDPD Z12, K4, (SP)(Y4*2) // 6272fd4ca22464
+ VSCATTERDPD Z16, K4, (SP)(Y4*2) // 62e2fd4ca20464
+ VSCATTERDPD Z12, K4, (DX)(Y10*4) // 6232fd4ca22492
+ VSCATTERDPD Z16, K4, (DX)(Y10*4) // 62a2fd4ca20492
+ VSCATTERDPS X26, K4, (DX)(X10*4) // 62227d0ca21492
+ VSCATTERDPS X26, K4, (SP)(X4*2) // 62627d0ca21464
+ VSCATTERDPS X26, K4, (R14)(X29*8) // 62027d04a214ee
+ VSCATTERDPS Y18, K7, (R14)(Y29*8) // 62827d27a214ee
+ VSCATTERDPS Y18, K7, (AX)(Y4*1) // 62e27d2fa21420
+ VSCATTERDPS Y18, K7, (BP)(Y10*2) // 62a27d2fa2545500
+ VSCATTERDPS Z28, K2, (BP)(Z10*2) // 62227d4aa2645500
+ VSCATTERDPS Z13, K2, (BP)(Z10*2) // 62327d4aa26c5500
+ VSCATTERDPS Z28, K2, (R10)(Z29*8) // 62027d42a224ea
+ VSCATTERDPS Z13, K2, (R10)(Z29*8) // 62127d42a22cea
+ VSCATTERDPS Z28, K2, (R14)(Z29*8) // 62027d42a224ee
+ VSCATTERDPS Z13, K2, (R14)(Z29*8) // 62127d42a22cee
+ VSCATTERQPD X19, K2, (AX)(X4*1) // 62e2fd0aa31c20
+ VSCATTERQPD X19, K2, (BP)(X10*2) // 62a2fd0aa35c5500
+ VSCATTERQPD X19, K2, (R10)(X29*8) // 6282fd02a31cea
+ VSCATTERQPD Y24, K1, (R10)(Y29*8) // 6202fd21a304ea
+ VSCATTERQPD Y24, K1, (SP)(Y4*2) // 6262fd29a30464
+ VSCATTERQPD Y24, K1, (DX)(Y10*4) // 6222fd29a30492
+ VSCATTERQPD Z14, K2, (DX)(Z10*4) // 6232fd4aa33492
+ VSCATTERQPD Z28, K2, (DX)(Z10*4) // 6222fd4aa32492
+ VSCATTERQPD Z14, K2, (AX)(Z4*1) // 6272fd4aa33420
+ VSCATTERQPD Z28, K2, (AX)(Z4*1) // 6262fd4aa32420
+ VSCATTERQPD Z14, K2, (SP)(Z4*2) // 6272fd4aa33464
+ VSCATTERQPD Z28, K2, (SP)(Z4*2) // 6262fd4aa32464
+ VSCATTERQPS X0, K1, (DX)(X10*4) // 62b27d09a30492
+ VSCATTERQPS X0, K1, (SP)(X4*2) // 62f27d09a30464
+ VSCATTERQPS X0, K1, (R14)(X29*8) // 62927d01a304ee
+ VSCATTERQPS X31, K7, (R14)(Y29*8) // 62027d27a33cee
+ VSCATTERQPS X31, K7, (AX)(Y4*1) // 62627d2fa33c20
+ VSCATTERQPS X31, K7, (BP)(Y10*2) // 62227d2fa37c5500
+ VSCATTERQPS Y9, K1, (BP)(Z10*2) // 62327d49a34c5500
+ VSCATTERQPS Y9, K1, (R10)(Z29*8) // 62127d41a30cea
+ VSCATTERQPS Y9, K1, (R14)(Z29*8) // 62127d41a30cee
+ VSHUFF32X4 $97, Y23, Y19, K1, Y3 // 62b3652123df61
+ VSHUFF32X4 $97, 15(R8), Y19, K1, Y3 // 62d3652123980f00000061
+ VSHUFF32X4 $97, (BP), Y19, K1, Y3 // 62f36521235d0061
+ VSHUFF32X4 $81, Z19, Z15, K1, Z3 // 62b3054923db51
+ VSHUFF32X4 $81, Z15, Z15, K1, Z3 // 62d3054923df51
+ VSHUFF32X4 $81, 7(AX), Z15, K1, Z3 // 62f3054923980700000051
+ VSHUFF32X4 $81, (DI), Z15, K1, Z3 // 62f30549231f51
+ VSHUFF32X4 $81, Z19, Z30, K1, Z3 // 62b30d4123db51
+ VSHUFF32X4 $81, Z15, Z30, K1, Z3 // 62d30d4123df51
+ VSHUFF32X4 $81, 7(AX), Z30, K1, Z3 // 62f30d4123980700000051
+ VSHUFF32X4 $81, (DI), Z30, K1, Z3 // 62f30d41231f51
+ VSHUFF32X4 $81, Z19, Z15, K1, Z12 // 6233054923e351
+ VSHUFF32X4 $81, Z15, Z15, K1, Z12 // 6253054923e751
+ VSHUFF32X4 $81, 7(AX), Z15, K1, Z12 // 6273054923a00700000051
+ VSHUFF32X4 $81, (DI), Z15, K1, Z12 // 62730549232751
+ VSHUFF32X4 $81, Z19, Z30, K1, Z12 // 62330d4123e351
+ VSHUFF32X4 $81, Z15, Z30, K1, Z12 // 62530d4123e751
+ VSHUFF32X4 $81, 7(AX), Z30, K1, Z12 // 62730d4123a00700000051
+ VSHUFF32X4 $81, (DI), Z30, K1, Z12 // 62730d41232751
+ VSHUFF64X2 $42, Y21, Y14, K7, Y19 // 62a38d2f23dd2a
+ VSHUFF64X2 $42, 15(R8)(R14*8), Y14, K7, Y19 // 62838d2f239cf00f0000002a
+ VSHUFF64X2 $42, -15(R14)(R15*2), Y14, K7, Y19 // 62838d2f239c7ef1ffffff2a
+ VSHUFF64X2 $79, Z14, Z3, K2, Z5 // 62d3e54a23ee4f
+ VSHUFF64X2 $79, Z15, Z3, K2, Z5 // 62d3e54a23ef4f
+ VSHUFF64X2 $79, 99(R15)(R15*1), Z3, K2, Z5 // 6293e54a23ac3f630000004f
+ VSHUFF64X2 $79, (DX), Z3, K2, Z5 // 62f3e54a232a4f
+ VSHUFF64X2 $79, Z14, Z5, K2, Z5 // 62d3d54a23ee4f
+ VSHUFF64X2 $79, Z15, Z5, K2, Z5 // 62d3d54a23ef4f
+ VSHUFF64X2 $79, 99(R15)(R15*1), Z5, K2, Z5 // 6293d54a23ac3f630000004f
+ VSHUFF64X2 $79, (DX), Z5, K2, Z5 // 62f3d54a232a4f
+ VSHUFF64X2 $79, Z14, Z3, K2, Z1 // 62d3e54a23ce4f
+ VSHUFF64X2 $79, Z15, Z3, K2, Z1 // 62d3e54a23cf4f
+ VSHUFF64X2 $79, 99(R15)(R15*1), Z3, K2, Z1 // 6293e54a238c3f630000004f
+ VSHUFF64X2 $79, (DX), Z3, K2, Z1 // 62f3e54a230a4f
+ VSHUFF64X2 $79, Z14, Z5, K2, Z1 // 62d3d54a23ce4f
+ VSHUFF64X2 $79, Z15, Z5, K2, Z1 // 62d3d54a23cf4f
+ VSHUFF64X2 $79, 99(R15)(R15*1), Z5, K2, Z1 // 6293d54a238c3f630000004f
+ VSHUFF64X2 $79, (DX), Z5, K2, Z1 // 62f3d54a230a4f
+ VSHUFI32X4 $64, Y2, Y16, K4, Y5 // 62f37d2443ea40
+ VSHUFI32X4 $64, -15(R14)(R15*1), Y16, K4, Y5 // 62937d2443ac3ef1ffffff40
+ VSHUFI32X4 $64, -15(BX), Y16, K4, Y5 // 62f37d2443abf1ffffff40
+ VSHUFI32X4 $27, Z20, Z16, K1, Z21 // 62a37d4143ec1b
+ VSHUFI32X4 $27, Z0, Z16, K1, Z21 // 62e37d4143e81b
+ VSHUFI32X4 $27, -17(BP)(SI*8), Z16, K1, Z21 // 62e37d4143acf5efffffff1b
+ VSHUFI32X4 $27, (R15), Z16, K1, Z21 // 62c37d41432f1b
+ VSHUFI32X4 $27, Z20, Z9, K1, Z21 // 62a3354943ec1b
+ VSHUFI32X4 $27, Z0, Z9, K1, Z21 // 62e3354943e81b
+ VSHUFI32X4 $27, -17(BP)(SI*8), Z9, K1, Z21 // 62e3354943acf5efffffff1b
+ VSHUFI32X4 $27, (R15), Z9, K1, Z21 // 62c33549432f1b
+ VSHUFI32X4 $27, Z20, Z16, K1, Z8 // 62337d4143c41b
+ VSHUFI32X4 $27, Z0, Z16, K1, Z8 // 62737d4143c01b
+ VSHUFI32X4 $27, -17(BP)(SI*8), Z16, K1, Z8 // 62737d414384f5efffffff1b
+ VSHUFI32X4 $27, (R15), Z16, K1, Z8 // 62537d4143071b
+ VSHUFI32X4 $27, Z20, Z9, K1, Z8 // 6233354943c41b
+ VSHUFI32X4 $27, Z0, Z9, K1, Z8 // 6273354943c01b
+ VSHUFI32X4 $27, -17(BP)(SI*8), Z9, K1, Z8 // 627335494384f5efffffff1b
+ VSHUFI32X4 $27, (R15), Z9, K1, Z8 // 6253354943071b
+ VSHUFI64X2 $47, Y6, Y20, K3, Y21 // 62e3dd2343ee2f
+ VSHUFI64X2 $47, 7(AX)(CX*4), Y20, K3, Y21 // 62e3dd2343ac88070000002f
+ VSHUFI64X2 $47, 7(AX)(CX*1), Y20, K3, Y21 // 62e3dd2343ac08070000002f
+ VSHUFI64X2 $82, Z0, Z0, K4, Z23 // 62e3fd4c43f852
+ VSHUFI64X2 $82, Z25, Z0, K4, Z23 // 6283fd4c43f952
+ VSHUFI64X2 $82, 7(SI)(DI*8), Z0, K4, Z23 // 62e3fd4c43bcfe0700000052
+ VSHUFI64X2 $82, -15(R14), Z0, K4, Z23 // 62c3fd4c43bef1ffffff52
+ VSHUFI64X2 $82, Z0, Z11, K4, Z23 // 62e3a54c43f852
+ VSHUFI64X2 $82, Z25, Z11, K4, Z23 // 6283a54c43f952
+ VSHUFI64X2 $82, 7(SI)(DI*8), Z11, K4, Z23 // 62e3a54c43bcfe0700000052
+ VSHUFI64X2 $82, -15(R14), Z11, K4, Z23 // 62c3a54c43bef1ffffff52
+ VSHUFI64X2 $82, Z0, Z0, K4, Z19 // 62e3fd4c43d852
+ VSHUFI64X2 $82, Z25, Z0, K4, Z19 // 6283fd4c43d952
+ VSHUFI64X2 $82, 7(SI)(DI*8), Z0, K4, Z19 // 62e3fd4c439cfe0700000052
+ VSHUFI64X2 $82, -15(R14), Z0, K4, Z19 // 62c3fd4c439ef1ffffff52
+ VSHUFI64X2 $82, Z0, Z11, K4, Z19 // 62e3a54c43d852
+ VSHUFI64X2 $82, Z25, Z11, K4, Z19 // 6283a54c43d952
+ VSHUFI64X2 $82, 7(SI)(DI*8), Z11, K4, Z19 // 62e3a54c439cfe0700000052
+ VSHUFI64X2 $82, -15(R14), Z11, K4, Z19 // 62c3a54c439ef1ffffff52
+ VSHUFPD $126, X8, X7, K5, X16 // 62c1c50dc6c07e
+ VSHUFPD $126, (BX), X7, K5, X16 // 62e1c50dc6037e
+ VSHUFPD $126, -17(BP)(SI*1), X7, K5, X16 // 62e1c50dc68435efffffff7e
+ VSHUFPD $94, Y11, Y6, K7, Y31 // 6241cd2fc6fb5e
+ VSHUFPD $94, (SI), Y6, K7, Y31 // 6261cd2fc63e5e
+ VSHUFPD $94, 7(SI)(DI*2), Y6, K7, Y31 // 6261cd2fc6bc7e070000005e
+ VSHUFPD $121, Z9, Z0, K7, Z24 // 6241fd4fc6c179
+ VSHUFPD $121, Z3, Z0, K7, Z24 // 6261fd4fc6c379
+ VSHUFPD $121, 7(SI)(DI*1), Z0, K7, Z24 // 6261fd4fc6843e0700000079
+ VSHUFPD $121, 15(DX)(BX*8), Z0, K7, Z24 // 6261fd4fc684da0f00000079
+ VSHUFPD $121, Z9, Z26, K7, Z24 // 6241ad47c6c179
+ VSHUFPD $121, Z3, Z26, K7, Z24 // 6261ad47c6c379
+ VSHUFPD $121, 7(SI)(DI*1), Z26, K7, Z24 // 6261ad47c6843e0700000079
+ VSHUFPD $121, 15(DX)(BX*8), Z26, K7, Z24 // 6261ad47c684da0f00000079
+ VSHUFPD $121, Z9, Z0, K7, Z12 // 6251fd4fc6e179
+ VSHUFPD $121, Z3, Z0, K7, Z12 // 6271fd4fc6e379
+ VSHUFPD $121, 7(SI)(DI*1), Z0, K7, Z12 // 6271fd4fc6a43e0700000079
+ VSHUFPD $121, 15(DX)(BX*8), Z0, K7, Z12 // 6271fd4fc6a4da0f00000079
+ VSHUFPD $121, Z9, Z26, K7, Z12 // 6251ad47c6e179
+ VSHUFPD $121, Z3, Z26, K7, Z12 // 6271ad47c6e379
+ VSHUFPD $121, 7(SI)(DI*1), Z26, K7, Z12 // 6271ad47c6a43e0700000079
+ VSHUFPD $121, 15(DX)(BX*8), Z26, K7, Z12 // 6271ad47c6a4da0f00000079
+ VSHUFPS $13, X15, X0, K6, X1 // 62d17c0ec6cf0d
+ VSHUFPS $13, 15(R8)(R14*4), X0, K6, X1 // 62917c0ec68cb00f0000000d
+ VSHUFPS $13, -7(CX)(DX*4), X0, K6, X1 // 62f17c0ec68c91f9ffffff0d
+ VSHUFPS $65, Y6, Y7, K3, Y19 // 62e1442bc6de41
+ VSHUFPS $65, 17(SP)(BP*8), Y7, K3, Y19 // 62e1442bc69cec1100000041
+ VSHUFPS $65, 17(SP)(BP*4), Y7, K3, Y19 // 62e1442bc69cac1100000041
+ VSHUFPS $67, Z20, Z9, K7, Z9 // 6231344fc6cc43
+ VSHUFPS $67, Z0, Z9, K7, Z9 // 6271344fc6c843
+ VSHUFPS $67, -7(DI)(R8*1), Z9, K7, Z9 // 6231344fc68c07f9ffffff43
+ VSHUFPS $67, (SP), Z9, K7, Z9 // 6271344fc60c2443
+ VSHUFPS $67, Z20, Z28, K7, Z9 // 62311c47c6cc43
+ VSHUFPS $67, Z0, Z28, K7, Z9 // 62711c47c6c843
+ VSHUFPS $67, -7(DI)(R8*1), Z28, K7, Z9 // 62311c47c68c07f9ffffff43
+ VSHUFPS $67, (SP), Z28, K7, Z9 // 62711c47c60c2443
+ VSHUFPS $67, Z20, Z9, K7, Z25 // 6221344fc6cc43
+ VSHUFPS $67, Z0, Z9, K7, Z25 // 6261344fc6c843
+ VSHUFPS $67, -7(DI)(R8*1), Z9, K7, Z25 // 6221344fc68c07f9ffffff43
+ VSHUFPS $67, (SP), Z9, K7, Z25 // 6261344fc60c2443
+ VSHUFPS $67, Z20, Z28, K7, Z25 // 62211c47c6cc43
+ VSHUFPS $67, Z0, Z28, K7, Z25 // 62611c47c6c843
+ VSHUFPS $67, -7(DI)(R8*1), Z28, K7, Z25 // 62211c47c68c07f9ffffff43
+ VSHUFPS $67, (SP), Z28, K7, Z25 // 62611c47c60c2443
+ VSQRTPD X16, K4, X0 // 62b1fd0c51c0
+ VSQRTPD (R8), K4, X0 // 62d1fd0c5100
+ VSQRTPD 15(DX)(BX*2), K4, X0 // 62f1fd0c51845a0f000000
+ VSQRTPD Y3, K4, Y0 // 62f1fd2c51c3
+ VSQRTPD 7(SI)(DI*4), K4, Y0 // 62f1fd2c5184be07000000
+ VSQRTPD -7(DI)(R8*2), K4, Y0 // 62b1fd2c518447f9ffffff
+ VSQRTPD Z17, K7, Z17 // 62a1fd4f51c9
+ VSQRTPD Z23, K7, Z17 // 62a1fd4f51cf
+ VSQRTPD Z17, K7, Z0 // 62b1fd4f51c1
+ VSQRTPD Z23, K7, Z0 // 62b1fd4f51c7
+ VSQRTPD Z21, K2, Z31 // 6221fd4a51fd
+ VSQRTPD Z9, K2, Z31 // 6241fd4a51f9
+ VSQRTPD -7(CX), K2, Z31 // 6261fd4a51b9f9ffffff
+ VSQRTPD 15(DX)(BX*4), K2, Z31 // 6261fd4a51bc9a0f000000
+ VSQRTPD Z21, K2, Z0 // 62b1fd4a51c5
+ VSQRTPD Z9, K2, Z0 // 62d1fd4a51c1
+ VSQRTPD -7(CX), K2, Z0 // 62f1fd4a5181f9ffffff
+ VSQRTPD 15(DX)(BX*4), K2, Z0 // 62f1fd4a51849a0f000000
+ VSQRTPS X0, K5, X21 // 62e17c0d51e8
+ VSQRTPS 17(SP)(BP*1), K5, X21 // 62e17c0d51ac2c11000000
+ VSQRTPS -7(CX)(DX*8), K5, X21 // 62e17c0d51acd1f9ffffff
+ VSQRTPS Y20, K3, Y5 // 62b17c2b51ec
+ VSQRTPS 17(SP), K3, Y5 // 62f17c2b51ac2411000000
+ VSQRTPS -17(BP)(SI*4), K3, Y5 // 62f17c2b51acb5efffffff
+ VSQRTPS Z1, K4, Z6 // 62f17c4c51f1
+ VSQRTPS Z9, K4, Z6 // 62d17c4c51f1
+ VSQRTPS Z1, K4, Z9 // 62717c4c51c9
+ VSQRTPS Z9, K4, Z9 // 62517c4c51c9
+ VSQRTPS Z30, K2, Z20 // 62817c4a51e6
+ VSQRTPS Z5, K2, Z20 // 62e17c4a51e5
+ VSQRTPS 99(R15)(R15*8), K2, Z20 // 62817c4a51a4ff63000000
+ VSQRTPS 7(AX)(CX*8), K2, Z20 // 62e17c4a51a4c807000000
+ VSQRTPS Z30, K2, Z9 // 62117c4a51ce
+ VSQRTPS Z5, K2, Z9 // 62717c4a51cd
+ VSQRTPS 99(R15)(R15*8), K2, Z9 // 62117c4a518cff63000000
+ VSQRTPS 7(AX)(CX*8), K2, Z9 // 62717c4a518cc807000000
+ VSQRTSD X7, X22, K2, X28 // 6261cf0251e7
+ VSQRTSD X16, X7, K3, X19 // 62a1c70b51d8 or 62a1c72b51d8 or 62a1c74b51d8
+ VSQRTSD 7(SI)(DI*8), X7, K3, X19 // 62e1c70b519cfe07000000 or 62e1c72b519cfe07000000 or 62e1c74b519cfe07000000
+ VSQRTSD -15(R14), X7, K3, X19 // 62c1c70b519ef1ffffff or 62c1c72b519ef1ffffff or 62c1c74b519ef1ffffff
+ VSQRTSS X7, X1, K3, X31 // 6261760b51ff
+ VSQRTSS X12, X15, K3, X9 // 6251060b51cc or 6251062b51cc or 6251064b51cc
+ VSQRTSS 17(SP)(BP*1), X15, K3, X9 // 6271060b518c2c11000000 or 6271062b518c2c11000000 or 6271064b518c2c11000000
+ VSQRTSS -7(CX)(DX*8), X15, K3, X9 // 6271060b518cd1f9ffffff or 6271062b518cd1f9ffffff or 6271064b518cd1f9ffffff
+ VSUBPD X14, X12, K2, X0 // 62d19d0a5cc6
+ VSUBPD -17(BP)(SI*2), X12, K2, X0 // 62f19d0a5c8475efffffff
+ VSUBPD 7(AX)(CX*2), X12, K2, X0 // 62f19d0a5c844807000000
+ VSUBPD Y5, Y3, K1, Y12 // 6271e5295ce5
+ VSUBPD 7(AX), Y3, K1, Y12 // 6271e5295ca007000000
+ VSUBPD (DI), Y3, K1, Y12 // 6271e5295c27
+ VSUBPD Z16, Z7, K2, Z26 // 6221c54a5cd0
+ VSUBPD Z25, Z7, K2, Z26 // 6201c54a5cd1
+ VSUBPD Z16, Z21, K2, Z26 // 6221d5425cd0
+ VSUBPD Z25, Z21, K2, Z26 // 6201d5425cd1
+ VSUBPD Z16, Z7, K2, Z22 // 62a1c54a5cf0
+ VSUBPD Z25, Z7, K2, Z22 // 6281c54a5cf1
+ VSUBPD Z16, Z21, K2, Z22 // 62a1d5425cf0
+ VSUBPD Z25, Z21, K2, Z22 // 6281d5425cf1
+ VSUBPD Z21, Z12, K1, Z14 // 62319d495cf5
+ VSUBPD Z9, Z12, K1, Z14 // 62519d495cf1
+ VSUBPD (AX), Z12, K1, Z14 // 62719d495c30
+ VSUBPD 7(SI), Z12, K1, Z14 // 62719d495cb607000000
+ VSUBPD Z21, Z13, K1, Z14 // 623195495cf5
+ VSUBPD Z9, Z13, K1, Z14 // 625195495cf1
+ VSUBPD (AX), Z13, K1, Z14 // 627195495c30
+ VSUBPD 7(SI), Z13, K1, Z14 // 627195495cb607000000
+ VSUBPD Z21, Z12, K1, Z13 // 62319d495ced
+ VSUBPD Z9, Z12, K1, Z13 // 62519d495ce9
+ VSUBPD (AX), Z12, K1, Z13 // 62719d495c28
+ VSUBPD 7(SI), Z12, K1, Z13 // 62719d495cae07000000
+ VSUBPD Z21, Z13, K1, Z13 // 623195495ced
+ VSUBPD Z9, Z13, K1, Z13 // 625195495ce9
+ VSUBPD (AX), Z13, K1, Z13 // 627195495c28
+ VSUBPD 7(SI), Z13, K1, Z13 // 627195495cae07000000
+ VSUBPS X15, X17, K7, X5 // 62d174075cef
+ VSUBPS 15(R8)(R14*1), X17, K7, X5 // 629174075cac300f000000
+ VSUBPS 15(R8)(R14*2), X17, K7, X5 // 629174075cac700f000000
+ VSUBPS Y0, Y7, K1, Y28 // 626144295ce0
+ VSUBPS 99(R15)(R15*1), Y7, K1, Y28 // 620144295ca43f63000000
+ VSUBPS (DX), Y7, K1, Y28 // 626144295c22
+ VSUBPS Z23, Z27, K1, Z2 // 62b124415cd7
+ VSUBPS Z9, Z27, K1, Z2 // 62d124415cd1
+ VSUBPS Z23, Z25, K1, Z2 // 62b134415cd7
+ VSUBPS Z9, Z25, K1, Z2 // 62d134415cd1
+ VSUBPS Z23, Z27, K1, Z7 // 62b124415cff
+ VSUBPS Z9, Z27, K1, Z7 // 62d124415cf9
+ VSUBPS Z23, Z25, K1, Z7 // 62b134415cff
+ VSUBPS Z9, Z25, K1, Z7 // 62d134415cf9
+ VSUBPS Z14, Z3, K1, Z27 // 624164495cde
+ VSUBPS Z7, Z3, K1, Z27 // 626164495cdf
+ VSUBPS (BX), Z3, K1, Z27 // 626164495c1b
+ VSUBPS -17(BP)(SI*1), Z3, K1, Z27 // 626164495c9c35efffffff
+ VSUBPS Z14, Z0, K1, Z27 // 62417c495cde
+ VSUBPS Z7, Z0, K1, Z27 // 62617c495cdf
+ VSUBPS (BX), Z0, K1, Z27 // 62617c495c1b
+ VSUBPS -17(BP)(SI*1), Z0, K1, Z27 // 62617c495c9c35efffffff
+ VSUBPS Z14, Z3, K1, Z14 // 625164495cf6
+ VSUBPS Z7, Z3, K1, Z14 // 627164495cf7
+ VSUBPS (BX), Z3, K1, Z14 // 627164495c33
+ VSUBPS -17(BP)(SI*1), Z3, K1, Z14 // 627164495cb435efffffff
+ VSUBPS Z14, Z0, K1, Z14 // 62517c495cf6
+ VSUBPS Z7, Z0, K1, Z14 // 62717c495cf7
+ VSUBPS (BX), Z0, K1, Z14 // 62717c495c33
+ VSUBPS -17(BP)(SI*1), Z0, K1, Z14 // 62717c495cb435efffffff
+ VSUBSD X26, X3, K7, X8 // 6211e70f5cc2
+ VSUBSD X28, X13, K2, X23 // 6281970a5cfc or 6281972a5cfc or 6281974a5cfc
+ VSUBSD 7(SI)(DI*1), X13, K2, X23 // 62e1970a5cbc3e07000000 or 62e1972a5cbc3e07000000 or 62e1974a5cbc3e07000000
+ VSUBSD 15(DX)(BX*8), X13, K2, X23 // 62e1970a5cbcda0f000000 or 62e1972a5cbcda0f000000 or 62e1974a5cbcda0f000000
+ VSUBSS X15, X9, K4, X24 // 6241360c5cc7
+ VSUBSS X21, X18, K1, X26 // 62216e015cd5 or 62216e215cd5 or 62216e415cd5
+ VSUBSS -17(BP)(SI*2), X18, K1, X26 // 62616e015c9475efffffff or 62616e215c9475efffffff or 62616e415c9475efffffff
+ VSUBSS 7(AX)(CX*2), X18, K1, X26 // 62616e015c944807000000 or 62616e215c944807000000 or 62616e415c944807000000
+ VUCOMISD X3, X31 // 6261fd082efb or 6261fd282efb or 6261fd482efb
+ VUCOMISD -7(DI)(R8*1), X31 // 6221fd082ebc07f9ffffff or 6221fd282ebc07f9ffffff or 6221fd482ebc07f9ffffff
+ VUCOMISD (SP), X31 // 6261fd082e3c24 or 6261fd282e3c24 or 6261fd482e3c24
+ VUCOMISS X24, X0 // 62917c082ec0 or 62917c282ec0 or 62917c482ec0
+ VUNPCKHPD X9, X7, K3, X20 // 62c1c50b15e1
+ VUNPCKHPD (R14), X7, K3, X20 // 62c1c50b1526
+ VUNPCKHPD -7(DI)(R8*8), X7, K3, X20 // 62a1c50b15a4c7f9ffffff
+ VUNPCKHPD Y12, Y13, K4, Y22 // 62c1952c15f4
+ VUNPCKHPD -17(BP)(SI*8), Y13, K4, Y22 // 62e1952c15b4f5efffffff
+ VUNPCKHPD (R15), Y13, K4, Y22 // 62c1952c1537
+ VUNPCKHPD Z1, Z22, K5, Z8 // 6271cd4515c1
+ VUNPCKHPD Z16, Z22, K5, Z8 // 6231cd4515c0
+ VUNPCKHPD 15(R8)(R14*4), Z22, K5, Z8 // 6211cd451584b00f000000
+ VUNPCKHPD -7(CX)(DX*4), Z22, K5, Z8 // 6271cd45158491f9ffffff
+ VUNPCKHPD Z1, Z25, K5, Z8 // 6271b54515c1
+ VUNPCKHPD Z16, Z25, K5, Z8 // 6231b54515c0
+ VUNPCKHPD 15(R8)(R14*4), Z25, K5, Z8 // 6211b5451584b00f000000
+ VUNPCKHPD -7(CX)(DX*4), Z25, K5, Z8 // 6271b545158491f9ffffff
+ VUNPCKHPD Z1, Z22, K5, Z24 // 6261cd4515c1
+ VUNPCKHPD Z16, Z22, K5, Z24 // 6221cd4515c0
+ VUNPCKHPD 15(R8)(R14*4), Z22, K5, Z24 // 6201cd451584b00f000000
+ VUNPCKHPD -7(CX)(DX*4), Z22, K5, Z24 // 6261cd45158491f9ffffff
+ VUNPCKHPD Z1, Z25, K5, Z24 // 6261b54515c1
+ VUNPCKHPD Z16, Z25, K5, Z24 // 6221b54515c0
+ VUNPCKHPD 15(R8)(R14*4), Z25, K5, Z24 // 6201b5451584b00f000000
+ VUNPCKHPD -7(CX)(DX*4), Z25, K5, Z24 // 6261b545158491f9ffffff
+ VUNPCKHPS X5, X14, K7, X7 // 62f10c0f15fd
+ VUNPCKHPS 99(R15)(R15*4), X14, K7, X7 // 62910c0f15bcbf63000000
+ VUNPCKHPS 15(DX), X14, K7, X7 // 62f10c0f15ba0f000000
+ VUNPCKHPS Y17, Y14, K7, Y1 // 62b10c2f15c9
+ VUNPCKHPS 7(SI)(DI*8), Y14, K7, Y1 // 62f10c2f158cfe07000000
+ VUNPCKHPS -15(R14), Y14, K7, Y1 // 62d10c2f158ef1ffffff
+ VUNPCKHPS Z15, Z0, K6, Z6 // 62d17c4e15f7
+ VUNPCKHPS Z12, Z0, K6, Z6 // 62d17c4e15f4
+ VUNPCKHPS (R8), Z0, K6, Z6 // 62d17c4e1530
+ VUNPCKHPS 15(DX)(BX*2), Z0, K6, Z6 // 62f17c4e15b45a0f000000
+ VUNPCKHPS Z15, Z8, K6, Z6 // 62d13c4e15f7
+ VUNPCKHPS Z12, Z8, K6, Z6 // 62d13c4e15f4
+ VUNPCKHPS (R8), Z8, K6, Z6 // 62d13c4e1530
+ VUNPCKHPS 15(DX)(BX*2), Z8, K6, Z6 // 62f13c4e15b45a0f000000
+ VUNPCKHPS Z15, Z0, K6, Z2 // 62d17c4e15d7
+ VUNPCKHPS Z12, Z0, K6, Z2 // 62d17c4e15d4
+ VUNPCKHPS (R8), Z0, K6, Z2 // 62d17c4e1510
+ VUNPCKHPS 15(DX)(BX*2), Z0, K6, Z2 // 62f17c4e15945a0f000000
+ VUNPCKHPS Z15, Z8, K6, Z2 // 62d13c4e15d7
+ VUNPCKHPS Z12, Z8, K6, Z2 // 62d13c4e15d4
+ VUNPCKHPS (R8), Z8, K6, Z2 // 62d13c4e1510
+ VUNPCKHPS 15(DX)(BX*2), Z8, K6, Z2 // 62f13c4e15945a0f000000
+ VUNPCKLPD X21, X3, K3, X31 // 6221e50b14fd
+ VUNPCKLPD (CX), X3, K3, X31 // 6261e50b1439
+ VUNPCKLPD 99(R15), X3, K3, X31 // 6241e50b14bf63000000
+ VUNPCKLPD Y31, Y9, K7, Y7 // 6291b52f14ff
+ VUNPCKLPD 7(SI)(DI*1), Y9, K7, Y7 // 62f1b52f14bc3e07000000
+ VUNPCKLPD 15(DX)(BX*8), Y9, K7, Y7 // 62f1b52f14bcda0f000000
+ VUNPCKLPD Z13, Z11, K4, Z14 // 6251a54c14f5
+ VUNPCKLPD Z14, Z11, K4, Z14 // 6251a54c14f6
+ VUNPCKLPD 17(SP)(BP*1), Z11, K4, Z14 // 6271a54c14b42c11000000
+ VUNPCKLPD -7(CX)(DX*8), Z11, K4, Z14 // 6271a54c14b4d1f9ffffff
+ VUNPCKLPD Z13, Z5, K4, Z14 // 6251d54c14f5
+ VUNPCKLPD Z14, Z5, K4, Z14 // 6251d54c14f6
+ VUNPCKLPD 17(SP)(BP*1), Z5, K4, Z14 // 6271d54c14b42c11000000
+ VUNPCKLPD -7(CX)(DX*8), Z5, K4, Z14 // 6271d54c14b4d1f9ffffff
+ VUNPCKLPD Z13, Z11, K4, Z27 // 6241a54c14dd
+ VUNPCKLPD Z14, Z11, K4, Z27 // 6241a54c14de
+ VUNPCKLPD 17(SP)(BP*1), Z11, K4, Z27 // 6261a54c149c2c11000000
+ VUNPCKLPD -7(CX)(DX*8), Z11, K4, Z27 // 6261a54c149cd1f9ffffff
+ VUNPCKLPD Z13, Z5, K4, Z27 // 6241d54c14dd
+ VUNPCKLPD Z14, Z5, K4, Z27 // 6241d54c14de
+ VUNPCKLPD 17(SP)(BP*1), Z5, K4, Z27 // 6261d54c149c2c11000000
+ VUNPCKLPD -7(CX)(DX*8), Z5, K4, Z27 // 6261d54c149cd1f9ffffff
+ VUNPCKLPS X13, X11, K4, X1 // 62d1240c14cd
+ VUNPCKLPS 99(R15)(R15*2), X11, K4, X1 // 6291240c148c7f63000000
+ VUNPCKLPS -7(DI), X11, K4, X1 // 62f1240c148ff9ffffff
+ VUNPCKLPS Y28, Y1, K7, Y8 // 6211742f14c4
+ VUNPCKLPS -7(DI)(R8*1), Y1, K7, Y8 // 6231742f148407f9ffffff
+ VUNPCKLPS (SP), Y1, K7, Y8 // 6271742f140424
+ VUNPCKLPS Z6, Z2, K2, Z5 // 62f16c4a14ee
+ VUNPCKLPS Z14, Z2, K2, Z5 // 62d16c4a14ee
+ VUNPCKLPS -17(BP)(SI*2), Z2, K2, Z5 // 62f16c4a14ac75efffffff
+ VUNPCKLPS 7(AX)(CX*2), Z2, K2, Z5 // 62f16c4a14ac4807000000
+ VUNPCKLPS Z6, Z2, K2, Z23 // 62e16c4a14fe
+ VUNPCKLPS Z14, Z2, K2, Z23 // 62c16c4a14fe
+ VUNPCKLPS -17(BP)(SI*2), Z2, K2, Z23 // 62e16c4a14bc75efffffff
+ VUNPCKLPS 7(AX)(CX*2), Z2, K2, Z23 // 62e16c4a14bc4807000000
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/avx512pf.s b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512pf.s
new file mode 100644
index 0000000..1b3cce7
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/avx512pf.s
@@ -0,0 +1,54 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_avx512pf(SB), NOSPLIT, $0
+ VGATHERPF0DPD K5, (R10)(Y29*8) // 6292fd45c60cea
+ VGATHERPF0DPD K5, (SP)(Y4*2) // 62f2fd4dc60c64
+ VGATHERPF0DPD K5, (DX)(Y10*4) // 62b2fd4dc60c92
+ VGATHERPF0DPS K3, (BP)(Z10*2) // 62b27d4bc64c5500
+ VGATHERPF0DPS K3, (R10)(Z29*8) // 62927d43c60cea
+ VGATHERPF0DPS K3, (R14)(Z29*8) // 62927d43c60cee
+ VGATHERPF0QPD K4, (DX)(Z10*4) // 62b2fd4cc70c92
+ VGATHERPF0QPD K4, (AX)(Z4*1) // 62f2fd4cc70c20
+ VGATHERPF0QPD K4, (SP)(Z4*2) // 62f2fd4cc70c64
+ VGATHERPF0QPS K2, (BP)(Z10*2) // 62b27d4ac74c5500
+ VGATHERPF0QPS K2, (R10)(Z29*8) // 62927d42c70cea
+ VGATHERPF0QPS K2, (R14)(Z29*8) // 62927d42c70cee
+ VGATHERPF1DPD K2, (R14)(Y29*8) // 6292fd42c614ee
+ VGATHERPF1DPD K2, (AX)(Y4*1) // 62f2fd4ac61420
+ VGATHERPF1DPD K2, (BP)(Y10*2) // 62b2fd4ac6545500
+ VGATHERPF1DPS K3, (DX)(Z10*4) // 62b27d4bc61492
+ VGATHERPF1DPS K3, (AX)(Z4*1) // 62f27d4bc61420
+ VGATHERPF1DPS K3, (SP)(Z4*2) // 62f27d4bc61464
+ VGATHERPF1QPD K3, (DX)(Z10*4) // 62b2fd4bc71492
+ VGATHERPF1QPD K3, (AX)(Z4*1) // 62f2fd4bc71420
+ VGATHERPF1QPD K3, (SP)(Z4*2) // 62f2fd4bc71464
+ VGATHERPF1QPS K3, (BP)(Z10*2) // 62b27d4bc7545500
+ VGATHERPF1QPS K3, (R10)(Z29*8) // 62927d43c714ea
+ VGATHERPF1QPS K3, (R14)(Z29*8) // 62927d43c714ee
+ VSCATTERPF0DPD K5, (R10)(Y29*8) // 6292fd45c62cea
+ VSCATTERPF0DPD K5, (SP)(Y4*2) // 62f2fd4dc62c64
+ VSCATTERPF0DPD K5, (DX)(Y10*4) // 62b2fd4dc62c92
+ VSCATTERPF0DPS K3, (DX)(Z10*4) // 62b27d4bc62c92
+ VSCATTERPF0DPS K3, (AX)(Z4*1) // 62f27d4bc62c20
+ VSCATTERPF0DPS K3, (SP)(Z4*2) // 62f27d4bc62c64
+ VSCATTERPF0QPD K4, (DX)(Z10*4) // 62b2fd4cc72c92
+ VSCATTERPF0QPD K4, (AX)(Z4*1) // 62f2fd4cc72c20
+ VSCATTERPF0QPD K4, (SP)(Z4*2) // 62f2fd4cc72c64
+ VSCATTERPF0QPS K2, (BP)(Z10*2) // 62b27d4ac76c5500
+ VSCATTERPF0QPS K2, (R10)(Z29*8) // 62927d42c72cea
+ VSCATTERPF0QPS K2, (R14)(Z29*8) // 62927d42c72cee
+ VSCATTERPF1DPD K2, (R14)(Y29*8) // 6292fd42c634ee
+ VSCATTERPF1DPD K2, (AX)(Y4*1) // 62f2fd4ac63420
+ VSCATTERPF1DPD K2, (BP)(Y10*2) // 62b2fd4ac6745500
+ VSCATTERPF1DPS K3, (BP)(Z10*2) // 62b27d4bc6745500
+ VSCATTERPF1DPS K3, (R10)(Z29*8) // 62927d43c634ea
+ VSCATTERPF1DPS K3, (R14)(Z29*8) // 62927d43c634ee
+ VSCATTERPF1QPD K3, (DX)(Z10*4) // 62b2fd4bc73492
+ VSCATTERPF1QPD K3, (AX)(Z4*1) // 62f2fd4bc73420
+ VSCATTERPF1QPD K3, (SP)(Z4*2) // 62f2fd4bc73464
+ VSCATTERPF1QPS K3, (BP)(Z10*2) // 62b27d4bc7745500
+ VSCATTERPF1QPS K3, (R10)(Z29*8) // 62927d43c734ea
+ VSCATTERPF1QPS K3, (R14)(Z29*8) // 62927d43c734ee
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/gfni_avx512f.s b/src/cmd/asm/internal/asm/testdata/avx512enc/gfni_avx512f.s
new file mode 100644
index 0000000..9df5f0e
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/gfni_avx512f.s
@@ -0,0 +1,324 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_gfni_avx512f(SB), NOSPLIT, $0
+ VGF2P8AFFINEINVQB $64, X8, X31, K3, X26 // 62438503cfd040
+ VGF2P8AFFINEINVQB $64, X1, X31, K3, X26 // 62638503cfd140
+ VGF2P8AFFINEINVQB $64, X0, X31, K3, X26 // 62638503cfd040
+ VGF2P8AFFINEINVQB $64, -17(BP), X31, K3, X26 // 62638503cf95efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X31, K3, X26 // 62038503cf94fef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X16, K3, X26 // 6243fd03cfd040
+ VGF2P8AFFINEINVQB $64, X1, X16, K3, X26 // 6263fd03cfd140
+ VGF2P8AFFINEINVQB $64, X0, X16, K3, X26 // 6263fd03cfd040
+ VGF2P8AFFINEINVQB $64, -17(BP), X16, K3, X26 // 6263fd03cf95efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X16, K3, X26 // 6203fd03cf94fef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X7, K3, X26 // 6243c50bcfd040
+ VGF2P8AFFINEINVQB $64, X1, X7, K3, X26 // 6263c50bcfd140
+ VGF2P8AFFINEINVQB $64, X0, X7, K3, X26 // 6263c50bcfd040
+ VGF2P8AFFINEINVQB $64, -17(BP), X7, K3, X26 // 6263c50bcf95efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X7, K3, X26 // 6203c50bcf94fef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X31, K3, X19 // 62c38503cfd840
+ VGF2P8AFFINEINVQB $64, X1, X31, K3, X19 // 62e38503cfd940
+ VGF2P8AFFINEINVQB $64, X0, X31, K3, X19 // 62e38503cfd840
+ VGF2P8AFFINEINVQB $64, -17(BP), X31, K3, X19 // 62e38503cf9defffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X31, K3, X19 // 62838503cf9cfef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X16, K3, X19 // 62c3fd03cfd840
+ VGF2P8AFFINEINVQB $64, X1, X16, K3, X19 // 62e3fd03cfd940
+ VGF2P8AFFINEINVQB $64, X0, X16, K3, X19 // 62e3fd03cfd840
+ VGF2P8AFFINEINVQB $64, -17(BP), X16, K3, X19 // 62e3fd03cf9defffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X16, K3, X19 // 6283fd03cf9cfef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X7, K3, X19 // 62c3c50bcfd840
+ VGF2P8AFFINEINVQB $64, X1, X7, K3, X19 // 62e3c50bcfd940
+ VGF2P8AFFINEINVQB $64, X0, X7, K3, X19 // 62e3c50bcfd840
+ VGF2P8AFFINEINVQB $64, -17(BP), X7, K3, X19 // 62e3c50bcf9defffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X7, K3, X19 // 6283c50bcf9cfef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X31, K3, X0 // 62d38503cfc040
+ VGF2P8AFFINEINVQB $64, X1, X31, K3, X0 // 62f38503cfc140
+ VGF2P8AFFINEINVQB $64, X0, X31, K3, X0 // 62f38503cfc040
+ VGF2P8AFFINEINVQB $64, -17(BP), X31, K3, X0 // 62f38503cf85efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X31, K3, X0 // 62938503cf84fef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X16, K3, X0 // 62d3fd03cfc040
+ VGF2P8AFFINEINVQB $64, X1, X16, K3, X0 // 62f3fd03cfc140
+ VGF2P8AFFINEINVQB $64, X0, X16, K3, X0 // 62f3fd03cfc040
+ VGF2P8AFFINEINVQB $64, -17(BP), X16, K3, X0 // 62f3fd03cf85efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X16, K3, X0 // 6293fd03cf84fef1ffffff40
+ VGF2P8AFFINEINVQB $64, X8, X7, K3, X0 // 62d3c50bcfc040
+ VGF2P8AFFINEINVQB $64, X1, X7, K3, X0 // 62f3c50bcfc140
+ VGF2P8AFFINEINVQB $64, X0, X7, K3, X0 // 62f3c50bcfc040
+ VGF2P8AFFINEINVQB $64, -17(BP), X7, K3, X0 // 62f3c50bcf85efffffff40
+ VGF2P8AFFINEINVQB $64, -15(R14)(R15*8), X7, K3, X0 // 6293c50bcf84fef1ffffff40
+ VGF2P8AFFINEINVQB $27, Y5, Y20, K3, Y0 // 62f3dd23cfc51b
+ VGF2P8AFFINEINVQB $27, Y28, Y20, K3, Y0 // 6293dd23cfc41b
+ VGF2P8AFFINEINVQB $27, Y7, Y20, K3, Y0 // 62f3dd23cfc71b
+ VGF2P8AFFINEINVQB $27, (BX), Y20, K3, Y0 // 62f3dd23cf031b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y20, K3, Y0 // 62f3dd23cf8435efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y12, K3, Y0 // 62f39d2bcfc51b
+ VGF2P8AFFINEINVQB $27, Y28, Y12, K3, Y0 // 62939d2bcfc41b
+ VGF2P8AFFINEINVQB $27, Y7, Y12, K3, Y0 // 62f39d2bcfc71b
+ VGF2P8AFFINEINVQB $27, (BX), Y12, K3, Y0 // 62f39d2bcf031b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y12, K3, Y0 // 62f39d2bcf8435efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y3, K3, Y0 // 62f3e52bcfc51b
+ VGF2P8AFFINEINVQB $27, Y28, Y3, K3, Y0 // 6293e52bcfc41b
+ VGF2P8AFFINEINVQB $27, Y7, Y3, K3, Y0 // 62f3e52bcfc71b
+ VGF2P8AFFINEINVQB $27, (BX), Y3, K3, Y0 // 62f3e52bcf031b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y3, K3, Y0 // 62f3e52bcf8435efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y20, K3, Y3 // 62f3dd23cfdd1b
+ VGF2P8AFFINEINVQB $27, Y28, Y20, K3, Y3 // 6293dd23cfdc1b
+ VGF2P8AFFINEINVQB $27, Y7, Y20, K3, Y3 // 62f3dd23cfdf1b
+ VGF2P8AFFINEINVQB $27, (BX), Y20, K3, Y3 // 62f3dd23cf1b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y20, K3, Y3 // 62f3dd23cf9c35efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y12, K3, Y3 // 62f39d2bcfdd1b
+ VGF2P8AFFINEINVQB $27, Y28, Y12, K3, Y3 // 62939d2bcfdc1b
+ VGF2P8AFFINEINVQB $27, Y7, Y12, K3, Y3 // 62f39d2bcfdf1b
+ VGF2P8AFFINEINVQB $27, (BX), Y12, K3, Y3 // 62f39d2bcf1b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y12, K3, Y3 // 62f39d2bcf9c35efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y3, K3, Y3 // 62f3e52bcfdd1b
+ VGF2P8AFFINEINVQB $27, Y28, Y3, K3, Y3 // 6293e52bcfdc1b
+ VGF2P8AFFINEINVQB $27, Y7, Y3, K3, Y3 // 62f3e52bcfdf1b
+ VGF2P8AFFINEINVQB $27, (BX), Y3, K3, Y3 // 62f3e52bcf1b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y3, K3, Y3 // 62f3e52bcf9c35efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y20, K3, Y5 // 62f3dd23cfed1b
+ VGF2P8AFFINEINVQB $27, Y28, Y20, K3, Y5 // 6293dd23cfec1b
+ VGF2P8AFFINEINVQB $27, Y7, Y20, K3, Y5 // 62f3dd23cfef1b
+ VGF2P8AFFINEINVQB $27, (BX), Y20, K3, Y5 // 62f3dd23cf2b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y20, K3, Y5 // 62f3dd23cfac35efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y12, K3, Y5 // 62f39d2bcfed1b
+ VGF2P8AFFINEINVQB $27, Y28, Y12, K3, Y5 // 62939d2bcfec1b
+ VGF2P8AFFINEINVQB $27, Y7, Y12, K3, Y5 // 62f39d2bcfef1b
+ VGF2P8AFFINEINVQB $27, (BX), Y12, K3, Y5 // 62f39d2bcf2b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y12, K3, Y5 // 62f39d2bcfac35efffffff1b
+ VGF2P8AFFINEINVQB $27, Y5, Y3, K3, Y5 // 62f3e52bcfed1b
+ VGF2P8AFFINEINVQB $27, Y28, Y3, K3, Y5 // 6293e52bcfec1b
+ VGF2P8AFFINEINVQB $27, Y7, Y3, K3, Y5 // 62f3e52bcfef1b
+ VGF2P8AFFINEINVQB $27, (BX), Y3, K3, Y5 // 62f3e52bcf2b1b
+ VGF2P8AFFINEINVQB $27, -17(BP)(SI*1), Y3, K3, Y5 // 62f3e52bcfac35efffffff1b
+ VGF2P8AFFINEINVQB $47, Z3, Z14, K2, Z28 // 62638d4acfe32f
+ VGF2P8AFFINEINVQB $47, Z12, Z14, K2, Z28 // 62438d4acfe42f
+ VGF2P8AFFINEINVQB $47, 99(R15)(R15*1), Z14, K2, Z28 // 62038d4acfa43f630000002f
+ VGF2P8AFFINEINVQB $47, (DX), Z14, K2, Z28 // 62638d4acf222f
+ VGF2P8AFFINEINVQB $47, Z3, Z28, K2, Z28 // 62639d42cfe32f
+ VGF2P8AFFINEINVQB $47, Z12, Z28, K2, Z28 // 62439d42cfe42f
+ VGF2P8AFFINEINVQB $47, 99(R15)(R15*1), Z28, K2, Z28 // 62039d42cfa43f630000002f
+ VGF2P8AFFINEINVQB $47, (DX), Z28, K2, Z28 // 62639d42cf222f
+ VGF2P8AFFINEINVQB $47, Z3, Z14, K2, Z13 // 62738d4acfeb2f
+ VGF2P8AFFINEINVQB $47, Z12, Z14, K2, Z13 // 62538d4acfec2f
+ VGF2P8AFFINEINVQB $47, 99(R15)(R15*1), Z14, K2, Z13 // 62138d4acfac3f630000002f
+ VGF2P8AFFINEINVQB $47, (DX), Z14, K2, Z13 // 62738d4acf2a2f
+ VGF2P8AFFINEINVQB $47, Z3, Z28, K2, Z13 // 62739d42cfeb2f
+ VGF2P8AFFINEINVQB $47, Z12, Z28, K2, Z13 // 62539d42cfec2f
+ VGF2P8AFFINEINVQB $47, 99(R15)(R15*1), Z28, K2, Z13 // 62139d42cfac3f630000002f
+ VGF2P8AFFINEINVQB $47, (DX), Z28, K2, Z13 // 62739d42cf2a2f
+ VGF2P8AFFINEQB $82, X22, X21, K1, X15 // 6233d501cefe52
+ VGF2P8AFFINEQB $82, X7, X21, K1, X15 // 6273d501ceff52
+ VGF2P8AFFINEQB $82, X19, X21, K1, X15 // 6233d501cefb52
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X21, K1, X15 // 6273d501cebc6c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X21, K1, X15 // 6233d501cebc87f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X0, K1, X15 // 6233fd09cefe52
+ VGF2P8AFFINEQB $82, X7, X0, K1, X15 // 6273fd09ceff52
+ VGF2P8AFFINEQB $82, X19, X0, K1, X15 // 6233fd09cefb52
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X0, K1, X15 // 6273fd09cebc6c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X0, K1, X15 // 6233fd09cebc87f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X28, K1, X15 // 62339d01cefe52
+ VGF2P8AFFINEQB $82, X7, X28, K1, X15 // 62739d01ceff52
+ VGF2P8AFFINEQB $82, X19, X28, K1, X15 // 62339d01cefb52
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X28, K1, X15 // 62739d01cebc6c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X28, K1, X15 // 62339d01cebc87f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X21, K1, X0 // 62b3d501cec652
+ VGF2P8AFFINEQB $82, X7, X21, K1, X0 // 62f3d501cec752
+ VGF2P8AFFINEQB $82, X19, X21, K1, X0 // 62b3d501cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X21, K1, X0 // 62f3d501ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X21, K1, X0 // 62b3d501ce8487f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X0, K1, X0 // 62b3fd09cec652
+ VGF2P8AFFINEQB $82, X7, X0, K1, X0 // 62f3fd09cec752
+ VGF2P8AFFINEQB $82, X19, X0, K1, X0 // 62b3fd09cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X0, K1, X0 // 62f3fd09ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X0, K1, X0 // 62b3fd09ce8487f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X28, K1, X0 // 62b39d01cec652
+ VGF2P8AFFINEQB $82, X7, X28, K1, X0 // 62f39d01cec752
+ VGF2P8AFFINEQB $82, X19, X28, K1, X0 // 62b39d01cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X28, K1, X0 // 62f39d01ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X28, K1, X0 // 62b39d01ce8487f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X21, K1, X16 // 62a3d501cec652
+ VGF2P8AFFINEQB $82, X7, X21, K1, X16 // 62e3d501cec752
+ VGF2P8AFFINEQB $82, X19, X21, K1, X16 // 62a3d501cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X21, K1, X16 // 62e3d501ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X21, K1, X16 // 62a3d501ce8487f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X0, K1, X16 // 62a3fd09cec652
+ VGF2P8AFFINEQB $82, X7, X0, K1, X16 // 62e3fd09cec752
+ VGF2P8AFFINEQB $82, X19, X0, K1, X16 // 62a3fd09cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X0, K1, X16 // 62e3fd09ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X0, K1, X16 // 62a3fd09ce8487f9ffffff52
+ VGF2P8AFFINEQB $82, X22, X28, K1, X16 // 62a39d01cec652
+ VGF2P8AFFINEQB $82, X7, X28, K1, X16 // 62e39d01cec752
+ VGF2P8AFFINEQB $82, X19, X28, K1, X16 // 62a39d01cec352
+ VGF2P8AFFINEQB $82, 17(SP)(BP*2), X28, K1, X16 // 62e39d01ce846c1100000052
+ VGF2P8AFFINEQB $82, -7(DI)(R8*4), X28, K1, X16 // 62a39d01ce8487f9ffffff52
+ VGF2P8AFFINEQB $126, Y17, Y12, K2, Y0 // 62b39d2acec17e
+ VGF2P8AFFINEQB $126, Y7, Y12, K2, Y0 // 62f39d2acec77e
+ VGF2P8AFFINEQB $126, Y9, Y12, K2, Y0 // 62d39d2acec17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y12, K2, Y0 // 62939d2ace84b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y12, K2, Y0 // 62f39d2ace8491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y1, K2, Y0 // 62b3f52acec17e
+ VGF2P8AFFINEQB $126, Y7, Y1, K2, Y0 // 62f3f52acec77e
+ VGF2P8AFFINEQB $126, Y9, Y1, K2, Y0 // 62d3f52acec17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y1, K2, Y0 // 6293f52ace84b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y1, K2, Y0 // 62f3f52ace8491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y14, K2, Y0 // 62b38d2acec17e
+ VGF2P8AFFINEQB $126, Y7, Y14, K2, Y0 // 62f38d2acec77e
+ VGF2P8AFFINEQB $126, Y9, Y14, K2, Y0 // 62d38d2acec17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y14, K2, Y0 // 62938d2ace84b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y14, K2, Y0 // 62f38d2ace8491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y12, K2, Y22 // 62a39d2acef17e
+ VGF2P8AFFINEQB $126, Y7, Y12, K2, Y22 // 62e39d2acef77e
+ VGF2P8AFFINEQB $126, Y9, Y12, K2, Y22 // 62c39d2acef17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y12, K2, Y22 // 62839d2aceb4b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y12, K2, Y22 // 62e39d2aceb491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y1, K2, Y22 // 62a3f52acef17e
+ VGF2P8AFFINEQB $126, Y7, Y1, K2, Y22 // 62e3f52acef77e
+ VGF2P8AFFINEQB $126, Y9, Y1, K2, Y22 // 62c3f52acef17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y1, K2, Y22 // 6283f52aceb4b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y1, K2, Y22 // 62e3f52aceb491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y14, K2, Y22 // 62a38d2acef17e
+ VGF2P8AFFINEQB $126, Y7, Y14, K2, Y22 // 62e38d2acef77e
+ VGF2P8AFFINEQB $126, Y9, Y14, K2, Y22 // 62c38d2acef17e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y14, K2, Y22 // 62838d2aceb4b00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y14, K2, Y22 // 62e38d2aceb491f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y12, K2, Y13 // 62339d2acee97e
+ VGF2P8AFFINEQB $126, Y7, Y12, K2, Y13 // 62739d2aceef7e
+ VGF2P8AFFINEQB $126, Y9, Y12, K2, Y13 // 62539d2acee97e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y12, K2, Y13 // 62139d2aceacb00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y12, K2, Y13 // 62739d2aceac91f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y1, K2, Y13 // 6233f52acee97e
+ VGF2P8AFFINEQB $126, Y7, Y1, K2, Y13 // 6273f52aceef7e
+ VGF2P8AFFINEQB $126, Y9, Y1, K2, Y13 // 6253f52acee97e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y1, K2, Y13 // 6213f52aceacb00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y1, K2, Y13 // 6273f52aceac91f9ffffff7e
+ VGF2P8AFFINEQB $126, Y17, Y14, K2, Y13 // 62338d2acee97e
+ VGF2P8AFFINEQB $126, Y7, Y14, K2, Y13 // 62738d2aceef7e
+ VGF2P8AFFINEQB $126, Y9, Y14, K2, Y13 // 62538d2acee97e
+ VGF2P8AFFINEQB $126, 15(R8)(R14*4), Y14, K2, Y13 // 62138d2aceacb00f0000007e
+ VGF2P8AFFINEQB $126, -7(CX)(DX*4), Y14, K2, Y13 // 62738d2aceac91f9ffffff7e
+ VGF2P8AFFINEQB $94, Z5, Z19, K1, Z15 // 6273e541cefd5e
+ VGF2P8AFFINEQB $94, Z1, Z19, K1, Z15 // 6273e541cef95e
+ VGF2P8AFFINEQB $94, -17(BP)(SI*8), Z19, K1, Z15 // 6273e541cebcf5efffffff5e
+ VGF2P8AFFINEQB $94, (R15), Z19, K1, Z15 // 6253e541ce3f5e
+ VGF2P8AFFINEQB $94, Z5, Z15, K1, Z15 // 62738549cefd5e
+ VGF2P8AFFINEQB $94, Z1, Z15, K1, Z15 // 62738549cef95e
+ VGF2P8AFFINEQB $94, -17(BP)(SI*8), Z15, K1, Z15 // 62738549cebcf5efffffff5e
+ VGF2P8AFFINEQB $94, (R15), Z15, K1, Z15 // 62538549ce3f5e
+ VGF2P8AFFINEQB $94, Z5, Z19, K1, Z30 // 6263e541cef55e
+ VGF2P8AFFINEQB $94, Z1, Z19, K1, Z30 // 6263e541cef15e
+ VGF2P8AFFINEQB $94, -17(BP)(SI*8), Z19, K1, Z30 // 6263e541ceb4f5efffffff5e
+ VGF2P8AFFINEQB $94, (R15), Z19, K1, Z30 // 6243e541ce375e
+ VGF2P8AFFINEQB $94, Z5, Z15, K1, Z30 // 62638549cef55e
+ VGF2P8AFFINEQB $94, Z1, Z15, K1, Z30 // 62638549cef15e
+ VGF2P8AFFINEQB $94, -17(BP)(SI*8), Z15, K1, Z30 // 62638549ceb4f5efffffff5e
+ VGF2P8AFFINEQB $94, (R15), Z15, K1, Z30 // 62438549ce375e
+ VGF2P8MULB X15, X1, K7, X7 // 62d2750fcfff
+ VGF2P8MULB X12, X1, K7, X7 // 62d2750fcffc
+ VGF2P8MULB X0, X1, K7, X7 // 62f2750fcff8
+ VGF2P8MULB 15(R8), X1, K7, X7 // 62d2750fcfb80f000000
+ VGF2P8MULB (BP), X1, K7, X7 // 62f2750fcf7d00
+ VGF2P8MULB X15, X7, K7, X7 // 62d2450fcfff
+ VGF2P8MULB X12, X7, K7, X7 // 62d2450fcffc
+ VGF2P8MULB X0, X7, K7, X7 // 62f2450fcff8
+ VGF2P8MULB 15(R8), X7, K7, X7 // 62d2450fcfb80f000000
+ VGF2P8MULB (BP), X7, K7, X7 // 62f2450fcf7d00
+ VGF2P8MULB X15, X9, K7, X7 // 62d2350fcfff
+ VGF2P8MULB X12, X9, K7, X7 // 62d2350fcffc
+ VGF2P8MULB X0, X9, K7, X7 // 62f2350fcff8
+ VGF2P8MULB 15(R8), X9, K7, X7 // 62d2350fcfb80f000000
+ VGF2P8MULB (BP), X9, K7, X7 // 62f2350fcf7d00
+ VGF2P8MULB X15, X1, K7, X16 // 62c2750fcfc7
+ VGF2P8MULB X12, X1, K7, X16 // 62c2750fcfc4
+ VGF2P8MULB X0, X1, K7, X16 // 62e2750fcfc0
+ VGF2P8MULB 15(R8), X1, K7, X16 // 62c2750fcf800f000000
+ VGF2P8MULB (BP), X1, K7, X16 // 62e2750fcf4500
+ VGF2P8MULB X15, X7, K7, X16 // 62c2450fcfc7
+ VGF2P8MULB X12, X7, K7, X16 // 62c2450fcfc4
+ VGF2P8MULB X0, X7, K7, X16 // 62e2450fcfc0
+ VGF2P8MULB 15(R8), X7, K7, X16 // 62c2450fcf800f000000
+ VGF2P8MULB (BP), X7, K7, X16 // 62e2450fcf4500
+ VGF2P8MULB X15, X9, K7, X16 // 62c2350fcfc7
+ VGF2P8MULB X12, X9, K7, X16 // 62c2350fcfc4
+ VGF2P8MULB X0, X9, K7, X16 // 62e2350fcfc0
+ VGF2P8MULB 15(R8), X9, K7, X16 // 62c2350fcf800f000000
+ VGF2P8MULB (BP), X9, K7, X16 // 62e2350fcf4500
+ VGF2P8MULB X15, X1, K7, X31 // 6242750fcfff
+ VGF2P8MULB X12, X1, K7, X31 // 6242750fcffc
+ VGF2P8MULB X0, X1, K7, X31 // 6262750fcff8
+ VGF2P8MULB 15(R8), X1, K7, X31 // 6242750fcfb80f000000
+ VGF2P8MULB (BP), X1, K7, X31 // 6262750fcf7d00
+ VGF2P8MULB X15, X7, K7, X31 // 6242450fcfff
+ VGF2P8MULB X12, X7, K7, X31 // 6242450fcffc
+ VGF2P8MULB X0, X7, K7, X31 // 6262450fcff8
+ VGF2P8MULB 15(R8), X7, K7, X31 // 6242450fcfb80f000000
+ VGF2P8MULB (BP), X7, K7, X31 // 6262450fcf7d00
+ VGF2P8MULB X15, X9, K7, X31 // 6242350fcfff
+ VGF2P8MULB X12, X9, K7, X31 // 6242350fcffc
+ VGF2P8MULB X0, X9, K7, X31 // 6262350fcff8
+ VGF2P8MULB 15(R8), X9, K7, X31 // 6242350fcfb80f000000
+ VGF2P8MULB (BP), X9, K7, X31 // 6262350fcf7d00
+ VGF2P8MULB Y2, Y28, K1, Y31 // 62621d21cffa
+ VGF2P8MULB Y21, Y28, K1, Y31 // 62221d21cffd
+ VGF2P8MULB Y12, Y28, K1, Y31 // 62421d21cffc
+ VGF2P8MULB (R8), Y28, K1, Y31 // 62421d21cf38
+ VGF2P8MULB 15(DX)(BX*2), Y28, K1, Y31 // 62621d21cfbc5a0f000000
+ VGF2P8MULB Y2, Y13, K1, Y31 // 62621529cffa
+ VGF2P8MULB Y21, Y13, K1, Y31 // 62221529cffd
+ VGF2P8MULB Y12, Y13, K1, Y31 // 62421529cffc
+ VGF2P8MULB (R8), Y13, K1, Y31 // 62421529cf38
+ VGF2P8MULB 15(DX)(BX*2), Y13, K1, Y31 // 62621529cfbc5a0f000000
+ VGF2P8MULB Y2, Y7, K1, Y31 // 62624529cffa
+ VGF2P8MULB Y21, Y7, K1, Y31 // 62224529cffd
+ VGF2P8MULB Y12, Y7, K1, Y31 // 62424529cffc
+ VGF2P8MULB (R8), Y7, K1, Y31 // 62424529cf38
+ VGF2P8MULB 15(DX)(BX*2), Y7, K1, Y31 // 62624529cfbc5a0f000000
+ VGF2P8MULB Y2, Y28, K1, Y8 // 62721d21cfc2
+ VGF2P8MULB Y21, Y28, K1, Y8 // 62321d21cfc5
+ VGF2P8MULB Y12, Y28, K1, Y8 // 62521d21cfc4
+ VGF2P8MULB (R8), Y28, K1, Y8 // 62521d21cf00
+ VGF2P8MULB 15(DX)(BX*2), Y28, K1, Y8 // 62721d21cf845a0f000000
+ VGF2P8MULB Y2, Y13, K1, Y8 // 62721529cfc2
+ VGF2P8MULB Y21, Y13, K1, Y8 // 62321529cfc5
+ VGF2P8MULB Y12, Y13, K1, Y8 // 62521529cfc4
+ VGF2P8MULB (R8), Y13, K1, Y8 // 62521529cf00
+ VGF2P8MULB 15(DX)(BX*2), Y13, K1, Y8 // 62721529cf845a0f000000
+ VGF2P8MULB Y2, Y7, K1, Y8 // 62724529cfc2
+ VGF2P8MULB Y21, Y7, K1, Y8 // 62324529cfc5
+ VGF2P8MULB Y12, Y7, K1, Y8 // 62524529cfc4
+ VGF2P8MULB (R8), Y7, K1, Y8 // 62524529cf00
+ VGF2P8MULB 15(DX)(BX*2), Y7, K1, Y8 // 62724529cf845a0f000000
+ VGF2P8MULB Y2, Y28, K1, Y1 // 62f21d21cfca
+ VGF2P8MULB Y21, Y28, K1, Y1 // 62b21d21cfcd
+ VGF2P8MULB Y12, Y28, K1, Y1 // 62d21d21cfcc
+ VGF2P8MULB (R8), Y28, K1, Y1 // 62d21d21cf08
+ VGF2P8MULB 15(DX)(BX*2), Y28, K1, Y1 // 62f21d21cf8c5a0f000000
+ VGF2P8MULB Y2, Y13, K1, Y1 // 62f21529cfca
+ VGF2P8MULB Y21, Y13, K1, Y1 // 62b21529cfcd
+ VGF2P8MULB Y12, Y13, K1, Y1 // 62d21529cfcc
+ VGF2P8MULB (R8), Y13, K1, Y1 // 62d21529cf08
+ VGF2P8MULB 15(DX)(BX*2), Y13, K1, Y1 // 62f21529cf8c5a0f000000
+ VGF2P8MULB Y2, Y7, K1, Y1 // 62f24529cfca
+ VGF2P8MULB Y21, Y7, K1, Y1 // 62b24529cfcd
+ VGF2P8MULB Y12, Y7, K1, Y1 // 62d24529cfcc
+ VGF2P8MULB (R8), Y7, K1, Y1 // 62d24529cf08
+ VGF2P8MULB 15(DX)(BX*2), Y7, K1, Y1 // 62f24529cf8c5a0f000000
+ VGF2P8MULB Z21, Z14, K1, Z3 // 62b20d49cfdd
+ VGF2P8MULB Z8, Z14, K1, Z3 // 62d20d49cfd8
+ VGF2P8MULB 7(SI)(DI*8), Z14, K1, Z3 // 62f20d49cf9cfe07000000
+ VGF2P8MULB -15(R14), Z14, K1, Z3 // 62d20d49cf9ef1ffffff
+ VGF2P8MULB Z21, Z15, K1, Z3 // 62b20549cfdd
+ VGF2P8MULB Z8, Z15, K1, Z3 // 62d20549cfd8
+ VGF2P8MULB 7(SI)(DI*8), Z15, K1, Z3 // 62f20549cf9cfe07000000
+ VGF2P8MULB -15(R14), Z15, K1, Z3 // 62d20549cf9ef1ffffff
+ VGF2P8MULB Z21, Z14, K1, Z5 // 62b20d49cfed
+ VGF2P8MULB Z8, Z14, K1, Z5 // 62d20d49cfe8
+ VGF2P8MULB 7(SI)(DI*8), Z14, K1, Z5 // 62f20d49cfacfe07000000
+ VGF2P8MULB -15(R14), Z14, K1, Z5 // 62d20d49cfaef1ffffff
+ VGF2P8MULB Z21, Z15, K1, Z5 // 62b20549cfed
+ VGF2P8MULB Z8, Z15, K1, Z5 // 62d20549cfe8
+ VGF2P8MULB 7(SI)(DI*8), Z15, K1, Z5 // 62f20549cfacfe07000000
+ VGF2P8MULB -15(R14), Z15, K1, Z5 // 62d20549cfaef1ffffff
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/avx512enc/vpclmulqdq_avx512f.s b/src/cmd/asm/internal/asm/testdata/avx512enc/vpclmulqdq_avx512f.s
new file mode 100644
index 0000000..86579d6
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/avx512enc/vpclmulqdq_avx512f.s
@@ -0,0 +1,94 @@
+// Code generated by avx512test. DO NOT EDIT.
+
+#include "../../../../../../runtime/textflag.h"
+
+TEXT asmtest_vpclmulqdq_avx512f(SB), NOSPLIT, $0
+ VPCLMULQDQ $127, X22, X21, X15 // 6233550044fe7f or 6233d50044fe7f
+ VPCLMULQDQ $127, X7, X21, X15 // 6273550044ff7f or 6273d50044ff7f
+ VPCLMULQDQ $127, X19, X21, X15 // 6233550044fb7f or 6233d50044fb7f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X21, X15 // 6273550044bcf5efffffff7f or 6273d50044bcf5efffffff7f
+ VPCLMULQDQ $127, (R15), X21, X15 // 62535500443f7f or 6253d500443f7f
+ VPCLMULQDQ $127, X22, X0, X15 // 62337d0844fe7f or 6233fd0844fe7f
+ VPCLMULQDQ $127, X19, X0, X15 // 62337d0844fb7f or 6233fd0844fb7f
+ VPCLMULQDQ $127, X22, X28, X15 // 62331d0044fe7f or 62339d0044fe7f
+ VPCLMULQDQ $127, X7, X28, X15 // 62731d0044ff7f or 62739d0044ff7f
+ VPCLMULQDQ $127, X19, X28, X15 // 62331d0044fb7f or 62339d0044fb7f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X28, X15 // 62731d0044bcf5efffffff7f or 62739d0044bcf5efffffff7f
+ VPCLMULQDQ $127, (R15), X28, X15 // 62531d00443f7f or 62539d00443f7f
+ VPCLMULQDQ $127, X22, X21, X0 // 62b3550044c67f or 62b3d50044c67f
+ VPCLMULQDQ $127, X7, X21, X0 // 62f3550044c77f or 62f3d50044c77f
+ VPCLMULQDQ $127, X19, X21, X0 // 62b3550044c37f or 62b3d50044c37f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X21, X0 // 62f355004484f5efffffff7f or 62f3d5004484f5efffffff7f
+ VPCLMULQDQ $127, (R15), X21, X0 // 62d3550044077f or 62d3d50044077f
+ VPCLMULQDQ $127, X22, X0, X0 // 62b37d0844c67f or 62b3fd0844c67f
+ VPCLMULQDQ $127, X19, X0, X0 // 62b37d0844c37f or 62b3fd0844c37f
+ VPCLMULQDQ $127, X22, X28, X0 // 62b31d0044c67f or 62b39d0044c67f
+ VPCLMULQDQ $127, X7, X28, X0 // 62f31d0044c77f or 62f39d0044c77f
+ VPCLMULQDQ $127, X19, X28, X0 // 62b31d0044c37f or 62b39d0044c37f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X28, X0 // 62f31d004484f5efffffff7f or 62f39d004484f5efffffff7f
+ VPCLMULQDQ $127, (R15), X28, X0 // 62d31d0044077f or 62d39d0044077f
+ VPCLMULQDQ $127, X22, X21, X16 // 62a3550044c67f or 62a3d50044c67f
+ VPCLMULQDQ $127, X7, X21, X16 // 62e3550044c77f or 62e3d50044c77f
+ VPCLMULQDQ $127, X19, X21, X16 // 62a3550044c37f or 62a3d50044c37f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X21, X16 // 62e355004484f5efffffff7f or 62e3d5004484f5efffffff7f
+ VPCLMULQDQ $127, (R15), X21, X16 // 62c3550044077f or 62c3d50044077f
+ VPCLMULQDQ $127, X22, X0, X16 // 62a37d0844c67f or 62a3fd0844c67f
+ VPCLMULQDQ $127, X7, X0, X16 // 62e37d0844c77f or 62e3fd0844c77f
+ VPCLMULQDQ $127, X19, X0, X16 // 62a37d0844c37f or 62a3fd0844c37f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X0, X16 // 62e37d084484f5efffffff7f or 62e3fd084484f5efffffff7f
+ VPCLMULQDQ $127, (R15), X0, X16 // 62c37d0844077f or 62c3fd0844077f
+ VPCLMULQDQ $127, X22, X28, X16 // 62a31d0044c67f or 62a39d0044c67f
+ VPCLMULQDQ $127, X7, X28, X16 // 62e31d0044c77f or 62e39d0044c77f
+ VPCLMULQDQ $127, X19, X28, X16 // 62a31d0044c37f or 62a39d0044c37f
+ VPCLMULQDQ $127, -17(BP)(SI*8), X28, X16 // 62e31d004484f5efffffff7f or 62e39d004484f5efffffff7f
+ VPCLMULQDQ $127, (R15), X28, X16 // 62c31d0044077f or 62c39d0044077f
+ VPCLMULQDQ $0, Y15, Y2, Y31 // 62436d2844ff00 or 6243ed2844ff00
+ VPCLMULQDQ $0, Y22, Y2, Y31 // 62236d2844fe00 or 6223ed2844fe00
+ VPCLMULQDQ $0, Y20, Y2, Y31 // 62236d2844fc00 or 6223ed2844fc00
+ VPCLMULQDQ $0, 99(R15)(R15*4), Y2, Y31 // 62036d2844bcbf6300000000 or 6203ed2844bcbf6300000000
+ VPCLMULQDQ $0, 15(DX), Y2, Y31 // 62636d2844ba0f00000000 or 6263ed2844ba0f00000000
+ VPCLMULQDQ $0, Y15, Y13, Y31 // 6243152844ff00 or 6243952844ff00
+ VPCLMULQDQ $0, Y22, Y13, Y31 // 6223152844fe00 or 6223952844fe00
+ VPCLMULQDQ $0, Y20, Y13, Y31 // 6223152844fc00 or 6223952844fc00
+ VPCLMULQDQ $0, 99(R15)(R15*4), Y13, Y31 // 6203152844bcbf6300000000 or 6203952844bcbf6300000000
+ VPCLMULQDQ $0, 15(DX), Y13, Y31 // 6263152844ba0f00000000 or 6263952844ba0f00000000
+ VPCLMULQDQ $0, Y15, Y27, Y31 // 6243252044ff00 or 6243a52044ff00
+ VPCLMULQDQ $0, Y22, Y27, Y31 // 6223252044fe00 or 6223a52044fe00
+ VPCLMULQDQ $0, Y20, Y27, Y31 // 6223252044fc00 or 6223a52044fc00
+ VPCLMULQDQ $0, 99(R15)(R15*4), Y27, Y31 // 6203252044bcbf6300000000 or 6203a52044bcbf6300000000
+ VPCLMULQDQ $0, 15(DX), Y27, Y31 // 6263252044ba0f00000000 or 6263a52044ba0f00000000
+ VPCLMULQDQ $0, Y22, Y2, Y3 // 62b36d2844de00 or 62b3ed2844de00
+ VPCLMULQDQ $0, Y20, Y2, Y3 // 62b36d2844dc00 or 62b3ed2844dc00
+ VPCLMULQDQ $0, Y22, Y13, Y3 // 62b3152844de00 or 62b3952844de00
+ VPCLMULQDQ $0, Y20, Y13, Y3 // 62b3152844dc00 or 62b3952844dc00
+ VPCLMULQDQ $0, Y15, Y27, Y3 // 62d3252044df00 or 62d3a52044df00
+ VPCLMULQDQ $0, Y22, Y27, Y3 // 62b3252044de00 or 62b3a52044de00
+ VPCLMULQDQ $0, Y20, Y27, Y3 // 62b3252044dc00 or 62b3a52044dc00
+ VPCLMULQDQ $0, 99(R15)(R15*4), Y27, Y3 // 62932520449cbf6300000000 or 6293a520449cbf6300000000
+ VPCLMULQDQ $0, 15(DX), Y27, Y3 // 62f32520449a0f00000000 or 62f3a520449a0f00000000
+ VPCLMULQDQ $0, Y22, Y2, Y14 // 62336d2844f600 or 6233ed2844f600
+ VPCLMULQDQ $0, Y20, Y2, Y14 // 62336d2844f400 or 6233ed2844f400
+ VPCLMULQDQ $0, Y22, Y13, Y14 // 6233152844f600 or 6233952844f600
+ VPCLMULQDQ $0, Y20, Y13, Y14 // 6233152844f400 or 6233952844f400
+ VPCLMULQDQ $0, Y15, Y27, Y14 // 6253252044f700 or 6253a52044f700
+ VPCLMULQDQ $0, Y22, Y27, Y14 // 6233252044f600 or 6233a52044f600
+ VPCLMULQDQ $0, Y20, Y27, Y14 // 6233252044f400 or 6233a52044f400
+ VPCLMULQDQ $0, 99(R15)(R15*4), Y27, Y14 // 6213252044b4bf6300000000 or 6213a52044b4bf6300000000
+ VPCLMULQDQ $0, 15(DX), Y27, Y14 // 6273252044b20f00000000 or 6273a52044b20f00000000
+ VPCLMULQDQ $97, Z9, Z0, Z24 // 62437d4844c161 or 6243fd4844c161
+ VPCLMULQDQ $97, Z3, Z0, Z24 // 62637d4844c361 or 6263fd4844c361
+ VPCLMULQDQ $97, 7(SI)(DI*1), Z0, Z24 // 62637d4844843e0700000061 or 6263fd4844843e0700000061
+ VPCLMULQDQ $97, 15(DX)(BX*8), Z0, Z24 // 62637d484484da0f00000061 or 6263fd484484da0f00000061
+ VPCLMULQDQ $97, Z9, Z26, Z24 // 62432d4044c161 or 6243ad4044c161
+ VPCLMULQDQ $97, Z3, Z26, Z24 // 62632d4044c361 or 6263ad4044c361
+ VPCLMULQDQ $97, 7(SI)(DI*1), Z26, Z24 // 62632d4044843e0700000061 or 6263ad4044843e0700000061
+ VPCLMULQDQ $97, 15(DX)(BX*8), Z26, Z24 // 62632d404484da0f00000061 or 6263ad404484da0f00000061
+ VPCLMULQDQ $97, Z9, Z0, Z12 // 62537d4844e161 or 6253fd4844e161
+ VPCLMULQDQ $97, Z3, Z0, Z12 // 62737d4844e361 or 6273fd4844e361
+ VPCLMULQDQ $97, 7(SI)(DI*1), Z0, Z12 // 62737d4844a43e0700000061 or 6273fd4844a43e0700000061
+ VPCLMULQDQ $97, 15(DX)(BX*8), Z0, Z12 // 62737d4844a4da0f00000061 or 6273fd4844a4da0f00000061
+ VPCLMULQDQ $97, Z9, Z26, Z12 // 62532d4044e161 or 6253ad4044e161
+ VPCLMULQDQ $97, Z3, Z26, Z12 // 62732d4044e361 or 6273ad4044e361
+ VPCLMULQDQ $97, 7(SI)(DI*1), Z26, Z12 // 62732d4044a43e0700000061 or 6273ad4044a43e0700000061
+ VPCLMULQDQ $97, 15(DX)(BX*8), Z26, Z12 // 62732d4044a4da0f00000061 or 6273ad4044a4da0f00000061
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/buildtagerror.s b/src/cmd/asm/internal/asm/testdata/buildtagerror.s
new file mode 100644
index 0000000..5a2d65b
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/buildtagerror.s
@@ -0,0 +1,8 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define X 1
+
+//go:build x // ERROR "misplaced //go:build comment"
+
diff --git a/src/cmd/asm/internal/asm/testdata/mips.s b/src/cmd/asm/internal/asm/testdata/mips.s
new file mode 100644
index 0000000..7136d68
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/mips.s
@@ -0,0 +1,443 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input was created by taking the mips64 testcase and modifying
+// it by hand.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB),DUPOK|NOSPLIT,$0
+
+ //inst:
+ //
+ // load ints and bytes
+ //
+ // LMOVW rreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW R1, R2
+ MOVW LO, R1
+ MOVW HI, R1
+ MOVW R1, LO
+ MOVW R1, HI
+ MOVW R1, R2
+ MOVW LO, R1
+ MOVW HI, R1
+ MOVW R1, LO
+ MOVW R1, HI
+
+ // LMOVW addr ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW foo<>+3(SB), R2
+ MOVW 16(R1), R2
+ MOVW (R1), R2
+ MOVW foo<>+3(SB), R2
+ MOVW 16(R1), R2
+ MOVW (R1), R2
+ LL (R1), R2
+
+ // LMOVB rreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVB R1, R2
+
+ // LMOVB addr ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVB foo<>+3(SB), R2
+ MOVB 16(R1), R2
+ MOVB (R1), R2
+
+ //
+ // load floats
+ //
+ // LFMOV addr ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVF foo<>+3(SB), F2
+ MOVF 16(R1), F2
+ MOVF (R1), F2
+
+ // LFMOV fimm ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVF $0.1, F2 // MOVF $(0.10000000000000001), F2
+
+ // LFMOV freg ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVF F1, F2
+
+ // LFMOV freg ',' addr
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVF F2, foo<>+3(SB)
+ MOVF F2, 16(R1)
+ MOVF F2, (R1)
+
+ //
+ // store ints and bytes
+ //
+ // LMOVW rreg ',' addr
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW R1, foo<>+3(SB)
+ MOVW R1, 16(R2)
+ MOVW R1, (R2)
+ MOVW R1, foo<>+3(SB)
+ MOVW R1, 16(R2)
+ MOVW R1, (R2)
+ SC R1, (R2)
+
+ // LMOVB rreg ',' addr
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVB R1, foo<>+3(SB)
+ MOVB R1, 16(R2)
+ MOVB R1, (R2)
+
+ //
+ // store floats
+ //
+ // LMOVW freg ',' addr
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVD F1, foo<>+3(SB)
+ MOVD F1, 16(R2)
+ MOVD F1, (R2)
+
+ //
+ // floating point status
+ //
+ // LMOVW fpscr ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW FCR0, R1
+
+ // LMOVW freg ',' fpscr
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW R1, FCR0
+
+ // LMOVW rreg ',' mreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW R1, M1
+ MOVW R1, M1
+
+ // LMOVW mreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW M1, R1
+ MOVW M1, R1
+
+
+ //
+ // integer operations
+ // logical instructions
+ // shift instructions
+ // unary instructions
+ //
+ // LADDW rreg ',' sreg ',' rreg
+ // {
+ // outcode(int($1), &$2, int($4), &$6);
+ // }
+ ADD R1, R2, R3
+
+ // LADDW imm ',' sreg ',' rreg
+ // {
+ // outcode(int($1), &$2, int($4), &$6);
+ // }
+ ADD $1, R2, R3
+
+ // LADDW rreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ ADD R1, R2
+
+ // LADDW imm ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ ADD $4, R1
+
+ // LMUL rreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MUL R1, R2
+
+ // LSHW rreg ',' sreg ',' rreg
+ // {
+ // outcode(int($1), &$2, int($4), &$6);
+ // }
+ SLL R1, R2, R3
+
+ // LSHW rreg ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ SLL R1, R2
+
+ // LSHW imm ',' sreg ',' rreg
+ // {
+ // outcode(int($1), &$2, int($4), &$6);
+ // }
+ SLL $4, R1, R2
+
+ // LSHW imm ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ SLL $4, R1
+
+ //
+ // move immediate: macro for lui+or, addi, addis, and other combinations
+ //
+ // LMOVW imm ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW $1, R1
+ MOVW $1, R1
+
+ // LMOVW ximm ',' rreg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ MOVW $1, R1
+ MOVW $foo(SB), R1
+ MOVW $1, R1
+ MOVW $foo(SB), R1
+
+
+ //
+ // branch
+ //
+ // LBRA rel
+ // {
+ // outcode(int($1), &nullgen, 0, &$2);
+ // }
+ BEQ R1, 2(PC)
+label0:
+ JMP 1(PC)
+ BEQ R1, 2(PC)
+ JMP label0+0 // JMP 66
+ BEQ R1, 2(PC)
+ JAL 1(PC) // CALL 1(PC)
+ BEQ R1, 2(PC)
+ JAL label0+0 // CALL 66
+
+ // LBRA addr
+ // {
+ // outcode(int($1), &nullgen, 0, &$2);
+ // }
+ BEQ R1, 2(PC)
+ JMP 0(R1) // JMP (R1)
+ BEQ R1, 2(PC)
+ JMP foo+0(SB) // JMP foo(SB)
+ BEQ R1, 2(PC)
+ JAL 0(R1) // CALL (R1)
+ BEQ R1, 2(PC)
+ JAL foo+0(SB) // CALL foo(SB)
+
+//
+// BEQ/BNE
+//
+// LBRA rreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label1:
+ BEQ R1, 1(PC)
+ BEQ R1, label1 // BEQ R1, 81
+
+// LBRA rreg ',' sreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label2:
+ BEQ R1, R2, 1(PC)
+ BEQ R1, R2, label2 // BEQ R1, R2, 83
+
+//
+// other integer conditional branch
+//
+// LBRA rreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label3:
+ BLTZ R1, 1(PC)
+ BLTZ R1, label3 // BLTZ R1, 85
+
+//
+// floating point conditional branch
+//
+// LBRA rel
+label4:
+ BFPT 1(PC)
+ BFPT label4 // BFPT 87
+
+
+ //
+ // floating point operate
+ //
+ // LFCONV freg ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ ABSD F1, F2
+
+ // LFADD freg ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ ADDD F1, F2
+
+ // LFADD freg ',' freg ',' freg
+ // {
+ // outcode(int($1), &$2, int($4.Reg), &$6);
+ // }
+ ADDD F1, F2, F3
+
+ // LFCMP freg ',' freg
+ // {
+ // outcode(int($1), &$2, 0, &$4);
+ // }
+ CMPEQD F1, F2
+
+
+ //
+ // WORD
+ //
+ WORD $1
+
+ //
+ // NOP
+ //
+ // LNOP comma // asm doesn't support the trailing comma.
+ // {
+ // outcode(int($1), &nullgen, 0, &nullgen);
+ // }
+ NOP
+
+ // LNOP rreg comma // asm doesn't support the trailing comma.
+ // {
+ // outcode(int($1), &$2, 0, &nullgen);
+ // }
+ NOP R2
+
+ // LNOP freg comma // asm doesn't support the trailing comma.
+ // {
+ // outcode(int($1), &$2, 0, &nullgen);
+ // }
+ NOP F2
+
+ // LNOP ',' rreg // asm doesn't support the leading comma.
+ // {
+ // outcode(int($1), &nullgen, 0, &$3);
+ // }
+ NOP R2
+
+ // LNOP ',' freg // asm doesn't support the leading comma.
+ // {
+ // outcode(int($1), &nullgen, 0, &$3);
+ // }
+ NOP F2
+
+ // LNOP imm
+ // {
+ // outcode(int($1), &$2, 0, &nullgen);
+ // }
+ NOP $4
+
+ //
+ // special
+ //
+ SYSCALL
+ BREAK
+ SYNC
+
+ //
+ // conditional move on zero/nonzero gp value
+ //
+ CMOVN R1, R2, R3
+ CMOVZ R1, R2, R3
+
+ //
+ // conditional move on fp false/true
+ //
+ CMOVF R1, R2
+ CMOVT R1, R2
+
+ //
+ // conditional traps
+ //
+ TEQ $1, R1, R2
+ TEQ $1, R1
+
+
+ //
+ // other
+ //
+ CLO R1, R2
+ SQRTD F0, F1
+ MUL R1, R2, R3
+
+
+ //
+ // RET
+ //
+ // LRETRN comma // asm doesn't support the trailing comma.
+ // {
+ // outcode(int($1), &nullgen, 0, &nullgen);
+ // }
+ SYSCALL
+ BEQ R1, 2(PC)
+ RET
+
+
+ // More JMP/JAL cases, and canonical names JMP, CALL.
+
+ JAL foo(SB) // CALL foo(SB)
+ BEQ R1, 2(PC)
+ JMP foo(SB)
+ CALL foo(SB)
+ RET foo(SB)
+
+ // unary operation
+ NEGW R1, R2 // 00011023
+ CLZ R1, R2 // 70221020
+ CLO R1, R2 // 70221021
+
+ // to (Hi, Lo)
+ MADD R2, R1 // 70220000
+ MSUB R2, R1 // 70220004
+ MUL R2, R1 // 00220018
+
+ // END
+ //
+ // LEND comma // asm doesn't support the trailing comma.
+ // {
+ // outcode(int($1), &nullgen, 0, &nullgen);
+ // }
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/mips64.s b/src/cmd/asm/internal/asm/testdata/mips64.s
new file mode 100644
index 0000000..99044d8
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/mips64.s
@@ -0,0 +1,633 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input was created by taking the ppc64 testcase and modifying
+// it by hand.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT foo(SB),DUPOK|NOSPLIT,$0
+//
+// branch
+//
+// LBRA rel
+// {
+// outcode(int($1), &nullgen, 0, &$2);
+// }
+ BEQ R1, 2(PC)
+label0:
+ JMP 1(PC) // JMP 1(PC) // 10000001
+ BEQ R1, 2(PC)
+ JMP label0+0 // JMP 3 // 1000fffd
+ BEQ R1, 2(PC)
+ JAL 1(PC) // CALL 1(PC) // 0c00000e
+ BEQ R1, 2(PC)
+ JAL label0+0 // CALL 3 // 0c000006
+
+// LBRA addr
+// {
+// outcode(int($1), &nullgen, 0, &$2);
+// }
+ BEQ R1, 2(PC)
+ JMP 0(R1) // JMP (R1) // 00200008
+ BEQ R1, 2(PC)
+ JMP foo+0(SB) // JMP foo(SB) // 08000018
+ BEQ R1, 2(PC)
+ JAL 0(R1) // CALL (R1) // 0020f809
+ BEQ R1, 2(PC)
+ JAL foo+0(SB) // CALL foo(SB) // 0c000020
+
+//
+// BEQ/BNE
+//
+// LBRA rreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label1:
+ BEQ R1, 1(PC) // BEQ R1, 1(PC) // 10200001
+ BEQ R1, label1 // BEQ R1, 18 // 1020fffd
+
+// LBRA rreg ',' sreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label2:
+ BEQ R1, R2, 1(PC) // BEQ R1, R2, 1(PC) // 10220001
+ BEQ R1, R2, label2 // BEQ R1, R2, 20 // 1022fffd
+
+//
+// other integer conditional branch
+//
+// LBRA rreg ',' rel
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+label3:
+ BLTZ R1, 1(PC) // BLTZ R1, 1(PC) // 04200001
+ BLTZ R1, label3 // BLTZ R1, 22 // 0420fffd
+
+//
+// floating point conditional branch
+//
+// LBRA rel
+label4:
+ BFPT 1(PC) // BFPT 1(PC) // 4501000100000000
+ BFPT label4 // BFPT 24 // 4501fffd00000000
+
+//inst:
+//
+// load ints and bytes
+//
+// LMOVV rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVV R25, R17 // 00198825
+ MOVV R1, R2 // 00011025
+ MOVV LO, R1 // 00000812
+ MOVV HI, R1 // 00000810
+ MOVV R1, LO // 00200013
+ MOVV R1, HI // 00200011
+
+
+// LMOVW rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW R1, R2 // 00011004
+ MOVW LO, R1 // 00000812
+ MOVW HI, R1 // 00000810
+ MOVW R1, LO // 00200013
+ MOVW R1, HI // 00200011
+ MOVWU R14, R27 // 000ed83c001bd83e
+
+// LMOVH rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVH R16, R27 // 0010dc00001bdc03
+ MOVHU R1, R3 // 3023ffff
+
+// LMOVB rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVB R8, R9 // 00084e0000094e03
+ MOVBU R12, R17 // 319100ff
+
+// LMOVV addr ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVV foo<>+3(SB), R2
+ MOVV (R5), R18 // dcb20000
+ MOVV 8(R16), R4 // de040008
+ MOVV -32(R14), R1 // ddc1ffe0
+ LLV (R1), R2 // d0220000
+
+// LMOVW addr ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW foo<>+3(SB), R2
+ MOVW (R11), R22 // 8d760000
+ MOVW 1(R9), R24 // 8d380001
+ MOVW -17(R24), R8 // 8f08ffef
+ MOVWU (R11), R22 // 9d760000
+ MOVWU 1(R9), R24 // 9d380001
+ MOVWU -17(R24), R8 // 9f08ffef
+ LL (R1), R2 // c0220000
+
+// LMOVH addr ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVH foo<>+3(SB), R2
+ MOVH (R20), R7 // 86870000
+ MOVH 54(R11), R26 // 857a0036
+ MOVH -42(R3), R20 // 8474ffd6
+ MOVHU (R20), R7 // 96870000
+ MOVHU 54(R11), R26 // 957a0036
+ MOVHU -42(R3), R20 // 9474ffd6
+
+// LMOVB addr ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVB foo<>+3(SB), R2
+ MOVB (R4), R21 // 80950000
+ MOVB 9(R19), R18 // 82720009
+ MOVB -10(R19), R18 // 8272fff6
+ MOVBU (R4), R21 // 90950000
+ MOVBU 9(R19), R18 // 92720009
+ MOVBU -10(R19), R18 // 9272fff6
+
+//
+// load floats
+//
+// LFMOV addr ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVD foo<>+3(SB), F2
+ MOVD 16(R1), F2
+ MOVD (R1), F2
+
+// LFMOV fimm ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVD $0.1, F2 // MOVD $(0.10000000000000001), F2
+
+// LFMOV freg ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVD F1, F2
+
+// LFMOV freg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVD F2, foo<>+3(SB)
+ MOVD F2, 16(R1)
+ MOVD F2, (R1)
+
+//
+// store ints and bytes
+//
+// LMOVV rreg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVV R1, foo<>+3(SB)
+ MOVV R18, (R5) // fcb20000
+ MOVV R4, 8(R16) // fe040008
+ MOVV R1, -32(R14) // fdc1ffe0
+ SCV R1, (R2) // f0410000
+
+// LMOVW rreg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW R1, foo<>+3(SB)
+ MOVW R8, (R3) // ac680000
+ MOVW R11, 19(R2) // ac4b0013
+ MOVW R25, -89(R22) // aed9ffa7
+ MOVWU R8, (R3) // ac680000
+ MOVWU R11, 19(R2) // ac4b0013
+ MOVWU R25, -89(R22) // aed9ffa7
+ SC R1, (R2) // e0410000
+
+// LMOVH rreg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVH R13, (R7) // a4ed0000
+ MOVH R10, 61(R23) // a6ea003d
+ MOVH R8, -33(R12) // a588ffdf
+ MOVHU R13, (R7) // a4ed0000
+ MOVHU R10, 61(R23) // a6ea003d
+ MOVHU R8, -33(R12) // a588ffdf
+
+// LMOVB rreg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVB R1, foo<>+3(SB)
+ MOVB R5, -18(R4) // a085ffee
+ MOVB R10, 9(R13) // a1aa0009
+ MOVB R15, (R13) // a1af0000
+ MOVBU R5, -18(R4) // a085ffee
+ MOVBU R10, 9(R13) // a1aa0009
+ MOVBU R15, (R13) // a1af0000
+
+//
+// store floats
+//
+// LMOVW freg ',' addr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVD F1, foo<>+3(SB)
+ MOVD F1, 16(R2)
+ MOVD F1, (R2)
+
+//
+// floating point status
+//
+// LMOVW fpscr ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW FCR31, R1 // 4441f800
+
+// LMOVW freg ',' fpscr
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW R1, FCR31 // 44c1f800
+
+// LMOVW rreg ',' mreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW R1, M1 // 40810800
+ MOVV R1, M1 // 40a10800
+
+// LMOVW mreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW M1, R1 // 40010800
+ MOVV M1, R1 // 40210800
+
+
+//
+// integer operations
+// logical instructions
+// shift instructions
+// unary instructions
+//
+// LADDW rreg ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ ADD R5, R9, R10 // 01255020
+ ADDU R13, R14, R19 // 01cd9821
+ ADDV R5, R9, R10 // 0125502c
+ ADDVU R13, R14, R19 // 01cd982d
+
+// LADDW imm ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ ADD $15176, R14, R9 // 21c93b48
+ ADD $-9, R5, R8 // 20a8fff7
+ ADDU $10, R9, R9 // 2529000a
+ ADDV $15176, R14, R9 // 61c93b48
+ ADDV $-9, R5, R8 // 60a8fff7
+ ADDVU $10, R9, R9 // 6529000a
+
+// LADDW rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ ADD R1, R2 // 00411020
+ ADDU R1, R2 // 00411021
+ ADDV R1, R2 // 0041102c
+ ADDVU R1, R2 // 0041102d
+
+// LADDW imm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ ADD $4, R1 // 20210004
+ ADDV $4, R1 // 60210004
+ ADDU $4, R1 // 24210004
+ ADDVU $4, R1 // 64210004
+ ADD $-7193, R24 // 2318e3e7
+ ADDV $-7193, R24 // 6318e3e7
+
+// LSUBW rreg ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ SUB R6, R26, R27 // 0346d822
+ SUBU R6, R26, R27 // 0346d823
+ SUBV R16, R17, R26 // 0230d02e
+ SUBVU R16, R17, R26 // 0230d02f
+
+// LSUBW imm ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ SUB $-3126, R17, R22 // 22360c36
+ SUB $3126, R17, R22 // 2236f3ca
+ SUBU $16384, R17, R12 // 262cc000
+ SUBV $-6122, R10, R9 // 614917ea
+ SUBV $6122, R10, R9 // 6149e816
+ SUBVU $1203, R17, R12 // 662cfb4d
+
+// LSUBW rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ SUB R14, R13 // 01ae6822
+ SUBU R14, R13 // 01ae6823
+ SUBV R4, R3 // 0064182e
+ SUBVU R4, R3 // 0064182f
+// LSUBW imm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ SUB $6512, R13 // 21ade690
+ SUB $-6512, R13 // 21ad1970
+ SUBU $6512, R13 // 25ade690
+ SUBV $9531, R16 // 6210dac5
+ SUBV $-9531, R13 // 61ad253b
+ SUBVU $9531, R16 // 6610dac5
+
+// LMUL rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MUL R19, R8 // 01130018
+ MULU R21, R13 // 01b50019
+ MULV R19, R8 // 0113001c
+ MULVU R21, R13 // 01b5001d
+
+// LDIV rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ DIV R18, R22 // 02d2001a
+ DIVU R14, R9 // 012e001b
+ DIVV R8, R13 // 01a8001e
+ DIVVU R16, R19 // 0270001f
+
+// LREM rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ REM R18, R22 // 02d2001a
+ REMU R14, R9 // 012e001b
+ REMV R8, R13 // 01a8001e
+ REMVU R16, R19 // 0270001f
+
+// LSHW rreg ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ SLL R1, R2, R3 // 00221804
+ SLLV R10, R22, R21 // 0156a814
+ SRL R27, R6, R17 // 03668806
+ SRLV R27, R6, R17 // 03668816
+ SRA R11, R19, R20 // 0173a007
+ SRAV R20, R19, R19 // 02939817
+ ROTR R19, R18, R20 // 0272a046
+ ROTRV R9, R13, R16 // 012d8056
+
+// LSHW rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ SLL R1, R2 // 00221004
+ SLLV R10, R22 // 0156b014
+ SRL R27, R6 // 03663006
+ SRLV R27, R6 // 03663016
+ SRA R11, R19 // 01739807
+ SRAV R20, R19 // 02939817
+ ROTR R20, R19 // 02939846
+ ROTRV R16, R9 // 02094856
+
+// LSHW imm ',' sreg ',' rreg
+// {
+// outcode(int($1), &$2, int($4), &$6);
+// }
+ SLL $19, R22, R21 // 0016acc0
+ SLLV $19, R22, R21 // 0016acf8
+ SRL $31, R6, R17 // 00068fc2
+ SRLV $31, R6, R17 // 00068ffa
+ SRA $8, R8, R19 // 00089a03
+ SRAV $19, R8, R7 // 00083cfb
+ ROTR $12, R8, R3 // 00281b02
+ ROTRV $8, R22, R22 // 0036b23a
+
+// LSHW imm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ SLL $19, R21 // 0015acc0
+ SLLV $19, R21 // 0015acf8
+ SRL $31, R17 // 00118fc2
+ SRLV $31, R17 // 00118ffa
+ SRA $3, R12 // 000c60c3
+ SRAV $12, R3 // 00031b3b
+ ROTR $12, R8 // 00284302
+ ROTRV $63, R22 // 0036b7fe
+
+
+// LAND/LXOR/LNOR/LOR rreg ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ AND R14, R8 // 010e4024
+ XOR R15, R9 // 012f4826
+ NOR R16, R10 // 01505027
+ OR R17, R11 // 01715825
+
+// LAND/LXOR/LOR imm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ AND $11, R17, R7 // 3227000b
+ XOR $341, R1, R23 // 38370155
+ OR $254, R25, R13 // 372d00fe
+//
+// move immediate: macro for lui+or, addi, addis, and other combinations
+//
+// LMOVW imm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW $1, R1
+ MOVV $1, R1
+
+// LMOVW ximm ',' rreg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ MOVW $1, R1
+ MOVW $foo(SB), R1
+ MOVV $1, R1
+ MOVV $foo(SB), R1
+
+//
+// floating point operate
+//
+// LFCONV freg ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ ABSD F1, F2
+
+// LFADD freg ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ ADDD F1, F2
+
+// LFADD freg ',' freg ',' freg
+// {
+// outcode(int($1), &$2, int($4.Reg), &$6);
+// }
+ ADDD F1, F2, F3
+
+// LFCMP freg ',' freg
+// {
+// outcode(int($1), &$2, 0, &$4);
+// }
+ CMPEQD F1, F2
+
+
+//
+// WORD
+//
+ WORD $1 // 00000001
+ NOOP // 00000000
+ SYNC // 0000000f
+
+//
+// NOP
+//
+// LNOP comma // asm doesn't support the trailing comma.
+// {
+// outcode(int($1), &nullgen, 0, &nullgen);
+// }
+ NOP
+
+// LNOP rreg comma // asm doesn't support the trailing comma.
+// {
+// outcode(int($1), &$2, 0, &nullgen);
+// }
+ NOP R2
+
+// LNOP freg comma // asm doesn't support the trailing comma.
+// {
+// outcode(int($1), &$2, 0, &nullgen);
+// }
+ NOP F2
+
+// LNOP ',' rreg // asm doesn't support the leading comma.
+// {
+// outcode(int($1), &nullgen, 0, &$3);
+// }
+ NOP R2
+
+// LNOP ',' freg // asm doesn't support the leading comma.
+// {
+// outcode(int($1), &nullgen, 0, &$3);
+// }
+ NOP F2
+
+// LNOP imm
+// {
+// outcode(int($1), &$2, 0, &nullgen);
+// }
+ NOP $4
+
+//
+// special
+//
+ SYSCALL
+ BREAK
+ // overloaded cache opcode:
+ BREAK R1, (R1)
+
+//
+// RET
+//
+// LRETRN comma // asm doesn't support the trailing comma.
+// {
+// outcode(int($1), &nullgen, 0, &nullgen);
+// }
+ SYSCALL
+ BEQ R1, 2(PC)
+ RET
+
+
+// More JMP/JAL cases, and canonical names JMP, CALL.
+
+ JAL foo(SB) // CALL foo(SB)
+ BEQ R1, 2(PC)
+ JMP foo(SB)
+ CALL foo(SB)
+ RET foo(SB)
+
+ NEGW R1, R2 // 00011023
+ NEGV R1, R2 // 0001102f
+ RET
+
+// MSA VMOVI
+ VMOVB $511, W0 // 7b0ff807
+ VMOVH $24, W23 // 7b20c5c7
+ VMOVW $-24, W15 // 7b5f43c7
+ VMOVD $-511, W31 // 7b700fc7
+
+ VMOVB (R0), W8 // 78000220
+ VMOVB 511(R3), W0 // 79ff1820
+ VMOVB -512(R12), W21 // 7a006560
+ VMOVH (R24), W12 // 7800c321
+ VMOVH 110(R19), W8 // 78379a21
+ VMOVH -70(R12), W3 // 7bdd60e1
+ VMOVW (R3), W31 // 78001fe2
+ VMOVW 64(R20), W16 // 7810a422
+ VMOVW -104(R17), W24 // 7be68e22
+ VMOVD (R3), W2 // 780018a3
+ VMOVD 128(R23), W19 // 7810bce3
+ VMOVD -256(R31), W0 // 7be0f823
+
+ VMOVB W8, (R0) // 78000224
+ VMOVB W0, 511(R3) // 79ff1824
+ VMOVB W21, -512(R12) // 7a006564
+ VMOVH W12, (R24) // 7800c325
+ VMOVH W8, 110(R19) // 78379a25
+ VMOVH W3, -70(R12) // 7bdd60e5
+ VMOVW W31, (R3) // 78001fe6
+ VMOVW W16, 64(R20) // 7810a426
+ VMOVW W24, -104(R17) // 7be68e26
+ VMOVD W2, (R3) // 780018a7
+ VMOVD W19, 128(R23) // 7810bce7
+ VMOVD W0, -256(R31) // 7be0f827
+ RET
+
+// END
+//
+// LEND comma // asm doesn't support the trailing comma.
+// {
+// outcode(int($1), &nullgen, 0, &nullgen);
+// }
+ END
diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s
new file mode 100644
index 0000000..c140fd0
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/ppc64.s
@@ -0,0 +1,767 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This contains the majority of valid opcode combinations
+// available in cmd/internal/obj/ppc64/asm9.go with
+// their valid instruction encodings.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ // move constants
+ MOVD $1, R3 // 38600001
+ MOVD $-1, R4 // 3880ffff
+ MOVD $65535, R5 // 6005ffff
+ MOVD $65536, R6 // 64060001
+ MOVD $-32767, R5 // 38a08001
+ MOVD $-32768, R6 // 38c08000
+ MOVD $1234567, R5 // 6405001260a5d687
+ MOVW $1, R3 // 38600001
+ MOVW $-1, R4 // 3880ffff
+ MOVW $65535, R5 // 6005ffff
+ MOVW $65536, R6 // 64060001
+ MOVW $-32767, R5 // 38a08001
+ MOVW $-32768, R6 // 38c08000
+ MOVW $1234567, R5 // 6405001260a5d687
+ MOVD 8(R3), R4 // e8830008
+ MOVD (R3)(R4), R5 // 7ca4182a
+ MOVW 4(R3), R4 // e8830006
+ MOVW (R3)(R4), R5 // 7ca41aaa
+ MOVWZ 4(R3), R4 // 80830004
+ MOVWZ (R3)(R4), R5 // 7ca4182e
+ MOVH 4(R3), R4 // a8830004
+ MOVH (R3)(R4), R5 // 7ca41aae
+ MOVHZ 2(R3), R4 // a0830002
+ MOVHZ (R3)(R4), R5 // 7ca41a2e
+ MOVB 1(R3), R4 // 888300017c840774
+ MOVB (R3)(R4), R5 // 7ca418ae7ca50774
+ MOVBZ 1(R3), R4 // 88830001
+ MOVBZ (R3)(R4), R5 // 7ca418ae
+ MOVDBR (R3)(R4), R5 // 7ca41c28
+ MOVWBR (R3)(R4), R5 // 7ca41c2c
+ MOVHBR (R3)(R4), R5 // 7ca41e2c
+ MOVD $foo+4009806848(FP), R5 // 3ca1ef0138a5cc20
+ MOVD $foo(SB), R5 // 3ca0000038a50000
+
+ MOVDU 8(R3), R4 // e8830009
+ MOVDU (R3)(R4), R5 // 7ca4186a
+ MOVWU (R3)(R4), R5 // 7ca41aea
+ MOVWZU 4(R3), R4 // 84830004
+ MOVWZU (R3)(R4), R5 // 7ca4186e
+ MOVHU 2(R3), R4 // ac830002
+ MOVHU (R3)(R4), R5 // 7ca41aee
+ MOVHZU 2(R3), R4 // a4830002
+ MOVHZU (R3)(R4), R5 // 7ca41a6e
+ MOVBU 1(R3), R4 // 8c8300017c840774
+ MOVBU (R3)(R4), R5 // 7ca418ee7ca50774
+ MOVBZU 1(R3), R4 // 8c830001
+ MOVBZU (R3)(R4), R5 // 7ca418ee
+
+ MOVD R4, 8(R3) // f8830008
+ MOVD R5, (R3)(R4) // 7ca4192a
+ MOVW R4, 4(R3) // 90830004
+ MOVW R5, (R3)(R4) // 7ca4192e
+ MOVH R4, 2(R3) // b0830002
+ MOVH R5, (R3)(R4) // 7ca41b2e
+ MOVB R4, 1(R3) // 98830001
+ MOVB R5, (R3)(R4) // 7ca419ae
+ MOVDBR R5, (R3)(R4) // 7ca41d28
+ MOVWBR R5, (R3)(R4) // 7ca41d2c
+ MOVHBR R5, (R3)(R4) // 7ca41f2c
+
+ MOVDU R4, 8(R3) // f8830009
+ MOVDU R5, (R3)(R4) // 7ca4196a
+ MOVWU R4, 4(R3) // 94830004
+ MOVWU R5, (R3)(R4) // 7ca4196e
+ MOVHU R4, 2(R3) // b4830002
+ MOVHU R5, (R3)(R4) // 7ca41b6e
+ MOVBU R4, 1(R3) // 9c830001
+ MOVBU R5, (R3)(R4) // 7ca419ee
+
+ MOVB $0, R4 // 38800000
+ MOVBZ $0, R4 // 38800000
+ MOVH $0, R4 // 38800000
+ MOVHZ $0, R4 // 38800000
+ MOVW $0, R4 // 38800000
+ MOVWZ $0, R4 // 38800000
+ MOVD $0, R4 // 38800000
+ MOVD $0, R0 // 38000000
+
+ ADD $1, R3 // 38630001
+ ADD $1, R3, R4 // 38830001
+ ADD $-1, R4 // 3884ffff
+ ADD $-1, R4, R5 // 38a4ffff
+ ADD $65535, R5 // 601fffff7cbf2a14
+ ADD $65535, R5, R6 // 601fffff7cdf2a14
+ ADD $65536, R6 // 3cc60001
+ ADD $65536, R6, R7 // 3ce60001
+ ADD $-32767, R5 // 38a58001
+ ADD $-32767, R5, R4 // 38858001
+ ADD $-32768, R6 // 38c68000
+ ADD $-32768, R6, R5 // 38a68000
+ ADD $1234567, R5 // 641f001263ffd6877cbf2a14
+ ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14
+ ADDEX R3, R5, $3, R6 // 7cc32f54
+ ADDIS $8, R3 // 3c630008
+ ADDIS $1000, R3, R4 // 3c8303e8
+
+ ANDCC $1, R3 // 70630001
+ ANDCC $1, R3, R4 // 70640001
+ ANDCC $-1, R4 // 3be0ffff7fe42039
+ ANDCC $-1, R4, R5 // 3be0ffff7fe52039
+ ANDCC $65535, R5 // 70a5ffff
+ ANDCC $65535, R5, R6 // 70a6ffff
+ ANDCC $65536, R6 // 74c60001
+ ANDCC $65536, R6, R7 // 74c70001
+ ANDCC $-32767, R5 // 3be080017fe52839
+ ANDCC $-32767, R5, R4 // 3be080017fe42839
+ ANDCC $-32768, R6 // 3be080007fe63039
+ ANDCC $-32768, R5, R6 // 3be080007fe62839
+ ANDCC $1234567, R5 // 641f001263ffd6877fe52839
+ ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839
+ ANDISCC $1, R3 // 74630001
+ ANDISCC $1000, R3, R4 // 746403e8
+
+ OR $1, R3 // 60630001
+ OR $1, R3, R4 // 60640001
+ OR $-1, R4 // 3be0ffff7fe42378
+ OR $-1, R4, R5 // 3be0ffff7fe52378
+ OR $65535, R5 // 60a5ffff
+ OR $65535, R5, R6 // 60a6ffff
+ OR $65536, R6 // 64c60001
+ OR $65536, R6, R7 // 64c70001
+ OR $-32767, R5 // 3be080017fe52b78
+ OR $-32767, R5, R6 // 3be080017fe62b78
+ OR $-32768, R6 // 3be080007fe63378
+ OR $-32768, R6, R7 // 3be080007fe73378
+ OR $1234567, R5 // 641f001263ffd6877fe52b78
+ OR $1234567, R5, R3 // 641f001263ffd6877fe32b78
+ ORIS $255, R3, R4
+
+ XOR $1, R3 // 68630001
+ XOR $1, R3, R4 // 68640001
+ XOR $-1, R4 // 3be0ffff7fe42278
+ XOR $-1, R4, R5 // 3be0ffff7fe52278
+ XOR $65535, R5 // 68a5ffff
+ XOR $65535, R5, R6 // 68a6ffff
+ XOR $65536, R6 // 6cc60001
+ XOR $65536, R6, R7 // 6cc70001
+ XOR $-32767, R5 // 3be080017fe52a78
+ XOR $-32767, R5, R6 // 3be080017fe62a78
+ XOR $-32768, R6 // 3be080007fe63278
+ XOR $-32768, R6, R7 // 3be080007fe73278
+ XOR $1234567, R5 // 641f001263ffd6877fe52a78
+ XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78
+ XORIS $15, R3, R4
+
+ // TODO: the order of CR operands doesn't match
+ CMP R3, R4 // 7c232000
+ CMPU R3, R4 // 7c232040
+ CMPW R3, R4 // 7c032000
+ CMPWU R3, R4 // 7c032040
+ CMPB R3,R4,R4 // 7c6423f8
+ CMPEQB R3,R4,CR6 // 7f0321c0
+
+ // TODO: constants for ADDC?
+ ADD R3, R4 // 7c841a14
+ ADD R3, R4, R5 // 7ca41a14
+ ADDC R3, R4 // 7c841814
+ ADDC R3, R4, R5 // 7ca41814
+ ADDE R3, R4 // 7c841914
+ ADDECC R3, R4 // 7c841915
+ ADDEV R3, R4 // 7c841d14
+ ADDEVCC R3, R4 // 7c841d15
+ ADDV R3, R4 // 7c841e14
+ ADDVCC R3, R4 // 7c841e15
+ ADDCCC R3, R4, R5 // 7ca41815
+ ADDME R3, R4 // 7c8301d4
+ ADDMECC R3, R4 // 7c8301d5
+ ADDMEV R3, R4 // 7c8305d4
+ ADDMEVCC R3, R4 // 7c8305d5
+ ADDCV R3, R4 // 7c841c14
+ ADDCVCC R3, R4 // 7c841c15
+ ADDZE R3, R4 // 7c830194
+ ADDZECC R3, R4 // 7c830195
+ ADDZEV R3, R4 // 7c830594
+ ADDZEVCC R3, R4 // 7c830595
+ SUBME R3, R4 // 7c8301d0
+ SUBMECC R3, R4 // 7c8301d1
+ SUBMEV R3, R4 // 7c8305d0
+ SUBZE R3, R4 // 7c830190
+ SUBZECC R3, R4 // 7c830191
+ SUBZEV R3, R4 // 7c830590
+ SUBZEVCC R3, R4 // 7c830591
+
+ AND R3, R4 // 7c841838
+ AND R3, R4, R5 // 7c851838
+ ANDN R3, R4, R5 // 7c851878
+ ANDCC R3, R4, R5 // 7c851839
+ OR R3, R4 // 7c841b78
+ OR R3, R4, R5 // 7c851b78
+ ORN R3, R4, R5 // 7c851b38
+ ORCC R3, R4, R5 // 7c851b79
+ XOR R3, R4 // 7c841a78
+ XOR R3, R4, R5 // 7c851a78
+ XORCC R3, R4, R5 // 7c851a79
+ NAND R3, R4, R5 // 7c851bb8
+ NANDCC R3, R4, R5 // 7c851bb9
+ EQV R3, R4, R5 // 7c851a38
+ EQVCC R3, R4, R5 // 7c851a39
+ NOR R3, R4, R5 // 7c8518f8
+ NORCC R3, R4, R5 // 7c8518f9
+
+ SUB R3, R4 // 7c832050
+ SUB R3, R4, R5 // 7ca32050
+ SUBC R3, R4 // 7c832010
+ SUBC R3, R4, R5 // 7ca32010
+
+ MULLW R3, R4 // 7c8419d6
+ MULLW R3, R4, R5 // 7ca419d6
+ MULLW $10, R3 // 1c63000a
+ MULLW $10000000, R3 // 641f009863ff96807c7f19d6
+
+ MULLWCC R3, R4, R5 // 7ca419d7
+ MULHW R3, R4, R5 // 7ca41896
+
+ MULHWU R3, R4, R5 // 7ca41816
+ MULLD R3, R4 // 7c8419d2
+ MULLD R4, R4, R5 // 7ca421d2
+ MULLD $20, R4 // 1c840014
+ MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2
+
+ MULLDCC R3, R4, R5 // 7ca419d3
+ MULHD R3, R4, R5 // 7ca41892
+ MULHDCC R3, R4, R5 // 7ca41893
+
+ MULLWV R3, R4 // 7c841dd6
+ MULLWV R3, R4, R5 // 7ca41dd6
+ MULLWVCC R3, R4, R5 // 7ca41dd7
+ MULHWUCC R3, R4, R5 // 7ca41817
+ MULLDV R3, R4, R5 // 7ca41dd2
+ MULLDVCC R3, R4, R5 // 7ca41dd3
+
+ DIVD R3,R4 // 7c841bd2
+ DIVD R3, R4, R5 // 7ca41bd2
+ DIVDCC R3,R4, R5 // 7ca41bd3
+ DIVDU R3, R4, R5 // 7ca41b92
+ DIVDV R3, R4, R5 // 7ca41fd2
+ DIVDUCC R3, R4, R5 // 7ca41b93
+ DIVDVCC R3, R4, R5 // 7ca41fd3
+ DIVDUV R3, R4, R5 // 7ca41f92
+ DIVDUVCC R3, R4, R5 // 7ca41f93
+ DIVDE R3, R4, R5 // 7ca41b52
+ DIVDECC R3, R4, R5 // 7ca41b53
+ DIVDEU R3, R4, R5 // 7ca41b12
+ DIVDEUCC R3, R4, R5 // 7ca41b13
+
+ REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050
+ REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050
+ REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050
+ REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050
+
+ MADDHD R3,R4,R5,R6 // 10c32170
+ MADDHDU R3,R4,R5,R6 // 10c32171
+
+ MODUD R3, R4, R5 // 7ca41a12
+ MODUW R3, R4, R5 // 7ca41a16
+ MODSD R3, R4, R5 // 7ca41e12
+ MODSW R3, R4, R5 // 7ca41e16
+
+ SLW $8, R3, R4 // 5464402e
+ SLW R3, R4, R5 // 7c851830
+ SLWCC R3, R4 // 7c841831
+ SLD $16, R3, R4 // 786483e4
+ SLD R3, R4, R5 // 7c851836
+ SLDCC R3, R4 // 7c841837
+
+ SRW $8, R3, R4 // 5464c23e
+ SRW R3, R4, R5 // 7c851c30
+ SRWCC R3, R4 // 7c841c31
+ SRAW $8, R3, R4 // 7c644670
+ SRAW R3, R4, R5 // 7c851e30
+ SRAWCC R3, R4 // 7c841e31
+ SRD $16, R3, R4 // 78648402
+ SRD R3, R4, R5 // 7c851c36
+ SRDCC R3, R4 // 7c841c37
+ SRAD $16, R3, R4 // 7c648674
+ SRAD R3, R4, R5 // 7c851e34
+ SRDCC R3, R4 // 7c841c37
+ ROTLW $16, R3, R4 // 5464803e
+ ROTLW R3, R4, R5 // 5c85183e
+ EXTSWSLI $3, R4, R5 // 7c851ef4
+ RLWMI $7, R3, $65535, R6 // 50663c3e
+ RLWMI $7, R3, $16, $31, R6 // 50663c3e
+ RLWMICC $7, R3, $65535, R6 // 50663c3f
+ RLWMICC $7, R3, $16, $31, R6 // 50663c3f
+ RLWNM $3, R4, $7, R6 // 54861f7e
+ RLWNM $3, R4, $29, $31, R6 // 54861f7e
+ RLWNM R3, R4, $7, R6 // 5c861f7e
+ RLWNM R3, R4, $29, $31, R6 // 5c861f7e
+ RLWNMCC $3, R4, $7, R6 // 54861f7f
+ RLWNMCC $3, R4, $29, $31, R6 // 54861f7f
+ RLWNMCC R3, R4, $7, R6 // 5c861f7f
+ RLWNMCC R3, R4, $29, $31, R6 // 5c861f7f
+ RLDMI $0, R4, $7, R6 // 7886076c
+ RLDMICC $0, R4, $7, R6 // 7886076d
+ RLDIMI $0, R4, $7, R6 // 788601cc
+ RLDIMICC $0, R4, $7, R6 // 788601cd
+ RLDC $0, R4, $15, R6 // 78860728
+ RLDCCC $0, R4, $15, R6 // 78860729
+ RLDCL $0, R4, $7, R6 // 78860770
+ RLDCLCC $0, R4, $15, R6 // 78860721
+ RLDCR $0, R4, $-16, R6 // 788606f2
+ RLDCRCC $0, R4, $-16, R6 // 788606f3
+ RLDICL $0, R4, $15, R6 // 788603c0
+ RLDICLCC $0, R4, $15, R6 // 788603c1
+ RLDICR $0, R4, $15, R6 // 788603c4
+ RLDICRCC $0, R4, $15, R6 // 788603c5
+ RLDIC $0, R4, $15, R6 // 788603c8
+ RLDICCC $0, R4, $15, R6 // 788603c9
+ CLRLSLWI $16, R5, $8, R4 // 54a4422e
+ CLRLSLDI $24, R4, $2, R3 // 78831588
+ RLDCR $1, R1, $-16, R1 // 78210ee4
+ RLDCRCC $1, R1, $-16, R1 // 78210ee5
+
+ BEQ 0(PC) // 41820000
+ BEQ CR1,0(PC) // 41860000
+ BGE 0(PC) // 40800000
+ BGE CR2,0(PC) // 40880000
+ BGT 4(PC) // 41810010
+ BGT CR3,4(PC) // 418d0010
+ BLE 0(PC) // 40810000
+ BLE CR4,0(PC) // 40910000
+ BLT 0(PC) // 41800000
+ BLT CR5,0(PC) // 41940000
+ BNE 0(PC) // 40820000
+ BLT CR6,0(PC) // 41980000
+ JMP 8(PC) // 48000010
+
+ NOP
+ NOP R2
+ NOP F2
+ NOP $4
+
+ CRAND CR0GT, CR0EQ, CR0SO // 4c620a02
+ CRANDN CR0GT, CR0EQ, CR0SO // 4c620902
+ CREQV CR0GT, CR0EQ, CR0SO // 4c620a42
+ CRNAND CR0GT, CR0EQ, CR0SO // 4c6209c2
+ CRNOR CR0GT, CR0EQ, CR0SO // 4c620842
+ CROR CR0GT, CR0EQ, CR0SO // 4c620b82
+ CRORN CR0GT, CR0EQ, CR0SO // 4c620b42
+ CRXOR CR0GT, CR0EQ, CR0SO // 4c620982
+
+ ISEL $1, R3, R4, R5 // 7ca3205e
+ ISEL $0, R3, R4, R5 // 7ca3201e
+ ISEL $2, R3, R4, R5 // 7ca3209e
+ ISEL $3, R3, R4, R5 // 7ca320de
+ ISEL $4, R3, R4, R5 // 7ca3211e
+ POPCNTB R3, R4 // 7c6400f4
+ POPCNTW R3, R4 // 7c6402f4
+ POPCNTD R3, R4 // 7c6403f4
+
+ PASTECC R3, R4 // 7c23270d
+ COPY R3, R4 // 7c23260c
+
+ // load-and-reserve
+ LBAR (R4)(R3*1),$1,R5 // 7ca32069
+ LBAR (R4),$0,R5 // 7ca02068
+ LBAR (R3),R5 // 7ca01868
+ LHAR (R4)(R3*1),$1,R5 // 7ca320e9
+ LHAR (R4),$0,R5 // 7ca020e8
+ LHAR (R3),R5 // 7ca018e8
+ LWAR (R4)(R3*1),$1,R5 // 7ca32029
+ LWAR (R4),$0,R5 // 7ca02028
+ LWAR (R3),R5 // 7ca01828
+ LDAR (R4)(R3*1),$1,R5 // 7ca320a9
+ LDAR (R4),$0,R5 // 7ca020a8
+ LDAR (R3),R5 // 7ca018a8
+
+ STBCCC R3, (R4)(R5) // 7c65256d
+ STWCCC R3, (R4)(R5) // 7c65212d
+ STDCCC R3, (R4)(R5) // 7c6521ad
+ STHCCC R3, (R4)(R5)
+ STSW R3, (R4)(R5)
+
+ SYNC // 7c0004ac
+ ISYNC // 4c00012c
+ LWSYNC // 7c2004ac
+
+ DARN $1, R5 // 7ca105e6
+
+ DCBF (R3)(R4) // 7c0418ac
+ DCBI (R3)(R4) // 7c041bac
+ DCBST (R3)(R4) // 7c04186c
+ DCBZ (R3)(R4) // 7c041fec
+ DCBT (R3)(R4) // 7c041a2c
+ ICBI (R3)(R4) // 7c041fac
+
+ // float constants
+ FMOVD $(0.0), F1 // f0210cd0
+ FMOVD $(-0.0), F1 // f0210cd0fc200850
+
+ FMOVD 8(R3), F1 // c8230008
+ FMOVD (R3)(R4), F1 // 7c241cae
+ FMOVDU 8(R3), F1 // cc230008
+ FMOVDU (R3)(R4), F1 // 7c241cee
+ FMOVS 4(R3), F1 // c0230004
+ FMOVS (R3)(R4), F1 // 7c241c2e
+ FMOVSU 4(R3), F1 // c4230004
+ FMOVSU (R3)(R4), F1 // 7c241c6e
+
+ FMOVD F1, 8(R3) // d8230008
+ FMOVD F1, (R3)(R4) // 7c241dae
+ FMOVDU F1, 8(R3) // dc230008
+ FMOVDU F1, (R3)(R4) // 7c241dee
+ FMOVS F1, 4(R3) // d0230004
+ FMOVS F1, (R3)(R4) // 7c241d2e
+ FMOVSU F1, 4(R3) // d4230004
+ FMOVSU F1, (R3)(R4) // 7c241d6e
+ FADD F1, F2 // fc42082a
+ FADD F1, F2, F3 // fc62082a
+ FADDCC F1, F2, F3 // fc62082b
+ FADDS F1, F2 // ec42082a
+ FADDS F1, F2, F3 // ec62082a
+ FADDSCC F1, F2, F3 // ec62082b
+ FSUB F1, F2 // fc420828
+ FSUB F1, F2, F3 // fc620828
+ FSUBCC F1, F2, F3 // fc620829
+ FSUBS F1, F2 // ec420828
+ FSUBS F1, F2, F3 // ec620828
+ FSUBCC F1, F2, F3 // fc620829
+ FMUL F1, F2 // fc420072
+ FMUL F1, F2, F3 // fc620072
+ FMULCC F1, F2, F3 // fc620073
+ FMULS F1, F2 // ec420072
+ FMULS F1, F2, F3 // ec620072
+ FMULSCC F1, F2, F3 // ec620073
+ FDIV F1, F2 // fc420824
+ FDIV F1, F2, F3 // fc620824
+ FDIVCC F1, F2, F3 // fc620825
+ FDIVS F1, F2 // ec420824
+ FDIVS F1, F2, F3 // ec620824
+ FDIVSCC F1, F2, F3 // ec620825
+ FMADD F1, F2, F3, F4 // fc8110fa
+ FMADDCC F1, F2, F3, F4 // fc8110fb
+ FMADDS F1, F2, F3, F4 // ec8110fa
+ FMADDSCC F1, F2, F3, F4 // ec8110fb
+ FMSUB F1, F2, F3, F4 // fc8110f8
+ FMSUBCC F1, F2, F3, F4 // fc8110f9
+ FMSUBS F1, F2, F3, F4 // ec8110f8
+ FMSUBSCC F1, F2, F3, F4 // ec8110f9
+ FNMADD F1, F2, F3, F4 // fc8110fe
+ FNMADDCC F1, F2, F3, F4 // fc8110ff
+ FNMADDS F1, F2, F3, F4 // ec8110fe
+ FNMADDSCC F1, F2, F3, F4 // ec8110ff
+ FNMSUB F1, F2, F3, F4 // fc8110fc
+ FNMSUBCC F1, F2, F3, F4 // fc8110fd
+ FNMSUBS F1, F2, F3, F4 // ec8110fc
+ FNMSUBSCC F1, F2, F3, F4 // ec8110fd
+ FSEL F1, F2, F3, F4 // fc8110ee
+ FSELCC F1, F2, F3, F4 // fc8110ef
+ FABS F1, F2 // fc400a10
+ FABSCC F1, F2 // fc400a11
+ FNEG F1, F2 // fc400850
+ FABSCC F1, F2 // fc400a11
+ FRSP F1, F2 // fc400818
+ FRSPCC F1, F2 // fc400819
+ FCTIW F1, F2 // fc40081c
+ FCTIWCC F1, F2 // fc40081d
+ FCTIWZ F1, F2 // fc40081e
+ FCTIWZCC F1, F2 // fc40081f
+ FCTID F1, F2 // fc400e5c
+ FCTIDCC F1, F2 // fc400e5d
+ FCTIDZ F1, F2 // fc400e5e
+ FCTIDZCC F1, F2 // fc400e5f
+ FCFID F1, F2 // fc400e9c
+ FCFIDCC F1, F2 // fc400e9d
+ FCFIDU F1, F2 // fc400f9c
+ FCFIDUCC F1, F2 // fc400f9d
+ FCFIDS F1, F2 // ec400e9c
+ FCFIDSCC F1, F2 // ec400e9d
+ FRES F1, F2 // ec400830
+ FRESCC F1, F2 // ec400831
+ FRIM F1, F2 // fc400bd0
+ FRIMCC F1, F2 // fc400bd1
+ FRIP F1, F2 // fc400b90
+ FRIPCC F1, F2 // fc400b91
+ FRIZ F1, F2 // fc400b50
+ FRIZCC F1, F2 // fc400b51
+ FRIN F1, F2 // fc400b10
+ FRINCC F1, F2 // fc400b11
+ FRSQRTE F1, F2 // fc400834
+ FRSQRTECC F1, F2 // fc400835
+ FSQRT F1, F2 // fc40082c
+ FSQRTCC F1, F2 // fc40082d
+ FSQRTS F1, F2 // ec40082c
+ FSQRTSCC F1, F2 // ec40082d
+ FCPSGN F1, F2 // fc420810
+ FCPSGNCC F1, F2 // fc420811
+ FCMPO F1, F2 // fc011040
+ FCMPU F1, F2 // fc011000
+ LVX (R3)(R4), V1 // 7c2418ce
+ LVXL (R3)(R4), V1 // 7c241ace
+ LVSL (R3)(R4), V1 // 7c24180c
+ LVSR (R3)(R4), V1 // 7c24184c
+ LVEBX (R3)(R4), V1 // 7c24180e
+ LVEHX (R3)(R4), V1 // 7c24184e
+ LVEWX (R3)(R4), V1 // 7c24188e
+ STVX V1, (R3)(R4) // 7c2419ce
+ STVXL V1, (R3)(R4) // 7c241bce
+ STVEBX V1, (R3)(R4) // 7c24190e
+ STVEHX V1, (R3)(R4) // 7c24194e
+ STVEWX V1, (R3)(R4) // 7c24198e
+
+ VAND V1, V2, V3 // 10611404
+ VANDC V1, V2, V3 // 10611444
+ VNAND V1, V2, V3 // 10611584
+ VOR V1, V2, V3 // 10611484
+ VORC V1, V2, V3 // 10611544
+ VXOR V1, V2, V3 // 106114c4
+ VNOR V1, V2, V3 // 10611504
+ VEQV V1, V2, V3 // 10611684
+ VADDUBM V1, V2, V3 // 10611000
+ VADDUHM V1, V2, V3 // 10611040
+ VADDUWM V1, V2, V3 // 10611080
+ VADDUDM V1, V2, V3 // 106110c0
+ VADDUQM V1, V2, V3 // 10611100
+ VADDCUQ V1, V2, V3 // 10611140
+ VADDCUW V1, V2, V3 // 10611180
+ VADDUBS V1, V2, V3 // 10611200
+ VADDUHS V1, V2, V3 // 10611240
+ VADDUWS V1, V2, V3 // 10611280
+ VSUBUBM V1, V2, V3 // 10611400
+ VSUBUHM V1, V2, V3 // 10611440
+ VSUBUWM V1, V2, V3 // 10611480
+ VSUBUDM V1, V2, V3 // 106114c0
+ VSUBUQM V1, V2, V3 // 10611500
+ VSUBCUQ V1, V2, V3 // 10611540
+ VSUBCUW V1, V2, V3 // 10611580
+ VSUBUBS V1, V2, V3 // 10611600
+ VSUBUHS V1, V2, V3 // 10611640
+ VSUBUWS V1, V2, V3 // 10611680
+ VSUBSBS V1, V2, V3 // 10611700
+ VSUBSHS V1, V2, V3 // 10611740
+ VSUBSWS V1, V2, V3 // 10611780
+ VSUBEUQM V1, V2, V3, V4 // 108110fe
+ VSUBECUQ V1, V2, V3, V4 // 108110ff
+ VMULESB V1, V2, V3 // 10611308
+ VMULOSB V1, V2, V3 // 10611108
+ VMULEUB V1, V2, V3 // 10611208
+ VMULOUB V1, V2, V3 // 10611008
+ VMULESH V1, V2, V3 // 10611348
+ VMULOSH V1, V2, V3 // 10611148
+ VMULEUH V1, V2, V3 // 10611248
+ VMULOUH V1, V2, V3 // 10611048
+ VMULESH V1, V2, V3 // 10611348
+ VMULOSW V1, V2, V3 // 10611188
+ VMULEUW V1, V2, V3 // 10611288
+ VMULOUW V1, V2, V3 // 10611088
+ VMULUWM V1, V2, V3 // 10611089
+ VPMSUMB V1, V2, V3 // 10611408
+ VPMSUMH V1, V2, V3 // 10611448
+ VPMSUMW V1, V2, V3 // 10611488
+ VPMSUMD V1, V2, V3 // 106114c8
+ VMSUMUDM V1, V2, V3, V4 // 108110e3
+ VRLB V1, V2, V3 // 10611004
+ VRLH V1, V2, V3 // 10611044
+ VRLW V1, V2, V3 // 10611084
+ VRLD V1, V2, V3 // 106110c4
+ VSLB V1, V2, V3 // 10611104
+ VSLH V1, V2, V3 // 10611144
+ VSLW V1, V2, V3 // 10611184
+ VSL V1, V2, V3 // 106111c4
+ VSLO V1, V2, V3 // 1061140c
+ VSRB V1, V2, V3 // 10611204
+ VSRH V1, V2, V3 // 10611244
+ VSRW V1, V2, V3 // 10611284
+ VSR V1, V2, V3 // 106112c4
+ VSRO V1, V2, V3 // 1061144c
+ VSLD V1, V2, V3 // 106115c4
+ VSRAB V1, V2, V3 // 10611304
+ VSRAH V1, V2, V3 // 10611344
+ VSRAW V1, V2, V3 // 10611384
+ VSRAD V1, V2, V3 // 106113c4
+ VSLDOI $3, V1, V2, V3 // 106110ec
+ VCLZB V1, V2 // 10400f02
+ VCLZH V1, V2 // 10400f42
+ VCLZW V1, V2 // 10400f82
+ VCLZD V1, V2 // 10400fc2
+ VPOPCNTB V1, V2 // 10400f03
+ VPOPCNTH V1, V2 // 10400f43
+ VPOPCNTW V1, V2 // 10400f83
+ VPOPCNTD V1, V2 // 10400fc3
+ VCMPEQUB V1, V2, V3 // 10611006
+ VCMPEQUBCC V1, V2, V3 // 10611406
+ VCMPEQUH V1, V2, V3 // 10611046
+ VCMPEQUHCC V1, V2, V3 // 10611446
+ VCMPEQUW V1, V2, V3 // 10611086
+ VCMPEQUWCC V1, V2, V3 // 10611486
+ VCMPEQUD V1, V2, V3 // 106110c7
+ VCMPEQUDCC V1, V2, V3 // 106114c7
+ VCMPGTUB V1, V2, V3 // 10611206
+ VCMPGTUBCC V1, V2, V3 // 10611606
+ VCMPGTUH V1, V2, V3 // 10611246
+ VCMPGTUHCC V1, V2, V3 // 10611646
+ VCMPGTUW V1, V2, V3 // 10611286
+ VCMPGTUWCC V1, V2, V3 // 10611686
+ VCMPGTUD V1, V2, V3 // 106112c7
+ VCMPGTUDCC V1, V2, V3 // 106116c7
+ VCMPGTSB V1, V2, V3 // 10611306
+ VCMPGTSBCC V1, V2, V3 // 10611706
+ VCMPGTSH V1, V2, V3 // 10611346
+ VCMPGTSHCC V1, V2, V3 // 10611746
+ VCMPGTSW V1, V2, V3 // 10611386
+ VCMPGTSWCC V1, V2, V3 // 10611786
+ VCMPGTSD V1, V2, V3 // 106113c7
+ VCMPGTSDCC V1, V2, V3 // 106117c7
+ VCMPNEZB V1, V2, V3 // 10611107
+ VCMPNEZBCC V1, V2, V3 // 10611507
+ VCMPNEB V1, V2, V3 // 10611007
+ VCMPNEBCC V1, V2, V3 // 10611407
+ VCMPNEH V1, V2, V3 // 10611047
+ VCMPNEHCC V1, V2, V3 // 10611447
+ VCMPNEW V1, V2, V3 // 10611087
+ VCMPNEWCC V1, V2, V3 // 10611487
+ VPERM V1, V2, V3, V4 // 108110eb
+ VPERMR V1, V2, V3, V4 // 108110fb
+ VPERMXOR V1, V2, V3, V4 // 108110ed
+ VBPERMQ V1, V2, V3 // 1061154c
+ VBPERMD V1, V2, V3 // 106115cc
+ VSEL V1, V2, V3, V4 // 108110ea
+ VSPLTB $1, V1, V2 // 10410a0c
+ VSPLTH $1, V1, V2 // 10410a4c
+ VSPLTW $1, V1, V2 // 10410a8c
+ VSPLTISB $1, V1 // 1021030c
+ VSPLTISW $1, V1 // 1021038c
+ VSPLTISH $1, V1 // 1021034c
+ VCIPHER V1, V2, V3 // 10611508
+ VCIPHERLAST V1, V2, V3 // 10611509
+ VNCIPHER V1, V2, V3 // 10611548
+ VNCIPHERLAST V1, V2, V3 // 10611549
+ VSBOX V1, V2 // 104105c8
+ VSHASIGMAW $1, V1, $15, V2 // 10418e82
+ VSHASIGMAD $2, V1, $15, V2 // 104196c2
+
+ LXVD2X (R3)(R4), VS1 // 7c241e98
+ LXVDSX (R3)(R4), VS1 // 7c241a98
+ LXVH8X (R3)(R4), VS1 // 7c241e58
+ LXVB16X (R3)(R4), VS1 // 7c241ed8
+ LXVW4X (R3)(R4), VS1 // 7c241e18
+ LXV 16(R3), VS1 // f4230011
+ LXV 16(R3), VS33 // f4230019
+ LXV 16(R3), V1 // f4230019
+ LXVL R3, R4, VS1 // 7c23221a
+ LXVLL R3, R4, VS1 // 7c23225a
+ LXVX R3, R4, VS1 // 7c232218
+ LXSDX (R3)(R4), VS1 // 7c241c98
+ STXVD2X VS1, (R3)(R4) // 7c241f98
+ STXV VS1,16(R3) // f4230015
+ STXVL VS1, R3, R4 // 7c23231a
+ STXVLL VS1, R3, R4 // 7c23235a
+ STXVX VS1, R3, R4 // 7c232318
+ STXVB16X VS1, (R4)(R5) // 7c2527d8
+ STXVH8X VS1, (R4)(R5) // 7c252758
+
+ STXSDX VS1, (R3)(R4) // 7c241d98
+ LXSIWAX (R3)(R4), VS1 // 7c241898
+ STXSIWX VS1, (R3)(R4) // 7c241918
+ MFVSRD VS1, R3 // 7c230066
+ MTFPRD R3, F0 // 7c030166
+ MFVRD V0, R3 // 7c030067
+ MFVSRLD VS63,R4 // 7fe40267
+ MFVSRLD V31,R4 // 7fe40267
+ MFVSRWZ VS33,R4 // 7c2400e7
+ MFVSRWZ V1,R4 // 7c2400e7
+ MTVSRD R3, VS1 // 7c230166
+ MTVSRDD R3, R4, VS1 // 7c232366
+ MTVSRDD R3, R4, VS33 // 7c232367
+ MTVSRDD R3, R4, V1 // 7c232367
+ MTVRD R3, V13 // 7da30167
+ MTVSRWA R4, VS31 // 7fe401a6
+ MTVSRWS R4, VS32 // 7c040327
+ MTVSRWZ R4, VS63 // 7fe401e7
+ XXBRD VS0, VS1 // f037076c
+ XXBRW VS1, VS2 // f04f0f6c
+ XXBRH VS2, VS3 // f067176c
+ XXLAND VS1, VS2, VS3 // f0611410
+ XXLAND V1, V2, V3 // f0611417
+ XXLAND VS33, VS34, VS35 // f0611417
+ XXLANDC VS1, VS2, VS3 // f0611450
+ XXLEQV VS0, VS1, VS2 // f0400dd0
+ XXLNAND VS0, VS1, VS2 // f0400d90
+ XXLNOR VS0, VS1, VS32 // f0000d11
+ XXLOR VS1, VS2, VS3 // f0611490
+ XXLORC VS1, VS2, VS3 // f0611550
+ XXLORQ VS1, VS2, VS3 // f0611490
+ XXLXOR VS1, VS2, VS3 // f06114d0
+ XXSEL VS1, VS2, VS3, VS4 // f08110f0
+ XXSEL VS33, VS34, VS35, VS36 // f08110ff
+ XXSEL V1, V2, V3, V4 // f08110ff
+ XXMRGHW VS1, VS2, VS3 // f0611090
+ XXMRGLW VS1, VS2, VS3 // f0611190
+ XXSPLTW VS1, $1, VS2 // f0410a90
+ XXSPLTW VS33, $1, VS34 // f0410a93
+ XXSPLTW V1, $1, V2 // f0410a93
+ XXPERM VS1, VS2, VS3 // f06110d0
+ XXSLDWI VS1, VS2, $1, VS3 // f0611110
+ XXSLDWI V1, V2, $1, V3 // f0611117
+ XXSLDWI VS33, VS34, $1, VS35 // f0611117
+ XSCVDPSP VS1, VS2 // f0400c24
+ XVCVDPSP VS1, VS2 // f0400e24
+ XSCVSXDDP VS1, VS2 // f0400de0
+ XVCVDPSXDS VS1, VS2 // f0400f60
+ XVCVSXDDP VS1, VS2 // f0400fe0
+ XSCVDPSPN VS1,VS32 // f0000c2d
+ XSCVDPSP VS1,VS32 // f0000c25
+ XSCVDPSXDS VS1,VS32 // f0000d61
+ XSCVDPSXWS VS1,VS32 // f0000961
+ XSCVDPUXDS VS1,VS32 // f0000d21
+ XSCVDPUXWS VS1,VS32 // f0000921
+ XSCVSPDPN VS1,VS32 // f0000d2d
+ XSCVSPDP VS1,VS32 // f0000d25
+ XSCVSXDDP VS1,VS32 // f0000de1
+ XSCVSXDSP VS1,VS32 // f0000ce1
+ XSCVUXDDP VS1,VS32 // f0000da1
+ XSCVUXDSP VS1,VS32 // f0000ca1
+ XVCVDPSP VS1,VS32 // f0000e25
+ XVCVDPSXDS VS1,VS32 // f0000f61
+ XVCVDPSXWS VS1,VS32 // f0000b61
+ XVCVDPUXDS VS1,VS32 // f0000f21
+ XVCVDPUXWS VS1,VS32 // f0000b21
+ XVCVSPDP VS1,VS32 // f0000f25
+ XVCVSPSXDS VS1,VS32 // f0000e61
+ XVCVSPSXWS VS1,VS32 // f0000a61
+ XVCVSPUXDS VS1,VS32 // f0000e21
+ XVCVSPUXWS VS1,VS32 // f0000a21
+ XVCVSXDDP VS1,VS32 // f0000fe1
+ XVCVSXDSP VS1,VS32 // f0000ee1
+ XVCVSXWDP VS1,VS32 // f0000be1
+ XVCVSXWSP VS1,VS32 // f0000ae1
+ XVCVUXDDP VS1,VS32 // f0000fa1
+ XVCVUXDSP VS1,VS32 // f0000ea1
+ XVCVUXWDP VS1,VS32 // f0000ba1
+ XVCVUXWSP VS1,VS32 // f0000aa1
+
+ MOVD R3, LR // 7c6803a6
+ MOVD R3, CTR // 7c6903a6
+ MOVD R3, XER // 7c6103a6
+ MOVD LR, R3 // 7c6802a6
+ MOVD CTR, R3 // 7c6902a6
+ MOVD XER, R3 // 7c6102a6
+ MOVFL CR3, CR1 // 4c8c0000
+
+ MOVW CR0, R1 // 7c380026
+ MOVW CR7, R1 // 7c301026
+ MOVW CR, R1 // 7c200026
+
+ MOVW R1, CR // 7c2ff120
+ MOVFL R1, CR // 7c2ff120
+ MOVW R1, CR2 // 7c320120
+ MOVFL R1, CR2 // 7c320120
+ MOVFL R1, $255 // 7c2ff120
+ MOVFL R1, $1 // 7c301120
+ MOVFL R1, $128 // 7c380120
+ MOVFL R1, $3 // 7c203120
+
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s
new file mode 100644
index 0000000..fe911a7
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/riscv64.s
@@ -0,0 +1,394 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+start:
+ // Unprivileged ISA
+
+ // 2.4: Integer Computational Instructions
+
+ ADDI $2047, X5 // 9382f27f
+ ADDI $-2048, X5 // 93820280
+ ADDI $2048, X5 // 9382024093820240
+ ADDI $-2049, X5 // 938202c09382f2bf
+ ADDI $4094, X5 // 9382f27f9382f27f
+ ADDI $-4096, X5 // 9382028093820280
+ ADDI $4095, X5 // b71f00009b8fffffb382f201
+ ADDI $-4097, X5 // b7ffffff9b8fffffb382f201
+ ADDI $2047, X5, X6 // 1383f27f
+ ADDI $-2048, X5, X6 // 13830280
+ ADDI $2048, X5, X6 // 1383024013030340
+ ADDI $-2049, X5, X6 // 138302c01303f3bf
+ ADDI $4094, X5, X6 // 1383f27f1303f37f
+ ADDI $-4096, X5, X6 // 1383028013030380
+ ADDI $4095, X5, X6 // b71f00009b8fffff3383f201
+ ADDI $-4097, X5, X6 // b7ffffff9b8fffff3383f201
+
+ SLTI $55, X5, X7 // 93a37203
+ SLTIU $55, X5, X7 // 93b37203
+
+ ANDI $1, X5, X6 // 13f31200
+ ANDI $1, X5 // 93f21200
+ ANDI $2048, X5 // b71f00009b8f0f80b3f2f201
+ ORI $1, X5, X6 // 13e31200
+ ORI $1, X5 // 93e21200
+ ORI $2048, X5 // b71f00009b8f0f80b3e2f201
+ XORI $1, X5, X6 // 13c31200
+ XORI $1, X5 // 93c21200
+ XORI $2048, X5 // b71f00009b8f0f80b3c2f201
+
+ SLLI $1, X5, X6 // 13931200
+ SLLI $1, X5 // 93921200
+ SRLI $1, X5, X6 // 13d31200
+ SRLI $1, X5 // 93d21200
+ SRAI $1, X5, X6 // 13d31240
+ SRAI $1, X5 // 93d21240
+
+ ADD X6, X5, X7 // b3836200
+ ADD X5, X6 // 33035300
+ ADD $2047, X5, X6 // 1383f27f
+ ADD $-2048, X5, X6 // 13830280
+ ADD $2047, X5 // 9382f27f
+ ADD $-2048, X5 // 93820280
+
+ SLT X6, X5, X7 // b3a36200
+ SLT $55, X5, X7 // 93a37203
+ SLTU X6, X5, X7 // b3b36200
+ SLTU $55, X5, X7 // 93b37203
+
+ AND X6, X5, X7 // b3f36200
+ AND X5, X6 // 33735300
+ AND $1, X5, X6 // 13f31200
+ AND $1, X5 // 93f21200
+ OR X6, X5, X7 // b3e36200
+ OR X5, X6 // 33635300
+ OR $1, X5, X6 // 13e31200
+ OR $1, X5 // 93e21200
+ XOR X6, X5, X7 // b3c36200
+ XOR X5, X6 // 33435300
+ XOR $1, X5, X6 // 13c31200
+ XOR $1, X5 // 93c21200
+
+ AUIPC $0, X10 // 17050000
+ AUIPC $0, X11 // 97050000
+ AUIPC $1, X10 // 17150000
+ AUIPC $-524288, X15 // 97070080
+ AUIPC $524287, X10 // 17f5ff7f
+
+ LUI $0, X15 // b7070000
+ LUI $167, X15 // b7770a00
+ LUI $-524288, X15 // b7070080
+ LUI $524287, X15 // b7f7ff7f
+
+ SLL X6, X5, X7 // b3936200
+ SLL X5, X6 // 33135300
+ SLL $1, X5, X6 // 13931200
+ SLL $1, X5 // 93921200
+ SRL X6, X5, X7 // b3d36200
+ SRL X5, X6 // 33535300
+ SRL $1, X5, X6 // 13d31200
+ SRL $1, X5 // 93d21200
+
+ SUB X6, X5, X7 // b3836240
+ SUB X5, X6 // 33035340
+
+ SRA X6, X5, X7 // b3d36240
+ SRA X5, X6 // 33535340
+ SRA $1, X5, X6 // 13d31240
+ SRA $1, X5 // 93d21240
+
+ // 2.5: Control Transfer Instructions
+ JAL X5, 2(PC) // ef028000
+ JALR X6, (X5) // 67830200
+ JALR X6, 4(X5) // 67834200
+ BEQ X5, X6, 2(PC) // 63846200
+ BNE X5, X6, 2(PC) // 63946200
+ BLT X5, X6, 2(PC) // 63c46200
+ BLTU X5, X6, 2(PC) // 63e46200
+ BGE X5, X6, 2(PC) // 63d46200
+ BGEU X5, X6, 2(PC) // 63f46200
+
+ // 2.6: Load and Store Instructions
+ LW (X5), X6 // 03a30200
+ LW 4(X5), X6 // 03a34200
+ LWU (X5), X6 // 03e30200
+ LWU 4(X5), X6 // 03e34200
+ LH (X5), X6 // 03930200
+ LH 4(X5), X6 // 03934200
+ LHU (X5), X6 // 03d30200
+ LHU 4(X5), X6 // 03d34200
+ LB (X5), X6 // 03830200
+ LB 4(X5), X6 // 03834200
+ LBU (X5), X6 // 03c30200
+ LBU 4(X5), X6 // 03c34200
+
+ SW X5, (X6) // 23205300
+ SW X5, 4(X6) // 23225300
+ SH X5, (X6) // 23105300
+ SH X5, 4(X6) // 23125300
+ SB X5, (X6) // 23005300
+ SB X5, 4(X6) // 23025300
+
+ // 2.7: Memory Ordering Instructions
+ FENCE // 0f00f00f
+
+ // 5.2: Integer Computational Instructions (RV64I)
+ ADDIW $1, X5, X6 // 1b831200
+ SLLIW $1, X5, X6 // 1b931200
+ SRLIW $1, X5, X6 // 1bd31200
+ SRAIW $1, X5, X6 // 1bd31240
+ ADDW X5, X6, X7 // bb035300
+ SLLW X5, X6, X7 // bb135300
+ SRLW X5, X6, X7 // bb535300
+ SUBW X5, X6, X7 // bb035340
+ SRAW X5, X6, X7 // bb535340
+
+ // 5.3: Load and Store Instructions (RV64I)
+ LD (X5), X6 // 03b30200
+ LD 4(X5), X6 // 03b34200
+ SD X5, (X6) // 23305300
+ SD X5, 4(X6) // 23325300
+
+ // 7.1: Multiplication Operations
+ MUL X5, X6, X7 // b3035302
+ MULH X5, X6, X7 // b3135302
+ MULHU X5, X6, X7 // b3335302
+ MULHSU X5, X6, X7 // b3235302
+ MULW X5, X6, X7 // bb035302
+ DIV X5, X6, X7 // b3435302
+ DIVU X5, X6, X7 // b3535302
+ REM X5, X6, X7 // b3635302
+ REMU X5, X6, X7 // b3735302
+ DIVW X5, X6, X7 // bb435302
+ DIVUW X5, X6, X7 // bb535302
+ REMW X5, X6, X7 // bb635302
+ REMUW X5, X6, X7 // bb735302
+
+ // 8.2: Load-Reserved/Store-Conditional
+ LRW (X5), X6 // 2fa30214
+ LRD (X5), X6 // 2fb30214
+ SCW X5, (X6), X7 // af23531c
+ SCD X5, (X6), X7 // af33531c
+
+ // 8.3: Atomic Memory Operations
+ AMOSWAPW X5, (X6), X7 // af23530c
+ AMOSWAPD X5, (X6), X7 // af33530c
+ AMOADDW X5, (X6), X7 // af235304
+ AMOADDD X5, (X6), X7 // af335304
+ AMOANDW X5, (X6), X7 // af235364
+ AMOANDD X5, (X6), X7 // af335364
+ AMOORW X5, (X6), X7 // af235344
+ AMOORD X5, (X6), X7 // af335344
+ AMOXORW X5, (X6), X7 // af235324
+ AMOXORD X5, (X6), X7 // af335324
+ AMOMAXW X5, (X6), X7 // af2353a4
+ AMOMAXD X5, (X6), X7 // af3353a4
+ AMOMAXUW X5, (X6), X7 // af2353e4
+ AMOMAXUD X5, (X6), X7 // af3353e4
+ AMOMINW X5, (X6), X7 // af235384
+ AMOMIND X5, (X6), X7 // af335384
+ AMOMINUW X5, (X6), X7 // af2353c4
+ AMOMINUD X5, (X6), X7 // af3353c4
+
+ // 10.1: Base Counters and Timers
+ RDCYCLE X5 // f32200c0
+ RDTIME X5 // f32210c0
+ RDINSTRET X5 // f32220c0
+
+ // 11.5: Single-Precision Load and Store Instructions
+ FLW (X5), F0 // 07a00200
+ FLW 4(X5), F0 // 07a04200
+ FSW F0, (X5) // 27a00200
+ FSW F0, 4(X5) // 27a20200
+
+ // 11.6: Single-Precision Floating-Point Computational Instructions
+ FADDS F1, F0, F2 // 53011000
+ FSUBS F1, F0, F2 // 53011008
+ FMULS F1, F0, F2 // 53011010
+ FDIVS F1, F0, F2 // 53011018
+ FMINS F1, F0, F2 // 53011028
+ FMAXS F1, F0, F2 // 53111028
+ FSQRTS F0, F1 // d3000058
+
+ // 11.7: Single-Precision Floating-Point Conversion and Move Instructions
+ FCVTWS F0, X5 // d31200c0
+ FCVTLS F0, X5 // d31220c0
+ FCVTSW X5, F0 // 538002d0
+ FCVTSL X5, F0 // 538022d0
+ FCVTWUS F0, X5 // d31210c0
+ FCVTLUS F0, X5 // d31230c0
+ FCVTSWU X5, F0 // 538012d0
+ FCVTSLU X5, F0 // 538032d0
+ FSGNJS F1, F0, F2 // 53011020
+ FSGNJNS F1, F0, F2 // 53111020
+ FSGNJXS F1, F0, F2 // 53211020
+ FMVXS F0, X5 // d30200e0
+ FMVSX X5, F0 // 538002f0
+ FMVXW F0, X5 // d30200e0
+ FMVWX X5, F0 // 538002f0
+ FMADDS F1, F2, F3, F4 // 43822018
+ FMSUBS F1, F2, F3, F4 // 47822018
+ FNMSUBS F1, F2, F3, F4 // 4b822018
+ FNMADDS F1, F2, F3, F4 // 4f822018
+
+ // 11.8: Single-Precision Floating-Point Compare Instructions
+ FEQS F0, F1, X7 // d3a300a0
+ FLTS F0, F1, X7 // d39300a0
+ FLES F0, F1, X7 // d38300a0
+
+ // 11.9: Single-Precision Floating-Point Classify Instruction
+ FCLASSS F0, X5 // d31200e0
+
+ // 12.3: Double-Precision Load and Store Instructions
+ FLD (X5), F0 // 07b00200
+ FLD 4(X5), F0 // 07b04200
+ FSD F0, (X5) // 27b00200
+ FSD F0, 4(X5) // 27b20200
+
+ // 12.4: Double-Precision Floating-Point Computational Instructions
+ FADDD F1, F0, F2 // 53011002
+ FSUBD F1, F0, F2 // 5301100a
+ FMULD F1, F0, F2 // 53011012
+ FDIVD F1, F0, F2 // 5301101a
+ FMIND F1, F0, F2 // 5301102a
+ FMAXD F1, F0, F2 // 5311102a
+ FSQRTD F0, F1 // d300005a
+
+ // 12.5: Double-Precision Floating-Point Conversion and Move Instructions
+ FCVTWD F0, X5 // d31200c2
+ FCVTLD F0, X5 // d31220c2
+ FCVTDW X5, F0 // 538002d2
+ FCVTDL X5, F0 // 538022d2
+ FCVTWUD F0, X5 // d31210c2
+ FCVTLUD F0, X5 // d31230c2
+ FCVTDWU X5, F0 // 538012d2
+ FCVTDLU X5, F0 // 538032d2
+ FCVTSD F0, F1 // d3001040
+ FCVTDS F0, F1 // d3000042
+ FSGNJD F1, F0, F2 // 53011022
+ FSGNJND F1, F0, F2 // 53111022
+ FSGNJXD F1, F0, F2 // 53211022
+ FMVXD F0, X5 // d30200e2
+ FMVDX X5, F0 // 538002f2
+ FMADDD F1, F2, F3, F4 // 4382201a
+ FMSUBD F1, F2, F3, F4 // 4782201a
+ FNMSUBD F1, F2, F3, F4 // 4b82201a
+ FNMADDD F1, F2, F3, F4 // 4f82201a
+
+ // 12.6: Double-Precision Floating-Point Classify Instruction
+ FCLASSD F0, X5 // d31200e2
+
+ // Privileged ISA
+
+ // 3.2.1: Environment Call and Breakpoint
+ ECALL // 73000000
+ SCALL // 73000000
+ EBREAK // 73001000
+ SBREAK // 73001000
+
+ // Arbitrary bytes (entered in little-endian mode)
+ WORD $0x12345678 // WORD $305419896 // 78563412
+ WORD $0x9abcdef0 // WORD $2596069104 // f0debc9a
+
+ // MOV pseudo-instructions
+ MOV X5, X6 // 13830200
+ MOV $2047, X5 // 9302f07f
+ MOV $-2048, X5 // 93020080
+ MOV $2048, X5 // b71200009b820280
+ MOV $-2049, X5 // b7f2ffff9b82f27f
+ MOV $4096, X5 // b7120000
+ MOV $2147479552, X5 // b7f2ff7f
+ MOV $2147483647, X5 // b70200809b82f2ff
+ MOV $-2147483647, X5 // b70200809b821200
+
+ // Converted to load of symbol (AUIPC + LD)
+ MOV $4294967296, X5 // 9702000083b20200
+
+ MOV (X5), X6 // 03b30200
+ MOV 4(X5), X6 // 03b34200
+ MOVB (X5), X6 // 03830200
+ MOVB 4(X5), X6 // 03834200
+ MOVH (X5), X6 // 03930200
+ MOVH 4(X5), X6 // 03934200
+ MOVW (X5), X6 // 03a30200
+ MOVW 4(X5), X6 // 03a34200
+ MOV X5, (X6) // 23305300
+ MOV X5, 4(X6) // 23325300
+ MOVB X5, (X6) // 23005300
+ MOVB X5, 4(X6) // 23025300
+ MOVH X5, (X6) // 23105300
+ MOVH X5, 4(X6) // 23125300
+ MOVW X5, (X6) // 23205300
+ MOVW X5, 4(X6) // 23225300
+
+ MOVB X5, X6 // 1393820313538343
+ MOVH X5, X6 // 1393020313530343
+ MOVW X5, X6 // 1b830200
+ MOVBU X5, X6 // 13f3f20f
+ MOVHU X5, X6 // 1393020313530303
+ MOVWU X5, X6 // 1393020213530302
+
+ MOVF 4(X5), F0 // 07a04200
+ MOVF F0, 4(X5) // 27a20200
+ MOVF F0, F1 // d3000020
+
+ MOVD 4(X5), F0 // 07b04200
+ MOVD F0, 4(X5) // 27b20200
+ MOVD F0, F1 // d3000022
+
+ // NOT pseudo-instruction
+ NOT X5 // 93c2f2ff
+ NOT X5, X6 // 13c3f2ff
+
+ // NEG/NEGW pseudo-instructions
+ NEG X5 // b3025040
+ NEG X5, X6 // 33035040
+ NEGW X5 // bb025040
+ NEGW X5, X6 // 3b035040
+
+ // This jumps to the second instruction in the function (the
+ // first instruction is an invisible stack pointer adjustment).
+ JMP start // JMP 2
+
+ JMP 2(PC) // 6f008000
+ JMP (X5) // 67800200
+ JMP 4(X5) // 67804200
+
+ // CALL and JMP to symbol are encoded as JAL (using LR or ZERO
+ // respectively), with a R_RISCV_CALL relocation. The linker resolves
+ // the real address and updates the immediate, using a trampoline in
+ // the case where the address is not directly reachable.
+ CALL asmtest(SB) // ef000000
+ JMP asmtest(SB) // 6f000000
+
+ // Branch pseudo-instructions
+ BEQZ X5, 2(PC) // 63840200
+ BGEZ X5, 2(PC) // 63d40200
+ BGT X5, X6, 2(PC) // 63445300
+ BGTU X5, X6, 2(PC) // 63645300
+ BGTZ X5, 2(PC) // 63445000
+ BLE X5, X6, 2(PC) // 63545300
+ BLEU X5, X6, 2(PC) // 63745300
+ BLEZ X5, 2(PC) // 63545000
+ BLTZ X5, 2(PC) // 63c40200
+ BNEZ X5, 2(PC) // 63940200
+
+ // Set pseudo-instructions
+ SEQZ X15, X15 // 93b71700
+ SNEZ X15, X15 // b337f000
+
+ // F extension
+ FABSS F0, F1 // d3200020
+ FNEGS F0, F1 // d3100020
+ FNES F0, F1, X7 // d3a300a093c31300
+
+ // D extension
+ FABSD F0, F1 // d3200022
+ FNEGD F0, F1 // d3100022
+ FNED F0, F1, X5 // d3a200a293c21200
+ FLTD F0, F1, X5 // d39200a2
+ FLED F0, F1, X5 // d38200a2
+ FEQD F0, F1, X5 // d3a200a2
diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s
new file mode 100644
index 0000000..2385525
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s
@@ -0,0 +1,26 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// errors lists MOV-family operand forms that the riscv64 assembler is expected
+// to reject. The quoted text after each ERROR marker is the diagnostic expected
+// for that line (presumably matched by the assembler's end-to-end tests; confirm there).
+TEXT errors(SB),$0
+ MOV $errors(SB), (X5) // ERROR "address load must target register"
+ MOV $8(SP), (X5) // ERROR "address load must target register"
+ MOVB $8(SP), X5 // ERROR "unsupported address load"
+ MOVH $8(SP), X5 // ERROR "unsupported address load"
+ MOVW $8(SP), X5 // ERROR "unsupported address load"
+ MOVF $8(SP), X5 // ERROR "unsupported address load"
+ MOV $1234, 0(SP) // ERROR "constant load must target register"
+ MOV $1234, 8(SP) // ERROR "constant load must target register"
+ MOV $0, 0(SP) // ERROR "constant load must target register"
+ MOV $0, 8(SP) // ERROR "constant load must target register"
+ // NOTE(review): the next two lines repeat the $1234 stores above verbatim; confirm whether a different operand form was intended.
+ MOV $1234, 0(SP) // ERROR "constant load must target register"
+ MOV $1234, 8(SP) // ERROR "constant load must target register"
+ MOVB $1, X5 // ERROR "unsupported constant load"
+ MOVH $1, X5 // ERROR "unsupported constant load"
+ MOVW $1, X5 // ERROR "unsupported constant load"
+ MOVF $1, X5 // ERROR "unsupported constant load"
+ MOVBU X5, (X6) // ERROR "unsupported unsigned store"
+ MOVHU X5, (X6) // ERROR "unsupported unsigned store"
+ MOVWU X5, (X6) // ERROR "unsupported unsigned store"
+
+ RET
diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s
new file mode 100644
index 0000000..7c5d26b
--- /dev/null
+++ b/src/cmd/asm/internal/asm/testdata/s390x.s
@@ -0,0 +1,524 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-0
+ MOVD R1, R2 // b9040021
+ MOVW R3, R4 // b9140043
+ MOVH R5, R6 // b9070065
+ MOVB R7, R8 // b9060087
+ MOVWZ R1, R2 // b9160021
+ MOVHZ R2, R3 // b9850032
+ MOVBZ R4, R5 // b9840054
+ MOVDBR R1, R2 // b90f0021
+ MOVWBR R3, R4 // b91f0043
+
+ MOVDEQ R0, R1 // b9e28010
+ MOVDGE R2, R3 // b9e2a032
+ MOVDGT R4, R5 // b9e22054
+ MOVDLE R6, R7 // b9e2c076
+ MOVDLT R8, R9 // b9e24098
+ MOVDNE R10, R11 // b9e270ba
+
+ LOCR $3, R2, R1 // b9f23012
+ LOCGR $7, R5, R6 // b9e27065
+
+ MOVD (R15), R1 // e310f0000004
+ MOVW (R15), R2 // e320f0000014
+ MOVH (R15), R3 // e330f0000015
+ MOVB (R15), R4 // e340f0000077
+ MOVWZ (R15), R5 // e350f0000016
+ MOVHZ (R15), R6 // e360f0000091
+ MOVBZ (R15), R7 // e370f0000090
+ MOVDBR (R15), R8 // e380f000000f
+ MOVWBR (R15), R9 // e390f000001e
+
+ MOVD R1, n-8(SP) // e310f0100024
+ MOVW R2, n-8(SP) // 5020f010
+ MOVH R3, n-8(SP) // 4030f010
+ MOVB R4, n-8(SP) // 4240f010
+ MOVWZ R5, n-8(SP) // 5050f010
+ MOVHZ R6, n-8(SP) // 4060f010
+ MOVBZ R7, n-8(SP) // 4270f010
+ MOVDBR R8, n-8(SP) // e380f010002f
+ MOVWBR R9, n-8(SP) // e390f010003e
+
+ MOVD $-8589934592, R1 // c01efffffffe
+ MOVW $-131072, R2 // c021fffe0000
+ MOVH $-512, R3 // a739fe00
+ MOVB $-1, R4 // a749ffff
+
+ MOVD $32767, n-8(SP) // e548f0107fff
+ MOVD $-1, -524288(R1) // e3a010008071e548a000ffff
+ MOVW $32767, n-8(SP) // e54cf0107fff
+ MOVW $-32768, 4096(R2) // e3a020000171e54ca0008000
+ MOVH $512, n-8(SP) // e544f0100200
+ MOVH $-512, 524288(R3) // c0a10008000041aa3000e544a000fe00
+ MOVB $-1, n-8(SP) // 92fff010
+ MOVB $255, 4096(R4) // ebff40000152
+ MOVB $-128, -524288(R5) // eb8050008052
+ MOVB $1, -524289(R6) // c0a1fff7ffff41aa60009201a000
+
+ // RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g. ST vs STY)
+ MOVW R1, 4095(R2)(R3) // 50132fff
+ MOVW R1, 4096(R2)(R3) // e31320000150
+ MOVWZ R1, 4095(R2)(R3) // 50132fff
+ MOVWZ R1, 4096(R2)(R3) // e31320000150
+ MOVH R1, 4095(R2)(R3) // 40132fff
+ MOVHZ R1, 4095(R2)(R3) // 40132fff
+ MOVH R1, 4096(R2)(R3) // e31320000170
+ MOVHZ R1, 4096(R2)(R3) // e31320000170
+ MOVB R1, 4095(R2)(R3) // 42132fff
+ MOVBZ R1, 4095(R2)(R3) // 42132fff
+ MOVB R1, 4096(R2)(R3) // e31320000172
+ MOVBZ R1, 4096(R2)(R3) // e31320000172
+
+ ADD R1, R2 // b9e81022
+ ADD R1, R2, R3 // b9e81032
+ ADD $8192, R1 // a71b2000
+ ADD $8192, R1, R2 // ec21200000d9
+ ADD $32768, R1 // c21800008000
+ ADD $32768, R1, R2 // b9040021c22800008000
+ ADDC R1, R2 // b9ea1022
+ ADDC $1, R1, R2 // ec21000100db
+ ADDC $-1, R1, R2 // ec21ffff00db
+ ADDC R1, R2, R3 // b9ea1032
+ ADDW R1, R2 // 1a21
+ ADDW R1, R2, R3 // b9f81032
+ ADDW $8192, R1 // a71a2000
+ ADDW $8192, R1, R2 // ec21200000d8
+ ADDE R1, R2 // b9880021
+ SUB R3, R4 // b9090043
+ SUB R3, R4, R5 // b9e93054
+ SUB $8192, R3 // a73be000
+ SUB $8192, R3, R4 // ec43e00000d9
+ SUBC R1, R2 // b90b0021
+ SUBC $1, R1, R2 // ec21ffff00db
+ SUBC R2, R3, R4 // b9eb2043
+ SUBW R3, R4 // 1b43
+ SUBW R3, R4, R5 // b9f93054
+ SUBW $8192, R1 // c21500002000
+ SUBW $8192, R1, R2 // 1821c22500002000
+ MULLW R6, R7 // b91c0076
+ MULLW R6, R7, R8 // b9040087b91c0086
+ MULLW $8192, R6 // a76c2000
+ MULLW $8192, R6, R7 // 1876a77c2000
+ MULLW $-32769, R8 // c281ffff7fff
+ MULLW $-32769, R8, R9 // 1898c291ffff7fff
+ MULLD $-2147483648, R1 // c21080000000
+ MULLD $-2147483648, R1, R2 // b9040021c22080000000
+ MULHD R9, R8 // b90400b8b98600a9ebb9003f000ab98000b8b90900abebb8003f000ab98000b9b9e9b08a
+ MULHD R7, R2, R1 // b90400b2b98600a7ebb7003f000ab98000b2b90900abebb2003f000ab98000b7b9e9b01a
+ MULHDU R3, R4 // b90400b4b98600a3b904004a
+ MULHDU R5, R6, R7 // b90400b6b98600a5b904007a
+ MLGR R1, R2 // b9860021
+ DIVD R1, R2 // b90400b2b90d00a1b904002b
+ DIVD R1, R2, R3 // b90400b2b90d00a1b904003b
+ DIVW R4, R5 // b90400b5b91d00a4b904005b
+ DIVW R4, R5, R6 // b90400b5b91d00a4b904006b
+ DIVDU R7, R8 // a7a90000b90400b8b98700a7b904008b
+ DIVDU R7, R8, R9 // a7a90000b90400b8b98700a7b904009b
+ DIVWU R1, R2 // a7a90000b90400b2b99700a1b904002b
+ DIVWU R1, R2, R3 // a7a90000b90400b2b99700a1b904003b
+ MODD R1, R2 // b90400b2b90d00a1b904002a
+ MODD R1, R2, R3 // b90400b2b90d00a1b904003a
+ MODW R4, R5 // b90400b5b91d00a4b904005a
+ MODW R4, R5, R6 // b90400b5b91d00a4b904006a
+ MODDU R7, R8 // a7a90000b90400b8b98700a7b904008a
+ MODDU R7, R8, R9 // a7a90000b90400b8b98700a7b904009a
+ MODWU R1, R2 // a7a90000b90400b2b99700a1b904002a
+ MODWU R1, R2, R3 // a7a90000b90400b2b99700a1b904003a
+ NEG R1 // b9030011
+ NEG R1, R2 // b9030021
+ NEGW R1 // b9130011
+ NEGW R1, R2 // b9130021
+ FLOGR R2, R2 // b9830022
+ POPCNT R3, R4 // b9e10043
+
+ AND R1, R2 // b9800021
+ AND R1, R2, R3 // b9e42031
+ AND $-2, R1 // a517fffe
+ AND $-65536, R1 // c01bffff0000
+ AND $1, R1 // c0a100000001b980001a
+ ANDW R1, R2 // 1421
+ ANDW R1, R2, R3 // b9f42031
+ ANDW $1, R1 // c01b00000001
+ ANDW $131071, R1 // a5160001
+ ANDW $65536, R1 // c01b00010000
+ ANDW $-2, R1 // a517fffe
+ OR R1, R2 // b9810021
+ OR R1, R2, R3 // b9e62031
+ OR $1, R1 // a51b0001
+ OR $131071, R1 // c01d0001ffff
+ OR $65536, R1 // c01d00010000
+ OR $-2, R1 // c0a1fffffffeb981001a
+ ORW R1, R2 // 1621
+ ORW R1, R2, R3 // b9f62031
+ ORW $1, R1 // a51b0001
+ ORW $131071, R1 // c01d0001ffff
+ ORW $65536, R1 // a51a0001
+ ORW $-2, R1 // c01dfffffffe
+ XOR R1, R2 // b9820021
+ XOR R1, R2, R3 // b9e72031
+ XOR $1, R1 // c01700000001
+ XOR $131071, R1 // c0170001ffff
+ XOR $65536, R1 // c01700010000
+ XOR $-2, R1 // c0a1fffffffeb982001a
+ XORW R1, R2 // 1721
+ XORW R1, R2, R3 // b9f72031
+ XORW $1, R1 // c01700000001
+ XORW $131071, R1 // c0170001ffff
+ XORW $65536, R1 // c01700010000
+ XORW $-2, R1 // c017fffffffe
+
+ ADD -524288(R1), R2 // e32010008008
+ ADD 524287(R3), R4 // e3403fff7f08
+ ADD -524289(R1), R2 // c0a1fff7ffffe32a10000008
+ ADD 524288(R3), R4 // c0a100080000e34a30000008
+ ADD -524289(R1)(R2*1), R3 // c0a1fff7ffff41aa2000e33a10000008
+ ADD 524288(R3)(R4*1), R5 // c0a10008000041aa4000e35a30000008
+ ADDC (R1), R2 // e3201000000a
+ ADDW (R5), R6 // 5a605000
+ ADDW 4095(R7), R8 // 5a807fff
+ ADDW -1(R1), R2 // e3201fffff5a
+ ADDW 4096(R3), R4 // e3403000015a
+ ADDE 4096(R3), R4 // e34030000188
+ ADDE 4096(R3)(R2*1), R4 // e34230000188
+ ADDE 524288(R3)(R4*1), R5 // c0a10008000041aa4000e35a30000088
+ MULLD (R1)(R2*1), R3 // e3321000000c
+ MULLW (R3)(R4*1), R5 // 71543000
+ MULLW 4096(R3), R4 // e34030000151
+ SUB (R1), R2 // e32010000009
+ SUBC (R1), R2 // e3201000000b
+ SUBE (R1), R2 // e32010000089
+ SUBW (R1), R2 // 5b201000
+ SUBW -1(R1), R2 // e3201fffff5b
+ AND (R1), R2 // e32010000080
+ ANDW (R1), R2 // 54201000
+ ANDW -1(R1), R2 // e3201fffff54
+ OR (R1), R2 // e32010000081
+ ORW (R1), R2 // 56201000
+ ORW -1(R1), R2 // e3201fffff56
+ XOR (R1), R2 // e32010000082
+ XORW (R1), R2 // 57201000
+ XORW -1(R1), R2 // e3201fffff57
+
+ // shift and rotate instructions
+ SRD $4, R4, R7 // eb740004000c
+ SRD R1, R4, R7 // eb741000000c
+ SRW $4, R4, R7 // eb74000400de
+ SRW R1, R4, R7 // eb74100000de
+ SLW $4, R3, R6 // eb63000400df
+ SLW R2, R3, R6 // eb63200000df
+ SLD $4, R3, R6 // eb630004000d
+ SLD R2, R3, R6 // eb632000000d
+ SRAD $4, R5, R8 // eb850004000a
+ SRAD R3, R5, R8 // eb853000000a
+ SRAW $4, R5, R8 // eb85000400dc
+ SRAW R3, R5, R8 // eb85300000dc
+ RLL R1, R2, R3 // eb321000001d
+ RLL $4, R2, R3 // eb320004001d
+ RLLG R1, R2, R3 // eb321000001c
+ RLLG $4, R2, R3 // eb320004001c
+
+ RNSBG $0, $31, $32, R1, R2 // ec21001f2054
+ RXSBG $17, $8, $16, R3, R4 // ec4311081057
+ ROSBG $9, $24, $11, R5, R6 // ec6509180b56
+ RNSBGT $0, $31, $32, R7, R8 // ec87801f2054
+ RXSBGT $17, $8, $16, R9, R10 // eca991081057
+ ROSBGT $9, $24, $11, R11, R0 // ec0b89180b56
+ RISBG $0, $31, $32, R1, R2 // ec21001f2055
+ RISBGN $17, $8, $16, R3, R4 // ec4311081059
+ RISBGZ $9, $24, $11, R5, R6 // ec6509980b55
+ RISBGNZ $0, $31, $32, R7, R8 // ec87009f2059
+ RISBHG $17, $8, $16, R9, R10 // eca91108105d
+ RISBLG $9, $24, $11, R11, R0 // ec0b09180b51
+ RISBHGZ $17, $8, $16, R9, R10 // eca91188105d
+ RISBLGZ $9, $24, $11, R11, R0 // ec0b09980b51
+
+ LAA R1, R2, 524287(R3) // eb213fff7ff8
+ LAAG R4, R5, -524288(R6) // eb54600080e8
+ LAAL R7, R8, 8192(R9) // eb87900002fa
+ LAALG R10, R11, -8192(R12) // ebbac000feea
+ LAN R1, R2, (R3) // eb21300000f4
+ LANG R4, R5, (R6) // eb54600000e4
+ LAX R7, R8, (R9) // eb87900000f7
+ LAXG R10, R11, (R12) // ebbac00000e7
+ LAO R1, R2, (R3) // eb21300000f6
+ LAOG R4, R5, (R6) // eb54600000e6
+
+ // load and store multiple
+ LMG n-8(SP), R3, R4 // eb34f0100004
+ LMG -5(R5), R3, R4 // eb345ffbff04
+ LMY n-8(SP), R3, R4 // 9834f010
+ LMY 4096(R1), R3, R4 // eb3410000198
+ STMG R1, R2, n-8(SP) // eb12f0100024
+ STMG R1, R2, -5(R3) // eb123ffbff24
+ STMY R1, R2, n-8(SP) // 9012f010
+ STMY R1, R2, 4096(R3) // eb1230000190
+
+ XC $8, (R15), n-8(SP) // d707f010f000
+ NC $8, (R15), n-8(SP) // d407f010f000
+ OC $8, (R15), n-8(SP) // d607f010f000
+ MVC $8, (R15), n-8(SP) // d207f010f000
+ MVCIN $8, (R15), n-8(SP) // e807f010f000
+ CLC $8, (R15), n-8(SP) // d507f000f010
+ XC $256, -8(R15), -8(R15) // b90400afc2a8fffffff8d7ffa000a000
+ MVC $256, 8192(R1), 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000
+
+ CMP R1, R2 // b9200012
+ CMP R3, $32767 // a73f7fff
+ CMP R3, $32768 // c23c00008000
+ CMP R3, $-2147483648 // c23c80000000
+ CMPU R4, R5 // b9210045
+ CMPU R6, $4294967295 // c26effffffff
+ CMPW R7, R8 // 1978
+ CMPW R9, $-32768 // a79e8000
+ CMPW R9, $-32769 // c29dffff7fff
+ CMPW R9, $-2147483648 // c29d80000000
+ CMPWU R1, R2 // 1512
+ CMPWU R3, $4294967295 // c23fffffffff
+
+ TMHH R1, $65535 // a712ffff
+ TMHL R2, $1 // a7230001
+ TMLH R3, $0 // a7300000
+ TMLL R4, $32768 // a7418000
+
+ IPM R3 // b2220030
+ IPM R12 // b22200c0
+
+ SPM R1 // 0410
+ SPM R10 // 04a0
+
+ BRC $7, 0(PC) // a7740000
+ BNE 0(PC) // a7740000
+ BEQ 0(PC) // a7840000
+ BLT 0(PC) // a7440000
+ BLE 0(PC) // a7c40000
+ BGT 0(PC) // a7240000
+ BGE 0(PC) // a7a40000
+ BLTU 0(PC) // a7540000
+ BLEU 0(PC) // a7d40000
+
+ BRCT R1, 0(PC) // a7160000
+ BRCTG R2, 0(PC) // a7270000
+
+ CMPBNE R1, R2, 0(PC) // ec1200007064
+ CMPBEQ R3, R4, 0(PC) // ec3400008064
+ CMPBLT R5, R6, 0(PC) // ec5600004064
+ CMPBLE R7, R8, 0(PC) // ec780000c064
+ CMPBGT R9, R1, 0(PC) // ec9100002064
+ CMPBGE R2, R3, 0(PC) // ec230000a064
+
+ CMPBNE R1, $-127, 0(PC) // ec170000817c
+ CMPBEQ R3, $0, 0(PC) // ec380000007c
+ CMPBLT R5, $128, 0(PC) // ec540000807c
+ CMPBLE R7, $127, 0(PC) // ec7c00007f7c
+ CMPBGT R9, $0, 0(PC) // ec920000007c
+ CMPBGE R2, $128, 0(PC) // ec2a0000807c
+
+ CMPUBNE R1, R2, 0(PC) // ec1200007065
+ CMPUBEQ R3, R4, 0(PC) // ec3400008065
+ CMPUBLT R5, R6, 0(PC) // ec5600004065
+ CMPUBLE R7, R8, 0(PC) // ec780000c065
+ CMPUBGT R9, R1, 0(PC) // ec9100002065
+ CMPUBGE R2, R3, 0(PC) // ec230000a065
+
+ CMPUBNE R1, $256, 0(PC) // ec170000007d
+ CMPUBEQ R3, $0, 0(PC) // ec380000007d
+ CMPUBLT R5, $256, 0(PC) // ec540000007d
+ CMPUBLE R7, $0, 0(PC) // ec7c0000007d
+ CMPUBGT R9, $256, 0(PC) // ec920000007d
+ CMPUBGE R2, $0, 0(PC) // ec2a0000007d
+
+ CRJ $15, R1, R2, 0(PC) // ec120000f076
+ CGRJ $12, R3, R4, 0(PC) // ec340000c064
+ CLRJ $3, R5, R6, 0(PC) // ec5600003077
+ CLGRJ $0, R7, R8, 0(PC) // ec7800000065
+
+ CIJ $4, R9, $127, 0(PC) // ec9400007f7e
+ CGIJ $8, R11, $-128, 0(PC) // ecb80000807c
+ CLIJ $1, R1, $255, 0(PC) // ec110000ff7f
+ CLGIJ $2, R3, $0, 0(PC) // ec320000007d
+
+ LGDR F1, R12 // b3cd00c1
+ LDGR R2, F15 // b3c100f2
+
+ CEFBRA R0, F15 // b39400f0
+ CDFBRA R1, F14 // b39500e1
+ CEGBRA R2, F13 // b3a400d2
+ CDGBRA R3, F12 // b3a500c3
+
+ CELFBR R0, F15 // b39000f0
+ CDLFBR R1, F14 // b39100e1
+ CELGBR R2, F13 // b3a000d2
+ CDLGBR R3, F12 // b3a100c3
+
+ CFEBRA F15, R1 // b398501f
+ CFDBRA F14, R2 // b399502e
+ CGEBRA F13, R3 // b3a8503d
+ CGDBRA F12, R4 // b3a9504c
+
+ CLFEBR F15, R1 // b39c501f
+ CLFDBR F14, R2 // b39d502e
+ CLGEBR F13, R3 // b3ac503d
+ CLGDBR F12, R4 // b3ad504c
+
+ FMOVS $0, F11 // b37400b0
+ FMOVD $0, F12 // b37500c0
+ FMOVS (R1)(R2*1), F0 // 78021000
+ FMOVS n-8(SP), F15 // 78f0f010
+ FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000688a8000
+ FMOVD F4, F5 // 2854
+
+ // RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g. LD vs LDY)
+ FMOVD (R1), F0 // 68001000
+ FMOVD 4095(R2), F13 // 68d02fff
+ FMOVD 4096(R2), F15 // edf020000165
+ FMOVS 4095(R2)(R3), F13 // 78d32fff
+ FMOVS 4096(R2)(R4), F15 // edf420000164
+ FMOVD F0, 4095(R1) // 60001fff
+ FMOVD F0, 4096(R1) // ed0010000167
+ FMOVS F13, 4095(R2)(R3) // 70d32fff
+ FMOVS F13, 4096(R2)(R3) // edd320000166
+
+ FADDS F0, F15 // b30a00f0
+ FADD F1, F14 // b31a00e1
+ FSUBS F2, F13 // b30b00d2
+ FSUB F3, F12 // b31b00c3
+ FMULS F4, F11 // b31700b4
+ FMUL F5, F10 // b31c00a5
+ FDIVS F6, F9 // b30d0096
+ FDIV F7, F8 // b31d0087
+ FABS F1, F2 // b3100021
+ FSQRTS F3, F4 // b3140043
+ FSQRT F5, F15 // b31500f5
+ FIEBR $0, F0, F1 // b3570010
+ FIDBR $7, F2, F3 // b35f7032
+ FMADD F1, F1, F1 // b31e1011
+ FMADDS F1, F2, F3 // b30e3012
+ FMSUB F4, F5, F5 // b31f5045
+ FMSUBS F6, F6, F7 // b30f7066
+ LPDFR F1, F2 // b3700021
+ LNDFR F3, F4 // b3710043
+ CPSDR F5, F6, F7 // b3725076
+ LTEBR F1, F2 // b3020021
+ LTDBR F3, F4 // b3120043
+ TCEB F5, $8 // ed5000080010
+ TCDB F15, $4095 // edf00fff0011
+
+ UNDEF // 00000000
+ NOPH // 0700
+
+ SYNC // 07e0
+
+ // vector add and sub instructions
+ VAB V3, V4, V4 // e743400000f3
+ VAH V3, V4, V4 // e743400010f3
+ VAF V3, V4, V4 // e743400020f3
+ VAG V3, V4, V4 // e743400030f3
+ VAQ V3, V4, V4 // e743400040f3
+ VAB V1, V2 // e721200000f3
+ VAH V1, V2 // e721200010f3
+ VAF V1, V2 // e721200020f3
+ VAG V1, V2 // e721200030f3
+ VAQ V1, V2 // e721200040f3
+ VSB V3, V4, V4 // e744300000f7
+ VSH V3, V4, V4 // e744300010f7
+ VSF V3, V4, V4 // e744300020f7
+ VSG V3, V4, V4 // e744300030f7
+ VSQ V3, V4, V4 // e744300040f7
+ VSB V1, V2 // e722100000f7
+ VSH V1, V2 // e722100010f7
+ VSF V1, V2 // e722100020f7
+ VSG V1, V2 // e722100030f7
+ VSQ V1, V2 // e722100040f7
+
+ VCEQB V1, V3, V3 // e731300000f8
+ VL (R15), V1 // e710f0000006
+ VST V1, (R15) // e710f000000e
+ VL (R15), V31 // e7f0f0000806
+ VST V31, (R15) // e7f0f000080e
+ VESLB $5, V14 // e7ee00050030
+ VESRAG $0, V15, V16 // e70f0000383a
+ VLM (R15), V8, V23 // e787f0000436
+ VSTM V8, V23, (R15) // e787f000043e
+ VONE V1 // e710ffff0044
+ VZERO V16 // e70000000844
+ VGBM $52428, V31 // e7f0cccc0844
+ VREPIB $255, V4 // e74000ff0045
+ VREPIH $-1, V16 // e700ffff1845
+ VREPIF $-32768, V0 // e70080002045
+ VREPIG $32767, V31 // e7f07fff3845
+ VREPG $1, V4, V16 // e7040001384d
+ VREPB $4, V31, V1 // e71f0004044d
+ VFTCIDB $4095, V1, V2 // e721fff0304a
+ WFTCIDB $3276, V15, V16 // e70fccc8384a
+ VPOPCT V8, V19 // e73800000850
+ VFEEZBS V1, V2, V31 // e7f120300880
+ WFCHDBS V22, V23, V4 // e746701836eb
+ VMNH V1, V2, V30 // e7e1200018fe
+ VERLLVF V2, V30, V27 // e7be20002c73
+ VSCBIB V0, V23, V24 // e78700000cf5
+ VN V2, V1, V0 // e70210000068
+ VNC V2, V1, V0 // e70210000069
+ VO V2, V1, V0 // e7021000006a
+ VX V2, V1, V0 // e7021000006d
+ VN V16, V1 // e71010000468
+ VNC V16, V1 // e71010000469
+ VO V16, V1 // e7101000046a
+ VX V16, V1 // e7101000046d
+ VNOT V16, V1 // e7101000046b
+ VCLZF V16, V17 // e71000002c53
+ VLVGP R3, R4, V8 // e78340000062
+ VGEG $1, 8(R15)(V30*1), V31 // e7fef0081c12
+ VSCEG $1, V31, 16(R15)(V30*1) // e7fef0101c1a
+ VGEF $0, 2048(R15)(V1*1), V2 // e721f8000013
+ VSCEF $0, V2, 4095(R15)(V1*1) // e721ffff001b
+ VLL R0, (R15), V1 // e710f0000037
+ VSTL R0, V16, (R15) // e700f000083f
+ VGMH $8, $16, V12 // e7c008101046
+ VLEIB $15, $255, V0 // e70000fff040
+ VLEIH $7, $-32768, V15 // e7f080007041
+ VLEIF $2, $-43, V16 // e700ffd52843
+ VLEIG $1, $32767, V31 // e7f07fff1842
+ VSLDB $3, V1, V16, V18 // e72100030a77
+ VERIMB $2, V31, V1, V2 // e72f10020472
+ VSEL V1, V2, V3, V4 // e7412000308d
+ VGFMAH V21, V31, V24, V0 // e705f10087bc
+ VFMADB V16, V8, V9, V10 // e7a08300948f
+ WFMADB V17, V18, V19, V20 // e74123083f8f
+ VFMSDB V2, V25, V24, V31 // e7f293008b8e
+ WFMSDB V31, V2, V3, V4 // e74f2308348e
+ VPERM V31, V0, V2, V3 // e73f0000248c
+ VPDI $1, V2, V31, V1 // e712f0001284
+ VLEG $1, (R3), V1 // e71030001002
+ VLEF $2, (R0), V31 // e7f000002803
+ VLEH $3, (R12), V16 // e700c0003801
+ VLEB $15, 4095(R9), V15 // e7f09ffff000
+ VSTEG $1, V30, (R1)(R2*1) // e7e21000180a
+ VSTEF $3, V2, (R9) // e7209000300b
+ VSTEH $7, V31, (R2) // e7f020007809
+ VSTEB $15, V29, 4094(R12) // e7d0cffef808
+ VMSLG V21, V22, V23, V24 // e78563007fb8
+ VMSLEG V21, V22, V23, V24 // e78563807fb8
+ VMSLOG V21, V22, V23, V24 // e78563407fb8
+ VMSLEOG V21, V22, V23, V24 // e78563c07fb8
+ VSUMGH V1, V2, V3 // e73120001065
+ VSUMGF V16, V17, V18 // e72010002e65
+ VSUMQF V4, V5, V6 // e76450002067
+ VSUMQG V19, V20, V21 // e75340003e67
+ VSUMB V7, V8, V9 // e79780000064
+ VSUMH V22, V23, V24 // e78670001e64
+
+ RET
+ RET foo(SB)
+
+TEXT main·init(SB),DUPOK|NOSPLIT,$0 // TEXT main.init(SB), DUPOK|NOSPLIT, $0
+ RET
+
+TEXT main·main(SB),DUPOK|NOSPLIT,$0 // TEXT main.main(SB), DUPOK|NOSPLIT, $0
+ BL main·foo(SB) // CALL main.foo(SB)
+ RET
diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go
new file mode 100644
index 0000000..607166e
--- /dev/null
+++ b/src/cmd/asm/internal/flags/flags.go
@@ -0,0 +1,91 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flags implements top-level flags and the usage message for the assembler.
+package flags
+
+import (
+ "cmd/internal/objabi"
+ "flag"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+// Command-line flags registered on the default flag set when the package is
+// initialized. Each variable is the pointer returned by the flag package; it
+// points at the flag's default value until Parse has processed the command line.
+var (
+ Debug = flag.Bool("debug", false, "dump instructions as they are parsed")
+ OutputFile = flag.String("o", "", "output file; default foo.o for /a/b/c/foo.s as first argument")
+ TrimPath = flag.String("trimpath", "", "remove prefix from recorded source file paths")
+ Shared = flag.Bool("shared", false, "generate code that can be linked into a shared library")
+ Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries")
+ Linkshared = flag.Bool("linkshared", false, "generate code that will be linked against Go shared libraries")
+ AllErrors = flag.Bool("e", false, "no limit on number of errors reported")
+ SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble")
+ Importpath = flag.String("p", "", "set expected package import to path")
+ Spectre = flag.String("spectre", "", "enable spectre mitigations in `list` (all, ret)")
+ CompilingRuntime = flag.Bool("compiling-runtime", false, "source to be compiled is part of the Go runtime")
+)
+
+// DebugFlags holds the debugging settings selected with the -d flag; init
+// registers it through objabi.NewDebugFlag. The help tag on each field is
+// presumably the text shown by "-d help" (confirm in objabi).
+var DebugFlags struct {
+ MayMoreStack string `help:"call named function before all stack growth checks"`
+}
+
+// Flag values that init binds to package variables instead of taking the
+// pointer returned by the flag package.
+var (
+ D MultiFlag // -D: predefined symbols; may be given multiple times
+ I MultiFlag // -I: include directories; may be given multiple times
+ PrintOut int // -S: print assembly and machine code (registered via objabi.Flagcount)
+ DebugV bool // -v: print debug output
+)
+
+// init registers the flags whose values live in package variables: the
+// repeatable -D and -I lists, -v (DebugV), the -d debug settings (DebugFlags),
+// the -V version flag, and -S (PrintOut).
+func init() {
+ flag.Var(&D, "D", "predefined symbol with optional simple value -D=identifier=value; can be set multiple times")
+ flag.Var(&I, "I", "include directory; can be set multiple times")
+ flag.BoolVar(&DebugV, "v", false, "print debug output")
+ flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help")
+ objabi.AddVersionFlag() // -V
+ objabi.Flagcount("S", "print assembly and machine code", &PrintOut)
+}
+
+// MultiFlag is a flag value that accumulates every occurrence of a repeatable
+// flag into a list, so that -I=dir1 -I=dir2 yields [dir1 dir2].
+type MultiFlag []string
+
+// String returns the collected values in fmt's default slice notation, or the
+// empty string when no value has been set.
+func (m *MultiFlag) String() string {
+	if values := *m; len(values) > 0 {
+		return fmt.Sprint(values)
+	}
+	return ""
+}
+
+// Set records one more occurrence of the flag by appending val to the list.
+// It never fails.
+func (m *MultiFlag) Set(val string) error {
+	*m = append(*m, val)
+	return nil
+}
+
+// Usage writes the command synopsis to standard error, prints the registered
+// flags with flag.PrintDefaults, and exits with status 2. It does not return.
+func Usage() {
+	stderr := os.Stderr
+	fmt.Fprint(stderr, "usage: asm [options] file.s ...\n")
+	fmt.Fprint(stderr, "Flags:\n")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+// Parse installs Usage as flag.Usage, parses the command line, and requires at
+// least one non-flag argument.
+//
+// When -o was not given, exactly one input file must be named; the output file
+// then defaults to the base name of that input with a trailing ".s" removed and
+// ".o" appended (foo.o for /a/b/c/foo.s). Any violation calls flag.Usage, which
+// is Usage at that point and therefore exits with status 2.
+func Parse() {
+	flag.Usage = Usage
+	flag.Parse()
+	if flag.NArg() == 0 {
+		flag.Usage()
+	}
+
+	// Flag refinement.
+	if *OutputFile == "" {
+		if flag.NArg() != 1 {
+			flag.Usage()
+		}
+		// TrimSuffix leaves names without a ".s" suffix unchanged.
+		input := strings.TrimSuffix(filepath.Base(flag.Arg(0)), ".s")
+		*OutputFile = fmt.Sprintf("%s.o", input)
+	}
+}
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go
new file mode 100644
index 0000000..e373ae8
--- /dev/null
+++ b/src/cmd/asm/internal/lex/input.go
@@ -0,0 +1,502 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "fmt"
+ "internal/buildcfg"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "text/scanner"
+
+ "cmd/asm/internal/flags"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+)
+
+// Input is the main input: a stack of readers and some macro definitions.
+// It also handles #include processing (by pushing onto the input stack)
+// and parses and instantiates macro definitions.
+type Input struct {
+ Stack
+ includes []string // Directories tried by #include, in order.
+ beginningOfLine bool // Whether the next token starts a line; '#' is only accepted there.
+ ifdefStack []bool // One entry per open #ifdef/#ifndef; the last entry says whether input is enabled.
+ macros map[string]*Macro // Currently defined macros, keyed by name.
+ text string // Text of last token returned by Next.
+ peek bool // Whether peekToken/peekText hold a token that Next must return first.
+ peekToken ScanToken
+ peekText string
+}
+
+// NewInput returns an Input for the file at the given path. The file itself
+// is not opened here; name is only used to seed the include search path.
+// The macro table starts out with the -D definitions from the command line.
+func NewInput(name string) *Input {
+	// Include directories: the source file's directory first, then the -I directories.
+	dirs := []string{filepath.Dir(name)}
+	dirs = append(dirs, flags.I...)
+
+	in := new(Input)
+	in.includes = dirs
+	in.beginningOfLine = true
+	in.macros = predefine(flags.D)
+	return in
+}
+
+// predefine installs the macros set by the -D flag on the command line.
+//
+// Each entry of defines has the form "name" or "name=value"; a missing value
+// defaults to "1". The name must scan as a single identifier, otherwise an
+// error is printed and flags.Usage is called. When the runtime is being
+// compiled, a GOEXPERIMENT_<exp> macro with value 1 is also defined for every
+// enabled experiment. The returned map is keyed by macro name.
+func predefine(defines flags.MultiFlag) map[string]*Macro {
+	macros := make(map[string]*Macro)
+
+	// Set macros for GOEXPERIMENTs so we can easily switch
+	// runtime assembly code based on them.
+	if *flags.CompilingRuntime {
+		for _, exp := range buildcfg.EnabledExperiments() {
+			// Define macro.
+			name := "GOEXPERIMENT_" + exp
+			macros[name] = &Macro{
+				name:   name,
+				args:   nil,
+				tokens: Tokenize("1"),
+			}
+		}
+	}
+
+	for _, name := range defines {
+		value := "1"
+		i := strings.IndexRune(name, '=')
+		if i > 0 {
+			name, value = name[:i], name[i+1:]
+		}
+		tokens := Tokenize(name)
+		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
+			// Report the name as written. Do not index tokens here: it is
+			// empty for a blank name (e.g. -D ""), which would panic.
+			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", name)
+			flags.Usage()
+		}
+		macros[name] = &Macro{
+			name:   name,
+			args:   nil,
+			tokens: Tokenize(value),
+		}
+	}
+	return macros
+}
+
+// panicOnError makes Error panic instead of exiting. For testing.
+var panicOnError bool
+
+// Error reports a problem at the current file and line. The arguments are
+// formatted with fmt.Sprintln. Normally the message is written to standard
+// error and the process exits with status 1; when panicOnError is set the
+// same message is raised as a panic holding an error value instead.
+func (in *Input) Error(args ...interface{}) {
+	const format = "%s:%d: %s"
+	file, line, msg := in.File(), in.Line(), fmt.Sprintln(args...)
+	if !panicOnError {
+		fmt.Fprintf(os.Stderr, format, file, line, msg)
+		os.Exit(1)
+	}
+	panic(fmt.Errorf(format, file, line, msg))
+}
+
+// expectText reports an error like Error does, with "; got" and the quoted
+// text of the Stack's most recent token appended to the message.
+func (in *Input) expectText(args ...interface{}) {
+	got := strconv.Quote(in.Stack.Text())
+	in.Error(append(args, "; got", got)...)
+}
+
+// enabled reports whether tokens are currently being passed through: either
+// no #ifdef/#ifndef is open, or the innermost open one is enabled.
+func (in *Input) enabled() bool {
+	if depth := len(in.ifdefStack); depth > 0 {
+		return in.ifdefStack[depth-1]
+	}
+	return true
+}
+
+// expectNewline consumes the next token from the Stack and reports an error
+// naming directive if that token is not a newline.
+func (in *Input) expectNewline(directive string) {
+	if in.Stack.Next() == '\n' {
+		return
+	}
+	in.expectText("expected newline after", directive)
+}
+
+// Next returns the next token of the preprocessed input. A token that was
+// put back (in.peek) is returned first. Otherwise tokens are read from the
+// Stack: a '#' is handed to hash and then returned as '#', an identifier
+// that names a macro is expanded and scanning continues, and any other token
+// is returned only while the input is enabled by the #ifdef stack.
+func (in *Input) Next() ScanToken {
+ if in.peek {
+ in.peek = false
+ tok := in.peekToken
+ in.text = in.peekText
+ return tok
+ }
+ // If we cannot generate a token after 100 macro invocations, we're in trouble.
+ // The usual case is caught by Push, below, but be safe.
+ for nesting := 0; nesting < 100; {
+ tok := in.Stack.Next()
+ switch tok {
+ case '#':
+ if !in.beginningOfLine {
+ in.Error("'#' must be first item on line")
+ }
+ // hash reports whether it processed the directive; the result becomes beginningOfLine.
+ in.beginningOfLine = in.hash()
+ in.text = "#"
+ return '#'
+
+ case scanner.Ident:
+ // Is it a macro name?
+ name := in.Stack.Text()
+ macro := in.macros[name]
+ if macro != nil {
+ nesting++
+ in.invokeMacro(macro)
+ continue
+ }
+ fallthrough
+ default:
+ if tok == scanner.EOF && len(in.ifdefStack) > 0 {
+ // We're skipping text but have run out of input with no #endif.
+ in.Error("unclosed #ifdef or #ifndef")
+ }
+ in.beginningOfLine = tok == '\n'
+ // Tokens in a disabled #ifdef region are dropped; keep scanning.
+ if in.enabled() {
+ in.text = in.Stack.Text()
+ return tok
+ }
+ }
+ }
+ in.Error("recursive macro invocation")
+ return 0
+}
+
+// Text returns the text of the last token returned by Next.
+func (in *Input) Text() string {
+ return in.text
+}
+
+// hash processes a # preprocessor directive. It reports whether it completes.
+// It returns false only when the directive was skipped because the input is
+// disabled and the directive is not one of else, endif, ifdef, ifndef, line.
+func (in *Input) hash() bool {
+ // We have a '#'; it must be followed by a known word (define, include, etc.).
+ tok := in.Stack.Next()
+ if tok != scanner.Ident {
+ in.expectText("expected identifier after '#'")
+ }
+ if !in.enabled() {
+ // Can only start including again if we are at #else or #endif but also
+ // need to keep track of nested #if[n]defs.
+ // We let #line through because it might affect errors.
+ switch in.Stack.Text() {
+ case "else", "endif", "ifdef", "ifndef", "line":
+ // Press on.
+ default:
+ return false
+ }
+ }
+ // Dispatch on the directive name.
+ switch in.Stack.Text() {
+ case "define":
+ in.define()
+ case "else":
+ in.else_()
+ case "endif":
+ in.endif()
+ case "ifdef":
+ in.ifdef(true)
+ case "ifndef":
+ in.ifdef(false)
+ case "include":
+ in.include()
+ case "line":
+ in.line()
+ case "undef":
+ in.undef()
+ default:
+ in.Error("unexpected token after '#':", in.Stack.Text())
+ }
+ return true
+}
+
+// macroName reads the next token, which must be an identifier, and returns
+// its text as the name of the macro a directive refers to.
+func (in *Input) macroName() string {
+	// Read straight from the Stack so the name itself is never macro-expanded.
+	if in.Stack.Next() != scanner.Ident {
+		in.expectText("expected identifier after # directive")
+	}
+	// An identifier token is alphanumeric by definition.
+	return in.Stack.Text()
+}
+
+// define handles #define: it reads the macro name, then its formals and
+// body, and records the result in the macro table.
+func (in *Input) define() {
+	macro := in.macroName()
+	formals, body := in.macroDefinition(macro)
+	in.defineMacro(macro, formals, body)
+}
+
+// defineMacro records a macro with the given name, formal arguments and
+// body in the Input. Defining a name that already has a macro is an error.
+func (in *Input) defineMacro(name string, args []string, tokens []Token) {
+	if existing := in.macros[name]; existing != nil {
+		in.Error("redefinition of macro:", name)
+	}
+	m := &Macro{name: name, args: args, tokens: tokens}
+	in.macros[name] = m
+}
+
+// macroDefinition returns the list of formals and the tokens of the definition.
+// The argument list is nil for no parens on the definition; otherwise a list of
+// formal argument names.
+// Both results are nil when the macro name is followed directly by a newline
+// or end of input. name is used only in error messages.
+func (in *Input) macroDefinition(name string) ([]string, []Token) {
+ prevCol := in.Stack.Col()
+ tok := in.Stack.Next()
+ if tok == '\n' || tok == scanner.EOF {
+ return nil, nil // No definition for macro
+ }
+ var args []string
+ // The C preprocessor treats
+ // #define A(x)
+ // and
+ // #define A (x)
+ // distinctly: the first is a macro with arguments, the second without.
+ // Distinguish these cases using the column number, since we don't
+ // see the space itself. Note that text/scanner reports the position at the
+ // end of the token. It's where you are now, and you just read this token.
+ if tok == '(' && in.Stack.Col() == prevCol+1 {
+ // Macro has arguments. Scan list of formals.
+ // acceptArg is true when an identifier may come next (at the start and after a comma).
+ acceptArg := true
+ args = []string{} // Zero length but not nil.
+ Loop:
+ for {
+ tok = in.Stack.Next()
+ switch tok {
+ case ')':
+ tok = in.Stack.Next() // First token of macro definition.
+ break Loop
+ case ',':
+ if acceptArg {
+ in.Error("bad syntax in definition for macro:", name)
+ }
+ acceptArg = true
+ case scanner.Ident:
+ if !acceptArg {
+ in.Error("bad syntax in definition for macro:", name)
+ }
+ arg := in.Stack.Text()
+ if i := lookup(args, arg); i >= 0 {
+ in.Error("duplicate argument", arg, "in definition for macro:", name)
+ }
+ args = append(args, arg)
+ acceptArg = false
+ default:
+ in.Error("bad definition for macro:", name)
+ }
+ }
+ }
+ var tokens []Token
+ // Scan to newline. Backslashes escape newlines.
+ for tok != '\n' {
+ if tok == scanner.EOF {
+ in.Error("missing newline in definition for macro:", name)
+ }
+ if tok == '\\' {
+ // Drop the backslash itself; the escaped token is what gets stored below.
+ tok = in.Stack.Next()
+ if tok != '\n' && tok != '\\' {
+ in.Error(`can only escape \ or \n in definition for macro:`, name)
+ }
+ }
+ tokens = append(tokens, Make(tok, in.Stack.Text()))
+ tok = in.Stack.Next()
+ }
+ return args, tokens
+}
+
+// lookup returns the index of the first element of args equal to arg,
+// or -1 if there is none.
+func lookup(args []string, arg string) int {
+	for i := 0; i < len(args); i++ {
+		if args[i] == arg {
+			return i
+		}
+	}
+	return -1
+}
+
+// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
+// parameters substituted for the formals.
+// Invoking a macro does not touch the PC/line history.
+//
+// A macro defined without a parameter list is substituted as is. A macro
+// defined with a parameter list but used without a following '(' is not
+// expanded: the token that followed it is put back for Next to return, and
+// only the macro's name is pushed, marked so it is not expanded again.
+func (in *Input) invokeMacro(macro *Macro) {
+	// If the macro has no arguments, just substitute the text.
+	if macro.args == nil {
+		in.Push(NewSlice(in.Base(), in.Line(), macro.tokens))
+		return
+	}
+	tok := in.Stack.Next()
+	if tok != '(' {
+		// If the macro has arguments but is invoked without them, all we push is the macro name.
+		// First, put back the token. Its text must be taken from the Stack now:
+		// in.text still holds the previous token returned by Next, and after the
+		// Push below the Stack's text would come from the new Slice.
+		in.peekToken = tok
+		in.peekText = in.Stack.Text()
+		in.peek = true
+		in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)}))
+		return
+	}
+	actuals := in.argsFor(macro)
+	var tokens []Token
+	for _, tok := range macro.tokens {
+		// Only identifiers can be formals; everything else is copied through.
+		if tok.ScanToken != scanner.Ident {
+			tokens = append(tokens, tok)
+			continue
+		}
+		substitution := actuals[tok.text]
+		if substitution == nil {
+			tokens = append(tokens, tok)
+			continue
+		}
+		tokens = append(tokens, substitution...)
+	}
+	in.Push(NewSlice(in.Base(), in.Line(), tokens))
+}
+
+// argsFor collects the actual arguments of a macro invocation, whose opening
+// parenthesis has already been read, and returns them keyed by the name of
+// the corresponding formal. A count mismatch is reported as an error.
+func (in *Input) argsFor(macro *Macro) map[string][]Token {
+	// Gather one argument per iteration until the closing parenthesis; the
+	// count is checked afterwards.
+	var actuals [][]Token
+	for {
+		tokens, end := in.collectArgument(macro)
+		actuals = append(actuals, tokens)
+		if end == ')' {
+			break
+		}
+	}
+	switch {
+	case len(macro.args) == 0 && len(actuals) == 1 && actuals[0] == nil:
+		// Zero-argument macros are tricky: "()" yields one empty argument.
+		actuals = nil
+	case len(actuals) != len(macro.args):
+		in.Error("wrong arg count for macro", macro.name)
+	}
+	byFormal := make(map[string][]Token)
+	for i := range actuals {
+		byFormal[macro.args[i]] = actuals[i]
+	}
+	return byFormal
+}
+
+// collectArgument returns the actual tokens for a single argument of a macro.
+// It also returns the token that terminated the argument, which will always
+// be either ',' or ')'. The starting '(' has been scanned.
+// Commas and parentheses nested inside parentheses belong to the argument.
+// End of input or a newline before the terminator is reported as an error.
+func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
+ nesting := 0 // Depth of parentheses opened inside this argument.
+ var tokens []Token
+ for {
+ tok := in.Stack.Next()
+ if tok == scanner.EOF || tok == '\n' {
+ in.Error("unterminated arg list invoking macro:", macro.name)
+ }
+ if nesting == 0 && (tok == ')' || tok == ',') {
+ return tokens, tok
+ }
+ if tok == '(' {
+ nesting++
+ }
+ if tok == ')' {
+ nesting--
+ }
+ tokens = append(tokens, Make(tok, in.Stack.Text()))
+ }
+}
+
+// #ifdef and #ifndef processing.
+// truth is true for #ifdef and false for #ifndef. The value pushed on the
+// ifdef stack says whether the text that follows is enabled.
+func (in *Input) ifdef(truth bool) {
+ name := in.macroName()
+ in.expectNewline("#if[n]def")
+ if !in.enabled() {
+ // Nested inside a disabled region: stay disabled whatever the macro's state.
+ truth = false
+ } else if _, defined := in.macros[name]; !defined {
+ truth = !truth
+ }
+ in.ifdefStack = append(in.ifdefStack, truth)
+}
+
+// #else processing
+// The innermost ifdef entry is flipped only when the enclosing level is
+// enabled (or there is no enclosing level); otherwise it is left unchanged.
+func (in *Input) else_() {
+ in.expectNewline("#else")
+ if len(in.ifdefStack) == 0 {
+ in.Error("unmatched #else")
+ }
+ if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] {
+ in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
+ }
+}
+
+// endif handles #endif: it requires a newline after the directive and pops
+// the innermost entry of the ifdef stack. An #endif with nothing open is an
+// error.
+func (in *Input) endif() {
+	in.expectNewline("#endif")
+	depth := len(in.ifdefStack)
+	if depth == 0 {
+		in.Error("unmatched #endif")
+	}
+	in.ifdefStack = in.ifdefStack[:depth-1]
+}
+
+// #include processing.
+// The directive must be followed by a quoted file name and a newline. The
+// name is opened as given first; if that fails, it is tried relative to each
+// include directory in order. The opened file is pushed on the input stack
+// as a new Tokenizer, which receives the file so it can close it.
+func (in *Input) include() {
+ // Find and parse string.
+ tok := in.Stack.Next()
+ if tok != scanner.String {
+ in.expectText("expected string after #include")
+ }
+ name, err := strconv.Unquote(in.Stack.Text())
+ if err != nil {
+ in.Error("unquoting include file name: ", err)
+ }
+ in.expectNewline("#include")
+ // Push tokenizer for file onto stack.
+ fd, err := os.Open(name)
+ if err != nil {
+ for _, dir := range in.includes {
+ fd, err = os.Open(filepath.Join(dir, name))
+ if err == nil {
+ break
+ }
+ }
+ // err is still set here when no directory had the file.
+ if err != nil {
+ in.Error("#include:", err)
+ }
+ }
+ in.Push(NewTokenizer(name, fd, fd))
+}
+
+// #line processing.
+// The directive must be an integer line number, a quoted file name and a
+// newline. On success the Stack's position base is replaced by a line-pragma
+// base built from that file name and line number.
+func (in *Input) line() {
+ // Only need to handle Plan 9 format: #line 337 "filename"
+ tok := in.Stack.Next()
+ if tok != scanner.Int {
+ in.expectText("expected line number after #line")
+ }
+ line, err := strconv.Atoi(in.Stack.Text())
+ if err != nil {
+ in.Error("error parsing #line (cannot happen):", err)
+ }
+ tok = in.Stack.Next()
+ if tok != scanner.String {
+ in.expectText("expected file name in #line")
+ }
+ file, err := strconv.Unquote(in.Stack.Text())
+ if err != nil {
+ in.Error("unquoting #line file name: ", err)
+ }
+ tok = in.Stack.Next()
+ if tok != '\n' {
+ in.Error("unexpected token at end of #line: ", tok)
+ }
+ pos := src.MakePos(in.Base(), uint(in.Line())+1, 1) // +1 because #line nnn means line nnn starts on next line
+ in.Stack.SetBase(src.NewLinePragmaBase(pos, file, objabi.AbsFile(objabi.WorkingDir(), file, *flags.TrimPath), uint(line), 1))
+}
+
+// undef handles #undef: the named macro must currently be defined and the
+// directive must end at the newline; the macro is then removed.
+func (in *Input) undef() {
+	name := in.macroName()
+	if defined := in.macros[name]; defined == nil {
+		in.Error("#undef for undefined macro:", name)
+	}
+	// Nothing but a newline may follow the name.
+	if in.Stack.Next() != '\n' {
+		in.Error("syntax error in #undef for macro:", name)
+	}
+	delete(in.macros, name)
+}
+
+// Push adds r to the top of the input stack. It reports an "input recursion"
+// error instead when the stack already holds more than 100 readers.
+func (in *Input) Push(r TokenReader) {
+ if len(in.tr) > 100 {
+ in.Error("input recursion")
+ }
+ in.Stack.Push(r)
+}
+
+// Close does nothing.
+func (in *Input) Close() {
+}
diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go
new file mode 100644
index 0000000..7cd41a5
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lex implements lexical analysis for the assembler.
+package lex
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "strings"
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A ScanToken represents an input item. It is a simple wrapping of rune, as
+// returned by text/scanner.Scanner, plus a couple of extra values.
+type ScanToken rune
+
+const (
+ // Asm defines some two-character lexemes. We make up
+ // a rune/ScanToken value for them - ugly but simple.
+ // The values count down from -1000: LSH is -1000 and each later name is one less.
+ LSH ScanToken = -1000 - iota // << Left shift.
+ RSH // >> Logical right shift.
+ ARR // -> Used on ARM for shift type 3, arithmetic right shift.
+ ROT // @> Used on ARM for shift type 4, rotate right.
+ Include // included file started here
+ BuildComment // //go:build or +build comment
+ macroName // name of macro that should not be expanded
+)
+
+// IsRegisterShift reports whether the token is one of the ARM register shift
+// operators: LSH, RSH, ARR or ROT.
+func IsRegisterShift(r ScanToken) bool {
+	switch r {
+	case LSH, RSH, ARR, ROT:
+		return true
+	}
+	return false
+}
+
+// String returns a description of the token for use in messages: a class
+// name for the text/scanner token classes, a name for the values made up by
+// this package, and the quoted character for everything else.
+func (t ScanToken) String() string {
+	switch t {
+	case scanner.EOF:
+		return "EOF"
+	case scanner.Ident:
+		return "identifier"
+	case scanner.Int:
+		return "integer constant"
+	case scanner.Float:
+		return "float constant"
+	case scanner.Char:
+		return "rune constant"
+	case scanner.String:
+		return "string constant"
+	case scanner.RawString:
+		return "raw string constant"
+	case scanner.Comment:
+		return "comment"
+	// The values below are made-up negative numbers, not characters; without
+	// these cases they would be printed as the Unicode replacement character.
+	case LSH:
+		return `"<<"`
+	case RSH:
+		return `">>"`
+	case ARR:
+		return `"->"`
+	case ROT:
+		return `"@>"`
+	case Include:
+		return "include marker"
+	case BuildComment:
+		return "build comment"
+	case macroName:
+		return "macro name"
+	default:
+		return fmt.Sprintf("%q", rune(t))
+	}
+}
+
+// NewLexer returns a lexer for the named file: an Input with a Tokenizer
+// reading the file pushed onto it. If the file cannot be opened, the error
+// is reported with log.Fatalf.
+func NewLexer(name string) TokenReader {
+ input := NewInput(name)
+ fd, err := os.Open(name)
+ if err != nil {
+ log.Fatalf("%s\n", err)
+ }
+ // The file is passed twice: once as the reader, once as the handle for the Tokenizer to close.
+ input.Push(NewTokenizer(name, fd, fd))
+ return input
+}
+
+// The other files in this directory each contain an implementation of TokenReader.
+
+// A TokenReader is like a reader, but returns lex tokens of type ScanToken. It also can tell you what
+// the text of the most recently returned token is, and where it was found.
+// The underlying scanner elides all spaces except newline, so the input looks like a stream of
+// Tokens; original spacing is lost but we don't need it.
+type TokenReader interface {
+ // Next returns the next token.
+ Next() ScanToken
+ // The following methods all refer to the most recent token returned by Next.
+ // Text returns the original string representation of the token.
+ Text() string
+ // File reports the source file name of the token.
+ File() string
+ // Base reports the position base of the token.
+ Base() *src.PosBase
+ // SetBase sets the position base.
+ SetBase(*src.PosBase)
+ // Line reports the source line number of the token.
+ Line() int
+ // Col reports the source column number of the token.
+ Col() int
+ // Close does any teardown required.
+ Close()
+}
+
+// A Token is a scan token plus its string value.
+// A macro is stored as a sequence of Tokens with spaces stripped.
+type Token struct {
+ ScanToken // The kind of token.
+ text string // The token's text, as rewritten by Make.
+}
+
+// Make returns a Token with the given rune (ScanToken) and text representation.
+func Make(token ScanToken, text string) Token {
+ // If the symbol starts with center dot, as in ·x, rewrite it as ""·x
+ if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") {
+ text = `""` + text
+ }
+ // Substitute the substitutes for . and /.
+ text = strings.Replace(text, "\u00B7", ".", -1)
+ text = strings.Replace(text, "\u2215", "/", -1)
+ return Token{ScanToken: token, text: text}
+}
+
+// String returns the text of the token.
+func (l Token) String() string {
+ return l.text
+}
+
+// A Macro represents the definition of a #defined macro.
+type Macro struct {
+ name string // The #define name.
+ args []string // Formal arguments; nil when the macro was defined without a parameter list.
+ tokens []Token // Body of macro.
+}
+
+// Tokenize scans str and returns its tokens in order, up to but not
+// including EOF. It is used to parse the -D flag and in tests.
+func Tokenize(str string) []Token {
+	var tokens []Token
+	tokenizer := NewTokenizer("command line", strings.NewReader(str), nil)
+	for tok := tokenizer.Next(); tok != scanner.EOF; tok = tokenizer.Next() {
+		tokens = append(tokens, Make(tok, tokenizer.Text()))
+	}
+	return tokens
+}
diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go
new file mode 100644
index 0000000..51679d2
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex_test.go
@@ -0,0 +1,365 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+ "text/scanner"
+)
+
+// lexTest is one case for TestLex.
+type lexTest struct {
+ name string // Label for the case; also used as the input's file name.
+ input string // Source text to preprocess.
+ output string // Expected token texts joined by '.', in the form produced by drain.
+}
+
+// lexTests lists inputs for TestLex together with the expected output of drain.
+var lexTests = []lexTest{
+ {
+ "empty",
+ "",
+ "",
+ },
+ {
+ "simple",
+ "1 (a)",
+ "1.(.a.)",
+ },
+ {
+ "simple define",
+ lines(
+ "#define A 1234",
+ "A",
+ ),
+ "1234.\n",
+ },
+ {
+ "define without value",
+ "#define A",
+ "",
+ },
+ {
+ "macro without arguments",
+ "#define A() 1234\n" + "A()\n",
+ "1234.\n",
+ },
+ {
+ "macro with just parens as body",
+ "#define A () \n" + "A\n",
+ "(.).\n",
+ },
+ {
+ "macro with parens but no arguments",
+ "#define A (x) \n" + "A\n",
+ "(.x.).\n",
+ },
+ {
+ "macro with arguments",
+ "#define A(x, y, z) x+z+y\n" + "A(1, 2, 3)\n",
+ "1.+.3.+.2.\n",
+ },
+ {
+ "argumented macro invoked without arguments",
+ lines(
+ "#define X() foo ",
+ "X()",
+ "X",
+ ),
+ "foo.\n.X.\n",
+ },
+ {
+ "multiline macro without arguments",
+ lines(
+ "#define A 1\\",
+ "\t2\\",
+ "\t3",
+ "before",
+ "A",
+ "after",
+ ),
+ "before.\n.1.\n.2.\n.3.\n.after.\n",
+ },
+ {
+ "multiline macro with arguments",
+ lines(
+ "#define A(a, b, c) a\\",
+ "\tb\\",
+ "\tc",
+ "before",
+ "A(1, 2, 3)",
+ "after",
+ ),
+ "before.\n.1.\n.2.\n.3.\n.after.\n",
+ },
+ {
+ "LOAD macro",
+ lines(
+ "#define LOAD(off, reg) \\",
+ "\tMOVBLZX (off*4)(R12), reg \\",
+ "\tADDB reg, DX",
+ "",
+ "LOAD(8, AX)",
+ ),
+ "\n.\n.MOVBLZX.(.8.*.4.).(.R12.).,.AX.\n.ADDB.AX.,.DX.\n",
+ },
+ {
+ "nested multiline macro",
+ lines(
+ "#define KEYROUND(xmm, load, off, r1, r2, index) \\",
+ "\tMOVBLZX (BP)(DX*4), R8 \\",
+ "\tload((off+1), r2) \\",
+ "\tMOVB R8, (off*4)(R12) \\",
+ "\tPINSRW $index, (BP)(R8*4), xmm",
+ "#define LOAD(off, reg) \\",
+ "\tMOVBLZX (off*4)(R12), reg \\",
+ "\tADDB reg, DX",
+ "KEYROUND(X0, LOAD, 8, AX, BX, 0)",
+ ),
+ "\n.MOVBLZX.(.BP.).(.DX.*.4.).,.R8.\n.\n.MOVBLZX.(.(.8.+.1.).*.4.).(.R12.).,.BX.\n.ADDB.BX.,.DX.\n.MOVB.R8.,.(.8.*.4.).(.R12.).\n.PINSRW.$.0.,.(.BP.).(.R8.*.4.).,.X0.\n",
+ },
+ {
+ "taken #ifdef",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#define B 1234",
+ "#endif",
+ "B",
+ ),
+ "1234.\n",
+ },
+ {
+ "not taken #ifdef",
+ lines(
+ "#ifdef A",
+ "#define B 1234",
+ "#endif",
+ "B",
+ ),
+ "B.\n",
+ },
+ {
+ "taken #ifdef with else",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#define B 1234",
+ "#else",
+ "#define B 5678",
+ "#endif",
+ "B",
+ ),
+ "1234.\n",
+ },
+ {
+ "not taken #ifdef with else",
+ lines(
+ "#ifdef A",
+ "#define B 1234",
+ "#else",
+ "#define B 5678",
+ "#endif",
+ "B",
+ ),
+ "5678.\n",
+ },
+ {
+ "nested taken/taken #ifdef",
+ lines(
+ "#define A",
+ "#define B",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "1234.\n",
+ },
+ {
+ "nested taken/not-taken #ifdef",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "5678.\n",
+ },
+ {
+ "nested not-taken/would-be-taken #ifdef",
+ lines(
+ "#define B",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "C.\n",
+ },
+ {
+ "nested not-taken/not-taken #ifdef",
+ lines(
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "C.\n",
+ },
+ {
+ "nested #define",
+ lines(
+ "#define A #define B THIS",
+ "A",
+ "B",
+ ),
+ "THIS.\n",
+ },
+ {
+ "nested #define with args",
+ lines(
+ "#define A #define B(x) x",
+ "A",
+ "B(THIS)",
+ ),
+ "THIS.\n",
+ },
+ /* This one fails. See comment in Slice.Col.
+ {
+ "nested #define with args",
+ lines(
+ "#define A #define B (x) x",
+ "A",
+ "B(THIS)",
+ ),
+ "x.\n",
+ },
+ */
+}
+
+// TestLex preprocesses every entry of lexTests and compares the drained
+// token stream with the expected output.
+func TestLex(t *testing.T) {
+	for i := range lexTests {
+		tc := &lexTests[i]
+		in := NewInput(tc.name)
+		in.Push(NewTokenizer(tc.name, strings.NewReader(tc.input), nil))
+		if got := drain(in); got != tc.output {
+			t.Errorf("%s: got %q expected %q", tc.name, got, tc.output)
+		}
+	}
+}
+
+// lines returns its arguments joined with newlines, followed by one final
+// newline, so that each argument forms a complete line.
+func lines(a ...string) string {
+	joined := strings.Join(a, "\n")
+	return joined + "\n"
+}
+
+// drain reads input to EOF and returns the texts of its tokens separated by
+// '.' characters. '#' tokens are left out.
+func drain(input *Input) string {
+	var out bytes.Buffer
+	for tok := input.Next(); tok != scanner.EOF; tok = input.Next() {
+		if tok == '#' {
+			continue
+		}
+		if out.Len() > 0 {
+			out.WriteByte('.')
+		}
+		out.WriteString(input.Text())
+	}
+	return out.String()
+}
+
+// badLexTest is one case for TestBadLex: an input that must fail and a
+// fragment that the resulting error message must contain.
+type badLexTest struct {
+	input string
+	error string
+}
+
+var badLexTests = []badLexTest{
+	{
+		"3 #define foo bar\n",
+		"'#' must be first item on line",
+	},
+	{
+		"#ifdef foo\nhello",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#ifndef foo\nhello",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#ifdef foo\nhello\n#else\nbye",
+		"unclosed #ifdef or #ifndef",
+	},
+	{
+		"#define A() A()\nA()",
+		"recursive macro invocation",
+	},
+	{
+		"#define A a\n#define A a\n",
+		"redefinition of macro",
+	},
+	{
+		"#define A a",
+		"missing newline in definition for macro",
+	},
+}
+
+// TestBadLex checks that each bad input produces an error containing the
+// expected fragment.
+func TestBadLex(t *testing.T) {
+	// The input must not be named after the expected error: Input.Error
+	// prefixes every message with the file name, which would make the
+	// Contains check below succeed for any error at all.
+	const name = "bad lex input"
+	for _, test := range badLexTests {
+		input := NewInput(name)
+		input.Push(NewTokenizer(name, strings.NewReader(test.input), nil))
+		err := firstError(input)
+		if err == nil {
+			t.Errorf("%s: got no error", test.error)
+			continue
+		}
+		if !strings.Contains(err.Error(), test.error) {
+			t.Errorf("got error %q expected %q", err.Error(), test.error)
+		}
+	}
+}
+
+// firstError returns the first error value triggered by the input.
+// It sets panicOnError while reading so that Input.Error panics, and turns
+// a recovered error value into the result. It returns nil if EOF is reached
+// without a panic. A recovered value that is not an error is re-panicked.
+func firstError(input *Input) (err error) {
+ panicOnError = true
+ defer func() {
+ panicOnError = false
+ switch e := recover(); e := e.(type) {
+ case nil:
+ case error:
+ err = e
+ default:
+ panic(e)
+ }
+ }()
+
+ for {
+ tok := input.Next()
+ if tok == scanner.EOF {
+ return
+ }
+ }
+}
diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go
new file mode 100644
index 0000000..8ee0c70
--- /dev/null
+++ b/src/cmd/asm/internal/lex/slice.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A Slice is a TokenReader that replays a fixed list of Tokens.
+type Slice struct {
+	tokens []Token
+	base   *src.PosBase
+	line   int
+	pos    int // Index of the current token; -1 until Next is first called.
+}
+
+// NewSlice returns a Slice that yields tokens in order and reports base and
+// line as the position of every one of them.
+func NewSlice(base *src.PosBase, line int, tokens []Token) *Slice {
+	s := new(Slice)
+	s.tokens, s.base, s.line = tokens, base, line
+	s.pos = -1 // Next will advance to zero.
+	return s
+}
+
+// Next advances to the following token and returns its kind, or scanner.EOF
+// once the list is exhausted.
+func (s *Slice) Next() ScanToken {
+	s.pos++
+	if s.pos < len(s.tokens) {
+		return s.tokens[s.pos].ScanToken
+	}
+	return scanner.EOF
+}
+
+// Text returns the text of the current token.
+func (s *Slice) Text() string {
+	current := s.tokens[s.pos]
+	return current.text
+}
+
+// File returns the file name of the Slice's position base.
+func (s *Slice) File() string {
+	return s.base.Filename()
+}
+
+// Base returns the Slice's position base.
+func (s *Slice) Base() *src.PosBase {
+	return s.base
+}
+
+// SetBase replaces the Slice's position base.
+func (s *Slice) SetBase(base *src.PosBase) {
+	// Cannot happen because we only have slices of already-scanned text,
+	// but be prepared.
+	s.base = base
+}
+
+// Line returns the line number given to NewSlice.
+func (s *Slice) Line() int {
+	return s.line
+}
+
+// Col returns the index of the current token, not a real column number.
+func (s *Slice) Col() int {
+	// TODO: Col is only called when defining a macro and all it cares about is increasing
+	// position to discover whether there is a blank before the parenthesis.
+	// We only get here if defining a macro inside a macro.
+	// This imperfect implementation means we cannot tell the difference between
+	//	#define A #define B(x) x
+	// and
+	//	#define A #define B (x) x
+	// In the first, the definition of B has an argument; in the second it doesn't. Because we let
+	// text/scanner strip the blanks for us, this is extremely rare, hard to fix, and not worth it.
+	return s.pos
+}
+
+// Close does nothing.
+func (s *Slice) Close() {
+}
diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go
new file mode 100644
index 0000000..929e528
--- /dev/null
+++ b/src/cmd/asm/internal/lex/stack.go
@@ -0,0 +1,61 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A Stack is a stack of TokenReaders. Tokens come from the reader on top;
+// when that reader hits EOF it is closed and popped, and reading resumes
+// with the next one down. The bottom reader is never popped.
+type Stack struct {
+	tr []TokenReader
+}
+
+// Push adds tr to the top (end) of the input stack. (Popping happens automatically.)
+func (s *Stack) Push(tr TokenReader) {
+	s.tr = append(s.tr, tr)
+}
+
+// Next returns the next token from the topmost reader that still has one.
+// It returns scanner.EOF only when the bottom reader is exhausted.
+func (s *Stack) Next() ScanToken {
+	top := s.tr[len(s.tr)-1]
+	tok := top.Next()
+	if tok != scanner.EOF || len(s.tr) <= 1 {
+		return tok
+	}
+	// The top reader is done: close it, pop it, and resume with the next one down.
+	top.Close()
+	s.tr = s.tr[:len(s.tr)-1]
+	return s.Next()
+}
+
+// Text returns the text reported by the reader on top of the stack.
+func (s *Stack) Text() string {
+	top := s.tr[len(s.tr)-1]
+	return top.Text()
+}
+
+// File returns the file name of the current position base.
+func (s *Stack) File() string {
+	return s.Base().Filename()
+}
+
+// Base returns the position base of the reader on top of the stack.
+func (s *Stack) Base() *src.PosBase {
+	top := s.tr[len(s.tr)-1]
+	return top.Base()
+}
+
+// SetBase sets the position base of the reader on top of the stack.
+func (s *Stack) SetBase(base *src.PosBase) {
+	top := s.tr[len(s.tr)-1]
+	top.SetBase(base)
+}
+
+// Line returns the line number reported by the reader on top of the stack.
+func (s *Stack) Line() int {
+	top := s.tr[len(s.tr)-1]
+	return top.Line()
+}
+
+// Col returns the column reported by the reader on top of the stack.
+func (s *Stack) Col() int {
+	top := s.tr[len(s.tr)-1]
+	return top.Col()
+}
+
+// Close does nothing. Unused.
+func (s *Stack) Close() {
+}
diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go
new file mode 100644
index 0000000..861a2d4
--- /dev/null
+++ b/src/cmd/asm/internal/lex/tokenizer.go
@@ -0,0 +1,153 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "io"
+ "os"
+ "strings"
+ "text/scanner"
+ "unicode"
+
+ "cmd/asm/internal/flags"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+)
+
+// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
+// for our purposes and made a TokenReader. It forms the lowest level,
+// turning text from readers into tokens.
+type Tokenizer struct {
+ tok ScanToken // Most recent token produced by Next.
+ s *scanner.Scanner // The underlying scanner.
+ base *src.PosBase // Position base reported by Base and File.
+ line int // Current line number; starts at 1 and is advanced by Next.
+ file *os.File // If non-nil, file descriptor to close.
+}
+
+// NewTokenizer returns a Tokenizer that scans r. name is recorded as the
+// scanner's file name and used to build the position base. file, if
+// non-nil, is closed by Close.
+func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
+ var s scanner.Scanner
+ s.Init(r)
+ // Newline is like a semicolon; other space characters are fine.
+ s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
+ // Don't skip comments: we need to count newlines.
+ s.Mode = scanner.ScanChars |
+ scanner.ScanFloats |
+ scanner.ScanIdents |
+ scanner.ScanInts |
+ scanner.ScanStrings |
+ scanner.ScanComments
+ s.Position.Filename = name
+ s.IsIdentRune = isIdentRune
+ return &Tokenizer{
+ s: &s,
+ base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
+ line: 1,
+ file: file,
+ }
+}
+
+// isIdentRune reports whether ch may appear at index i of an identifier.
+// Letters, underscore, center dot (·) and division slash (∕) are accepted
+// anywhere; digits are accepted everywhere except as the first character.
+func isIdentRune(ch rune, i int) bool {
+	switch {
+	case unicode.IsLetter(ch):
+		return true
+	case ch == '_': // Underscore; traditional.
+		return true
+	case ch == '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
+		return true
+	case ch == '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
+		return true
+	case i > 0:
+		// Digits are OK only after the first character.
+		return unicode.IsDigit(ch)
+	}
+	return false
+}
+
+func (t *Tokenizer) Text() string {
+ switch t.tok {
+ case LSH:
+ return "<<"
+ case RSH:
+ return ">>"
+ case ARR:
+ return "->"
+ case ROT:
+ return "@>"
+ }
+ return t.s.TokenText()
+}
+
+// File returns the file name of the Tokenizer's position base.
+func (t *Tokenizer) File() string {
+ return t.base.Filename()
+}
+
+// Base returns the Tokenizer's position base.
+func (t *Tokenizer) Base() *src.PosBase {
+ return t.base
+}
+
+// SetBase replaces the Tokenizer's position base.
+func (t *Tokenizer) SetBase(base *src.PosBase) {
+ t.base = base
+}
+
+// Line returns the Tokenizer's current line number.
+func (t *Tokenizer) Line() int {
+ return t.line
+}
+
+// Col returns the column of the scanner's current position.
+func (t *Tokenizer) Col() int {
+ return t.s.Pos().Column
+}
+
+// Next scans and returns the next token. Comments are skipped, except that
+// a comment starting with "//go:build" is returned as BuildComment; newlines
+// inside skipped comments still advance the line count. The character pairs
+// ->, @>, << and >> are combined into the tokens ARR, ROT, LSH and RSH.
+func (t *Tokenizer) Next() ScanToken {
+ s := t.s
+ for {
+ t.tok = ScanToken(s.Scan())
+ if t.tok != scanner.Comment {
+ break
+ }
+ text := s.TokenText()
+ t.line += strings.Count(text, "\n")
+ // TODO: Use constraint.IsGoBuild once it exists.
+ if strings.HasPrefix(text, "//go:build") {
+ t.tok = BuildComment
+ break
+ }
+ }
+ // Count newlines and look one character ahead for the two-character lexemes.
+ switch t.tok {
+ case '\n':
+ t.line++
+ case '-':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ARR
+ return ARR
+ }
+ case '@':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ROT
+ return ROT
+ }
+ case '<':
+ if s.Peek() == '<' {
+ s.Next()
+ t.tok = LSH
+ return LSH
+ }
+ case '>':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = RSH
+ return RSH
+ }
+ }
+ return t.tok
+}
+
+// Close closes the file handed to NewTokenizer, if there was one.
+func (t *Tokenizer) Close() {
+	if f := t.file; f != nil {
+		f.Close()
+	}
+}
diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go
new file mode 100644
index 0000000..3683527
--- /dev/null
+++ b/src/cmd/asm/main.go
@@ -0,0 +1,117 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "bufio"
+ "flag"
+ "fmt"
+ "internal/buildcfg"
+ "log"
+ "os"
+
+ "cmd/asm/internal/arch"
+ "cmd/asm/internal/asm"
+ "cmd/asm/internal/flags"
+ "cmd/asm/internal/lex"
+
+ "cmd/internal/bio"
+ "cmd/internal/obj"
+ "cmd/internal/objabi"
+)
+
+// main is the entry point of the assembler command. It parses the flags,
+// configures a link context for the target architecture, processes each
+// input file named on the command line and writes the result to the output
+// file. With -symabis set (flags.SymABIs) it calls ParseSymABIs instead of
+// assembling. On failure it removes the output file and exits with status 1.
+func main() {
+ log.SetFlags(0)
+ log.SetPrefix("asm: ")
+
+ buildcfg.Check()
+ GOARCH := buildcfg.GOARCH
+
+ flags.Parse()
+
+ architecture := arch.Set(GOARCH, *flags.Shared || *flags.Dynlink)
+ if architecture == nil {
+ log.Fatalf("unrecognized architecture %s", GOARCH)
+ }
+
+ // Copy the command-line settings into the link context.
+ ctxt := obj.Linknew(architecture.LinkArch)
+ ctxt.Debugasm = flags.PrintOut
+ ctxt.Debugvlog = flags.DebugV
+ ctxt.Flag_dynlink = *flags.Dynlink
+ ctxt.Flag_linkshared = *flags.Linkshared
+ ctxt.Flag_shared = *flags.Shared || *flags.Dynlink
+ ctxt.Flag_maymorestack = flags.DebugFlags.MayMoreStack
+ ctxt.IsAsm = true
+ ctxt.Pkgpath = *flags.Importpath
+ switch *flags.Spectre {
+ default:
+ // Any value not listed below is rejected with exit status 2.
+ log.Printf("unknown setting -spectre=%s", *flags.Spectre)
+ os.Exit(2)
+ case "":
+ // nothing
+ case "index":
+ // known to compiler; ignore here so people can use
+ // the same list with -gcflags=-spectre=LIST and -asmflags=-spectre=LIST
+ case "all", "ret":
+ ctxt.Retpoline = true
+ }
+
+ ctxt.Bso = bufio.NewWriter(os.Stdout)
+ defer ctxt.Bso.Flush()
+
+ architecture.Init(ctxt)
+
+ // Create object file, write header.
+ buf, err := bio.Create(*flags.OutputFile)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer buf.Close()
+
+ // The object header is omitted in symabis mode.
+ if !*flags.SymABIs {
+ buf.WriteString(objabi.HeaderString())
+ fmt.Fprintf(buf, "!\n")
+ }
+
+ // ok holds the result of the most recently processed file; diag records
+ // whether any diagnostic was reported through ctxt.DiagFunc.
+ var ok, diag bool
+ var failedFile string
+ for _, f := range flag.Args() {
+ lexer := lex.NewLexer(f)
+ parser := asm.NewParser(ctxt, architecture, lexer,
+ *flags.CompilingRuntime)
+ ctxt.DiagFunc = func(format string, args ...interface{}) {
+ diag = true
+ log.Printf(format, args...)
+ }
+ if *flags.SymABIs {
+ ok = parser.ParseSymABIs(buf)
+ } else {
+ pList := new(obj.Plist)
+ pList.Firstpc, ok = parser.Parse()
+ // reports errors to parser.Errorf
+ if ok {
+ obj.Flushplist(ctxt, pList, nil, *flags.Importpath)
+ }
+ }
+ // Stop at the first file that fails and remember its name for the report.
+ if !ok {
+ failedFile = f
+ break
+ }
+ }
+ if ok && !*flags.SymABIs {
+ ctxt.NumberSyms()
+ obj.WriteObjFile(ctxt, buf)
+ }
+ // With no input files ok is still false here, so that case is also
+ // reported as a failure (without a file name).
+ if !ok || diag {
+ if failedFile != "" {
+ log.Printf("assembly of %s failed", failedFile)
+ } else {
+ log.Print("assembly failed")
+ }
+ // os.Exit does not run deferred functions, so close the output
+ // explicitly before removing it.
+ buf.Close()
+ os.Remove(*flags.OutputFile)
+ os.Exit(1)
+ }
+}