diff options
Diffstat (limited to 'src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac')
-rw-r--r-- | src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac | 2963 |
1 files changed, 2963 insertions, 0 deletions
diff --git a/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac new file mode 100644 index 00000000..1828e412 --- /dev/null +++ b/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac @@ -0,0 +1,2963 @@ +; $Id: bs3-cpu-instr-3-template.mac $ +;; @file +; BS3Kit - bs3-cpu-instr-3 - MMX, SSE and AVX instructions, assembly template. +; + +; +; Copyright (C) 2007-2022 Oracle and/or its affiliates. +; +; This file is part of VirtualBox base platform packages, as +; available from https://www.virtualbox.org. +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation, in version 3 of the +; License. +; +; This program is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, see <https://www.gnu.org/licenses>. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included +; in the VirtualBox distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. 
+; +; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 +; + + +;********************************************************************************************************************************* +;* Header Files * +;********************************************************************************************************************************* +%include "bs3kit-template-header.mac" ; setup environment + + +;********************************************************************************************************************************* +;* External Symbols * +;********************************************************************************************************************************* +TMPL_BEGIN_TEXT + + +; +; Test code snippets containing code which differs between 16-bit, 32-bit +; and 64-bit CPU modes. +; +%ifdef BS3_INSTANTIATING_CMN + + +;; +; Variant on BS3_PROC_BEGIN_CMN w/ BS3_PBC_NEAR that prefixes the function +; with an instruction length byte. +; +; ASSUMES the length is between the start of the function and the .again label. +; +; @param 1 The function name; wrapped in BS3_CMN_NM() for the length +; calculation and passed on to BS3_PROC_BEGIN_CMN unchanged. +; + %ifndef BS3CPUINSTR3_PROC_BEGIN_CMN_DEFINED + %define BS3CPUINSTR3_PROC_BEGIN_CMN_DEFINED + %macro BS3CPUINSTR3_PROC_BEGIN_CMN 1 + align 8, db 0cch + ; The length byte is emitted immediately ahead of BS3_PROC_BEGIN_CMN. + db BS3_CMN_NM(%1).again - BS3_CMN_NM(%1) +BS3_PROC_BEGIN_CMN %1, BS3_PBC_NEAR + %endmacro + %endif + +;; +; The EMIT_INSTR_PLUS_ICEBP macro is for creating a common function for, and +; named after, a single instruction, followed by a looping ICEBP. +; +; This works like a prefix to the instruction invocation, the only exception is that +; instead of [fs:xBX] you write FSxBX as that's what is wanted in the name. 
+; + %ifndef EMIT_INSTR_PLUS_ICEBP_DEFINED + %define EMIT_INSTR_PLUS_ICEBP_DEFINED + + ; Overloaded on parameter count: parameter 1 is the mnemonic, the remaining + ; one to four parameters are its operands. FSxBX is only defined (as the + ; [fs:xBX] memory operand) while the instruction itself is being emitted, so + ; it stays a plain name when pasted into the function name. + + %macro EMIT_INSTR_PLUS_ICEBP 2 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _icebp + %define FSxBX [fs:xBX] + %1 %2 + %undef FSxBX +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _icebp + %endmacro + + %macro EMIT_INSTR_PLUS_ICEBP 3 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _icebp + %define FSxBX [fs:xBX] + %1 %2, %3 + %undef FSxBX +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _icebp + %endmacro + + %macro EMIT_INSTR_PLUS_ICEBP 4 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _icebp + %define FSxBX [fs:xBX] + %1 %2, %3, %4 + %undef FSxBX +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _icebp + %endmacro + + %macro EMIT_INSTR_PLUS_ICEBP 5 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _ %+ %5 %+ _icebp + %define FSxBX [fs:xBX] + %1 %2, %3, %4, %5 + %undef FSxBX +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _ %+ %2 %+ _ %+ %3 %+ _ %+ %4 %+ _ %+ %5 %+ _icebp + %endmacro + + %endif + +;; +; Companion to EMIT_INSTR_PLUS_ICEBP for dealing with stuff that the assembler does +; not want to emit. +; +; @param 1 The function name (omitting bs3CpuInstr3_ and _icebp). +; @param 2+ The opcode bytes. FSxBX_PFX and FSxBX_MODRM are defined locally. 
+; + %ifndef EMIT_INSTR_PLUS_ICEBP_BYTES_DEFINED + %define EMIT_INSTR_PLUS_ICEBP_BYTES_DEFINED + + %macro EMIT_INSTR_PLUS_ICEBP_BYTES 2+ +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _icebp + ; FSxBX_PFX is the FS segment override prefix; FSxBX_MODRM is a ModR/M byte with + ; mod=00 and reg=0 whose r/m field selects [bx] (16-bit code) or [ebx]/[rbx]. + %define FSxBX_PFX 64h + %if TMPL_BITS == 16 + %define FSxBX_MODRM 07h + %else + %define FSxBX_MODRM 03h + %endif + db %2 + %undef FSxBX_MODRM + %undef FSxBX_PFX +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _icebp + %endmacro + %endif + + + +%ifndef EMIT_TYPE1_INSTR_DEFINED + %define EMIT_TYPE1_INSTR_DEFINED + ;; + ;; Emits register and [fs:xBX] test functions for three legacy/AVX instruction pairs. + ;; @param 1 1st pair: legacy instruction (MM and XMM variants are always emitted). + ;; @param 2 1st pair: AVX instruction (XMM and YMM variants). + ;; @param 3 2nd pair: legacy instruction. + ;; @param 4 2nd pair: AVX instruction. + ;; @param 5 3rd pair: legacy instruction. + ;; @param 6 3rd pair: AVX instruction (also gets a YMM8+ variant in 64-bit code). + ;; @param 7 Indicates whether the 2nd and 3rd pair has MMX variants. + %macro EMIT_TYPE1_INSTR 7 +; +; 1st pair (params 1 & 2), e.g. PXOR & VPXOR +; +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_MM2_icebp + %1 mm1, mm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_MM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_FSxBX_icebp + %1 mm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_XMM2_icebp + %1 xmm1, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_FSxBX_icebp + %1 xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _XMM1_XMM1_XMM2_icebp + %2 xmm1, xmm1, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _XMM1_XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _XMM1_XMM1_FSxBX_icebp + %2 xmm1, xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _XMM1_XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _YMM7_YMM2_YMM3_icebp + %2 ymm7, ymm2, ymm3 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _YMM7_YMM2_YMM3_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 
%+ _YMM7_YMM2_FSxBX_icebp + %2 ymm7, ymm2, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _YMM7_YMM2_FSxBX_icebp + + +; +; 2nd pair (params 3 & 4), e.g. XORPS & VXORPS +; + %if %7 != 0 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %3 %+ _MM1_MM2_icebp + %3 mm1, mm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %3 %+ _MM1_MM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %3 %+ _MM1_FSxBX_icebp + %3 mm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %3 %+ _MM1_FSxBX_icebp + %endif + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %3 %+ _XMM1_XMM2_icebp + %3 xmm1, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %3 %+ _XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %3 %+ _XMM1_FSxBX_icebp + %3 xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %3 %+ _XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %4 %+ _XMM1_XMM1_XMM2_icebp + %4 xmm1, xmm1, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %4 %+ _XMM1_XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %4 %+ _XMM1_XMM1_FSxBX_icebp + %4 xmm1, xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %4 %+ _XMM1_XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %4 %+ _YMM1_YMM1_YMM2_icebp + %4 ymm1, ymm1, ymm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %4 %+ _YMM1_YMM1_YMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %4 %+ _YMM1_YMM1_FSxBX_icebp + %4 ymm1, ymm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %4 %+ _YMM1_YMM1_FSxBX_icebp + + + +; +; 3rd pair (params 5 & 6), e.g. XORPD & VXORPD +; + %if %7 != 0 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %5 %+ _MM1_MM2_icebp + %5 mm1, mm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %5 %+ _MM1_MM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %5 %+ _MM1_FSxBX_icebp + %5 mm1, 
[fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %5 %+ _MM1_FSxBX_icebp + %endif + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %5 %+ _XMM1_XMM2_icebp + %5 xmm1, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %5 %+ _XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %5 %+ _XMM1_FSxBX_icebp + %5 xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %5 %+ _XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %6 %+ _XMM2_XMM1_XMM0_icebp + %6 xmm2, xmm1, xmm0 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %6 %+ _XMM2_XMM1_XMM0_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %6 %+ _XMM2_XMM1_FSxBX_icebp + %6 xmm2, xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %6 %+ _XMM2_XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %6 %+ _YMM2_YMM1_YMM0_icebp + %6 ymm2, ymm1, ymm0 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %6 %+ _YMM2_YMM1_YMM0_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %6 %+ _YMM2_YMM1_FSxBX_icebp + %6 ymm2, ymm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %6 %+ _YMM2_YMM1_FSxBX_icebp + + %if TMPL_BITS == 64 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %6 %+ _YMM10_YMM8_YMM15_icebp + %6 ymm10, ymm8, ymm15 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %6 %+ _YMM10_YMM8_YMM15_icebp + %endif + + %endmacro ; EMIT_TYPE1_INSTR + + ;; + ;; Single-pair variant of EMIT_TYPE1_INSTR. + ;; @param 1 Legacy instruction (XMM variants are always emitted). + ;; @param 2 AVX instruction (XMM and YMM variants, plus a YMM8+ one in 64-bit code). + ;; @param 3 Whether to also emit the MMX variants of parameter 1. + %macro EMIT_TYPE1_ONE_INSTR 3 + %if %3 != 0 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_MM2_icebp + %1 mm1, mm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_MM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_FSxBX_icebp + %1 mm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _MM1_FSxBX_icebp + %endif + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_XMM2_icebp + %1 xmm1, xmm2 +.again: + 
icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_FSxBX_icebp + %1 xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _XMM2_XMM1_XMM0_icebp + %2 xmm2, xmm1, xmm0 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _XMM2_XMM1_XMM0_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _XMM2_XMM1_FSxBX_icebp + %2 xmm2, xmm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _XMM2_XMM1_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _YMM2_YMM1_YMM0_icebp + %2 ymm2, ymm1, ymm0 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _YMM2_YMM1_YMM0_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _YMM2_YMM1_FSxBX_icebp + %2 ymm2, ymm1, [fs:xBX] +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _YMM2_YMM1_FSxBX_icebp + + %if TMPL_BITS == 64 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _YMM10_YMM8_YMM15_icebp + %2 ymm10, ymm8, ymm15 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _YMM10_YMM8_YMM15_icebp + %endif + %endmacro ; EMIT_TYPE1_ONE_INSTR + +%endif + +EMIT_TYPE1_INSTR pand, vpand, andps, vandps, andpd, vandpd, 0 +EMIT_TYPE1_INSTR pandn, vpandn, andnps, vandnps, andnpd, vandnpd, 0 +EMIT_TYPE1_INSTR por, vpor, orps, vorps, orpd, vorpd, 0 +EMIT_TYPE1_INSTR pxor, vpxor, xorps, vxorps, xorpd, vxorpd, 0 + +EMIT_TYPE1_INSTR pcmpgtb, vpcmpgtb, pcmpgtw, vpcmpgtw, pcmpgtd, vpcmpgtd, 1 +EMIT_TYPE1_ONE_INSTR pcmpgtq, vpcmpgtq, 0 +EMIT_TYPE1_INSTR pcmpeqb, vpcmpeqb, pcmpeqw, vpcmpeqw, pcmpeqd, vpcmpeqd, 1 +EMIT_TYPE1_ONE_INSTR pcmpeqq, vpcmpeqq, 0 + +EMIT_TYPE1_INSTR paddb, vpaddb, paddw, vpaddw, paddd, vpaddd, 1 +EMIT_TYPE1_ONE_INSTR paddq, vpaddq, 1 + +EMIT_TYPE1_INSTR psubb, vpsubb, psubw, vpsubw, psubd, vpsubd, 1 +EMIT_TYPE1_ONE_INSTR 
psubq, vpsubq, 1 + + +; +; Type 2 instructions. Of the form: pxxxx sAX, [zy]mm0 +; +%ifndef EMIT_TYPE2_ONE_INSTR_DEFINED + %define EMIT_TYPE2_ONE_INSTR_DEFINED + ;; + ;; Emits test functions for an instruction taking a general register destination and + ;; an MMX/XMM/YMM source. The register forms go through the assembler, the [fs:xBX] + ;; forms are hand-encoded with db (FS prefix, opcode bytes, ModR/M 7 = [bx] in 16-bit + ;; code or 3 = [ebx]/[rbx] otherwise). + ;; NOTE(review): the memory forms are presumably expected to raise #UD - confirm + ;; against the C side test tables. + ;; @param 1 MMX/SSE instruction name + ;; @param 2 AVX instruction name + ;; @param 3 Whether to emit MMX function + ;; @param 4 The opcode byte. (assuming two byte / vex map 1) + %macro EMIT_TYPE2_ONE_INSTR 4 + %if %3 != 0 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_MM2_icebp + %1 eax, mm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_MM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_qword_FSxBX_icebp + %if TMPL_BITS == 16 + db 64h, 0fh, %4, 7 ; %1 eax, qword [fs:xBX] + %else + db 64h, 0fh, %4, 3 ; %1 eax, qword [fs:xBX] + %endif +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_qword_FSxBX_icebp + %endif + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_XMM2_icebp + %1 eax, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_dqword_FSxBX_icebp + %if TMPL_BITS == 16 + db 64h, 66h, 0fh, %4, 7 ; %1 eax, dqword [fs:xBX] + %else + db 64h, 66h, 0fh, %4, 3 ; %1 eax, dqword [fs:xBX] + %endif +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %1 %+ _EAX_dqword_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_XMM2_icebp + %2 eax, xmm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_XMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_dqword_FSxBX_icebp + ; 3-byte VEX prefix: 0c4h, 0e1h = R/X/B extensions off + opcode map 1 (0fh), + ; 079h = W0, vvvv=1111b (no second source), L=0 (128-bit), pp=01b (66h). + ; This used to be 0e0h, 071h, i.e. reserved map 0 and vvvv=1110b, which does not + ; encode the memory form of the instruction at all. + %if TMPL_BITS == 16 + db 64h, 0c4h, 0e1h, 079h, %4, 7 ; %2 eax, dqword [fs:xBX] + %else + db 64h, 0c4h, 0e1h, 079h, %4, 3 ; %2 eax, dqword [fs:xBX] + %endif +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_dqword_FSxBX_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_YMM2_icebp + %2 eax, ymm2 +.again: + icebp + jmp .again +BS3_PROC_END_CMN 
bs3CpuInstr3_ %+ %2 %+ _EAX_YMM2_icebp + +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_qqword_FSxBX_icebp + ; Same 3-byte VEX prefix as the 128-bit memory form, but with L=1 (256-bit): + ; 07dh = W0, vvvv=1111b, L=1, pp=01b (66h). Was 0e0h, 075h (map 0, vvvv=1110b). + %if TMPL_BITS == 16 + db 64h, 0c4h, 0e1h, 07dh, %4, 7 ; %2 eax, qqword [fs:xBX] + %else + db 64h, 0c4h, 0e1h, 07dh, %4, 3 ; %2 eax, qqword [fs:xBX] + %endif +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _EAX_qqword_FSxBX_icebp + + %if TMPL_BITS == 64 +BS3CPUINSTR3_PROC_BEGIN_CMN bs3CpuInstr3_ %+ %2 %+ _RAX_YMM9_icebp + %2 rax, ymm9 +.again: + icebp + jmp .again +BS3_PROC_END_CMN bs3CpuInstr3_ %+ %2 %+ _RAX_YMM9_icebp + %endif + %endmacro ; EMIT_TYPE2_ONE_INSTR +%endif + +EMIT_TYPE2_ONE_INSTR pmovmskb, vpmovmskb, 1, 0d7h + +; +; [V]PMULLW +; +EMIT_INSTR_PLUS_ICEBP pmullw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmullw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pmullw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmullw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmullw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmullw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmullw, XMM1, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmullw, XMM1, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmullw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmullw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmullw, YMM1, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpmullw, YMM1, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmullw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmullw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMULLD +; +EMIT_INSTR_PLUS_ICEBP pmulld, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmulld, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmulld, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmulld, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulld, XMM2, XMM1, XMM0 +EMIT_INSTR_PLUS_ICEBP vpmulld, XMM2, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulld, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmulld, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulld, YMM2, YMM1, YMM0 +EMIT_INSTR_PLUS_ICEBP vpmulld, YMM2, YMM1, FSxBX + 
%if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulld, YMM10, YMM8, YMM15 +EMIT_INSTR_PLUS_ICEBP vpmulld, YMM10, YMM8, FSxBX + %endif + +; +; [V]PMULHW +; +EMIT_INSTR_PLUS_ICEBP pmulhw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmulhw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pmulhw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmulhw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmulhw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmulhw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulhw, XMM1, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmulhw, XMM1, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulhw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulhw, YMM1, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpmulhw, YMM1, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulhw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMULHUW +; +EMIT_INSTR_PLUS_ICEBP pmulhuw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmulhuw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pmulhuw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmulhuw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmulhuw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmulhuw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulhuw, XMM1, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, XMM1, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmulhuw, YMM1, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, YMM1, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhuw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PSHUFB +; +EMIT_INSTR_PLUS_ICEBP pshufb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pshufb, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pshufb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pshufb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pshufb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pshufb, XMM8, FSxBX + %endif + 
+EMIT_INSTR_PLUS_ICEBP vpshufb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpshufb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpshufb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpshufb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpshufb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpshufb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpshufb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpshufb, YMM8, YMM9, FSxBX + %endif + +; +; PSHUFW +; +EMIT_INSTR_PLUS_ICEBP pshufw, MM1, MM2, 0FFh ; FF = top src word in all destination words +EMIT_INSTR_PLUS_ICEBP pshufw, MM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pshufw, MM1, MM2, 01Bh ; 1B = word swap (like bswap but for words) +EMIT_INSTR_PLUS_ICEBP pshufw, MM1, FSxBX, 01Bh + +; +; [V]PSHUFHW +; +EMIT_INSTR_PLUS_ICEBP pshufhw, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP pshufhw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pshufhw, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP pshufhw, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshufhw, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufhw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufhw, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshufhw, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM1, YMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM1, YMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM1, FSxBX, 01Bh + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM12, YMM7, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufhw, YMM9, YMM12, 01Bh + %endif + +; +; [V]PSHUFLW +; +EMIT_INSTR_PLUS_ICEBP pshuflw, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP pshuflw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pshuflw, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP pshuflw, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshuflw, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshuflw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshuflw, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshuflw, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM1, YMM2, 0FFh 
+EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM1, YMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM1, FSxBX, 01Bh + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM12, YMM7, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshuflw, YMM9, YMM12, 01Bh + %endif + +; +; [V]PSHUFD +; +EMIT_INSTR_PLUS_ICEBP pshufd, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP pshufd, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pshufd, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP pshufd, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshufd, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufd, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufd, XMM1, XMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshufd, XMM1, FSxBX, 01Bh + +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM1, YMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM1, YMM2, 01Bh +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM1, FSxBX, 01Bh + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM12, YMM7, 0FFh +EMIT_INSTR_PLUS_ICEBP vpshufd, YMM9, YMM12, 01Bh + %endif + +; +; [V]PUNPCKHBW +; +EMIT_INSTR_PLUS_ICEBP punpckhbw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpckhbw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP punpckhbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpckhbw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpckhbw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpckhbw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhbw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKHWD +; +EMIT_INSTR_PLUS_ICEBP punpckhwd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpckhwd, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP punpckhwd, 
XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpckhwd, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpckhwd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpckhwd, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhwd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKHDQ +; +EMIT_INSTR_PLUS_ICEBP punpckhdq, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpckhdq, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP punpckhdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpckhdq, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpckhdq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpckhdq, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhdq, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKHQDQ (no MMX) +; +EMIT_INSTR_PLUS_ICEBP punpckhqdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpckhqdq, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpckhqdq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpckhqdq, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, XMM8, XMM9, FSxBX + %endif + 
+EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckhqdq, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKLBW +; +EMIT_INSTR_PLUS_ICEBP punpcklbw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpcklbw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP punpcklbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpcklbw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpcklbw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpcklbw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklbw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKLWD +; +EMIT_INSTR_PLUS_ICEBP punpcklwd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpcklwd, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP punpcklwd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpcklwd, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpcklwd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpcklwd, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklwd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKLDQ +; +EMIT_INSTR_PLUS_ICEBP punpckldq, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP punpckldq, MM1, FSxBX + 
+EMIT_INSTR_PLUS_ICEBP punpckldq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpckldq, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpckldq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpckldq, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckldq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpckldq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpckldq, YMM8, YMM9, FSxBX + %endif + +; +; [V]PUNPCKLQDQ (no MMX) +; +EMIT_INSTR_PLUS_ICEBP punpcklqdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP punpcklqdq, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP punpcklqdq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP punpcklqdq, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpunpcklqdq, YMM8, YMM9, FSxBX + %endif + +; +; [V]PACKSSWB +; +EMIT_INSTR_PLUS_ICEBP packsswb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP packsswb, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP packsswb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP packsswb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP packsswb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP packsswb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpacksswb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpacksswb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpacksswb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpacksswb, XMM8, XMM9, FSxBX 
+ %endif + +EMIT_INSTR_PLUS_ICEBP vpacksswb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpacksswb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpacksswb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpacksswb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PACKSSDW +; +EMIT_INSTR_PLUS_ICEBP packssdw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP packssdw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP packssdw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP packssdw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP packssdw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP packssdw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackssdw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpackssdw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackssdw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpackssdw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackssdw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpackssdw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackssdw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpackssdw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PACKUSWB +; +EMIT_INSTR_PLUS_ICEBP packuswb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP packuswb, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP packuswb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP packuswb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP packuswb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP packuswb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackuswb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpackuswb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackuswb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpackuswb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackuswb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpackuswb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackuswb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpackuswb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PACKUSDW (no MMX) +; +EMIT_INSTR_PLUS_ICEBP packusdw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP packusdw, XMM1, FSxBX + %if TMPL_BITS == 64 
+EMIT_INSTR_PLUS_ICEBP packusdw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP packusdw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackusdw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpackusdw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackusdw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpackusdw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpackusdw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpackusdw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpackusdw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpackusdw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXUB +; +EMIT_INSTR_PLUS_ICEBP pmaxub, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmaxub, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pmaxub, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxub, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxub, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxub, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxub, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxub, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxub, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxub, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxub, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxub, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxub, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxub, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXUW +; +EMIT_INSTR_PLUS_ICEBP pmaxuw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxuw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxuw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxuw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxuw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxuw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxuw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxuw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxuw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxuw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxuw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP 
vpmaxuw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXUD +; +EMIT_INSTR_PLUS_ICEBP pmaxud, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxud, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxud, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxud, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxud, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxud, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxud, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxud, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxud, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxud, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxud, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxud, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXSB +; +EMIT_INSTR_PLUS_ICEBP pmaxsb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxsb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxsb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxsb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXSW +; +EMIT_INSTR_PLUS_ICEBP pmaxsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmaxsw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pmaxsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxsw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxsw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxsw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsw, YMM1, YMM2, YMM3 
+EMIT_INSTR_PLUS_ICEBP vpmaxsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMAXSD +; +EMIT_INSTR_PLUS_ICEBP pmaxsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaxsd, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaxsd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaxsd, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpmaxsd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaxsd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINUB +; +EMIT_INSTR_PLUS_ICEBP pminub, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pminub, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pminub, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminub, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminub, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminub, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminub, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminub, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminub, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminub, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminub, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminub, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminub, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminub, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINUW +; +EMIT_INSTR_PLUS_ICEBP pminuw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminuw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminuw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminuw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminuw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminuw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 
+EMIT_INSTR_PLUS_ICEBP vpminuw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminuw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminuw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminuw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminuw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminuw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINUD +; +EMIT_INSTR_PLUS_ICEBP pminud, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminud, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminud, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminud, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminud, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminud, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminud, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminud, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminud, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminud, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminud, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminud, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINSB +; +EMIT_INSTR_PLUS_ICEBP pminsb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminsb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminsb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminsb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminsb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminsb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminsb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminsb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINSW +; +EMIT_INSTR_PLUS_ICEBP pminsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pminsw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pminsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminsw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminsw, 
XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminsw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminsw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminsw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMINSD +; +EMIT_INSTR_PLUS_ICEBP pminsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pminsd, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pminsd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pminsd, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpminsd, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpminsd, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpminsd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpminsd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpminsd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpminsd, YMM8, YMM9, FSxBX + %endif + +; +; [V]MOVNTDQA +; +EMIT_INSTR_PLUS_ICEBP movntdqa, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovntdqa, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovntdqa, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movntdqa, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovntdqa, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovntdqa, YMM12, FSxBX + %endif + +; +; [V]MOVNTDQ +; +EMIT_INSTR_PLUS_ICEBP movntdq, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntdq, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntdq, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movntdq, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovntdq, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovntdq, FSxBX, YMM10 + %endif + + +; +; [V]MOVNTPS +; +EMIT_INSTR_PLUS_ICEBP movntps, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntps, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntps, FSxBX, YMM1 + %if TMPL_BITS 
== 64 +EMIT_INSTR_PLUS_ICEBP movntps, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovntps, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovntps, FSxBX, YMM12 + %endif + +; +; [V]MOVNTPD +; +EMIT_INSTR_PLUS_ICEBP movntpd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntpd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovntpd, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movntpd, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovntpd, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovntpd, FSxBX, YMM12 + %endif + +; +; [V]MOVUPS - not testing the 2nd register variant. +; +EMIT_INSTR_PLUS_ICEBP movups, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movups, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movups, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovups, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovups, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovups, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovups, YMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movups, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movups, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP movups, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovups, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovups, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovups, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovups, YMM12, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovups, FSxBX, YMM12 + %endif + +; +; [V]MOVUPD - not testing the 2nd register variant. 
+; +EMIT_INSTR_PLUS_ICEBP movupd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movupd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movupd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovupd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovupd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovupd, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovupd, YMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movupd, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movupd, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP movupd, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovupd, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovupd, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovupd, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovupd, YMM12, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovupd, FSxBX, YMM12 + %endif + +; +; [V]MOVSS - not testing the 2nd register variant. +; +EMIT_INSTR_PLUS_ICEBP movss, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movss, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movss, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovss, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovss, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovss, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movss, XMM11, XMM8 +EMIT_INSTR_PLUS_ICEBP movss, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movss, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovss, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovss, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovss, FSxBX, XMM9 + %endif + +; +; [V]MOVSD - not testing the 2nd register variant. 
+; +EMIT_INSTR_PLUS_ICEBP movsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movsd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movsd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovsd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsd, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movsd, XMM11, XMM8 +EMIT_INSTR_PLUS_ICEBP movsd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movsd, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovsd, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovsd, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsd, FSxBX, XMM9 + %endif + +; +; [V]MOVLPS +; +EMIT_INSTR_PLUS_ICEBP movlps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movlps, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovlps, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovlps, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movlps, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movlps, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovlps, XMM10, XMM14, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovlps, FSxBX, XMM9 + %endif + +; +; [V]MOVLPD +; +EMIT_INSTR_PLUS_ICEBP movlpd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movlpd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovlpd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovlpd, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movlpd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movlpd, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovlpd, XMM10, XMM14, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovlpd, FSxBX, XMM9 + %endif + +; +; [V]MOVHPS +; +EMIT_INSTR_PLUS_ICEBP movhps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movhps, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovhps, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovhps, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movhps, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movhps, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovhps, XMM10, XMM14, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovhps, FSxBX, XMM9 + %endif + +; +; [V]MOVHPD +; +EMIT_INSTR_PLUS_ICEBP movhpd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movhpd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovhpd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovhpd, FSxBX, XMM1 + %if 
TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movhpd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP movhpd, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovhpd, XMM10, XMM14, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovhpd, FSxBX, XMM9 + %endif + +; +; [V]MOVHLPS +; +EMIT_INSTR_PLUS_ICEBP movhlps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovhlps, XMM1, XMM2, XMM3 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movhlps, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP vmovhlps, XMM10, XMM14, XMM12 + %endif + +; +; [V]MOVSLDUP +; +EMIT_INSTR_PLUS_ICEBP movsldup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movsldup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movsldup, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movsldup, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovsldup, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovsldup, YMM12, FSxBX + %endif + +; +; [V]MOVSHDUP +; +EMIT_INSTR_PLUS_ICEBP movshdup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movshdup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movshdup, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movshdup, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovshdup, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovshdup, YMM12, FSxBX + %endif + +; +; [V]MOVDDUP +; +EMIT_INSTR_PLUS_ICEBP movddup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movddup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovddup, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovddup, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovddup, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovddup, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movddup, 
XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movddup, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovddup, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovddup, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovddup, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovddup, YMM12, FSxBX + %endif + +; +; [V]MOVAPS +; +EMIT_INSTR_PLUS_ICEBP movaps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movaps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovaps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovaps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovaps, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovaps, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movaps, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movaps, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovaps, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovaps, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovaps, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovaps, YMM12, FSxBX + %endif + +; +; [V]MOVAPD +; +EMIT_INSTR_PLUS_ICEBP movapd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movapd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovapd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovapd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovapd, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vmovapd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movapd, XMM8, XMM12 +EMIT_INSTR_PLUS_ICEBP movapd, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovapd, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovapd, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovapd, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovapd, YMM12, FSxBX + %endif + +; +; [V]MOVD +; +EMIT_INSTR_PLUS_ICEBP movd, MM1, EDX +EMIT_INSTR_PLUS_ICEBP movd, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movd, EAX, MM1 +EMIT_INSTR_PLUS_ICEBP movd, FSxBX, MM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movd, MM1, R9D +EMIT_INSTR_PLUS_ICEBP movd, R10D, MM0 + %endif + +EMIT_INSTR_PLUS_ICEBP movd, XMM1, EAX +EMIT_INSTR_PLUS_ICEBP movd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP movd, EAX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movd, XMM9, R8D +EMIT_INSTR_PLUS_ICEBP movd, R8D, XMM9 +EMIT_INSTR_PLUS_ICEBP movd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP movd, 
FSxBX, XMM9 + %endif + +EMIT_INSTR_PLUS_ICEBP vmovd, XMM1, EAX +EMIT_INSTR_PLUS_ICEBP vmovd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovd, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovd, EDX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vmovd, XMM9, R9D +EMIT_INSTR_PLUS_ICEBP vmovd, R8D, XMM9 +EMIT_INSTR_PLUS_ICEBP vmovd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovd, FSxBX, XMM9 + %endif + +; +; [V]MOVQ - some hand coded stuff here as the assembler prefers the 7f/6f variants. +; +EMIT_INSTR_PLUS_ICEBP movq, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP movq, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movq, FSxBX, MM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movq, R9, MM1 +EMIT_INSTR_PLUS_ICEBP movq, MM1, R9 +EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_MM1_FSxBX, FSxBX_PFX, 48h, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_MM1, FSxBX_PFX, 48h, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) + %endif + +EMIT_INSTR_PLUS_ICEBP movq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP movq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movq, FSxBX, XMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movq, XMM9, R8 +EMIT_INSTR_PLUS_ICEBP movq, R8, XMM9 +EMIT_INSTR_PLUS_ICEBP movq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP movq, FSxBX, XMM9 +EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_XMM1_FSxBX, FSxBX_PFX, 66h, 48h, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP_BYTES 06e_movq_XMM9_FSxBX, FSxBX_PFX, 66h, 4ch, 0fh, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_XMM1, FSxBX_PFX, 66h, 48h, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP_BYTES 07e_movq_FSxBX_XMM9, FSxBX_PFX, 66h, 4ch, 0fh, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) + %endif + +EMIT_INSTR_PLUS_ICEBP vmovq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vmovq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP_BYTES 06e_vmovq_XMM1_FSxBX, FSxBX_PFX, 0c4h, 0e1h, 0f9h, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP vmovq, FSxBX, XMM1 
+EMIT_INSTR_PLUS_ICEBP_BYTES 07e_vmovq_FSxBX_XMM1, FSxBX_PFX, 0c4h, 0e1h, 0f9h, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vmovq, XMM9, R8 +EMIT_INSTR_PLUS_ICEBP vmovq, R8, XMM9 +EMIT_INSTR_PLUS_ICEBP vmovq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovq, FSxBX, XMM9 +EMIT_INSTR_PLUS_ICEBP_BYTES 06e_vmovq_XMM9_FSxBX, FSxBX_PFX, 0c4h, 061h, 0f9h, 06eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) +EMIT_INSTR_PLUS_ICEBP_BYTES 07e_vmovq_FSxBX_XMM9, FSxBX_PFX, 0c4h, 061h, 0f9h, 07eh, FSxBX_MODRM | (1 << X86_MODRM_REG_SHIFT) + %endif + +; +; [V]MOVDQU - the 2nd register variant (opcode 7f) is hand coded, see the 07f_ prefixed tests. +; +EMIT_INSTR_PLUS_ICEBP movdqu, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqu_XMM1_XMM2, 0f3h, 00fh, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP movdqu, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movdqu, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM1, XMM2 ; C5 FA 6F CA +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqu_XMM1_XMM2, 0c5h, 0fah, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM1, YMM2 ; C5 FE 6F CA +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqu_YMM1_YMM2, 0c5h, 0feh, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movdqu, XMM8, XMM12 ; F3 45 0F 6F C4 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqu_XMM8_XMM12, 0f3h, 045h, 00fh, 07fh, X86_MODRM_MAKE(3, 4, 0) +EMIT_INSTR_PLUS_ICEBP movdqu, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP movdqu, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM7, XMM14 +EMIT_INSTR_PLUS_ICEBP vmovdqu, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovdqu, YMM12, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqu, FSxBX, YMM12 + %endif + +; +; [V]MOVDQA - the 2nd register variant (opcode 7f) is hand coded, see the 07f_ prefixed tests. 
+; +EMIT_INSTR_PLUS_ICEBP movdqa, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqa_XMM1_XMM2, 066h, 00fh, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP movdqa, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP movdqa, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_XMM1_XMM2, 0c5h, 0f9h, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_YMM1_YMM2, 0c5h, 0fdh, 07fh, X86_MODRM_MAKE(3, 2, 1) +EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, YMM1 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movdqa, XMM8, XMM12 ; 66 45 0F 6F C4 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_movdqa_XMM8_XMM12, 066h, 045h, 00fh, 07fh, X86_MODRM_MAKE(3, 4, 0) +EMIT_INSTR_PLUS_ICEBP movdqa, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP movdqa, FSxBX, XMM10 +EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM8, XMM14 ; C4 41 79 6F C6 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_XMM8_XMM14, 0c4h, 041h, 79h, 07fh, X86_MODRM_MAKE(3, 6, 0) +EMIT_INSTR_PLUS_ICEBP vmovdqa, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, XMM11 +EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM12, YMM8 +EMIT_INSTR_PLUS_ICEBP_BYTES 07f_vmovdqa_YMM12_YMM8, 0c4h, 041h, 7dh, 07fh, X86_MODRM_MAKE(3, 0, 4) +EMIT_INSTR_PLUS_ICEBP vmovdqa, YMM12, FSxBX +EMIT_INSTR_PLUS_ICEBP vmovdqa, FSxBX, YMM12 + %endif + +; +; [V]PTEST +; +EMIT_INSTR_PLUS_ICEBP ptest, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP ptest, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vptest, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vptest, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vptest, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vptest, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP ptest, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP ptest, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vptest, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vptest, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vptest, YMM9, YMM8 +EMIT_INSTR_PLUS_ICEBP vptest, YMM9, FSxBX + %endif + +; 
+; [V]PAVGB +; +EMIT_INSTR_PLUS_ICEBP pavgb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pavgb, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pavgb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pavgb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pavgb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pavgb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpavgb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpavgb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpavgb, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpavgb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpavgb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpavgb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpavgb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpavgb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PAVGW +; +EMIT_INSTR_PLUS_ICEBP pavgw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pavgw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP pavgw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pavgw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pavgw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pavgw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpavgw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpavgw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpavgw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpavgw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpavgw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpavgw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpavgw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpavgw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PSIGNB +; +EMIT_INSTR_PLUS_ICEBP psignb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP psignb, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP psignb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP psignb, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP psignb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP psignb, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignb, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpsignb, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignb, XMM8, XMM9, XMM10 
+EMIT_INSTR_PLUS_ICEBP vpsignb, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignb, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpsignb, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignb, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpsignb, YMM8, YMM9, FSxBX + %endif + +; +; [V]PSIGNW +; +EMIT_INSTR_PLUS_ICEBP psignw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP psignw, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP psignw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP psignw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP psignw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP psignw, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpsignw, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpsignw, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpsignw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpsignw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PSIGND +; +EMIT_INSTR_PLUS_ICEBP psignd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP psignd, MM1, FSxBX + +EMIT_INSTR_PLUS_ICEBP psignd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP psignd, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP psignd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP psignd, XMM8, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpsignd, XMM1, XMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpsignd, XMM8, XMM9, FSxBX + %endif + +EMIT_INSTR_PLUS_ICEBP vpsignd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpsignd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpsignd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpsignd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PABSB +; +EMIT_INSTR_PLUS_ICEBP pabsb, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pabsb, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pabsb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP 
pabsb, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpabsb, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsb, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpabsb, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pabsb, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pabsb, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsb, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpabsb, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsb, YMM9, YMM8 +EMIT_INSTR_PLUS_ICEBP vpabsb, YMM9, FSxBX + %endif + +; +; [V]PABSW +; +EMIT_INSTR_PLUS_ICEBP pabsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pabsw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pabsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pabsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpabsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsw, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpabsw, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pabsw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pabsw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpabsw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsw, YMM9, YMM8 +EMIT_INSTR_PLUS_ICEBP vpabsw, YMM9, FSxBX + %endif + +; +; [V]PABSD +; +EMIT_INSTR_PLUS_ICEBP pabsd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pabsd, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pabsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pabsd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpabsd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsd, YMM1, YMM2 +EMIT_INSTR_PLUS_ICEBP vpabsd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pabsd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pabsd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpabsd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpabsd, YMM9, YMM8 +EMIT_INSTR_PLUS_ICEBP vpabsd, YMM9, FSxBX + %endif + +; +; [V]PHADDW +; +EMIT_INSTR_PLUS_ICEBP phaddw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phaddw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP phaddw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phaddw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddw, XMM1, XMM2, XMM3 
+EMIT_INSTR_PLUS_ICEBP vphaddw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphaddw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phaddw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phaddw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphaddw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphaddw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PHADDD +; +EMIT_INSTR_PLUS_ICEBP phaddd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phaddd, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP phaddd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phaddd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vphaddd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphaddd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phaddd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phaddd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphaddd, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphaddd, YMM8, YMM9, FSxBX + %endif + + +; +; [V]PHSUBW +; +EMIT_INSTR_PLUS_ICEBP phsubw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phsubw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP phsubw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phsubw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vphsubw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphsubw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phsubw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phsubw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphsubw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphsubw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PHSUBD +; +EMIT_INSTR_PLUS_ICEBP phsubd, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phsubd, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP 
phsubd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phsubd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vphsubd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphsubd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phsubd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phsubd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphsubd, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphsubd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PHADDSW +; +EMIT_INSTR_PLUS_ICEBP phaddsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phaddsw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP phaddsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phaddsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vphaddsw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddsw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphaddsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phaddsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phaddsw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphaddsw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphaddsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphaddsw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PHSUBSW +; +EMIT_INSTR_PLUS_ICEBP phsubsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP phsubsw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP phsubsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phsubsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vphsubsw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubsw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vphsubsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phsubsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP phsubsw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vphsubsw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphsubsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vphsubsw, YMM8, YMM9, 
FSxBX + %endif + +; +; [V]PMADDUBSW +; +EMIT_INSTR_PLUS_ICEBP pmaddubsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmaddubsw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pmaddubsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmaddubsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmaddubsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmaddubsw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmaddubsw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMULHRSW +; +EMIT_INSTR_PLUS_ICEBP pmulhrsw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmulhrsw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pmulhrsw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmulhrsw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmulhrsw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmulhrsw, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmulhrsw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PSADBW +; +EMIT_INSTR_PLUS_ICEBP psadbw, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP psadbw, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP psadbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP psadbw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpsadbw, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpsadbw, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vpsadbw, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpsadbw, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP psadbw, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP psadbw, XMM8, FSxBX 
+EMIT_INSTR_PLUS_ICEBP vpsadbw, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpsadbw, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpsadbw, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpsadbw, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMULDQ +; +EMIT_INSTR_PLUS_ICEBP pmuldq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmuldq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuldq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmuldq, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuldq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmuldq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmuldq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmuldq, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuldq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmuldq, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuldq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmuldq, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMULUDQ +; +EMIT_INSTR_PLUS_ICEBP pmuludq, MM1, MM2 +EMIT_INSTR_PLUS_ICEBP pmuludq, MM1, FSxBX +EMIT_INSTR_PLUS_ICEBP pmuludq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmuludq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuludq, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vpmuludq, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuludq, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vpmuludq, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmuludq, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pmuludq, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuludq, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vpmuludq, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmuludq, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vpmuludq, YMM8, YMM9, FSxBX + %endif + +; +; [V]UNPCKLPS +; +EMIT_INSTR_PLUS_ICEBP unpcklps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP unpcklps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklps, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vunpcklps, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklps, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vunpcklps, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP unpcklps, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP unpcklps, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP 
vunpcklps, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vunpcklps, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklps, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vunpcklps, YMM8, YMM9, FSxBX + %endif + +; +; [V]UNPCKLPD +; +EMIT_INSTR_PLUS_ICEBP unpcklpd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP unpcklpd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklpd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vunpcklpd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklpd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vunpcklpd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP unpcklpd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP unpcklpd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklpd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vunpcklpd, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpcklpd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vunpcklpd, YMM8, YMM9, FSxBX + %endif + +; +; [V]UNPCKHPS +; +EMIT_INSTR_PLUS_ICEBP unpckhps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP unpckhps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhps, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vunpckhps, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhps, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vunpckhps, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP unpckhps, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP unpckhps, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhps, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP vunpckhps, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhps, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vunpckhps, YMM8, YMM9, FSxBX + %endif + +; +; [V]UNPCKHPD +; +EMIT_INSTR_PLUS_ICEBP unpckhpd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP unpckhpd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhpd, XMM1, XMM2, XMM3 +EMIT_INSTR_PLUS_ICEBP vunpckhpd, XMM1, XMM2, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhpd, YMM1, YMM2, YMM3 +EMIT_INSTR_PLUS_ICEBP vunpckhpd, YMM1, YMM2, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP unpckhpd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP unpckhpd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhpd, XMM8, XMM9, XMM10 +EMIT_INSTR_PLUS_ICEBP 
vunpckhpd, XMM8, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vunpckhpd, YMM8, YMM9, YMM10 +EMIT_INSTR_PLUS_ICEBP vunpckhpd, YMM8, YMM9, FSxBX + %endif + +; +; [V]PMOVSXBW +; +EMIT_INSTR_PLUS_ICEBP pmovsxbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovsxbw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxbw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxbw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbw, YMM9, FSxBX + %endif + +; +; [V]PMOVSXBD +; +EMIT_INSTR_PLUS_ICEBP pmovsxbd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovsxbd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxbd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxbd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbd, YMM9, FSxBX + %endif + +; +; [V]PMOVSXBQ +; +EMIT_INSTR_PLUS_ICEBP pmovsxbq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovsxbq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxbq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxbq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxbq, YMM9, FSxBX + %endif + +; +; [V]PMOVSXWD +; +EMIT_INSTR_PLUS_ICEBP pmovsxwd, XMM1, XMM2 
+EMIT_INSTR_PLUS_ICEBP pmovsxwd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxwd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxwd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxwd, YMM9, FSxBX + %endif + +; +; [V]PMOVSXWQ +; +EMIT_INSTR_PLUS_ICEBP pmovsxwq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovsxwq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxwq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxwq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxwq, YMM9, FSxBX + %endif + +; +; [V]PMOVSXDQ +; +EMIT_INSTR_PLUS_ICEBP pmovsxdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovsxdq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovsxdq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovsxdq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovsxdq, YMM9, FSxBX + %endif + +; +; [V]PMOVZXBW +; +EMIT_INSTR_PLUS_ICEBP pmovzxbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxbw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP 
vpmovzxbw, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxbw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxbw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbw, YMM9, FSxBX + %endif + +; +; [V]PMOVZXBD +; +EMIT_INSTR_PLUS_ICEBP pmovzxbd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxbd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxbd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxbd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbd, YMM9, FSxBX + %endif + +; +; [V]PMOVZXBQ +; +EMIT_INSTR_PLUS_ICEBP pmovzxbq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxbq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxbq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxbq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxbq, YMM9, FSxBX + %endif + +; +; [V]PMOVZXWD +; +EMIT_INSTR_PLUS_ICEBP pmovzxwd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxwd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxwd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxwd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP 
vpmovzxwd, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxwd, YMM9, FSxBX + %endif + +; +; [V]PMOVZXWQ +; +EMIT_INSTR_PLUS_ICEBP pmovzxwq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxwq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxwq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxwq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxwq, YMM9, FSxBX + %endif + +; +; [V]PMOVZXDQ +; +EMIT_INSTR_PLUS_ICEBP pmovzxdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pmovzxdq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, YMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pmovzxdq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP pmovzxdq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, YMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vpmovzxdq, YMM9, FSxBX + %endif + +; +; [V]SHUFPS +; +EMIT_INSTR_PLUS_ICEBP shufps, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP shufps, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP shufps, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP shufps, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufps, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vshufps, XMM1, XMM2, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufps, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, YMM1, YMM2, YMM3, 000h +EMIT_INSTR_PLUS_ICEBP vshufps, YMM1, YMM2, FSxBX, 000h + + %if TMPL_BITS == 
64 +EMIT_INSTR_PLUS_ICEBP shufps, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP shufps, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP shufps, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP shufps, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufps, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vshufps, XMM8, XMM9, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufps, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, YMM8, YMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufps, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vshufps, YMM8, YMM9, FSxBX, 000h + %endif + +; +; [V]SHUFPD +; +EMIT_INSTR_PLUS_ICEBP shufpd, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP shufpd, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP shufpd, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP shufpd, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM1, XMM2, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM1, YMM2, YMM3, 000h +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM1, YMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP shufpd, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP shufpd, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP shufpd, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP shufpd, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vshufpd, XMM8, XMM9, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM8, YMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vshufpd, YMM8, YMM9, FSxBX, 000h + %endif 
+ +; +; [V]LDDQU +; +EMIT_INSTR_PLUS_ICEBP lddqu, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vlddqu, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vlddqu, YMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP lddqu, XMM10, FSxBX +EMIT_INSTR_PLUS_ICEBP vlddqu, XMM11, FSxBX +EMIT_INSTR_PLUS_ICEBP vlddqu, YMM12, FSxBX + %endif + +; +; [V]PHMINPOSUW +; +EMIT_INSTR_PLUS_ICEBP phminposuw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP phminposuw, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vphminposuw, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP vphminposuw, XMM1, FSxBX + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP phminposuw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP phminposuw, XMM9, FSxBX +EMIT_INSTR_PLUS_ICEBP vphminposuw, XMM9, XMM8 +EMIT_INSTR_PLUS_ICEBP vphminposuw, XMM9, FSxBX + %endif + +; +; [V]PBLENDVB +; +EMIT_INSTR_PLUS_ICEBP pblendvb, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP pblendvb, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vpblendvb, XMM1, XMM2, XMM3, XMM4 +EMIT_INSTR_PLUS_ICEBP vpblendvb, XMM1, XMM2, FSxBX, XMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pblendvb, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP pblendvb, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vpblendvb, XMM8, XMM9, XMM10, XMM11 +EMIT_INSTR_PLUS_ICEBP vpblendvb, XMM8, XMM9, FSxBX, XMM11 + %endif + +EMIT_INSTR_PLUS_ICEBP vpblendvb, YMM1, YMM2, YMM3, YMM4 +EMIT_INSTR_PLUS_ICEBP vpblendvb, YMM1, YMM2, FSxBX, YMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vpblendvb, YMM8, YMM9, YMM10, YMM11 +EMIT_INSTR_PLUS_ICEBP vpblendvb, YMM8, YMM9, FSxBX, YMM11 + %endif + +; +; [V]BLENDVPS +; +EMIT_INSTR_PLUS_ICEBP blendvps, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP blendvps, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vblendvps, XMM1, XMM2, XMM3, XMM4 +EMIT_INSTR_PLUS_ICEBP vblendvps, XMM1, XMM2, FSxBX, XMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP blendvps, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP blendvps, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vblendvps, XMM8, XMM9, XMM10, XMM11 +EMIT_INSTR_PLUS_ICEBP vblendvps, XMM8, XMM9, FSxBX, XMM11 + %endif + +EMIT_INSTR_PLUS_ICEBP vblendvps, YMM1, YMM2, YMM3, YMM4 
+EMIT_INSTR_PLUS_ICEBP vblendvps, YMM1, YMM2, FSxBX, YMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vblendvps, YMM8, YMM9, YMM10, YMM11 +EMIT_INSTR_PLUS_ICEBP vblendvps, YMM8, YMM9, FSxBX, YMM11 + %endif + +; +; [V]BLENDVPD +; +EMIT_INSTR_PLUS_ICEBP blendvpd, XMM1, XMM2 +EMIT_INSTR_PLUS_ICEBP blendvpd, XMM1, FSxBX +EMIT_INSTR_PLUS_ICEBP vblendvpd, XMM1, XMM2, XMM3, XMM4 +EMIT_INSTR_PLUS_ICEBP vblendvpd, XMM1, XMM2, FSxBX, XMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP blendvpd, XMM8, XMM9 +EMIT_INSTR_PLUS_ICEBP blendvpd, XMM8, FSxBX +EMIT_INSTR_PLUS_ICEBP vblendvpd, XMM8, XMM9, XMM10, XMM11 +EMIT_INSTR_PLUS_ICEBP vblendvpd, XMM8, XMM9, FSxBX, XMM11 + %endif + +EMIT_INSTR_PLUS_ICEBP vblendvpd, YMM1, YMM2, YMM3, YMM4 +EMIT_INSTR_PLUS_ICEBP vblendvpd, YMM1, YMM2, FSxBX, YMM4 + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP vblendvpd, YMM8, YMM9, YMM10, YMM11 +EMIT_INSTR_PLUS_ICEBP vblendvpd, YMM8, YMM9, FSxBX, YMM11 + %endif + +; +; [V]PALIGNR +; +EMIT_INSTR_PLUS_ICEBP palignr, MM1, MM2, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, MM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, MM1, MM2, 000h +EMIT_INSTR_PLUS_ICEBP palignr, MM1, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP palignr, MM1, MM2, 003h +EMIT_INSTR_PLUS_ICEBP palignr, MM1, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP palignr, MM1, MM2, 009h +EMIT_INSTR_PLUS_ICEBP palignr, MM1, FSxBX, 009h + +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, XMM2, 003h +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, XMM2, 013h +EMIT_INSTR_PLUS_ICEBP palignr, XMM1, FSxBX, 013h + +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP 
vpalignr, XMM1, XMM2, XMM3, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, XMM3, 013h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM1, XMM2, FSxBX, 013h + +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, YMM3, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, YMM3, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, YMM3, 013h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM1, YMM2, FSxBX, 013h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, XMM9, 003h +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, XMM9, 013h +EMIT_INSTR_PLUS_ICEBP palignr, XMM8, FSxBX, 013h + +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, XMM10, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, XMM10, 013h +EMIT_INSTR_PLUS_ICEBP vpalignr, XMM8, XMM9, FSxBX, 013h + +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, FSxBX, 000h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, YMM10, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, FSxBX, 003h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, YMM10, 013h +EMIT_INSTR_PLUS_ICEBP vpalignr, YMM8, YMM9, FSxBX, 013h + 
%endif + +; +; [V]PBLENDW +; +EMIT_INSTR_PLUS_ICEBP pblendw, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP pblendw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pblendw, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP pblendw, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM1, XMM2, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM1, YMM2, YMM3, 000h +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM1, YMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pblendw, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP pblendw, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pblendw, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP pblendw, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vpblendw, XMM8, XMM9, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM8, YMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vpblendw, YMM8, YMM9, FSxBX, 000h + %endif + +; +; [V]BLENDPS +; +EMIT_INSTR_PLUS_ICEBP blendps, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP blendps, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP blendps, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP blendps, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendps, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vblendps, XMM1, XMM2, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendps, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, YMM1, YMM2, YMM3, 000h 
+EMIT_INSTR_PLUS_ICEBP vblendps, YMM1, YMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP blendps, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP blendps, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP blendps, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP blendps, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendps, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vblendps, XMM8, XMM9, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendps, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, YMM8, YMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendps, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vblendps, YMM8, YMM9, FSxBX, 000h + %endif + +; +; [V]BLENDPD +; +EMIT_INSTR_PLUS_ICEBP blendpd, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP blendpd, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP blendpd, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP blendpd, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM1, XMM2, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM1, YMM2, YMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM1, YMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM1, YMM2, YMM3, 000h +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM1, YMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP blendpd, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP blendpd, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP blendpd, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP blendpd, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vblendpd, XMM8, XMM9, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM8, YMM9, YMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM8, YMM9, FSxBX, 0FFh 
+EMIT_INSTR_PLUS_ICEBP vblendpd, YMM8, YMM9, YMM10, 000h +EMIT_INSTR_PLUS_ICEBP vblendpd, YMM8, YMM9, FSxBX, 000h + %endif + +; +; [V]PCLMULQDQ +; +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM1, XMM2, 0FFh +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM1, XMM2, 000h +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM1, XMM2, XMM3, 0FFh +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM1, XMM2, XMM3, 000h +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM1, XMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM8, XMM9, 0FFh +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM8, XMM9, 000h +EMIT_INSTR_PLUS_ICEBP pclmulqdq, XMM8, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM8, XMM9, XMM10, 0FFh +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM8, XMM9, XMM10, 000h +EMIT_INSTR_PLUS_ICEBP vpclmulqdq, XMM8, XMM9, FSxBX, 000h + %endif + +; +; [V]PINSRW +; +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, EDX, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, EDX, 000h +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM1, EDX, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM1, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM1, EDX, 000h +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM1, FSxBX, 000h + +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM1, XMM2, EDX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM1, XMM2, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM1, XMM2, EDX, 000h +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM1, XMM2, FSxBX, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, R9D, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, MM1, R9D, 000h + +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM8, R9D, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM8, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM8, R9D, 000h +EMIT_INSTR_PLUS_ICEBP pinsrw, XMM8, FSxBX, 
000h + +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM8, XMM9, R9D, 0FFh +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM8, XMM9, FSxBX, 0FFh +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM8, XMM9, R9D, 000h +EMIT_INSTR_PLUS_ICEBP vpinsrw, XMM8, XMM9, FSxBX, 000h + %endif + +; +; [V]PEXTRW +; +EMIT_INSTR_PLUS_ICEBP pextrw, EDX, MM1, 0FFh +EMIT_INSTR_PLUS_ICEBP pextrw, EDX, MM1, 000h + +EMIT_INSTR_PLUS_ICEBP pextrw, EDX, XMM1, 0FFh +EMIT_INSTR_PLUS_ICEBP pextrw, EDX, XMM1, 000h + +EMIT_INSTR_PLUS_ICEBP vpextrw, EDX, XMM1, 0FFh +EMIT_INSTR_PLUS_ICEBP vpextrw, EDX, XMM1, 000h + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP pextrw, R9D, MM1, 0FFh +EMIT_INSTR_PLUS_ICEBP pextrw, R9D, MM1, 000h + +; @todo Emits the SSE4.1 0f3a variant EMIT_INSTR_PLUS_ICEBP pextrw, RDX, XMM1, 0FFh +; @todo Emits the SSE4.1 0f3a variant EMIT_INSTR_PLUS_ICEBP pextrw, RDX, XMM1, 000h + +EMIT_INSTR_PLUS_ICEBP pextrw, R9D, XMM8, 0FFh +EMIT_INSTR_PLUS_ICEBP pextrw, R9D, XMM8, 000h + +EMIT_INSTR_PLUS_ICEBP vpextrw, R9D, XMM8, 0FFh +EMIT_INSTR_PLUS_ICEBP vpextrw, R9D, XMM8, 000h + +EMIT_INSTR_PLUS_ICEBP vpextrw, RDX, XMM1, 0FFh +EMIT_INSTR_PLUS_ICEBP vpextrw, RDX, XMM1, 000h + %endif + +; +; [V]MOVMSKPS +; +EMIT_INSTR_PLUS_ICEBP movmskps, EDX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovmskps, EDX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovmskps, EDX, YMM1 + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movmskps, R9D, XMM8 +EMIT_INSTR_PLUS_ICEBP movmskps, RDX, XMM1 + +EMIT_INSTR_PLUS_ICEBP vmovmskps, R9D, XMM8 +EMIT_INSTR_PLUS_ICEBP vmovmskps, RDX, XMM1 + +EMIT_INSTR_PLUS_ICEBP vmovmskps, R9D, YMM8 +EMIT_INSTR_PLUS_ICEBP vmovmskps, RDX, YMM1 + %endif + +; +; [V]MOVMSKPD +; +EMIT_INSTR_PLUS_ICEBP movmskpd, EDX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovmskpd, EDX, XMM1 +EMIT_INSTR_PLUS_ICEBP vmovmskpd, EDX, YMM1 + + %if TMPL_BITS == 64 +EMIT_INSTR_PLUS_ICEBP movmskpd, R9D, XMM8 +EMIT_INSTR_PLUS_ICEBP movmskpd, RDX, XMM1 + +EMIT_INSTR_PLUS_ICEBP vmovmskpd, R9D, XMM8 +EMIT_INSTR_PLUS_ICEBP vmovmskpd, RDX, XMM1 + +EMIT_INSTR_PLUS_ICEBP vmovmskpd, R9D, YMM8 
+EMIT_INSTR_PLUS_ICEBP vmovmskpd, RDX, YMM1 + %endif + + +%endif ; BS3_INSTANTIATING_CMN + +%include "bs3kit-template-footer.mac" ; reset environment |