author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 03:01:46 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 03:01:46 +0000
commit     f8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch)
tree       26484e9d7e2c67806c2d1760196ff01aaa858e8c /src/recompiler/target-i386
parent     Initial commit. (diff)
Adding upstream version 6.0.4-dfsg. (upstream/6.0.4-dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/recompiler/target-i386')
-rw-r--r--  src/recompiler/target-i386/Makefile.kup          0
-rw-r--r--  src/recompiler/target-i386/TODO                 32
-rw-r--r--  src/recompiler/target-i386/cpu.h              1215
-rw-r--r--  src/recompiler/target-i386/exec.h              370
-rw-r--r--  src/recompiler/target-i386/helper.c           1227
-rw-r--r--  src/recompiler/target-i386/helper.h            253
-rw-r--r--  src/recompiler/target-i386/helper_template.h   344
-rw-r--r--  src/recompiler/target-i386/op_helper.c        7164
-rw-r--r--  src/recompiler/target-i386/ops_sse.h          2111
-rw-r--r--  src/recompiler/target-i386/ops_sse_header.h    359
-rw-r--r--  src/recompiler/target-i386/svm.h               222
-rw-r--r--  src/recompiler/target-i386/translate.c        8385
12 files changed, 21682 insertions, 0 deletions
diff --git a/src/recompiler/target-i386/Makefile.kup b/src/recompiler/target-i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/target-i386/Makefile.kup
diff --git a/src/recompiler/target-i386/TODO b/src/recompiler/target-i386/TODO
new file mode 100644
index 00000000..8dfd4633
--- /dev/null
+++ b/src/recompiler/target-i386/TODO
@@ -0,0 +1,32 @@
+Correctness issues:
+
+- some eflags manipulations incorrectly reset bit 0x2.
+- SVM: test, cpu save/restore, SMM save/restore.
+- x86_64: lcall/ljmp intel/amd differences ?
+- better code fetch (different exception handling + CS.limit support)
+- user/kernel PUSHL/POPL in helper.c
+- add missing cpuid tests
+- return UD exception if LOCK prefix incorrectly used
+- test ldt limit < 7 ?
+- fix some 16 bit sp push/pop overflows (pusha/popa, lcall lret)
+- full support of segment limit/rights
+- full x87 exception support
+- improve x87 bit exactness (use bochs code ?)
+- DRx register support
+- CR0.AC emulation
+- SSE alignment checks
+- fix SSE min/max with nans
+
+Optimizations/Features:
+
+- add SVM nested paging support
+- add VMX support
+- add AVX support
+- add SSE5 support
+- fxsave/fxrstor AMD extensions
+- improve monitor/mwait support
+- faster EFLAGS update: consider that SZAP, C and O can be updated separately
+  with a bit field in CC_OP and more state variables.
+- evaluate the x87 stack pointer statically
+- find a way to avoid translating the same TB several times when CR0.TS
+  is set or cleared.
diff --git a/src/recompiler/target-i386/cpu.h b/src/recompiler/target-i386/cpu.h
new file mode 100644
index 00000000..c643db8d
--- /dev/null
+++ b/src/recompiler/target-i386/cpu.h
@@ -0,0 +1,1215 @@
+/*
+ * i386 virtual CPU header
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */ + +#ifndef CPU_I386_H +#define CPU_I386_H + +#include "config.h" + +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif + +/* target supports implicit self modifying code */ +#define TARGET_HAS_SMC +/* support for self modifying code even if the modified instruction is + close to the modifying instruction */ +#define TARGET_HAS_PRECISE_SMC + +#define TARGET_HAS_ICE 1 + +#ifdef TARGET_X86_64 +#define ELF_MACHINE EM_X86_64 +#else +#define ELF_MACHINE EM_386 +#endif + +#define CPUState struct CPUX86State + +#include "cpu-defs.h" + +#include "softfloat.h" + +#ifdef VBOX +# include <iprt/critsect.h> +# include <iprt/thread.h> +# include <iprt/assert.h> +# include <iprt/asm.h> +# include <VBox/vmm/vmm.h> +# include <VBox/vmm/stam.h> +# include <VBox/vmm/cpumctx.h> +# undef MSR_IA32_APICBASE_BSP +#endif /* VBOX */ + +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +#define R_AL 0 +#define R_CL 1 +#define R_DL 2 +#define R_BL 3 +#define R_AH 4 +#define R_CH 5 +#define R_DH 6 +#define R_BH 7 + +#define R_ES 0 +#define R_CS 1 +#define R_SS 2 +#define R_DS 3 +#define R_FS 4 +#define R_GS 5 + +/* segment descriptor fields */ +#define DESC_G_MASK (1 << 23) +#define DESC_B_SHIFT 22 +#define DESC_B_MASK (1 << DESC_B_SHIFT) +#define DESC_L_SHIFT 21 /* x86_64 only : 64 bit code segment */ +#define DESC_L_MASK (1 << DESC_L_SHIFT) +#define DESC_AVL_MASK (1 << 20) +#define DESC_P_MASK (1 << 15) +#define DESC_DPL_SHIFT 13 +#define DESC_DPL_MASK (3 << DESC_DPL_SHIFT) +#define DESC_S_MASK (1 << 12) +#define DESC_TYPE_SHIFT 8 +#define DESC_TYPE_MASK (15 << DESC_TYPE_SHIFT) +#define DESC_A_MASK (1 << 8) + +#define DESC_CS_MASK (1 << 11) /* 1=code segment 0=data segment */ +#define DESC_C_MASK (1 << 10) /* code: conforming */ +#define DESC_R_MASK (1 << 9) /* code: readable */ + +#define DESC_E_MASK (1 << 10) /* data: expansion direction */ +#define DESC_W_MASK (1 << 9) /* data: writable */ + +#define DESC_TSS_BUSY_MASK (1 << 9) +#ifdef VBOX +# define DESC_INTEL_UNUSABLE RT_BIT_32(16+8) /**< Internal VT-x bit for NULL sectors. */ +# define DESC_RAW_FLAG_BITS UINT32_C(0x00ffffff) /**< Flag bits we load from the descriptor. */ +#endif + +/* eflags masks */ +#define CC_C 0x0001 +#define CC_P 0x0004 +#define CC_A 0x0010 +#define CC_Z 0x0040 +#define CC_S 0x0080 +#define CC_O 0x0800 + +#define TF_SHIFT 8 +#define IOPL_SHIFT 12 +#define VM_SHIFT 17 + +#define TF_MASK 0x00000100 +#define IF_MASK 0x00000200 +#define DF_MASK 0x00000400 +#define IOPL_MASK 0x00003000 +#define NT_MASK 0x00004000 +#define RF_MASK 0x00010000 +#define VM_MASK 0x00020000 +#define AC_MASK 0x00040000 +#define VIF_MASK 0x00080000 +#define VIP_MASK 0x00100000 +#define ID_MASK 0x00200000 + +/* hidden flags - used internally by qemu to represent additional cpu + states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not + redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit + position to ease oring with eflags. 
*/ +/* current cpl */ +#define HF_CPL_SHIFT 0 +/* true if soft mmu is being used */ +#define HF_SOFTMMU_SHIFT 2 +/* true if hardware interrupts must be disabled for next instruction */ +#define HF_INHIBIT_IRQ_SHIFT 3 +/* 16 or 32 segments */ +#define HF_CS32_SHIFT 4 +#define HF_SS32_SHIFT 5 +/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */ +#define HF_ADDSEG_SHIFT 6 +/* copy of CR0.PE (protected mode) */ +#define HF_PE_SHIFT 7 +#define HF_TF_SHIFT 8 /* must be same as eflags */ +#define HF_MP_SHIFT 9 /* the order must be MP, EM, TS */ +#define HF_EM_SHIFT 10 +#define HF_TS_SHIFT 11 +#define HF_IOPL_SHIFT 12 /* must be same as eflags */ +#define HF_LMA_SHIFT 14 /* only used on x86_64: long mode active */ +#define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */ +#define HF_RF_SHIFT 16 /* must be same as eflags */ +#define HF_VM_SHIFT 17 /* must be same as eflags */ +#define HF_SMM_SHIFT 19 /* CPU in SMM mode */ +#define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */ +#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */ +#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */ + +#define HF_CPL_MASK (3 << HF_CPL_SHIFT) +#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT) +#define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT) +#define HF_CS32_MASK (1 << HF_CS32_SHIFT) +#define HF_SS32_MASK (1 << HF_SS32_SHIFT) +#define HF_ADDSEG_MASK (1 << HF_ADDSEG_SHIFT) +#define HF_PE_MASK (1 << HF_PE_SHIFT) +#define HF_TF_MASK (1 << HF_TF_SHIFT) +#define HF_MP_MASK (1 << HF_MP_SHIFT) +#define HF_EM_MASK (1 << HF_EM_SHIFT) +#define HF_TS_MASK (1 << HF_TS_SHIFT) +#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT) +#define HF_LMA_MASK (1 << HF_LMA_SHIFT) +#define HF_CS64_MASK (1 << HF_CS64_SHIFT) +#define HF_RF_MASK (1 << HF_RF_SHIFT) +#define HF_VM_MASK (1 << HF_VM_SHIFT) +#define HF_SMM_MASK (1 << HF_SMM_SHIFT) +#define HF_SVME_MASK (1 << HF_SVME_SHIFT) +#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT) +#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) + +/* hflags2 */ + +#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */ +#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */ +#define HF2_NMI_SHIFT 2 /* CPU serving NMI */ +#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */ + +#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT) +#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT) +#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT) +#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT) + +#define CR0_PE_SHIFT 0 +#define CR0_MP_SHIFT 1 + +#define CR0_PE_MASK (1 << 0) +#define CR0_MP_MASK (1 << 1) +#define CR0_EM_MASK (1 << 2) +#define CR0_TS_MASK (1 << 3) +#define CR0_ET_MASK (1 << 4) +#define CR0_NE_MASK (1 << 5) +#define CR0_WP_MASK (1 << 16) +#define CR0_AM_MASK (1 << 18) +#define CR0_PG_MASK (1U << 31) + +#define CR4_VME_MASK (1 << 0) +#define CR4_PVI_MASK (1 << 1) +#define CR4_TSD_MASK (1 << 2) +#define CR4_DE_MASK (1 << 3) +#define CR4_PSE_MASK (1 << 4) +#define CR4_PAE_MASK (1 << 5) +#define CR4_MCE_MASK (1 << 6) +#define CR4_PGE_MASK (1 << 7) +#define CR4_PCE_MASK (1 << 8) +#define CR4_OSFXSR_SHIFT 9 +#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT) +#define CR4_OSXMMEXCPT_MASK (1 << 10) + +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) +#define DR6_FIXED_1 0xffff0ff0 + +#define DR7_GD (1 << 13) +#define DR7_TYPE_SHIFT 16 +#define DR7_LEN_SHIFT 18 +#define DR7_FIXED_1 0x00000400 + +#define PG_PRESENT_BIT 0 +#define PG_RW_BIT 1 +#define PG_USER_BIT 2 +#define PG_PWT_BIT 3 +#define PG_PCD_BIT 4 +#define PG_ACCESSED_BIT 5 +#define PG_DIRTY_BIT 6 +#define PG_PSE_BIT 7 
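
/*
 * Illustrative aside, not part of the upstream diff: HF_TF_SHIFT,
 * HF_IOPL_SHIFT, HF_RF_SHIFT and HF_VM_SHIFT above are deliberately equal to
 * the corresponding EFLAGS bit positions, so the translator flags can be
 * assembled with a single OR, exactly as cpu_get_tb_cpu_state() does further
 * down in this header. A minimal sketch of the trick (the helper name is
 * ours):
 */
static inline uint32_t tb_flags_sketch(uint32_t hflags, uint32_t eflags)
{
    /* No shifting needed: the shared bit positions already line up. */
    return hflags | (eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
}
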
+#define PG_GLOBAL_BIT 8 +#define PG_NX_BIT 63 + +#define PG_PRESENT_MASK (1 << PG_PRESENT_BIT) +#define PG_RW_MASK (1 << PG_RW_BIT) +#define PG_USER_MASK (1 << PG_USER_BIT) +#define PG_PWT_MASK (1 << PG_PWT_BIT) +#define PG_PCD_MASK (1 << PG_PCD_BIT) +#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT) +#define PG_DIRTY_MASK (1 << PG_DIRTY_BIT) +#define PG_PSE_MASK (1 << PG_PSE_BIT) +#define PG_GLOBAL_MASK (1 << PG_GLOBAL_BIT) +#define PG_NX_MASK (1LL << PG_NX_BIT) + +#define PG_ERROR_W_BIT 1 + +#define PG_ERROR_P_MASK 0x01 +#define PG_ERROR_W_MASK (1 << PG_ERROR_W_BIT) +#define PG_ERROR_U_MASK 0x04 +#define PG_ERROR_RSVD_MASK 0x08 +#define PG_ERROR_I_D_MASK 0x10 + +#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ + +#define MCE_CAP_DEF MCG_CTL_P +#define MCE_BANKS_DEF 10 + +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ + +#define MSR_IA32_TSC 0x10 +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_MTRRcap 0xfe +#define MSR_MTRRcap_VCNT 8 +#define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) +#define MSR_MTRRcap_WC_SUPPORTED (1 << 10) + +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + +#define MSR_MCG_CAP 0x179 +#define MSR_MCG_STATUS 0x17a +#define MSR_MCG_CTL 0x17b + +#define MSR_IA32_PERF_STATUS 0x198 + +#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) +#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) + +#define MSR_MTRRfix64K_00000 0x250 +#define MSR_MTRRfix16K_80000 0x258 +#define MSR_MTRRfix16K_A0000 0x259 +#define MSR_MTRRfix4K_C0000 0x268 +#define MSR_MTRRfix4K_C8000 0x269 +#define MSR_MTRRfix4K_D0000 0x26a +#define MSR_MTRRfix4K_D8000 0x26b +#define MSR_MTRRfix4K_E0000 0x26c +#define MSR_MTRRfix4K_E8000 0x26d +#define MSR_MTRRfix4K_F0000 0x26e +#define MSR_MTRRfix4K_F8000 0x26f + +#define MSR_PAT 0x277 + +#define MSR_MTRRdefType 0x2ff + +#define MSR_MC0_CTL 0x400 +#define MSR_MC0_STATUS 0x401 +#define MSR_MC0_ADDR 0x402 +#define MSR_MC0_MISC 0x403 + +#define MSR_EFER 0xc0000080 + +#define MSR_EFER_SCE (1 << 0) +#define MSR_EFER_LME (1 << 8) +#define MSR_EFER_LMA (1 << 10) +#define MSR_EFER_NXE (1 << 11) +#define MSR_EFER_SVME (1 << 12) +#define MSR_EFER_FFXSR (1 << 14) + +#ifdef VBOX +# define MSR_APIC_RANGE_START 0x800 +# define MSR_APIC_RANGE_END 0x900 +#endif + +#define MSR_STAR 0xc0000081 +#define MSR_LSTAR 0xc0000082 +#define MSR_CSTAR 0xc0000083 +#define MSR_FMASK 0xc0000084 +#define MSR_FSBASE 0xc0000100 +#define MSR_GSBASE 0xc0000101 +#define MSR_KERNELGSBASE 0xc0000102 +#define MSR_TSC_AUX 0xc0000103 + +#define MSR_VM_HSAVE_PA 0xc0010117 + +/* cpuid_features bits */ +#define CPUID_FP87 (1 << 0) +#define CPUID_VME (1 << 1) +#define CPUID_DE (1 << 2) +#define CPUID_PSE (1 << 3) +#define CPUID_TSC (1 << 4) +#define CPUID_MSR (1 << 5) +#define CPUID_PAE (1 << 6) +#define CPUID_MCE (1 << 7) +#define CPUID_CX8 (1 << 8) +#define CPUID_APIC (1 << 9) +#define CPUID_SEP (1 << 11) /* sysenter/sysexit */ +#define CPUID_MTRR (1 << 12) +#define CPUID_PGE (1 << 13) +#define CPUID_MCA (1 << 14) +#define CPUID_CMOV (1 << 15) +#define CPUID_PAT (1 << 16) +#define CPUID_PSE36 (1 << 17) +#define CPUID_PN (1 << 18) +#define CPUID_CLFLUSH (1 << 19) +#define CPUID_DTS (1 << 21) +#define CPUID_ACPI (1 << 22) 
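
/*
 * Illustrative aside, not part of the upstream diff: the MSR_EFER_* bits
 * above drive the long-mode transitions. Enabling CR0.PG while EFER.LME is
 * set activates long mode (EFER.LMA and HF_LMA_MASK), which is what
 * cpu_x86_update_cr0() in helper.c implements. A simplified sketch (the
 * helper name is ours; the real code additionally requires CR4.PAE and
 * also handles leaving long mode):
 */
static inline void enter_long_mode_sketch(uint64_t *efer, uint32_t *hflags,
                                          uint32_t old_cr0, uint32_t new_cr0)
{
    /* Paging switched on while EFER.LME is set => long mode becomes active. */
    if (!(old_cr0 & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
        (*efer & MSR_EFER_LME)) {
        *efer |= MSR_EFER_LMA;
        *hflags |= HF_LMA_MASK;
    }
}
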
+#define CPUID_MMX (1 << 23) +#define CPUID_FXSR (1 << 24) +#define CPUID_SSE (1 << 25) +#define CPUID_SSE2 (1 << 26) +#define CPUID_SS (1 << 27) +#define CPUID_HT (1 << 28) +#define CPUID_TM (1 << 29) +#define CPUID_IA64 (1 << 30) +#define CPUID_PBE (1 << 31) + +#define CPUID_EXT_SSE3 (1 << 0) +#define CPUID_EXT_DTES64 (1 << 2) +#define CPUID_EXT_MONITOR (1 << 3) +#define CPUID_EXT_DSCPL (1 << 4) +#define CPUID_EXT_VMX (1 << 5) +#define CPUID_EXT_SMX (1 << 6) +#define CPUID_EXT_EST (1 << 7) +#define CPUID_EXT_TM2 (1 << 8) +#define CPUID_EXT_SSSE3 (1 << 9) +#define CPUID_EXT_CID (1 << 10) +#define CPUID_EXT_CX16 (1 << 13) +#define CPUID_EXT_XTPR (1 << 14) +#define CPUID_EXT_PDCM (1 << 15) +#define CPUID_EXT_DCA (1 << 18) +#define CPUID_EXT_SSE41 (1 << 19) +#define CPUID_EXT_SSE42 (1 << 20) +#define CPUID_EXT_X2APIC (1 << 21) +#define CPUID_EXT_MOVBE (1 << 22) +#define CPUID_EXT_POPCNT (1 << 23) +#define CPUID_EXT_XSAVE (1 << 26) +#define CPUID_EXT_OSXSAVE (1 << 27) +#define CPUID_EXT_HYPERVISOR (1 << 31) + +#define CPUID_EXT2_SYSCALL (1 << 11) +#define CPUID_EXT2_MP (1 << 19) +#define CPUID_EXT2_NX (1 << 20) +#define CPUID_EXT2_MMXEXT (1 << 22) +#define CPUID_EXT2_FFXSR (1 << 25) +#define CPUID_EXT2_PDPE1GB (1 << 26) +#define CPUID_EXT2_RDTSCP (1 << 27) +#define CPUID_EXT2_LM (1 << 29) +#define CPUID_EXT2_3DNOWEXT (1 << 30) +#define CPUID_EXT2_3DNOW (1 << 31) + +#define CPUID_EXT3_LAHF_LM (1 << 0) +#define CPUID_EXT3_CMP_LEG (1 << 1) +#define CPUID_EXT3_SVM (1 << 2) +#define CPUID_EXT3_EXTAPIC (1 << 3) +#define CPUID_EXT3_CR8LEG (1 << 4) +#define CPUID_EXT3_ABM (1 << 5) +#define CPUID_EXT3_SSE4A (1 << 6) +#define CPUID_EXT3_MISALIGNSSE (1 << 7) +#define CPUID_EXT3_3DNOWPREFETCH (1 << 8) +#define CPUID_EXT3_OSVW (1 << 9) +#define CPUID_EXT3_IBS (1 << 10) +#define CPUID_EXT3_SKINIT (1 << 12) + +#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ +#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ +#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */ + +#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ +#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ +#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ + +#define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */ +#define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */ + +#define EXCP00_DIVZ 0 +#define EXCP01_DB 1 +#define EXCP02_NMI 2 +#define EXCP03_INT3 3 +#define EXCP04_INTO 4 +#define EXCP05_BOUND 5 +#define EXCP06_ILLOP 6 +#define EXCP07_PREX 7 +#define EXCP08_DBLE 8 +#define EXCP09_XERR 9 +#define EXCP0A_TSS 10 +#define EXCP0B_NOSEG 11 +#define EXCP0C_STACK 12 +#define EXCP0D_GPF 13 +#define EXCP0E_PAGE 14 +#define EXCP10_COPR 16 +#define EXCP11_ALGN 17 +#define EXCP12_MCHK 18 + +#define EXCP_SYSCALL 0x100 /* only happens in user only emulation + for syscall instruction */ + +enum { + CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ + CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ + + CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ + CC_OP_MULW, + CC_OP_MULL, + CC_OP_MULQ, + + CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_ADDW, + CC_OP_ADDL, + CC_OP_ADDQ, + + CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_ADCW, + CC_OP_ADCL, + CC_OP_ADCQ, + + CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_SUBW, + CC_OP_SUBL, + CC_OP_SUBQ, + + CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_SBBW, + CC_OP_SBBL, + CC_OP_SBBQ, + + CC_OP_LOGICB, /* modify all flags, CC_DST = res */ + CC_OP_LOGICW, + 
CC_OP_LOGICL, + CC_OP_LOGICQ, + + CC_OP_INCB, /* modify all flags except, CC_DST = res, CC_SRC = C */ + CC_OP_INCW, + CC_OP_INCL, + CC_OP_INCQ, + + CC_OP_DECB, /* modify all flags except, CC_DST = res, CC_SRC = C */ + CC_OP_DECW, + CC_OP_DECL, + CC_OP_DECQ, + + CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */ + CC_OP_SHLW, + CC_OP_SHLL, + CC_OP_SHLQ, + + CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */ + CC_OP_SARW, + CC_OP_SARL, + CC_OP_SARQ, + + CC_OP_NB, +}; + +#ifdef FLOATX80 +#define USE_X86LDOUBLE +#endif + +#ifdef USE_X86LDOUBLE +typedef floatx80 CPU86_LDouble; +#else +typedef float64 CPU86_LDouble; +#endif + +typedef struct SegmentCache { + uint32_t selector; +#ifdef VBOX + /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */ + uint16_t newselector; + uint16_t fVBoxFlags; +#endif + target_ulong base; + uint32_t limit; + uint32_t flags; +} SegmentCache; + +typedef union { + uint8_t _b[16]; + uint16_t _w[8]; + uint32_t _l[4]; + uint64_t _q[2]; + float32 _s[4]; + float64 _d[2]; +} XMMReg; + +typedef union { + uint8_t _b[8]; + uint16_t _w[4]; + uint32_t _l[2]; + float32 _s[2]; + uint64_t q; +} MMXReg; + +#ifdef HOST_WORDS_BIGENDIAN +#define XMM_B(n) _b[15 - (n)] +#define XMM_W(n) _w[7 - (n)] +#define XMM_L(n) _l[3 - (n)] +#define XMM_S(n) _s[3 - (n)] +#define XMM_Q(n) _q[1 - (n)] +#define XMM_D(n) _d[1 - (n)] + +#define MMX_B(n) _b[7 - (n)] +#define MMX_W(n) _w[3 - (n)] +#define MMX_L(n) _l[1 - (n)] +#define MMX_S(n) _s[1 - (n)] +#else +#define XMM_B(n) _b[n] +#define XMM_W(n) _w[n] +#define XMM_L(n) _l[n] +#define XMM_S(n) _s[n] +#define XMM_Q(n) _q[n] +#define XMM_D(n) _d[n] + +#define MMX_B(n) _b[n] +#define MMX_W(n) _w[n] +#define MMX_L(n) _l[n] +#define MMX_S(n) _s[n] +#endif +#define MMX_Q(n) q + +typedef union { +#ifdef USE_X86LDOUBLE + CPU86_LDouble d __attribute__((aligned(16))); +#else + CPU86_LDouble d; +#endif + MMXReg mmx; +} FPReg; + +typedef struct { + uint64_t base; + uint64_t mask; +} MTRRVar; + +#define CPU_NB_REGS64 16 +#define CPU_NB_REGS32 8 + +#ifdef TARGET_X86_64 +#define CPU_NB_REGS CPU_NB_REGS64 +#else +#define CPU_NB_REGS CPU_NB_REGS32 +#endif + +#define NB_MMU_MODES 2 + +typedef struct CPUX86State { + /* standard registers */ + target_ulong regs[CPU_NB_REGS]; + target_ulong eip; + target_ulong eflags; /* eflags register. During CPU emulation, CC + flags and DF are set to zero because they are + stored elsewhere */ + + /* emulator internal eflags handling */ + target_ulong cc_src; + target_ulong cc_dst; + uint32_t cc_op; + int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ + uint32_t hflags; /* TB flags, see HF_xxx constants. These flags + are known at translation time. */ + uint32_t hflags2; /* various other flags, see HF2_xxx constants. */ + + /* segments */ + SegmentCache segs[6]; /* selector values */ + SegmentCache ldt; + SegmentCache tr; + SegmentCache gdt; /* only base and limit are used */ + SegmentCache idt; /* only base and limit are used */ + + target_ulong cr[5]; /* NOTE: cr1 is unused */ + int32_t a20_mask; + + /* FPU state */ + unsigned int fpstt; /* top of stack index */ + uint16_t fpus; + uint16_t fpuc; + uint8_t fptags[8]; /* 0 = valid, 1 = empty */ + FPReg fpregs[8]; + + /* emulator internal variables */ + float_status fp_status; +#ifdef VBOX + uint32_t alignment3[3]; /* force the long double to start a 16 byte line. 
*/ +#endif + CPU86_LDouble ft0; +#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) + uint32_t alignment4; /* long double is 12 byte, pad it to 16. */ +#endif + + float_status mmx_status; /* for 3DNow! float ops */ + float_status sse_status; + uint32_t mxcsr; + XMMReg xmm_regs[CPU_NB_REGS]; + XMMReg xmm_t0; + MMXReg mmx_t0; + target_ulong cc_tmp; /* temporary for rcr/rcl */ + + /* sysenter registers */ + uint32_t sysenter_cs; +#ifdef VBOX + uint32_t alignment0; +#endif + target_ulong sysenter_esp; + target_ulong sysenter_eip; + uint64_t efer; + uint64_t star; + + uint64_t vm_hsave; + uint64_t vm_vmcb; + uint64_t tsc_offset; + uint64_t intercept; + uint16_t intercept_cr_read; + uint16_t intercept_cr_write; + uint16_t intercept_dr_read; + uint16_t intercept_dr_write; + uint32_t intercept_exceptions; + uint8_t v_tpr; + +#ifdef TARGET_X86_64 + target_ulong lstar; + target_ulong cstar; + target_ulong fmask; + target_ulong kernelgsbase; +#endif + uint64_t system_time_msr; + uint64_t wall_clock_msr; + + uint64_t tsc; + + uint64_t pat; + + /* exception/interrupt handling */ + int error_code; + int exception_is_int; +#ifdef VBOX +# define EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ 0x42 /**< Special CPUX86State::exception_is_int value indicating hardware irq. (HACK ALERT) */ +#endif + target_ulong exception_next_eip; + target_ulong dr[8]; /* debug registers */ + union { + CPUBreakpoint *cpu_breakpoint[4]; + CPUWatchpoint *cpu_watchpoint[4]; + }; /* break/watchpoints for dr[0..3] */ + uint32_t smbase; + int old_exception; /* exception in flight */ + + CPU_COMMON + +#ifdef VBOX + /** cpu state flags. (see defines below) */ + uint32_t state; + /** The VM handle. */ + PVM pVM; + /** The VMCPU handle. */ + PVMCPU pVCpu; + /** code buffer for instruction emulation */ + void *pvCodeBuffer; + /** code buffer size */ + uint32_t cbCodeBuffer; +#endif /* VBOX */ + + /* processor features (e.g. for CPUID insn) */ +#ifndef VBOX /* remR3CpuId deals with these */ + uint32_t cpuid_level; + uint32_t cpuid_vendor1; + uint32_t cpuid_vendor2; + uint32_t cpuid_vendor3; + uint32_t cpuid_version; +#endif /* !VBOX */ + uint32_t cpuid_features; + uint32_t cpuid_ext_features; +#ifndef VBOX + uint32_t cpuid_xlevel; + uint32_t cpuid_model[12]; +#endif /* !VBOX */ + uint32_t cpuid_ext2_features; + uint32_t cpuid_ext3_features; + uint32_t cpuid_apic_id; +#ifndef VBOX + int cpuid_vendor_override; + + /* MTRRs */ + uint64_t mtrr_fixed[11]; + uint64_t mtrr_deftype; + MTRRVar mtrr_var[8]; + + /* For KVM */ + uint32_t mp_state; + int32_t exception_injected; + int32_t interrupt_injected; + uint8_t soft_interrupt; + uint8_t nmi_injected; + uint8_t nmi_pending; + uint8_t has_error_code; + uint32_t sipi_vector; + + uint32_t cpuid_kvm_features; + + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; + + uint64 mcg_cap; + uint64 mcg_status; + uint64 mcg_ctl; + uint64 mce_banks[MCE_BANKS_DEF*4]; + + uint64_t tsc_aux; + + /* vmstate */ + uint16_t fpus_vmstate; + uint16_t fptag_vmstate; + uint16_t fpregs_format_vmstate; + + uint64_t xstate_bv; + XMMReg ymmh_regs[CPU_NB_REGS]; + + uint64_t xcr0; +#else /* VBOX */ + + /** Alignment padding. 
*/ +# if HC_ARCH_BITS == 64 \ + || ( HC_ARCH_BITS == 32 \ + && !defined(RT_OS_WINDOWS) \ + && ( (!defined(VBOX_ENABLE_VBOXREM64) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD)) \ + || (defined(VBOX_ENABLE_VBOXREM64) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))) ) ) + uint32_t alignment2[1]; +# endif + + /** Profiling tb_flush. */ + STAMPROFILE StatTbFlush; + + /** Addends for HVA -> GPA translations. */ + target_phys_addr_t phys_addends[NB_MMU_MODES][CPU_TLB_SIZE]; +#endif /* VBOX */ +} CPUX86State; + +#ifdef VBOX + +/* Version 1.6 structure; just for loading the old saved state */ +typedef struct SegmentCache_Ver16 { + uint32_t selector; + uint32_t base; + uint32_t limit; + uint32_t flags; + /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */ + uint32_t newselector; +} SegmentCache_Ver16; + +# define CPU_NB_REGS_VER16 8 + +/* Version 1.6 structure; just for loading the old saved state */ +typedef struct CPUX86State_Ver16 { +# if TARGET_LONG_BITS > HOST_LONG_BITS + /* temporaries if we cannot store them in host registers */ + uint32_t t0, t1, t2; +# endif + + /* standard registers */ + uint32_t regs[CPU_NB_REGS_VER16]; + uint32_t eip; + uint32_t eflags; /* eflags register. During CPU emulation, CC + flags and DF are set to zero because they are + stored elsewhere */ + + /* emulator internal eflags handling */ + uint32_t cc_src; + uint32_t cc_dst; + uint32_t cc_op; + int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ + uint32_t hflags; /* hidden flags, see HF_xxx constants */ + + /* segments */ + SegmentCache_Ver16 segs[6]; /* selector values */ + SegmentCache_Ver16 ldt; + SegmentCache_Ver16 tr; + SegmentCache_Ver16 gdt; /* only base and limit are used */ + SegmentCache_Ver16 idt; /* only base and limit are used */ + + uint32_t cr[5]; /* NOTE: cr1 is unused */ + uint32_t a20_mask; + + /* FPU state */ + unsigned int fpstt; /* top of stack index */ + unsigned int fpus; + unsigned int fpuc; + uint8_t fptags[8]; /* 0 = valid, 1 = empty */ + union { +# ifdef USE_X86LDOUBLE + CPU86_LDouble d __attribute__((aligned(16))); +# else + CPU86_LDouble d; +# endif + MMXReg mmx; + } fpregs[8]; + + /* emulator internal variables */ + float_status fp_status; +# ifdef VBOX + uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */ +# endif + CPU86_LDouble ft0; +# if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) + uint32_t alignment4; /* long double is 12 byte, pad it to 16. */ +# endif + union { + float f; + double d; + int i32; + int64_t i64; + } fp_convert; + + float_status sse_status; + uint32_t mxcsr; + XMMReg xmm_regs[CPU_NB_REGS_VER16]; + XMMReg xmm_t0; + MMXReg mmx_t0; + + /* sysenter registers */ + uint32_t sysenter_cs; + uint32_t sysenter_esp; + uint32_t sysenter_eip; +# ifdef VBOX + uint32_t alignment0; +# endif + uint64_t efer; + uint64_t star; + + uint64_t pat; + + /* temporary data for USE_CODE_COPY mode */ +# ifdef USE_CODE_COPY + uint32_t tmp0; + uint32_t saved_esp; + int native_fp_regs; /* if true, the FPU state is in the native CPU regs */ +# endif + + /* exception/interrupt handling */ + jmp_buf jmp_env; +} CPUX86State_Ver16; + +/** CPUX86State state flags + * @{ */ +# define CPU_RAW_RING0 0x0002 /* Set after first time RawR0 is executed, never cleared. 
*/ +# define CPU_EMULATE_SINGLE_INSTR 0x0040 /* Execute a single instruction in emulation mode */ +# define CPU_EMULATE_SINGLE_STEP 0x0080 /* go into single step mode */ +# define CPU_RAW_HM 0x0100 /* Set after first time HWACC is executed, never cleared. */ +/** @} */ +#endif /* !VBOX */ + +#ifdef VBOX +CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model); +#else /* !VBOX */ +CPUX86State *cpu_x86_init(const char *cpu_model); +#endif /* !VBOX */ +int cpu_x86_exec(CPUX86State *s); +void cpu_x86_close(CPUX86State *s); +void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + const char *optarg); +void x86_cpudef_setup(void); + +int cpu_get_pic_interrupt(CPUX86State *s); +/* MSDOS compatibility mode FPU exception support */ +void cpu_set_ferr(CPUX86State *s); + +/* this function must always be used to load data in the segment + cache: it synchronizes the hflags with the segment cache values */ +#ifndef VBOX +static inline void cpu_x86_load_seg_cache(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +#else +static inline void cpu_x86_load_seg_cache_with_clean_flags(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +#endif +{ + SegmentCache *sc; + unsigned int new_hflags; + + sc = &env->segs[seg_reg]; + sc->selector = selector; + sc->base = base; + sc->limit = limit; + sc->flags = flags; +#ifdef VBOX + sc->newselector = 0; + sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID; +#endif + + /* update the hidden flags */ + { + if (seg_reg == R_CS) { +#ifdef TARGET_X86_64 + if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) { + /* long mode */ + env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; + env->hflags &= ~(HF_ADDSEG_MASK); + } else +#endif + { + /* legacy / compatibility case */ + new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_CS32_SHIFT); + env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) | + new_hflags; + } + } + new_hflags = (env->segs[R_SS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_SS32_SHIFT); + if (env->hflags & HF_CS64_MASK) { + /* zero base assumed for DS, ES and SS in long mode */ + } else if (!(env->cr[0] & CR0_PE_MASK) || + (env->eflags & VM_MASK) || + !(env->hflags & HF_CS32_MASK)) { + /* XXX: try to avoid this test. The problem comes from the + fact that is real mode or vm86 mode we only modify the + 'base' and 'selector' fields of the segment cache to go + faster. A solution may be to force addseg to one in + translate-i386.c. */ + new_hflags |= HF_ADDSEG_MASK; + } else { + new_hflags |= ((env->segs[R_DS].base | + env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << + HF_ADDSEG_SHIFT; + } + env->hflags = (env->hflags & + ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags; + } +} + +#ifdef VBOX +/* Raw input, adjust the flags adding the stupid intel flag when applicable. */ +static inline void cpu_x86_load_seg_cache(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +{ + flags &= DESC_RAW_FLAG_BITS; + if (flags & DESC_P_MASK) + flags |= DESC_A_MASK; /* Make sure the A bit is set to avoid trouble. 
*/ + else if (selector < 4U) + flags |= DESC_INTEL_UNUSABLE; + cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, base, limit, flags); +} +#endif + +static inline void cpu_x86_load_seg_cache_sipi(CPUX86State *env, + int sipi_vector) +{ + env->eip = 0; + cpu_x86_load_seg_cache(env, R_CS, sipi_vector << 8, + sipi_vector << 12, + env->segs[R_CS].limit, + env->segs[R_CS].flags); + env->halted = 0; +} + +int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, + target_ulong *base, unsigned int *limit, + unsigned int *flags); + +/* wrapper, just in case memory mappings must be changed */ +static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl) +{ +#if HF_CPL_MASK == 3 + s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl; +#else +#error HF_CPL_MASK is hardcoded +#endif +} + +/* op_helper.c */ +/* used for debug or cpu save/restore */ +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f); +CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper); + +/* cpu-exec.c */ +/* the following helpers are only usable in user mode simulation as + they can trigger unexpected exceptions */ +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); + +/* you can call this signal handler from your SIGBUS and SIGSEGV + signal handlers to inform the virtual CPU of exceptions. non zero + is returned if the signal was handled by the virtual CPU. */ +int cpu_x86_signal_handler(int host_signum, void *pinfo, + void *puc); + +/* cpuid.c */ +void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); +int cpu_x86_register (CPUX86State *env, const char *cpu_model); +void cpu_clear_apic_feature(CPUX86State *env); + +/* helper.c */ +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write, int mmu_idx, int is_softmmu); +#define cpu_handle_mmu_fault cpu_x86_handle_mmu_fault +void cpu_x86_set_a20(CPUX86State *env, int a20_state); + +static inline int hw_breakpoint_enabled(unsigned long dr7, int index) +{ + return (dr7 >> (index * 2)) & 3; +} + +static inline int hw_breakpoint_type(unsigned long dr7, int index) +{ + return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3; +} + +static inline int hw_breakpoint_len(unsigned long dr7, int index) +{ + int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3); + return (len == 2) ? 
8 : len + 1; +} + +void hw_breakpoint_insert(CPUX86State *env, int index); +void hw_breakpoint_remove(CPUX86State *env, int index); +int check_hw_breakpoints(CPUX86State *env, int force_dr6_update); + +/* will be suppressed */ +void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0); +void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3); +void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4); + +/* hw/pc.c */ +void cpu_smm_update(CPUX86State *env); +uint64_t cpu_get_tsc(CPUX86State *env); + +/* used to debug */ +#define X86_DUMP_FPU 0x0001 /* dump FPU state too */ +#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */ + +#ifdef VBOX +int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue); +int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue); +void cpu_trap_raw(CPUX86State *env1); + +/* in helper.c */ +uint8_t read_byte(CPUX86State *env1, target_ulong addr); +uint16_t read_word(CPUX86State *env1, target_ulong addr); +void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val); +uint32_t read_dword(CPUX86State *env1, target_ulong addr); +void write_word(CPUX86State *env1, target_ulong addr, uint16_t val); +void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val); +/* in helper.c */ +int emulate_single_instr(CPUX86State *env1); +int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl); + +void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr); +void save_raw_fp_state(CPUX86State *env, uint8_t *ptr); +#endif /* VBOX */ + +#define TARGET_PAGE_BITS 12 + +#ifdef TARGET_X86_64 +#define TARGET_PHYS_ADDR_SPACE_BITS 52 +/* ??? This is really 48 bits, sign-extended, but the only thing + accessible to userland with bit 48 set is the VSYSCALL, and that + is handled via other mechanisms. */ +#define TARGET_VIRT_ADDR_SPACE_BITS 47 +#else +#define TARGET_PHYS_ADDR_SPACE_BITS 36 +#define TARGET_VIRT_ADDR_SPACE_BITS 32 +#endif + +#define cpu_init cpu_x86_init +#define cpu_exec cpu_x86_exec +#define cpu_gen_code cpu_x86_gen_code +#define cpu_signal_handler cpu_x86_signal_handler +#define cpu_list_id x86_cpu_list +#define cpudef_setup x86_cpudef_setup + +#define CPU_SAVE_VERSION 12 + +/* MMU modes definitions */ +#define MMU_MODE0_SUFFIX _kernel +#define MMU_MODE1_SUFFIX _user +#define MMU_USER_IDX 1 +static inline int cpu_mmu_index (CPUState *env) +{ + return (env->hflags & HF_CPL_MASK) == 3 ? 
1 : 0; +} + +/* translate.c */ +void optimize_flags_init(void); + +typedef struct CCTable { + int (*compute_all)(void); /* return all the flags */ + int (*compute_c)(void); /* return the C flag */ +} CCTable; + +#if defined(CONFIG_USER_ONLY) +static inline void cpu_clone_regs(CPUState *env, target_ulong newsp) +{ + if (newsp) + env->regs[R_ESP] = newsp; + env->regs[R_EAX] = 0; +} +#endif + +#include "cpu-all.h" +#include "svm.h" + +#ifndef VBOX +#if !defined(CONFIG_USER_ONLY) +#include "hw/apic.h" +#endif +#else /* VBOX */ +extern void cpu_set_apic_tpr(CPUX86State *env, uint8_t val); +extern uint8_t cpu_get_apic_tpr(CPUX86State *env); +extern uint64_t cpu_get_apic_base(CPUX86State *env); +#endif /* VBOX */ + +static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, + target_ulong *cs_base, int *flags) +{ + *cs_base = env->segs[R_CS].base; + if (env->hflags & HF_CS64_MASK) + *pc = *cs_base + env->eip; + else + *pc = (uint32_t)(*cs_base + env->eip); + *flags = env->hflags | + (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK)); +} + +#ifndef VBOX +void apic_init_reset(CPUState *env); +void apic_sipi(CPUState *env); +void do_cpu_init(CPUState *env); +void do_cpu_sipi(CPUState *env); +#endif /* !VBOX */ +#endif /* CPU_I386_H */ diff --git a/src/recompiler/target-i386/exec.h b/src/recompiler/target-i386/exec.h new file mode 100644 index 00000000..355599fa --- /dev/null +++ b/src/recompiler/target-i386/exec.h @@ -0,0 +1,370 @@ +/* + * i386 execution defines + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#include "config.h" +#include "dyngen-exec.h" + +/* XXX: factorize this mess */ +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif + +#include "cpu-defs.h" + +register struct CPUX86State *env asm(AREG0); + +#include "qemu-common.h" +#include "qemu-log.h" + +#undef EAX +#define EAX (env->regs[R_EAX]) +#undef ECX +#define ECX (env->regs[R_ECX]) +#undef EDX +#define EDX (env->regs[R_EDX]) +#undef EBX +#define EBX (env->regs[R_EBX]) +#undef ESP +#define ESP (env->regs[R_ESP]) +#undef EBP +#define EBP (env->regs[R_EBP]) +#undef ESI +#define ESI (env->regs[R_ESI]) +#undef EDI +#define EDI (env->regs[R_EDI]) +#undef EIP +#define EIP (env->eip) +#define DF (env->df) + +#define CC_SRC (env->cc_src) +#define CC_DST (env->cc_dst) +#define CC_OP (env->cc_op) + +/* float macros */ +#define FT0 (env->ft0) +#define ST0 (env->fpregs[env->fpstt].d) +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) +#define ST1 ST(1) + +#include "cpu.h" +#include "exec-all.h" + +/* op_helper.c */ +void do_interrupt(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw); +void do_interrupt_user(int intno, int is_int, int error_code, + target_ulong next_eip); +void QEMU_NORETURN raise_exception_err(int exception_index, int error_code); +void QEMU_NORETURN raise_exception(int exception_index); +void QEMU_NORETURN raise_exception_env(int exception_index, CPUState *nenv); +void do_smm_enter(void); + +/* n must be a constant to be efficient */ +static inline target_long lshift(target_long x, int n) +{ + if (n >= 0) + return x << n; + else + return x >> (-n); +} + +#include "helper.h" + +static inline void svm_check_intercept(uint32_t type) +{ + helper_svm_check_intercept_param(type, 0); +} + +#if !defined(CONFIG_USER_ONLY) + +#include "softmmu_exec.h" + +#endif /* !defined(CONFIG_USER_ONLY) */ + +#ifdef USE_X86LDOUBLE +/* use long double functions */ +#define floatx_to_int32 floatx80_to_int32 +#define floatx_to_int64 floatx80_to_int64 +#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero +#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero +#define int32_to_floatx int32_to_floatx80 +#define int64_to_floatx int64_to_floatx80 +#define float32_to_floatx float32_to_floatx80 +#define float64_to_floatx float64_to_floatx80 +#define floatx_to_float32 floatx80_to_float32 +#define floatx_to_float64 floatx80_to_float64 +#define floatx_abs floatx80_abs +#define floatx_chs floatx80_chs +#define floatx_round_to_int floatx80_round_to_int +#define floatx_compare floatx80_compare +#define floatx_compare_quiet floatx80_compare_quiet +#else +#define floatx_to_int32 float64_to_int32 +#define floatx_to_int64 float64_to_int64 +#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero +#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero +#define int32_to_floatx int32_to_float64 +#define int64_to_floatx int64_to_float64 +#define float32_to_floatx float32_to_float64 +#define float64_to_floatx(x, e) (x) +#define floatx_to_float32 float64_to_float32 +#define floatx_to_float64(x, e) (x) +#define floatx_abs float64_abs +#define floatx_chs float64_chs +#define floatx_round_to_int float64_round_to_int +#define floatx_compare float64_compare +#define floatx_compare_quiet float64_compare_quiet +#endif + +#ifdef VBOX +# ifdef IPRT_NO_CRT +# undef sin +# undef cos +# undef sqrt +# undef pow +# undef log +# undef tan +# undef atan2 +# undef floor +# undef ceil +# undef ldexp +# define sin sinl +# define cos cosl +# define sqrt 
sqrtl +# define pow powl +# define log logl +# define tan tanl +# define atan2 atan2l +# define floor floorl +# define ceil ceill +# define ldexp ldexpl +# endif +#endif + +#define RC_MASK 0xc00 +#define RC_NEAR 0x000 +#define RC_DOWN 0x400 +#define RC_UP 0x800 +#define RC_CHOP 0xc00 + +#define MAXTAN 9223372036854775808.0 + +#ifdef USE_X86LDOUBLE + +/* only for x86 */ +typedef union { + long double d; + struct { + unsigned long long lower; + unsigned short upper; + } l; +} CPU86_LDoubleU; + +/* the following deal with x86 long double-precision numbers */ +#define MAXEXPD 0x7fff +#define EXPBIAS 16383 +#define EXPD(fp) (fp.l.upper & 0x7fff) +#define SIGND(fp) ((fp.l.upper) & 0x8000) +#define MANTD(fp) (fp.l.lower) +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS + +#else + +/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */ +typedef union { + double d; +#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__) + struct { + uint32_t lower; + int32_t upper; + } l; +#else + struct { + int32_t upper; + uint32_t lower; + } l; +#endif +#ifndef __arm__ + int64_t ll; +#endif +} CPU86_LDoubleU; + +/* the following deal with IEEE double-precision numbers */ +#define MAXEXPD 0x7ff +#define EXPBIAS 1023 +#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF) +#define SIGND(fp) ((fp.l.upper) & 0x80000000) +#ifdef __arm__ +#define MANTD(fp) (fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32)) +#else +#define MANTD(fp) (fp.ll & ((1LL << 52) - 1)) +#endif +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20) +#endif + +static inline void fpush(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fptags[env->fpstt] = 0; /* validate stack entry */ +} + +static inline void fpop(void) +{ + env->fptags[env->fpstt] = 1; /* invvalidate stack entry */ + env->fpstt = (env->fpstt + 1) & 7; +} + +#ifndef USE_X86LDOUBLE +static inline CPU86_LDouble helper_fldt(target_ulong ptr) +{ + CPU86_LDoubleU temp; + int upper, e; + uint64_t ll; + + /* mantissa */ + upper = lduw(ptr + 8); + /* XXX: handle overflow ? 
*/ + e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ + e |= (upper >> 4) & 0x800; /* sign */ + ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1); +#ifdef __arm__ + temp.l.upper = (e << 20) | (ll >> 32); + temp.l.lower = ll; +#else + temp.ll = ll | ((uint64_t)e << 52); +#endif + return temp.d; +} + +static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) +{ + CPU86_LDoubleU temp; + int e; + + temp.d = f; + /* mantissa */ + stq(ptr, (MANTD(temp) << 11) | (1LL << 63)); + /* exponent + sign */ + e = EXPD(temp) - EXPBIAS + 16383; + e |= SIGND(temp) >> 16; + stw(ptr + 8, e); +} +#else + +/* we use memory access macros */ + +static inline CPU86_LDouble helper_fldt(target_ulong ptr) +{ + CPU86_LDoubleU temp; + + temp.l.lower = ldq(ptr); + temp.l.upper = lduw(ptr + 8); + return temp.d; +} + +static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) +{ + CPU86_LDoubleU temp; + + temp.d = f; + stq(ptr, temp.l.lower); + stw(ptr + 8, temp.l.upper); +} + +#endif /* USE_X86LDOUBLE */ + +#define FPUS_IE (1 << 0) +#define FPUS_DE (1 << 1) +#define FPUS_ZE (1 << 2) +#define FPUS_OE (1 << 3) +#define FPUS_UE (1 << 4) +#define FPUS_PE (1 << 5) +#define FPUS_SF (1 << 6) +#define FPUS_SE (1 << 7) +#define FPUS_B (1 << 15) + +#define FPUC_EM 0x3f + +static inline uint32_t compute_eflags(void) +{ + return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); +} + +/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */ +static inline void load_eflags(int eflags, int update_mask) +{ + CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((eflags >> 10) & 1)); + env->eflags = (env->eflags & ~update_mask) | + (eflags & update_mask) | 0x2; +} + +static inline int cpu_has_work(CPUState *env) +{ + int work; + + work = (env->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK); + work |= env->interrupt_request & CPU_INTERRUPT_NMI; + work |= env->interrupt_request & CPU_INTERRUPT_INIT; + work |= env->interrupt_request & CPU_INTERRUPT_SIPI; + + return work; +} + +static inline int cpu_halted(CPUState *env) { + /* handle exit of HALTED state */ + if (!env->halted) + return 0; + /* disable halt condition */ + if (cpu_has_work(env)) { + env->halted = 0; + return 0; + } + return EXCP_HALTED; +} + +/* load efer and update the corresponding hflags. XXX: do consistency + checks with cpuid bits ? */ +static inline void cpu_load_efer(CPUState *env, uint64_t val) +{ + env->efer = val; + env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK); + if (env->efer & MSR_EFER_LMA) + env->hflags |= HF_LMA_MASK; + if (env->efer & MSR_EFER_SVME) + env->hflags |= HF_SVME_MASK; +} + +static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) +{ + env->eip = tb->pc - tb->cs_base; +} + diff --git a/src/recompiler/target-i386/helper.c b/src/recompiler/target-i386/helper.c new file mode 100644 index 00000000..a6f37ff6 --- /dev/null +++ b/src/recompiler/target-i386/helper.c @@ -0,0 +1,1227 @@ +/* + * i386 helpers (without register variable usage) + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifndef VBOX +#include <inttypes.h> +#include <signal.h> +#endif /* !VBOX */ + +#include "cpu.h" +#include "exec-all.h" +#include "qemu-common.h" +#include "kvm.h" + +//#define DEBUG_MMU + +/* NOTE: must be called outside the CPU execute loop */ +void cpu_reset(CPUX86State *env) +{ + int i; + + if (qemu_loglevel_mask(CPU_LOG_RESET)) { + qemu_log("CPU Reset (CPU %d)\n", env->cpu_index); + log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP); + } + + memset(env, 0, offsetof(CPUX86State, breakpoints)); + + tlb_flush(env, 1); + + env->old_exception = -1; + + /* init to reset state */ + +#ifdef CONFIG_SOFTMMU + env->hflags |= HF_SOFTMMU_MASK; +#endif + env->hflags2 |= HF2_GIF_MASK; + + cpu_x86_update_cr0(env, 0x60000010); + env->a20_mask = ~0x0; + env->smbase = 0x30000; + + env->idt.limit = 0xffff; + env->gdt.limit = 0xffff; + env->ldt.limit = 0xffff; + env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT); + env->tr.limit = 0xffff; + env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT); + + cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + + env->eip = 0xfff0; +#ifndef VBOX /* We'll get the right value from CPUM. 
*/ + env->regs[R_EDX] = env->cpuid_version; +#endif + + env->eflags = 0x2; + + /* FPU init */ + for(i = 0;i < 8; i++) + env->fptags[i] = 1; + env->fpuc = 0x37f; + + env->mxcsr = 0x1f80; + + memset(env->dr, 0, sizeof(env->dr)); + env->dr[6] = DR6_FIXED_1; + env->dr[7] = DR7_FIXED_1; + cpu_breakpoint_remove_all(env, BP_CPU); + cpu_watchpoint_remove_all(env, BP_CPU); + +#ifndef VBOX + env->mcg_status = 0; +#endif +} + +void cpu_x86_close(CPUX86State *env) +{ +#ifndef VBOX + qemu_free(env); +#endif +} + +/***********************************************************/ +/* x86 debug */ + +static const char *cc_op_str[] = { + "DYNAMIC", + "EFLAGS", + + "MULB", + "MULW", + "MULL", + "MULQ", + + "ADDB", + "ADDW", + "ADDL", + "ADDQ", + + "ADCB", + "ADCW", + "ADCL", + "ADCQ", + + "SUBB", + "SUBW", + "SUBL", + "SUBQ", + + "SBBB", + "SBBW", + "SBBL", + "SBBQ", + + "LOGICB", + "LOGICW", + "LOGICL", + "LOGICQ", + + "INCB", + "INCW", + "INCL", + "INCQ", + + "DECB", + "DECW", + "DECL", + "DECQ", + + "SHLB", + "SHLW", + "SHLL", + "SHLQ", + + "SARB", + "SARW", + "SARL", + "SARQ", +}; + +static void +cpu_x86_dump_seg_cache(CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + const char *name, struct SegmentCache *sc) +{ +#ifdef VBOX +# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__) +#endif +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name, + sc->selector, sc->base, sc->limit, sc->flags); + } else +#endif + { + cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector, + (uint32_t)sc->base, sc->limit, sc->flags); + } + + if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK)) + goto done; + + cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT); + if (sc->flags & DESC_S_MASK) { + if (sc->flags & DESC_CS_MASK) { + cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" : + ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16")); + cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-', + (sc->flags & DESC_R_MASK) ? 'R' : '-'); + } else { + cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS " : "DS16"); + cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-', + (sc->flags & DESC_W_MASK) ? 'W' : '-'); + } + cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-'); + } else { + static const char *sys_type_name[2][16] = { + { /* 32 bit mode */ + "Reserved", "TSS16-avl", "LDT", "TSS16-busy", + "CallGate16", "TaskGate", "IntGate16", "TrapGate16", + "Reserved", "TSS32-avl", "Reserved", "TSS32-busy", + "CallGate32", "Reserved", "IntGate32", "TrapGate32" + }, + { /* 64 bit mode */ + "<hiword>", "Reserved", "LDT", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "Reserved", + "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64", + "Reserved", "IntGate64", "TrapGate64" + } + }; + cpu_fprintf(f, "%s", + sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0] + [(sc->flags & DESC_TYPE_MASK) + >> DESC_TYPE_SHIFT]); + } +done: + cpu_fprintf(f, "\n"); +#ifdef VBOX +# undef cpu_fprintf +#endif +} + +void cpu_dump_state(CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + int flags) +{ + int eflags, i, nb; + char cc_op_name[32]; + static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; + +#ifdef VBOX +# define cpu_fprintf(f, ...) 
RTLogPrintf(__VA_ARGS__) +#endif + cpu_synchronize_state(env); + + eflags = env->eflags; +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, + "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n" + "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n" + "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n" + "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n" + "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP], + env->regs[R_ESP], + env->regs[8], + env->regs[9], + env->regs[10], + env->regs[11], + env->regs[12], + env->regs[13], + env->regs[14], + env->regs[15], + env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + env->halted); + } else +#endif + { + cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n" + "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n" + "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + (uint32_t)env->regs[R_EAX], + (uint32_t)env->regs[R_EBX], + (uint32_t)env->regs[R_ECX], + (uint32_t)env->regs[R_EDX], + (uint32_t)env->regs[R_ESI], + (uint32_t)env->regs[R_EDI], + (uint32_t)env->regs[R_EBP], + (uint32_t)env->regs[R_ESP], + (uint32_t)env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 
'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + env->halted); + } + + for(i = 0; i < 6; i++) { + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i], + &env->segs[i]); + } + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt); + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr); + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n", + env->gdt.base, env->gdt.limit); + cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n", + env->idt.base, env->idt.limit); + cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n", + (uint32_t)env->cr[0], + env->cr[2], + env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) + cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]); + cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n", + env->dr[6], env->dr[7]); + } else +#endif + { + cpu_fprintf(f, "GDT= %08x %08x\n", + (uint32_t)env->gdt.base, env->gdt.limit); + cpu_fprintf(f, "IDT= %08x %08x\n", + (uint32_t)env->idt.base, env->idt.limit); + cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n", + (uint32_t)env->cr[0], + (uint32_t)env->cr[2], + (uint32_t)env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) + cpu_fprintf(f, "DR%d=%08x ", i, env->dr[i]); + cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n", env->dr[6], env->dr[7]); + } + if (flags & X86_DUMP_CCOP) { + if ((unsigned)env->cc_op < CC_OP_NB) + snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]); + else + snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n", + env->cc_src, env->cc_dst, + cc_op_name); + } else +#endif + { + cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n", + (uint32_t)env->cc_src, (uint32_t)env->cc_dst, + cc_op_name); + } + } + cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer); + if (flags & X86_DUMP_FPU) { + int fptag; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= ((!env->fptags[i]) << i); + } + cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n", + env->fpuc, + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11, + env->fpstt, + fptag, + env->mxcsr); + for(i=0;i<8;i++) { +#if defined(USE_X86LDOUBLE) + union { + long double d; + struct { + uint64_t lower; + uint16_t upper; + } l; + } tmp; + tmp.d = env->fpregs[i].d; + cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x", + i, tmp.l.lower, tmp.l.upper); +#else + cpu_fprintf(f, "FPR%d=%016" PRIx64, + i, env->fpregs[i].mmx.q); +#endif + if ((i & 1) == 1) + cpu_fprintf(f, "\n"); + else + cpu_fprintf(f, " "); + } + if (env->hflags & HF_CS64_MASK) + nb = 16; + else + nb = 8; + for(i=0;i<nb;i++) { + cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x", + i, + env->xmm_regs[i].XMM_L(3), + env->xmm_regs[i].XMM_L(2), + env->xmm_regs[i].XMM_L(1), + env->xmm_regs[i].XMM_L(0)); + if ((i & 1) == 1) + cpu_fprintf(f, "\n"); + else + cpu_fprintf(f, " "); + } + } +#ifdef VBOX +# undef cpu_fprintf +#endif +} + +/***********************************************************/ +/* x86 mmu */ +/* XXX: add PGE support */ + +void cpu_x86_set_a20(CPUX86State *env, int a20_state) +{ + a20_state = (a20_state != 0); + if (a20_state != ((env->a20_mask >> 20) & 1)) { +#if defined(DEBUG_MMU) + printf("A20 update: a20=%d\n", a20_state); +#endif + /* if the cpu is currently executing code, we must unlink it and + all the potentially executing TB */ + cpu_interrupt(env, 
CPU_INTERRUPT_EXITTB); + + /* when a20 is changed, all the MMU mappings are invalid, so + we must flush everything */ + tlb_flush(env, 1); + env->a20_mask = ~(1 << 20) | (a20_state << 20); + } +} + +void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) +{ + int pe_state; + +#if defined(DEBUG_MMU) + printf("CR0 update: CR0=0x%08x\n", new_cr0); +#endif + if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) != + (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) { + tlb_flush(env, 1); + } + +#ifdef TARGET_X86_64 + if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) && + (env->efer & MSR_EFER_LME)) { + /* enter in long mode */ + /* XXX: generate an exception */ + if (!(env->cr[4] & CR4_PAE_MASK)) + return; + env->efer |= MSR_EFER_LMA; + env->hflags |= HF_LMA_MASK; + } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) && + (env->efer & MSR_EFER_LMA)) { + /* exit long mode */ + env->efer &= ~MSR_EFER_LMA; + env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK); + env->eip &= 0xffffffff; + } +#endif + env->cr[0] = new_cr0 | CR0_ET_MASK; + + /* update PE flag in hidden flags */ + pe_state = (env->cr[0] & CR0_PE_MASK); + env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT); + /* ensure that ADDSEG is always set in real mode */ + env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT); + /* update FPU flags */ + env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) | + ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)); +#ifdef VBOX + remR3ChangeCpuMode(env); +#endif +} + +/* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in + the PDPT */ +void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) +{ + env->cr[3] = new_cr3; + if (env->cr[0] & CR0_PG_MASK) { +#if defined(DEBUG_MMU) + printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); +#endif + tlb_flush(env, 0); + } +} + +void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) +{ +#if defined(DEBUG_MMU) + printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]); +#endif + if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) != + (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) { + tlb_flush(env, 1); + } + /* SSE handling */ + if (!(env->cpuid_features & CPUID_SSE)) + new_cr4 &= ~CR4_OSFXSR_MASK; + if (new_cr4 & CR4_OSFXSR_MASK) + env->hflags |= HF_OSFXSR_MASK; + else + env->hflags &= ~HF_OSFXSR_MASK; + + env->cr[4] = new_cr4; +#ifdef VBOX + remR3ChangeCpuMode(env); +#endif +} + +#if defined(CONFIG_USER_ONLY) + +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write, int mmu_idx, int is_softmmu) +{ + /* user mode only emulation */ + is_write &= 1; + env->cr[2] = addr; + env->error_code = (is_write << PG_ERROR_W_BIT); + env->error_code |= PG_ERROR_U_MASK; + env->exception_index = EXCP0E_PAGE; + return 1; +} + +#else + +/* XXX: This value should match the one returned by CPUID + * and in exec.c */ +# if defined(TARGET_X86_64) +# define PHYS_ADDR_MASK 0xfffffff000LL +# else +# define PHYS_ADDR_MASK 0xffffff000LL +# endif + +/* return value: + -1 = cannot handle fault + 0 = nothing more to do + 1 = generate PF fault +*/ +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write1, int mmu_idx, int is_softmmu) +{ + uint64_t ptep, pte; + target_ulong pde_addr, pte_addr; + int error_code, is_dirty, prot, page_size, is_write, is_user; + target_phys_addr_t paddr; + uint32_t page_offset; + target_ulong vaddr, virt_addr; + + is_user = mmu_idx == MMU_USER_IDX; +#if defined(DEBUG_MMU) + 
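+    /* This trace fires on every page walk when DEBUG_MMU is defined:
+       'w' is is_write1 (0 = read, 1 = write, 2 = instruction fetch -- the
+       value the NX checks below test and which selects PG_ERROR_I_D_MASK
+       in the error code), and 'u' is 1 for MMU_USER_IDX accesses. */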
printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n", + addr, is_write1, is_user, env->eip); +#endif + is_write = is_write1 & 1; + + if (!(env->cr[0] & CR0_PG_MASK)) { + pte = addr; + virt_addr = addr & TARGET_PAGE_MASK; + prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + page_size = 4096; + goto do_mapping; + } + + if (env->cr[4] & CR4_PAE_MASK) { + uint64_t pde, pdpe; + target_ulong pdpe_addr; + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint64_t pml4e_addr, pml4e; + int32_t sext; + + /* test virtual address sign extension */ + sext = (int64_t)addr >> 47; + if (sext != 0 && sext != -1) { + env->error_code = 0; + env->exception_index = EXCP0D_GPF; + return 1; + } + + pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & + env->a20_mask; + pml4e = ldq_phys(pml4e_addr); + if (!(pml4e & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + if (!(pml4e & PG_ACCESSED_MASK)) { + pml4e |= PG_ACCESSED_MASK; + stl_phys_notdirty(pml4e_addr, pml4e); + } + ptep = pml4e ^ PG_NX_MASK; + pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + ptep &= pdpe ^ PG_NX_MASK; + if (!(pdpe & PG_ACCESSED_MASK)) { + pdpe |= PG_ACCESSED_MASK; + stl_phys_notdirty(pdpe_addr, pdpe); + } + } else +#endif + { + /* XXX: load them when cr3 is loaded ? */ + pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) & + env->a20_mask; + pde = ldq_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + ptep &= pde ^ PG_NX_MASK; + if (pde & PG_PSE_MASK) { + /* 2 MB page */ + page_size = 2048 * 1024; + ptep ^= PG_NX_MASK; + if ((ptep & PG_NX_MASK) && is_write1 == 2) + goto do_fault_protect; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pde & PG_DIRTY_MASK); + if (!(pde & PG_ACCESSED_MASK) || is_dirty) { + pde |= PG_ACCESSED_MASK; + if (is_dirty) + pde |= PG_DIRTY_MASK; + stl_phys_notdirty(pde_addr, pde); + } + /* align to page_size */ + pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff); + virt_addr = addr & ~(page_size - 1); + } else { + /* 4 KB page */ + if (!(pde & PG_ACCESSED_MASK)) { + pde |= PG_ACCESSED_MASK; + stl_phys_notdirty(pde_addr, pde); + } + pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) & + env->a20_mask; + pte = ldq_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + /* combine pde and pte nx, user and rw protections */ + ptep &= pte ^ PG_NX_MASK; + ptep ^= PG_NX_MASK; + if ((ptep & 
PG_NX_MASK) && is_write1 == 2) + goto do_fault_protect; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pte & PG_DIRTY_MASK); + if (!(pte & PG_ACCESSED_MASK) || is_dirty) { + pte |= PG_ACCESSED_MASK; + if (is_dirty) + pte |= PG_DIRTY_MASK; + stl_phys_notdirty(pte_addr, pte); + } + page_size = 4096; + virt_addr = addr & ~0xfff; + pte = pte & (PHYS_ADDR_MASK | 0xfff); + } + } else { + uint32_t pde; + + /* page directory entry */ + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & + env->a20_mask; + pde = ldl_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + /* if PSE bit is set, then we use a 4MB page */ + if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + page_size = 4096 * 1024; + if (is_user) { + if (!(pde & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(pde & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(pde & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pde & PG_DIRTY_MASK); + if (!(pde & PG_ACCESSED_MASK) || is_dirty) { + pde |= PG_ACCESSED_MASK; + if (is_dirty) + pde |= PG_DIRTY_MASK; + stl_phys_notdirty(pde_addr, pde); + } + + pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */ + ptep = pte; + virt_addr = addr & ~(page_size - 1); + } else { + if (!(pde & PG_ACCESSED_MASK)) { + pde |= PG_ACCESSED_MASK; + stl_phys_notdirty(pde_addr, pde); + } + + /* page directory entry */ + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & + env->a20_mask; + pte = ldl_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + /* combine pde and pte user and rw protections */ + ptep = pte & pde; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pte & PG_DIRTY_MASK); + if (!(pte & PG_ACCESSED_MASK) || is_dirty) { + pte |= PG_ACCESSED_MASK; + if (is_dirty) + pte |= PG_DIRTY_MASK; + stl_phys_notdirty(pte_addr, pte); + } + page_size = 4096; + virt_addr = addr & ~0xfff; + } + } + /* the page can be put in the TLB */ + prot = PAGE_READ; + if (!(ptep & PG_NX_MASK)) + prot |= PAGE_EXEC; + if (pte & PG_DIRTY_MASK) { + /* only set write access if already dirty... 
otherwise wait + for dirty access */ + if (is_user) { + if (ptep & PG_RW_MASK) + prot |= PAGE_WRITE; + } else { + if (!(env->cr[0] & CR0_WP_MASK) || + (ptep & PG_RW_MASK)) + prot |= PAGE_WRITE; + } + } + do_mapping: +#ifndef VBOX + pte = pte & env->a20_mask; +#endif + + /* Even if 4MB pages, we map only one 4KB page in the cache to + avoid filling it too fast */ + page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1); + paddr = (pte & TARGET_PAGE_MASK) + page_offset; +#ifdef VBOX + paddr &= env->a20_mask; +#endif + vaddr = virt_addr + page_offset; + + tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size); + return 0; + do_fault_protect: + error_code = PG_ERROR_P_MASK; + do_fault: + error_code |= (is_write << PG_ERROR_W_BIT); + if (is_user) + error_code |= PG_ERROR_U_MASK; + if (is_write1 == 2 && + (env->efer & MSR_EFER_NXE) && + (env->cr[4] & CR4_PAE_MASK)) + error_code |= PG_ERROR_I_D_MASK; + if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) { + /* cr2 is not modified in case of exceptions */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + addr); + } else { + env->cr[2] = addr; + } + env->error_code = error_code; + env->exception_index = EXCP0E_PAGE; + return 1; +} + +target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr) +{ + target_ulong pde_addr, pte_addr; + uint64_t pte; + target_phys_addr_t paddr; + uint32_t page_offset; + int page_size; + + if (env->cr[4] & CR4_PAE_MASK) { + target_ulong pdpe_addr; + uint64_t pde, pdpe; + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint64_t pml4e_addr, pml4e; + int32_t sext; + + /* test virtual address sign extension */ + sext = (int64_t)addr >> 47; + if (sext != 0 && sext != -1) + return -1; + + pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & + env->a20_mask; + pml4e = ldq_phys(pml4e_addr); + if (!(pml4e & PG_PRESENT_MASK)) + return -1; + + pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) + return -1; + } else +#endif + { + pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) + return -1; + } + + pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) & + env->a20_mask; + pde = ldq_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + return -1; + } + if (pde & PG_PSE_MASK) { + /* 2 MB page */ + page_size = 2048 * 1024; + pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */ + } else { + /* 4 KB page */ + pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) & + env->a20_mask; + page_size = 4096; + pte = ldq_phys(pte_addr); + } + if (!(pte & PG_PRESENT_MASK)) + return -1; + } else { + uint32_t pde; + + if (!(env->cr[0] & CR0_PG_MASK)) { + pte = addr; + page_size = 4096; + } else { + /* page directory entry */ + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; + pde = ldl_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) + return -1; + if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + pte = pde & ~0x003ff000; /* align to 4MB */ + page_size = 4096 * 1024; + } else { + /* page directory entry */ + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; + pte = ldl_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) + return -1; + page_size = 4096; + } + } + pte = pte & env->a20_mask; + } + + page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1); + paddr = (pte & TARGET_PAGE_MASK) + page_offset; + return 
paddr; +} + +void hw_breakpoint_insert(CPUState *env, int index) +{ + int type, err = 0; + + switch (hw_breakpoint_type(env->dr[7], index)) { + case 0: + if (hw_breakpoint_enabled(env->dr[7], index)) + err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU, + &env->cpu_breakpoint[index]); + break; + case 1: + type = BP_CPU | BP_MEM_WRITE; + goto insert_wp; + case 2: + /* No support for I/O watchpoints yet */ + break; + case 3: + type = BP_CPU | BP_MEM_ACCESS; + insert_wp: + err = cpu_watchpoint_insert(env, env->dr[index], + hw_breakpoint_len(env->dr[7], index), + type, &env->cpu_watchpoint[index]); + break; + } + if (err) + env->cpu_breakpoint[index] = NULL; +} + +void hw_breakpoint_remove(CPUState *env, int index) +{ + if (!env->cpu_breakpoint[index]) + return; + switch (hw_breakpoint_type(env->dr[7], index)) { + case 0: + if (hw_breakpoint_enabled(env->dr[7], index)) + cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]); + break; + case 1: + case 3: + cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]); + break; + case 2: + /* No support for I/O watchpoints yet */ + break; + } +} + +int check_hw_breakpoints(CPUState *env, int force_dr6_update) +{ + target_ulong dr6; + int reg, type; + int hit_enabled = 0; + + dr6 = env->dr[6] & ~0xf; + for (reg = 0; reg < 4; reg++) { + type = hw_breakpoint_type(env->dr[7], reg); + if ((type == 0 && env->dr[reg] == env->eip) || + ((type & 1) && env->cpu_watchpoint[reg] && + (env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT))) { + dr6 |= 1 << reg; + if (hw_breakpoint_enabled(env->dr[7], reg)) + hit_enabled = 1; + } + } + if (hit_enabled || force_dr6_update) + env->dr[6] = dr6; + return hit_enabled; +} + +static CPUDebugExcpHandler *prev_debug_excp_handler; + +void raise_exception_env(int exception_index, CPUState *env); + +static void breakpoint_handler(CPUState *env) +{ + CPUBreakpoint *bp; + + if (env->watchpoint_hit) { + if (env->watchpoint_hit->flags & BP_CPU) { + env->watchpoint_hit = NULL; + if (check_hw_breakpoints(env, 0)) + raise_exception_env(EXCP01_DB, env); + else + cpu_resume_from_signal(env, NULL); + } + } else { + QTAILQ_FOREACH(bp, &env->breakpoints, entry) + if (bp->pc == env->eip) { + if (bp->flags & BP_CPU) { + check_hw_breakpoints(env, 1); + raise_exception_env(EXCP01_DB, env); + } + break; + } + } + if (prev_debug_excp_handler) + prev_debug_excp_handler(env); +} + +#ifndef VBOX +/* This should come from sysemu.h - if we could include it here... 
*/ +void qemu_system_reset_request(void); + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc) +{ + uint64_t mcg_cap = cenv->mcg_cap; + unsigned bank_num = mcg_cap & 0xff; + uint64_t *banks = cenv->mce_banks; + + if (bank >= bank_num || !(status & MCI_STATUS_VAL)) + return; + + /* + * if MSR_MCG_CTL is not all 1s, the uncorrected error + * reporting is disabled + */ + if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && + cenv->mcg_ctl != ~(uint64_t)0) + return; + banks += 4 * bank; + /* + * if MSR_MCi_CTL is not all 1s, the uncorrected error + * reporting is disabled for the bank + */ + if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0) + return; + if (status & MCI_STATUS_UC) { + if ((cenv->mcg_status & MCG_STATUS_MCIP) || + !(cenv->cr[4] & CR4_MCE_MASK)) { + fprintf(stderr, "injects mce exception while previous " + "one is in progress!\n"); + qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + qemu_system_reset_request(); + return; + } + if (banks[1] & MCI_STATUS_VAL) + status |= MCI_STATUS_OVER; + banks[2] = addr; + banks[3] = misc; + cenv->mcg_status = mcg_status; + banks[1] = status; + cpu_interrupt(cenv, CPU_INTERRUPT_MCE); + } else if (!(banks[1] & MCI_STATUS_VAL) + || !(banks[1] & MCI_STATUS_UC)) { + if (banks[1] & MCI_STATUS_VAL) + status |= MCI_STATUS_OVER; + banks[2] = addr; + banks[3] = misc; + banks[1] = status; + } else + banks[1] |= MCI_STATUS_OVER; +} +#endif /* !VBOX */ +#endif /* !CONFIG_USER_ONLY */ + +#ifndef VBOX + +static void mce_init(CPUX86State *cenv) +{ + unsigned int bank, bank_num; + + if (((cenv->cpuid_version >> 8)&0xf) >= 6 + && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) { + cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF; + cenv->mcg_ctl = ~(uint64_t)0; + bank_num = MCE_BANKS_DEF; + for (bank = 0; bank < bank_num; bank++) + cenv->mce_banks[bank*4] = ~(uint64_t)0; + } +} + +int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, + target_ulong *base, unsigned int *limit, + unsigned int *flags) +{ + SegmentCache *dt; + target_ulong ptr; + uint32_t e1, e2; + int index; + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + ptr = dt->base + index; + if ((index + 7) > dt->limit + || cpu_memory_rw_debug(env, ptr, (uint8_t *)&e1, sizeof(e1), 0) != 0 + || cpu_memory_rw_debug(env, ptr+4, (uint8_t *)&e2, sizeof(e2), 0) != 0) + return 0; + + *base = ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000)); + *limit = (e1 & 0xffff) | (e2 & 0x000f0000); + if (e2 & DESC_G_MASK) + *limit = (*limit << 12) | 0xfff; + *flags = e2; + + return 1; +} + +#endif /* !VBOX */ + +#ifndef VBOX +CPUX86State *cpu_x86_init(const char *cpu_model) +#else +CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model) +#endif +{ +#ifndef VBOX + CPUX86State *env; +#endif + static int inited; + +#ifndef VBOX + env = qemu_mallocz(sizeof(CPUX86State)); +#endif + cpu_exec_init(env); + env->cpu_model_str = cpu_model; + + /* init various static tables */ + if (!inited) { + inited = 1; + optimize_flags_init(); +#ifndef CONFIG_USER_ONLY + prev_debug_excp_handler = + cpu_set_debug_excp_handler(breakpoint_handler); +#endif + } +#ifndef VBOX + if (cpu_x86_register(env, cpu_model) < 0) { + cpu_x86_close(env); + return NULL; + } + mce_init(env); +#endif + + qemu_init_vcpu(env); + + return env; +} + +#ifndef VBOX +#if !defined(CONFIG_USER_ONLY) +void do_cpu_init(CPUState *env) +{ + int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI; + 
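+    /* An INIT must not lose a SIPI that is already pending: the bit is
+       sampled above, survives the cpu_reset() below, and is restored so
+       that the following SIPI (see do_cpu_sipi) can start this AP at the
+       real-mode entry point encoded in the SIPI vector. */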
cpu_reset(env); + env->interrupt_request = sipi; + apic_init_reset(env->apic_state); + env->halted = !cpu_is_bsp(env); +} + +void do_cpu_sipi(CPUState *env) +{ + apic_sipi(env->apic_state); +} +#else +void do_cpu_init(CPUState *env) +{ +} +void do_cpu_sipi(CPUState *env) +{ +} +#endif +#endif /* !VBOX */ diff --git a/src/recompiler/target-i386/helper.h b/src/recompiler/target-i386/helper.h new file mode 100644 index 00000000..8307304a --- /dev/null +++ b/src/recompiler/target-i386/helper.h @@ -0,0 +1,253 @@ +#include "def-helper.h" + +DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int) +DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int) + +DEF_HELPER_0(lock, void) +DEF_HELPER_0(unlock, void) +DEF_HELPER_2(write_eflags, void, tl, i32) +DEF_HELPER_0(read_eflags, tl) +DEF_HELPER_1(divb_AL, void, tl) +DEF_HELPER_1(idivb_AL, void, tl) +DEF_HELPER_1(divw_AX, void, tl) +DEF_HELPER_1(idivw_AX, void, tl) +DEF_HELPER_1(divl_EAX, void, tl) +DEF_HELPER_1(idivl_EAX, void, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_1(mulq_EAX_T0, void, tl) +DEF_HELPER_1(imulq_EAX_T0, void, tl) +DEF_HELPER_2(imulq_T0_T1, tl, tl, tl) +DEF_HELPER_1(divq_EAX, void, tl) +DEF_HELPER_1(idivq_EAX, void, tl) +#endif + +DEF_HELPER_1(aam, void, int) +DEF_HELPER_1(aad, void, int) +DEF_HELPER_0(aaa, void) +DEF_HELPER_0(aas, void) +DEF_HELPER_0(daa, void) +DEF_HELPER_0(das, void) + +DEF_HELPER_1(lsl, tl, tl) +DEF_HELPER_1(lar, tl, tl) +DEF_HELPER_1(verr, void, tl) +DEF_HELPER_1(verw, void, tl) +DEF_HELPER_1(lldt, void, int) +DEF_HELPER_1(ltr, void, int) +DEF_HELPER_2(load_seg, void, int, int) +DEF_HELPER_3(ljmp_protected, void, int, tl, int) +DEF_HELPER_4(lcall_real, void, int, tl, int, int) +DEF_HELPER_4(lcall_protected, void, int, tl, int, int) +DEF_HELPER_1(iret_real, void, int) +DEF_HELPER_2(iret_protected, void, int, int) +DEF_HELPER_2(lret_protected, void, int, int) +DEF_HELPER_1(read_crN, tl, int) +DEF_HELPER_2(write_crN, void, int, tl) +DEF_HELPER_1(lmsw, void, tl) +DEF_HELPER_0(clts, void) +DEF_HELPER_2(movl_drN_T0, void, int, tl) +DEF_HELPER_1(invlpg, void, tl) + +DEF_HELPER_3(enter_level, void, int, int, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_3(enter64_level, void, int, int, tl) +#endif +DEF_HELPER_0(sysenter, void) +DEF_HELPER_1(sysexit, void, int) +#ifdef TARGET_X86_64 +DEF_HELPER_1(syscall, void, int) +DEF_HELPER_1(sysret, void, int) +#endif +DEF_HELPER_1(hlt, void, int) +DEF_HELPER_1(monitor, void, tl) +DEF_HELPER_1(mwait, void, int) +DEF_HELPER_0(debug, void) +DEF_HELPER_0(reset_rf, void) +DEF_HELPER_2(raise_interrupt, void, int, int) +DEF_HELPER_1(raise_exception, void, int) +DEF_HELPER_0(cli, void) +DEF_HELPER_0(sti, void) +DEF_HELPER_0(set_inhibit_irq, void) +DEF_HELPER_0(reset_inhibit_irq, void) +DEF_HELPER_2(boundw, void, tl, int) +DEF_HELPER_2(boundl, void, tl, int) +DEF_HELPER_0(rsm, void) +DEF_HELPER_1(into, void, int) +DEF_HELPER_1(cmpxchg8b, void, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cmpxchg16b, void, tl) +#endif +DEF_HELPER_0(single_step, void) +DEF_HELPER_0(cpuid, void) +DEF_HELPER_0(rdtsc, void) +DEF_HELPER_0(rdtscp, void) +DEF_HELPER_0(rdpmc, void) +DEF_HELPER_0(rdmsr, void) +DEF_HELPER_0(wrmsr, void) + +DEF_HELPER_1(check_iob, void, i32) +DEF_HELPER_1(check_iow, void, i32) +DEF_HELPER_1(check_iol, void, i32) +DEF_HELPER_2(outb, void, i32, i32) +DEF_HELPER_1(inb, tl, i32) +DEF_HELPER_2(outw, void, i32, i32) +DEF_HELPER_1(inw, tl, i32) +DEF_HELPER_2(outl, void, i32, i32) +DEF_HELPER_1(inl, tl, i32) + +DEF_HELPER_2(svm_check_intercept_param, void, i32, i64) +DEF_HELPER_2(vmexit, void, i32, i64) 
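+/* Every DEF_HELPER_n(name, ret, arg1..argn) entry in this file declares a
+   helper_name() function; the type letters map to C types (i32 -> uint32_t,
+   s32 -> int32_t, i64 -> uint64_t, tl -> target_ulong, ptr -> void *), so,
+   schematically, DEF_HELPER_2(vmrun, void, int, int) yields
+       void helper_vmrun(int, int);
+   The list is expanded more than once: def-helper.h (included at the top and
+   again at the very end of this file) sets up the macros so that the
+   translator, re-including helper.h with GEN_HELPER defined, instead gets
+   matching gen_helper_name() wrappers that emit TCG call ops. */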
+DEF_HELPER_3(svm_check_io, void, i32, i32, i32) +DEF_HELPER_2(vmrun, void, int, int) +DEF_HELPER_0(vmmcall, void) +DEF_HELPER_1(vmload, void, int) +DEF_HELPER_1(vmsave, void, int) +DEF_HELPER_0(stgi, void) +DEF_HELPER_0(clgi, void) +DEF_HELPER_0(skinit, void) +DEF_HELPER_1(invlpga, void, int) + +/* x86 FPU */ + +DEF_HELPER_1(flds_FT0, void, i32) +DEF_HELPER_1(fldl_FT0, void, i64) +DEF_HELPER_1(fildl_FT0, void, s32) +DEF_HELPER_1(flds_ST0, void, i32) +DEF_HELPER_1(fldl_ST0, void, i64) +DEF_HELPER_1(fildl_ST0, void, s32) +DEF_HELPER_1(fildll_ST0, void, s64) +#ifndef VBOX +DEF_HELPER_0(fsts_ST0, i32) +DEF_HELPER_0(fstl_ST0, i64) +DEF_HELPER_0(fist_ST0, s32) +DEF_HELPER_0(fistl_ST0, s32) +DEF_HELPER_0(fistll_ST0, s64) +DEF_HELPER_0(fistt_ST0, s32) +DEF_HELPER_0(fisttl_ST0, s32) +DEF_HELPER_0(fisttll_ST0, s64) +#else /* VBOX */ +DEF_HELPER_0(fsts_ST0, RTCCUINTREG) +DEF_HELPER_0(fstl_ST0, i64) +DEF_HELPER_0(fist_ST0, RTCCINTREG) +DEF_HELPER_0(fistl_ST0, RTCCINTREG) +DEF_HELPER_0(fistll_ST0, s64) +DEF_HELPER_0(fistt_ST0, RTCCINTREG) +DEF_HELPER_0(fisttl_ST0, RTCCINTREG) +DEF_HELPER_0(fisttll_ST0, s64) +#endif /* VBOX */ +DEF_HELPER_1(fldt_ST0, void, tl) +DEF_HELPER_1(fstt_ST0, void, tl) +DEF_HELPER_0(fpush, void) +DEF_HELPER_0(fpop, void) +DEF_HELPER_0(fdecstp, void) +DEF_HELPER_0(fincstp, void) +DEF_HELPER_1(ffree_STN, void, int) +DEF_HELPER_0(fmov_ST0_FT0, void) +DEF_HELPER_1(fmov_FT0_STN, void, int) +DEF_HELPER_1(fmov_ST0_STN, void, int) +DEF_HELPER_1(fmov_STN_ST0, void, int) +DEF_HELPER_1(fxchg_ST0_STN, void, int) +DEF_HELPER_0(fcom_ST0_FT0, void) +DEF_HELPER_0(fucom_ST0_FT0, void) +DEF_HELPER_0(fcomi_ST0_FT0, void) +DEF_HELPER_0(fucomi_ST0_FT0, void) +DEF_HELPER_0(fadd_ST0_FT0, void) +DEF_HELPER_0(fmul_ST0_FT0, void) +DEF_HELPER_0(fsub_ST0_FT0, void) +DEF_HELPER_0(fsubr_ST0_FT0, void) +DEF_HELPER_0(fdiv_ST0_FT0, void) +DEF_HELPER_0(fdivr_ST0_FT0, void) +DEF_HELPER_1(fadd_STN_ST0, void, int) +DEF_HELPER_1(fmul_STN_ST0, void, int) +DEF_HELPER_1(fsub_STN_ST0, void, int) +DEF_HELPER_1(fsubr_STN_ST0, void, int) +DEF_HELPER_1(fdiv_STN_ST0, void, int) +DEF_HELPER_1(fdivr_STN_ST0, void, int) +DEF_HELPER_0(fchs_ST0, void) +DEF_HELPER_0(fabs_ST0, void) +DEF_HELPER_0(fxam_ST0, void) +DEF_HELPER_0(fld1_ST0, void) +DEF_HELPER_0(fldl2t_ST0, void) +DEF_HELPER_0(fldl2e_ST0, void) +DEF_HELPER_0(fldpi_ST0, void) +DEF_HELPER_0(fldlg2_ST0, void) +DEF_HELPER_0(fldln2_ST0, void) +DEF_HELPER_0(fldz_ST0, void) +DEF_HELPER_0(fldz_FT0, void) +#ifndef VBOX +DEF_HELPER_0(fnstsw, i32) +DEF_HELPER_0(fnstcw, i32) +#else /* VBOX */ +DEF_HELPER_0(fnstsw, RTCCUINTREG) +DEF_HELPER_0(fnstcw, RTCCUINTREG) +#endif /* VBOX */ +DEF_HELPER_1(fldcw, void, i32) +DEF_HELPER_0(fclex, void) +DEF_HELPER_0(fwait, void) +DEF_HELPER_0(fninit, void) +DEF_HELPER_1(fbld_ST0, void, tl) +DEF_HELPER_1(fbst_ST0, void, tl) +DEF_HELPER_0(f2xm1, void) +DEF_HELPER_0(fyl2x, void) +DEF_HELPER_0(fptan, void) +DEF_HELPER_0(fpatan, void) +DEF_HELPER_0(fxtract, void) +DEF_HELPER_0(fprem1, void) +DEF_HELPER_0(fprem, void) +DEF_HELPER_0(fyl2xp1, void) +DEF_HELPER_0(fsqrt, void) +DEF_HELPER_0(fsincos, void) +DEF_HELPER_0(frndint, void) +DEF_HELPER_0(fscale, void) +DEF_HELPER_0(fsin, void) +DEF_HELPER_0(fcos, void) +DEF_HELPER_2(fstenv, void, tl, int) +DEF_HELPER_2(fldenv, void, tl, int) +DEF_HELPER_2(fsave, void, tl, int) +DEF_HELPER_2(frstor, void, tl, int) +DEF_HELPER_2(fxsave, void, tl, int) +DEF_HELPER_2(fxrstor, void, tl, int) +DEF_HELPER_1(bsf, tl, tl) +DEF_HELPER_1(bsr, tl, tl) +DEF_HELPER_2(lzcnt, tl, tl, int) + +/* MMX/SSE */ + 
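+/* The MMX/SSE helpers are stamped out from a single template:
+   ops_sse_header.h is included below with SHIFT 0 for the 64-bit MMX forms
+   and with SHIFT 1 for the 128-bit SSE forms -- the same multiple-inclusion
+   trick helper_template.h uses (DATA_BITS = 1 << (3 + SHIFT)) to produce
+   the b/w/l/q variants of the flag-computation and rotate helpers. */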
+DEF_HELPER_0(enter_mmx, void) +DEF_HELPER_0(emms, void) +DEF_HELPER_2(movq, void, ptr, ptr) + +#define SHIFT 0 +#include "ops_sse_header.h" +#define SHIFT 1 +#include "ops_sse_header.h" + +DEF_HELPER_2(rclb, tl, tl, tl) +DEF_HELPER_2(rclw, tl, tl, tl) +DEF_HELPER_2(rcll, tl, tl, tl) +DEF_HELPER_2(rcrb, tl, tl, tl) +DEF_HELPER_2(rcrw, tl, tl, tl) +DEF_HELPER_2(rcrl, tl, tl, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_2(rclq, tl, tl, tl) +DEF_HELPER_2(rcrq, tl, tl, tl) +#endif + +#ifdef VBOX +DEF_HELPER_1(write_eflags_vme, void, tl) +DEF_HELPER_0(read_eflags_vme, tl) +DEF_HELPER_0(cli_vme, void) +DEF_HELPER_0(sti_vme, void) +DEF_HELPER_0(check_external_event, void) +DEF_HELPER_0(dump_state, void) +DEF_HELPER_1(sync_seg, void, i32) + +void helper_external_event(void); +void helper_record_call(void); + +/* in op_helper.c */ +void sync_seg(CPUX86State *env1, int seg_reg, int selector); +void sync_ldtr(CPUX86State *env1, int selector); +#endif /* VBOX */ + +#include "def-helper.h" diff --git a/src/recompiler/target-i386/helper_template.h b/src/recompiler/target-i386/helper_template.h new file mode 100644 index 00000000..193b3274 --- /dev/null +++ b/src/recompiler/target-i386/helper_template.h @@ -0,0 +1,344 @@ +/* + * i386 helpers + * + * Copyright (c) 2008 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#define DATA_BITS (1 << (3 + SHIFT)) +#define SHIFT_MASK (DATA_BITS - 1) +#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) +#if DATA_BITS <= 32 +#define SHIFT1_MASK 0x1f +#else +#define SHIFT1_MASK 0x3f +#endif + +#if DATA_BITS == 8 +#define SUFFIX b +#define DATA_TYPE uint8_t +#define DATA_STYPE int8_t +#define DATA_MASK 0xff +#elif DATA_BITS == 16 +#define SUFFIX w +#define DATA_TYPE uint16_t +#define DATA_STYPE int16_t +#define DATA_MASK 0xffff +#elif DATA_BITS == 32 +#define SUFFIX l +#define DATA_TYPE uint32_t +#define DATA_STYPE int32_t +#define DATA_MASK 0xffffffff +#elif DATA_BITS == 64 +#define SUFFIX q +#define DATA_TYPE uint64_t +#define DATA_STYPE int64_t +#define DATA_MASK 0xffffffffffffffffULL +#else +#error unhandled operand size +#endif + +/* dynamic flags computation */ + +static int glue(compute_all_add, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_add, SUFFIX)(void) +{ + int cf; + target_long src1; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_adc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC - 1; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_adc, SUFFIX)(void) +{ + int cf; + target_long src1; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_sub, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + CC_SRC; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sub, SUFFIX)(void) +{ + int cf; + target_long src1, src2; + src1 = CC_DST + CC_SRC; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + return cf; +} + +static int glue(compute_all_sbb, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + CC_SRC + 1; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sbb, SUFFIX)(void) +{ + int cf; + target_long src1, src2; + src1 = CC_DST + CC_SRC + 1; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; + return cf; +} + +static int glue(compute_all_logic, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = 0; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; + zf = 
((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = 0; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_logic, SUFFIX)(void) +{ + return 0; +} + +static int glue(compute_all_inc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST - 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11; + return cf | pf | af | zf | sf | of; +} + +#if DATA_BITS == 32 +static int glue(compute_c_inc, SUFFIX)(void) +{ + return CC_SRC; +} +#endif + +static int glue(compute_all_dec, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_all_shl, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + /* of is defined if shift count == 1 */ + of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_shl, SUFFIX)(void) +{ + return (CC_SRC >> (DATA_BITS - 1)) & CC_C; +} + +#if DATA_BITS == 32 +static int glue(compute_c_sar, SUFFIX)(void) +{ + return CC_SRC & 1; +} +#endif + +static int glue(compute_all_sar, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = CC_SRC & 1; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + /* of is defined if shift count == 1 */ + of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +#if DATA_BITS == 32 +static int glue(compute_c_mul, SUFFIX)(void) +{ + int cf; + cf = (CC_SRC != 0); + return cf; +} +#endif + +/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and + CF are modified and it is slower to do that. 
*/ +static int glue(compute_all_mul, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = (CC_SRC != 0); + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = cf << 11; + return cf | pf | af | zf | sf | of; +} + +/* shifts */ + +target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1) +{ + int count, eflags; + target_ulong src; + target_long res; + + count = t1 & SHIFT1_MASK; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = helper_cc_compute_all(CC_OP); + t0 &= DATA_MASK; + src = t0; + res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); + if (count > 1) + res |= t0 >> (DATA_BITS + 1 - count); + t0 = res; + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | + ((src >> (DATA_BITS - count)) & CC_C); + } else { + env->cc_tmp = -1; + } + return t0; +} + +target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1) +{ + int count, eflags; + target_ulong src; + target_long res; + + count = t1 & SHIFT1_MASK; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = helper_cc_compute_all(CC_OP); + t0 &= DATA_MASK; + src = t0; + res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count)); + if (count > 1) + res |= t0 << (DATA_BITS + 1 - count); + t0 = res; + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | + ((src >> (count - 1)) & CC_C); + } else { + env->cc_tmp = -1; + } + return t0; +} + +#undef DATA_BITS +#undef SHIFT_MASK +#undef SHIFT1_MASK +#undef SIGN_MASK +#undef DATA_TYPE +#undef DATA_STYPE +#undef DATA_MASK +#undef SUFFIX diff --git a/src/recompiler/target-i386/op_helper.c b/src/recompiler/target-i386/op_helper.c new file mode 100644 index 00000000..07b58f8d --- /dev/null +++ b/src/recompiler/target-i386/op_helper.c @@ -0,0 +1,7164 @@ +/* + * i386 helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#include "exec.h" +#include "exec-all.h" +#include "host-utils.h" +#include "ioport.h" + +#ifdef VBOX +# include "qemu-common.h" +# include <math.h> +# include "tcg.h" +# include <VBox/err.h> +#endif /* VBOX */ + +//#define DEBUG_PCALL + + +#ifdef DEBUG_PCALL +# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__) +# define LOG_PCALL_STATE(env) \ + log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP) +#else +# define LOG_PCALL(...) do { } while (0) +# define LOG_PCALL_STATE(env) do { } while (0) +#endif + + +#if 0 +#define raise_exception_err(a, b)\ +do {\ + qemu_log("raise_exception line=%d\n", __LINE__);\ + (raise_exception_err)(a, b);\ +} while (0) +#endif + +static const uint8_t parity_table[256] = { + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, +}; + +/* modulo 17 table */ +static const uint8_t rclw_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9,10,11,12,13,14,15, + 16, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9,10,11,12,13,14, +}; + +/* modulo 9 table */ +static const uint8_t rclb_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 0, 1, 2, 3, 4, +}; + +static const CPU86_LDouble f15rk[7] = +{ + 0.00000000000000000000L, + 1.00000000000000000000L, + 3.14159265358979323851L, /*pi*/ + 0.30102999566398119523L, /*lg2*/ + 0.69314718055994530943L, /*ln2*/ + 1.44269504088896340739L, /*l2e*/ + 3.32192809488736234781L, /*l2t*/ +}; + +/* broken thread support */ + +static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED; + +void helper_lock(void) +{ + spin_lock(&global_cpu_lock); +} + +void helper_unlock(void) +{ + spin_unlock(&global_cpu_lock); +} + +void helper_write_eflags(target_ulong t0, uint32_t update_mask) +{ + load_eflags(t0, update_mask); +} + +target_ulong helper_read_eflags(void) +{ + uint32_t eflags; + eflags = helper_cc_compute_all(CC_OP); + eflags |= (DF & DF_MASK); + eflags |= env->eflags & ~(VM_MASK | RF_MASK); + return eflags; +} + +#ifdef VBOX + +void helper_write_eflags_vme(target_ulong t0) +{ + unsigned int new_eflags = t0; + + assert(env->eflags & (1<<VM_SHIFT)); + + /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */ + /* if TF will be set -> #GP */ + if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK)) + || (new_eflags & TF_MASK)) { + raise_exception(EXCP0D_GPF); + } else { + load_eflags(new_eflags, + (TF_MASK | AC_MASK | 
ID_MASK | NT_MASK) & 0xffff); + + if (new_eflags & IF_MASK) { + env->eflags |= VIF_MASK; + } else { + env->eflags &= ~VIF_MASK; + } + } +} + +target_ulong helper_read_eflags_vme(void) +{ + uint32_t eflags; + eflags = helper_cc_compute_all(CC_OP); + eflags |= (DF & DF_MASK); + eflags |= env->eflags & ~(VM_MASK | RF_MASK); + if (env->eflags & VIF_MASK) + eflags |= IF_MASK; + else + eflags &= ~IF_MASK; + + /* According to AMD manual, should be read with IOPL == 3 */ + eflags |= (3 << IOPL_SHIFT); + + /* We only use helper_read_eflags_vme() in 16-bits mode */ + return eflags & 0xffff; +} + +void helper_dump_state() +{ + LogRel(("CS:EIP=%08x:%08x, FLAGS=%08x\n", env->segs[R_CS].base, env->eip, env->eflags)); + LogRel(("EAX=%08x\tECX=%08x\tEDX=%08x\tEBX=%08x\n", + (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX], + (uint32_t)env->regs[R_EDX], (uint32_t)env->regs[R_EBX])); + LogRel(("ESP=%08x\tEBP=%08x\tESI=%08x\tEDI=%08x\n", + (uint32_t)env->regs[R_ESP], (uint32_t)env->regs[R_EBP], + (uint32_t)env->regs[R_ESI], (uint32_t)env->regs[R_EDI])); +} + +/** + * Updates e2 with the DESC_A_MASK, writes it to the descriptor table, and + * returns the updated e2. + * + * @returns e2 with A set. + * @param e2 The 2nd selector DWORD. + */ +static uint32_t set_segment_accessed(int selector, uint32_t e2) +{ + SegmentCache *dt = selector & X86_SEL_LDT ? &env->ldt : &env->gdt; + target_ulong ptr = dt->base + (selector & X86_SEL_MASK); + + e2 |= DESC_A_MASK; + stl_kernel(ptr + 4, e2); + return e2; +} + +#endif /* VBOX */ + +/* return non zero if error */ +static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr, + int selector) +{ + SegmentCache *dt; + int index; + target_ulong ptr; + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + if ((index + 7) > dt->limit) + return -1; + ptr = dt->base + index; + *e1_ptr = ldl_kernel(ptr); + *e2_ptr = ldl_kernel(ptr + 4); + return 0; +} + +static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2) +{ + unsigned int limit; + limit = (e1 & 0xffff) | (e2 & 0x000f0000); + if (e2 & DESC_G_MASK) + limit = (limit << 12) | 0xfff; + return limit; +} + +static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2) +{ + return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000)); +} + +static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2) +{ + sc->base = get_seg_base(e1, e2); + sc->limit = get_seg_limit(e1, e2); +#ifndef VBOX + sc->flags = e2; +#else + sc->flags = e2 & DESC_RAW_FLAG_BITS; + sc->newselector = 0; + sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID; +#endif +} + +/* init the segment cache in vm86 mode. */ +static inline void load_seg_vm(int seg, int selector) +{ + selector &= 0xffff; +#ifdef VBOX + /* flags must be 0xf3; expand-up read/write accessed data segment with DPL=3. 
(VT-x) */ + unsigned flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK; + flags |= (3 << DESC_DPL_SHIFT); + + cpu_x86_load_seg_cache(env, seg, selector, + (selector << 4), 0xffff, flags); +#else /* VBOX */ + cpu_x86_load_seg_cache(env, seg, selector, + (selector << 4), 0xffff, 0); +#endif /* VBOX */ +} + +static inline void get_ss_esp_from_tss(uint32_t *ss_ptr, + uint32_t *esp_ptr, int dpl) +{ +#ifndef VBOX + int type, index, shift; +#else + unsigned int type, index, shift; +#endif + +#if 0 + { + int i; + printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit); + for(i=0;i<env->tr.limit;i++) { + printf("%02x ", env->tr.base[i]); + if ((i & 7) == 7) printf("\n"); + } + printf("\n"); + } +#endif + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 3) + cpu_abort(env, "invalid tss type"); + shift = type >> 3; + index = (dpl * 4 + 2) << shift; + if (index + (4 << shift) - 1 > env->tr.limit) + raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + if (shift == 0) { + *esp_ptr = lduw_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 2); + } else { + *esp_ptr = ldl_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 4); + } +} + +/* XXX: merge with load_seg() */ +static void tss_load_seg(int seg_reg, int selector) +{ + uint32_t e1, e2; + int rpl, dpl, cpl; + +#ifdef VBOX + e1 = e2 = 0; /* gcc warning? */ + cpl = env->hflags & HF_CPL_MASK; + /* Trying to load a selector with CPL=1? */ + if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0)) + { + Log(("RPL 1 -> sel %04X -> %04X (tss_load_seg)\n", selector, selector & 0xfffc)); + selector = selector & 0xfffc; + } +#endif /* VBOX */ + + if ((selector & 0xfffc) != 0) { + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if (!(e2 & DESC_S_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (seg_reg == R_CS) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + /* XXX: is it correct ? 
*/ + if (dpl != rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if ((e2 & DESC_C_MASK) && dpl > rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } else if (seg_reg == R_SS) { + /* SS must be writable data */ + if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if (dpl != cpl || dpl != rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } else { + /* not readable code */ + if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + /* if data or non conforming code, checks the rights */ + if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) { + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + cpu_x86_load_seg_cache(env, seg_reg, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + } else { + if (seg_reg == R_SS || seg_reg == R_CS) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); +#ifdef VBOX +# if 0 /** @todo now we ignore loading 0 selectors, need to check what is correct once */ + cpu_x86_load_seg_cache(env, seg_reg, selector, + 0, 0, 0); +# endif +#endif /* VBOX */ + } +} + +#define SWITCH_TSS_JMP 0 +#define SWITCH_TSS_IRET 1 +#define SWITCH_TSS_CALL 2 + +/* XXX: restore CPU state in registers (PowerPC case) */ +static void switch_tss(int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip) +{ + int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i; + target_ulong tss_base; + uint32_t new_regs[8], new_segs[6]; + uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap; + uint32_t old_eflags, eflags_mask; + SegmentCache *dt; +#ifndef VBOX + int index; +#else + unsigned int index; +#endif + target_ulong ptr; + + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source); + + /* if task gate, we read the TSS segment and we load it */ + if (type == 5) { + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc); + tss_selector = e1 >> 16; + if (tss_selector & 4) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + if (load_segment(&e1, &e2, tss_selector) != 0) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + if (e2 & DESC_S_MASK) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 1) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + } + + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc); + + if (type & 8) + tss_limit_max = 103; + else + tss_limit_max = 43; + tss_limit = get_seg_limit(e1, e2); + tss_base = get_seg_base(e1, e2); + if ((tss_selector & 4) != 0 || + tss_limit < tss_limit_max) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if (old_type & 8) + old_tss_limit_max = 103; + else + old_tss_limit_max = 43; + +#ifndef VBOX /* The old TSS is written first... 
*/ + /* read all the registers from the new TSS */ + if (type & 8) { + /* 32 bit */ + new_cr3 = ldl_kernel(tss_base + 0x1c); + new_eip = ldl_kernel(tss_base + 0x20); + new_eflags = ldl_kernel(tss_base + 0x24); + for(i = 0; i < 8; i++) + new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4)); + for(i = 0; i < 6; i++) + new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x60); + new_trap = ldl_kernel(tss_base + 0x64); + } else { + /* 16 bit */ + new_cr3 = 0; + new_eip = lduw_kernel(tss_base + 0x0e); + new_eflags = lduw_kernel(tss_base + 0x10); + for(i = 0; i < 8; i++) + new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000; + for(i = 0; i < 4; i++) + new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x2a); + new_segs[R_FS] = 0; + new_segs[R_GS] = 0; + new_trap = 0; + } +#endif + + /* NOTE: we must avoid memory exceptions during the task switch, + so we make dummy accesses before */ + /* XXX: it can still fail in some cases, so a bigger hack is + necessary to valid the TLB after having done the accesses */ + + v1 = ldub_kernel(env->tr.base); + v2 = ldub_kernel(env->tr.base + old_tss_limit_max); + stb_kernel(env->tr.base, v1); + stb_kernel(env->tr.base + old_tss_limit_max, v2); + + /* clear busy bit (it is restartable) */ + if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { + target_ulong ptr; + uint32_t e2; + ptr = env->gdt.base + (env->tr.selector & ~7); + e2 = ldl_kernel(ptr + 4); + e2 &= ~DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + old_eflags = compute_eflags(); + if (source == SWITCH_TSS_IRET) + old_eflags &= ~NT_MASK; + + /* save the current state in the old TSS */ + if (type & 8) { + /* 32 bit */ + stl_kernel(env->tr.base + 0x20, next_eip); + stl_kernel(env->tr.base + 0x24, old_eflags); + stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX); + stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX); + stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX); + stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX); + stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP); + stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP); + stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI); + stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI); + for(i = 0; i < 6; i++) + stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector); +#if defined(VBOX) && defined(DEBUG) + printf("TSS 32 bits switch\n"); + printf("Saving CS=%08X\n", env->segs[R_CS].selector); +#endif + } else { + /* 16 bit */ + stw_kernel(env->tr.base + 0x0e, next_eip); + stw_kernel(env->tr.base + 0x10, old_eflags); + stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX); + stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX); + stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX); + stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX); + stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP); + stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP); + stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI); + stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI); + for(i = 0; i < 4; i++) + stw_kernel(env->tr.base + (0x22 + i * 2), env->segs[i].selector); + } + +#ifdef VBOX + /* read all the registers from the new TSS - may be the same as the old one */ + if (type & 8) { + /* 32 bit */ + new_cr3 = ldl_kernel(tss_base + 0x1c); + new_eip = ldl_kernel(tss_base + 0x20); + new_eflags = ldl_kernel(tss_base + 0x24); + for(i = 0; i < 8; i++) + new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4)); + for(i = 0; i < 6; i++) + new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x60); + new_trap = 
ldl_kernel(tss_base + 0x64); + } else { + /* 16 bit */ + new_cr3 = 0; + new_eip = lduw_kernel(tss_base + 0x0e); + new_eflags = lduw_kernel(tss_base + 0x10); + for(i = 0; i < 8; i++) + new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000; + for(i = 0; i < 4; i++) + new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2)); + new_ldt = lduw_kernel(tss_base + 0x2a); + new_segs[R_FS] = 0; + new_segs[R_GS] = 0; + new_trap = 0; + } +#endif + + /* now if an exception occurs, it will occurs in the next task + context */ + + if (source == SWITCH_TSS_CALL) { + stw_kernel(tss_base, env->tr.selector); + new_eflags |= NT_MASK; + } + + /* set busy bit */ + if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) { + target_ulong ptr; + uint32_t e2; + ptr = env->gdt.base + (tss_selector & ~7); + e2 = ldl_kernel(ptr + 4); + e2 |= DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + + /* set the new CPU state */ + /* from this point, any exception which occurs can give problems */ + env->cr[0] |= CR0_TS_MASK; + env->hflags |= HF_TS_MASK; + env->tr.selector = tss_selector; + env->tr.base = tss_base; + env->tr.limit = tss_limit; +#ifndef VBOX + env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK; +#else + env->tr.flags = (e2 | DESC_TSS_BUSY_MASK) & DESC_RAW_FLAG_BITS; + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) { + cpu_x86_update_cr3(env, new_cr3); + } + + /* load all registers without an exception, then reload them with + possible exception */ + env->eip = new_eip; + eflags_mask = TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK; + if (!(type & 8)) + eflags_mask &= 0xffff; + load_eflags(new_eflags, eflags_mask); + /* XXX: what to do in 16 bit case ? 
*/ + EAX = new_regs[0]; + ECX = new_regs[1]; + EDX = new_regs[2]; + EBX = new_regs[3]; + ESP = new_regs[4]; + EBP = new_regs[5]; + ESI = new_regs[6]; + EDI = new_regs[7]; + if (new_eflags & VM_MASK) { + for(i = 0; i < 6; i++) + load_seg_vm(i, new_segs[i]); + /* in vm86, CPL is always 3 */ + cpu_x86_set_cpl(env, 3); + } else { + /* CPL is set the RPL of CS */ + cpu_x86_set_cpl(env, new_segs[R_CS] & 3); + /* first just selectors as the rest may trigger exceptions */ + for(i = 0; i < 6; i++) + cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0); + } + + env->ldt.selector = new_ldt & ~4; + env->ldt.base = 0; + env->ldt.limit = 0; + env->ldt.flags = 0; +#ifdef VBOX + env->ldt.flags = DESC_INTEL_UNUSABLE; + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + /* load the LDT */ + if (new_ldt & 4) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + + if ((new_ldt & 0xfffc) != 0) { + dt = &env->gdt; + index = new_ldt & ~7; + if ((index + 7) > dt->limit) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + load_seg_cache_raw_dt(&env->ldt, e1, e2); + } + + /* load the segments */ + if (!(new_eflags & VM_MASK)) { + tss_load_seg(R_CS, new_segs[R_CS]); + tss_load_seg(R_SS, new_segs[R_SS]); + tss_load_seg(R_ES, new_segs[R_ES]); + tss_load_seg(R_DS, new_segs[R_DS]); + tss_load_seg(R_FS, new_segs[R_FS]); + tss_load_seg(R_GS, new_segs[R_GS]); + } + + /* check that EIP is in the CS segment limits */ + if (new_eip > env->segs[R_CS].limit) { + /* XXX: different exception if CALL ? 
*/ + raise_exception_err(EXCP0D_GPF, 0); + } + +#ifndef CONFIG_USER_ONLY + /* reset local breakpoints */ + if (env->dr[7] & 0x55) { + for (i = 0; i < 4; i++) { + if (hw_breakpoint_enabled(env->dr[7], i) == 0x1) + hw_breakpoint_remove(env, i); + } + env->dr[7] &= ~0x55; + } +#endif +} + +/* check if Port I/O is allowed in TSS */ +static inline void check_io(int addr, int size) +{ +#ifndef VBOX + int io_offset, val, mask; +#else + int val, mask; + unsigned int io_offset; +#endif /* VBOX */ + + /* TSS must be a valid 32 bit one */ + if (!(env->tr.flags & DESC_P_MASK) || + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 || + env->tr.limit < 103) + goto fail; + io_offset = lduw_kernel(env->tr.base + 0x66); + io_offset += (addr >> 3); + /* Note: the check needs two bytes */ + if ((io_offset + 1) > env->tr.limit) + goto fail; + val = lduw_kernel(env->tr.base + io_offset); + val >>= (addr & 7); + mask = (1 << size) - 1; + /* all bits must be zero to allow the I/O */ + if ((val & mask) != 0) { + fail: + raise_exception_err(EXCP0D_GPF, 0); + } +} + +#ifdef VBOX + +/* Keep in sync with gen_check_external_event() */ +void helper_check_external_event() +{ + if ( (env->interrupt_request & ( CPU_INTERRUPT_EXTERNAL_FLUSH_TLB + | CPU_INTERRUPT_EXTERNAL_EXIT + | CPU_INTERRUPT_EXTERNAL_TIMER + | CPU_INTERRUPT_EXTERNAL_DMA)) + || ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) + && (env->eflags & IF_MASK) + && !(env->hflags & HF_INHIBIT_IRQ_MASK) ) ) + { + helper_external_event(); + } + +} + +void helper_sync_seg(uint32_t reg) +{ + if (env->segs[reg].newselector) + sync_seg(env, reg, env->segs[reg].newselector); +} + +#endif /* VBOX */ + +void helper_check_iob(uint32_t t0) +{ + check_io(t0, 1); +} + +void helper_check_iow(uint32_t t0) +{ + check_io(t0, 2); +} + +void helper_check_iol(uint32_t t0) +{ + check_io(t0, 4); +} + +void helper_outb(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outb(port, data & 0xff); +#else + cpu_outb(env, port, data & 0xff); +#endif +} + +target_ulong helper_inb(uint32_t port) +{ +#ifndef VBOX + return cpu_inb(port); +#else + return cpu_inb(env, port); +#endif +} + +void helper_outw(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outw(port, data & 0xffff); +#else + cpu_outw(env, port, data & 0xffff); +#endif +} + +target_ulong helper_inw(uint32_t port) +{ +#ifndef VBOX + return cpu_inw(port); +#else + return cpu_inw(env, port); +#endif +} + +void helper_outl(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outl(port, data); +#else + cpu_outl(env, port, data); +#endif +} + +target_ulong helper_inl(uint32_t port) +{ +#ifndef VBOX + return cpu_inl(port); +#else + return cpu_inl(env, port); +#endif +} + +static inline unsigned int get_sp_mask(unsigned int e2) +{ + if (e2 & DESC_B_MASK) + return 0xffffffff; + else + return 0xffff; +} + +static int exeption_has_error_code(int intno) +{ + switch(intno) { + case 8: + case 10: + case 11: + case 12: + case 13: + case 14: + case 17: + return 1; + } + return 0; +} + +#ifdef TARGET_X86_64 +#define SET_ESP(val, sp_mask)\ +do {\ + if ((sp_mask) == 0xffff)\ + ESP = (ESP & ~0xffff) | ((val) & 0xffff);\ + else if ((sp_mask) == 0xffffffffLL)\ + ESP = (uint32_t)(val);\ + else\ + ESP = (val);\ +} while (0) +#else +#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask)) +#endif + +/* in 64-bit machines, this can overflow. 
So this segment addition macro + * can be used to trim the value to 32-bit whenever needed */ +#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask)))) + +/* XXX: add a is_user flag to have proper security support */ +#define PUSHW(ssp, sp, sp_mask, val)\ +{\ + sp -= 2;\ + stw_kernel((ssp) + (sp & (sp_mask)), (val));\ +} + +#define PUSHL(ssp, sp, sp_mask, val)\ +{\ + sp -= 4;\ + stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\ +} + +#define POPW(ssp, sp, sp_mask, val)\ +{\ + val = lduw_kernel((ssp) + (sp & (sp_mask)));\ + sp += 2;\ +} + +#define POPL(ssp, sp, sp_mask, val)\ +{\ + val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\ + sp += 4;\ +} + +/* protected mode interrupt */ +static void do_interrupt_protected(int intno, int is_int, int error_code, + unsigned int next_eip, int is_hw) +{ + SegmentCache *dt; + target_ulong ptr, ssp; + int type, dpl, selector, ss_dpl, cpl; + int has_error_code, new_stack, shift; + uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0; + uint32_t old_eip, sp_mask; + +#ifdef VBOX + if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) + cpu_loop_exit(); +#endif + + has_error_code = 0; + if (!is_int && !is_hw) + has_error_code = exeption_has_error_code(intno); + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + + dt = &env->idt; +#ifndef VBOX + if (intno * 8 + 7 > dt->limit) +#else + if ((unsigned)intno * 8 + 7 > dt->limit) +#endif + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + ptr = dt->base + intno * 8; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + switch(type) { + case 5: /* task gate */ +#ifdef VBOX + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); +#endif + /* must do that check here to return the correct error code */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2); + switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); + if (has_error_code) { + int type; + uint32_t mask; + /* push the error code */ + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + shift = type >> 3; + if (env->segs[R_SS].flags & DESC_B_MASK) + mask = 0xffffffff; + else + mask = 0xffff; + esp = (ESP - (2 << shift)) & mask; + ssp = env->segs[R_SS].base + esp; + if (shift) + stl_kernel(ssp, error_code); + else + stw_kernel(ssp, error_code); + SET_ESP(esp, mask); + } + return; + case 6: /* 286 interrupt gate */ + case 7: /* 286 trap gate */ + case 14: /* 386 interrupt gate */ + case 15: /* 386 trap gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + break; + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2); + selector = e1 >> 16; + offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); +#ifdef VBOX /** @todo figure out when this is done one day... 
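+ * (On real CPUs the accessed bit of the target CS descriptor is set as
+ * part of delivering through the gate; set_segment_accessed() mirrors
+ * that by writing DESC_A_MASK back into the descriptor table entry.)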
*/ + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(selector, e2); +#endif + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + if (!(e2 & DESC_C_MASK) && dpl < cpl) { + /* to inner privilege */ + get_ss_esp_from_tss(&ss, &esp, dpl); + if ((ss & 0xfffc) == 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if ((ss & 3) != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (load_segment(&ss_e1, &ss_e2, ss) != 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#ifdef VBOX /** @todo figure out when this is done one day... */ + if (!(ss_e2 & DESC_A_MASK)) + ss_e2 = set_segment_accessed(ss, ss_e2); +#endif + ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (ss_dpl != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-477 of 253666.pdf */ + raise_exception_err(EXCP0C_STACK, ss & 0xfffc); +#else + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#endif + new_stack = 1; + sp_mask = get_sp_mask(ss_e2); + ssp = get_seg_base(ss_e1, ss_e2); +#if defined(VBOX) && defined(DEBUG) + printf("new stack %04X:%08X gate dpl=%d\n", ss, esp, dpl); +#endif + } else if ((e2 & DESC_C_MASK) || dpl == cpl) { + /* to same privilege */ + if (env->eflags & VM_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + esp = ESP; + dpl = cpl; + } else { + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; /* avoid warning */ + sp_mask = 0; /* avoid warning */ + ssp = 0; /* avoid warning */ + esp = 0; /* avoid warning */ + } + + shift = type >> 3; + +#if 0 + /* XXX: check that enough room is available */ + push_size = 6 + (new_stack << 2) + (has_error_code << 1); + if (env->eflags & VM_MASK) + push_size += 8; + push_size <<= shift; +#endif + if (shift == 1) { + if (new_stack) { + if (env->eflags & VM_MASK) { + PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector); + } + PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector); + PUSHL(ssp, esp, sp_mask, ESP); + } + PUSHL(ssp, esp, sp_mask, compute_eflags()); + PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, esp, sp_mask, old_eip); + if (has_error_code) { + PUSHL(ssp, esp, sp_mask, error_code); + } + } else { + if (new_stack) { + if (env->eflags & VM_MASK) { + PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector); + } + PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector); + PUSHW(ssp, esp, sp_mask, ESP); + } + PUSHW(ssp, esp, sp_mask, compute_eflags()); + PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, esp, sp_mask, old_eip); + if (has_error_code) { + PUSHW(ssp, esp, sp_mask, error_code); + } + } + + if (new_stack) { + if (env->eflags & VM_MASK) { + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, R_FS, 0, 
0, 0, 0); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0); + } + ss = (ss & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_SS, ss, + ssp, get_seg_limit(ss_e1, ss_e2), ss_e2); + } + SET_ESP(esp, sp_mask); + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + env->eip = offset; + + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } +#ifndef VBOX + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); +#else + /* + * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD + * gets confused by seemingly changed EFLAGS. See #3491 and + * public bug #2341. + */ + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK); +#endif +} + +#ifdef VBOX + +/* check if VME interrupt redirection is enabled in TSS */ +DECLINLINE(bool) is_vme_irq_redirected(int intno) +{ + unsigned int io_offset, intredir_offset; + unsigned char val, mask; + + /* TSS must be a valid 32 bit one */ + if (!(env->tr.flags & DESC_P_MASK) || + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 || + env->tr.limit < 103) + goto fail; + io_offset = lduw_kernel(env->tr.base + 0x66); + /* Make sure the io bitmap offset is valid; anything less than sizeof(VBOXTSS) means there's none. */ + if (io_offset < 0x68 + 0x20) + io_offset = 0x68 + 0x20; + /* the virtual interrupt redirection bitmap is located below the io bitmap */ + intredir_offset = io_offset - 0x20; + + intredir_offset += (intno >> 3); + if ((intredir_offset) > env->tr.limit) + goto fail; + + val = ldub_kernel(env->tr.base + intredir_offset); + mask = 1 << (unsigned char)(intno & 7); + + /* bit set means no redirection. */ + if ((val & mask) != 0) { + return false; + } + return true; + +fail: + raise_exception_err(EXCP0D_GPF, 0); + return true; +} + +/* V86 mode software interrupt with CR4.VME=1 */ +static void do_soft_interrupt_vme(int intno, int error_code, unsigned int next_eip) +{ + target_ulong ptr, ssp; + int selector; + uint32_t offset, esp; + uint32_t old_cs, old_eflags; + uint32_t iopl; + + iopl = ((env->eflags >> IOPL_SHIFT) & 3); + + if (!is_vme_irq_redirected(intno)) + { + if (iopl == 3) + { + do_interrupt_protected(intno, 1, error_code, next_eip, 0); + return; + } + else + raise_exception_err(EXCP0D_GPF, 0); + } + + /* virtual mode idt is at linear address 0 */ + ptr = 0 + intno * 4; + offset = lduw_kernel(ptr); + selector = lduw_kernel(ptr + 2); + esp = ESP; + ssp = env->segs[R_SS].base; + old_cs = env->segs[R_CS].selector; + + old_eflags = compute_eflags(); + if (iopl < 3) + { + /* copy VIF into IF and set IOPL to 3 */ + if (env->eflags & VIF_MASK) + old_eflags |= IF_MASK; + else + old_eflags &= ~IF_MASK; + + old_eflags |= (3 << IOPL_SHIFT); + } + + /* XXX: use SS segment size ? 
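+       (in VM86 mode the stack segment always behaves as a 16-bit,
+       real-mode style segment, so the fixed 0xffff mask used below
+       is safe)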
*/ + PUSHW(ssp, esp, 0xffff, old_eflags); + PUSHW(ssp, esp, 0xffff, old_cs); + PUSHW(ssp, esp, 0xffff, next_eip); + + /* update processor state */ + ESP = (ESP & ~0xffff) | (esp & 0xffff); + env->eip = offset; + env->segs[R_CS].selector = selector; + env->segs[R_CS].base = (selector << 4); + env->eflags &= ~(TF_MASK | RF_MASK); + + if (iopl < 3) + env->eflags &= ~VIF_MASK; + else + env->eflags &= ~IF_MASK; +} + +#endif /* VBOX */ + +#ifdef TARGET_X86_64 + +#define PUSHQ(sp, val)\ +{\ + sp -= 8;\ + stq_kernel(sp, (val));\ +} + +#define POPQ(sp, val)\ +{\ + val = ldq_kernel(sp);\ + sp += 8;\ +} + +static inline target_ulong get_rsp_from_tss(int level) +{ + int index; + +#if 0 + printf("TR: base=" TARGET_FMT_lx " limit=%x\n", + env->tr.base, env->tr.limit); +#endif + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + index = 8 * level + 4; + if ((index + 7) > env->tr.limit) + raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + return ldq_kernel(env->tr.base + index); +} + +/* 64 bit interrupt */ +static void do_interrupt64(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw) +{ + SegmentCache *dt; + target_ulong ptr; + int type, dpl, selector, cpl, ist; + int has_error_code, new_stack; + uint32_t e1, e2, e3, ss; + target_ulong old_eip, esp, offset; + +#ifdef VBOX + if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) + cpu_loop_exit(); +#endif + + has_error_code = 0; + if (!is_int && !is_hw) + has_error_code = exeption_has_error_code(intno); + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + + dt = &env->idt; + if (intno * 16 + 15 > dt->limit) + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + ptr = dt->base + intno * 16; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + e3 = ldl_kernel(ptr + 8); + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + switch(type) { + case 14: /* 386 interrupt gate */ + case 15: /* 386 trap gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + break; + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2); + selector = e1 >> 16; + offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff); + ist = e2 & 7; + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) { + /* to inner privilege */ + if (ist != 0) + esp = get_rsp_from_tss(ist + 3); + else + esp = get_rsp_from_tss(dpl); + esp &= ~0xfLL; /* align stack */ + ss = 0; + new_stack = 1; + } else if ((e2 & DESC_C_MASK) || dpl == cpl) { + /* to same privilege */ + if (env->eflags & VM_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; + if (ist != 0) + esp = get_rsp_from_tss(ist + 3); + else + esp = ESP; + esp &= ~0xfLL; /* align stack */ + dpl = 
cpl; + } else { + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; /* avoid warning */ + esp = 0; /* avoid warning */ + } + + PUSHQ(esp, env->segs[R_SS].selector); + PUSHQ(esp, ESP); + PUSHQ(esp, compute_eflags()); + PUSHQ(esp, env->segs[R_CS].selector); + PUSHQ(esp, old_eip); + if (has_error_code) { + PUSHQ(esp, error_code); + } + + if (new_stack) { + ss = 0 | dpl; +#ifndef VBOX + cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0); +#else + cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, dpl << DESC_DPL_SHIFT); +#endif + } + ESP = esp; + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + env->eip = offset; + + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } +#ifndef VBOX + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); +#else /* VBOX */ + /* + * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD + * gets confused by seemingly changed EFLAGS. See #3491 and + * public bug #2341. + */ + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK); +#endif /* VBOX */ +} +#endif + +#ifdef TARGET_X86_64 +#if defined(CONFIG_USER_ONLY) +void helper_syscall(int next_eip_addend) +{ + env->exception_index = EXCP_SYSCALL; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit(); +} +#else +void helper_syscall(int next_eip_addend) +{ + int selector; + + if (!(env->efer & MSR_EFER_SCE)) { + raise_exception_err(EXCP06_ILLOP, 0); + } + selector = (env->star >> 32) & 0xffff; + if (env->hflags & HF_LMA_MASK) { + int code64; + + ECX = env->eip + next_eip_addend; + env->regs[11] = compute_eflags(); + + code64 = env->hflags & HF_CS64_MASK; + + cpu_x86_set_cpl(env, 0); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + env->eflags &= ~env->fmask; + load_eflags(env->eflags, 0); + if (code64) + env->eip = env->lstar; + else + env->eip = env->cstar; + } else { + ECX = (uint32_t)(env->eip + next_eip_addend); + + cpu_x86_set_cpl(env, 0); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); + env->eip = (uint32_t)env->star; + } +} +#endif +#endif + +#ifdef TARGET_X86_64 +void helper_sysret(int dflag) +{ + int cpl, selector; + + if (!(env->efer & MSR_EFER_SCE)) { + raise_exception_err(EXCP06_ILLOP, 0); + } + cpl = env->hflags & HF_CPL_MASK; + if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + selector = (env->star >> 48) & 0xffff; + if (env->hflags & HF_LMA_MASK) { + if (dflag == 2) { + cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | + DESC_L_MASK); + env->eip = ECX; + } else { + cpu_x86_load_seg_cache(env, R_CS, selector | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + 
DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + env->eip = (uint32_t)ECX; + } + cpu_x86_load_seg_cache(env, R_SS, selector + 8, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK); + cpu_x86_set_cpl(env, 3); + } else { + cpu_x86_load_seg_cache(env, R_CS, selector | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + env->eip = (uint32_t)ECX; + cpu_x86_load_seg_cache(env, R_SS, selector + 8, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + env->eflags |= IF_MASK; + cpu_x86_set_cpl(env, 3); + } +} +#endif + +#ifdef VBOX + +/** + * Checks and processes external VMM events. + * Called by op_check_external_event() when any of the flags is set and can be serviced. + */ +void helper_external_event(void) +{ +# if defined(RT_OS_DARWIN) && defined(VBOX_STRICT) + uintptr_t uSP; +# ifdef RT_ARCH_AMD64 + __asm__ __volatile__("movq %%rsp, %0" : "=r" (uSP)); +# else + __asm__ __volatile__("movl %%esp, %0" : "=r" (uSP)); +# endif + AssertMsg(!(uSP & 15), ("xSP=%#p\n", uSP)); +# endif + /* Keep in sync with flags checked by gen_check_external_event() */ + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_HARD); + cpu_interrupt(env, CPU_INTERRUPT_HARD); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_EXIT) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_EXIT); + cpu_exit(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_DMA) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_DMA); + remR3DmaRun(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_TIMER); + remR3TimersRun(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_HARD); + cpu_interrupt(env, CPU_INTERRUPT_HARD); + } +} + +/* helper for recording call instruction addresses for later scanning */ +void helper_record_call() +{ + if ( !(env->state & CPU_RAW_RING0) + && (env->cr[0] & CR0_PG_MASK) + && !(env->eflags & X86_EFL_IF)) + remR3RecordCall(env); +} + +#endif /* VBOX */ + +/* real mode interrupt */ +static void do_interrupt_real(int intno, int is_int, int error_code, + unsigned int next_eip) +{ + SegmentCache *dt; + target_ulong ptr, ssp; + int selector; + uint32_t offset, esp; + uint32_t old_cs, old_eip; + + /* real mode (simpler !) */ + dt = &env->idt; +#ifndef VBOX + if (intno * 4 + 3 > dt->limit) +#else + if ((unsigned)intno * 4 + 3 > dt->limit) +#endif + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + ptr = dt->base + intno * 4; + offset = lduw_kernel(ptr); + selector = lduw_kernel(ptr + 2); + esp = ESP; + ssp = env->segs[R_SS].base; + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + old_cs = env->segs[R_CS].selector; + /* XXX: use SS segment size ? 
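+       A stricter variant would derive the mask from the cached SS
+       descriptor, e.g.:
+
+           uint32_t mask = get_sp_mask(env->segs[R_SS].flags);
+           PUSHW(ssp, esp, mask, compute_eflags());
+
+       but real-mode segments are effectively 16-bit, so the fixed
+       0xffff is kept below.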
*/ + PUSHW(ssp, esp, 0xffff, compute_eflags()); + PUSHW(ssp, esp, 0xffff, old_cs); + PUSHW(ssp, esp, 0xffff, old_eip); + + /* update processor state */ + ESP = (ESP & ~0xffff) | (esp & 0xffff); + env->eip = offset; + env->segs[R_CS].selector = selector; + env->segs[R_CS].base = (selector << 4); + env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK); +} + +/* fake user mode interrupt */ +void do_interrupt_user(int intno, int is_int, int error_code, + target_ulong next_eip) +{ + SegmentCache *dt; + target_ulong ptr; + int dpl, cpl, shift; + uint32_t e2; + + dt = &env->idt; + if (env->hflags & HF_LMA_MASK) { + shift = 4; + } else { + shift = 3; + } + ptr = dt->base + (intno << shift); + e2 = ldl_kernel(ptr + 4); + + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, (intno << shift) + 2); + + /* Since we emulate only user space, we cannot do more than + exiting the emulation with the suitable exception and error + code */ + if (is_int) + EIP = next_eip; +} + +#if !defined(CONFIG_USER_ONLY) +static void handle_even_inj(int intno, int is_int, int error_code, + int is_hw, int rm) +{ + uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + if (!(event_inj & SVM_EVTINJ_VALID)) { + int type; + if (is_int) + type = SVM_EVTINJ_TYPE_SOFT; + else + type = SVM_EVTINJ_TYPE_EXEPT; + event_inj = intno | type | SVM_EVTINJ_VALID; + if (!rm && exeption_has_error_code(intno)) { + event_inj |= SVM_EVTINJ_VALID_ERR; + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code); + } + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj); + } +} +#endif + +/* + * Begin execution of an interruption. is_int is TRUE if coming from + * the int instruction. next_eip is the EIP value AFTER the interrupt + * instruction. It is only relevant if is_int is TRUE. + */ +void do_interrupt(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw) +{ + if (qemu_loglevel_mask(CPU_LOG_INT)) { + if ((env->cr[0] & CR0_PE_MASK)) { + static int count; + qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx, + count, intno, error_code, is_int, + env->hflags & HF_CPL_MASK, + env->segs[R_CS].selector, EIP, + (int)env->segs[R_CS].base + EIP, + env->segs[R_SS].selector, ESP); + if (intno == 0x0e) { + qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]); + } else { + qemu_log(" EAX=" TARGET_FMT_lx, EAX); + } + qemu_log("\n"); + log_cpu_state(env, X86_DUMP_CCOP); +#if 0 + { + int i; + uint8_t *ptr; + qemu_log(" code="); + ptr = env->segs[R_CS].base + env->eip; + for(i = 0; i < 16; i++) { + qemu_log(" %02x", ldub(ptr + i)); + } + qemu_log("\n"); + } +#endif + count++; + } + } +#ifdef VBOX + if (RT_UNLIKELY(env->state & CPU_EMULATE_SINGLE_STEP)) { + if (is_int) { + RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv%s\n", + intno, error_code, (RTGCPTR)env->eip, is_hw ? " hw" : ""); + } else { + RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv next=%#RGv%s\n", + intno, error_code, (RTGCPTR)env->eip, (RTGCPTR)next_eip, is_hw ? 
" hw" : ""); + } + } +#endif + if (env->cr[0] & CR0_PE_MASK) { +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) + handle_even_inj(intno, is_int, error_code, is_hw, 0); +#endif +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + do_interrupt64(intno, is_int, error_code, next_eip, is_hw); + } else +#endif + { +#ifdef VBOX + /* int xx *, v86 code and VME enabled? */ + if ( (env->eflags & VM_MASK) + && (env->cr[4] & CR4_VME_MASK) + && is_int + && !is_hw + && env->eip + 1 != next_eip /* single byte int 3 goes straight to the protected mode handler */ + ) + do_soft_interrupt_vme(intno, error_code, next_eip); + else +#endif /* VBOX */ + do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw); + } + } else { +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) + handle_even_inj(intno, is_int, error_code, is_hw, 1); +#endif + do_interrupt_real(intno, is_int, error_code, next_eip); + } + +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) { + uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID); + } +#endif +} + +/* This should come from sysemu.h - if we could include it here... */ +void qemu_system_reset_request(void); + +/* + * Check nested exceptions and change to double or triple fault if + * needed. It should only be called, if this is not an interrupt. + * Returns the new exception number. + */ +static int check_exception(int intno, int *error_code) +{ + int first_contributory = env->old_exception == 0 || + (env->old_exception >= 10 && + env->old_exception <= 13); + int second_contributory = intno == 0 || + (intno >= 10 && intno <= 13); + + qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n", + env->old_exception, intno); + +#if !defined(CONFIG_USER_ONLY) + if (env->old_exception == EXCP08_DBLE) { + if (env->hflags & HF_SVMI_MASK) + helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */ + + qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + +# ifndef VBOX + qemu_system_reset_request(); + return EXCP_HLT; +# else + remR3RaiseRC(env->pVM, VINF_EM_TRIPLE_FAULT); + return EXCP_RC; +# endif + } +#endif + + if ((first_contributory && second_contributory) + || (env->old_exception == EXCP0E_PAGE && + (second_contributory || (intno == EXCP0E_PAGE)))) { + intno = EXCP08_DBLE; + *error_code = 0; + } + + if (second_contributory || (intno == EXCP0E_PAGE) || + (intno == EXCP08_DBLE)) + env->old_exception = intno; + + return intno; +} + +/* + * Signal an interruption. It is executed in the main CPU loop. + * is_int is TRUE if coming from the int instruction. next_eip is the + * EIP value AFTER the interrupt instruction. It is only relevant if + * is_int is TRUE. 
+ */ +static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code, + int next_eip_addend) +{ +#if defined(VBOX) && defined(DEBUG) + Log2(("raise_interrupt: %x %x %x %RGv\n", intno, is_int, error_code, (RTGCPTR)env->eip + next_eip_addend)); +#endif + if (!is_int) { + helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code); + intno = check_exception(intno, &error_code); + } else { + helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0); + } + + env->exception_index = intno; + env->error_code = error_code; + env->exception_is_int = is_int; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit(); +} + +/* shortcuts to generate exceptions */ + +void raise_exception_err(int exception_index, int error_code) +{ + raise_interrupt(exception_index, 0, error_code, 0); +} + +void raise_exception(int exception_index) +{ + raise_interrupt(exception_index, 0, 0, 0); +} + +void raise_exception_env(int exception_index, CPUState *nenv) +{ + env = nenv; + raise_exception(exception_index); +} +/* SMM support */ + +#if defined(CONFIG_USER_ONLY) + +void do_smm_enter(void) +{ +} + +void helper_rsm(void) +{ +} + +#else + +#ifdef TARGET_X86_64 +#define SMM_REVISION_ID 0x00020064 +#else +#define SMM_REVISION_ID 0x00020000 +#endif + +void do_smm_enter(void) +{ + target_ulong sm_state; + SegmentCache *dt; + int i, offset; + + qemu_log_mask(CPU_LOG_INT, "SMM: enter\n"); + log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP); + + env->hflags |= HF_SMM_MASK; + cpu_smm_update(env); + + sm_state = env->smbase + 0x8000; + +#ifdef TARGET_X86_64 + for(i = 0; i < 6; i++) { + dt = &env->segs[i]; + offset = 0x7e00 + i * 16; + stw_phys(sm_state + offset, dt->selector); + stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff); + stl_phys(sm_state + offset + 4, dt->limit); + stq_phys(sm_state + offset + 8, dt->base); + } + + stq_phys(sm_state + 0x7e68, env->gdt.base); + stl_phys(sm_state + 0x7e64, env->gdt.limit); + + stw_phys(sm_state + 0x7e70, env->ldt.selector); + stq_phys(sm_state + 0x7e78, env->ldt.base); + stl_phys(sm_state + 0x7e74, env->ldt.limit); + stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff); + + stq_phys(sm_state + 0x7e88, env->idt.base); + stl_phys(sm_state + 0x7e84, env->idt.limit); + + stw_phys(sm_state + 0x7e90, env->tr.selector); + stq_phys(sm_state + 0x7e98, env->tr.base); + stl_phys(sm_state + 0x7e94, env->tr.limit); + stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff); + + stq_phys(sm_state + 0x7ed0, env->efer); + + stq_phys(sm_state + 0x7ff8, EAX); + stq_phys(sm_state + 0x7ff0, ECX); + stq_phys(sm_state + 0x7fe8, EDX); + stq_phys(sm_state + 0x7fe0, EBX); + stq_phys(sm_state + 0x7fd8, ESP); + stq_phys(sm_state + 0x7fd0, EBP); + stq_phys(sm_state + 0x7fc8, ESI); + stq_phys(sm_state + 0x7fc0, EDI); + for(i = 8; i < 16; i++) + stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]); + stq_phys(sm_state + 0x7f78, env->eip); + stl_phys(sm_state + 0x7f70, compute_eflags()); + stl_phys(sm_state + 0x7f68, env->dr[6]); + stl_phys(sm_state + 0x7f60, env->dr[7]); + + stl_phys(sm_state + 0x7f48, env->cr[4]); + stl_phys(sm_state + 0x7f50, env->cr[3]); + stl_phys(sm_state + 0x7f58, env->cr[0]); + + stl_phys(sm_state + 0x7efc, SMM_REVISION_ID); + stl_phys(sm_state + 0x7f00, env->smbase); +#else + stl_phys(sm_state + 0x7ffc, env->cr[0]); + stl_phys(sm_state + 0x7ff8, env->cr[3]); + stl_phys(sm_state + 0x7ff4, compute_eflags()); + stl_phys(sm_state + 0x7ff0, env->eip); + stl_phys(sm_state + 0x7fec, EDI); + stl_phys(sm_state + 0x7fe8, ESI); + 
stl_phys(sm_state + 0x7fe4, EBP); + stl_phys(sm_state + 0x7fe0, ESP); + stl_phys(sm_state + 0x7fdc, EBX); + stl_phys(sm_state + 0x7fd8, EDX); + stl_phys(sm_state + 0x7fd4, ECX); + stl_phys(sm_state + 0x7fd0, EAX); + stl_phys(sm_state + 0x7fcc, env->dr[6]); + stl_phys(sm_state + 0x7fc8, env->dr[7]); + + stl_phys(sm_state + 0x7fc4, env->tr.selector); + stl_phys(sm_state + 0x7f64, env->tr.base); + stl_phys(sm_state + 0x7f60, env->tr.limit); + stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff); + + stl_phys(sm_state + 0x7fc0, env->ldt.selector); + stl_phys(sm_state + 0x7f80, env->ldt.base); + stl_phys(sm_state + 0x7f7c, env->ldt.limit); + stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff); + + stl_phys(sm_state + 0x7f74, env->gdt.base); + stl_phys(sm_state + 0x7f70, env->gdt.limit); + + stl_phys(sm_state + 0x7f58, env->idt.base); + stl_phys(sm_state + 0x7f54, env->idt.limit); + + for(i = 0; i < 6; i++) { + dt = &env->segs[i]; + if (i < 3) + offset = 0x7f84 + i * 12; + else + offset = 0x7f2c + (i - 3) * 12; + stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector); + stl_phys(sm_state + offset + 8, dt->base); + stl_phys(sm_state + offset + 4, dt->limit); + stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff); + } + stl_phys(sm_state + 0x7f14, env->cr[4]); + + stl_phys(sm_state + 0x7efc, SMM_REVISION_ID); + stl_phys(sm_state + 0x7ef8, env->smbase); +#endif + /* init SMM cpu state */ + +#ifdef TARGET_X86_64 + cpu_load_efer(env, 0); +#endif + load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->eip = 0x00008000; + cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, + 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0); + + cpu_x86_update_cr0(env, + env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK)); + cpu_x86_update_cr4(env, 0); + env->dr[7] = 0x00000400; + CC_OP = CC_OP_EFLAGS; +} + +void helper_rsm(void) +{ +#ifdef VBOX + cpu_abort(env, "helper_rsm"); +#else /* !VBOX */ + target_ulong sm_state; + int i, offset; + uint32_t val; + + sm_state = env->smbase + 0x8000; +#ifdef TARGET_X86_64 + cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0)); + + for(i = 0; i < 6; i++) { + offset = 0x7e00 + i * 16; + cpu_x86_load_seg_cache(env, i, + lduw_phys(sm_state + offset), + ldq_phys(sm_state + offset + 8), + ldl_phys(sm_state + offset + 4), + (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8); + } + + env->gdt.base = ldq_phys(sm_state + 0x7e68); + env->gdt.limit = ldl_phys(sm_state + 0x7e64); + + env->ldt.selector = lduw_phys(sm_state + 0x7e70); + env->ldt.base = ldq_phys(sm_state + 0x7e78); + env->ldt.limit = ldl_phys(sm_state + 0x7e74); + env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8; +#ifdef VBOX + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + env->idt.base = ldq_phys(sm_state + 0x7e88); + env->idt.limit = ldl_phys(sm_state + 0x7e84); + + env->tr.selector = lduw_phys(sm_state + 0x7e90); + env->tr.base = ldq_phys(sm_state + 0x7e98); + env->tr.limit = ldl_phys(sm_state + 0x7e94); + env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8; +#ifdef VBOX + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + EAX = ldq_phys(sm_state + 0x7ff8); + ECX = ldq_phys(sm_state + 0x7ff0); + EDX = 
ldq_phys(sm_state + 0x7fe8); + EBX = ldq_phys(sm_state + 0x7fe0); + ESP = ldq_phys(sm_state + 0x7fd8); + EBP = ldq_phys(sm_state + 0x7fd0); + ESI = ldq_phys(sm_state + 0x7fc8); + EDI = ldq_phys(sm_state + 0x7fc0); + for(i = 8; i < 16; i++) + env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8); + env->eip = ldq_phys(sm_state + 0x7f78); + load_eflags(ldl_phys(sm_state + 0x7f70), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->dr[6] = ldl_phys(sm_state + 0x7f68); + env->dr[7] = ldl_phys(sm_state + 0x7f60); + + cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48)); + cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50)); + cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58)); + + val = ldl_phys(sm_state + 0x7efc); /* revision ID */ + if (val & 0x20000) { + env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff; + } +#else + cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc)); + cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8)); + load_eflags(ldl_phys(sm_state + 0x7ff4), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->eip = ldl_phys(sm_state + 0x7ff0); + EDI = ldl_phys(sm_state + 0x7fec); + ESI = ldl_phys(sm_state + 0x7fe8); + EBP = ldl_phys(sm_state + 0x7fe4); + ESP = ldl_phys(sm_state + 0x7fe0); + EBX = ldl_phys(sm_state + 0x7fdc); + EDX = ldl_phys(sm_state + 0x7fd8); + ECX = ldl_phys(sm_state + 0x7fd4); + EAX = ldl_phys(sm_state + 0x7fd0); + env->dr[6] = ldl_phys(sm_state + 0x7fcc); + env->dr[7] = ldl_phys(sm_state + 0x7fc8); + + env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff; + env->tr.base = ldl_phys(sm_state + 0x7f64); + env->tr.limit = ldl_phys(sm_state + 0x7f60); + env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8; +#ifdef VBOX + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff; + env->ldt.base = ldl_phys(sm_state + 0x7f80); + env->ldt.limit = ldl_phys(sm_state + 0x7f7c); + env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8; +#ifdef VBOX + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + env->gdt.base = ldl_phys(sm_state + 0x7f74); + env->gdt.limit = ldl_phys(sm_state + 0x7f70); + + env->idt.base = ldl_phys(sm_state + 0x7f58); + env->idt.limit = ldl_phys(sm_state + 0x7f54); + + for(i = 0; i < 6; i++) { + if (i < 3) + offset = 0x7f84 + i * 12; + else + offset = 0x7f2c + (i - 3) * 12; + cpu_x86_load_seg_cache(env, i, + ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff, + ldl_phys(sm_state + offset + 8), + ldl_phys(sm_state + offset + 4), + (ldl_phys(sm_state + offset) & 0xf0ff) << 8); + } + cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14)); + + val = ldl_phys(sm_state + 0x7efc); /* revision ID */ + if (val & 0x20000) { + env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff; + } +#endif + CC_OP = CC_OP_EFLAGS; + env->hflags &= ~HF_SMM_MASK; + cpu_smm_update(env); + + qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); + log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP); +#endif /* !VBOX */ +} + +#endif /* !CONFIG_USER_ONLY */ + + +/* division, flags are undefined */ + +void helper_divb_AL(target_ulong t0) +{ + unsigned int num, den, q, r; + + num = (EAX & 0xffff); + den = (t0 & 0xff); + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q > 0xff) + raise_exception(EXCP00_DIVZ); + q &= 0xff; + r = (num % den) & 0xff; + EAX = (EAX & ~0xffff) | (r << 8) | q; +} + +void helper_idivb_AL(target_ulong t0) +{ + int num, den, q, r; + + num = (int16_t)EAX; + den = (int8_t)t0; + if (den == 0) { 
+ raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q != (int8_t)q) + raise_exception(EXCP00_DIVZ); + q &= 0xff; + r = (num % den) & 0xff; + EAX = (EAX & ~0xffff) | (r << 8) | q; +} + +void helper_divw_AX(target_ulong t0) +{ + unsigned int num, den, q, r; + + num = (EAX & 0xffff) | ((EDX & 0xffff) << 16); + den = (t0 & 0xffff); + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q > 0xffff) + raise_exception(EXCP00_DIVZ); + q &= 0xffff; + r = (num % den) & 0xffff; + EAX = (EAX & ~0xffff) | q; + EDX = (EDX & ~0xffff) | r; +} + +void helper_idivw_AX(target_ulong t0) +{ + int num, den, q, r; + + num = (EAX & 0xffff) | ((EDX & 0xffff) << 16); + den = (int16_t)t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q != (int16_t)q) + raise_exception(EXCP00_DIVZ); + q &= 0xffff; + r = (num % den) & 0xffff; + EAX = (EAX & ~0xffff) | q; + EDX = (EDX & ~0xffff) | r; +} + +void helper_divl_EAX(target_ulong t0) +{ + unsigned int den, r; + uint64_t num, q; + + num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + den = t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + r = (num % den); + if (q > 0xffffffff) + raise_exception(EXCP00_DIVZ); + EAX = (uint32_t)q; + EDX = (uint32_t)r; +} + +void helper_idivl_EAX(target_ulong t0) +{ + int den, r; + int64_t num, q; + + num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + den = t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + r = (num % den); + if (q != (int32_t)q) + raise_exception(EXCP00_DIVZ); + EAX = (uint32_t)q; + EDX = (uint32_t)r; +} + +/* bcd */ + +/* XXX: exception */ +void helper_aam(int base) +{ + int al, ah; + al = EAX & 0xff; + ah = al / base; + al = al % base; + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_DST = al; +} + +void helper_aad(int base) +{ + int al, ah; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + al = ((ah * base) + al) & 0xff; + EAX = (EAX & ~0xffff) | al; + CC_DST = al; +} + +void helper_aaa(void) +{ + int icarry; + int al, ah, af; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + af = eflags & CC_A; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + + icarry = (al > 0xf9); + if (((al & 0x0f) > 9 ) || af) { + al = (al + 6) & 0x0f; + ah = (ah + 1 + icarry) & 0xff; + eflags |= CC_C | CC_A; + } else { + eflags &= ~(CC_C | CC_A); + al &= 0x0f; + } + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_SRC = eflags; +} + +void helper_aas(void) +{ + int icarry; + int al, ah, af; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + af = eflags & CC_A; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + + icarry = (al < 6); + if (((al & 0x0f) > 9 ) || af) { + al = (al - 6) & 0x0f; + ah = (ah - 1 - icarry) & 0xff; + eflags |= CC_C | CC_A; + } else { + eflags &= ~(CC_C | CC_A); + al &= 0x0f; + } + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_SRC = eflags; +} + +void helper_daa(void) +{ + int al, af, cf; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + cf = eflags & CC_C; + af = eflags & CC_A; + al = EAX & 0xff; + + eflags = 0; + if (((al & 0x0f) > 9 ) || af) { + al = (al + 6) & 0xff; + eflags |= CC_A; + } + if ((al > 0x9f) || cf) { + al = (al + 0x60) & 0xff; + eflags |= CC_C; + } + EAX = (EAX & ~0xff) | al; + /* well, speed is not an issue here, so we compute the flags by hand */ + eflags |= (al == 0) << 6; /* zf */ + eflags |= parity_table[al]; /* pf */ + eflags |= (al & 0x80); /* sf */ + CC_SRC = eflags; +} + +void helper_das(void) +{ + int al, al1, af, cf; + int eflags; + + eflags = 
helper_cc_compute_all(CC_OP); + cf = eflags & CC_C; + af = eflags & CC_A; + al = EAX & 0xff; + + eflags = 0; + al1 = al; + if (((al & 0x0f) > 9 ) || af) { + eflags |= CC_A; + if (al < 6 || cf) + eflags |= CC_C; + al = (al - 6) & 0xff; + } + if ((al1 > 0x99) || cf) { + al = (al - 0x60) & 0xff; + eflags |= CC_C; + } + EAX = (EAX & ~0xff) | al; + /* well, speed is not an issue here, so we compute the flags by hand */ + eflags |= (al == 0) << 6; /* zf */ + eflags |= parity_table[al]; /* pf */ + eflags |= (al & 0x80); /* sf */ + CC_SRC = eflags; +} + +void helper_into(int next_eip_addend) +{ + int eflags; + eflags = helper_cc_compute_all(CC_OP); + if (eflags & CC_O) { + raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend); + } +} + +void helper_cmpxchg8b(target_ulong a0) +{ + uint64_t d; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + d = ldq(a0); + if (d == (((uint64_t)EDX << 32) | (uint32_t)EAX)) { + stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX); + eflags |= CC_Z; + } else { + /* always do the store */ + stq(a0, d); + EDX = (uint32_t)(d >> 32); + EAX = (uint32_t)d; + eflags &= ~CC_Z; + } + CC_SRC = eflags; +} + +#ifdef TARGET_X86_64 +void helper_cmpxchg16b(target_ulong a0) +{ + uint64_t d0, d1; + int eflags; + + if ((a0 & 0xf) != 0) + raise_exception(EXCP0D_GPF); + eflags = helper_cc_compute_all(CC_OP); + d0 = ldq(a0); + d1 = ldq(a0 + 8); + if (d0 == EAX && d1 == EDX) { + stq(a0, EBX); + stq(a0 + 8, ECX); + eflags |= CC_Z; + } else { + /* always do the store */ + stq(a0, d0); + stq(a0 + 8, d1); + EDX = d1; + EAX = d0; + eflags &= ~CC_Z; + } + CC_SRC = eflags; +} +#endif + +void helper_single_step(void) +{ +#ifndef CONFIG_USER_ONLY + check_hw_breakpoints(env, 1); + env->dr[6] |= DR6_BS; +#endif + raise_exception(EXCP01_DB); +} + +void helper_cpuid(void) +{ + uint32_t eax, ebx, ecx, edx; + + helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0); + + cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX, &eax, &ebx, &ecx, &edx); + EAX = eax; + EBX = ebx; + ECX = ecx; + EDX = edx; +} + +void helper_enter_level(int level, int data32, target_ulong t1) +{ + target_ulong ssp; + uint32_t esp_mask, esp, ebp; + + esp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + ebp = EBP; + esp = ESP; + if (data32) { + /* 32 bit */ + esp -= 4; + while (--level) { + esp -= 4; + ebp -= 4; + stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask))); + } + esp -= 4; + stl(ssp + (esp & esp_mask), t1); + } else { + /* 16 bit */ + esp -= 2; + while (--level) { + esp -= 2; + ebp -= 2; + stw(ssp + (esp & esp_mask), lduw(ssp + (ebp & esp_mask))); + } + esp -= 2; + stw(ssp + (esp & esp_mask), t1); + } +} + +#ifdef TARGET_X86_64 +void helper_enter64_level(int level, int data64, target_ulong t1) +{ + target_ulong esp, ebp; + ebp = EBP; + esp = ESP; + + if (data64) { + /* 64 bit */ + esp -= 8; + while (--level) { + esp -= 8; + ebp -= 8; + stq(esp, ldq(ebp)); + } + esp -= 8; + stq(esp, t1); + } else { + /* 16 bit */ + esp -= 2; + while (--level) { + esp -= 2; + ebp -= 2; + stw(esp, lduw(ebp)); + } + esp -= 2; + stw(esp, t1); + } +} +#endif + +void helper_lldt(int selector) +{ + SegmentCache *dt; + uint32_t e1, e2; +#ifndef VBOX + int index, entry_limit; +#else + unsigned int index, entry_limit; +#endif + target_ulong ptr; + +#ifdef VBOX + Log(("helper_lldt_T0: old ldtr=%RTsel {.base=%RGv, .limit=%RGv} new=%RTsel\n", + (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit, (RTSEL)(selector & 0xffff))); +#endif + + selector &= 0xffff; + if ((selector & 0xfffc) == 0) { + /* XXX: NULL 
selector case: invalid LDT */ + env->ldt.base = 0; + env->ldt.limit = 0; +#ifdef VBOX + env->ldt.flags = DESC_INTEL_UNUSABLE; + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + } else { + if (selector & 0x4) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dt = &env->gdt; + index = selector & ~7; +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + entry_limit = 15; + else +#endif + entry_limit = 7; + if ((index + entry_limit) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint32_t e3; + e3 = ldl_kernel(ptr + 8); + load_seg_cache_raw_dt(&env->ldt, e1, e2); + env->ldt.base |= (target_ulong)e3 << 32; + } else +#endif + { + load_seg_cache_raw_dt(&env->ldt, e1, e2); + } + } + env->ldt.selector = selector; +#ifdef VBOX + Log(("helper_lldt_T0: new ldtr=%RTsel {.base=%RGv, .limit=%RGv}\n", + (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit)); +#endif +} + +void helper_ltr(int selector) +{ + SegmentCache *dt; + uint32_t e1, e2; +#ifndef VBOX + int index, type, entry_limit; +#else + unsigned int index; + int type, entry_limit; +#endif + target_ulong ptr; + +#ifdef VBOX + Log(("helper_ltr: pc=%RGv old tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n", + (RTGCPTR)env->eip, (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit, + env->tr.flags, (RTSEL)(selector & 0xffff))); +#endif + selector &= 0xffff; + if ((selector & 0xfffc) == 0) { + /* NULL selector case: invalid TR */ +#ifdef VBOX + raise_exception_err(EXCP0A_TSS, 0); +#else + env->tr.base = 0; + env->tr.limit = 0; + env->tr.flags = 0; +#endif + } else { + if (selector & 0x4) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dt = &env->gdt; + index = selector & ~7; +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + entry_limit = 15; + else +#endif + entry_limit = 7; + if ((index + entry_limit) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + if ((e2 & DESC_S_MASK) || + (type != 1 && type != 9)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint32_t e3, e4; + e3 = ldl_kernel(ptr + 8); + e4 = ldl_kernel(ptr + 12); + if ((e4 >> DESC_TYPE_SHIFT) & 0xf) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + load_seg_cache_raw_dt(&env->tr, e1, e2); + env->tr.base |= (target_ulong)e3 << 32; + } else +#endif + { + load_seg_cache_raw_dt(&env->tr, e1, e2); + } + env->tr.flags |= DESC_TSS_BUSY_MASK; + e2 |= DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + env->tr.selector = selector; +#ifdef VBOX + Log(("helper_ltr: new tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n", + (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit, + env->tr.flags, (RTSEL)(selector & 0xffff))); +#endif +} + +/* only works if protected mode and not VM86. 
seg_reg must be != R_CS */ +void helper_load_seg(int seg_reg, int selector) +{ + uint32_t e1, e2; + int cpl, dpl, rpl; + SegmentCache *dt; +#ifndef VBOX + int index; +#else + unsigned int index; +#endif + target_ulong ptr; + + selector &= 0xffff; + cpl = env->hflags & HF_CPL_MASK; +#ifdef VBOX + + /* Trying to load a selector with CPL=1? */ + if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0)) + { + Log(("RPL 1 -> sel %04X -> %04X (helper_load_seg)\n", selector, selector & 0xfffc)); + selector = selector & 0xfffc; + } +#endif /* VBOX */ + if ((selector & 0xfffc) == 0) { + /* null selector case */ +#ifndef VBOX + if (seg_reg == R_SS +#ifdef TARGET_X86_64 + && (!(env->hflags & HF_CS64_MASK) || cpl == 3) +#endif + ) + raise_exception_err(EXCP0D_GPF, 0); + cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0); +#else + if (seg_reg == R_SS) { + if (!(env->hflags & HF_CS64_MASK) || cpl == 3) + raise_exception_err(EXCP0D_GPF, 0); + e2 = (cpl << DESC_DPL_SHIFT) | DESC_INTEL_UNUSABLE; + } else { + e2 = DESC_INTEL_UNUSABLE; + } + cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, 0, 0, e2); +#endif + } else { + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + if ((index + 7) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + + if (!(e2 & DESC_S_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (seg_reg == R_SS) { + /* must be writable segment */ + if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (rpl != cpl || dpl != cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + } else { + /* must be readable segment */ + if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + + if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { + /* if not conforming code, test rights */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + } + } + + if (!(e2 & DESC_P_MASK)) { + if (seg_reg == R_SS) + raise_exception_err(EXCP0C_STACK, selector & 0xfffc); + else + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + } + + /* set the access bit if not already set */ + if (!(e2 & DESC_A_MASK)) { + e2 |= DESC_A_MASK; + stl_kernel(ptr + 4, e2); + } + + cpu_x86_load_seg_cache(env, seg_reg, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); +#if 0 + qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n", + selector, (unsigned long)sc->base, sc->limit, sc->flags); +#endif + } +} + +/* protected mode jump */ +void helper_ljmp_protected(int new_cs, target_ulong new_eip, + int next_eip_addend) +{ + int gate_cs, type; + uint32_t e1, e2, cpl, dpl, rpl, limit; + target_ulong next_eip; + +#ifdef VBOX /** @todo Why do we do this? 
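+ * (Most likely just to silence "may be used uninitialised" compiler
+ * warnings: the compiler cannot see that the raise_exception_err()
+ * failure paths after load_segment() never return.)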
*/ + e1 = e2 = 0; +#endif + if ((new_cs & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + if (load_segment(&e1, &e2, new_cs) != 0) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (e2 & DESC_C_MASK) { + /* conforming code segment */ + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } else { + /* non conforming code segment */ + rpl = new_cs & 3; + if (rpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (dpl != cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + limit = get_seg_limit(e1, e2); + if (new_eip > limit && + !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + } else { + /* jump to call or task gate */ + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + rpl = new_cs & 3; + cpl = env->hflags & HF_CPL_MASK; + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: /* 286 TSS */ + case 9: /* 386 TSS */ + case 5: /* task gate */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + next_eip = env->eip + next_eip_addend; + switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip); + CC_OP = CC_OP_EFLAGS; + break; + case 4: /* 286 call gate */ + case 12: /* 386 call gate */ + if ((dpl < cpl) || (dpl < rpl)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + gate_cs = e1 >> 16; + new_eip = (e1 & 0xffff); + if (type == 12) + new_eip |= (e2 & 0xffff0000); + if (load_segment(&e1, &e2, gate_cs) != 0) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + /* must be code segment */ + if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) != + (DESC_S_MASK | DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + if (((e2 & DESC_C_MASK) && (dpl > cpl)) || + (!(e2 & DESC_C_MASK) && (dpl != cpl))) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + if (!(e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-514 of 253666.pdf */ + raise_exception_err(EXCP0B_NOSEG, gate_cs & 0xfffc); +#else + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); +#endif + limit = get_seg_limit(e1, e2); + if (new_eip > limit) + raise_exception_err(EXCP0D_GPF, 0); + cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + break; + default: + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + break; + } + } +} + +/* real mode call */ +void helper_lcall_real(int new_cs, target_ulong new_eip1, + int shift, int next_eip) +{ + int new_eip; + uint32_t esp, esp_mask; + target_ulong ssp; + + new_eip = new_eip1; + esp = ESP; + esp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + if (shift) { + PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector); + PUSHL(ssp, esp, esp_mask, next_eip); + } else { + PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector); + PUSHW(ssp, esp, esp_mask, next_eip); + } + + SET_ESP(esp, esp_mask); + env->eip = new_eip; + env->segs[R_CS].selector = new_cs; + env->segs[R_CS].base = (new_cs << 4); +} + +/* protected mode call 
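+   (covers direct calls to code segments as well as calls through call
+   gates, task gates and TSS descriptors)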
*/ +void helper_lcall_protected(int new_cs, target_ulong new_eip, + int shift, int next_eip_addend) +{ + int new_stack, i; + uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count; + uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask; + uint32_t val, limit, old_sp_mask; + target_ulong ssp, old_ssp, next_eip; + +#ifdef VBOX /** @todo Why do we do this? */ + e1 = e2 = 0; +#endif + next_eip = env->eip + next_eip_addend; + LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift); + LOG_PCALL_STATE(env); + if ((new_cs & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + if (load_segment(&e1, &e2, new_cs) != 0) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + cpl = env->hflags & HF_CPL_MASK; + LOG_PCALL("desc=%08x:%08x\n", e1, e2); + if (e2 & DESC_S_MASK) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (e2 & DESC_C_MASK) { + /* conforming code segment */ + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } else { + /* non conforming code segment */ + rpl = new_cs & 3; + if (rpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (dpl != cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + +#ifdef TARGET_X86_64 + /* XXX: check 16/32 bit cases in long mode */ + if (shift == 2) { + target_ulong rsp; + /* 64 bit case */ + rsp = ESP; + PUSHQ(rsp, env->segs[R_CS].selector); + PUSHQ(rsp, next_eip); + /* from this point, not restartable */ + ESP = rsp; + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), e2); + EIP = new_eip; + } else +#endif + { + sp = ESP; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, sp, sp_mask, next_eip); + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, sp, sp_mask, next_eip); + } + + limit = get_seg_limit(e1, e2); + if (new_eip > limit) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + /* from this point, not restartable */ + SET_ESP(sp, sp_mask); + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + } + } else { + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + rpl = new_cs & 3; + switch(type) { + case 1: /* available 286 TSS */ + case 9: /* available 386 TSS */ + case 5: /* task gate */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip); + CC_OP = CC_OP_EFLAGS; + return; + case 4: /* 286 call gate */ + case 12: /* 386 call gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + break; + } + shift = type >> 3; + + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + selector = e1 >> 16; + offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); + param_count = e2 & 0x1f; + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + 
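/* the selector stored in a call gate must reference a code segment */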
raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + + if (!(e2 & DESC_C_MASK) && dpl < cpl) { + /* to inner privilege */ + get_ss_esp_from_tss(&ss, &sp, dpl); + LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n", + ss, sp, param_count, ESP); + if ((ss & 0xfffc) == 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if ((ss & 3) != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (load_segment(&ss_e1, &ss_e2, ss) != 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (ss_dpl != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-99 of 253666.pdf */ + raise_exception_err(EXCP0C_STACK, ss & 0xfffc); +#else + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#endif + + // push_size = ((param_count * 2) + 8) << shift; + + old_sp_mask = get_sp_mask(env->segs[R_SS].flags); + old_ssp = env->segs[R_SS].base; + + sp_mask = get_sp_mask(ss_e2); + ssp = get_seg_base(ss_e1, ss_e2); + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector); + PUSHL(ssp, sp, sp_mask, ESP); + for(i = param_count - 1; i >= 0; i--) { + val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask)); + PUSHL(ssp, sp, sp_mask, val); + } + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector); + PUSHW(ssp, sp, sp_mask, ESP); + for(i = param_count - 1; i >= 0; i--) { + val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask)); + PUSHW(ssp, sp, sp_mask, val); + } + } + new_stack = 1; + } else { + /* to same privilege */ + sp = ESP; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + // push_size = (4 << shift); + new_stack = 0; + } + + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, sp, sp_mask, next_eip); + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, sp, sp_mask, next_eip); + } + + /* from this point, not restartable */ + + if (new_stack) { + ss = (ss & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_SS, ss, + ssp, + get_seg_limit(ss_e1, ss_e2), + ss_e2); + } + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + SET_ESP(sp, sp_mask); + EIP = offset; + } +} + +/* real and vm86 mode iret */ +void helper_iret_real(int shift) +{ + uint32_t sp, new_cs, new_eip, new_eflags, sp_mask; + target_ulong ssp; + int eflags_mask; +#ifdef VBOX + bool fVME = false; + + remR3TrapClear(env->pVM); +#endif /* VBOX */ + + sp_mask = 0xffff; /* XXXX: use SS segment size ? 
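   (0xffff is right for plain real mode; a 32-bit SS left over from
   protected mode, i.e. "unreal mode", would need the cached
   descriptor's B bit)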
*/ + sp = ESP; + ssp = env->segs[R_SS].base; + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_eip); + POPL(ssp, sp, sp_mask, new_cs); + new_cs &= 0xffff; + POPL(ssp, sp, sp_mask, new_eflags); + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_eip); + POPW(ssp, sp, sp_mask, new_cs); + POPW(ssp, sp, sp_mask, new_eflags); + } +#ifdef VBOX + if ( (env->eflags & VM_MASK) + && ((env->eflags >> IOPL_SHIFT) & 3) != 3 + && (env->cr[4] & CR4_VME_MASK)) /* implied or else we would fault earlier */ + { + fVME = true; + /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */ + /* if TF will be set -> #GP */ + if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK)) + || (new_eflags & TF_MASK)) + raise_exception(EXCP0D_GPF); + } +#endif /* VBOX */ + ESP = (ESP & ~sp_mask) | (sp & sp_mask); + env->segs[R_CS].selector = new_cs; + env->segs[R_CS].base = (new_cs << 4); + env->eip = new_eip; +#ifdef VBOX + if (fVME) + eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK; + else +#endif + if (env->eflags & VM_MASK) + eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK; + else + eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK; + if (shift == 0) + eflags_mask &= 0xffff; + load_eflags(new_eflags, eflags_mask); + env->hflags2 &= ~HF2_NMI_MASK; +#ifdef VBOX + if (fVME) + { + if (new_eflags & IF_MASK) + env->eflags |= VIF_MASK; + else + env->eflags &= ~VIF_MASK; + } +#endif /* VBOX */ +} + +static inline void validate_seg(int seg_reg, int cpl) +{ + int dpl; + uint32_t e2; + + /* XXX: on x86_64, we do not want to nullify FS and GS because + they may still contain a valid base. I would be interested to + know how a real x86_64 CPU behaves */ + if ((seg_reg == R_FS || seg_reg == R_GS) && + (env->segs[seg_reg].selector & 0xfffc) == 0) + return; + + e2 = env->segs[seg_reg].flags; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { + /* data or non conforming code segment */ + if (dpl < cpl) { + cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0); + } + } +} + +/* protected mode iret */ +static inline void helper_ret_protected(int shift, int is_iret, int addend) +{ + uint32_t new_cs, new_eflags, new_ss; + uint32_t new_es, new_ds, new_fs, new_gs; + uint32_t e1, e2, ss_e1, ss_e2; + int cpl, dpl, rpl, eflags_mask, iopl; + target_ulong ssp, sp, new_eip, new_esp, sp_mask; + +#ifdef VBOX /** @todo Why do we do this? 
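   Probably just to silence 'may be used uninitialized' warnings:
   every path that leaves these unset ends in raise_exception_err(),
   which does not return.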
*/ + ss_e1 = ss_e2 = e1 = e2 = 0; +#endif + +#ifdef TARGET_X86_64 + if (shift == 2) + sp_mask = -1; + else +#endif + sp_mask = get_sp_mask(env->segs[R_SS].flags); + sp = ESP; + ssp = env->segs[R_SS].base; + new_eflags = 0; /* avoid warning */ +#ifdef TARGET_X86_64 + if (shift == 2) { + POPQ(sp, new_eip); + POPQ(sp, new_cs); + new_cs &= 0xffff; + if (is_iret) { + POPQ(sp, new_eflags); + } + } else +#endif + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_eip); + POPL(ssp, sp, sp_mask, new_cs); + new_cs &= 0xffff; + if (is_iret) { + POPL(ssp, sp, sp_mask, new_eflags); +#define LOG_GROUP LOG_GROUP_REM +#if defined(VBOX) && defined(DEBUG) + Log(("iret: new CS %04X (old=%x)\n", new_cs, env->segs[R_CS].selector)); + Log(("iret: new EIP %08X\n", (uint32_t)new_eip)); + Log(("iret: new EFLAGS %08X\n", new_eflags)); + Log(("iret: EAX=%08x\n", (uint32_t)EAX)); +#endif + if (new_eflags & VM_MASK) + goto return_to_vm86; + } +#ifdef VBOX + if ((new_cs & 0x3) == 1 && (env->state & CPU_RAW_RING0)) + { + if ( !EMIsRawRing1Enabled(env->pVM) + || env->segs[R_CS].selector == (new_cs & 0xfffc)) + { + Log(("RPL 1 -> new_cs %04X -> %04X\n", new_cs, new_cs & 0xfffc)); + new_cs = new_cs & 0xfffc; + } + else + { + /* Ugly assumption: assume a genuine switch to ring-1. */ + Log(("Genuine switch to ring-1 (iret)\n")); + } + } + else if ((new_cs & 0x3) == 2 && (env->state & CPU_RAW_RING0) && EMIsRawRing1Enabled(env->pVM)) + { + Log(("RPL 2 -> new_cs %04X -> %04X\n", new_cs, (new_cs & 0xfffc) | 1)); + new_cs = (new_cs & 0xfffc) | 1; + } +#endif + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_eip); + POPW(ssp, sp, sp_mask, new_cs); + if (is_iret) + POPW(ssp, sp, sp_mask, new_eflags); + } + LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n", + new_cs, new_eip, shift, addend); + LOG_PCALL_STATE(env); + if ((new_cs & 0xfffc) == 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_cs & 0xfffc) == 0\n")); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (load_segment(&e1, &e2, new_cs) != 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("load_segment failed\n")); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_S_MASK) || + !(e2 & DESC_CS_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("e2 mask %08x\n", e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + cpl = env->hflags & HF_CPL_MASK; + rpl = new_cs & 3; + if (rpl < cpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("rpl < cpl (%d vs %d)\n", rpl, cpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + + if (e2 & DESC_C_MASK) { + if (dpl > rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("dpl > rpl (%d vs %d)\n", dpl, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + } else { + if (dpl != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("dpl != rpl (%d vs %d) e1=%x e2=%x\n", dpl, rpl, e1, e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + } + if (!(e2 & DESC_P_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("DESC_P_MASK e2=%08x\n", e2)); +#endif + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + } + + sp += addend; + if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) || + ((env->hflags & HF_CS64_MASK) && !is_iret))) { + /* return to same privilege level */ +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + cpu_x86_load_seg_cache(env, R_CS, new_cs, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); 
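+ /* no stack switch here: SS:ESP is kept and only advanced by 'addend' via SET_ESP below */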
+ } else { + /* return to different privilege level */ +#ifdef TARGET_X86_64 + if (shift == 2) { + POPQ(sp, new_esp); + POPQ(sp, new_ss); + new_ss &= 0xffff; + } else +#endif + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_esp); + POPL(ssp, sp, sp_mask, new_ss); + new_ss &= 0xffff; + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_esp); + POPW(ssp, sp, sp_mask, new_ss); + } + LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n", + new_ss, new_esp); + if ((new_ss & 0xfffc) == 0) { +#ifdef TARGET_X86_64 + /* NULL ss is allowed in long mode if cpl != 3*/ +# ifndef VBOX + /* XXX: test CS64 ? */ + if ((env->hflags & HF_LMA_MASK) && rpl != 3) { + cpu_x86_load_seg_cache(env, R_SS, new_ss, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (rpl << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */ + } else +# else /* VBOX */ + if ((env->hflags & HF_LMA_MASK) && rpl != 3 && (e2 & DESC_L_MASK)) { + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); + cpu_x86_load_seg_cache_with_clean_flags(env, R_SS, new_ss, + 0, 0xffffffff, + DESC_INTEL_UNUSABLE | (rpl << DESC_DPL_SHIFT) ); + ss_e2 = DESC_B_MASK; /* not really used */ + } else +# endif +#endif + { +#if defined(VBOX) && defined(DEBUG) + Log(("NULL ss, rpl=%d\n", rpl)); +#endif + raise_exception_err(EXCP0D_GPF, 0); + } + } else { + if ((new_ss & 3) != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x != rpl=%d\n", new_ss, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (load_segment(&ss_e1, &ss_e2, new_ss) != 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x load error\n", new_ss)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x ss_e2=%#x bad type\n", new_ss, ss_e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (dpl != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("SS.dpl=%u != rpl=%u\n", dpl, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (!(ss_e2 & DESC_P_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%#x #NP\n", new_ss)); +#endif + raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc); + } +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); + if (!(ss_e2 & DESC_A_MASK)) + ss_e2 = set_segment_accessed(new_ss, ss_e2); +#endif + cpu_x86_load_seg_cache(env, R_SS, new_ss, + get_seg_base(ss_e1, ss_e2), + get_seg_limit(ss_e1, ss_e2), + ss_e2); + } + + cpu_x86_load_seg_cache(env, R_CS, new_cs, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, rpl); + sp = new_esp; +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) + sp_mask = -1; + else +#endif + sp_mask = get_sp_mask(ss_e2); + + /* validate data segments */ + validate_seg(R_ES, rpl); + validate_seg(R_DS, rpl); + validate_seg(R_FS, rpl); + validate_seg(R_GS, rpl); + + sp += addend; + } + SET_ESP(sp, sp_mask); + env->eip = new_eip; + if (is_iret) { + /* NOTE: 'cpl' is the _old_ CPL */ + eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK; + if (cpl == 0) +#ifdef VBOX + eflags_mask |= IOPL_MASK | VIF_MASK | VIP_MASK; +#else + eflags_mask |= IOPL_MASK; +#endif + iopl = (env->eflags >> IOPL_SHIFT) & 3; + if (cpl <= iopl) + eflags_mask |= IF_MASK; + if (shift == 0) + eflags_mask &= 0xffff; + 
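/* NOTE: IOPL changes only when the old CPL was 0, IF only when CPL <= IOPL */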
load_eflags(new_eflags, eflags_mask); + } + return; + + return_to_vm86: + POPL(ssp, sp, sp_mask, new_esp); + POPL(ssp, sp, sp_mask, new_ss); + POPL(ssp, sp, sp_mask, new_es); + POPL(ssp, sp, sp_mask, new_ds); + POPL(ssp, sp, sp_mask, new_fs); + POPL(ssp, sp, sp_mask, new_gs); + + /* modify processor state */ + load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK); + load_seg_vm(R_CS, new_cs & 0xffff); + cpu_x86_set_cpl(env, 3); + load_seg_vm(R_SS, new_ss & 0xffff); + load_seg_vm(R_ES, new_es & 0xffff); + load_seg_vm(R_DS, new_ds & 0xffff); + load_seg_vm(R_FS, new_fs & 0xffff); + load_seg_vm(R_GS, new_gs & 0xffff); + + env->eip = new_eip & 0xffff; + ESP = new_esp; +} + +void helper_iret_protected(int shift, int next_eip) +{ + int tss_selector, type; + uint32_t e1, e2; + +#ifdef VBOX + Log(("iret (shift=%d new_eip=%#x)\n", shift, next_eip)); + e1 = e2 = 0; /** @todo Why do we do this? */ + remR3TrapClear(env->pVM); +#endif + + /* specific case for TSS */ + if (env->eflags & NT_MASK) { +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + { +#if defined(VBOX) && defined(DEBUG) + Log(("eflags.NT=1 on iret in long mode\n")); +#endif + raise_exception_err(EXCP0D_GPF, 0); + } +#endif + tss_selector = lduw_kernel(env->tr.base + 0); + if (tss_selector & 4) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + if (load_segment(&e1, &e2, tss_selector) != 0) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + type = (e2 >> DESC_TYPE_SHIFT) & 0x17; + /* NOTE: we check both segment and busy TSS */ + if (type != 3) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip); + } else { + helper_ret_protected(shift, 1, 0); + } + env->hflags2 &= ~HF2_NMI_MASK; +} + +void helper_lret_protected(int shift, int addend) +{ + helper_ret_protected(shift, 0, addend); +} + +void helper_sysenter(void) +{ + if (env->sysenter_cs == 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK); + cpu_x86_set_cpl(env, 0); + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + } else +#endif + { + cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + } + cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + ESP = env->sysenter_esp; + EIP = env->sysenter_eip; +} + +void helper_sysexit(int dflag) +{ + int cpl; + + cpl = env->hflags & HF_CPL_MASK; + if (env->sysenter_cs == 0 || cpl != 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + cpu_x86_set_cpl(env, 3); +#ifdef TARGET_X86_64 + if (dflag == 2) { + cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + } else +#endif + { + cpu_x86_load_seg_cache(env, R_CS, 
((env->sysenter_cs + 16) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + } + ESP = ECX; + EIP = EDX; +} + +#if defined(CONFIG_USER_ONLY) +target_ulong helper_read_crN(int reg) +{ + return 0; +} + +void helper_write_crN(int reg, target_ulong t0) +{ +} + +void helper_movl_drN_T0(int reg, target_ulong t0) +{ +} +#else +target_ulong helper_read_crN(int reg) +{ + target_ulong val; + + helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0); + switch(reg) { + default: + val = env->cr[reg]; + break; + case 8: + if (!(env->hflags2 & HF2_VINTR_MASK)) { +#ifndef VBOX + val = cpu_get_apic_tpr(env->apic_state); +#else /* VBOX */ + val = cpu_get_apic_tpr(env); +#endif /* VBOX */ + } else { + val = env->v_tpr; + } + break; + } + return val; +} + +void helper_write_crN(int reg, target_ulong t0) +{ + helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0); + switch(reg) { + case 0: + cpu_x86_update_cr0(env, t0); + break; + case 3: + cpu_x86_update_cr3(env, t0); + break; + case 4: + cpu_x86_update_cr4(env, t0); + break; + case 8: + if (!(env->hflags2 & HF2_VINTR_MASK)) { +#ifndef VBOX + cpu_set_apic_tpr(env->apic_state, t0); +#else /* VBOX */ + cpu_set_apic_tpr(env, t0); +#endif /* VBOX */ + } + env->v_tpr = t0 & 0x0f; + break; + default: + env->cr[reg] = t0; + break; + } +} + +void helper_movl_drN_T0(int reg, target_ulong t0) +{ + int i; + + if (reg < 4) { + hw_breakpoint_remove(env, reg); + env->dr[reg] = t0; + hw_breakpoint_insert(env, reg); +# ifndef VBOX + } else if (reg == 7) { +# else + } else if (reg == 7 || reg == 5) { /* (DR5 is an alias for DR7.) */ + if (t0 & X86_DR7_MBZ_MASK) + raise_exception_err(EXCP0D_GPF, 0); + t0 |= X86_DR7_RA1_MASK; + t0 &= ~X86_DR7_RAZ_MASK; +# endif + for (i = 0; i < 4; i++) + hw_breakpoint_remove(env, i); + env->dr[7] = t0; + for (i = 0; i < 4; i++) + hw_breakpoint_insert(env, i); + } else { +# ifndef VBOX + env->dr[reg] = t0; +# else + if (t0 & X86_DR6_MBZ_MASK) + raise_exception_err(EXCP0D_GPF, 0); + t0 |= X86_DR6_RA1_MASK; + t0 &= ~X86_DR6_RAZ_MASK; + env->dr[6] = t0; /* (DR4 is an alias for DR6.) */ +# endif + } +} +#endif + +void helper_lmsw(target_ulong t0) +{ + /* only 4 lower bits of CR0 are modified. PE cannot be set to zero + if already set to one. */ + t0 = (env->cr[0] & ~0xe) | (t0 & 0xf); + helper_write_crN(0, t0); +} + +void helper_clts(void) +{ + env->cr[0] &= ~CR0_TS_MASK; + env->hflags &= ~HF_TS_MASK; +} + +void helper_invlpg(target_ulong addr) +{ + helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0); + tlb_flush_page(env, addr); +} + +void helper_rdtsc(void) +{ + uint64_t val; + + if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0); + + val = cpu_get_tsc(env) + env->tsc_offset; + EAX = (uint32_t)(val); + EDX = (uint32_t)(val >> 32); +} + +void helper_rdtscp(void) +{ + helper_rdtsc(); +#ifndef VBOX + ECX = (uint32_t)(env->tsc_aux); +#else /* VBOX */ + uint64_t val; + if (cpu_rdmsr(env, MSR_K8_TSC_AUX, &val) == 0) + ECX = (uint32_t)(val); + else + ECX = 0; +#endif /* VBOX */ +} + +void helper_rdpmc(void) +{ +#ifdef VBOX + /* If X86_CR4_PCE is *not* set, then CPL must be zero. 
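   With CR4.PCE set, RDPMC is allowed from any privilege level.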
*/ + if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + /* Just return zero here; rather tricky to properly emulate this, especially as the specs are a mess. */ + EAX = 0; + EDX = 0; +#else /* !VBOX */ + if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0); + + /* currently unimplemented */ + raise_exception_err(EXCP06_ILLOP, 0); +#endif /* !VBOX */ +} + +#if defined(CONFIG_USER_ONLY) +void helper_wrmsr(void) +{ +} + +void helper_rdmsr(void) +{ +} +#else +void helper_wrmsr(void) +{ + uint64_t val; + + helper_svm_check_intercept_param(SVM_EXIT_MSR, 1); + + val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + + switch((uint32_t)ECX) { + case MSR_IA32_SYSENTER_CS: + env->sysenter_cs = val & 0xffff; + break; + case MSR_IA32_SYSENTER_ESP: + env->sysenter_esp = val; + break; + case MSR_IA32_SYSENTER_EIP: + env->sysenter_eip = val; + break; + case MSR_IA32_APICBASE: +# ifndef VBOX /* The CPUMSetGuestMsr call below does this now. */ + cpu_set_apic_base(env->apic_state, val); +# endif + break; + case MSR_EFER: + { + uint64_t update_mask; + update_mask = 0; + if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL) + update_mask |= MSR_EFER_SCE; + if (env->cpuid_ext2_features & CPUID_EXT2_LM) + update_mask |= MSR_EFER_LME; + if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR) + update_mask |= MSR_EFER_FFXSR; + if (env->cpuid_ext2_features & CPUID_EXT2_NX) + update_mask |= MSR_EFER_NXE; + if (env->cpuid_ext3_features & CPUID_EXT3_SVM) + update_mask |= MSR_EFER_SVME; + if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR) + update_mask |= MSR_EFER_FFXSR; + cpu_load_efer(env, (env->efer & ~update_mask) | + (val & update_mask)); + } + break; + case MSR_STAR: + env->star = val; + break; + case MSR_PAT: + env->pat = val; + break; + case MSR_VM_HSAVE_PA: + env->vm_hsave = val; + break; +#ifdef TARGET_X86_64 + case MSR_LSTAR: + env->lstar = val; + break; + case MSR_CSTAR: + env->cstar = val; + break; + case MSR_FMASK: + env->fmask = val; + break; + case MSR_FSBASE: + env->segs[R_FS].base = val; + break; + case MSR_GSBASE: + env->segs[R_GS].base = val; + break; + case MSR_KERNELGSBASE: + env->kernelgsbase = val; + break; +#endif +# ifndef VBOX + case MSR_MTRRphysBase(0): + case MSR_MTRRphysBase(1): + case MSR_MTRRphysBase(2): + case MSR_MTRRphysBase(3): + case MSR_MTRRphysBase(4): + case MSR_MTRRphysBase(5): + case MSR_MTRRphysBase(6): + case MSR_MTRRphysBase(7): + env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val; + break; + case MSR_MTRRphysMask(0): + case MSR_MTRRphysMask(1): + case MSR_MTRRphysMask(2): + case MSR_MTRRphysMask(3): + case MSR_MTRRphysMask(4): + case MSR_MTRRphysMask(5): + case MSR_MTRRphysMask(6): + case MSR_MTRRphysMask(7): + env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val; + break; + case MSR_MTRRfix64K_00000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val; + break; + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val; + break; + case MSR_MTRRdefType: + env->mtrr_deftype = val; + break; + case MSR_MCG_STATUS: + 
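/* machine check MSRs are only modeled here in non-VBox builds; VBox forwards all MSR writes to CPUM via cpu_wrmsr() below */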
env->mcg_status = val; + break; + case MSR_MCG_CTL: + if ((env->mcg_cap & MCG_CTL_P) + && (val == 0 || val == ~(uint64_t)0)) + env->mcg_ctl = val; + break; + case MSR_TSC_AUX: + env->tsc_aux = val; + break; +# endif /* !VBOX */ + default: +# ifndef VBOX + if ((uint32_t)ECX >= MSR_MC0_CTL + && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { + uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL; + if ((offset & 0x3) != 0 + || (val == 0 || val == ~(uint64_t)0)) + env->mce_banks[offset] = val; + break; + } + /* XXX: exception ? */ +# endif + break; + } + +# ifdef VBOX + /* call CPUM. */ + if (cpu_wrmsr(env, (uint32_t)ECX, val) != 0) + { + /** @todo be a brave man and raise a \#GP(0) here as we should... */ + } +# endif +} + +void helper_rdmsr(void) +{ + uint64_t val; + + helper_svm_check_intercept_param(SVM_EXIT_MSR, 0); + + switch((uint32_t)ECX) { + case MSR_IA32_SYSENTER_CS: + val = env->sysenter_cs; + break; + case MSR_IA32_SYSENTER_ESP: + val = env->sysenter_esp; + break; + case MSR_IA32_SYSENTER_EIP: + val = env->sysenter_eip; + break; + case MSR_IA32_APICBASE: +#ifndef VBOX + val = cpu_get_apic_base(env->apic_state); +#else /* VBOX */ + val = cpu_get_apic_base(env); +#endif /* VBOX */ + break; + case MSR_EFER: + val = env->efer; + break; + case MSR_STAR: + val = env->star; + break; + case MSR_PAT: + val = env->pat; + break; + case MSR_VM_HSAVE_PA: + val = env->vm_hsave; + break; +# ifndef VBOX /* forward to CPUMQueryGuestMsr. */ + case MSR_IA32_PERF_STATUS: + /* tsc_increment_by_tick */ + val = 1000ULL; + /* CPU multiplier */ + val |= (((uint64_t)4ULL) << 40); + break; +# endif /* !VBOX */ +#ifdef TARGET_X86_64 + case MSR_LSTAR: + val = env->lstar; + break; + case MSR_CSTAR: + val = env->cstar; + break; + case MSR_FMASK: + val = env->fmask; + break; + case MSR_FSBASE: + val = env->segs[R_FS].base; + break; + case MSR_GSBASE: + val = env->segs[R_GS].base; + break; + case MSR_KERNELGSBASE: + val = env->kernelgsbase; + break; +# ifndef VBOX + case MSR_TSC_AUX: + val = env->tsc_aux; + break; +# endif /*!VBOX*/ +#endif +# ifndef VBOX + case MSR_MTRRphysBase(0): + case MSR_MTRRphysBase(1): + case MSR_MTRRphysBase(2): + case MSR_MTRRphysBase(3): + case MSR_MTRRphysBase(4): + case MSR_MTRRphysBase(5): + case MSR_MTRRphysBase(6): + case MSR_MTRRphysBase(7): + val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base; + break; + case MSR_MTRRphysMask(0): + case MSR_MTRRphysMask(1): + case MSR_MTRRphysMask(2): + case MSR_MTRRphysMask(3): + case MSR_MTRRphysMask(4): + case MSR_MTRRphysMask(5): + case MSR_MTRRphysMask(6): + case MSR_MTRRphysMask(7): + val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask; + break; + case MSR_MTRRfix64K_00000: + val = env->mtrr_fixed[0]; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1]; + break; + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3]; + break; + case MSR_MTRRdefType: + val = env->mtrr_deftype; + break; + case MSR_MTRRcap: + if (env->cpuid_features & CPUID_MTRR) + val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED; + else + /* XXX: exception ? 
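   (a real CPU raises #GP(0) for reads of unimplemented MSRs; we
   silently return 0 instead)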
*/ + val = 0; + break; + case MSR_MCG_CAP: + val = env->mcg_cap; + break; + case MSR_MCG_CTL: + if (env->mcg_cap & MCG_CTL_P) + val = env->mcg_ctl; + else + val = 0; + break; + case MSR_MCG_STATUS: + val = env->mcg_status; + break; +# endif /* !VBOX */ + default: +# ifndef VBOX + if ((uint32_t)ECX >= MSR_MC0_CTL + && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { + uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL; + val = env->mce_banks[offset]; + break; + } + /* XXX: exception ? */ + val = 0; +# else /* VBOX */ + if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0) + { + /** @todo be a brave man and raise a \#GP(0) here as we should... */ + val = 0; + } +# endif /* VBOX */ + break; + } + EAX = (uint32_t)(val); + EDX = (uint32_t)(val >> 32); + +# ifdef VBOX_STRICT + if ((uint32_t)ECX != MSR_IA32_TSC) { + if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0) + val = 0; + AssertMsg(val == RT_MAKE_U64(EAX, EDX), ("idMsr=%#x val=%#llx eax:edx=%#llx\n", (uint32_t)ECX, val, RT_MAKE_U64(EAX, EDX))); + } +# endif +} +#endif + +target_ulong helper_lsl(target_ulong selector1) +{ + unsigned int limit; + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl, type; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) { + /* conforming */ + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: + case 2: + case 3: + case 9: + case 11: + break; + default: + goto fail; + } + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return 0; + } + } + limit = get_seg_limit(e1, e2); + CC_SRC = eflags | CC_Z; + return limit; +} + +target_ulong helper_lar(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl, type; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) { + /* conforming */ + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 9: + case 11: + case 12: + break; + default: + goto fail; + } + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return 0; + } + } + CC_SRC = eflags | CC_Z; +#ifdef VBOX /* AMD says 0x00ffff00, while intel says 0x00fxff00. Bochs and IEM does like AMD says (x=f). 
*/ + return e2 & 0x00ffff00; +#else + return e2 & 0x00f0ff00; +#endif +} + +void helper_verr(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + if (!(e2 & DESC_S_MASK)) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_CS_MASK) { + if (!(e2 & DESC_R_MASK)) + goto fail; + if (!(e2 & DESC_C_MASK)) { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return; + } + } + CC_SRC = eflags | CC_Z; +} + +void helper_verw(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + if (!(e2 & DESC_S_MASK)) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_CS_MASK) { + goto fail; + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + if (!(e2 & DESC_W_MASK)) { + fail: + CC_SRC = eflags & ~CC_Z; + return; + } + } + CC_SRC = eflags | CC_Z; +} + +/* x87 FPU helpers */ + +static void fpu_set_exception(int mask) +{ + env->fpus |= mask; + if (env->fpus & (~env->fpuc & FPUC_EM)) + env->fpus |= FPUS_SE | FPUS_B; +} + +static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b) +{ + if (b == 0.0) + fpu_set_exception(FPUS_ZE); + return a / b; +} + +static void fpu_raise_exception(void) +{ + if (env->cr[0] & CR0_NE_MASK) { + raise_exception(EXCP10_COPR); + } +#if !defined(CONFIG_USER_ONLY) + else { + cpu_set_ferr(env); + } +#endif +} + +void helper_flds_FT0(uint32_t val) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = val; + FT0 = float32_to_floatx(u.f, &env->fp_status); +} + +void helper_fldl_FT0(uint64_t val) +{ + union { + float64 f; + uint64_t i; + } u; + u.i = val; + FT0 = float64_to_floatx(u.f, &env->fp_status); +} + +void helper_fildl_FT0(int32_t val) +{ + FT0 = int32_to_floatx(val, &env->fp_status); +} + +void helper_flds_ST0(uint32_t val) +{ + int new_fpstt; + union { + float32 f; + uint32_t i; + } u; + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float32_to_floatx(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fldl_ST0(uint64_t val) +{ + int new_fpstt; + union { + float64 f; + uint64_t i; + } u; + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float64_to_floatx(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildl_ST0(int32_t val) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int32_to_floatx(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildll_ST0(int64_t val) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int64_to_floatx(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +#ifndef VBOX +uint32_t helper_fsts_ST0(void) +#else +RTCCUINTREG helper_fsts_ST0(void) +#endif +{ + union { + float32 f; + uint32_t i; + } u; + u.f = 
floatx_to_float32(ST0, &env->fp_status); + return u.i; +} + +uint64_t helper_fstl_ST0(void) +{ + union { + float64 f; + uint64_t i; + } u; + u.f = floatx_to_float64(ST0, &env->fp_status); + return u.i; +} + +#ifndef VBOX +int32_t helper_fist_ST0(void) +#else +RTCCINTREG helper_fist_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32(ST0, &env->fp_status); + if (val != (int16_t)val) + val = -32768; + return val; +} + +#ifndef VBOX +int32_t helper_fistl_ST0(void) +#else +RTCCINTREG helper_fistl_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32(ST0, &env->fp_status); + return val; +} + +int64_t helper_fistll_ST0(void) +{ + int64_t val; + val = floatx_to_int64(ST0, &env->fp_status); + return val; +} + +#ifndef VBOX +int32_t helper_fistt_ST0(void) +#else +RTCCINTREG helper_fistt_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + if (val != (int16_t)val) + val = -32768; + return val; +} + +#ifndef VBOX +int32_t helper_fisttl_ST0(void) +#else +RTCCINTREG helper_fisttl_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + return val; +} + +int64_t helper_fisttll_ST0(void) +{ + int64_t val; + val = floatx_to_int64_round_to_zero(ST0, &env->fp_status); + return val; +} + +void helper_fldt_ST0(target_ulong ptr) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = helper_fldt(ptr); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fstt_ST0(target_ulong ptr) +{ + helper_fstt(ST0, ptr); +} + +void helper_fpush(void) +{ + fpush(); +} + +void helper_fpop(void) +{ + fpop(); +} + +void helper_fdecstp(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fpus &= (~0x4700); +} + +void helper_fincstp(void) +{ + env->fpstt = (env->fpstt + 1) & 7; + env->fpus &= (~0x4700); +} + +/* FPU move */ + +void helper_ffree_STN(int st_index) +{ + env->fptags[(env->fpstt + st_index) & 7] = 1; +} + +void helper_fmov_ST0_FT0(void) +{ + ST0 = FT0; +} + +void helper_fmov_FT0_STN(int st_index) +{ + FT0 = ST(st_index); +} + +void helper_fmov_ST0_STN(int st_index) +{ + ST0 = ST(st_index); +} + +void helper_fmov_STN_ST0(int st_index) +{ + ST(st_index) = ST0; +} + +void helper_fxchg_ST0_STN(int st_index) +{ + CPU86_LDouble tmp; + tmp = ST(st_index); + ST(st_index) = ST0; + ST0 = tmp; +} + +/* FPU operations */ + +static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; + +void helper_fcom_ST0_FT0(void) +{ + int ret; + + ret = floatx_compare(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; +} + +void helper_fucom_ST0_FT0(void) +{ + int ret; + + ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1]; +} + +static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_fcomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx_compare(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fucomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fadd_ST0_FT0(void) +{ + ST0 += FT0; +} + +void helper_fmul_ST0_FT0(void) +{ + ST0 *= FT0; +} + +void helper_fsub_ST0_FT0(void) +{ + ST0 -= FT0; +} + +void 
helper_fsubr_ST0_FT0(void) +{ + ST0 = FT0 - ST0; +} + +void helper_fdiv_ST0_FT0(void) +{ + ST0 = helper_fdiv(ST0, FT0); +} + +void helper_fdivr_ST0_FT0(void) +{ + ST0 = helper_fdiv(FT0, ST0); +} + +/* fp operations between STN and ST0 */ + +void helper_fadd_STN_ST0(int st_index) +{ + ST(st_index) += ST0; +} + +void helper_fmul_STN_ST0(int st_index) +{ + ST(st_index) *= ST0; +} + +void helper_fsub_STN_ST0(int st_index) +{ + ST(st_index) -= ST0; +} + +void helper_fsubr_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = ST0 - *p; +} + +void helper_fdiv_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = helper_fdiv(*p, ST0); +} + +void helper_fdivr_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = helper_fdiv(ST0, *p); +} + +/* misc FPU operations */ +void helper_fchs_ST0(void) +{ + ST0 = floatx_chs(ST0); +} + +void helper_fabs_ST0(void) +{ + ST0 = floatx_abs(ST0); +} + +void helper_fld1_ST0(void) +{ + ST0 = f15rk[1]; +} + +void helper_fldl2t_ST0(void) +{ + ST0 = f15rk[6]; +} + +void helper_fldl2e_ST0(void) +{ + ST0 = f15rk[5]; +} + +void helper_fldpi_ST0(void) +{ + ST0 = f15rk[2]; +} + +void helper_fldlg2_ST0(void) +{ + ST0 = f15rk[3]; +} + +void helper_fldln2_ST0(void) +{ + ST0 = f15rk[4]; +} + +void helper_fldz_ST0(void) +{ + ST0 = f15rk[0]; +} + +void helper_fldz_FT0(void) +{ + FT0 = f15rk[0]; +} + +#ifndef VBOX +uint32_t helper_fnstsw(void) +#else +RTCCUINTREG helper_fnstsw(void) +#endif +{ + return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; +} + +#ifndef VBOX +uint32_t helper_fnstcw(void) +#else +RTCCUINTREG helper_fnstcw(void) +#endif +{ + return env->fpuc; +} + +static void update_fp_status(void) +{ + int rnd_type; + + /* set rounding mode */ + switch(env->fpuc & RC_MASK) { + default: + case RC_NEAR: + rnd_type = float_round_nearest_even; + break; + case RC_DOWN: + rnd_type = float_round_down; + break; + case RC_UP: + rnd_type = float_round_up; + break; + case RC_CHOP: + rnd_type = float_round_to_zero; + break; + } + set_float_rounding_mode(rnd_type, &env->fp_status); +#ifdef FLOATX80 + switch((env->fpuc >> 8) & 3) { + case 0: + rnd_type = 32; + break; + case 2: + rnd_type = 64; + break; + case 3: + default: + rnd_type = 80; + break; + } + set_floatx80_rounding_precision(rnd_type, &env->fp_status); +#endif +} + +void helper_fldcw(uint32_t val) +{ + env->fpuc = val; + update_fp_status(); +} + +void helper_fclex(void) +{ + env->fpus &= 0x7f00; +} + +void helper_fwait(void) +{ + if (env->fpus & FPUS_SE) + fpu_raise_exception(); +} + +void helper_fninit(void) +{ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +/* BCD ops */ + +void helper_fbld_ST0(target_ulong ptr) +{ + CPU86_LDouble tmp; + uint64_t val; + unsigned int v; + int i; + + val = 0; + for(i = 8; i >= 0; i--) { + v = ldub(ptr + i); + val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); + } + tmp = val; + if (ldub(ptr + 9) & 0x80) + tmp = -tmp; + fpush(); + ST0 = tmp; +} + +void helper_fbst_ST0(target_ulong ptr) +{ + int v; + target_ulong mem_ref, mem_end; + int64_t val; + + val = floatx_to_int64(ST0, &env->fp_status); + mem_ref = ptr; + mem_end = mem_ref + 9; + if (val < 0) { + stb(mem_end, 0x80); + val = -val; + } else { + stb(mem_end, 0x00); + } + while (mem_ref < mem_end) { + if (val == 0) + break; + v = val % 100; + val = val / 100; + v = ((v / 10) << 4) | (v % 10); + 
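/* two packed BCD digits per byte: tens in the high nibble, units in the low */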
stb(mem_ref++, v); + } + while (mem_ref < mem_end) { + stb(mem_ref++, 0); + } +} + +void helper_f2xm1(void) +{ + ST0 = pow(2.0,ST0) - 1.0; +} + +void helper_fyl2x(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if (fptemp>0.0){ + fptemp = log(fptemp)/log(2.0); /* log2(ST) */ + ST1 *= fptemp; + fpop(); + } else { + env->fpus &= (~0x4700); + env->fpus |= 0x400; + } +} + +void helper_fptan(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = tan(fptemp); + fpush(); + ST0 = 1.0; + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**52 only */ + } +} + +void helper_fpatan(void) +{ + CPU86_LDouble fptemp, fpsrcop; + + fpsrcop = ST1; + fptemp = ST0; + ST1 = atan2(fpsrcop,fptemp); + fpop(); +} + +void helper_fxtract(void) +{ + CPU86_LDoubleU temp; + unsigned int expdif; + + temp.d = ST0; + expdif = EXPD(temp) - EXPBIAS; + /*DP exponent bias*/ + ST0 = expdif; + fpush(); + BIASEXPONENT(temp); + ST0 = temp.d; +} + +void helper_fprem1(void) +{ + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + +#ifndef VBOX /* Unfortunately, we cannot handle isinf/isnan easily in wrapper */ + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { +#else + if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { +#endif + ST0 = 0.0 / 0.0; /* NaN */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = ST0; + fptemp = ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if (expdif < 53) { + dblq = fpsrcop / fptemp; + /* round dblq towards nearest integer */ + dblq = rint(dblq); + ST0 = fpsrcop - fptemp * dblq; + + /* convert dblq to q by truncating towards zero */ + if (dblq < 0.0) + q = (signed long long int)(-dblq); + else + q = (signed long long int)dblq; + + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C3,C1) <-- (q2,q1,q0) */ + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, expdif - 50); + fpsrcop = (ST0 / ST1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0) ? + -(floor(fabs(fpsrcop))) : floor(fpsrcop); + ST0 -= (ST1 * fpsrcop * fptemp); + } +} + +void helper_fprem(void) +{ + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + +#ifndef VBOX /* Unfortunately, we cannot easily handle isinf/isnan in wrapper */ + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { +#else + if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { +#endif + ST0 = 0.0 / 0.0; /* NaN */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = (CPU86_LDouble)ST0; + fptemp = (CPU86_LDouble)ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if ( expdif < 53 ) { + dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/; + /* round dblq towards zero */ + dblq = (dblq < 0.0) ? 
ceil(dblq) : floor(dblq);
+ ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
+
+ /* convert dblq to q by truncating towards zero */
+ if (dblq < 0.0)
+ q = (signed long long int)(-dblq);
+ else
+ q = (signed long long int)dblq;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
+ env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
+ } else {
+ int N = 32 + (expdif % 32); /* as per AMD docs */
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, (double)(expdif - N));
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0) ?
+ -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+void helper_fyl2xp1(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp+1.0)>0.0) {
+ fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+ ST1 *= fptemp;
+ fpop();
+ } else {
+ env->fpus &= (~0x4700);
+ env->fpus |= 0x400;
+ }
+}
+
+void helper_fsqrt(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if (fptemp<0.0) {
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ env->fpus |= 0x400;
+ }
+ ST0 = sqrt(fptemp);
+}
+
+void helper_fsincos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ fpush();
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+void helper_frndint(void)
+{
+ ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(void)
+{
+ ST0 = ldexp (ST0, (int)(ST1));
+}
+
+void helper_fsin(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**53 only */
+ }
+}
+
+void helper_fcos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+void helper_fxam_ST0(void)
+{
+ CPU86_LDoubleU temp;
+ int expdif;
+
+ temp.d = ST0;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ if (SIGND(temp))
+ env->fpus |= 0x200; /* C1 <-- 1 */
+
+ /* XXX: test fptags too */
+ expdif = EXPD(temp);
+ if (expdif == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+ if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+ if (MANTD(temp) == 0)
+#endif
+ env->fpus |= 0x500 /*Infinity*/;
+ else
+ env->fpus |= 0x100 /*NaN*/;
+ } else if (expdif == 0) {
+ if (MANTD(temp) == 0)
+ env->fpus |= 0x4000 /*Zero*/;
+ else
+ env->fpus |= 0x4400 /*Denormal*/;
+ } else {
+ env->fpus |= 0x400;
+ }
+}
+
+void helper_fstenv(target_ulong ptr, int data32)
+{
+ int fpus, fptag, exp, i;
+ uint64_t mant;
+ CPU86_LDoubleU tmp;
+
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
+ for (i=7; i>=0; i--) {
+ fptag <<= 2;
+ if (env->fptags[i]) {
+ fptag |= 3;
+ } else {
+ tmp.d = env->fpregs[i].d;
+ exp = EXPD(tmp);
+ mant = MANTD(tmp);
+ if (exp == 0 && mant == 0) {
+ /* zero */
+ fptag |= 1;
+ } else if (exp == 0 || exp == MAXEXPD
+#ifdef USE_X86LDOUBLE
+ || (mant & (1LL << 63)) == 0
+#endif
+ ) {
+ /* NaNs, infinity, denormal */
+ fptag |= 2;
+ }
+ }
+ }
+ if (data32) {
+ /* 32 bit */
+ stl(ptr, env->fpuc);
+ stl(ptr + 4, fpus);
+ stl(ptr + 8,
fptag); + stl(ptr + 12, 0); /* fpip */ + stl(ptr + 16, 0); /* fpcs */ + stl(ptr + 20, 0); /* fpoo */ + stl(ptr + 24, 0); /* fpos */ + } else { + /* 16 bit */ + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag); + stw(ptr + 6, 0); + stw(ptr + 8, 0); + stw(ptr + 10, 0); + stw(ptr + 12, 0); + } +} + +void helper_fldenv(target_ulong ptr, int data32) +{ + int i, fpus, fptag; + + if (data32) { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 4); + fptag = lduw(ptr + 8); + } + else { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + } + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } +} + +void helper_fsave(target_ulong ptr, int data32) +{ + CPU86_LDouble tmp; + int i; + + helper_fstenv(ptr, data32); + + ptr += (14 << data32); + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, ptr); + ptr += 10; + } + + /* fninit */ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +void helper_frstor(target_ulong ptr, int data32) +{ + CPU86_LDouble tmp; + int i; + + helper_fldenv(ptr, data32); + ptr += (14 << data32); + + for(i = 0;i < 8; i++) { + tmp = helper_fldt(ptr); + ST(i) = tmp; + ptr += 10; + } +} + +void helper_fxsave(target_ulong ptr, int data64) +{ + int fpus, fptag, i, nb_xmm_regs; + CPU86_LDouble tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(EXCP0D_GPF); + } + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag ^ 0xff); +#ifdef TARGET_X86_64 + if (data64) { + stq(ptr + 0x08, 0); /* rip */ + stq(ptr + 0x10, 0); /* rdp */ + } else +#endif + { + stl(ptr + 0x08, 0); /* eip */ + stl(ptr + 0x0c, 0); /* sel */ + stl(ptr + 0x10, 0); /* dp */ + stl(ptr + 0x14, 0); /* sel */ + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, addr); + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ + stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ + if (env->hflags & HF_CS64_MASK) + nb_xmm_regs = 16; + else + nb_xmm_regs = 8; + addr = ptr + 0xa0; + /* Fast FXSAVE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + for(i = 0; i < nb_xmm_regs; i++) { + stq(addr, env->xmm_regs[i].XMM_Q(0)); + stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); + addr += 16; + } + } + } +} + +void helper_fxrstor(target_ulong ptr, int data64) +{ + int i, fpus, fptag, nb_xmm_regs; + CPU86_LDouble tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(EXCP0D_GPF); + } + + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + fptag ^= 0xff; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = helper_fldt(addr); + ST(i) = tmp; + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + env->mxcsr = ldl(ptr + 0x18); + //ldl(ptr + 0x1c); + if (env->hflags & HF_CS64_MASK) + nb_xmm_regs = 16; + else + 
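/* legacy/compatibility mode: only xmm0-xmm7 are architecturally visible */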
nb_xmm_regs = 8;
+ addr = ptr + 0xa0;
+ /* Fast FXRESTORE leaves out the XMM registers */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if !defined(VBOX) || __GNUC__ < 4
+ env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+ env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+# if 1
+ env->xmm_regs[i].XMM_L(0) = ldl(addr);
+ env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+ env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+ env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+# else
+ /* this works fine on Mac OS X, gcc 4.0.1 */
+ uint64_t u64 = ldq(addr);
+ env->xmm_regs[i].XMM_Q(0) = u64;
+ u64 = ldq(addr + 8);
+ env->xmm_regs[i].XMM_Q(1) = u64;
+# endif
+#endif
+ addr += 16;
+ }
+ }
+ }
+}
+
+#ifndef USE_X86LDOUBLE
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+ CPU86_LDoubleU temp;
+ int e;
+
+ temp.d = f;
+ /* mantissa */
+ *pmant = (MANTD(temp) << 11) | (1LL << 63);
+ /* exponent + sign */
+ e = EXPD(temp) - EXPBIAS + 16383;
+ e |= SIGND(temp) >> 16;
+ *pexp = e;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+ CPU86_LDoubleU temp;
+ int e;
+ uint64_t ll;
+
+ /* XXX: handle overflow ? */
+ e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+ e |= (upper >> 4) & 0x800; /* sign */
+ ll = (mant >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+ temp.l.upper = (e << 20) | (ll >> 32);
+ temp.l.lower = ll;
+#else
+ temp.ll = ll | ((uint64_t)e << 52);
+#endif
+ return temp.d;
+}
+
+#else
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+ CPU86_LDoubleU temp;
+
+ temp.d = f;
+ *pmant = temp.l.lower;
+ *pexp = temp.l.upper;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+ CPU86_LDoubleU temp;
+
+ temp.l.upper = upper;
+ temp.l.lower = mant;
+ return temp.d;
+}
+#endif
+
+#ifdef TARGET_X86_64
+
+//#define DEBUG_MULDIV
+
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ *plow += a;
+ /* carry test */
+ if (*plow < a)
+ (*phigh)++;
+ *phigh += b;
+}
+
+static void neg128(uint64_t *plow, uint64_t *phigh)
+{
+ *plow = ~ *plow;
+ *phigh = ~ *phigh;
+ add128(plow, phigh, 1, 0);
+}
+
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
+{
+ uint64_t q, r, a1, a0;
+ int i, qb, ab;
+
+ a0 = *plow;
+ a1 = *phigh;
+ if (a1 == 0) {
+ q = a0 / b;
+ r = a0 % b;
+ *plow = q;
+ *phigh = r;
+ } else {
+ if (a1 >= b)
+ return 1;
+ /* XXX: use a better algorithm */
+ for(i = 0; i < 64; i++) {
+ ab = a1 >> 63;
+ a1 = (a1 << 1) | (a0 >> 63);
+ if (ab || a1 >= b) {
+ a1 -= b;
+ qb = 1;
+ } else {
+ qb = 0;
+ }
+ a0 = (a0 << 1) | qb;
+ }
+#if defined(DEBUG_MULDIV)
+ printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+ *phigh, *plow, b, a0, a1);
+#endif
+ *plow = a0;
+ *phigh = a1;
+ }
+ return 0;
+}
+
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
+{
+ int sa, sb;
+ sa = ((int64_t)*phigh < 0);
+ if (sa)
+ neg128(plow, phigh);
+ sb = (b < 0);
+ if (sb)
+ b = -b;
+ if (div64(plow, phigh, b) != 0)
+ return 1;
+ if (sa ^ sb) {
+ if (*plow > (1ULL << 63))
+ return 1;
+ *plow = - *plow;
+ } else {
+ if (*plow >= (1ULL << 63))
+ return 1;
+ }
+ if (sa)
+ *phigh = - *phigh;
+ return 0;
+}
+
+void helper_mulq_EAX_T0(target_ulong t0)
+{
+ uint64_t r0, r1;
+
+ mulu64(&r0, &r1, EAX, t0);
+ EAX = r0;
+ EDX =
r1; + CC_DST = r0; + CC_SRC = r1; +} + +void helper_imulq_EAX_T0(target_ulong t0) +{ + uint64_t r0, r1; + + muls64(&r0, &r1, EAX, t0); + EAX = r0; + EDX = r1; + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); +} + +target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1) +{ + uint64_t r0, r1; + + muls64(&r0, &r1, t0, t1); + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); + return r0; +} + +void helper_divq_EAX(target_ulong t0) +{ + uint64_t r0, r1; + if (t0 == 0) { + raise_exception(EXCP00_DIVZ); + } + r0 = EAX; + r1 = EDX; + if (div64(&r0, &r1, t0)) + raise_exception(EXCP00_DIVZ); + EAX = r0; + EDX = r1; +} + +void helper_idivq_EAX(target_ulong t0) +{ + uint64_t r0, r1; + if (t0 == 0) { + raise_exception(EXCP00_DIVZ); + } + r0 = EAX; + r1 = EDX; + if (idiv64(&r0, &r1, t0)) + raise_exception(EXCP00_DIVZ); + EAX = r0; + EDX = r1; +} +#endif + +static void do_hlt(void) +{ + env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ + env->halted = 1; + env->exception_index = EXCP_HLT; + cpu_loop_exit(); +} + +void helper_hlt(int next_eip_addend) +{ + helper_svm_check_intercept_param(SVM_EXIT_HLT, 0); + EIP += next_eip_addend; + + do_hlt(); +} + +void helper_monitor(target_ulong ptr) +{ +#ifdef VBOX + if ((uint32_t)ECX > 1) + raise_exception(EXCP0D_GPF); +#else /* !VBOX */ + if ((uint32_t)ECX != 0) + raise_exception(EXCP0D_GPF); +#endif /* !VBOX */ + /* XXX: store address ? */ + helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0); +} + +void helper_mwait(int next_eip_addend) +{ + if ((uint32_t)ECX != 0) + raise_exception(EXCP0D_GPF); +#ifdef VBOX + helper_hlt(next_eip_addend); +#else /* !VBOX */ + helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0); + EIP += next_eip_addend; + + /* XXX: not complete but not completely erroneous */ + if (env->cpu_index != 0 || env->next_cpu != NULL) { + /* more than one CPU: do not sleep because another CPU may + wake this one */ + } else { + do_hlt(); + } +#endif /* !VBOX */ +} + +void helper_debug(void) +{ + env->exception_index = EXCP_DEBUG; + cpu_loop_exit(); +} + +void helper_reset_rf(void) +{ + env->eflags &= ~RF_MASK; +} + +void helper_raise_interrupt(int intno, int next_eip_addend) +{ + raise_interrupt(intno, 1, 0, next_eip_addend); +} + +void helper_raise_exception(int exception_index) +{ + raise_exception(exception_index); +} + +void helper_cli(void) +{ + env->eflags &= ~IF_MASK; +} + +void helper_sti(void) +{ + env->eflags |= IF_MASK; +} + +#ifdef VBOX +void helper_cli_vme(void) +{ + env->eflags &= ~VIF_MASK; +} + +void helper_sti_vme(void) +{ + /* First check, then change eflags according to the AMD manual */ + if (env->eflags & VIP_MASK) { + raise_exception(EXCP0D_GPF); + } + env->eflags |= VIF_MASK; +} +#endif /* VBOX */ + +#if 0 +/* vm86plus instructions */ +void helper_cli_vm(void) +{ + env->eflags &= ~VIF_MASK; +} + +void helper_sti_vm(void) +{ + env->eflags |= VIF_MASK; + if (env->eflags & VIP_MASK) { + raise_exception(EXCP0D_GPF); + } +} +#endif + +void helper_set_inhibit_irq(void) +{ + env->hflags |= HF_INHIBIT_IRQ_MASK; +} + +void helper_reset_inhibit_irq(void) +{ + env->hflags &= ~HF_INHIBIT_IRQ_MASK; +} + +void helper_boundw(target_ulong a0, int v) +{ + int low, high; + low = ldsw(a0); + high = ldsw(a0 + 2); + v = (int16_t)v; + if (v < low || v > high) { + raise_exception(EXCP05_BOUND); + } +} + +void helper_boundl(target_ulong a0, int v) +{ + int low, high; + low = ldl(a0); + high = ldl(a0 + 4); + if (v < low || v > high) { + raise_exception(EXCP05_BOUND); + } +} + +static float 
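CC_SRC in the imulq helpers above encodes IMUL's flag rule: CF and OF are set exactly when the high half of the 128-bit product is not the sign-extension of the low half. The same test restated standalone (illustration only; GCC/Clang __int128 again):

#include <stdbool.h>
#include <stdint.h>

/* true iff a signed 64x64 -> 128 multiply does not fit in 64 bits,
   i.e. the condition behind CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)) */
static bool imul64_overflows(int64_t a, int64_t b)
{
    __int128 p = (__int128)a * b;
    int64_t lo = (int64_t)p;
    int64_t hi = (int64_t)(p >> 64);
    return hi != (lo >> 63);   /* compare high half with sign-extended low half */
}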
approx_rsqrt(float a) +{ + return 1.0 / sqrt(a); +} + +static float approx_rcp(float a) +{ + return 1.0 / a; +} + +#if !defined(CONFIG_USER_ONLY) + +#define MMUSUFFIX _mmu + +#define SHIFT 0 +#include "softmmu_template.h" + +#define SHIFT 1 +#include "softmmu_template.h" + +#define SHIFT 2 +#include "softmmu_template.h" + +#define SHIFT 3 +#include "softmmu_template.h" + +#endif + +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) +/* This code assumes real physical address always fit into host CPU reg, + which is wrong in general, but true for our current use cases. */ +RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS8(addr); +} +RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU8(addr); +} +void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU8(addr, val); +} +RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS16(addr); +} +RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU16(addr); +} +void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU16(addr, val); +} +RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS32(addr); +} +RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU32(addr); +} +void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU32(addr, val); +} +uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU64(addr); +} +void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val) +{ + remR3PhysWriteU64(addr, val); +} +#endif /* VBOX */ + +#if !defined(CONFIG_USER_ONLY) +/* try to fill the TLB and return an exception if error. If retaddr is + NULL, it means that the function was called in C code (i.e. not + from generated code or from helper.c) */ +/* XXX: fix it to restore all registers */ +void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr) +{ + TranslationBlock *tb; + int ret; + uintptr_t pc; + CPUX86State *saved_env; + + /* XXX: hack to restore env in all cases, even if not called from + generated code */ + saved_env = env; + env = cpu_single_env; + + ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1); + if (ret) { + if (retaddr) { + /* now we have a real cpu fault */ + pc = (uintptr_t)retaddr; + tb = tb_find_pc(pc); + if (tb) { + /* the PC is inside the translated code. It means that we have + a virtual CPU fault */ + cpu_restore_state(tb, env, pc, NULL); + } + } + raise_exception_err(env->exception_index, env->error_code); + } + env = saved_env; +} +#endif + +#ifdef VBOX + +/** + * Correctly computes the eflags. + * @returns eflags. + * @param env1 CPU environment. + */ +uint32_t raw_compute_eflags(CPUX86State *env1) +{ + CPUX86State *savedenv = env; + uint32_t efl; + env = env1; + efl = compute_eflags(); + env = savedenv; + return efl; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + */ +uint8_t read_byte(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint8_t u8; + env = env1; + u8 = ldub_kernel(addr); + env = savedenv; + return u8; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. 
+ * @param pvAddr GC Virtual address. + */ +uint16_t read_word(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint16_t u16; + env = env1; + u16 = lduw_kernel(addr); + env = savedenv; + return u16; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + */ +uint32_t read_dword(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint32_t u32; + env = env1; + u32 = ldl_kernel(addr); + env = savedenv; + return u32; +} + +/** + * Writes byte to virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + * @param val byte value + */ +void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stb(addr, val); + env = savedenv; +} + +void write_word(CPUX86State *env1, target_ulong addr, uint16_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stw(addr, val); + env = savedenv; +} + +void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stl(addr, val); + env = savedenv; +} + +/** + * Correctly loads selector into segment register with updating internal + * qemu data/caches. + * @param env1 CPU environment. + * @param seg_reg Segment register. + * @param selector Selector to load. + */ +void sync_seg(CPUX86State *env1, int seg_reg, int selector) +{ + CPUX86State *savedenv = env; +#ifdef FORCE_SEGMENT_SYNC + jmp_buf old_buf; +#endif + + env = env1; + + if ( env->eflags & X86_EFL_VM + || !(env->cr[0] & X86_CR0_PE)) + { + load_seg_vm(seg_reg, selector); + + env = savedenv; + + /* Successful sync. */ + Assert(env1->segs[seg_reg].newselector == 0); + } + else + { + /* For some reasons, it works even w/o save/restore of the jump buffer, so as code is + time critical - let's not do that */ +#ifdef FORCE_SEGMENT_SYNC + memcpy(&old_buf, &env1->jmp_env, sizeof(old_buf)); +#endif + if (setjmp(env1->jmp_env) == 0) + { + if (seg_reg == R_CS) + { + uint32_t e1, e2; + e1 = e2 = 0; + load_segment(&e1, &e2, selector); + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + } + else + helper_load_seg(seg_reg, selector); + /* We used to use tss_load_seg(seg_reg, selector); which, for some reasons ignored + loading 0 selectors, what, in order, lead to subtle problems like #3588 */ + + env = savedenv; + + /* Successful sync. */ + Assert(env1->segs[seg_reg].newselector == 0); + } + else + { + env = savedenv; + + /* Postpone sync until the guest uses the selector. */ + env1->segs[seg_reg].selector = selector; /* hidden values are now incorrect, but will be resynced when this register is accessed. 
*/ + env1->segs[seg_reg].newselector = selector; + Log(("sync_seg: out of sync seg_reg=%d selector=%#x\n", seg_reg, selector)); + env1->exception_index = -1; + env1->error_code = 0; + env1->old_exception = -1; + } +#ifdef FORCE_SEGMENT_SYNC + memcpy(&env1->jmp_env, &old_buf, sizeof(old_buf)); +#endif + } + +} + +DECLINLINE(void) tb_reset_jump(TranslationBlock *tb, int n) +{ + tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); +} + + +int emulate_single_instr(CPUX86State *env1) +{ + TranslationBlock *tb; + TranslationBlock *current; + int flags; + uint8_t *tc_ptr; + target_ulong old_eip; + + /* ensures env is loaded! */ + CPUX86State *savedenv = env; + env = env1; + + RAWEx_ProfileStart(env, STATS_EMULATE_SINGLE_INSTR); + + current = env->current_tb; + env->current_tb = NULL; + flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)); + + /* + * Translate only one instruction. + */ + ASMAtomicOrU32(&env->state, CPU_EMULATE_SINGLE_INSTR); + tb = tb_gen_code(env, env->eip + env->segs[R_CS].base, + env->segs[R_CS].base, flags, 0); + + ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR); + + + /* tb_link_phys: */ + tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2); + tb->jmp_next[0] = NULL; + tb->jmp_next[1] = NULL; + Assert(tb->jmp_next[0] == NULL); + Assert(tb->jmp_next[1] == NULL); + if (tb->tb_next_offset[0] != 0xffff) + tb_reset_jump(tb, 0); + if (tb->tb_next_offset[1] != 0xffff) + tb_reset_jump(tb, 1); + + /* + * Execute it using emulation + */ + old_eip = env->eip; + env->current_tb = tb; + + /* + * eip remains the same for repeated instructions; no idea why qemu doesn't do a jump inside the generated code + * perhaps not a very safe hack + */ + while (old_eip == env->eip) + { + tc_ptr = tb->tc_ptr; + +#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) + int fake_ret; + tcg_qemu_tb_exec(tc_ptr, fake_ret); +#else + tcg_qemu_tb_exec(tc_ptr); +#endif + + /* + * Exit once we detect an external interrupt and interrupts are enabled + */ + if ( (env->interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER)) + || ( (env->eflags & IF_MASK) + && !(env->hflags & HF_INHIBIT_IRQ_MASK) + && (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) ) + ) + { + break; + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) { + tlb_flush(env, true); + } + } + env->current_tb = current; + + tb_phys_invalidate(tb, -1); + tb_free(tb); +/* + Assert(tb->tb_next_offset[0] == 0xffff); + Assert(tb->tb_next_offset[1] == 0xffff); + Assert(tb->tb_next[0] == 0xffff); + Assert(tb->tb_next[1] == 0xffff); + Assert(tb->jmp_next[0] == NULL); + Assert(tb->jmp_next[1] == NULL); + Assert(tb->jmp_first == NULL); */ + + RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR); + + /* + * Execute the next instruction when we encounter instruction fusing. + */ + if (env->hflags & HF_INHIBIT_IRQ_MASK) + { + Log(("REM: Emulating next instruction due to instruction fusing (HF_INHIBIT_IRQ_MASK) at %RGv\n", env->eip)); + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + emulate_single_instr(env); + } + + env = savedenv; + return 0; +} + +/** + * Correctly loads a new ldtr selector. + * + * @param env1 CPU environment. + * @param selector Selector to load. 
+ */ +void sync_ldtr(CPUX86State *env1, int selector) +{ + CPUX86State *saved_env = env; + if (setjmp(env1->jmp_env) == 0) + { + env = env1; + helper_lldt(selector); + env = saved_env; + } + else + { + env = saved_env; +#ifdef VBOX_STRICT + cpu_abort(env1, "sync_ldtr: selector=%#x\n", selector); +#endif + } +} + +int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, + uint32_t *esp_ptr, int dpl) +{ + int type, index, shift; + + CPUX86State *savedenv = env; + env = env1; + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 3) + cpu_abort(env, "invalid tss type %d", type); + shift = type >> 3; + index = (dpl * 4 + 2) << shift; + if (index + (4 << shift) - 1 > env->tr.limit) + { + env = savedenv; + return 0; + } + //raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + + if (shift == 0) { + *esp_ptr = lduw_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 2); + } else { + *esp_ptr = ldl_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 4); + } + + env = savedenv; + return 1; +} + +//***************************************************************************** +// Needs to be at the bottom of the file (overriding macros) + +static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr) +{ +#ifdef USE_X86LDOUBLE + CPU86_LDoubleU tmp; + tmp.l.lower = *(uint64_t const *)ptr; + tmp.l.upper = *(uint16_t const *)(ptr + 8); + return tmp.d; +#else +# error "Busted FPU saving/restoring!" + return *(CPU86_LDouble *)ptr; +#endif +} + +static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr) +{ +#ifdef USE_X86LDOUBLE + CPU86_LDoubleU tmp; + tmp.d = f; + *(uint64_t *)(ptr + 0) = tmp.l.lower; + *(uint16_t *)(ptr + 8) = tmp.l.upper; + *(uint16_t *)(ptr + 10) = 0; + *(uint32_t *)(ptr + 12) = 0; + AssertCompile(sizeof(long double) > 8); +#else +# error "Busted FPU saving/restoring!" + *(CPU86_LDouble *)ptr = f; +#endif +} + +#undef stw +#undef stl +#undef stq +#define stw(a,b) *(uint16_t *)(a) = (uint16_t)(b) +#define stl(a,b) *(uint32_t *)(a) = (uint32_t)(b) +#define stq(a,b) *(uint64_t *)(a) = (uint64_t)(b) + +//***************************************************************************** +void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr) +{ + int fpus, fptag, i, nb_xmm_regs; + CPU86_LDouble tmp; + uint8_t *addr; + int data64 = !!(env->hflags & HF_LMA_MASK); + + if (env->cpuid_features & CPUID_FXSR) + { + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag ^ 0xff); + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt_raw(tmp, addr); + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ + stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ + nb_xmm_regs = 8 << data64; + addr = ptr + 0xa0; + for(i = 0; i < nb_xmm_regs; i++) { +#if __GNUC__ < 4 + stq(addr, env->xmm_regs[i].XMM_Q(0)); + stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); +#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. 
*/ + stl(addr, env->xmm_regs[i].XMM_L(0)); + stl(addr + 4, env->xmm_regs[i].XMM_L(1)); + stl(addr + 8, env->xmm_regs[i].XMM_L(2)); + stl(addr + 12, env->xmm_regs[i].XMM_L(3)); +#endif + addr += 16; + } + } + } + else + { + PX86FPUSTATE fp = (PX86FPUSTATE)ptr; + int fptag; + + fp->FCW = env->fpuc; + fp->FSW = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i=7; i>=0; i--) { + fptag <<= 2; + if (env->fptags[i]) { + fptag |= 3; + } else { + /* the FPU automatically computes it */ + } + } + fp->FTW = fptag; + + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt_raw(tmp, &fp->regs[i].au8[0]); + } + } +} + +//***************************************************************************** +#undef lduw +#undef ldl +#undef ldq +#define lduw(a) *(uint16_t *)(a) +#define ldl(a) *(uint32_t *)(a) +#define ldq(a) *(uint64_t *)(a) +//***************************************************************************** +void save_raw_fp_state(CPUX86State *env, uint8_t *ptr) +{ + int i, fpus, fptag, nb_xmm_regs; + CPU86_LDouble tmp; + uint8_t *addr; + int data64 = !!(env->hflags & HF_LMA_MASK); /* don't use HF_CS64_MASK here as cs hasn't been synced when this function is called. */ + + if (env->cpuid_features & CPUID_FXSR) + { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + fptag ^= 0xff; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = helper_fldt_raw(addr); + ST(i) = tmp; + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it, endianness */ + env->mxcsr = ldl(ptr + 0x18); + //ldl(ptr + 0x1c); + nb_xmm_regs = 8 << data64; + addr = ptr + 0xa0; + for(i = 0; i < nb_xmm_regs; i++) { +#if HC_ARCH_BITS == 32 + /* this is a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35135 */ + env->xmm_regs[i].XMM_L(0) = ldl(addr); + env->xmm_regs[i].XMM_L(1) = ldl(addr + 4); + env->xmm_regs[i].XMM_L(2) = ldl(addr + 8); + env->xmm_regs[i].XMM_L(3) = ldl(addr + 12); +#else + env->xmm_regs[i].XMM_Q(0) = ldq(addr); + env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); +#endif + addr += 16; + } + } + } + else + { + PX86FPUSTATE fp = (PX86FPUSTATE)ptr; + int fptag, j; + + env->fpuc = fp->FCW; + env->fpstt = (fp->FSW >> 11) & 7; + env->fpus = fp->FSW & ~0x3800; + fptag = fp->FTW; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } + j = env->fpstt; + for(i = 0;i < 8; i++) { + tmp = helper_fldt_raw(&fp->regs[i].au8[0]); + ST(i) = tmp; + } + } +} +//***************************************************************************** +//***************************************************************************** + +#endif /* VBOX */ + +/* Secure Virtual Machine helpers */ + +#if defined(CONFIG_USER_ONLY) + +void helper_vmrun(int aflag, int next_eip_addend) +{ +} +void helper_vmmcall(void) +{ +} +void helper_vmload(int aflag) +{ +} +void helper_vmsave(int aflag) +{ +} +void helper_stgi(void) +{ +} +void helper_clgi(void) +{ +} +void helper_skinit(void) +{ +} +void helper_invlpga(int aflag) +{ +} +void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) +{ +} +void helper_svm_check_intercept_param(uint32_t type, uint64_t param) +{ +} + +void helper_svm_check_io(uint32_t port, uint32_t param, + uint32_t next_eip_addend) +{ +} +#else + +static inline void svm_save_seg(target_phys_addr_t addr, + const SegmentCache *sc) +{ + stw_phys(addr + offsetof(struct vmcb_seg, selector), + sc->selector); + 
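The HC_ARCH_BITS == 32 workaround above (like the __GNUC__ >= 4 branches nearby) dodges a gcc register-allocation bug by moving each XMM quadword as two 32-bit halves; on the little-endian hosts this code targets, the two forms are interchangeable. A self-checking sketch (standalone):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    uint8_t buf[8];
    uint64_t q = 0x1122334455667788ULL;
    uint32_t lo, hi;

    memcpy(buf, &q, 8);        /* one 64-bit move ...              */
    memcpy(&lo, buf, 4);       /* ... equals two 32-bit half moves */
    memcpy(&hi, buf + 4, 4);
    assert((((uint64_t)hi << 32) | lo) == q);   /* holds on little-endian hosts */
    return 0;
}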
stq_phys(addr + offsetof(struct vmcb_seg, base), + sc->base); + stl_phys(addr + offsetof(struct vmcb_seg, limit), + sc->limit); + stw_phys(addr + offsetof(struct vmcb_seg, attrib), + ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); +} + +static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc) +{ + unsigned int flags; + + sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector)); + sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base)); + sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit)); + flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib)); + sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); +} + +static inline void svm_load_seg_cache(target_phys_addr_t addr, + CPUState *env, int seg_reg) +{ + SegmentCache sc1, *sc = &sc1; + svm_load_seg(addr, sc); + cpu_x86_load_seg_cache(env, seg_reg, sc->selector, + sc->base, sc->limit, sc->flags); +} + +void helper_vmrun(int aflag, int next_eip_addend) +{ + target_ulong addr; + uint32_t event_inj; + uint32_t int_ctl; + + helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr); + + env->vm_vmcb = addr; + + /* save the current CPU state in the hsave page */ + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); + stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base); + stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags()); + + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), + &env->segs[R_ES]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), + &env->segs[R_CS]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), + &env->segs[R_SS]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), + &env->segs[R_DS]); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip), + EIP + next_eip_addend); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX); + + /* load the interception bitmaps so we do not need to access the + vmcb in svm mode */ + env->intercept = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)); + env->intercept_cr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read)); + env->intercept_cr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write)); + env->intercept_dr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read)); + env->intercept_dr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write)); + env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions)); + + /* enable intercepts */ + env->hflags 
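svm_save_seg/svm_load_seg above compress the cached descriptor flags into AMD's 12-bit VMCB attrib format: flag bits 8-15 (type/S/DPL/P) land in attrib bits 0-7, and flag bits 20-23 (AVL/L/D-B/G) in attrib bits 8-11. A standalone round-trip check (only those bit ranges survive, which is all the segment cache carries):

#include <assert.h>
#include <stdint.h>

static uint16_t attrib_from_flags(uint32_t flags)   /* as in svm_save_seg */
{
    return (uint16_t)(((flags >> 8) & 0xff) | ((flags >> 12) & 0x0f00));
}

static uint32_t flags_from_attrib(uint16_t attrib)  /* as in svm_load_seg */
{
    return ((uint32_t)(attrib & 0xff) << 8) | (((uint32_t)attrib & 0x0f00) << 12);
}

int main(void)
{
    uint32_t flags = 0x00a09b00;    /* e.g. a present long-mode code segment */
    assert(flags_from_attrib(attrib_from_flags(flags)) == flags);
    return 0;
}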
|= HF_SVMI_MASK; + + env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset)); + + env->gdt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base)); + env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit)); + + env->idt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base)); + env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit)); + + /* clear exit_info_2 so we behave like the real hardware */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0); + + cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0))); + cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4))); + cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3))); + env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2)); + int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); + env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); + if (int_ctl & V_INTR_MASKING_MASK) { + env->v_tpr = int_ctl & V_TPR_MASK; + env->hflags2 |= HF2_VINTR_MASK; + if (env->eflags & IF_MASK) + env->hflags2 |= HF2_HIF_MASK; + } + + cpu_load_efer(env, + ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer))); + env->eflags = 0; + load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + CC_OP = CC_OP_EFLAGS; + + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es), + env, R_ES); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs), + env, R_CS); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss), + env, R_SS); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds), + env, R_DS); + + EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip)); + env->eip = EIP; + ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp)); + EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax)); + env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7)); + env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6)); + cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl))); + + /* FIXME: guest state consistency checks */ + + switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) { + case TLB_CONTROL_DO_NOTHING: + break; + case TLB_CONTROL_FLUSH_ALL_ASID: + /* FIXME: this is not 100% correct but should work for now */ + tlb_flush(env, 1); + break; + } + + env->hflags2 |= HF2_GIF_MASK; + + if (int_ctl & V_IRQ_MASK) { + env->interrupt_request |= CPU_INTERRUPT_VIRQ; + } + + /* maybe we need to inject an event */ + event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + if (event_inj & SVM_EVTINJ_VALID) { + uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK; + uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR; + uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)); + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err); + /* FIXME: need to implement valid_err */ + switch (event_inj & SVM_EVTINJ_TYPE_MASK) { + case SVM_EVTINJ_TYPE_INTR: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = -1; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR"); + /* XXX: is it always correct ? 
*/ + do_interrupt(vector, 0, 0, 0, 1); + break; + case SVM_EVTINJ_TYPE_NMI: + env->exception_index = EXCP02_NMI; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = EIP; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI"); + cpu_loop_exit(); + break; + case SVM_EVTINJ_TYPE_EXEPT: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = -1; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT"); + cpu_loop_exit(); + break; + case SVM_EVTINJ_TYPE_SOFT: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 1; + env->exception_next_eip = EIP; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT"); + cpu_loop_exit(); + break; + } + qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code); + } +} + +void helper_vmmcall(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0); + raise_exception(EXCP06_ILLOP); +} + +void helper_vmload(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", + addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)), + env->segs[R_FS].base); + + svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs), + env, R_FS); + svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs), + env, R_GS); + svm_load_seg(addr + offsetof(struct vmcb, save.tr), + &env->tr); + svm_load_seg(addr + offsetof(struct vmcb, save.ldtr), + &env->ldt); + +#ifdef TARGET_X86_64 + env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base)); + env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar)); + env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar)); + env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask)); +#endif + env->star = ldq_phys(addr + offsetof(struct vmcb, save.star)); + env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs)); + env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp)); + env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip)); +} + +void helper_vmsave(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! 
" TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", + addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)), + env->segs[R_FS].base); + + svm_save_seg(addr + offsetof(struct vmcb, save.fs), + &env->segs[R_FS]); + svm_save_seg(addr + offsetof(struct vmcb, save.gs), + &env->segs[R_GS]); + svm_save_seg(addr + offsetof(struct vmcb, save.tr), + &env->tr); + svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), + &env->ldt); + +#ifdef TARGET_X86_64 + stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase); + stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar); + stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar); + stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask); +#endif + stq_phys(addr + offsetof(struct vmcb, save.star), env->star); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip); +} + +void helper_stgi(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_STGI, 0); + env->hflags2 |= HF2_GIF_MASK; +} + +void helper_clgi(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0); + env->hflags2 &= ~HF2_GIF_MASK; +} + +void helper_skinit(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0); + /* XXX: not implemented */ + raise_exception(EXCP06_ILLOP); +} + +void helper_invlpga(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + /* XXX: could use the ASID to see if it is needed to do the + flush */ + tlb_flush_page(env, addr); +} + +void helper_svm_check_intercept_param(uint32_t type, uint64_t param) +{ + if (likely(!(env->hflags & HF_SVMI_MASK))) + return; +#ifndef VBOX + switch(type) { + case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8: + if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8: + if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7: + if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7: + if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31: + if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_MSR: + if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) { + /* FIXME: this should be read in at vmrun (faster this way?) */ + uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa)); + uint32_t t0, t1; + switch((uint32_t)ECX) { + case 0 ... 0x1fff: + t0 = (ECX * 2) % 8; + t1 = ECX / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + ECX - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 
0xc0011fff: + t0 = (16384 + ECX - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + helper_vmexit(type, param); + t0 = 0; + t1 = 0; + break; + } + if (ldub_phys(addr + t1) & ((1 << param) << t0)) + helper_vmexit(type, param); + } + break; + default: + if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) { + helper_vmexit(type, param); + } + break; + } +#else /* VBOX */ + AssertMsgFailed(("We shouldn't be here, HM supported differently!")); +#endif /* VBOX */ +} + +void helper_svm_check_io(uint32_t port, uint32_t param, + uint32_t next_eip_addend) +{ + if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) { + /* FIXME: this should be read in at vmrun (faster this way?) */ + uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa)); + uint16_t mask = (1 << ((param >> 4) & 7)) - 1; + if(lduw_phys(addr + port / 8) & (mask << (port & 7))) { + /* next EIP */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + env->eip + next_eip_addend); + helper_vmexit(SVM_EXIT_IOIO, param | (port << 16)); + } + } +} + +/* Note: currently only 32 bits of exit_code are used */ +void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) +{ + uint32_t int_ctl; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n", + exit_code, exit_info_1, + ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)), + EIP); + + if(env->hflags & HF_INHIBIT_IRQ_MASK) { + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK); + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + } else { + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0); + } + + /* Save the VM state in the vmcb */ + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), + &env->segs[R_ES]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), + &env->segs[R_CS]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), + &env->segs[R_SS]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), + &env->segs[R_DS]); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]); + + int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); + int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK); + int_ctl |= env->v_tpr & V_TPR_MASK; + if (env->interrupt_request & CPU_INTERRUPT_VIRQ) + int_ctl |= V_IRQ_MASK; + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags()); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]); + stb_phys(env->vm_vmcb + offsetof(struct 
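helper_svm_check_io above mirrors the hardware IOPM test: one bit per port, an N-byte access intercepts if any of the N covering bits is set ((param >> 4) & 7 is the access size in bytes), and the 16-bit load copes with checks that straddle a byte boundary. A standalone model:

#include <stdbool.h>
#include <stdint.h>

/* iopm points at the I/O permission map (12 KiB, so port 0xffff plus the
   widest access still lands inside it); true means intercept. */
static bool iopm_intercepted(const uint8_t *iopm, uint16_t port, unsigned size)
{
    uint16_t mask = (uint16_t)((1u << size) - 1);
    uint16_t bits = (uint16_t)(iopm[port / 8] | (iopm[port / 8 + 1] << 8));
    return ((bits >> (port & 7)) & mask) != 0;
}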
vmcb, save.cpl), env->hflags & HF_CPL_MASK); + + /* Reload the host state from vm_hsave */ + env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); + env->hflags &= ~HF_SVMI_MASK; + env->intercept = 0; + env->intercept_exceptions = 0; + env->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + env->tsc_offset = 0; + + env->gdt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base)); + env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit)); + + env->idt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base)); + env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit)); + + cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK); + cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4))); + cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3))); + /* we need to set the efer after the crs so the hidden flags get + set properly */ + cpu_load_efer(env, + ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer))); + env->eflags = 0; + load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + CC_OP = CC_OP_EFLAGS; + + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es), + env, R_ES); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs), + env, R_CS); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss), + env, R_SS); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds), + env, R_DS); + + EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip)); + ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp)); + EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax)); + + env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6)); + env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7)); + + /* other setups */ + cpu_x86_set_cpl(env, 0); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1); + + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), + ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj))); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), + ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err))); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0); + + env->hflags2 &= ~HF2_GIF_MASK; + /* FIXME: Resets the current ASID register to zero (host ASID). */ + + /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ + + /* Clears the TSC_OFFSET inside the processor. */ + + /* If the host is in PAE mode, the processor reloads the host's PDPEs + from the page table indicated the host's CR3. If the PDPEs contain + illegal state, the processor causes a shutdown. */ + + /* Forces CR0.PE = 1, RFLAGS.VM = 0. */ + env->cr[0] |= CR0_PE_MASK; + env->eflags &= ~VM_MASK; + + /* Disables all breakpoints in the host DR7 register. */ + + /* Checks the reloaded host state for consistency. */ + + /* If the host's rIP reloaded by #VMEXIT is outside the limit of the + host's code segment or non-canonical (in the case of long mode), a + #GP fault is delivered inside the host.) 
*/ + + /* remove any pending exception */ + env->exception_index = -1; + env->error_code = 0; + env->old_exception = -1; + + cpu_loop_exit(); +} + +#endif + +/* MMX/SSE */ +/* XXX: optimize by storing fptt and fptags in the static cpu state */ +void helper_enter_mmx(void) +{ + env->fpstt = 0; + *(uint32_t *)(env->fptags) = 0; + *(uint32_t *)(env->fptags + 4) = 0; +} + +void helper_emms(void) +{ + /* set to empty state */ + *(uint32_t *)(env->fptags) = 0x01010101; + *(uint32_t *)(env->fptags + 4) = 0x01010101; +} + +/* XXX: suppress */ +void helper_movq(void *d, void *s) +{ + *(uint64_t *)d = *(uint64_t *)s; +} + +#define SHIFT 0 +#include "ops_sse.h" + +#define SHIFT 1 +#include "ops_sse.h" + +#define SHIFT 0 +#include "helper_template.h" +#undef SHIFT + +#define SHIFT 1 +#include "helper_template.h" +#undef SHIFT + +#define SHIFT 2 +#include "helper_template.h" +#undef SHIFT + +#ifdef TARGET_X86_64 + +#define SHIFT 3 +#include "helper_template.h" +#undef SHIFT + +#endif + +/* bit operations */ +target_ulong helper_bsf(target_ulong t0) +{ + int count; + target_ulong res; + + res = t0; + count = 0; + while ((res & 1) == 0) { + count++; + res >>= 1; + } + return count; +} + +target_ulong helper_lzcnt(target_ulong t0, int wordsize) +{ + int count; + target_ulong res, mask; + + if (wordsize > 0 && t0 == 0) { + return wordsize; + } + res = t0; + count = TARGET_LONG_BITS - 1; + mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); + while ((res & mask) == 0) { + count--; + res <<= 1; + } + if (wordsize > 0) { + return wordsize - 1 - count; + } + return count; +} + +target_ulong helper_bsr(target_ulong t0) +{ + return helper_lzcnt(t0, 0); +} + +static int compute_all_eflags(void) +{ + return CC_SRC; +} + +static int compute_c_eflags(void) +{ + return CC_SRC & CC_C; +} + +uint32_t helper_cc_compute_all(int op) +{ + switch (op) { + default: /* should never happen */ return 0; + + case CC_OP_EFLAGS: return compute_all_eflags(); + + case CC_OP_MULB: return compute_all_mulb(); + case CC_OP_MULW: return compute_all_mulw(); + case CC_OP_MULL: return compute_all_mull(); + + case CC_OP_ADDB: return compute_all_addb(); + case CC_OP_ADDW: return compute_all_addw(); + case CC_OP_ADDL: return compute_all_addl(); + + case CC_OP_ADCB: return compute_all_adcb(); + case CC_OP_ADCW: return compute_all_adcw(); + case CC_OP_ADCL: return compute_all_adcl(); + + case CC_OP_SUBB: return compute_all_subb(); + case CC_OP_SUBW: return compute_all_subw(); + case CC_OP_SUBL: return compute_all_subl(); + + case CC_OP_SBBB: return compute_all_sbbb(); + case CC_OP_SBBW: return compute_all_sbbw(); + case CC_OP_SBBL: return compute_all_sbbl(); + + case CC_OP_LOGICB: return compute_all_logicb(); + case CC_OP_LOGICW: return compute_all_logicw(); + case CC_OP_LOGICL: return compute_all_logicl(); + + case CC_OP_INCB: return compute_all_incb(); + case CC_OP_INCW: return compute_all_incw(); + case CC_OP_INCL: return compute_all_incl(); + + case CC_OP_DECB: return compute_all_decb(); + case CC_OP_DECW: return compute_all_decw(); + case CC_OP_DECL: return compute_all_decl(); + + case CC_OP_SHLB: return compute_all_shlb(); + case CC_OP_SHLW: return compute_all_shlw(); + case CC_OP_SHLL: return compute_all_shll(); + + case CC_OP_SARB: return compute_all_sarb(); + case CC_OP_SARW: return compute_all_sarw(); + case CC_OP_SARL: return compute_all_sarl(); + +#ifdef TARGET_X86_64 + case CC_OP_MULQ: return compute_all_mulq(); + + case CC_OP_ADDQ: return compute_all_addq(); + + case CC_OP_ADCQ: return compute_all_adcq(); + + case CC_OP_SUBQ: return 
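helper_bsf above spins on a zero operand, so it is expected to be reached only after the generated code has branched around the t0 == 0 case; helper_lzcnt doubles as BSR (wordsize 0 yields the index of the highest set bit, a real width yields the leading-zero count, and a zero source yields the width itself, matching LZCNT). A quick consistency check against GCC/Clang builtins:

#include <assert.h>

int main(void)
{
    unsigned x = 0x00080001u;
    assert(__builtin_ctz(x) == 0);          /* BSF: index of lowest set bit  */
    assert(31 - __builtin_clz(x) == 19);    /* BSR: index of highest set bit */
    assert(__builtin_clz(x) == 12);         /* LZCNT semantics for width 32  */
    return 0;
}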
compute_all_subq(); + + case CC_OP_SBBQ: return compute_all_sbbq(); + + case CC_OP_LOGICQ: return compute_all_logicq(); + + case CC_OP_INCQ: return compute_all_incq(); + + case CC_OP_DECQ: return compute_all_decq(); + + case CC_OP_SHLQ: return compute_all_shlq(); + + case CC_OP_SARQ: return compute_all_sarq(); +#endif + } +} + +uint32_t helper_cc_compute_c(int op) +{ + switch (op) { + default: /* should never happen */ return 0; + + case CC_OP_EFLAGS: return compute_c_eflags(); + + case CC_OP_MULB: return compute_c_mull(); + case CC_OP_MULW: return compute_c_mull(); + case CC_OP_MULL: return compute_c_mull(); + + case CC_OP_ADDB: return compute_c_addb(); + case CC_OP_ADDW: return compute_c_addw(); + case CC_OP_ADDL: return compute_c_addl(); + + case CC_OP_ADCB: return compute_c_adcb(); + case CC_OP_ADCW: return compute_c_adcw(); + case CC_OP_ADCL: return compute_c_adcl(); + + case CC_OP_SUBB: return compute_c_subb(); + case CC_OP_SUBW: return compute_c_subw(); + case CC_OP_SUBL: return compute_c_subl(); + + case CC_OP_SBBB: return compute_c_sbbb(); + case CC_OP_SBBW: return compute_c_sbbw(); + case CC_OP_SBBL: return compute_c_sbbl(); + + case CC_OP_LOGICB: return compute_c_logicb(); + case CC_OP_LOGICW: return compute_c_logicw(); + case CC_OP_LOGICL: return compute_c_logicl(); + + case CC_OP_INCB: return compute_c_incl(); + case CC_OP_INCW: return compute_c_incl(); + case CC_OP_INCL: return compute_c_incl(); + + case CC_OP_DECB: return compute_c_incl(); + case CC_OP_DECW: return compute_c_incl(); + case CC_OP_DECL: return compute_c_incl(); + + case CC_OP_SHLB: return compute_c_shlb(); + case CC_OP_SHLW: return compute_c_shlw(); + case CC_OP_SHLL: return compute_c_shll(); + + case CC_OP_SARB: return compute_c_sarl(); + case CC_OP_SARW: return compute_c_sarl(); + case CC_OP_SARL: return compute_c_sarl(); + +#ifdef TARGET_X86_64 + case CC_OP_MULQ: return compute_c_mull(); + + case CC_OP_ADDQ: return compute_c_addq(); + + case CC_OP_ADCQ: return compute_c_adcq(); + + case CC_OP_SUBQ: return compute_c_subq(); + + case CC_OP_SBBQ: return compute_c_sbbq(); + + case CC_OP_LOGICQ: return compute_c_logicq(); + + case CC_OP_INCQ: return compute_c_incl(); + + case CC_OP_DECQ: return compute_c_incl(); + + case CC_OP_SHLQ: return compute_c_shlq(); + + case CC_OP_SARQ: return compute_c_sarl(); +#endif + } +} diff --git a/src/recompiler/target-i386/ops_sse.h b/src/recompiler/target-i386/ops_sse.h new file mode 100644 index 00000000..4c8b89e2 --- /dev/null +++ b/src/recompiler/target-i386/ops_sse.h @@ -0,0 +1,2111 @@ +/* + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support + * + * Copyright (c) 2005 Fabrice Bellard + * Copyright (c) 2008 Intel Corporation <andrew.zaborowski@intel.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if SHIFT == 0 +#define Reg MMXReg +#define XMM_ONLY(...) +#define B(n) MMX_B(n) +#define W(n) MMX_W(n) +#define L(n) MMX_L(n) +#define Q(n) q +#define SUFFIX _mmx +#else +#define Reg XMMReg +#define XMM_ONLY(...) __VA_ARGS__ +#define B(n) XMM_B(n) +#define W(n) XMM_W(n) +#define L(n) XMM_L(n) +#define Q(n) XMM_Q(n) +#define SUFFIX _xmm +#endif + +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->W(0) >>= shift; + d->W(1) >>= shift; + d->W(2) >>= shift; + d->W(3) >>= shift; +#if SHIFT == 1 + d->W(4) >>= shift; + d->W(5) >>= shift; + d->W(6) >>= shift; + d->W(7) >>= shift; +#endif + } +} + +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + shift = 15; + } else { + shift = s->B(0); + } + d->W(0) = (int16_t)d->W(0) >> shift; + d->W(1) = (int16_t)d->W(1) >> shift; + d->W(2) = (int16_t)d->W(2) >> shift; + d->W(3) = (int16_t)d->W(3) >> shift; +#if SHIFT == 1 + d->W(4) = (int16_t)d->W(4) >> shift; + d->W(5) = (int16_t)d->W(5) >> shift; + d->W(6) = (int16_t)d->W(6) >> shift; + d->W(7) = (int16_t)d->W(7) >> shift; +#endif +} + +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->W(0) <<= shift; + d->W(1) <<= shift; + d->W(2) <<= shift; + d->W(3) <<= shift; +#if SHIFT == 1 + d->W(4) <<= shift; + d->W(5) <<= shift; + d->W(6) <<= shift; + d->W(7) <<= shift; +#endif + } +} + +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->L(0) >>= shift; + d->L(1) >>= shift; +#if SHIFT == 1 + d->L(2) >>= shift; + d->L(3) >>= shift; +#endif + } +} + +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + shift = 31; + } else { + shift = s->B(0); + } + d->L(0) = (int32_t)d->L(0) >> shift; + d->L(1) = (int32_t)d->L(1) >> shift; +#if SHIFT == 1 + d->L(2) = (int32_t)d->L(2) >> shift; + d->L(3) = (int32_t)d->L(3) >> shift; +#endif +} + +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->L(0) <<= shift; + d->L(1) <<= shift; +#if SHIFT == 1 + d->L(2) <<= shift; + d->L(3) <<= shift; +#endif + } +} + +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 63) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->Q(0) >>= shift; +#if SHIFT == 1 + d->Q(1) >>= shift; +#endif + } +} + +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 63) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->Q(0) <<= shift; +#if SHIFT == 1 + d->Q(1) <<= shift; +#endif + } +} + +#if SHIFT == 1 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s) 
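This header is compiled twice: once with SHIFT == 0 for the 64-bit MMX forms (_mmx suffix) and once with SHIFT == 1 for the 128-bit XMM forms (_xmm), so each helper body only has to conditionalize its upper lanes. The same trick in miniature (a hypothetical, self-contained sketch, not the file's actual macros):

#include <stdint.h>

#define glue_(a, b) a##b
#define glue(a, b) glue_(a, b)

/* one body, instantiated at two widths: the SHIFT/SUFFIX scheme in miniature */
#define DEFINE_PADDW(SFX, NLANES)                              \
    void glue(ref_paddw, SFX)(uint16_t *d, const uint16_t *s)  \
    {                                                          \
        int i;                                                 \
        for (i = 0; i < (NLANES); i++)                         \
            d[i] = (uint16_t)(d[i] + s[i]);                    \
    }

DEFINE_PADDW(_mmx, 4)   /* 64-bit MMX register: four 16-bit lanes   */
DEFINE_PADDW(_xmm, 8)   /* 128-bit XMM register: eight 16-bit lanes */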
+{ + int shift, i; + + shift = s->L(0); + if (shift > 16) + shift = 16; + for(i = 0; i < 16 - shift; i++) + d->B(i) = d->B(i + shift); + for(i = 16 - shift; i < 16; i++) + d->B(i) = 0; +} + +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s) +{ + int shift, i; + + shift = s->L(0); + if (shift > 16) + shift = 16; + for(i = 15; i >= shift; i--) + d->B(i) = d->B(i - shift); + for(i = 0; i < shift; i++) + d->B(i) = 0; +} +#endif + +#define SSE_HELPER_B(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->B(0) = F(d->B(0), s->B(0));\ + d->B(1) = F(d->B(1), s->B(1));\ + d->B(2) = F(d->B(2), s->B(2));\ + d->B(3) = F(d->B(3), s->B(3));\ + d->B(4) = F(d->B(4), s->B(4));\ + d->B(5) = F(d->B(5), s->B(5));\ + d->B(6) = F(d->B(6), s->B(6));\ + d->B(7) = F(d->B(7), s->B(7));\ + XMM_ONLY(\ + d->B(8) = F(d->B(8), s->B(8));\ + d->B(9) = F(d->B(9), s->B(9));\ + d->B(10) = F(d->B(10), s->B(10));\ + d->B(11) = F(d->B(11), s->B(11));\ + d->B(12) = F(d->B(12), s->B(12));\ + d->B(13) = F(d->B(13), s->B(13));\ + d->B(14) = F(d->B(14), s->B(14));\ + d->B(15) = F(d->B(15), s->B(15));\ + )\ +} + +#define SSE_HELPER_W(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->W(0) = F(d->W(0), s->W(0));\ + d->W(1) = F(d->W(1), s->W(1));\ + d->W(2) = F(d->W(2), s->W(2));\ + d->W(3) = F(d->W(3), s->W(3));\ + XMM_ONLY(\ + d->W(4) = F(d->W(4), s->W(4));\ + d->W(5) = F(d->W(5), s->W(5));\ + d->W(6) = F(d->W(6), s->W(6));\ + d->W(7) = F(d->W(7), s->W(7));\ + )\ +} + +#define SSE_HELPER_L(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->L(0) = F(d->L(0), s->L(0));\ + d->L(1) = F(d->L(1), s->L(1));\ + XMM_ONLY(\ + d->L(2) = F(d->L(2), s->L(2));\ + d->L(3) = F(d->L(3), s->L(3));\ + )\ +} + +#define SSE_HELPER_Q(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->Q(0) = F(d->Q(0), s->Q(0));\ + XMM_ONLY(\ + d->Q(1) = F(d->Q(1), s->Q(1));\ + )\ +} + +#if SHIFT == 0 +static inline int satub(int x) +{ + if (x < 0) + return 0; + else if (x > 255) + return 255; + else + return x; +} + +static inline int satuw(int x) +{ + if (x < 0) + return 0; + else if (x > 65535) + return 65535; + else + return x; +} + +static inline int satsb(int x) +{ + if (x < -128) + return -128; + else if (x > 127) + return 127; + else + return x; +} + +static inline int satsw(int x) +{ + if (x < -32768) + return -32768; + else if (x > 32767) + return 32767; + else + return x; +} + +#define FADD(a, b) ((a) + (b)) +#define FADDUB(a, b) satub((a) + (b)) +#define FADDUW(a, b) satuw((a) + (b)) +#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b)) +#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b)) + +#define FSUB(a, b) ((a) - (b)) +#define FSUBUB(a, b) satub((a) - (b)) +#define FSUBUW(a, b) satuw((a) - (b)) +#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b)) +#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b)) +#define FMINUB(a, b) ((a) < (b)) ? (a) : (b) +#define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b) +#define FMAXUB(a, b) ((a) > (b)) ? (a) : (b) +#define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b) + +#define FAND(a, b) (a) & (b) +#define FANDN(a, b) ((~(a)) & (b)) +#define FOR(a, b) (a) | (b) +#define FXOR(a, b) (a) ^ (b) + +#define FCMPGTB(a, b) (int8_t)(a) > (int8_t)(b) ? -1 : 0 +#define FCMPGTW(a, b) (int16_t)(a) > (int16_t)(b) ? -1 : 0 +#define FCMPGTL(a, b) (int32_t)(a) > (int32_t)(b) ? -1 : 0 +#define FCMPEQ(a, b) (a) == (b) ? 
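The saturating helpers above clamp instead of wrapping, which is the whole point of the sat* family. A scalar spot-check (standalone):

#include <assert.h>
#include <stdint.h>

static int satsw_ref(int x)   /* same clamp as satsw above */
{
    if (x < -32768) return -32768;
    if (x > 32767)  return 32767;
    return x;
}

int main(void)
{
    assert(satsw_ref(32767 + 1) == 32767);        /* PADDSW saturates high */
    assert(satsw_ref(-32768 - 1) == -32768);      /* PSUBSW saturates low  */
    assert((uint16_t)(32767 + 1) == 0x8000u);     /* plain PADDW wraps     */
    return 0;
}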
-1 : 0 + +#define FMULLW(a, b) (a) * (b) +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 +#define FMULHUW(a, b) (a) * (b) >> 16 +#define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 + +#define FAVG(a, b) ((a) + (b) + 1) >> 1 +#endif + +SSE_HELPER_B(helper_paddb, FADD) +SSE_HELPER_W(helper_paddw, FADD) +SSE_HELPER_L(helper_paddl, FADD) +SSE_HELPER_Q(helper_paddq, FADD) + +SSE_HELPER_B(helper_psubb, FSUB) +SSE_HELPER_W(helper_psubw, FSUB) +SSE_HELPER_L(helper_psubl, FSUB) +SSE_HELPER_Q(helper_psubq, FSUB) + +SSE_HELPER_B(helper_paddusb, FADDUB) +SSE_HELPER_B(helper_paddsb, FADDSB) +SSE_HELPER_B(helper_psubusb, FSUBUB) +SSE_HELPER_B(helper_psubsb, FSUBSB) + +SSE_HELPER_W(helper_paddusw, FADDUW) +SSE_HELPER_W(helper_paddsw, FADDSW) +SSE_HELPER_W(helper_psubusw, FSUBUW) +SSE_HELPER_W(helper_psubsw, FSUBSW) + +SSE_HELPER_B(helper_pminub, FMINUB) +SSE_HELPER_B(helper_pmaxub, FMAXUB) + +SSE_HELPER_W(helper_pminsw, FMINSW) +SSE_HELPER_W(helper_pmaxsw, FMAXSW) + +SSE_HELPER_Q(helper_pand, FAND) +SSE_HELPER_Q(helper_pandn, FANDN) +SSE_HELPER_Q(helper_por, FOR) +SSE_HELPER_Q(helper_pxor, FXOR) + +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB) +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW) +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL) + +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ) +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) +SSE_HELPER_L(helper_pcmpeql, FCMPEQ) + +SSE_HELPER_W(helper_pmullw, FMULLW) +#if SHIFT == 0 +SSE_HELPER_W(helper_pmulhrw, FMULHRW) +#endif +SSE_HELPER_W(helper_pmulhuw, FMULHUW) +SSE_HELPER_W(helper_pmulhw, FMULHW) + +SSE_HELPER_B(helper_pavgb, FAVG) +SSE_HELPER_W(helper_pavgw, FAVG) + +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s) +{ + d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); +#if SHIFT == 1 + d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); +#endif +} + +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s) +{ + int i; + + for(i = 0; i < (2 << SHIFT); i++) { + d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + + (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1); + } +} + +#if SHIFT == 0 +static inline int abs1(int a) +{ + if (a < 0) + return -a; + else + return a; +} +#endif +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s) +{ + unsigned int val; + + val = 0; + val += abs1(d->B(0) - s->B(0)); + val += abs1(d->B(1) - s->B(1)); + val += abs1(d->B(2) - s->B(2)); + val += abs1(d->B(3) - s->B(3)); + val += abs1(d->B(4) - s->B(4)); + val += abs1(d->B(5) - s->B(5)); + val += abs1(d->B(6) - s->B(6)); + val += abs1(d->B(7) - s->B(7)); + d->Q(0) = val; +#if SHIFT == 1 + val = 0; + val += abs1(d->B(8) - s->B(8)); + val += abs1(d->B(9) - s->B(9)); + val += abs1(d->B(10) - s->B(10)); + val += abs1(d->B(11) - s->B(11)); + val += abs1(d->B(12) - s->B(12)); + val += abs1(d->B(13) - s->B(13)); + val += abs1(d->B(14) - s->B(14)); + val += abs1(d->B(15) - s->B(15)); + d->Q(1) = val; +#endif +} + +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0) +{ + int i; + for(i = 0; i < (8 << SHIFT); i++) { + if (s->B(i) & 0x80) + stb(a0 + i, d->B(i)); + } +} + +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val) +{ + d->L(0) = val; + d->L(1) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif +} + +#ifdef TARGET_X86_64 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val) +{ + d->Q(0) = val; +#if SHIFT == 1 + d->Q(1) = 0; +#endif +} +#endif + +#if SHIFT == 0 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.W(0) = s->W(order & 3); + r.W(1) = s->W((order >> 2) & 3); + r.W(2) = s->W((order >> 4) & 3); + r.W(3) = s->W((order >> 6) & 3); + *d = r; +} +#else +void 
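Each 32-bit lane of helper_pmaddwd above is a pair of signed 16x16 products summed and truncated to 32 bits; the only sum that exceeds int32 range is 0x40000000 + 0x40000000, which wraps to 0x80000000 just as hardware PMADDWD does. Scalar reference (standalone; assumes a two's-complement target, as the emulator does):

#include <assert.h>
#include <stdint.h>

/* one PMADDWD lane: two signed 16x16 products, result wrapped to 32 bits */
static int32_t pmaddwd_lane(int16_t d0, int16_t d1, int16_t s0, int16_t s1)
{
    return (int32_t)((int64_t)d0 * s0 + (int64_t)d1 * s1);
}

int main(void)
{
    assert(pmaddwd_lane(-32768, -32768, -32768, -32768) == INT32_MIN);
    assert(pmaddwd_lane(3, 4, 10, 100) == 430);   /* 3*10 + 4*100 */
    return 0;
}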
helper_shufps(Reg *d, Reg *s, int order) +{ + Reg r; + r.L(0) = d->L(order & 3); + r.L(1) = d->L((order >> 2) & 3); + r.L(2) = s->L((order >> 4) & 3); + r.L(3) = s->L((order >> 6) & 3); + *d = r; +} + +void helper_shufpd(Reg *d, Reg *s, int order) +{ + Reg r; + r.Q(0) = d->Q(order & 1); + r.Q(1) = s->Q((order >> 1) & 1); + *d = r; +} + +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.L(0) = s->L(order & 3); + r.L(1) = s->L((order >> 2) & 3); + r.L(2) = s->L((order >> 4) & 3); + r.L(3) = s->L((order >> 6) & 3); + *d = r; +} + +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.W(0) = s->W(order & 3); + r.W(1) = s->W((order >> 2) & 3); + r.W(2) = s->W((order >> 4) & 3); + r.W(3) = s->W((order >> 6) & 3); + r.Q(1) = s->Q(1); + *d = r; +} + +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.Q(0) = s->Q(0); + r.W(4) = s->W(4 + (order & 3)); + r.W(5) = s->W(4 + ((order >> 2) & 3)); + r.W(6) = s->W(4 + ((order >> 4) & 3)); + r.W(7) = s->W(4 + ((order >> 6) & 3)); + *d = r; +} +#endif + +#if SHIFT == 1 +/* FPU ops */ +/* XXX: not accurate */ + +#define SSE_HELPER_S(name, F)\ +void helper_ ## name ## ps (Reg *d, Reg *s)\ +{\ + d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ + d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ + d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ + d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ +}\ +\ +void helper_ ## name ## ss (Reg *d, Reg *s)\ +{\ + d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ +}\ +void helper_ ## name ## pd (Reg *d, Reg *s)\ +{\ + d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ + d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ +}\ +\ +void helper_ ## name ## sd (Reg *d, Reg *s)\ +{\ + d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ +} + +#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status) +#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status) +#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status) +#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status) +#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b) +#define FPU_MAX(size, a, b) (a) > (b) ? 
(a) : (b) +#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status) + +SSE_HELPER_S(add, FPU_ADD) +SSE_HELPER_S(sub, FPU_SUB) +SSE_HELPER_S(mul, FPU_MUL) +SSE_HELPER_S(div, FPU_DIV) +SSE_HELPER_S(min, FPU_MIN) +SSE_HELPER_S(max, FPU_MAX) +SSE_HELPER_S(sqrt, FPU_SQRT) + + +/* float to float conversions */ +void helper_cvtps2pd(Reg *d, Reg *s) +{ + float32 s0, s1; + s0 = s->XMM_S(0); + s1 = s->XMM_S(1); + d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); + d->XMM_D(1) = float32_to_float64(s1, &env->sse_status); +} + +void helper_cvtpd2ps(Reg *d, Reg *s) +{ + d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); + d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); + d->Q(1) = 0; +} + +void helper_cvtss2sd(Reg *d, Reg *s) +{ + d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status); +} + +void helper_cvtsd2ss(Reg *d, Reg *s) +{ + d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); +} + +/* integer to float */ +void helper_cvtdq2ps(Reg *d, Reg *s) +{ + d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); + d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); + d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); + d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status); +} + +void helper_cvtdq2pd(Reg *d, Reg *s) +{ + int32_t l0, l1; + l0 = (int32_t)s->XMM_L(0); + l1 = (int32_t)s->XMM_L(1); + d->XMM_D(0) = int32_to_float64(l0, &env->sse_status); + d->XMM_D(1) = int32_to_float64(l1, &env->sse_status); +} + +void helper_cvtpi2ps(XMMReg *d, MMXReg *s) +{ + d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); + d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); +} + +void helper_cvtpi2pd(XMMReg *d, MMXReg *s) +{ + d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); + d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); +} + +void helper_cvtsi2ss(XMMReg *d, uint32_t val) +{ + d->XMM_S(0) = int32_to_float32(val, &env->sse_status); +} + +void helper_cvtsi2sd(XMMReg *d, uint32_t val) +{ + d->XMM_D(0) = int32_to_float64(val, &env->sse_status); +} + +#ifdef TARGET_X86_64 +void helper_cvtsq2ss(XMMReg *d, uint64_t val) +{ + d->XMM_S(0) = int64_to_float32(val, &env->sse_status); +} + +void helper_cvtsq2sd(XMMReg *d, uint64_t val) +{ + d->XMM_D(0) = int64_to_float64(val, &env->sse_status); +} +#endif + +/* float to integer */ +void helper_cvtps2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); + d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); + d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); + d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status); +} + +void helper_cvtpd2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); + d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); + d->XMM_Q(1) = 0; +} + +void helper_cvtps2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); +} + +void helper_cvtpd2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); +} + +int32_t helper_cvtss2si(XMMReg *s) +{ + return float32_to_int32(s->XMM_S(0), &env->sse_status); +} + +int32_t helper_cvtsd2si(XMMReg *s) +{ + return float64_to_int32(s->XMM_D(0), &env->sse_status); +} + +#ifdef TARGET_X86_64 +int64_t helper_cvtss2sq(XMMReg *s) +{ + return 
float32_to_int64(s->XMM_S(0), &env->sse_status); +} + +int64_t helper_cvtsd2sq(XMMReg *s) +{ + return float64_to_int64(s->XMM_D(0), &env->sse_status); +} +#endif + +/* float to integer truncated */ +void helper_cvttps2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); + d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); + d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); + d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status); +} + +void helper_cvttpd2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); + d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); + d->XMM_Q(1) = 0; +} + +void helper_cvttps2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); +} + +void helper_cvttpd2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); +} + +int32_t helper_cvttss2si(XMMReg *s) +{ + return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); +} + +int32_t helper_cvttsd2si(XMMReg *s) +{ + return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); +} + +#ifdef TARGET_X86_64 +int64_t helper_cvttss2sq(XMMReg *s) +{ + return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); +} + +int64_t helper_cvttsd2sq(XMMReg *s) +{ + return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); +} +#endif + +void helper_rsqrtps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); + d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); + d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); + d->XMM_S(3) = approx_rsqrt(s->XMM_S(3)); +} + +void helper_rsqrtss(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); +} + +void helper_rcpps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); + d->XMM_S(1) = approx_rcp(s->XMM_S(1)); + d->XMM_S(2) = approx_rcp(s->XMM_S(2)); + d->XMM_S(3) = approx_rcp(s->XMM_S(3)); +} + +void helper_rcpss(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); +} + +static inline uint64_t helper_extrq(uint64_t src, int shift, int len) +{ + uint64_t mask; + + if (len == 0) { + mask = ~0LL; + } else { + mask = (1ULL << len) - 1; + } + return (src >> shift) & mask; +} + +void helper_extrq_r(XMMReg *d, XMMReg *s) +{ + d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0)); +} + +void helper_extrq_i(XMMReg *d, int index, int length) +{ + d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length); +} + +static inline uint64_t helper_insertq(uint64_t src, int shift, int len) +{ + uint64_t mask; + + if (len == 0) { + mask = ~0ULL; + } else { + mask = (1ULL << len) - 1; + } + return (src & ~(mask << shift)) | ((src & mask) << shift); +} + +void helper_insertq_r(XMMReg *d, XMMReg *s) +{ + d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8)); +} + +void helper_insertq_i(XMMReg *d, int index, int length) +{ + d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length); +} + +void helper_haddps(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); + r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); + r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1); + r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3); + *d = r; +} + +void helper_haddpd(XMMReg *d, XMMReg *s) 
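+/* Editor's aside (not part of the imported sources): HADDPS above and the
+ * HADDPD/HSUBPS/HSUBPD helpers that follow implement the SSE3 horizontal
+ * ops, which reduce adjacent lane pairs -- the destination's pairs land in
+ * the low half of the result and the source's pairs in the high half.  A
+ * plain-C sketch of HADDPS on host floats (hypothetical helper, host
+ * arithmetic rather than softfloat, so NaN/rounding behaviour is not
+ * bit-exact):
+ *
+ *     static void haddps_sketch(float d[4], const float s[4])
+ *     {
+ *         const float r[4] = { d[0] + d[1], d[2] + d[3],
+ *                              s[0] + s[1], s[2] + s[3] };
+ *         d[0] = r[0]; d[1] = r[1]; d[2] = r[2]; d[3] = r[3];
+ *     }
+ */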
+{ + XMMReg r; + r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); + r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); + *d = r; +} + +void helper_hsubps(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); + r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); + r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1); + r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3); + *d = r; +} + +void helper_hsubpd(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); + r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); + *d = r; +} + +void helper_addsubps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); + d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); + d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); + d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3); +} + +void helper_addsubpd(XMMReg *d, XMMReg *s) +{ + d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); + d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); +} + +/* XXX: unordered */ +#define SSE_HELPER_CMP(name, F)\ +void helper_ ## name ## ps (Reg *d, Reg *s)\ +{\ + d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ + d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ + d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ + d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ +}\ +\ +void helper_ ## name ## ss (Reg *d, Reg *s)\ +{\ + d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ +}\ +void helper_ ## name ## pd (Reg *d, Reg *s)\ +{\ + d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ + d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ +}\ +\ +void helper_ ## name ## sd (Reg *d, Reg *s)\ +{\ + d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ +} + +#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? - 1 : 0 +#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 
0 : -1 + +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) +SSE_HELPER_CMP(cmplt, FPU_CMPLT) +SSE_HELPER_CMP(cmple, FPU_CMPLE) +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) +SSE_HELPER_CMP(cmpord, FPU_CMPORD) + +static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_ucomiss(Reg *d, Reg *s) +{ + int ret; + float32 s0, s1; + + s0 = d->XMM_S(0); + s1 = s->XMM_S(0); + ret = float32_compare_quiet(s0, s1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_comiss(Reg *d, Reg *s) +{ + int ret; + float32 s0, s1; + + s0 = d->XMM_S(0); + s1 = s->XMM_S(0); + ret = float32_compare(s0, s1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_ucomisd(Reg *d, Reg *s) +{ + int ret; + float64 d0, d1; + + d0 = d->XMM_D(0); + d1 = s->XMM_D(0); + ret = float64_compare_quiet(d0, d1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_comisd(Reg *d, Reg *s) +{ + int ret; + float64 d0, d1; + + d0 = d->XMM_D(0); + d1 = s->XMM_D(0); + ret = float64_compare(d0, d1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +uint32_t helper_movmskps(Reg *s) +{ + int b0, b1, b2, b3; + b0 = s->XMM_L(0) >> 31; + b1 = s->XMM_L(1) >> 31; + b2 = s->XMM_L(2) >> 31; + b3 = s->XMM_L(3) >> 31; + return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); +} + +uint32_t helper_movmskpd(Reg *s) +{ + int b0, b1; + b0 = s->XMM_L(1) >> 31; + b1 = s->XMM_L(3) >> 31; + return b0 | (b1 << 1); +} + +#endif + +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s) +{ + uint32_t val; + val = 0; + val |= (s->B(0) >> 7); + val |= (s->B(1) >> 6) & 0x02; + val |= (s->B(2) >> 5) & 0x04; + val |= (s->B(3) >> 4) & 0x08; + val |= (s->B(4) >> 3) & 0x10; + val |= (s->B(5) >> 2) & 0x20; + val |= (s->B(6) >> 1) & 0x40; + val |= (s->B(7)) & 0x80; +#if SHIFT == 1 + val |= (s->B(8) << 1) & 0x0100; + val |= (s->B(9) << 2) & 0x0200; + val |= (s->B(10) << 3) & 0x0400; + val |= (s->B(11) << 4) & 0x0800; + val |= (s->B(12) << 5) & 0x1000; + val |= (s->B(13) << 6) & 0x2000; + val |= (s->B(14) << 7) & 0x4000; + val |= (s->B(15) << 8) & 0x8000; +#endif + return val; +} + +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.B(0) = satsb((int16_t)d->W(0)); + r.B(1) = satsb((int16_t)d->W(1)); + r.B(2) = satsb((int16_t)d->W(2)); + r.B(3) = satsb((int16_t)d->W(3)); +#if SHIFT == 1 + r.B(4) = satsb((int16_t)d->W(4)); + r.B(5) = satsb((int16_t)d->W(5)); + r.B(6) = satsb((int16_t)d->W(6)); + r.B(7) = satsb((int16_t)d->W(7)); +#endif + r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0)); + r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1)); + r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2)); + r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3)); +#if SHIFT == 1 + r.B(12) = satsb((int16_t)s->W(4)); + r.B(13) = satsb((int16_t)s->W(5)); + r.B(14) = satsb((int16_t)s->W(6)); + r.B(15) = satsb((int16_t)s->W(7)); +#endif + *d = r; +} + +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.B(0) = satub((int16_t)d->W(0)); + r.B(1) = satub((int16_t)d->W(1)); + r.B(2) = satub((int16_t)d->W(2)); + r.B(3) = satub((int16_t)d->W(3)); +#if SHIFT == 1 + r.B(4) = satub((int16_t)d->W(4)); + r.B(5) = satub((int16_t)d->W(5)); + r.B(6) = satub((int16_t)d->W(6)); + r.B(7) = satub((int16_t)d->W(7)); +#endif + r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0)); + r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1)); + r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2)); + r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3)); +#if SHIFT 
== 1 + r.B(12) = satub((int16_t)s->W(4)); + r.B(13) = satub((int16_t)s->W(5)); + r.B(14) = satub((int16_t)s->W(6)); + r.B(15) = satub((int16_t)s->W(7)); +#endif + *d = r; +} + +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.W(0) = satsw(d->L(0)); + r.W(1) = satsw(d->L(1)); +#if SHIFT == 1 + r.W(2) = satsw(d->L(2)); + r.W(3) = satsw(d->L(3)); +#endif + r.W((2 << SHIFT) + 0) = satsw(s->L(0)); + r.W((2 << SHIFT) + 1) = satsw(s->L(1)); +#if SHIFT == 1 + r.W(6) = satsw(s->L(2)); + r.W(7) = satsw(s->L(3)); +#endif + *d = r; +} + +#define UNPCK_OP(base_name, base) \ + \ +void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ + r.B(1) = s->B((base << (SHIFT + 2)) + 0); \ + r.B(2) = d->B((base << (SHIFT + 2)) + 1); \ + r.B(3) = s->B((base << (SHIFT + 2)) + 1); \ + r.B(4) = d->B((base << (SHIFT + 2)) + 2); \ + r.B(5) = s->B((base << (SHIFT + 2)) + 2); \ + r.B(6) = d->B((base << (SHIFT + 2)) + 3); \ + r.B(7) = s->B((base << (SHIFT + 2)) + 3); \ +XMM_ONLY( \ + r.B(8) = d->B((base << (SHIFT + 2)) + 4); \ + r.B(9) = s->B((base << (SHIFT + 2)) + 4); \ + r.B(10) = d->B((base << (SHIFT + 2)) + 5); \ + r.B(11) = s->B((base << (SHIFT + 2)) + 5); \ + r.B(12) = d->B((base << (SHIFT + 2)) + 6); \ + r.B(13) = s->B((base << (SHIFT + 2)) + 6); \ + r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ + r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ +) \ + *d = r; \ +} \ + \ +void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ + r.W(1) = s->W((base << (SHIFT + 1)) + 0); \ + r.W(2) = d->W((base << (SHIFT + 1)) + 1); \ + r.W(3) = s->W((base << (SHIFT + 1)) + 1); \ +XMM_ONLY( \ + r.W(4) = d->W((base << (SHIFT + 1)) + 2); \ + r.W(5) = s->W((base << (SHIFT + 1)) + 2); \ + r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ + r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ +) \ + *d = r; \ +} \ + \ +void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.L(0) = d->L((base << SHIFT) + 0); \ + r.L(1) = s->L((base << SHIFT) + 0); \ +XMM_ONLY( \ + r.L(2) = d->L((base << SHIFT) + 1); \ + r.L(3) = s->L((base << SHIFT) + 1); \ +) \ + *d = r; \ +} \ + \ +XMM_ONLY( \ +void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.Q(0) = d->Q(base); \ + r.Q(1) = s->Q(base); \ + *d = r; \ +} \ +) + +UNPCK_OP(l, 0) +UNPCK_OP(h, 1) + +/* 3DNow! 
float ops */ +#if SHIFT == 0 +void helper_pi2fd(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); +} + +void helper_pi2fw(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); +} + +void helper_pf2id(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); +} + +void helper_pf2iw(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); +} + +void helper_pfacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfadd(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfcmpeq(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void helper_pfcmpge(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void helper_pfcmpgt(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
-1 : 0; +} + +void helper_pfmax(MMXReg *d, MMXReg *s) +{ + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void helper_pfmin(MMXReg *d, MMXReg *s) +{ + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void helper_pfmul(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfnacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfpnacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfrcp(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); + d->MMX_S(1) = d->MMX_S(0); +} + +void helper_pfrsqrt(MMXReg *d, MMXReg *s) +{ + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; + d->MMX_L(0) = d->MMX_L(1); +} + +void helper_pfsub(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfsubr(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); +} + +void helper_pswapd(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_L(0) = s->MMX_L(1); + r.MMX_L(1) = s->MMX_L(0); + *d = r; +} +#endif + +/* SSSE3 op helpers */ +void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) +{ + int i; + Reg r; + + for (i = 0; i < (8 << SHIFT); i++) + r.B(i) = (s->B(i) & 0x80) ? 
0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); + + *d = r; +} + +void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); +} + +void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) +{ + d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); +} + +void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); +} + +void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + + (int8_t)s->B( 1) * (uint8_t)d->B( 1)); + d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + + (int8_t)s->B( 3) * (uint8_t)d->B( 3)); + d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + + (int8_t)s->B( 5) * (uint8_t)d->B( 5)); + d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + + (int8_t)s->B( 7) * (uint8_t)d->B( 7)); +#if SHIFT == 1 + d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + + (int8_t)s->B( 9) * (uint8_t)d->B( 9)); + d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + + (int8_t)s->B(11) * (uint8_t)d->B(11)); + d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + + (int8_t)s->B(13) * (uint8_t)d->B(13)); + d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + + (int8_t)s->B(15) * (uint8_t)d->B(15)); +#endif +} + +void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); + d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); + XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); + XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); + XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); + XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); +} + +void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) +{ + d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); + XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); + XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); +} + +void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); + d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - 
(int16_t)s->W(3)); + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); +} + +#define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x +#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x +#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x +SSE_HELPER_B(helper_pabsb, FABSB) +SSE_HELPER_W(helper_pabsw, FABSW) +SSE_HELPER_L(helper_pabsd, FABSL) + +#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 +SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) + +#define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d +#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d +#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d +SSE_HELPER_B(helper_psignb, FSIGNB) +SSE_HELPER_W(helper_psignw, FSIGNW) +SSE_HELPER_L(helper_psignd, FSIGNL) + +void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) +{ + Reg r; + + /* XXX could be checked during translation */ + if (shift >= (16 << SHIFT)) { + r.Q(0) = 0; + XMM_ONLY(r.Q(1) = 0); + } else { + shift <<= 3; +#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) +#if SHIFT == 0 + r.Q(0) = SHR(s->Q(0), shift - 0) | + SHR(d->Q(0), shift - 64); +#else + r.Q(0) = SHR(s->Q(0), shift - 0) | + SHR(s->Q(1), shift - 64) | + SHR(d->Q(0), shift - 128) | + SHR(d->Q(1), shift - 192); + r.Q(1) = SHR(s->Q(0), shift + 64) | + SHR(s->Q(1), shift - 0) | + SHR(d->Q(0), shift - 64) | + SHR(d->Q(1), shift - 128); +#endif +#undef SHR + } + + *d = r; +} + +#define XMM0 env->xmm_regs[0] + +#if SHIFT == 1 +#define SSE_HELPER_V(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\ + d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\ + if (num > 2) {\ + d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\ + d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\ + if (num > 4) {\ + d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\ + d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\ + d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\ + d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\ + if (num > 8) {\ + d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\ + d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\ + d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\ + d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\ + d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\ + d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\ + d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\ + d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\ + }\ + }\ + }\ +} + +#define SSE_HELPER_I(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\ +{\ + d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\ + d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\ + if (num > 2) {\ + d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\ + d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\ + if (num > 4) {\ + d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\ + d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\ + d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\ + d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\ + if (num > 8) {\ + d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\ + d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\ + d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\ + d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 
11) & 1));\ + d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\ + d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\ + d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\ + d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\ + }\ + }\ + }\ +} + +/* SSE4.1 op helpers */ +#define FBLENDVB(d, s, m) (m & 0x80) ? s : d +#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d +#define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d +SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB) +SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS) +SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD) + +void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s) +{ + uint64_t zf = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1)); + uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1)); + + CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C); +} + +#define SSE_HELPER_F(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->elem(0) = F(0);\ + d->elem(1) = F(1);\ + if (num > 2) {\ + d->elem(2) = F(2);\ + d->elem(3) = F(3);\ + if (num > 4) {\ + d->elem(4) = F(4);\ + d->elem(5) = F(5);\ + d->elem(6) = F(6);\ + d->elem(7) = F(7);\ + }\ + }\ +} + +SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L) +SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B) +SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B) +SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B) +SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W) +SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W) +SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L) + +void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s) +{ + d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0); + d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2); +} + +#define FCMPEQQ(d, s) d == s ? 
-1 : 0
+SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
+
+void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
+{
+    d->W(0) = satuw((int32_t) d->L(0));
+    d->W(1) = satuw((int32_t) d->L(1));
+    d->W(2) = satuw((int32_t) d->L(2));
+    d->W(3) = satuw((int32_t) d->L(3));
+    d->W(4) = satuw((int32_t) s->L(0));
+    d->W(5) = satuw((int32_t) s->L(1));
+    d->W(6) = satuw((int32_t) s->L(2));
+    d->W(7) = satuw((int32_t) s->L(3));
+}
+
+#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s)
+#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s)
+#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s)
+#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s)
+SSE_HELPER_B(helper_pminsb, FMINSB)
+SSE_HELPER_L(helper_pminsd, FMINSD)
+SSE_HELPER_W(helper_pminuw, MIN)
+SSE_HELPER_L(helper_pminud, MIN)
+SSE_HELPER_B(helper_pmaxsb, FMAXSB)
+SSE_HELPER_L(helper_pmaxsd, FMAXSD)
+SSE_HELPER_W(helper_pmaxuw, MAX)
+SSE_HELPER_L(helper_pmaxud, MAX)
+
+#define FMULLD(d, s) (int32_t) d * (int32_t) s
+SSE_HELPER_L(helper_pmulld, FMULLD)
+
+void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
+{
+    int idx = 0;
+
+    if (s->W(1) < s->W(idx))
+        idx = 1;
+    if (s->W(2) < s->W(idx))
+        idx = 2;
+    if (s->W(3) < s->W(idx))
+        idx = 3;
+    if (s->W(4) < s->W(idx))
+        idx = 4;
+    if (s->W(5) < s->W(idx))
+        idx = 5;
+    if (s->W(6) < s->W(idx))
+        idx = 6;
+    if (s->W(7) < s->W(idx))
+        idx = 7;
+
+    d->Q(1) = 0;
+    d->L(1) = 0;
+    d->W(1) = idx;
+    d->W(0) = s->W(idx);
+}
+
+void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                get_float_exception_flags(&env->sse_status) &
+                ~float_flag_inexact,
+                &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
+    d->XMM_D(1) = float64_round_to_int(s->XMM_D(1), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                get_float_exception_flags(&env->sse_status) &
+                ~float_flag_inexact,
+                &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
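+
+/* Editor's aside (not part of the imported sources): per the Intel SDM,
+ * the ROUNDPS/ROUNDPD/ROUNDSS/ROUNDSD immediate decoded by these helpers
+ * encodes bits 1:0 as the rounding mode, bit 2 as "use MXCSR.RC instead
+ * of the immediate", and bit 3 as inexact-exception suppression -- hence
+ * the bit-2 guard around each switch and the #if 0 TODO blocks for bit 3.
+ * A standalone sketch of the decode, using a hypothetical helper name on
+ * top of the softfloat rounding-mode constants:
+ */
+#if 0 /* illustrative sketch only, never compiled */
+static int round_imm_to_softfloat_mode(uint32_t imm, int mxcsr_rc)
+{
+    static const int modes[4] = {
+        float_round_nearest_even, /* imm bits 1:0 == 0: to nearest (even) */
+        float_round_down,         /* imm bits 1:0 == 1: toward -infinity  */
+        float_round_up,           /* imm bits 1:0 == 2: toward +infinity  */
+        float_round_to_zero       /* imm bits 1:0 == 3: truncate          */
+    };
+    return (imm & 4) ? mxcsr_rc : modes[imm & 3];
+}
+#endif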
+
+void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                get_float_exception_flags(&env->sse_status) &
+                ~float_flag_inexact,
+                &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                get_float_exception_flags(&env->sse_status) &
+                ~float_flag_inexact,
+                &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+#define FBLENDP(d, s, m) m ? s : d
+SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
+SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
+SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
+
+void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+    float32 iresult = 0 /*float32_zero*/;
+
+    if (mask & (1 << 4))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(0), s->L(0), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 5))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(1), s->L(1), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 6))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(2), s->L(2), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 7))
+        iresult = float32_add(iresult,
+                        float32_mul(d->L(3), s->L(3), &env->sse_status),
+                        &env->sse_status);
+    d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/;
+    d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/;
+    d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/;
+    d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/;
+}
+
+void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+    float64 iresult = 0 /*float64_zero*/;
+
+    if (mask & (1 << 4))
+        iresult = float64_add(iresult,
+                        float64_mul(d->Q(0), s->Q(0), &env->sse_status),
+                        &env->sse_status);
+    if (mask & (1 << 5))
+        iresult = float64_add(iresult,
+                        float64_mul(d->Q(1), s->Q(1), &env->sse_status),
+                        &env->sse_status);
+    d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/;
+    d->Q(1) = (mask & (1 << 1)) ?
iresult : 0 /*float64_zero*/; +} + +void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset) +{ + int s0 = (offset & 3) << 2; + int d0 = (offset & 4) << 0; + int i; + Reg r; + + for (i = 0; i < 8; i++, d0++) { + r.W(i) = 0; + r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0)); + r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1)); + r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2)); + r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); + } + + *d = r; +} + +/* SSE4.2 op helpers */ +/* it's unclear whether signed or unsigned */ +#define FCMPGTQ(d, s) d > s ? -1 : 0 +SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ) + +static inline int pcmp_elen(int reg, uint32_t ctrl) +{ + int val; + + /* Presence of REX.W is indicated by a bit higher than 7 set */ + if (ctrl >> 8) + val = abs1((int64_t) env->regs[reg]); + else + val = abs1((int32_t) env->regs[reg]); + + if (ctrl & 1) { + if (val > 8) + return 8; + } else + if (val > 16) + return 16; + + return val; +} + +static inline int pcmp_ilen(Reg *r, uint8_t ctrl) +{ + int val = 0; + + if (ctrl & 1) { + while (val < 8 && r->W(val)) + val++; + } else + while (val < 16 && r->B(val)) + val++; + + return val; +} + +static inline int pcmp_val(Reg *r, uint8_t ctrl, int i) +{ + switch ((ctrl >> 0) & 3) { + case 0: + return r->B(i); + case 1: + return r->W(i); + case 2: + return (int8_t) r->B(i); + case 3: + default: + return (int16_t) r->W(i); + } +} + +static inline unsigned pcmpxstrx(Reg *d, Reg *s, + int8_t ctrl, int valids, int validd) +{ + unsigned int res = 0; + int v; + int j, i; + int upper = (ctrl & 1) ? 7 : 15; + + valids--; + validd--; + + CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0); + + switch ((ctrl >> 2) & 3) { + case 0: + for (j = valids; j >= 0; j--) { + res <<= 1; + v = pcmp_val(s, ctrl, j); + for (i = validd; i >= 0; i--) + res |= (v == pcmp_val(d, ctrl, i)); + } + break; + case 1: + for (j = valids; j >= 0; j--) { + res <<= 1; + v = pcmp_val(s, ctrl, j); + for (i = ((validd - 1) | 1); i >= 0; i -= 2) + res |= (pcmp_val(d, ctrl, i - 0) <= v && + pcmp_val(d, ctrl, i - 1) >= v); + } + break; + case 2: + res = (2 << (upper - MAX(valids, validd))) - 1; + res <<= MAX(valids, validd) - MIN(valids, validd); + for (i = MIN(valids, validd); i >= 0; i--) { + res <<= 1; + v = pcmp_val(s, ctrl, i); + res |= (v == pcmp_val(d, ctrl, i)); + } + break; + case 3: + for (j = valids - validd; j >= 0; j--) { + res <<= 1; + res |= 1; + for (i = MIN(upper - j, validd); i >= 0; i--) + res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); + } + break; + } + + switch ((ctrl >> 4) & 3) { + case 1: + res ^= (2 << upper) - 1; + break; + case 3: + res ^= (2 << valids) - 1; + break; + } + + if (res) + CC_SRC |= CC_C; + if (res & 1) + CC_SRC |= CC_O; + + return res; +} + +static inline int rffs1(unsigned int val) +{ + int ret = 1, hi; + + for (hi = sizeof(val) * 4; hi; hi /= 2) + if (val >> hi) { + val >>= hi; + ret += hi; + } + + return ret; +} + +static inline int ffs1(unsigned int val) +{ + int ret = 1, hi; + + for (hi = sizeof(val) * 4; hi; hi /= 2) + if (val << hi) { + val <<= hi; + ret += hi; + } + + return ret; +} + +void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_elen(R_EDX, ctrl), + pcmp_elen(R_EAX, ctrl)); + + if (res) +#ifndef VBOX + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1; +#else + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? 
rffs1(res) : ffs1(res)) - 1; +#endif + else + env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); +} + +void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + int i; + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_elen(R_EDX, ctrl), + pcmp_elen(R_EAX, ctrl)); + + if ((ctrl >> 6) & 1) { +#ifndef VBOX + if (ctrl & 1) + for (i = 0; i <= 8; i--, res >>= 1) + d->W(i) = (res & 1) ? ~0 : 0; + else + for (i = 0; i <= 16; i--, res >>= 1) + d->B(i) = (res & 1) ? ~0 : 0; +#else + if (ctrl & 1) + for (i = 0; i < 8; i++, res >>= 1) { + d->W(i) = (res & 1) ? ~0 : 0; + } + else + for (i = 0; i < 16; i++, res >>= 1) { + d->B(i) = (res & 1) ? ~0 : 0; + } +#endif + } else { + d->Q(1) = 0; + d->Q(0) = res; + } +} + +void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_ilen(s, ctrl), + pcmp_ilen(d, ctrl)); + + if (res) + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1; + else + env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); +} + +void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + int i; + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_ilen(s, ctrl), + pcmp_ilen(d, ctrl)); + + if ((ctrl >> 6) & 1) { +#ifndef VBOX + if (ctrl & 1) + for (i = 0; i <= 8; i--, res >>= 1) + d->W(i) = (res & 1) ? ~0 : 0; + else + for (i = 0; i <= 16; i--, res >>= 1) + d->B(i) = (res & 1) ? ~0 : 0; +#else + if (ctrl & 1) + for (i = 0; i < 8; i++, res >>= 1) { + d->W(i) = (res & 1) ? ~0 : 0; + } + else + for (i = 0; i < 16; i++, res >>= 1) { + d->B(i) = (res & 1) ? ~0 : 0; + } +#endif + } else { + d->Q(1) = 0; + d->Q(0) = res; + } +} + +#define CRCPOLY 0x1edc6f41 +#define CRCPOLY_BITREV 0x82f63b78 +target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) +{ + target_ulong crc = (msg & ((target_ulong) -1 >> + (TARGET_LONG_BITS - len))) ^ crc1; + + while (len--) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0); + + return crc; +} + +#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1)) +#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i)) +target_ulong helper_popcnt(target_ulong n, uint32_t type) +{ + CC_SRC = n ? 0 : CC_Z; + + n = POPCOUNT(n, 0); + n = POPCOUNT(n, 1); + n = POPCOUNT(n, 2); + n = POPCOUNT(n, 3); + if (type == 1) + return n & 0xff; + + n = POPCOUNT(n, 4); +#ifndef TARGET_X86_64 + return n; +#else + if (type == 2) + return n & 0xff; + + return POPCOUNT(n, 5); +#endif +} +#endif + +#undef SHIFT +#undef XMM_ONLY +#undef Reg +#undef B +#undef W +#undef L +#undef Q +#undef SUFFIX diff --git a/src/recompiler/target-i386/ops_sse_header.h b/src/recompiler/target-i386/ops_sse_header.h new file mode 100644 index 00000000..efa3e905 --- /dev/null +++ b/src/recompiler/target-i386/ops_sse_header.h @@ -0,0 +1,359 @@ +/* + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support + * + * Copyright (c) 2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if SHIFT == 0 +#define Reg MMXReg +#define SUFFIX _mmx +#else +#define Reg XMMReg +#define SUFFIX _xmm +#endif + +#define dh_alias_Reg ptr +#define dh_alias_XMMReg ptr +#define dh_alias_MMXReg ptr +#define dh_ctype_Reg Reg * +#define dh_ctype_XMMReg XMMReg * +#define dh_ctype_MMXReg MMXReg * +#define dh_is_signed_Reg dh_is_signed_ptr +#define dh_is_signed_XMMReg dh_is_signed_ptr +#define dh_is_signed_MMXReg dh_is_signed_ptr + +DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg) + +#if SHIFT == 1 +DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg) +#endif + +#define SSE_HELPER_B(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_W(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_L(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_Q(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +SSE_HELPER_B(paddb, FADD) +SSE_HELPER_W(paddw, FADD) +SSE_HELPER_L(paddl, FADD) +SSE_HELPER_Q(paddq, FADD) + +SSE_HELPER_B(psubb, FSUB) +SSE_HELPER_W(psubw, FSUB) +SSE_HELPER_L(psubl, FSUB) +SSE_HELPER_Q(psubq, FSUB) + +SSE_HELPER_B(paddusb, FADDUB) +SSE_HELPER_B(paddsb, FADDSB) +SSE_HELPER_B(psubusb, FSUBUB) +SSE_HELPER_B(psubsb, FSUBSB) + +SSE_HELPER_W(paddusw, FADDUW) +SSE_HELPER_W(paddsw, FADDSW) +SSE_HELPER_W(psubusw, FSUBUW) +SSE_HELPER_W(psubsw, FSUBSW) + +SSE_HELPER_B(pminub, FMINUB) +SSE_HELPER_B(pmaxub, FMAXUB) + +SSE_HELPER_W(pminsw, FMINSW) +SSE_HELPER_W(pmaxsw, FMAXSW) + +SSE_HELPER_Q(pand, FAND) +SSE_HELPER_Q(pandn, FANDN) +SSE_HELPER_Q(por, FOR) +SSE_HELPER_Q(pxor, FXOR) + +SSE_HELPER_B(pcmpgtb, FCMPGTB) +SSE_HELPER_W(pcmpgtw, FCMPGTW) +SSE_HELPER_L(pcmpgtl, FCMPGTL) + +SSE_HELPER_B(pcmpeqb, FCMPEQ) +SSE_HELPER_W(pcmpeqw, FCMPEQ) +SSE_HELPER_L(pcmpeql, FCMPEQ) + +SSE_HELPER_W(pmullw, FMULLW) +#if SHIFT == 0 +SSE_HELPER_W(pmulhrw, FMULHRW) +#endif +SSE_HELPER_W(pmulhuw, FMULHUW) +SSE_HELPER_W(pmulhw, FMULHW) + +SSE_HELPER_B(pavgb, FAVG) +SSE_HELPER_W(pavgw, FAVG) + +DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg) + +DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl) +DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) +#ifdef TARGET_X86_64 +DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) +#endif + +#if SHIFT == 0 +DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) +#else +DEF_HELPER_3(shufps, void, Reg, 
Reg, int) +DEF_HELPER_3(shufpd, void, Reg, Reg, int) +DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) +#endif + +#if SHIFT == 1 +/* FPU ops */ +/* XXX: not accurate */ + +#define SSE_HELPER_S(name, F)\ + DEF_HELPER_2(name ## ps , void, Reg, Reg) \ + DEF_HELPER_2(name ## ss , void, Reg, Reg) \ + DEF_HELPER_2(name ## pd , void, Reg, Reg) \ + DEF_HELPER_2(name ## sd , void, Reg, Reg) + +SSE_HELPER_S(add, FPU_ADD) +SSE_HELPER_S(sub, FPU_SUB) +SSE_HELPER_S(mul, FPU_MUL) +SSE_HELPER_S(div, FPU_DIV) +SSE_HELPER_S(min, FPU_MIN) +SSE_HELPER_S(max, FPU_MAX) +SSE_HELPER_S(sqrt, FPU_SQRT) + + +DEF_HELPER_2(cvtps2pd, void, Reg, Reg) +DEF_HELPER_2(cvtpd2ps, void, Reg, Reg) +DEF_HELPER_2(cvtss2sd, void, Reg, Reg) +DEF_HELPER_2(cvtsd2ss, void, Reg, Reg) +DEF_HELPER_2(cvtdq2ps, void, Reg, Reg) +DEF_HELPER_2(cvtdq2pd, void, Reg, Reg) +DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg) +DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg) +DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32) +DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32) + +#ifdef TARGET_X86_64 +DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64) +DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64) +#endif + +DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg) +DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg) +DEF_HELPER_1(cvtss2si, s32, XMMReg) +DEF_HELPER_1(cvtsd2si, s32, XMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cvtss2sq, s64, XMMReg) +DEF_HELPER_1(cvtsd2sq, s64, XMMReg) +#endif + +DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg) +DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg) +DEF_HELPER_1(cvttss2si, s32, XMMReg) +DEF_HELPER_1(cvttsd2si, s32, XMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cvttss2sq, s64, XMMReg) +DEF_HELPER_1(cvttsd2sq, s64, XMMReg) +#endif + +DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg) +DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg) +DEF_HELPER_2(rcpps, void, XMMReg, XMMReg) +DEF_HELPER_2(rcpss, void, XMMReg, XMMReg) +DEF_HELPER_2(extrq_r, void, XMMReg, XMMReg) +DEF_HELPER_3(extrq_i, void, XMMReg, int, int) +DEF_HELPER_2(insertq_r, void, XMMReg, XMMReg) +DEF_HELPER_3(insertq_i, void, XMMReg, int, int) +DEF_HELPER_2(haddps, void, XMMReg, XMMReg) +DEF_HELPER_2(haddpd, void, XMMReg, XMMReg) +DEF_HELPER_2(hsubps, void, XMMReg, XMMReg) +DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg) +DEF_HELPER_2(addsubps, void, XMMReg, XMMReg) +DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg) + +#define SSE_HELPER_CMP(name, F)\ + DEF_HELPER_2( name ## ps , void, Reg, Reg) \ + DEF_HELPER_2( name ## ss , void, Reg, Reg) \ + DEF_HELPER_2( name ## pd , void, Reg, Reg) \ + DEF_HELPER_2( name ## sd , void, Reg, Reg) + +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) +SSE_HELPER_CMP(cmplt, FPU_CMPLT) +SSE_HELPER_CMP(cmple, FPU_CMPLE) +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) +SSE_HELPER_CMP(cmpord, FPU_CMPORD) + +DEF_HELPER_2(ucomiss, void, Reg, Reg) +DEF_HELPER_2(comiss, void, Reg, Reg) +DEF_HELPER_2(ucomisd, void, Reg, Reg) +DEF_HELPER_2(comisd, void, Reg, Reg) +DEF_HELPER_1(movmskps, i32, Reg) +DEF_HELPER_1(movmskpd, i32, Reg) +#endif + +DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg) +DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packssdw, 
SUFFIX), void, Reg, Reg) +#define UNPCK_OP(base_name, base) \ + DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \ + DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \ + DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg) + +UNPCK_OP(l, 0) +UNPCK_OP(h, 1) + +#if SHIFT == 1 +DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg) +#endif + +/* 3DNow! float ops */ +#if SHIFT == 0 +DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg) +DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg) +DEF_HELPER_2(pf2id, void, MMXReg, MMXReg) +DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg) +DEF_HELPER_2(pfacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfadd, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmax, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmin, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmul, void, MMXReg, MMXReg) +DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg) +DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg) +DEF_HELPER_2(pfsub, void, MMXReg, MMXReg) +DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg) +DEF_HELPER_2(pswapd, void, MMXReg, MMXReg) +#endif + +/* SSSE3 op helpers */ +DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32) + +/* SSE4.1 op helpers */ +#if SHIFT == 1 +DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxsd, 
SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32) +#endif + +/* SSE4.2 op helpers */ +#if SHIFT == 1 +DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(crc32, tl, i32, tl, i32) +DEF_HELPER_2(popcnt, tl, tl, i32) +#endif + +#undef SHIFT +#undef Reg +#undef SUFFIX + +#undef SSE_HELPER_B +#undef SSE_HELPER_W +#undef SSE_HELPER_L +#undef SSE_HELPER_Q +#undef SSE_HELPER_S +#undef SSE_HELPER_CMP +#undef UNPCK_OP diff --git a/src/recompiler/target-i386/svm.h b/src/recompiler/target-i386/svm.h new file mode 100644 index 00000000..a224aead --- /dev/null +++ b/src/recompiler/target-i386/svm.h @@ -0,0 +1,222 @@ +#ifndef __SVM_H +#define __SVM_H + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define 
SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +/* only included in documentation, maybe wrong */ +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ + +struct __attribute__ ((__packed__)) vmcb_control_area { + uint16_t intercept_cr_read; + uint16_t intercept_cr_write; + uint16_t intercept_dr_read; + uint16_t intercept_dr_write; + uint32_t intercept_exceptions; + uint64_t intercept; + uint8_t reserved_1[44]; + uint64_t iopm_base_pa; + uint64_t msrpm_base_pa; + uint64_t tsc_offset; + uint32_t asid; + uint8_t tlb_ctl; + uint8_t reserved_2[3]; + uint32_t int_ctl; + uint32_t int_vector; + uint32_t int_state; + uint8_t reserved_3[4]; + uint64_t exit_code; + uint64_t exit_info_1; + uint64_t exit_info_2; + uint32_t exit_int_info; + uint32_t exit_int_info_err; + uint64_t nested_ctl; + uint8_t reserved_4[16]; + uint32_t event_inj; + uint32_t event_inj_err; + uint64_t nested_cr3; + uint64_t lbr_ctl; + uint8_t reserved_5[832]; +}; + +struct __attribute__ ((__packed__)) vmcb_seg { + uint16_t selector; + uint16_t attrib; + uint32_t limit; + uint64_t base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + uint8_t reserved_1[43]; + uint8_t cpl; + uint8_t reserved_2[4]; + uint64_t efer; + uint8_t reserved_3[112]; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + 
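/* Editor's note (sketch, not part of the original source): with the packed attribute these fields must land on the architectural VMCB save-area offsets. Assuming a C11 compiler for _Static_assert and offsetof from <stddef.h>, a layout sanity check could read: _Static_assert(offsetof(struct vmcb_save_area, rflags) == 0x170, "VMCB save-area layout"); _Static_assert(offsetof(struct vmcb_save_area, rip) == 0x178, "VMCB save-area layout"); (offsets per AMD's APM vol. 2). */ + 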
uint64_t rflags; + uint64_t rip; + uint8_t reserved_4[88]; + uint64_t rsp; + uint8_t reserved_5[24]; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved_6[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t last_excp_from; + uint64_t last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#endif diff --git a/src/recompiler/target-i386/translate.c b/src/recompiler/target-i386/translate.c new file mode 100644 index 00000000..1b82f3f5 --- /dev/null +++ b/src/recompiler/target-i386/translate.c @@ -0,0 +1,8385 @@ +/* + * i386 translation + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifndef VBOX +#include <inttypes.h> +#include <signal.h> +#endif /* !VBOX */ + +#include "cpu.h" +#include "exec-all.h" +#include "disas.h" +#include "tcg-op.h" + +#include "helper.h" +#define GEN_HELPER 1 +#include "helper.h" + +#define PREFIX_REPZ 0x01 +#define PREFIX_REPNZ 0x02 +#define PREFIX_LOCK 0x04 +#define PREFIX_DATA 0x08 +#define PREFIX_ADR 0x10 + +#ifdef TARGET_X86_64 +#define X86_64_ONLY(x) x +#define X86_64_DEF(...) __VA_ARGS__ +#define CODE64(s) ((s)->code64) +#define REX_X(s) ((s)->rex_x) +#define REX_B(s) ((s)->rex_b) +# ifdef VBOX +# define IS_LONG_MODE(s) ((s)->lma) +# endif +/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */ +#if 1 +#define BUGGY_64(x) NULL +#endif +#else +#define X86_64_ONLY(x) NULL +#define X86_64_DEF(...) 
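+/* Editor's note (illustrative, not part of the original source): the X86_64_ONLY()/X86_64_DEF() pair lets one decoder source serve both targets. A guard such as if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) preprocesses under TARGET_X86_64 to if (reg < 4 || reg >= 8 || x86_64_hregs), while on 32-bit builds it collapses to plain if (reg < 4), so the REX-dependent byte-register test costs nothing there. */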
+#define CODE64(s) 0 +#define REX_X(s) 0 +#define REX_B(s) 0 +# ifdef VBOX +# define IS_LONG_MODE(s) 0 +# endif +#endif + +//#define MACRO_TEST 1 + +/* global register indexes */ +static TCGv_ptr cpu_env; +static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp; +static TCGv_i32 cpu_cc_op; +static TCGv cpu_regs[CPU_NB_REGS]; +/* local temps */ +static TCGv cpu_T[2], cpu_T3; +/* local register indexes (only used inside old micro ops) */ +static TCGv cpu_tmp0, cpu_tmp4; +static TCGv_ptr cpu_ptr0, cpu_ptr1; +static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; +static TCGv_i64 cpu_tmp1_i64; +static TCGv cpu_tmp5; + +static uint8_t gen_opc_cc_op[OPC_BUF_SIZE]; + +#include "gen-icount.h" + +#ifdef TARGET_X86_64 +static int x86_64_hregs; +#endif + +#ifdef VBOX + +/* Special/override code readers to hide patched code. */ + +uint8_t ldub_code_raw(target_ulong pc) +{ + uint8_t b; + +# ifdef VBOX_WITH_RAW_MODE + if (!remR3GetOpcode(cpu_single_env, pc, &b)) +# endif + b = ldub_code(pc); + return b; +} +# define ldub_code(a) ldub_code_raw(a) + +uint16_t lduw_code_raw(target_ulong pc) +{ + uint16_t u16; + u16 = (uint16_t)ldub_code_raw(pc); + u16 |= (uint16_t)ldub_code_raw(pc + 1) << 8; + return u16; +} +# define lduw_code(a) lduw_code_raw(a) + + +uint32_t ldl_code_raw(target_ulong pc) +{ + uint32_t u32; + u32 = (uint32_t)ldub_code_raw(pc); + u32 |= (uint32_t)ldub_code_raw(pc + 1) << 8; + u32 |= (uint32_t)ldub_code_raw(pc + 2) << 16; + u32 |= (uint32_t)ldub_code_raw(pc + 3) << 24; + return u32; +} +# define ldl_code(a) ldl_code_raw(a) + +#endif /* VBOX */ + +typedef struct DisasContext { + /* current insn context */ + int override; /* -1 if no override */ + int prefix; + int aflag, dflag; + target_ulong pc; /* pc = eip + cs_base */ + int is_jmp; /* 1 = means jump (stop translation), 2 means CPU + static state change (stop translation) */ + /* current block context */ + target_ulong cs_base; /* base of CS segment */ + int pe; /* protected mode */ + int code32; /* 32 bit code segment */ +#ifdef TARGET_X86_64 + int lma; /* long mode active */ + int code64; /* 64 bit code segment */ + int rex_x, rex_b; +#endif + int ss32; /* 32 bit stack segment */ + int cc_op; /* current CC operation */ + int addseg; /* non zero if either DS/ES/SS have a non zero base */ + int f_st; /* currently unused */ + int vm86; /* vm86 mode */ +#ifdef VBOX + int vme; /* CR4.VME */ + int pvi; /* CR4.PVI */ + int record_call; /* record calls for CSAM or not? 
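(CSAM being VirtualBox's Code Scanning and Analysis Manager)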
*/ +#endif + int cpl; + int iopl; + int tf; /* TF cpu flag */ + int singlestep_enabled; /* "hardware" single step enabled */ + int jmp_opt; /* use direct block chaining for direct jumps */ + int mem_index; /* select memory access functions */ + uint64_t flags; /* all execution flags */ + struct TranslationBlock *tb; + int popl_esp_hack; /* for correct popl with esp base handling */ + int rip_offset; /* only used in x86_64, but left for simplicity */ + int cpuid_features; + int cpuid_ext_features; + int cpuid_ext2_features; + int cpuid_ext3_features; +} DisasContext; + +static void gen_eob(DisasContext *s); +static void gen_jmp(DisasContext *s, target_ulong eip); +static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); + +#ifdef VBOX +static void gen_check_external_event(void); +#endif + +/* i386 arith/logic operations */ +enum { + OP_ADDL, + OP_ORL, + OP_ADCL, + OP_SBBL, + OP_ANDL, + OP_SUBL, + OP_XORL, + OP_CMPL, +}; + +/* i386 shift ops */ +enum { + OP_ROL, + OP_ROR, + OP_RCL, + OP_RCR, + OP_SHL, + OP_SHR, + OP_SHL1, /* undocumented */ + OP_SAR = 7, +}; + +enum { + JCC_O, + JCC_B, + JCC_Z, + JCC_BE, + JCC_S, + JCC_P, + JCC_L, + JCC_LE, +}; + +/* operand size */ +enum { + OT_BYTE = 0, + OT_WORD, + OT_LONG, + OT_QUAD, +}; + +enum { + /* I386 int registers */ + OR_EAX, /* MUST be even numbered */ + OR_ECX, + OR_EDX, + OR_EBX, + OR_ESP, + OR_EBP, + OR_ESI, + OR_EDI, + + OR_TMP0 = 16, /* temporary operand register */ + OR_TMP1, + OR_A0, /* temporary register used when doing address evaluation */ +}; + +static inline void gen_op_movl_T0_0(void) +{ + tcg_gen_movi_tl(cpu_T[0], 0); +} + +static inline void gen_op_movl_T0_im(int32_t val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_op_movl_T0_imu(uint32_t val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_op_movl_T1_im(int32_t val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_movl_T1_imu(uint32_t val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_movl_A0_im(uint32_t val) +{ + tcg_gen_movi_tl(cpu_A0, val); +} + +#ifdef TARGET_X86_64 +static inline void gen_op_movq_A0_im(int64_t val) +{ + tcg_gen_movi_tl(cpu_A0, val); +} +#endif + +static inline void gen_movtl_T0_im(target_ulong val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_movtl_T1_im(target_ulong val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_andl_T0_ffff(void) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); +} + +static inline void gen_op_andl_T0_im(uint32_t val) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val); +} + +static inline void gen_op_movl_T0_T1(void) +{ + tcg_gen_mov_tl(cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_andl_A0_ffff(void) +{ + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff); +} + +#ifdef TARGET_X86_64 + +#define NB_OP_SIZES 4 + +#else /* !TARGET_X86_64 */ + +#define NB_OP_SIZES 3 + +#endif /* !TARGET_X86_64 */ + +#if defined(HOST_WORDS_BIGENDIAN) +#define REG_B_OFFSET (sizeof(target_ulong) - 1) +#define REG_H_OFFSET (sizeof(target_ulong) - 2) +#define REG_W_OFFSET (sizeof(target_ulong) - 2) +#define REG_L_OFFSET (sizeof(target_ulong) - 4) +#define REG_LH_OFFSET (sizeof(target_ulong) - 8) +#else +#define REG_B_OFFSET 0 +#define REG_H_OFFSET 1 +#define REG_W_OFFSET 0 +#define REG_L_OFFSET 0 +#define REG_LH_OFFSET 4 +#endif + +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0) +{ + TCGv tmp; + + switch(ot) { + case OT_BYTE: + tmp = tcg_temp_new(); + tcg_gen_ext8u_tl(tmp, t0); + if (reg < 4 X86_64_DEF( || reg >= 8 || 
x86_64_hregs)) { + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + } else { + tcg_gen_shli_tl(tmp, tmp, 8); + tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00); + tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp); + } + tcg_temp_free(tmp); + break; + case OT_WORD: + tmp = tcg_temp_new(); + tcg_gen_ext16u_tl(tmp, t0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + tcg_temp_free(tmp); + break; + default: /* XXX this shouldn't be reached; abort? */ + case OT_LONG: + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a mov. */ + tcg_gen_ext32u_tl(cpu_regs[reg], t0); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_mov_tl(cpu_regs[reg], t0); + break; +#endif + } +} + +static inline void gen_op_mov_reg_T0(int ot, int reg) +{ + gen_op_mov_reg_v(ot, reg, cpu_T[0]); +} + +static inline void gen_op_mov_reg_T1(int ot, int reg) +{ + gen_op_mov_reg_v(ot, reg, cpu_T[1]); +} + +static inline void gen_op_mov_reg_A0(int size, int reg) +{ + TCGv tmp; + + switch(size) { + case 0: + tmp = tcg_temp_new(); + tcg_gen_ext16u_tl(tmp, cpu_A0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + tcg_temp_free(tmp); + break; + default: /* XXX this shouldn't be reached; abort? */ + case 1: + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a mov. */ + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0); + break; +#endif + } +} + +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg) +{ + switch(ot) { + case OT_BYTE: + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) { + goto std_case; + } else { + tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8); + tcg_gen_ext8u_tl(t0, t0); + } + break; + default: + std_case: + tcg_gen_mov_tl(t0, cpu_regs[reg]); + break; + } +} + +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg) +{ + gen_op_mov_v_reg(ot, cpu_T[t_index], reg); +} + +static inline void gen_op_movl_A0_reg(int reg) +{ + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]); +} + +static inline void gen_op_addl_A0_im(int32_t val) +{ + tcg_gen_addi_tl(cpu_A0, cpu_A0, val); +#ifdef TARGET_X86_64 + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff); +#endif +} + +#ifdef TARGET_X86_64 +static inline void gen_op_addq_A0_im(int64_t val) +{ + tcg_gen_addi_tl(cpu_A0, cpu_A0, val); +} +#endif + +static void gen_add_A0_im(DisasContext *s, int val) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) + gen_op_addq_A0_im(val); + else +#endif + gen_op_addl_A0_im(val); +} + +static inline void gen_op_addl_T0_T1(void) +{ + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_jmp_T0(void) +{ + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip)); +} + +static inline void gen_op_add_reg_im(int size, int reg, int32_t val) +{ + switch(size) { + case 0: + tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); + tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0); + break; + case 1: + tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a nop. 
*/ + tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val); + break; +#endif + } +} + +static inline void gen_op_add_reg_T0(int size, int reg) +{ + switch(size) { + case 0: + tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); + tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0); + break; + case 1: + tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a nop. */ + tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]); + break; +#endif + } +} + +static inline void gen_op_set_cc_op(int32_t val) +{ + tcg_gen_movi_i32(cpu_cc_op, val); +} + +static inline void gen_op_addl_A0_reg_sN(int shift, int reg) +{ + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]); + if (shift != 0) + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a nop. */ + tcg_gen_ext32u_tl(cpu_A0, cpu_A0); +} + +#ifdef VBOX +DECLINLINE(void) gen_op_seg_check(int reg, bool keepA0) +{ + /* It seems segments don't get out of sync; if they in fact do, enable the code below. */ +# ifdef FORCE_SEGMENT_SYNC +# if 1 + TCGv t0; + + /* Given the poor quality of the TCG optimizer, it is better to call the helper directly */ + t0 = tcg_temp_local_new(TCG_TYPE_TL); + tcg_gen_movi_tl(t0, reg); + tcg_gen_helper_0_1(helper_sync_seg, t0); + tcg_temp_free(t0); +# else + /* Our segments could be outdated, so check the newselector field to see whether an update is really needed */ + int skip_label; + TCGv t0, a0; + + /* For other segments this check is a waste of time; TCG is also unable to cope with this code for data/stack segments, as it expects cpu_T[0] to stay alive */ + if (reg != R_GS) + return; + + if (keepA0) + { + /* we need to store old cpu_A0 */ + a0 = tcg_temp_local_new(TCG_TYPE_TL); + tcg_gen_mov_tl(a0, cpu_A0); + } + + skip_label = gen_new_label(); + t0 = tcg_temp_local_new(TCG_TYPE_TL); + + tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, segs[reg].newselector) + REG_L_OFFSET); + tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label); + tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, eflags) + REG_L_OFFSET); + tcg_gen_andi_tl(t0, t0, VM_MASK); + tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, skip_label); + tcg_gen_movi_tl(t0, reg); + + tcg_gen_helper_0_1(helper_sync_seg, t0); + + tcg_temp_free(t0); + + gen_set_label(skip_label); + if (keepA0) + { + tcg_gen_mov_tl(cpu_A0, a0); + tcg_temp_free(a0); + } +# endif /* 0 */ +# endif /* FORCE_SEGMENT_SYNC */ +} +#endif /* VBOX */ + +static inline void gen_op_movl_A0_seg(int reg) +{ +#ifdef VBOX + gen_op_seg_check(reg, false); +#endif + tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET); +} + +static inline void gen_op_addl_A0_seg(int reg) +{ +#ifdef VBOX + gen_op_seg_check(reg, true); +#endif + tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base)); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); +#ifdef TARGET_X86_64 + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff); +#endif +} + +#ifdef TARGET_X86_64 +static inline void gen_op_movq_A0_seg(int reg) +{ +#ifdef VBOX + gen_op_seg_check(reg, false); +#endif + tcg_gen_ld_tl(cpu_A0, cpu_env, 
offsetof(CPUState, segs[reg].base)); +} + +static inline void gen_op_addq_A0_seg(int reg) +{ +#ifdef VBOX + gen_op_seg_check(reg, true); +#endif + tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base)); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); +} + +static inline void gen_op_movq_A0_reg(int reg) +{ + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]); +} + +static inline void gen_op_addq_A0_reg_sN(int shift, int reg) +{ + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]); + if (shift != 0) + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); +} +#endif + +static inline void gen_op_lds_T0_A0(int idx) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index); + break; + case 1: + tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index); + break; + default: + case 2: + tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index); + break; + } +} + +static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_ld8u(t0, a0, mem_index); + break; + case 1: + tcg_gen_qemu_ld16u(t0, a0, mem_index); + break; + case 2: + tcg_gen_qemu_ld32u(t0, a0, mem_index); + break; + default: + case 3: + /* Should never happen on 32-bit targets. */ +#ifdef TARGET_X86_64 + tcg_gen_qemu_ld64(t0, a0, mem_index); +#endif + break; + } +} + +/* XXX: always use ldu or lds */ +static inline void gen_op_ld_T0_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[0], cpu_A0); +} + +static inline void gen_op_ldu_T0_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[0], cpu_A0); +} + +static inline void gen_op_ld_T1_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[1], cpu_A0); +} + +static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_st8(t0, a0, mem_index); + break; + case 1: + tcg_gen_qemu_st16(t0, a0, mem_index); + break; + case 2: + tcg_gen_qemu_st32(t0, a0, mem_index); + break; + default: + case 3: + /* Should never happen on 32-bit targets. 
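(idx & 3) == 3 selects a 64-bit access, which the decoder only emits when TARGET_X86_64 is defined; the empty fallback keeps 32-bit builds compiling.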
*/ +#ifdef TARGET_X86_64 + tcg_gen_qemu_st64(t0, a0, mem_index); +#endif + break; + } +} + +static inline void gen_op_st_T0_A0(int idx) +{ + gen_op_st_v(idx, cpu_T[0], cpu_A0); +} + +static inline void gen_op_st_T1_A0(int idx) +{ + gen_op_st_v(idx, cpu_T[1], cpu_A0); +} + +#ifdef VBOX + +static void gen_check_external_event(void) +{ +# if 1 + /** @todo once TCG codegen improves, we may want to use the version + from the #else branch below */ + gen_helper_check_external_event(); +# else + int skip_label; + TCGv t0; + + skip_label = gen_new_label(); + t0 = tcg_temp_local_new(TCG_TYPE_TL); + /* t0 = cpu_tmp0; */ + + tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, interrupt_request)); + /* Keep in sync with helper_check_external_event() */ + tcg_gen_andi_tl(t0, t0, + CPU_INTERRUPT_EXTERNAL_EXIT + | CPU_INTERRUPT_EXTERNAL_TIMER + | CPU_INTERRUPT_EXTERNAL_DMA + | CPU_INTERRUPT_EXTERNAL_HARD); + /** @todo predict branch as taken */ + tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label); + tcg_temp_free(t0); + + gen_helper_check_external_event(); + + gen_set_label(skip_label); +# endif +} + +#endif /* VBOX */ + +static inline void gen_jmp_im(target_ulong pc) +{ + tcg_gen_movi_tl(cpu_tmp0, pc); + tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip)); +} + +#ifdef VBOX +DECLINLINE(void) gen_update_eip(target_ulong pc) +{ + gen_jmp_im(pc); +# ifdef VBOX_DUMP_STATE + gen_helper_dump_state(); +# endif +} +#endif /* VBOX */ + +static inline void gen_string_movl_A0_ESI(DisasContext *s) +{ + int override; + + override = s->override; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + if (override >= 0) { + gen_op_movq_A0_seg(override); + gen_op_addq_A0_reg_sN(0, R_ESI); + } else { + gen_op_movq_A0_reg(R_ESI); + } + } else +#endif + if (s->aflag) { + /* 32 bit address */ + if (s->addseg && override < 0) + override = R_DS; + if (override >= 0) { + gen_op_movl_A0_seg(override); + gen_op_addl_A0_reg_sN(0, R_ESI); + } else { + gen_op_movl_A0_reg(R_ESI); + } + } else { + /* 16 bit address, always override */ + if (override < 0) + override = R_DS; + gen_op_movl_A0_reg(R_ESI); + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(override); + } +} + +static inline void gen_string_movl_A0_EDI(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EDI); + } else +#endif + if (s->aflag) { + if (s->addseg) { + gen_op_movl_A0_seg(R_ES); + gen_op_addl_A0_reg_sN(0, R_EDI); + } else { + gen_op_movl_A0_reg(R_EDI); + } + } else { + gen_op_movl_A0_reg(R_EDI); + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(R_ES); + } +} + +static inline void gen_op_movl_T0_Dshift(int ot) +{ + tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df)); + tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot); +} + +static void gen_extu(int ot, TCGv reg) +{ + switch(ot) { + case OT_BYTE: + tcg_gen_ext8u_tl(reg, reg); + break; + case OT_WORD: + tcg_gen_ext16u_tl(reg, reg); + break; + case OT_LONG: + tcg_gen_ext32u_tl(reg, reg); + break; + default: + break; + } +} + +static void gen_exts(int ot, TCGv reg) +{ + switch(ot) { + case OT_BYTE: + tcg_gen_ext8s_tl(reg, reg); + break; + case OT_WORD: + tcg_gen_ext16s_tl(reg, reg); + break; + case OT_LONG: + tcg_gen_ext32s_tl(reg, reg); + break; + default: + break; + } +} + +static inline void gen_op_jnz_ecx(int size, int label1) +{ + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]); + gen_extu(size + 1, cpu_tmp0); + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1); +} + +static inline void gen_op_jz_ecx(int size, int label1) +{ + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]); + gen_extu(size + 1, cpu_tmp0); + 
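/* size is the address-size code (0/1/2 for 16/32/64 bit); size + 1 is the matching OT_WORD/OT_LONG/OT_QUAD width, so gen_extu() masks CX/ECX/RCX before the zero test. */ + 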
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); +} + +static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n) +{ + switch (ot) { + case 0: gen_helper_inb(v, n); break; + case 1: gen_helper_inw(v, n); break; + case 2: gen_helper_inl(v, n); break; + } + +} + +static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n) +{ + switch (ot) { + case 0: gen_helper_outb(v, n); break; + case 1: gen_helper_outw(v, n); break; + case 2: gen_helper_outl(v, n); break; + } + +} + +static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, + uint32_t svm_flags) +{ + int state_saved; + target_ulong next_eip; + + state_saved = 0; + if (s->pe && (s->cpl > s->iopl || s->vm86)) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + state_saved = 1; + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + switch (ot) { + case 0: gen_helper_check_iob(cpu_tmp2_i32); break; + case 1: gen_helper_check_iow(cpu_tmp2_i32); break; + case 2: gen_helper_check_iol(cpu_tmp2_i32); break; + } + } + if(s->flags & HF_SVMI_MASK) { + if (!state_saved) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + } + svm_flags |= (1 << (4 + ot)); + next_eip = s->pc - s->cs_base; + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags), + tcg_const_i32(next_eip - cur_eip)); + } +} + +static inline void gen_movs(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_string_movl_A0_EDI(s); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_update_cc_op(DisasContext *s) +{ + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + s->cc_op = CC_OP_DYNAMIC; + } +} + +static void gen_op_update1_cc(void) +{ + tcg_gen_discard_tl(cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static void gen_op_update2_cc(void) +{ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static inline void gen_op_cmpl_T0_T1_cc(void) +{ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_testl_T0_T1_cc(void) +{ + tcg_gen_discard_tl(cpu_cc_src); + tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); +} + +static void gen_op_update_neg_cc(void) +{ + tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +/* compute eflags.C to reg */ +static void gen_compute_eflags_c(TCGv reg) +{ + gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op); + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(TCGv reg) +{ + gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op); + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + switch(jcc_op) { + case JCC_O: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_B: + gen_compute_eflags_c(cpu_T[0]); + break; + case JCC_Z: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_BE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + 
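/* EFLAGS bit positions extracted in these cases: CF = bit 0, PF = bit 2, ZF = bit 6, SF = bit 7, OF = bit 11. */ + 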
case JCC_S: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_P: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_L: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + default: + case JCC_LE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + } +} + +/* return true if setcc_slow is not needed (WARNING: must be kept in + sync with gen_jcc1) */ +static int is_fast_jcc_case(DisasContext *s, int b) +{ + int jcc_op; + jcc_op = (b >> 1) & 7; + switch(s->cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + if (jcc_op == JCC_O || jcc_op == JCC_P) + goto slow_jcc; + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + if (jcc_op != JCC_Z && jcc_op != JCC_S) + goto slow_jcc; + break; + default: + slow_jcc: + return 0; + } + return 1; +} + +/* generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranteed not to be used. */ +static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +{ + int inv, jcc_op, size, cond; + TCGv t0; + + inv = b & 1; + jcc_op = (b >> 1) & 7; + + switch(cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + + size = cc_op - CC_OP_SUBB; + switch(jcc_op) { + case JCC_Z: + fast_jcc_z: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff); + t0 = cpu_tmp0; + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff); + t0 = cpu_tmp0; + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff); + t0 = cpu_tmp0; + break; +#endif + default: + t0 = cpu_cc_dst; + break; + } + tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); + break; + case JCC_S: + fast_jcc_s: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; +#endif + default: + tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, + 0, l1); + break; + } + break; + + case JCC_B: + cond = inv ? TCG_COND_GEU : TCG_COND_LTU; + goto fast_jcc_b; + case JCC_BE: + cond = inv ? 
TCG_COND_GTU : TCG_COND_LEU; + fast_jcc_b: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xff); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + case JCC_L: + cond = inv ? TCG_COND_GE : TCG_COND_LT; + goto fast_jcc_l; + case JCC_LE: + cond = inv ? TCG_COND_GT : TCG_COND_LE; + fast_jcc_l: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext8s_tl(t0, cpu_cc_src); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext16s_tl(t0, cpu_cc_src); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext32s_tl(t0, cpu_cc_src); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + default: + goto slow_jcc; + } + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_ADCB: + case CC_OP_ADCW: + case CC_OP_ADCL: + case CC_OP_ADCQ: + + case CC_OP_SBBB: + case CC_OP_SBBW: + case CC_OP_SBBL: + case CC_OP_SBBQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + + case CC_OP_SARB: + case CC_OP_SARW: + case CC_OP_SARL: + case CC_OP_SARQ: + switch(jcc_op) { + case JCC_Z: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_z; + case JCC_S: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_s; + default: + goto slow_jcc; + } + break; + default: + slow_jcc: + gen_setcc_slow_T0(s, jcc_op); + tcg_gen_brcondi_tl(inv ? 
TCG_COND_EQ : TCG_COND_NE, + cpu_T[0], 0, l1); + break; + } +} + +/* XXX: does not work with gdbstub "ice" single step - not a + serious problem */ +static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) +{ + int l1, l2; + + l1 = gen_new_label(); + l2 = gen_new_label(); + gen_op_jnz_ecx(s->aflag, l1); + gen_set_label(l2); + gen_jmp_tb(s, next_eip, 1); + gen_set_label(l1); + return l2; +} + +static inline void gen_stos(DisasContext *s, int ot) +{ + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + gen_string_movl_A0_EDI(s); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_lods(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T0(ot, R_EAX); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); +} + +static inline void gen_scas(DisasContext *s, int ot) +{ + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + gen_string_movl_A0_EDI(s); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_cmpl_T0_T1_cc(); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_cmps(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_string_movl_A0_EDI(s); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_cmpl_T0_T1_cc(); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_ins(DisasContext *s, int ot) +{ + if (use_icount) + gen_io_start(); + gen_string_movl_A0_EDI(s); + /* Note: we must do this dummy write first to be restartable in + case of page fault. */ + gen_op_movl_T0_0(); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_mov_TN_reg(OT_WORD, 1, R_EDX); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); + if (use_icount) + gen_io_end(); +} + +static inline void gen_outs(DisasContext *s, int ot) +{ + if (use_icount) + gen_io_start(); + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + + gen_op_mov_TN_reg(OT_WORD, 1, R_EDX); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + if (use_icount) + gen_io_end(); +} + +/* same method as Valgrind : we generate jumps to current or next + instruction */ +#define GEN_REPZ(op) \ +static inline void gen_repz_ ## op(DisasContext *s, int ot, \ + target_ulong cur_eip, target_ulong next_eip) \ +{ \ + int l2;\ + gen_update_cc_op(s); \ + l2 = gen_jz_ecx_string(s, next_eip); \ + gen_ ## op(s, ot); \ + gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + /* a loop would cause two single step exceptions if ECX = 1 \ + before rep string_insn */ \ + if (!s->jmp_opt) \ + gen_op_jz_ecx(s->aflag, l2); \ + gen_jmp(s, cur_eip); \ +} + +#define GEN_REPZ2(op) \ +static inline void gen_repz_ ## op(DisasContext *s, int ot, \ + target_ulong cur_eip, \ + target_ulong next_eip, \ + int nz) \ +{ \ + int l2;\ + gen_update_cc_op(s); \ + l2 = gen_jz_ecx_string(s, next_eip); \ + gen_ ## op(s, ot); \ + gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + gen_op_set_cc_op(CC_OP_SUBB + ot); \ + gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \ + if 
(!s->jmp_opt) \ + gen_op_jz_ecx(s->aflag, l2); \ + gen_jmp(s, cur_eip); \ +} + +GEN_REPZ(movs) +GEN_REPZ(stos) +GEN_REPZ(lods) +GEN_REPZ(ins) +GEN_REPZ(outs) +GEN_REPZ2(scas) +GEN_REPZ2(cmps) + +static void gen_helper_fp_arith_ST0_FT0(int op) +{ + switch (op) { + case 0: gen_helper_fadd_ST0_FT0(); break; + case 1: gen_helper_fmul_ST0_FT0(); break; + case 2: gen_helper_fcom_ST0_FT0(); break; + case 3: gen_helper_fcom_ST0_FT0(); break; + case 4: gen_helper_fsub_ST0_FT0(); break; + case 5: gen_helper_fsubr_ST0_FT0(); break; + case 6: gen_helper_fdiv_ST0_FT0(); break; + case 7: gen_helper_fdivr_ST0_FT0(); break; + } +} + +/* NOTE the exception in "r" op ordering */ +static void gen_helper_fp_arith_STN_ST0(int op, int opreg) +{ + TCGv_i32 tmp = tcg_const_i32(opreg); + switch (op) { + case 0: gen_helper_fadd_STN_ST0(tmp); break; + case 1: gen_helper_fmul_STN_ST0(tmp); break; + case 4: gen_helper_fsubr_STN_ST0(tmp); break; + case 5: gen_helper_fsub_STN_ST0(tmp); break; + case 6: gen_helper_fdivr_STN_ST0(tmp); break; + case 7: gen_helper_fdiv_STN_ST0(tmp); break; + } +} + +/* if d == OR_TMP0, it means memory operand (address in A0) */ +static void gen_op(DisasContext *s1, int op, int ot, int d) +{ + if (d != OR_TMP0) { + gen_op_mov_TN_reg(ot, 0, d); + } else { + gen_op_ld_T0_A0(ot + s1->mem_index); + } + switch(op) { + case OP_ADCL: + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(cpu_tmp4); + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); + tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); + s1->cc_op = CC_OP_DYNAMIC; + break; + case OP_SBBL: + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(cpu_tmp4); + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); + tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); + s1->cc_op = CC_OP_DYNAMIC; + break; + case OP_ADDL: + gen_op_addl_T0_T1(); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update2_cc(); + s1->cc_op = CC_OP_ADDB + ot; + break; + case OP_SUBL: + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update2_cc(); + s1->cc_op = CC_OP_SUBB + ot; + break; + default: + case OP_ANDL: + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_ORL: + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_XORL: + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + 
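/* Lazy EFLAGS: only the result (cpu_cc_dst) and the operation class (cc_op) are recorded; individual flag bits are produced on demand by gen_compute_eflags()/gen_compute_eflags_c(). */ + 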
s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_CMPL: + gen_op_cmpl_T0_T1_cc(); + s1->cc_op = CC_OP_SUBB + ot; + break; + } +} + +/* if d == OR_TMP0, it means memory operand (address in A0) */ +static void gen_inc(DisasContext *s1, int ot, int d, int c) +{ + if (d != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, d); + else + gen_op_ld_T0_A0(ot + s1->mem_index); + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + if (c > 0) { + tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); + s1->cc_op = CC_OP_INCB + ot; + } else { + tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1); + s1->cc_op = CC_OP_DECB + ot; + } + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_compute_eflags_c(cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, + int is_right, int is_arith) +{ + target_ulong mask; + int shift_label; + TCGv t0, t1; + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); + + tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1); + + if (is_right) { + if (is_arith) { + gen_exts(ot, cpu_T[0]); + tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + gen_extu(ot, cpu_T[0]); + tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + } else { + tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags if non zero shift */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + /* XXX: inefficient */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + + tcg_gen_mov_tl(t0, cpu_T[0]); + tcg_gen_mov_tl(t1, cpu_T3); + + shift_label = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label); + + tcg_gen_mov_tl(cpu_cc_src, t1); + tcg_gen_mov_tl(cpu_cc_dst, t0); + if (is_right) + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + else + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + + gen_set_label(shift_label); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, + int is_right, int is_arith) +{ + int mask; + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + op2 &= mask; + if (op2 != 0) { + if (is_right) { + if (is_arith) { + gen_exts(ot, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2); + } else { + gen_extu(ot, cpu_T[0]); + tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2); + } + } else { + tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2); + } + } + + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags if non zero shift */ + if (op2 != 0) { + tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + if (is_right) + s->cc_op = CC_OP_SARB + ot; + else + s->cc_op = CC_OP_SHLB + ot; + } +} + +static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2) +{ + if (arg2 >= 0) + 
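/* signed shift count: left for arg2 >= 0, right by -arg2 otherwise; the rotate code below uses this to move the result's top bit into the CC_O position (bit 11) */ + 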
tcg_gen_shli_tl(ret, arg1, arg2); + else + tcg_gen_shri_tl(ret, arg1, -arg2); +} + +static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, + int is_right) +{ + target_ulong mask; + int label1, label2, data_bits; + TCGv t0, t1, t2, a0; + + /* XXX: inefficient, but we must use local temps */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + tcg_gen_mov_tl(t1, cpu_T[1]); + + tcg_gen_andi_tl(t1, t1, mask); + + /* Must test zero case to avoid using undefined behaviour in TCG + shifts. */ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1); + + if (ot <= OT_WORD) + tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); + else + tcg_gen_mov_tl(cpu_tmp0, t1); + + gen_extu(ot, t0); + tcg_gen_mov_tl(t2, t0); + + data_bits = 8 << ot; + /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX: + fix TCG definition) */ + if (is_right) { + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0); + tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); + tcg_gen_shl_tl(t0, t0, cpu_tmp0); + } else { + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0); + tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); + tcg_gen_shr_tl(t0, t0, cpu_tmp0); + } + tcg_gen_or_tl(t0, t0, cpu_tmp4); + + gen_set_label(label1); + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + /* update eflags */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + label2 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); + + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); + tcg_gen_xor_tl(cpu_tmp0, t2, t0); + tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + if (is_right) { + tcg_gen_shri_tl(t0, t0, data_bits - 1); + } + tcg_gen_andi_tl(t0, t0, CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); + + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + + gen_set_label(label2); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); +} + +static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, + int is_right) +{ + int mask; + int data_bits; + TCGv t0, t1, a0; + + /* XXX: inefficient, but we must use local temps */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + gen_extu(ot, t0); + tcg_gen_mov_tl(t1, t0); + + op2 &= mask; + data_bits = 8 << ot; + if (op2 != 0) { + int shift = op2 & ((1 << (3 + ot)) - 1); + if (is_right) { + tcg_gen_shri_tl(cpu_tmp4, t0, shift); + tcg_gen_shli_tl(t0, t0, data_bits - shift); + } + else { + tcg_gen_shli_tl(cpu_tmp4, t0, shift); + tcg_gen_shri_tl(t0, t0, data_bits - shift); + } + tcg_gen_or_tl(t0, t0, cpu_tmp4); + } + + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + if (op2 != 0) { + /* update eflags */ + if 
(s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); + tcg_gen_xor_tl(cpu_tmp0, t1, t0); + tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + if (is_right) { + tcg_gen_shri_tl(t0, t0, data_bits - 1); + } + tcg_gen_andi_tl(t0, t0, CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); + + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + s->cc_op = CC_OP_EFLAGS; + } + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(a0); +} + +/* XXX: add faster immediate = 1 case */ +static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, + int is_right) +{ + int label1; + + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + if (is_right) { + switch (ot) { + case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#ifdef TARGET_X86_64 + case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#endif + } + } else { + switch (ot) { + case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#ifdef TARGET_X86_64 + case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#endif + } + } + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags */ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); + + tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp); + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + + gen_set_label(label1); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ +} + +/* XXX: add faster immediate case */ +static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, + int is_right) +{ + int label1, label2, data_bits; + target_ulong mask; + TCGv t0, t1, t2, a0; + + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + tcg_gen_andi_tl(cpu_T3, cpu_T3, mask); + + tcg_gen_mov_tl(t1, cpu_T[1]); + tcg_gen_mov_tl(t2, cpu_T3); + + /* Must test zero case to avoid using undefined behaviour in TCG + shifts. 
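A count of zero would make the companion shift below use the full operand width (data_bits - 0), which TCG leaves undefined.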
*/ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + + tcg_gen_addi_tl(cpu_tmp5, t2, -1); + if (ot == OT_WORD) { + /* Note: we implement the Intel behaviour for shift count > 16 */ + if (is_right) { + tcg_gen_andi_tl(t0, t0, 0xffff); + tcg_gen_shli_tl(cpu_tmp0, t1, 16); + tcg_gen_or_tl(t0, t0, cpu_tmp0); + tcg_gen_ext32u_tl(t0, t0); + + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + + /* only needed if count > 16, but a test would complicate */ + tcg_gen_subfi_tl(cpu_tmp5, 32, t2); + tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5); + + tcg_gen_shr_tl(t0, t0, t2); + + tcg_gen_or_tl(t0, t0, cpu_tmp0); + } else { + /* XXX: not optimal */ + tcg_gen_andi_tl(t0, t0, 0xffff); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(t1, t1, t0); + tcg_gen_ext32u_tl(t1, t1); + + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); + tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5); + tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0); + tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5); + + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, 32, t2); + tcg_gen_shr_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + } + } else { + data_bits = 8 << ot; + if (is_right) { + if (ot == OT_LONG) + tcg_gen_ext32u_tl(t0, t0); + + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + + tcg_gen_shr_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); + tcg_gen_shl_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + + } else { + if (ot == OT_LONG) + tcg_gen_ext32u_tl(t1, t1); + + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); + + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); + tcg_gen_shr_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + } + } + tcg_gen_mov_tl(t1, cpu_tmp4); + + gen_set_label(label1); + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + /* update eflags */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + label2 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); + + tcg_gen_mov_tl(cpu_cc_src, t1); + tcg_gen_mov_tl(cpu_cc_dst, t0); + if (is_right) { + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + } else { + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + } + gen_set_label(label2); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); +} + +static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) +{ + if (s != OR_TMP1) + gen_op_mov_TN_reg(ot, 1, s); + switch(op) { + case OP_ROL: + gen_rot_rm_T1(s1, ot, d, 0); + break; + case OP_ROR: + gen_rot_rm_T1(s1, ot, d, 1); + break; + case OP_SHL: + case OP_SHL1: + gen_shift_rm_T1(s1, ot, d, 0, 0); + break; + case OP_SHR: + gen_shift_rm_T1(s1, ot, d, 1, 0); + break; + case OP_SAR: + gen_shift_rm_T1(s1, ot, d, 1, 1); + break; + case OP_RCL: + gen_rotc_rm_T1(s1, ot, d, 0); + break; + case OP_RCR: + gen_rotc_rm_T1(s1, ot, d, 1); + break; + } +} + +static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c) +{ + switch(op) { + case OP_ROL: + gen_rot_rm_im(s1, ot, d, c, 0); + break; + case OP_ROR: + gen_rot_rm_im(s1, ot, d, c, 1); + break; + case OP_SHL: + case OP_SHL1: + gen_shift_rm_im(s1, ot, d, c, 0, 0); + break; + case OP_SHR: + gen_shift_rm_im(s1, ot, d, c, 1, 0); + break; + case OP_SAR: + gen_shift_rm_im(s1, ot, d, c, 1, 1); + break; + default: + /* currently not optimized */ + gen_op_movl_T1_im(c); + gen_shift(s1, op, ot, d, OR_TMP1); + break; + } +} + +static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int 
*offset_ptr) +{ + target_long disp; + int havesib; + int base; + int index; + int scale; + int opreg; + int mod, rm, code, override, must_add_seg; + + override = s->override; + must_add_seg = s->addseg; + if (override >= 0) + must_add_seg = 1; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + + if (s->aflag) { + + havesib = 0; + base = rm; + index = 0; + scale = 0; + + if (base == 4) { + havesib = 1; + code = ldub_code(s->pc++); + scale = (code >> 6) & 3; + index = ((code >> 3) & 7) | REX_X(s); + base = (code & 7); + } + base |= REX_B(s); + + switch (mod) { + case 0: + if ((base & 7) == 5) { + base = -1; + disp = (int32_t)ldl_code(s->pc); + s->pc += 4; + if (CODE64(s) && !havesib) { + disp += s->pc + s->rip_offset; + } + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub_code(s->pc++); + break; + default: + case 2: +#ifdef VBOX + disp = (int32_t)ldl_code(s->pc); +#else + disp = ldl_code(s->pc); +#endif + s->pc += 4; + break; + } + + if (base >= 0) { + /* for correct popl handling with esp */ + if (base == 4 && s->popl_esp_hack) + disp += s->popl_esp_hack; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(base); + if (disp != 0) { + gen_op_addq_A0_im(disp); + } + } else +#endif + { + gen_op_movl_A0_reg(base); + if (disp != 0) + gen_op_addl_A0_im(disp); + } + } else { +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_im(disp); + } else +#endif + { + gen_op_movl_A0_im(disp); + } + } + /* index == 4 means no index */ + if (havesib && (index != 4)) { +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_addq_A0_reg_sN(scale, index); + } else +#endif + { + gen_op_addl_A0_reg_sN(scale, index); + } + } + if (must_add_seg) { + if (override < 0) { + if (base == R_EBP || base == R_ESP) + override = R_SS; + else + override = R_DS; + } +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_addq_A0_seg(override); + } else +#endif + { + gen_op_addl_A0_seg(override); + } + } + } else { + switch (mod) { + case 0: + if (rm == 6) { + disp = lduw_code(s->pc); + s->pc += 2; + gen_op_movl_A0_im(disp); + rm = 0; /* avoid SS override */ + goto no_rm; + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub_code(s->pc++); + break; + default: + case 2: + disp = lduw_code(s->pc); + s->pc += 2; + break; + } + switch(rm) { + case 0: + gen_op_movl_A0_reg(R_EBX); + gen_op_addl_A0_reg_sN(0, R_ESI); + break; + case 1: + gen_op_movl_A0_reg(R_EBX); + gen_op_addl_A0_reg_sN(0, R_EDI); + break; + case 2: + gen_op_movl_A0_reg(R_EBP); + gen_op_addl_A0_reg_sN(0, R_ESI); + break; + case 3: + gen_op_movl_A0_reg(R_EBP); + gen_op_addl_A0_reg_sN(0, R_EDI); + break; + case 4: + gen_op_movl_A0_reg(R_ESI); + break; + case 5: + gen_op_movl_A0_reg(R_EDI); + break; + case 6: + gen_op_movl_A0_reg(R_EBP); + break; + default: + case 7: + gen_op_movl_A0_reg(R_EBX); + break; + } + if (disp != 0) + gen_op_addl_A0_im(disp); + gen_op_andl_A0_ffff(); + no_rm: + if (must_add_seg) { + if (override < 0) { + if (rm == 2 || rm == 3 || rm == 6) + override = R_SS; + else + override = R_DS; + } + gen_op_addl_A0_seg(override); + } + } + + opreg = OR_A0; + disp = 0; + *reg_ptr = opreg; + *offset_ptr = disp; +} + +static void gen_nop_modrm(DisasContext *s, int modrm) +{ + int mod, rm, base, code; + + mod = (modrm >> 6) & 3; + if (mod == 3) + return; + rm = modrm & 7; + + if (s->aflag) { + + base = rm; + + if (base == 4) { + code = ldub_code(s->pc++); + base = (code & 7); + } + + switch (mod) { + case 0: + if (base == 5) { + s->pc += 4; + } + break; + case 1: + s->pc++; + break; + default: + case 2: + s->pc += 4; 
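+ /* mod == 2 with 32-bit addressing: a disp32 was skipped; the 16-bit path below skips a disp16 instead */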
+ break; + } + } else { + switch (mod) { + case 0: + if (rm == 6) { + s->pc += 2; + } + break; + case 1: + s->pc++; + break; + default: + case 2: + s->pc += 2; + break; + } + } +} + +/* used for LEA and MOV AX, mem */ +static void gen_add_A0_ds_seg(DisasContext *s) +{ + int override, must_add_seg; + must_add_seg = s->addseg; + override = R_DS; + if (s->override >= 0) { + override = s->override; + must_add_seg = 1; + } + if (must_add_seg) { +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_addq_A0_seg(override); + } else +#endif + { + gen_op_addl_A0_seg(override); + } + } +} + +/* generate modrm memory load or store of 'reg'. TMP0 is used if reg == + OR_TMP0 */ +static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store) +{ + int mod, rm, opreg, disp; + + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod == 3) { + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_mov_reg_T0(ot, rm); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + if (reg != OR_TMP0) + gen_op_mov_reg_T0(ot, reg); + } + } else { + gen_lea_modrm(s, modrm, &opreg, &disp); + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_ld_T0_A0(ot + s->mem_index); + if (reg != OR_TMP0) + gen_op_mov_reg_T0(ot, reg); + } + } +} + +static inline uint32_t insn_get(DisasContext *s, int ot) +{ + uint32_t ret; + + switch(ot) { + case OT_BYTE: + ret = ldub_code(s->pc); + s->pc++; + break; + case OT_WORD: + ret = lduw_code(s->pc); + s->pc += 2; + break; + default: + case OT_LONG: + ret = ldl_code(s->pc); + s->pc += 4; + break; + } + return ret; +} + +static inline int insn_const_size(unsigned int ot) +{ + if (ot <= OT_LONG) + return 1 << ot; + else + return 4; +} + +static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +{ + TranslationBlock *tb; + target_ulong pc; + + pc = s->cs_base + eip; + tb = s->tb; + /* NOTE: we handle the case where the TB spans two pages here */ + if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) || + (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) { +#ifdef VBOX + gen_check_external_event(); +#endif /* VBOX */ + /* jump to same page: we can use a direct jump */ + tcg_gen_goto_tb(tb_num); + gen_jmp_im(eip); + tcg_gen_exit_tb((intptr_t)tb + tb_num); + } else { + /* jump to another page: currently not optimized */ + gen_jmp_im(eip); + gen_eob(s); + } +} + +static inline void gen_jcc(DisasContext *s, int b, + target_ulong val, target_ulong next_eip) +{ + int l1, l2, cc_op; + + cc_op = s->cc_op; + gen_update_cc_op(s); + if (s->jmp_opt) { + l1 = gen_new_label(); + gen_jcc1(s, cc_op, b, l1); + + gen_goto_tb(s, 0, next_eip); + + gen_set_label(l1); + gen_goto_tb(s, 1, val); + s->is_jmp = DISAS_TB_JUMP; + } else { + + l1 = gen_new_label(); + l2 = gen_new_label(); + gen_jcc1(s, cc_op, b, l1); + + gen_jmp_im(next_eip); + tcg_gen_br(l2); + + gen_set_label(l1); + gen_jmp_im(val); + gen_set_label(l2); + gen_eob(s); + } +} + +static void gen_setcc(DisasContext *s, int b) +{ + int inv, jcc_op, l1; + TCGv t0; + + if (is_fast_jcc_case(s, b)) { + /* nominal case: we use a jump */ + /* XXX: make it faster by adding new instructions in TCG */ + t0 = tcg_temp_local_new(); + tcg_gen_movi_tl(t0, 0); + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_movi_tl(t0, 1); + gen_set_label(l1); + tcg_gen_mov_tl(cpu_T[0], t0); + tcg_temp_free(t0); + } else { + /* slow case: it is more efficient not to generate a jump, + although it is questionnable 
whether this optimization is + worth to */ + inv = b & 1; + jcc_op = (b >> 1) & 7; + gen_setcc_slow_T0(s, jcc_op); + if (inv) { + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); + } + } +} + +static inline void gen_op_movl_T0_seg(int seg_reg) +{ + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].selector)); +} + +static inline void gen_op_movl_seg_T0_vm(int seg_reg) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); + tcg_gen_st32_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].selector)); + tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4); + tcg_gen_st_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].base)); +} + +/* move T0 to seg_reg and compute if the CPU state may change. Never + call this function with seg_reg == R_CS */ +static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip) +{ + if (s->pe && !s->vm86) { + /* XXX: optimize by finding processor state dynamically */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32); + /* abort translation because the addseg value may change or + because ss32 may change. For R_SS, translation must always + stop as a special handling must be done to disable hardware + interrupts for the next instruction */ + if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) + s->is_jmp = DISAS_TB_JUMP; + } else { + gen_op_movl_seg_T0_vm(seg_reg); + if (seg_reg == R_SS) + s->is_jmp = DISAS_TB_JUMP; + } +} + +static inline int svm_is_rep(int prefixes) +{ + return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0); +} + +static inline void +gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, + uint32_t type, uint64_t param) +{ + /* no SVM activated; fast case */ + if (likely(!(s->flags & HF_SVMI_MASK))) + return; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_svm_check_intercept_param(tcg_const_i32(type), + tcg_const_i64(param)); +} + +static inline void +gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type) +{ + gen_svm_check_intercept_param(s, pc_start, type, 0); +} + +static inline void gen_stack_update(DisasContext *s, int addend) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_add_reg_im(2, R_ESP, addend); + } else +#endif + if (s->ss32) { + gen_op_add_reg_im(1, R_ESP, addend); + } else { + gen_op_add_reg_im(0, R_ESP, addend); + } +} + +/* generate a push. It depends on ss32, addseg and dflag */ +static void gen_push_T0(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + if (s->dflag) { + gen_op_addq_A0_im(-8); + gen_op_st_T0_A0(OT_QUAD + s->mem_index); + } else { + gen_op_addq_A0_im(-2); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + } + gen_op_mov_reg_A0(2, R_ESP); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (!s->dflag) + gen_op_addl_A0_im(-2); + else + gen_op_addl_A0_im(-4); + if (s->ss32) { + if (s->addseg) { + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + gen_op_addl_A0_seg(R_SS); + } + } else { + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + gen_op_addl_A0_seg(R_SS); + } + gen_op_st_T0_A0(s->dflag + 1 + s->mem_index); + if (s->ss32 && !s->addseg) + gen_op_mov_reg_A0(1, R_ESP); + else + gen_op_mov_reg_T1(s->ss32 + 1, R_ESP); + } +} + +/* generate a push. 
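+   The memory store happens before the architectural ESP is written back,
+   so a push that faults leaves the stack pointer unchanged.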
It depends on ss32, addseg and dflag */ +/* slower version for T1, only used for call Ev */ +static void gen_push_T1(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + if (s->dflag) { + gen_op_addq_A0_im(-8); + gen_op_st_T1_A0(OT_QUAD + s->mem_index); + } else { + gen_op_addq_A0_im(-2); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + } + gen_op_mov_reg_A0(2, R_ESP); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (!s->dflag) + gen_op_addl_A0_im(-2); + else + gen_op_addl_A0_im(-4); + if (s->ss32) { + if (s->addseg) { + gen_op_addl_A0_seg(R_SS); + } + } else { + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(R_SS); + } + gen_op_st_T1_A0(s->dflag + 1 + s->mem_index); + + if (s->ss32 && !s->addseg) + gen_op_mov_reg_A0(1, R_ESP); + else + gen_stack_update(s, -(2 << s->dflag)); + } +} + +/* two step pop is necessary for precise exceptions */ +static void gen_pop_T0(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (s->ss32) { + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + } else { + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(R_SS); + } + gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index); + } +} + +static void gen_pop_update(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s) && s->dflag) { + gen_stack_update(s, 8); + } else +#endif + { + gen_stack_update(s, 2 << s->dflag); + } +} + +static void gen_stack_A0(DisasContext *s) +{ + gen_op_movl_A0_reg(R_ESP); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); +} + +/* NOTE: wrap around in 16 bit not fully handled */ +static void gen_pusha(DisasContext *s) +{ + int i; + gen_op_movl_A0_reg(R_ESP); + gen_op_addl_A0_im(-(16 << s->dflag)); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + for(i = 0;i < 8; i++) { + gen_op_mov_TN_reg(OT_LONG, 0, 7 - i); + gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index); + gen_op_addl_A0_im(2 << s->dflag); + } + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); +} + +/* NOTE: wrap around in 16 bit not fully handled */ +static void gen_popa(DisasContext *s) +{ + int i; + gen_op_movl_A0_reg(R_ESP); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + for(i = 0;i < 8; i++) { + /* ESP is not reloaded */ + if (i != 3) { + gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index); + gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i); + } + gen_op_addl_A0_im(2 << s->dflag); + } + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); +} + +static void gen_enter(DisasContext *s, int esp_addend, int level) +{ + int ot, opsize; + + level &= 0x1f; +#ifdef TARGET_X86_64 + if (CODE64(s)) { + ot = s->dflag ? 
OT_QUAD : OT_WORD; + opsize = 1 << ot; + + gen_op_movl_A0_reg(R_ESP); + gen_op_addq_A0_im(-opsize); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + + /* push bp */ + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_st_T0_A0(ot + s->mem_index); + if (level) { + /* XXX: must save state */ + gen_helper_enter64_level(tcg_const_i32(level), + tcg_const_i32((ot == OT_QUAD)), + cpu_T[1]); + } + gen_op_mov_reg_T1(ot, R_EBP); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); + gen_op_mov_reg_T1(OT_QUAD, R_ESP); + } else +#endif + { + ot = s->dflag + OT_WORD; + opsize = 2 << s->dflag; + + gen_op_movl_A0_reg(R_ESP); + gen_op_addl_A0_im(-opsize); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + /* push bp */ + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_st_T0_A0(ot + s->mem_index); + if (level) { + /* XXX: must save state */ + gen_helper_enter_level(tcg_const_i32(level), + tcg_const_i32(s->dflag), + cpu_T[1]); + } + gen_op_mov_reg_T1(ot, R_EBP); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); + } +} + +static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_raise_exception(tcg_const_i32(trapno)); + s->is_jmp = DISAS_TB_JUMP; +} + +/* an interrupt is different from an exception because of the + privilege checks */ +static void gen_interrupt(DisasContext *s, int intno, + target_ulong cur_eip, target_ulong next_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_raise_interrupt(tcg_const_i32(intno), + tcg_const_i32(next_eip - cur_eip)); + s->is_jmp = DISAS_TB_JUMP; +} + +static void gen_debug(DisasContext *s, target_ulong cur_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_debug(); + s->is_jmp = DISAS_TB_JUMP; +} + +/* generate a generic end of block. Trace exception is also generated + if needed */ +static void gen_eob(DisasContext *s) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (s->tb->flags & HF_INHIBIT_IRQ_MASK) { + gen_helper_reset_inhibit_irq(); + } + if (s->tb->flags & HF_RF_MASK) { + gen_helper_reset_rf(); + } + if ( s->singlestep_enabled +#ifdef VBOX + && ( !(cpu_single_env->state & CPU_EMULATE_SINGLE_STEP) + || !(s->prefix & (PREFIX_REPNZ | PREFIX_REPZ) )) +#endif + ) { + gen_helper_debug(); + } else if (s->tf) { + gen_helper_single_step(); + } else { + tcg_gen_exit_tb(0); + } + s->is_jmp = DISAS_TB_JUMP; +} + +/* generate a jump to eip. 
No segment change must happen before as a + direct call to the next block may occur */ +static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) +{ + if (s->jmp_opt) { + gen_update_cc_op(s); + gen_goto_tb(s, tb_num, eip); + s->is_jmp = DISAS_TB_JUMP; + } else { + gen_jmp_im(eip); + gen_eob(s); + } +} + +static void gen_jmp(DisasContext *s, target_ulong eip) +{ + gen_jmp_tb(s, eip, 0); +} + +static inline void gen_ldq_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset); +} + +static inline void gen_stq_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index); +} + +static inline void gen_ldo_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); +} + +static inline void gen_sto_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index); +} + +static inline void gen_op_movo(int d_offset, int s_offset) +{ + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8); +} + +static inline void gen_op_movq(int d_offset, int s_offset) +{ + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); +} + +static inline void gen_op_movl(int d_offset, int s_offset) +{ + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset); +} + +static inline void gen_op_movq_env_0(int d_offset) +{ + tcg_gen_movi_i64(cpu_tmp1_i64, 0); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); +} + +#define SSE_SPECIAL ((void *)1) +#define SSE_DUMMY ((void *)2) + +#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } +#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \ + gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, } + +static void *sse_op_table1[256][4] = { + /* 3DNow! extensions */ + [0x0e] = { SSE_DUMMY }, /* femms */ + [0x0f] = { SSE_DUMMY }, /* pf... 
*/
+    /* pure SSE operations */
+    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
+    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
+    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
+    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
+    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
+    [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
+
+    [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+    [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
+    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
+    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
+    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
+    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
+    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
+    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
+    [0x51] = SSE_FOP(sqrt),
+    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
+    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
+    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
+    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
+    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
+    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
+    [0x58] = SSE_FOP(add),
+    [0x59] = SSE_FOP(mul),
+    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
+               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
+    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
+    [0x5c] = SSE_FOP(sub),
+    [0x5d] = SSE_FOP(min),
+    [0x5e] = SSE_FOP(div),
+    [0x5f] = SSE_FOP(max),
+
+    [0xc2] = SSE_FOP(cmpeq),
+    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+
+    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+
+    /* MMX ops and their SSE extensions */
+    [0x60] = MMX_OP2(punpcklbw),
+    [0x61] = MMX_OP2(punpcklwd),
+    [0x62] = MMX_OP2(punpckldq),
+    [0x63] = MMX_OP2(packsswb),
+    [0x64] = MMX_OP2(pcmpgtb),
+    [0x65] = MMX_OP2(pcmpgtw),
+    [0x66] = MMX_OP2(pcmpgtl),
+    [0x67] = MMX_OP2(packuswb),
+    [0x68] = MMX_OP2(punpckhbw),
+    [0x69] = MMX_OP2(punpckhwd),
+    [0x6a] = MMX_OP2(punpckhdq),
+    [0x6b] = MMX_OP2(packssdw),
+    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
+    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
+    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
+    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
+    [0x70] = { gen_helper_pshufw_mmx,
+               gen_helper_pshufd_xmm,
+               gen_helper_pshufhw_xmm,
+               gen_helper_pshuflw_xmm },
+    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
+    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
+    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
+    [0x74] = MMX_OP2(pcmpeqb),
+    [0x75] = MMX_OP2(pcmpeqw),
+    [0x76] = MMX_OP2(pcmpeql),
+    [0x77] = { SSE_DUMMY }, /* emms */
+    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i,
insertq_i */ + [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r }, + [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps }, + [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps }, + [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ + [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ + [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ + [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ + [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps }, + [0xd1] = MMX_OP2(psrlw), + [0xd2] = MMX_OP2(psrld), + [0xd3] = MMX_OP2(psrlq), + [0xd4] = MMX_OP2(paddq), + [0xd5] = MMX_OP2(pmullw), + [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, + [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ + [0xd8] = MMX_OP2(psubusb), + [0xd9] = MMX_OP2(psubusw), + [0xda] = MMX_OP2(pminub), + [0xdb] = MMX_OP2(pand), + [0xdc] = MMX_OP2(paddusb), + [0xdd] = MMX_OP2(paddusw), + [0xde] = MMX_OP2(pmaxub), + [0xdf] = MMX_OP2(pandn), + [0xe0] = MMX_OP2(pavgb), + [0xe1] = MMX_OP2(psraw), + [0xe2] = MMX_OP2(psrad), + [0xe3] = MMX_OP2(pavgw), + [0xe4] = MMX_OP2(pmulhuw), + [0xe5] = MMX_OP2(pmulhw), + [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, + [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ + [0xe8] = MMX_OP2(psubsb), + [0xe9] = MMX_OP2(psubsw), + [0xea] = MMX_OP2(pminsw), + [0xeb] = MMX_OP2(por), + [0xec] = MMX_OP2(paddsb), + [0xed] = MMX_OP2(paddsw), + [0xee] = MMX_OP2(pmaxsw), + [0xef] = MMX_OP2(pxor), + [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ + [0xf1] = MMX_OP2(psllw), + [0xf2] = MMX_OP2(pslld), + [0xf3] = MMX_OP2(psllq), + [0xf4] = MMX_OP2(pmuludq), + [0xf5] = MMX_OP2(pmaddwd), + [0xf6] = MMX_OP2(psadbw), + [0xf7] = MMX_OP2(maskmov), + [0xf8] = MMX_OP2(psubb), + [0xf9] = MMX_OP2(psubw), + [0xfa] = MMX_OP2(psubl), + [0xfb] = MMX_OP2(psubq), + [0xfc] = MMX_OP2(paddb), + [0xfd] = MMX_OP2(paddw), + [0xfe] = MMX_OP2(paddl), +}; + +static void *sse_op_table2[3 * 8][2] = { + [0 + 2] = MMX_OP2(psrlw), + [0 + 4] = MMX_OP2(psraw), + [0 + 6] = MMX_OP2(psllw), + [8 + 2] = MMX_OP2(psrld), + [8 + 4] = MMX_OP2(psrad), + [8 + 6] = MMX_OP2(pslld), + [16 + 2] = MMX_OP2(psrlq), + [16 + 3] = { NULL, gen_helper_psrldq_xmm }, + [16 + 6] = MMX_OP2(psllq), + [16 + 7] = { NULL, gen_helper_pslldq_xmm }, +}; + +static void *sse_op_table3[4 * 3] = { + gen_helper_cvtsi2ss, + gen_helper_cvtsi2sd, + X86_64_ONLY(gen_helper_cvtsq2ss), + X86_64_ONLY(gen_helper_cvtsq2sd), + + gen_helper_cvttss2si, + gen_helper_cvttsd2si, + X86_64_ONLY(gen_helper_cvttss2sq), + X86_64_ONLY(gen_helper_cvttsd2sq), + + gen_helper_cvtss2si, + gen_helper_cvtsd2si, + X86_64_ONLY(gen_helper_cvtss2sq), + X86_64_ONLY(gen_helper_cvtsd2sq), +}; + +static void *sse_op_table4[8][4] = { + SSE_FOP(cmpeq), + SSE_FOP(cmplt), + SSE_FOP(cmple), + SSE_FOP(cmpunord), + SSE_FOP(cmpneq), + SSE_FOP(cmpnlt), + SSE_FOP(cmpnle), + SSE_FOP(cmpord), +}; + +static void *sse_op_table5[256] = { + [0x0c] = gen_helper_pi2fw, + [0x0d] = gen_helper_pi2fd, + [0x1c] = gen_helper_pf2iw, + [0x1d] = gen_helper_pf2id, + [0x8a] = gen_helper_pfnacc, + [0x8e] = gen_helper_pfpnacc, + [0x90] = gen_helper_pfcmpge, + [0x94] = gen_helper_pfmin, + [0x96] = gen_helper_pfrcp, + [0x97] = gen_helper_pfrsqrt, + [0x9a] = gen_helper_pfsub, + [0x9e] = gen_helper_pfadd, + [0xa0] = gen_helper_pfcmpgt, + [0xa4] = gen_helper_pfmax, + [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */ + [0xa7] = gen_helper_movq, /* pfrsqit1 */ + 
[0xaa] = gen_helper_pfsubr, + [0xae] = gen_helper_pfacc, + [0xb0] = gen_helper_pfcmpeq, + [0xb4] = gen_helper_pfmul, + [0xb6] = gen_helper_movq, /* pfrcpit2 */ + [0xb7] = gen_helper_pmulhrw_mmx, + [0xbb] = gen_helper_pswapd, + [0xbf] = gen_helper_pavgb_mmx /* pavgusb */ +}; + +struct sse_op_helper_s { + void *op[2]; uint32_t ext_mask; +}; +#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 } +#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 } +#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 } +#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 } +static struct sse_op_helper_s sse_op_table6[256] = { + [0x00] = SSSE3_OP(pshufb), + [0x01] = SSSE3_OP(phaddw), + [0x02] = SSSE3_OP(phaddd), + [0x03] = SSSE3_OP(phaddsw), + [0x04] = SSSE3_OP(pmaddubsw), + [0x05] = SSSE3_OP(phsubw), + [0x06] = SSSE3_OP(phsubd), + [0x07] = SSSE3_OP(phsubsw), + [0x08] = SSSE3_OP(psignb), + [0x09] = SSSE3_OP(psignw), + [0x0a] = SSSE3_OP(psignd), + [0x0b] = SSSE3_OP(pmulhrsw), + [0x10] = SSE41_OP(pblendvb), + [0x14] = SSE41_OP(blendvps), + [0x15] = SSE41_OP(blendvpd), + [0x17] = SSE41_OP(ptest), + [0x1c] = SSSE3_OP(pabsb), + [0x1d] = SSSE3_OP(pabsw), + [0x1e] = SSSE3_OP(pabsd), + [0x20] = SSE41_OP(pmovsxbw), + [0x21] = SSE41_OP(pmovsxbd), + [0x22] = SSE41_OP(pmovsxbq), + [0x23] = SSE41_OP(pmovsxwd), + [0x24] = SSE41_OP(pmovsxwq), + [0x25] = SSE41_OP(pmovsxdq), + [0x28] = SSE41_OP(pmuldq), + [0x29] = SSE41_OP(pcmpeqq), + [0x2a] = SSE41_SPECIAL, /* movntqda */ + [0x2b] = SSE41_OP(packusdw), + [0x30] = SSE41_OP(pmovzxbw), + [0x31] = SSE41_OP(pmovzxbd), + [0x32] = SSE41_OP(pmovzxbq), + [0x33] = SSE41_OP(pmovzxwd), + [0x34] = SSE41_OP(pmovzxwq), + [0x35] = SSE41_OP(pmovzxdq), + [0x37] = SSE42_OP(pcmpgtq), + [0x38] = SSE41_OP(pminsb), + [0x39] = SSE41_OP(pminsd), + [0x3a] = SSE41_OP(pminuw), + [0x3b] = SSE41_OP(pminud), + [0x3c] = SSE41_OP(pmaxsb), + [0x3d] = SSE41_OP(pmaxsd), + [0x3e] = SSE41_OP(pmaxuw), + [0x3f] = SSE41_OP(pmaxud), + [0x40] = SSE41_OP(pmulld), + [0x41] = SSE41_OP(phminposuw), +}; + +static struct sse_op_helper_s sse_op_table7[256] = { + [0x08] = SSE41_OP(roundps), + [0x09] = SSE41_OP(roundpd), + [0x0a] = SSE41_OP(roundss), + [0x0b] = SSE41_OP(roundsd), + [0x0c] = SSE41_OP(blendps), + [0x0d] = SSE41_OP(blendpd), + [0x0e] = SSE41_OP(pblendw), + [0x0f] = SSSE3_OP(palignr), + [0x14] = SSE41_SPECIAL, /* pextrb */ + [0x15] = SSE41_SPECIAL, /* pextrw */ + [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */ + [0x17] = SSE41_SPECIAL, /* extractps */ + [0x20] = SSE41_SPECIAL, /* pinsrb */ + [0x21] = SSE41_SPECIAL, /* insertps */ + [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */ + [0x40] = SSE41_OP(dpps), + [0x41] = SSE41_OP(dppd), + [0x42] = SSE41_OP(mpsadbw), + [0x60] = SSE42_OP(pcmpestrm), + [0x61] = SSE42_OP(pcmpestri), + [0x62] = SSE42_OP(pcmpistrm), + [0x63] = SSE42_OP(pcmpistri), +}; + +static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) +{ + int b1, op1_offset, op2_offset, is_xmm, val, ot; + int modrm, mod, rm, reg, reg_addr, offset_addr; + void *sse_op2; + + b &= 0xff; + if (s->prefix & PREFIX_DATA) + b1 = 1; + else if (s->prefix & PREFIX_REPZ) + b1 = 2; + else if (s->prefix & PREFIX_REPNZ) + b1 = 3; + else + b1 = 0; + sse_op2 = sse_op_table1[b][b1]; + if (!sse_op2) + goto illegal_op; + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { + is_xmm = 1; + } else { + if (b1 == 0) { + /* MMX case */ + is_xmm = 0; + } else { + is_xmm = 1; + } + } + /* simple MMX/SSE operation */ + if (s->flags & HF_TS_MASK) { + gen_exception(s, 
EXCP07_PREX, pc_start - s->cs_base);
+        return;
+    }
+    if (s->flags & HF_EM_MASK) {
+    illegal_op:
+        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+        return;
+    }
+    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
+        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
+            goto illegal_op;
+    if (b == 0x0e) {
+        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+            goto illegal_op;
+        /* femms */
+        gen_helper_emms();
+        return;
+    }
+    if (b == 0x77) {
+        /* emms */
+        gen_helper_emms();
+        return;
+    }
+    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
+       the static cpu state) */
+    if (!is_xmm) {
+        gen_helper_enter_mmx();
+    }
+
+    modrm = ldub_code(s->pc++);
+    reg = ((modrm >> 3) & 7);
+    if (is_xmm)
+        reg |= rex_r;
+    mod = (modrm >> 6) & 3;
+    if (sse_op2 == SSE_SPECIAL) {
+        b |= (b1 << 8);
+        switch(b) {
+        case 0x0e7: /* movntq */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            break;
+        case 0x1e7: /* movntdq */
+        case 0x02b: /* movntps */
+        case 0x12b: /* movntpd */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            break;
+        case 0x3f0: /* lddqu */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            break;
+        case 0x22b: /* movntss */
+        case 0x32b: /* movntsd */
+            if (mod == 3)
+                goto illegal_op;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            if (b1 & 1) {
+                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,
+                                                      xmm_regs[reg]));
+            } else {
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                                             xmm_regs[reg].XMM_L(0)));
+                gen_op_st_T0_A0(OT_LONG + s->mem_index);
+            }
+            break;
+        case 0x6e: /* movd mm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                                 offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+            }
+            break;
+        case 0x16e: /* movd xmm, ea */
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                                 offsetof(CPUX86State,xmm_regs[reg]));
+                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
+            } else
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                                 offsetof(CPUX86State,xmm_regs[reg]));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+            }
+            break;
+        case 0x6f: /* movq mm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+            } else {
+                rm = (modrm & 7);
+                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                               offsetof(CPUX86State,fpregs[rm].mmx));
+                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                               offsetof(CPUX86State,fpregs[reg].mmx));
+            }
+            break;
+        case 0x010: /* movups */
+        case 0x110: /* movupd */
+        case 0x028: /* movaps */
+        case 0x128: /* movapd */
+        case 0x16f: /* movdqa xmm, ea */
+        case 0x26f: /* movdqu xmm, ea */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
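+                /* register to register: copy the full 128 bits as two
+                   64-bit halves */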
gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]), + offsetof(CPUX86State,xmm_regs[rm])); + } + break; + case 0x210: /* movss xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); + } + break; + case 0x310: /* movsd xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + break; + case 0x012: /* movlps */ + case 0x112: /* movlpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + /* movhlps */ + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + } + break; + case 0x212: /* movsldup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(2))); + } + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + break; + case 0x312: /* movddup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + break; + case 0x016: /* movhps */ + case 0x116: /* movhpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + } else { + /* movlhps */ + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + break; + case 0x216: /* movshdup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), + 
offsetof(CPUX86State,xmm_regs[rm].XMM_L(3))); + } + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + break; + case 0x178: + case 0x378: + { + int bit_index, field_length; + + if (b1 == 1 && reg != 0) + goto illegal_op; + field_length = ldub_code(s->pc++) & 0x3F; + bit_index = ldub_code(s->pc++) & 0x3F; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[reg])); + if (b1 == 1) + gen_helper_extrq_i(cpu_ptr0, tcg_const_i32(bit_index), + tcg_const_i32(field_length)); + else + gen_helper_insertq_i(cpu_ptr0, tcg_const_i32(bit_index), + tcg_const_i32(field_length)); + } + break; + case 0x7e: /* movd ea, mm */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + tcg_gen_ld_i64(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx)); + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); + } else +#endif + { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0))); + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); + } + break; + case 0x17e: /* movd ea, xmm */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + tcg_gen_ld_i64(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); + } else +#endif + { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); + } + break; + case 0x27e: /* movq xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + break; + case 0x7f: /* movq ea, mm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx)); + } else { + rm = (modrm & 7); + gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx), + offsetof(CPUX86State,fpregs[reg].mmx)); + } + break; + case 0x011: /* movups */ + case 0x111: /* movupd */ + case 0x029: /* movaps */ + case 0x129: /* movapd */ + case 0x17f: /* movdqa ea, xmm */ + case 0x27f: /* movdqu ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]), + offsetof(CPUX86State,xmm_regs[reg])); + } + break; + case 0x211: /* movss ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + } + break; + case 0x311: /* movsd ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } + break; + case 0x013: /* movlps */ + case 0x113: /* movlpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + 
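+                /* memory form only: the mod == 3 encoding of 0x13 is illegal,
+                   register-to-register low moves are movhlps under 0x12 above */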
gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + goto illegal_op; + } + break; + case 0x017: /* movhps */ + case 0x117: /* movhpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + } else { + goto illegal_op; + } + break; + case 0x71: /* shift mm, im */ + case 0x72: + case 0x73: + case 0x171: /* shift xmm, im */ + case 0x172: + case 0x173: + if (b1 >= 2) { + goto illegal_op; + } + val = ldub_code(s->pc++); + if (is_xmm) { + gen_op_movl_T0_im(val); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1))); + op1_offset = offsetof(CPUX86State,xmm_t0); + } else { + gen_op_movl_T0_im(val); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1))); + op1_offset = offsetof(CPUX86State,mmx_t0); + } + sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; + if (!sse_op2) + goto illegal_op; + if (is_xmm) { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } else { + rm = (modrm & 7); + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + break; + case 0x050: /* movmskps */ + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x150: /* movmskpd */ + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x02a: /* cvtpi2ps */ + case 0x12a: /* cvtpi2pd */ + gen_helper_enter_mmx(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_ldq_env_A0(s->mem_index, op2_offset); + } else { + rm = (modrm & 7); + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + switch(b >> 8) { + case 0x0: + gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1); + break; + default: + case 0x1: + gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1); + break; + } + break; + case 0x22a: /* cvtsi2ss */ + case 0x32a: /* cvtsi2sd */ + ot = (s->dflag == 2) ? 
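+                 /* dflag == 2 means REX.W: 64-bit integer source */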
OT_QUAD : OT_LONG; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)]; + if (ot == OT_LONG) { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32); + } else { + ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]); + } + break; + case 0x02c: /* cvttps2pi */ + case 0x12c: /* cvttpd2pi */ + case 0x02d: /* cvtps2pi */ + case 0x12d: /* cvtpd2pi */ + gen_helper_enter_mmx(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_ldo_env_A0(s->mem_index, op2_offset); + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + switch(b) { + case 0x02c: + gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x12c: + gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x02d: + gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x12d: + gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1); + break; + } + break; + case 0x22c: /* cvttss2si */ + case 0x32c: /* cvttsd2si */ + case 0x22d: /* cvtss2si */ + case 0x32d: /* cvtsd2si */ + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if ((b >> 8) & 1) { + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0))); + } else { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + } + op2_offset = offsetof(CPUX86State,xmm_t0); + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 + + (b & 1) * 4]; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); + if (ot == OT_LONG) { + ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + } else { + ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0); + } + gen_op_mov_reg_T0(ot, reg); + break; + case 0xc4: /* pinsrw */ + case 0x1c4: + s->rip_offset = 1; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + val = ldub_code(s->pc++); + if (b1) { + val &= 7; + tcg_gen_st16_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_W(val))); + } else { + val &= 3; + tcg_gen_st16_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val))); + } + break; + case 0xc5: /* pextrw */ + case 0x1c5: + if (mod != 3) + goto illegal_op; + ot = (s->dflag == 2) ? 
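+                 /* the selected word is zero-extended into a 32/64-bit GPR */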
OT_QUAD : OT_LONG; + val = ldub_code(s->pc++); + if (b1) { + val &= 7; + rm = (modrm & 7) | REX_B(s); + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[rm].XMM_W(val))); + } else { + val &= 3; + rm = (modrm & 7); + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val))); + } + reg = ((modrm >> 3) & 7) | rex_r; + gen_op_mov_reg_T0(ot, reg); + break; + case 0x1d6: /* movq ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + } + break; + case 0x2d6: /* movq2dq */ + gen_helper_enter_mmx(); + rm = (modrm & 7); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,fpregs[rm].mmx)); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + break; + case 0x3d6: /* movdq2q */ + gen_helper_enter_mmx(); + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + break; + case 0xd7: /* pmovmskb */ + case 0x1d7: + if (mod != 3) + goto illegal_op; + if (b1) { + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0); + } else { + rm = (modrm & 7); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx)); + gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + reg = ((modrm >> 3) & 7) | rex_r; + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x138: + if (s->prefix & PREFIX_REPNZ) + goto crc32; + case 0x038: + b = modrm; + modrm = ldub_code(s->pc++); + rm = modrm & 7; + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (b1 >= 2) { + goto illegal_op; + } + + sse_op2 = sse_op_table6[b].op[b1]; + if (!sse_op2) + goto illegal_op; + if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) + goto illegal_op; + + if (b1) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); + } else { + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + switch (b) { + case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */ + case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */ + case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */ + gen_ldq_env_A0(s->mem_index, op2_offset + + offsetof(XMMReg, XMM_Q(0))); + break; + case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */ + case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */ + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset + + offsetof(XMMReg, XMM_L(0))); + break; + case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */ + tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset + + offsetof(XMMReg, XMM_W(0))); + break; + case 0x2a: /* movntqda */ + gen_ldo_env_A0(s->mem_index, op1_offset); + return; + default: + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } else { + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_lea_modrm(s, 
modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, op2_offset); + } + } + if (sse_op2 == SSE_SPECIAL) + goto illegal_op; + + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + + if (b == 0x17) + s->cc_op = CC_OP_EFLAGS; + break; + case 0x338: /* crc32 */ + crc32: + b = modrm; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + if (b != 0xf0 && b != 0xf1) + goto illegal_op; + if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) + goto illegal_op; + + if (b == 0xf0) + ot = OT_BYTE; + else if (b == 0xf1 && s->dflag != 2) + if (s->prefix & PREFIX_DATA) + ot = OT_WORD; + else + ot = OT_LONG; + else + ot = OT_QUAD; + + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, + cpu_T[0], tcg_const_i32(8 << ot)); + + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + gen_op_mov_reg_T0(ot, reg); + break; + case 0x03a: + case 0x13a: + b = modrm; + modrm = ldub_code(s->pc++); + rm = modrm & 7; + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (b1 >= 2) { + goto illegal_op; + } + + sse_op2 = sse_op_table7[b].op[b1]; + if (!sse_op2) + goto illegal_op; + if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) + goto illegal_op; + + if (sse_op2 == SSE_SPECIAL) { + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + reg = ((modrm >> 3) & 7) | rex_r; + val = ldub_code(s->pc++); + switch (b) { + case 0x14: /* pextrb */ + tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_B(val & 15))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st8(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x15: /* pextrw */ + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_W(val & 7))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st16(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x16: + if (ot == OT_LONG) { /* pextrd */ + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + if (mod == 3) + gen_op_mov_reg_v(ot, rm, cpu_T[0]); + else + tcg_gen_qemu_st32(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + } else { /* pextrq */ +#ifdef TARGET_X86_64 + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_Q(val & 1))); + if (mod == 3) + gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64); + else + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); +#else + goto illegal_op; +#endif + } + break; + case 0x17: /* extractps */ + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st32(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x20: /* pinsrb */ + if (mod == 3) + gen_op_mov_TN_reg(OT_LONG, 0, rm); + else + tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_B(val & 15))); + break; + case 0x21: /* insertps */ + if (mod == 3) { + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State,xmm_regs[rm] + .XMM_L((val >> 6) & 3))); + } else { + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + 
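+                    /* insertps imm8: bits 7:6 pick the source dword (register
+                       form only), bits 5:4 the destination slot, bits 3:0 are
+                       a zero mask -- implemented by the shifts on val below */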
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + } + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State,xmm_regs[reg] + .XMM_L((val >> 4) & 3))); + if ((val >> 0) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(0))); + if ((val >> 1) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(1))); + if ((val >> 2) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(2))); + if ((val >> 3) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(3))); + break; + case 0x22: + if (ot == OT_LONG) { /* pinsrd */ + if (mod == 3) + gen_op_mov_v_reg(ot, cpu_tmp0, rm); + else + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + } else { /* pinsrq */ +#ifdef TARGET_X86_64 + if (mod == 3) + gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm); + else + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_Q(val & 1))); +#else + goto illegal_op; +#endif + } + break; + } + return; + } + + if (b1) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); + } else { + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } else { + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, op2_offset); + } + } + val = ldub_code(s->pc++); + + if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ + s->cc_op = CC_OP_EFLAGS; + + if (s->dflag == 2) + /* The helper must use entire 64-bit gp registers */ + val |= 1 << 8; + } + + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); + break; + default: + goto illegal_op; + } + } else { + /* generic MMX or SSE operation */ + switch(b) { + case 0x70: /* pshufx insn */ + case 0xc6: /* pshufx insn */ + case 0xc2: /* compare insns */ + s->rip_offset = 1; + break; + default: + break; + } + if (is_xmm) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,xmm_t0); + if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) || + b == 0xc2)) { + /* specific case for SSE single instructions */ + if (b1 == 2) { + /* 32 bit access */ + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + } else { + /* 64 bit access */ + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0))); + } + } else { + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_ldq_env_A0(s->mem_index, 
op2_offset);
+            } else {
+                rm = (modrm & 7);
+                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+            }
+        }
+        switch(b) {
+        case 0x0f: /* 3DNow! data insns */
+            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+                goto illegal_op;
+            val = ldub_code(s->pc++);
+            sse_op2 = sse_op_table5[val];
+            if (!sse_op2)
+                goto illegal_op;
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* pshufx insn */
+            val = ldub_code(s->pc++);
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            break;
+        case 0xc2:
+            /* compare insns */
+            val = ldub_code(s->pc++);
+            if (val >= 8)
+                goto illegal_op;
+            sse_op2 = sse_op_table4[val][b1];
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        case 0xf7:
+            /* maskmov: we must prepare A0 */
+            if (mod != 3)
+                goto illegal_op;
+#ifdef TARGET_X86_64
+            if (s->aflag == 2) {
+                gen_op_movq_A0_reg(R_EDI);
+            } else
+#endif
+            {
+                gen_op_movl_A0_reg(R_EDI);
+                if (s->aflag == 0)
+                    gen_op_andl_A0_ffff();
+            }
+            gen_add_A0_ds_seg(s);
+
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+            break;
+        default:
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            break;
+        }
+        if (b == 0x2e || b == 0x2f) {
+            s->cc_op = CC_OP_EFLAGS;
+        }
+    }
+}
+
+#ifdef VBOX
+/* Checks if it's an invalid lock sequence. Only a few instructions
+   can be used together with the lock prefix and of those only the
+   forms that write a memory operand. So, this is kind of annoying
+   work to do...
+   The AMD manual lists the following instructions.
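+   Only their memory-destination forms can be locked; with a register
+   destination the LOCK prefix must raise #UD, which is what the mod == 3
+   checks below test for.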
+ ADC + ADD + AND + BTC + BTR + BTS + CMPXCHG + CMPXCHG8B + CMPXCHG16B + DEC + INC + NEG + NOT + OR + SBB + SUB + XADD + XCHG + XOR */ +static bool is_invalid_lock_sequence(DisasContext *s, target_ulong pc_start, int b) +{ + target_ulong pc = s->pc; + int modrm, mod, op; + + /* X={8,16,32,64} Y={16,32,64} */ + switch (b) + { + /* /2: ADC reg/memX, immX */ + /* /0: ADD reg/memX, immX */ + /* /4: AND reg/memX, immX */ + /* /1: OR reg/memX, immX */ + /* /3: SBB reg/memX, immX */ + /* /5: SUB reg/memX, immX */ + /* /6: XOR reg/memX, immX */ + case 0x80: + case 0x81: + case 0x83: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op == 7) /* /7: CMP */ + break; + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0x10: /* /r: ADC reg/mem8, reg8 */ + case 0x11: /* /r: ADC reg/memX, regY */ + case 0x00: /* /r: ADD reg/mem8, reg8 */ + case 0x01: /* /r: ADD reg/memX, regY */ + case 0x20: /* /r: AND reg/mem8, reg8 */ + case 0x21: /* /r: AND reg/memY, regY */ + case 0x08: /* /r: OR reg/mem8, reg8 */ + case 0x09: /* /r: OR reg/memY, regY */ + case 0x18: /* /r: SBB reg/mem8, reg8 */ + case 0x19: /* /r: SBB reg/memY, regY */ + case 0x28: /* /r: SUB reg/mem8, reg8 */ + case 0x29: /* /r: SUB reg/memY, regY */ + case 0x86: /* /r: XCHG reg/mem8, reg8 or XCHG reg8, reg/mem8 */ + case 0x87: /* /r: XCHG reg/memY, regY or XCHG regY, reg/memY */ + case 0x30: /* /r: XOR reg/mem8, reg8 */ + case 0x31: /* /r: XOR reg/memY, regY */ + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /1: DEC reg/memX */ + /* /0: INC reg/memX */ + case 0xfe: + case 0xff: + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /3: NEG reg/memX */ + /* /2: NOT reg/memX */ + case 0xf6: + case 0xf7: + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0x0f: + b = ldub_code(pc++); + switch (b) + { + /* /7: BTC reg/memY, imm8 */ + /* /6: BTR reg/memY, imm8 */ + /* /5: BTS reg/memY, imm8 */ + case 0xba: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op < 5) + break; + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0xbb: /* /r: BTC reg/memY, regY */ + case 0xb3: /* /r: BTR reg/memY, regY */ + case 0xab: /* /r: BTS reg/memY, regY */ + case 0xb0: /* /r: CMPXCHG reg/mem8, reg8 */ + case 0xb1: /* /r: CMPXCHG reg/memY, regY */ + case 0xc0: /* /r: XADD reg/mem8, reg8 */ + case 0xc1: /* /r: XADD reg/memY, regY */ + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /1: CMPXCHG8B mem64 or CMPXCHG16B mem128 */ + case 0xc7: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op != 1) + break; + return false; + } + break; + } + + /* illegal sequence. The s->pc is past the lock prefix and that + is sufficient for the TB, I think. */ + Log(("illegal lock sequence %RGv (b=%#x)\n", pc_start, b)); + return true; +} +#endif /* VBOX */ + +/* convert one instruction. s->is_jmp is set if the translation must + be stopped. 
Return the next pc value */ +static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) +{ + int b, prefixes, aflag, dflag; + int shift, ot; + int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val; + target_ulong next_eip, tval; + int rex_w, rex_r; + + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) + tcg_gen_debug_insn_start(pc_start); + s->pc = pc_start; + prefixes = 0; + aflag = s->code32; + dflag = s->code32; + s->override = -1; + rex_w = -1; + rex_r = 0; +#ifdef TARGET_X86_64 + s->rex_x = 0; + s->rex_b = 0; + x86_64_hregs = 0; +#endif + s->rip_offset = 0; /* for relative ip address */ +#ifdef VBOX + /* nike: seems to only slow things down */ +# if 0 + /* Always update EIP. Otherwise one must be very careful with generated code that can raise exceptions. */ + + gen_update_eip(pc_start - s->cs_base); +# endif +#endif /* VBOX */ + + next_byte: + b = ldub_code(s->pc); + s->pc++; + /* check prefixes */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; + case 0x40 ... 0x4f: + /* REX prefix */ + rex_w = (b >> 3) & 1; + rex_r = (b & 0x4) << 1; + s->rex_x = (b & 0x2) << 2; + REX_B(s) = (b & 0x1) << 3; + x86_64_hregs = 1; /* select uniform byte register addressing */ + goto next_byte; + } + if (rex_w == 1) { + /* 0x66 is ignored if rex.w is set */ + dflag = 2; + } else { + if (prefixes & PREFIX_DATA) + dflag ^= 1; + } + if (!(prefixes & PREFIX_ADR)) + aflag = 2; + } else +#endif + { + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; + } + if (prefixes & PREFIX_DATA) + dflag ^= 1; + if (prefixes & PREFIX_ADR) + aflag ^= 1; + } + + s->prefix = prefixes; + s->aflag = aflag; + s->dflag = dflag; + + /* lock generation */ +#ifndef VBOX + if (prefixes & PREFIX_LOCK) + gen_helper_lock(); +#else /* VBOX */ + if (prefixes & PREFIX_LOCK) { + if (is_invalid_lock_sequence(s, pc_start, b)) { + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + return s->pc; + } + gen_helper_lock(); + } +#endif /* VBOX */ + + /* now check op code */ + reswitch: + switch(b) { + case 0x0f: + /**************************/ + /* extended op code */ + b = ldub_code(s->pc++) | 0x100; + goto reswitch; + + /**************************/ + /* arith & logic */ + case 0x00 ... 0x05: + case 0x08 ... 0x0d: + case 0x10 ... 0x15: + case 0x18 ... 0x1d: + case 0x20 ... 0x25: + case 0x28 ... 0x2d: + case 0x30 ... 0x35: + case 0x38 ... 
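/* 0x00-0x3f arithmetic block: opcode bits 5..3 select the operation + (ADD,OR,ADC,SBB,AND,SUB,XOR,CMP), bit 0 selects byte vs word/long + size, and bits 2..1 give the form decoded into 'f' below + (0: Ev,Gv 1: Gv,Ev 2: A,Iv). */ 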
0x3d: + { + int op, f, val; + op = (b >> 3) & 7; + f = (b >> 1) & 3; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + switch(f) { + case 0: /* OP Ev, Gv */ + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else if (op == OP_XORL && rm == reg) { + xor_zero: + /* xor reg, reg optimisation */ + gen_op_movl_T0_0(); + s->cc_op = CC_OP_LOGICB + ot; + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + break; + } else { + opreg = rm; + } + gen_op_mov_TN_reg(ot, 1, reg); + gen_op(s, op, ot, opreg); + break; + case 1: /* OP Gv, Ev */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + reg = ((modrm >> 3) & 7) | rex_r; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T1_A0(ot + s->mem_index); + } else if (op == OP_XORL && rm == reg) { + goto xor_zero; + } else { + gen_op_mov_TN_reg(ot, 1, rm); + } + gen_op(s, op, ot, reg); + break; + case 2: /* OP A, Iv */ + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + gen_op(s, op, ot, OR_EAX); + break; + } + } + break; + + case 0x82: + if (CODE64(s)) + goto illegal_op; + case 0x80: /* GRP1 */ + case 0x81: + case 0x83: + { + int val; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + + if (mod != 3) { + if (b == 0x83) + s->rip_offset = 1; + else + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else { + opreg = rm; + } + + switch(b) { + default: + case 0x80: + case 0x81: + case 0x82: + val = insn_get(s, ot); + break; + case 0x83: + val = (int8_t)insn_get(s, OT_BYTE); + break; + } + gen_op_movl_T1_im(val); + gen_op(s, op, ot, opreg); + } + break; + + /**************************/ + /* inc, dec, and other misc arith */ + case 0x40 ... 0x47: /* inc Gv */ + ot = dflag ? OT_LONG : OT_WORD; + gen_inc(s, ot, OR_EAX + (b & 7), 1); + break; + case 0x48 ... 0x4f: /* dec Gv */ + ot = dflag ? 
OT_LONG : OT_WORD; + gen_inc(s, ot, OR_EAX + (b & 7), -1); + break; + case 0xf6: /* GRP3 */ + case 0xf7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + if (mod != 3) { + if (op == 0) + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + + switch(op) { + case 0: /* test */ + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + case 2: /* not */ + tcg_gen_not_tl(cpu_T[0], cpu_T[0]); + if (mod != 3) { + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_reg_T0(ot, rm); + } + break; + case 3: /* neg */ + tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); + if (mod != 3) { + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_reg_T0(ot, rm); + } + gen_op_update_neg_cc(); + s->cc_op = CC_OP_SUBB + ot; + break; + case 4: /* mul */ + switch(ot) { + case OT_BYTE: + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); + s->cc_op = CC_OP_MULB; + break; + case OT_WORD: + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + s->cc_op = CC_OP_MULW; + break; + default: + case OT_LONG: +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_extu_i32_i64(t0, cpu_T[0]); + tcg_gen_extu_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + } +#endif + s->cc_op = CC_OP_MULL; + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_helper_mulq_EAX_T0(cpu_T[0]); + s->cc_op = CC_OP_MULQ; + break; +#endif + } + break; + case 5: /* imul */ + switch(ot) { + case OT_BYTE: + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + s->cc_op = CC_OP_MULB; + break; + case OT_WORD: + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + 
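/* below, CC_SRC is set to product - sext16(product): it is non-zero + exactly when the signed result overflows 16 bits, i.e. when IMUL + must set CF/OF. */ + 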
tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + s->cc_op = CC_OP_MULW; + break; + default: + case OT_LONG: +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + } +#endif + s->cc_op = CC_OP_MULL; + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_helper_imulq_EAX_T0(cpu_T[0]); + s->cc_op = CC_OP_MULQ; + break; +#endif + } + break; + case 6: /* div */ + switch(ot) { + case OT_BYTE: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divb_AL(cpu_T[0]); + break; + case OT_WORD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divw_AX(cpu_T[0]); + break; + default: + case OT_LONG: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divl_EAX(cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divq_EAX(cpu_T[0]); + break; +#endif + } + break; + case 7: /* idiv */ + switch(ot) { + case OT_BYTE: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivb_AL(cpu_T[0]); + break; + case OT_WORD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivw_AX(cpu_T[0]); + break; + default: + case OT_LONG: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivl_EAX(cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivq_EAX(cpu_T[0]); + break; +#endif + } + break; + default: + goto illegal_op; + } + break; + + case 0xfe: /* GRP4 */ + case 0xff: /* GRP5 */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + if (op >= 2 && b == 0xfe) { + goto illegal_op; + } + if (CODE64(s)) { + if (op == 2 || op == 4) { + /* operand size for jumps is 64 bit */ + ot = OT_QUAD; + } else if (op == 3 || op == 5) { + ot = dflag ? OT_LONG + (rex_w == 1) : OT_WORD; + } else if (op == 6) { + /* default push size is 64 bit */ + ot = dflag ? 
OT_QUAD : OT_WORD; + } + } + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (op >= 2 && op != 3 && op != 5) + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + + switch(op) { + case 0: /* inc Ev */ + if (mod != 3) + opreg = OR_TMP0; + else + opreg = rm; + gen_inc(s, ot, opreg, 1); + break; + case 1: /* dec Ev */ + if (mod != 3) + opreg = OR_TMP0; + else + opreg = rm; + gen_inc(s, ot, opreg, -1); + break; + case 2: /* call Ev */ + /* XXX: optimize if memory (no 'and' is necessary) */ +#ifdef VBOX_WITH_CALL_RECORD + if (s->record_call) + gen_op_record_call(); +#endif + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + next_eip = s->pc - s->cs_base; + gen_movtl_T1_im(next_eip); + gen_push_T1(s); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 3: /* lcall Ev */ + gen_op_ld_T1_A0(ot + s->mem_index); + gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); + gen_op_ldu_T0_A0(OT_WORD + s->mem_index); + do_lcall: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(dflag), + tcg_const_i32(s->pc - pc_start)); + } else { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(dflag), + tcg_const_i32(s->pc - s->cs_base)); + } + gen_eob(s); + break; + case 4: /* jmp Ev */ + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 5: /* ljmp Ev */ + gen_op_ld_T1_A0(ot + s->mem_index); + gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); + gen_op_ldu_T0_A0(OT_WORD + s->mem_index); + do_ljmp: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(s->pc - pc_start)); + } else { + gen_op_movl_seg_T0_vm(R_CS); + gen_op_movl_T0_T1(); + gen_op_jmp_T0(); + } + gen_eob(s); + break; + case 6: /* push Ev */ + gen_push_T0(s); + break; + default: + goto illegal_op; + } + break; + + case 0x84: /* test Ev, Gv */ + case 0x85: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_op_mov_TN_reg(ot, 1, reg); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + + case 0xa8: /* test eAX, Iv */ + case 0xa9: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + val = insn_get(s, ot); + + gen_op_mov_TN_reg(ot, 0, OR_EAX); + gen_op_movl_T1_im(val); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + + case 0x98: /* CWDE/CBW */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_QUAD, R_EAX); + } else +#endif + if (dflag == 1) { + gen_op_mov_TN_reg(OT_WORD, 0, R_EAX); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + } else { + gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX); + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + } + break; + case 0x99: /* CDQ/CWD */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63); + gen_op_mov_reg_T0(OT_QUAD, R_EDX); + } else +#endif + if (dflag == 1) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + 
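/* CDQ: replicate bit 31 of EAX into every bit of EDX with an + arithmetic shift right by 31. */ + 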
tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + } else { + gen_op_mov_TN_reg(OT_WORD, 0, R_EAX); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + } + break; + case 0x1af: /* imul Gv, Ev */ + case 0x69: /* imul Gv, Ev, I */ + case 0x6b: + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + if (b == 0x69) + s->rip_offset = insn_const_size(ot); + else if (b == 0x6b) + s->rip_offset = 1; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + if (b == 0x69) { + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + } else if (b == 0x6b) { + val = (int8_t)insn_get(s, OT_BYTE); + gen_op_movl_T1_im(val); + } else { + gen_op_mov_TN_reg(ot, 1, reg); + } + +#ifdef TARGET_X86_64 + if (ot == OT_QUAD) { + gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]); + } else +#endif + if (ot == OT_LONG) { +#ifdef TARGET_X86_64 + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[1], t0); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); + } +#endif + } else { + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + } + gen_op_mov_reg_T0(ot, reg); + s->cc_op = CC_OP_MULB + ot; + break; + case 0x1c0: + case 0x1c1: /* xadd Ev, Gv */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_mov_TN_reg(ot, 1, rm); + gen_op_addl_T0_T1(); + gen_op_mov_reg_T1(ot, reg); + gen_op_mov_reg_T0(ot, rm); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_addl_T0_T1(); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T1(ot, reg); + } + gen_op_update2_cc(); + s->cc_op = CC_OP_ADDB + ot; + break; + case 0x1b0: + case 0x1b1: /* cmpxchg Ev, Gv */ + { + int label1, label2; + TCGv t0, t1, t2, a0; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + gen_op_mov_v_reg(ot, t1, reg); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, t0, rm); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + rm = 0; /* avoid warning */ + } + label1 = gen_new_label(); + tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0); + gen_extu(ot, t2); + 
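/* t2 = accumulator - destination; zero means the compare matched. + Note the memory form stores on both paths below (the old value is + written back on mismatch): architecturally cmpxchg always performs + the write. */ + 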
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + if (mod == 3) { + label2 = gen_new_label(); + gen_op_mov_reg_v(ot, R_EAX, t0); + tcg_gen_br(label2); + gen_set_label(label1); + gen_op_mov_reg_v(ot, rm, t1); + gen_set_label(label2); + } else { + tcg_gen_mov_tl(t1, t0); + gen_op_mov_reg_v(ot, R_EAX, t0); + gen_set_label(label1); + /* always store */ + gen_op_st_v(ot + s->mem_index, t1, a0); + } + tcg_gen_mov_tl(cpu_cc_src, t0); + tcg_gen_mov_tl(cpu_cc_dst, t2); + s->cc_op = CC_OP_SUBB + ot; + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); + } + break; + case 0x1c7: /* cmpxchg8b */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if ((mod == 3) || ((modrm & 0x38) != 0x8)) + goto illegal_op; +#ifdef TARGET_X86_64 + if (dflag == 2) { + if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) + goto illegal_op; + gen_jmp_im(pc_start - s->cs_base); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_helper_cmpxchg16b(cpu_A0); + } else +#endif + { + if (!(s->cpuid_features & CPUID_CX8)) + goto illegal_op; + gen_jmp_im(pc_start - s->cs_base); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_helper_cmpxchg8b(cpu_A0); + } + s->cc_op = CC_OP_EFLAGS; + break; + + /**************************/ + /* push/pop */ + case 0x50 ... 0x57: /* push */ + gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s)); + gen_push_T0(s); + break; + case 0x58 ... 0x5f: /* pop */ + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + gen_pop_T0(s); + /* NOTE: order is important for pop %sp */ + gen_pop_update(s); + gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s)); + break; + case 0x60: /* pusha */ + if (CODE64(s)) + goto illegal_op; + gen_pusha(s); + break; + case 0x61: /* popa */ + if (CODE64(s)) + goto illegal_op; + gen_popa(s); + break; + case 0x68: /* push Iv */ + case 0x6a: + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + if (b == 0x68) + val = insn_get(s, ot); + else + val = (int8_t)insn_get(s, OT_BYTE); + gen_op_movl_T0_im(val); + gen_push_T0(s); + break; + case 0x8f: /* pop Ev */ + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + gen_pop_T0(s); + if (mod == 3) { + /* NOTE: order is important for pop %sp */ + gen_pop_update(s); + rm = (modrm & 7) | REX_B(s); + gen_op_mov_reg_T0(ot, rm); + } else { + /* NOTE: order is important too for MMU exceptions */ + s->popl_esp_hack = 1 << ot; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + s->popl_esp_hack = 0; + gen_pop_update(s); + } + break; + case 0xc8: /* enter */ + { + int level; + val = lduw_code(s->pc); + s->pc += 2; + level = ldub_code(s->pc++); + gen_enter(s, val, level); + } + break; + case 0xc9: /* leave */ + /* XXX: exception not precise (ESP is updated before potential exception) */ + if (CODE64(s)) { + gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP); + gen_op_mov_reg_T0(OT_QUAD, R_ESP); + } else if (s->ss32) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_mov_reg_T0(OT_LONG, R_ESP); + } else { + gen_op_mov_TN_reg(OT_WORD, 0, R_EBP); + gen_op_mov_reg_T0(OT_WORD, R_ESP); + } + gen_pop_T0(s); + if (CODE64(s)) { + ot = dflag ? 
OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + gen_op_mov_reg_T0(ot, R_EBP); + gen_pop_update(s); + break; + case 0x06: /* push es */ + case 0x0e: /* push cs */ + case 0x16: /* push ss */ + case 0x1e: /* push ds */ + if (CODE64(s)) + goto illegal_op; + gen_op_movl_T0_seg(b >> 3); + gen_push_T0(s); + break; + case 0x1a0: /* push fs */ + case 0x1a8: /* push gs */ + gen_op_movl_T0_seg((b >> 3) & 7); + gen_push_T0(s); + break; + case 0x07: /* pop es */ + case 0x17: /* pop ss */ + case 0x1f: /* pop ds */ + if (CODE64(s)) + goto illegal_op; + reg = b >> 3; + gen_pop_T0(s); + gen_movl_seg_T0(s, reg, pc_start - s->cs_base); + gen_pop_update(s); + if (reg == R_SS) { + /* if reg == SS, inhibit interrupts/trace. */ + /* If several instructions disable interrupts, only the + _first_ does it */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + s->tf = 0; + } + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x1a1: /* pop fs */ + case 0x1a9: /* pop gs */ + gen_pop_T0(s); + gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base); + gen_pop_update(s); + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + + /**************************/ + /* mov */ + case 0x88: + case 0x89: /* mov Gv, Ev */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + /* generate a generic store */ + gen_ldst_modrm(s, modrm, ot, reg, 1); + break; + case 0xc6: + case 0xc7: /* mov Ev, Iv */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod != 3) { + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + } + val = insn_get(s, ot); + gen_op_movl_T0_im(val); + if (mod != 3) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s)); + break; + case 0x8a: + case 0x8b: /* mov Ev, Gv */ +#ifdef VBOX /* dtrace hot fix */ + if (prefixes & PREFIX_LOCK) + goto illegal_op; +#endif + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = OT_WORD + dflag; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_op_mov_reg_T0(ot, reg); + break; + case 0x8e: /* mov seg, Gv */ + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + if (reg >= 6 || reg == R_CS) + goto illegal_op; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_movl_seg_T0(s, reg, pc_start - s->cs_base); + if (reg == R_SS) { + /* if reg == SS, inhibit interrupts/trace */ + /* If several instructions disable interrupts, only the + _first_ does it */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + s->tf = 0; + } + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x8c: /* mov Gv, seg */ + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + if (reg >= 6) + goto illegal_op; + gen_op_movl_T0_seg(reg); + if (mod == 3) + ot = OT_WORD + dflag; + else + ot = OT_WORD; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + + case 0x1b6: /* movzbS Gv, Eb */ + case 0x1b7: /* movzwS Gv, Eb */ + case 0x1be: /* movsbS Gv, Eb */ + case 0x1bf: /* movswS Gv, Eb */ + { + int d_ot; + /* d_ot is the size of destination */ + d_ot = dflag + OT_WORD; + /* ot is the size of source */ + ot = (b & 1) + OT_BYTE; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | 
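/* opcode bit 3, tested as (b & 8) below, picks sign extension: + 0x1be/0x1bf are the movs forms, 0x1b6/0x1b7 the movz forms. */ 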
REX_B(s); + + if (mod == 3) { + gen_op_mov_TN_reg(ot, 0, rm); + switch(ot | (b & 8)) { + case OT_BYTE: + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + break; + case OT_BYTE | 8: + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + break; + case OT_WORD: + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); + break; + default: + case OT_WORD | 8: + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + break; + } + gen_op_mov_reg_T0(d_ot, reg); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (b & 8) { + gen_op_lds_T0_A0(ot + s->mem_index); + } else { + gen_op_ldu_T0_A0(ot + s->mem_index); + } + gen_op_mov_reg_T0(d_ot, reg); + } + } + break; + + case 0x8d: /* lea */ + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + reg = ((modrm >> 3) & 7) | rex_r; + /* we must ensure that no segment is added */ + s->override = -1; + val = s->addseg; + s->addseg = 0; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + s->addseg = val; + gen_op_mov_reg_A0(ot - OT_WORD, reg); + break; + + case 0xa0: /* mov EAX, Ov */ + case 0xa1: + case 0xa2: /* mov Ov, EAX */ + case 0xa3: + { + target_ulong offset_addr; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + offset_addr = ldq_code(s->pc); + s->pc += 8; + gen_op_movq_A0_im(offset_addr); + } else +#endif + { + if (s->aflag) { + offset_addr = insn_get(s, OT_LONG); + } else { + offset_addr = insn_get(s, OT_WORD); + } + gen_op_movl_A0_im(offset_addr); + } + gen_add_A0_ds_seg(s); + if ((b & 2) == 0) { + gen_op_ld_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T0(ot, R_EAX); + } else { + gen_op_mov_TN_reg(ot, 0, R_EAX); + gen_op_st_T0_A0(ot + s->mem_index); + } + } + break; + case 0xd7: /* xlat */ +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EBX); + gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]); + } else +#endif + { + gen_op_movl_A0_reg(R_EBX); + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]); + if (s->aflag == 0) + gen_op_andl_A0_ffff(); + else + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff); + } + gen_add_A0_ds_seg(s); + gen_op_ldu_T0_A0(OT_BYTE + s->mem_index); + gen_op_mov_reg_T0(OT_BYTE, R_EAX); + break; + case 0xb0 ... 0xb7: /* mov R, Ib */ + val = insn_get(s, OT_BYTE); + gen_op_movl_T0_im(val); + gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s)); + break; + case 0xb8 ... 0xbf: /* mov R, Iv */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + uint64_t tmp; + /* 64 bit case */ + tmp = ldq_code(s->pc); + s->pc += 8; + reg = (b & 7) | REX_B(s); + gen_movtl_T0_im(tmp); + gen_op_mov_reg_T0(OT_QUAD, reg); + } else +#endif + { + ot = dflag ? OT_LONG : OT_WORD; + val = insn_get(s, ot); + reg = (b & 7) | REX_B(s); + gen_op_movl_T0_im(val); + gen_op_mov_reg_T0(ot, reg); + } + break; + + case 0x91 ... 
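/* 0x90 (xchg eax,eax) is excluded from this range: it is decoded + elsewhere as NOP/PAUSE and only reaches these ops through the + do_xchg_reg_eax label when REX.B makes it a real exchange. */ 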
0x97: /* xchg R, EAX */ + do_xchg_reg_eax: + ot = dflag + OT_WORD; + reg = (b & 7) | REX_B(s); + rm = R_EAX; + goto do_xchg_reg; + case 0x86: + case 0x87: /* xchg Ev, Gv */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + do_xchg_reg: + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_mov_TN_reg(ot, 1, rm); + gen_op_mov_reg_T0(ot, rm); + gen_op_mov_reg_T1(ot, reg); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_mov_TN_reg(ot, 0, reg); + /* for xchg, lock is implicit */ + if (!(prefixes & PREFIX_LOCK)) + gen_helper_lock(); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_st_T0_A0(ot + s->mem_index); + if (!(prefixes & PREFIX_LOCK)) + gen_helper_unlock(); + gen_op_mov_reg_T1(ot, reg); + } + break; + case 0xc4: /* les Gv */ + if (CODE64(s)) + goto illegal_op; + op = R_ES; + goto do_lxx; + case 0xc5: /* lds Gv */ + if (CODE64(s)) + goto illegal_op; + op = R_DS; + goto do_lxx; + case 0x1b2: /* lss Gv */ + op = R_SS; + goto do_lxx; + case 0x1b4: /* lfs Gv */ + op = R_FS; + goto do_lxx; + case 0x1b5: /* lgs Gv */ + op = R_GS; + do_lxx: + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); + /* load the segment first to handle exceptions properly */ + gen_op_ldu_T0_A0(OT_WORD + s->mem_index); + gen_movl_seg_T0(s, op, pc_start - s->cs_base); + /* then put the data */ + gen_op_mov_reg_T1(ot, reg); + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + + /************************/ + /* shifts */ + case 0xc0: + case 0xc1: + /* shift Ev,Ib */ + shift = 2; + grp2: + { + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + + if (mod != 3) { + if (shift == 2) { + s->rip_offset = 1; + } + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else { + opreg = (modrm & 7) | REX_B(s); + } + + /* simpler op */ + if (shift == 0) { + gen_shift(s, op, ot, opreg, OR_ECX); + } else { + if (shift == 2) { + shift = ldub_code(s->pc++); + } + gen_shifti(s, op, ot, opreg, shift); + } + } + break; + case 0xd0: + case 0xd1: + /* shift Ev,1 */ + shift = 1; + goto grp2; + case 0xd2: + case 0xd3: + /* shift Ev,cl */ + shift = 0; + goto grp2; + + case 0x1a4: /* shld imm */ + op = 0; + shift = 1; + goto do_shiftd; + case 0x1a5: /* shld cl */ + op = 0; + shift = 0; + goto do_shiftd; + case 0x1ac: /* shrd imm */ + op = 1; + shift = 1; + goto do_shiftd; + case 0x1ad: /* shrd cl */ + op = 1; + shift = 0; + do_shiftd: + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | rex_r; + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else { + opreg = rm; + } + gen_op_mov_TN_reg(ot, 1, reg); + + if (shift) { + val = ldub_code(s->pc++); + tcg_gen_movi_tl(cpu_T3, val); + } else { + tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]); + } + gen_shiftd_rm_T1_T3(s, ot, opreg, op); + break; + + /************************/ + /* floats */ + case 0xd8 ... 
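/* x87 escapes: 'op' combines the low 3 opcode bits with the ModRM + reg field (((b & 7) << 3) | reg); mod != 3 selects the memory forms + below, mod == 3 the register-stack forms. */ 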
0xdf: + if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { + /* if CR0.EM or CR0.TS are set, generate an FPU exception */ + /* XXX: what to do if illegal op ? */ + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = modrm & 7; + op = ((b & 7) << 3) | ((modrm >> 3) & 7); + if (mod != 3) { + /* memory op */ + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + switch(op) { + case 0x00 ... 0x07: /* fxxxs */ + case 0x10 ... 0x17: /* fixxxl */ + case 0x20 ... 0x27: /* fxxxl */ + case 0x30 ... 0x37: /* fixxx */ + { + int op1; + op1 = op & 7; + + switch(op >> 4) { + case 0: + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_flds_FT0(cpu_tmp2_i32); + break; + case 1: + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fildl_FT0(cpu_tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fldl_FT0(cpu_tmp1_i64); + break; + case 3: + default: + gen_op_lds_T0_A0(OT_WORD + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fildl_FT0(cpu_tmp2_i32); + break; + } + + gen_helper_fp_arith_ST0_FT0(op1); + if (op1 == 3) { + /* fcomp needs pop */ + gen_helper_fpop(); + } + } + break; + case 0x08: /* flds */ + case 0x0a: /* fsts */ + case 0x0b: /* fstps */ + case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */ + case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */ + case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */ + switch(op & 7) { + case 0: + switch(op >> 4) { + case 0: + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_flds_ST0(cpu_tmp2_i32); + break; + case 1: + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fildl_ST0(cpu_tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fldl_ST0(cpu_tmp1_i64); + break; + case 3: + default: + gen_op_lds_T0_A0(OT_WORD + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fildl_ST0(cpu_tmp2_i32); + break; + } + break; + case 1: + /* XXX: the corresponding CPUID bit must be tested ! 
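(these are the SSE3 fisttp/fisttpl/fisttpll forms, so CPUID.SSE3 is + the bit that should be checked) 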
*/ + switch(op >> 4) { + case 1: + gen_helper_fisttl_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 2: + gen_helper_fisttll_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 3: + default: + gen_helper_fistt_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + } + gen_helper_fpop(); + break; + default: + switch(op >> 4) { + case 0: + gen_helper_fsts_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 1: + gen_helper_fistl_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 2: + gen_helper_fstl_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 3: + default: + gen_helper_fist_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + } + if ((op & 7) == 3) + gen_helper_fpop(); + break; + } + break; + case 0x0c: /* fldenv mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fldenv( + cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x0d: /* fldcw mem */ + gen_op_ld_T0_A0(OT_WORD + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fldcw(cpu_tmp2_i32); + break; + case 0x0e: /* fnstenv mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x0f: /* fnstcw mem */ + gen_helper_fnstcw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + case 0x1d: /* fldt mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fldt_ST0(cpu_A0); + break; + case 0x1f: /* fstpt mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fstt_ST0(cpu_A0); + gen_helper_fpop(); + break; + case 0x2c: /* frstor mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x2e: /* fnsave mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x2f: /* fnstsw mem */ + gen_helper_fnstsw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + case 0x3c: /* fbld */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fbld_ST0(cpu_A0); + break; + case 0x3e: /* fbstp */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fbst_ST0(cpu_A0); + gen_helper_fpop(); + break; + case 0x3d: /* fildll */ + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fildll_ST0(cpu_tmp1_i64); + break; + case 0x3f: /* fistpll */ + gen_helper_fistll_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + } else { + /* register float ops */ + opreg = rm; + + switch(op) { 
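+ /* register forms: op = ((b & 7) << 3) | reg, so e.g. 0x08 is d9 /0 + with mod == 3, i.e. fld st(i); several undocumented aliases share + the cases below. */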
+ case 0x08: /* fld sti */ + gen_helper_fpush(); + gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7)); + break; + case 0x09: /* fxchg sti */ + case 0x29: /* fxchg4 sti, undocumented op */ + case 0x39: /* fxchg7 sti, undocumented op */ + gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg)); + break; + case 0x0a: /* grp d9/2 */ + switch(rm) { + case 0: /* fnop */ + /* check exceptions (FreeBSD FPU probe) */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fwait(); + break; + default: + goto illegal_op; + } + break; + case 0x0c: /* grp d9/4 */ + switch(rm) { + case 0: /* fchs */ + gen_helper_fchs_ST0(); + break; + case 1: /* fabs */ + gen_helper_fabs_ST0(); + break; + case 4: /* ftst */ + gen_helper_fldz_FT0(); + gen_helper_fcom_ST0_FT0(); + break; + case 5: /* fxam */ + gen_helper_fxam_ST0(); + break; + default: + goto illegal_op; + } + break; + case 0x0d: /* grp d9/5 */ + { + switch(rm) { + case 0: + gen_helper_fpush(); + gen_helper_fld1_ST0(); + break; + case 1: + gen_helper_fpush(); + gen_helper_fldl2t_ST0(); + break; + case 2: + gen_helper_fpush(); + gen_helper_fldl2e_ST0(); + break; + case 3: + gen_helper_fpush(); + gen_helper_fldpi_ST0(); + break; + case 4: + gen_helper_fpush(); + gen_helper_fldlg2_ST0(); + break; + case 5: + gen_helper_fpush(); + gen_helper_fldln2_ST0(); + break; + case 6: + gen_helper_fpush(); + gen_helper_fldz_ST0(); + break; + default: + goto illegal_op; + } + } + break; + case 0x0e: /* grp d9/6 */ + switch(rm) { + case 0: /* f2xm1 */ + gen_helper_f2xm1(); + break; + case 1: /* fyl2x */ + gen_helper_fyl2x(); + break; + case 2: /* fptan */ + gen_helper_fptan(); + break; + case 3: /* fpatan */ + gen_helper_fpatan(); + break; + case 4: /* fxtract */ + gen_helper_fxtract(); + break; + case 5: /* fprem1 */ + gen_helper_fprem1(); + break; + case 6: /* fdecstp */ + gen_helper_fdecstp(); + break; + default: + case 7: /* fincstp */ + gen_helper_fincstp(); + break; + } + break; + case 0x0f: /* grp d9/7 */ + switch(rm) { + case 0: /* fprem */ + gen_helper_fprem(); + break; + case 1: /* fyl2xp1 */ + gen_helper_fyl2xp1(); + break; + case 2: /* fsqrt */ + gen_helper_fsqrt(); + break; + case 3: /* fsincos */ + gen_helper_fsincos(); + break; + case 5: /* fscale */ + gen_helper_fscale(); + break; + case 4: /* frndint */ + gen_helper_frndint(); + break; + case 6: /* fsin */ + gen_helper_fsin(); + break; + default: + case 7: /* fcos */ + gen_helper_fcos(); + break; + } + break; + case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ + case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ + case 0x30: case 0x31: case 0x34 ... 
0x37: /* fxxxp sti, st */ + { + int op1; + + op1 = op & 7; + if (op >= 0x20) { + gen_helper_fp_arith_STN_ST0(op1, opreg); + if (op >= 0x30) + gen_helper_fpop(); + } else { + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fp_arith_ST0_FT0(op1); + } + } + break; + case 0x02: /* fcom */ + case 0x22: /* fcom2, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcom_ST0_FT0(); + break; + case 0x03: /* fcomp */ + case 0x23: /* fcomp3, undocumented op */ + case 0x32: /* fcomp5, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcom_ST0_FT0(); + gen_helper_fpop(); + break; + case 0x15: /* da/5 */ + switch(rm) { + case 1: /* fucompp */ + gen_helper_fmov_FT0_STN(tcg_const_i32(1)); + gen_helper_fucom_ST0_FT0(); + gen_helper_fpop(); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + break; + case 0x1c: + switch(rm) { + case 0: /* feni (287 only, just do nop here) */ + break; + case 1: /* fdisi (287 only, just do nop here) */ + break; + case 2: /* fclex */ + gen_helper_fclex(); + break; + case 3: /* fninit */ + gen_helper_fninit(); + break; + case 4: /* fsetpm (287 only, just do nop here) */ + break; + default: + goto illegal_op; + } + break; + case 0x1d: /* fucomi */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucomi_ST0_FT0(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x1e: /* fcomi */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcomi_ST0_FT0(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x28: /* ffree sti */ + gen_helper_ffree_STN(tcg_const_i32(opreg)); + break; + case 0x2a: /* fst sti */ + gen_helper_fmov_STN_ST0(tcg_const_i32(opreg)); + break; + case 0x2b: /* fstp sti */ + case 0x0b: /* fstp1 sti, undocumented op */ + case 0x3a: /* fstp8 sti, undocumented op */ + case 0x3b: /* fstp9 sti, undocumented op */ + gen_helper_fmov_STN_ST0(tcg_const_i32(opreg)); + gen_helper_fpop(); + break; + case 0x2c: /* fucom st(i) */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucom_ST0_FT0(); + break; + case 0x2d: /* fucomp st(i) */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucom_ST0_FT0(); + gen_helper_fpop(); + break; + case 0x33: /* de/3 */ + switch(rm) { + case 1: /* fcompp */ + gen_helper_fmov_FT0_STN(tcg_const_i32(1)); + gen_helper_fcom_ST0_FT0(); + gen_helper_fpop(); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + break; + case 0x38: /* ffreep sti, undocumented op */ + gen_helper_ffree_STN(tcg_const_i32(opreg)); + gen_helper_fpop(); + break; + case 0x3c: /* df/4 */ + switch(rm) { + case 0: + gen_helper_fnstsw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + break; + default: + goto illegal_op; + } + break; + case 0x3d: /* fucomip */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucomi_ST0_FT0(); + gen_helper_fpop(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x3e: /* fcomip */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcomi_ST0_FT0(); + gen_helper_fpop(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x10 ... 0x13: /* fcmovxx */ + case 0x18 ... 
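/* da/db /0../3 with mod == 3: conditions B, Z, BE and P; opcode bit + 3 (db vs da) selects the negated condition, and op1 below is built + so the branch skips the fmov exactly when the fcmov condition is + false. */ 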
0x1b: + { + int op1, l1; + static const uint8_t fcmov_cc[8] = { + (JCC_B << 1), + (JCC_Z << 1), + (JCC_BE << 1), + (JCC_P << 1), + }; + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, op1, l1); + gen_helper_fmov_ST0_STN(tcg_const_i32(opreg)); + gen_set_label(l1); + } + break; + default: + goto illegal_op; + } + } + break; + /************************/ + /* string ops */ + + case 0xa4: /* movsS */ + case 0xa5: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_movs(s, ot); + } + break; + + case 0xaa: /* stosS */ + case 0xab: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_stos(s, ot); + } + break; + case 0xac: /* lodsS */ + case 0xad: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_lods(s, ot); + } + break; + case 0xae: /* scasS */ + case 0xaf: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & PREFIX_REPNZ) { + gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + } else if (prefixes & PREFIX_REPZ) { + gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + } else { + gen_scas(s, ot); + s->cc_op = CC_OP_SUBB + ot; + } + break; + + case 0xa6: /* cmpsS */ + case 0xa7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & PREFIX_REPNZ) { + gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + } else if (prefixes & PREFIX_REPZ) { + gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + } else { + gen_cmps(s, ot); + s->cc_op = CC_OP_SUBB + ot; + } + break; + case 0x6c: /* insS */ + case 0x6d: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_ins(s, ot); + if (use_icount) { + gen_jmp(s, s->pc - s->cs_base); + } + } + break; + case 0x6e: /* outsS */ + case 0x6f: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes) | 4); + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_outs(s, ot); + if (use_icount) { + gen_jmp(s, s->pc - s->cs_base); + } + } + break; + + /************************/ + /* port I/O */ + + case 0xe4: + case 0xe5: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? 
OT_LONG : OT_WORD; + val = ldub_code(s->pc++); + gen_op_movl_T0_im(val); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32); + gen_op_mov_reg_T1(ot, R_EAX); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xe6: + case 0xe7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + val = ldub_code(s->pc++); + gen_op_movl_T0_im(val); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes)); + gen_op_mov_TN_reg(ot, 1, R_EAX); + + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xec: + case 0xed: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32); + gen_op_mov_reg_T1(ot, R_EAX); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xee: + case 0xef: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes)); + gen_op_mov_TN_reg(ot, 1, R_EAX); + + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + + /************************/ + /* control */ + case 0xc2: /* ret im */ + val = ldsw_code(s->pc); + s->pc += 2; + gen_pop_T0(s); + if (CODE64(s) && s->dflag) + s->dflag = 2; + gen_stack_update(s, val + (2 << s->dflag)); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 0xc3: /* ret */ + gen_pop_T0(s); + gen_pop_update(s); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 0xca: /* lret im */ + val = ldsw_code(s->pc); + s->pc += 2; + do_lret: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_lret_protected(tcg_const_i32(s->dflag), + tcg_const_i32(val)); + } else { + gen_stack_A0(s); + /* pop offset */ + gen_op_ld_T0_A0(1 + s->dflag + s->mem_index); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + /* NOTE: keeping EIP updated is not a problem in case of + exception */ + gen_op_jmp_T0(); + /* pop selector */ + gen_op_addl_A0_im(2 << s->dflag); + gen_op_ld_T0_A0(1 + s->dflag + s->mem_index); + gen_op_movl_seg_T0_vm(R_CS); + /* add stack offset */ + gen_stack_update(s, val + (4 << s->dflag)); + } + gen_eob(s); + break; + case 0xcb: /* lret */ + val = 0; + goto do_lret; + case 0xcf: /* iret */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET); + if (!s->pe) { + /* real mode */ + gen_helper_iret_real(tcg_const_i32(s->dflag)); + s->cc_op = 
CC_OP_EFLAGS; + } else if (s->vm86) { +#ifdef VBOX + if (s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_helper_iret_real(tcg_const_i32(s->dflag)); + s->cc_op = CC_OP_EFLAGS; + } + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_iret_protected(tcg_const_i32(s->dflag), + tcg_const_i32(s->pc - s->cs_base)); + s->cc_op = CC_OP_EFLAGS; + } + gen_eob(s); + break; + case 0xe8: /* call im */ + { + if (dflag) + tval = (int32_t)insn_get(s, OT_LONG); + else + tval = (int16_t)insn_get(s, OT_WORD); + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + else if(!CODE64(s)) + tval &= 0xffffffff; + gen_movtl_T0_im(next_eip); + gen_push_T0(s); + gen_jmp(s, tval); + } + break; + case 0x9a: /* lcall im */ + { + unsigned int selector, offset; + + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + offset = insn_get(s, ot); + selector = insn_get(s, OT_WORD); + + gen_op_movl_T0_im(selector); + gen_op_movl_T1_imu(offset); + } + goto do_lcall; + case 0xe9: /* jmp im */ + if (dflag) + tval = (int32_t)insn_get(s, OT_LONG); + else + tval = (int16_t)insn_get(s, OT_WORD); + tval += s->pc - s->cs_base; + if (s->dflag == 0) + tval &= 0xffff; + else if(!CODE64(s)) + tval &= 0xffffffff; + gen_jmp(s, tval); + break; + case 0xea: /* ljmp im */ + { + unsigned int selector, offset; + + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + offset = insn_get(s, ot); + selector = insn_get(s, OT_WORD); + + gen_op_movl_T0_im(selector); + gen_op_movl_T1_imu(offset); + } + goto do_ljmp; + case 0xeb: /* jmp Jb */ + tval = (int8_t)insn_get(s, OT_BYTE); + tval += s->pc - s->cs_base; + if (s->dflag == 0) + tval &= 0xffff; + gen_jmp(s, tval); + break; + case 0x70 ... 0x7f: /* jcc Jb */ + tval = (int8_t)insn_get(s, OT_BYTE); + goto do_jcc; + case 0x180 ... 0x18f: /* jcc Jv */ + if (dflag) { + tval = (int32_t)insn_get(s, OT_LONG); + } else { + tval = (int16_t)insn_get(s, OT_WORD); + } + do_jcc: + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + gen_jcc(s, b, tval, next_eip); + break; + + case 0x190 ... 0x19f: /* setcc Gv */ + modrm = ldub_code(s->pc++); + gen_setcc(s, b); + gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1); + break; + case 0x140 ... 0x14f: /* cmov Gv, Ev */ + { + int l1; + TCGv t0; + + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + t0 = tcg_temp_local_new(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, t0, rm); + } +#ifdef TARGET_X86_64 + if (ot == OT_LONG) { + /* XXX: specific Intel behaviour ? 
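(both Intel and AMD document this: with a 32-bit operand in 64-bit + mode, cmov always writes the destination, zero-extending the upper + half even when the condition is false, which the unconditional + ext32u after the label implements) 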
*/ + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_mov_tl(cpu_regs[reg], t0); + gen_set_label(l1); + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); + } else +#endif + { + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_op_mov_reg_v(ot, reg, t0); + gen_set_label(l1); + } + tcg_temp_free(t0); + } + break; + + /************************/ + /* flags */ + case 0x9c: /* pushf */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); +#ifdef VBOX + if (s->vm86 && s->vme && s->iopl != 3) + gen_helper_read_eflags_vme(cpu_T[0]); + else +#endif + gen_helper_read_eflags(cpu_T[0]); + gen_push_T0(s); + } + break; + case 0x9d: /* popf */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_pop_T0(s); + if (s->cpl == 0) { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK))); + } else { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff)); + } + } else { + if (s->cpl <= s->iopl) { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK))); + } else { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff)); + } + } else { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK))); + } else { +#ifdef VBOX + if (s->vm86 && s->vme) + gen_helper_write_eflags_vme(cpu_T[0]); + else +#endif + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff)); + } + } + } + gen_pop_update(s); + s->cc_op = CC_OP_EFLAGS; + /* abort translation because TF flag may change */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x9e: /* sahf */ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) + goto illegal_op; + gen_op_mov_TN_reg(OT_BYTE, 0, R_AH); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x9f: /* lahf */ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_T[0]); + /* Note: gen_compute_eflags() only gives the condition codes */ + tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02); + gen_op_mov_reg_T0(OT_BYTE, R_AH); + break; + case 0xf5: /* cmc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xf8: /* clc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xf9: /* stc */ + if (s->cc_op != CC_OP_DYNAMIC) + 
gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xfc: /* cld */ + tcg_gen_movi_i32(cpu_tmp2_i32, 1); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df)); + break; + case 0xfd: /* std */ + tcg_gen_movi_i32(cpu_tmp2_i32, -1); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df)); + break; + + /************************/ + /* bit operations */ + case 0x1ba: /* bt/bts/btr/btc Gv, im */ + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + op = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) { + s->rip_offset = 1; + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + /* load shift */ + val = ldub_code(s->pc++); + gen_op_movl_T1_im(val); + if (op < 4) + goto illegal_op; + op -= 4; + goto bt_op; + case 0x1a3: /* bt Gv, Ev */ + op = 0; + goto do_btx; + case 0x1ab: /* bts */ + op = 1; + goto do_btx; + case 0x1b3: /* btr */ + op = 2; + goto do_btx; + case 0x1bb: /* btc */ + op = 3; + do_btx: + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + gen_op_mov_TN_reg(OT_LONG, 1, reg); + if (mod != 3) { + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + /* specific case: for a memory operand, the bit offset also displaces the effective address */ + gen_exts(ot, cpu_T[1]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot); + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + bt_op: + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); + switch(op) { + case 0: + tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]); + tcg_gen_movi_tl(cpu_cc_dst, 0); + break; + case 1: + tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); + tcg_gen_movi_tl(cpu_tmp0, 1); + tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + break; + case 2: + tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); + tcg_gen_movi_tl(cpu_tmp0, 1); + tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); + tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + break; + default: + case 3: + tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); + tcg_gen_movi_tl(cpu_tmp0, 1); + tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + break; + } + s->cc_op = CC_OP_SARB + ot; + if (op != 0) { + if (mod != 3) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, rm); + tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); + tcg_gen_movi_tl(cpu_cc_dst, 0); + } + break; + case 0x1bc: /* bsf */ + case 0x1bd: /* bsr */ + { + int label1; + TCGv t0; + + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_extu(ot, cpu_T[0]); + t0 = tcg_temp_local_new(); + tcg_gen_mov_tl(t0, cpu_T[0]); + if ((b & 1) && (prefixes & PREFIX_REPZ) && + (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { + switch(ot) { + case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0, + tcg_const_i32(16)); break; + case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0, + tcg_const_i32(32)); break; + case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0, + tcg_const_i32(64)); break; + } + gen_op_mov_reg_T0(ot, reg); + } else { + label1 = gen_new_label(); + tcg_gen_movi_tl(cpu_cc_dst, 0); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); + if (b & 1) { +
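/* bsr (b & 1 set) scans from the most significant bit, bsf from bit 0; the branch above handles a zero source by leaving cpu_cc_dst at 0, so ZF reads as set and the destination register keeps its old value */ +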
gen_helper_bsr(cpu_T[0], t0); + } else { + gen_helper_bsf(cpu_T[0], t0); + } + gen_op_mov_reg_T0(ot, reg); + tcg_gen_movi_tl(cpu_cc_dst, 1); + gen_set_label(label1); + tcg_gen_discard_tl(cpu_cc_src); + s->cc_op = CC_OP_LOGICB + ot; + } + tcg_temp_free(t0); + } + break; + /************************/ + /* bcd */ + case 0x27: /* daa */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_daa(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x2f: /* das */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_das(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x37: /* aaa */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_aaa(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x3f: /* aas */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_aas(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xd4: /* aam */ + if (CODE64(s)) + goto illegal_op; + val = ldub_code(s->pc++); + if (val == 0) { + gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); + } else { + gen_helper_aam(tcg_const_i32(val)); + s->cc_op = CC_OP_LOGICB; + } + break; + case 0xd5: /* aad */ + if (CODE64(s)) + goto illegal_op; + val = ldub_code(s->pc++); + gen_helper_aad(tcg_const_i32(val)); + s->cc_op = CC_OP_LOGICB; + break; + /************************/ + /* misc */ + case 0x90: /* nop */ + /* XXX: correct lock test for all insn */ + if (prefixes & PREFIX_LOCK) { + goto illegal_op; + } + /* If REX_B is set, then this is xchg eax, r8d, not a nop. */ + if (REX_B(s)) { + goto do_xchg_reg_eax; + } + if (prefixes & PREFIX_REPZ) { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE); + } + break; + case 0x9b: /* fwait */ + if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == + (HF_MP_MASK | HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fwait(); + } + break; + case 0xcc: /* int3 */ +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && !s->vme) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else +#endif + gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base); + break; + case 0xcd: /* int N */ + val = ldub_code(s->pc++); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && !s->vme) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base); + } + break; + case 0xce: /* into */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_into(tcg_const_i32(s->pc - pc_start)); + break; +#ifdef WANT_ICEBP + case 0xf1: /* icebp (undocumented, exits to external debugger) */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP); +#if 1 + gen_debug(s, pc_start - s->cs_base); +#else + /* start debug */ + tb_flush(cpu_single_env); + cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM); +#endif + break; +#endif + case 0xfa: /* cli */ + if (!s->vm86) { + if (s->cpl <= s->iopl) { + gen_helper_cli(); + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } else { + if (s->iopl == 3) { + gen_helper_cli(); +#ifdef VBOX + } else if (s->iopl != 3 && s->vme) { + gen_helper_cli_vme(); +#endif + } else { + gen_exception(s, EXCP0D_GPF, pc_start - 
s->cs_base); + } + } + break; + case 0xfb: /* sti */ + if (!s->vm86) { + if (s->cpl <= s->iopl) { + gen_sti: + gen_helper_sti(); + /* interrupts are enabled only for the first insn after sti */ + /* if several sti instructions occur in a row, only the + _first_ one sets the inhibit flag */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + /* give a chance to handle pending irqs */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } else { + if (s->iopl == 3) { + goto gen_sti; +#ifdef VBOX + } else if (s->iopl != 3 && s->vme) { + gen_helper_sti_vme(); + /* give a chance to handle pending irqs */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); +#endif + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } + break; + case 0x62: /* bound */ + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_op_mov_TN_reg(ot, 0, reg); + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + if (ot == OT_WORD) + gen_helper_boundw(cpu_A0, cpu_tmp2_i32); + else + gen_helper_boundl(cpu_A0, cpu_tmp2_i32); + break; + case 0x1c8 ... 0x1cf: /* bswap reg */ + reg = (b & 7) | REX_B(s); +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_QUAD, 0, reg); + tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_QUAD, reg); + } else +#endif + { + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_LONG, reg); + } + break; + case 0xd6: /* salc */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags_c(cpu_T[0]); + tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_BYTE, R_EAX); + break; + case 0xe0: /* loopnz */ + case 0xe1: /* loopz */ + case 0xe2: /* loop */ + case 0xe3: /* jecxz */ + { + int l1, l2, l3; + + tval = (int8_t)insn_get(s, OT_BYTE); + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + + l1 = gen_new_label(); + l2 = gen_new_label(); + l3 = gen_new_label(); + b &= 3; + switch(b) { + case 0: /* loopnz */ + case 1: /* loopz */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_op_add_reg_im(s->aflag, R_ECX, -1); + gen_op_jz_ecx(s->aflag, l3); + gen_compute_eflags(cpu_tmp0); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z); + if (b == 0) { + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1); + } else { + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1); + } + break; + case 2: /* loop */ + gen_op_add_reg_im(s->aflag, R_ECX, -1); + gen_op_jnz_ecx(s->aflag, l1); + break; + default: + case 3: /* jcxz */ + gen_op_jz_ecx(s->aflag, l1); + break; + } + + gen_set_label(l3); + gen_jmp_im(next_eip); + tcg_gen_br(l2); + + gen_set_label(l1); + gen_jmp_im(tval); + gen_set_label(l2); + gen_eob(s); + } + break; + case 0x130: /* wrmsr */ + case 0x132: /* rdmsr */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (b & 2) { + gen_helper_rdmsr(); + } else { + gen_helper_wrmsr(); + } + } + break; + case 0x131: /* rdtsc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (use_icount) +
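/* under icount, the TSC read counts as an I/O operation: bracket it with gen_io_start/gen_io_end and end the TB so the instruction counter stays exact */ +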
gen_io_start(); + gen_helper_rdtsc(); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0x133: /* rdpmc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_rdpmc(); + break; + case 0x134: /* sysenter */ +#ifndef VBOX + /* on Intel CPUs, SYSENTER is valid even in 64-bit mode */ + if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) +#else + if ( !(cpu_single_env->cpuid_features & CPUID_SEP) + || ( IS_LONG_MODE(s) + && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL)) +#endif + goto illegal_op; + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysenter(); + gen_eob(s); + } + break; + case 0x135: /* sysexit */ +#ifndef VBOX + /* on Intel CPUs, SYSEXIT is valid even in 64-bit mode */ + if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) +#else + if ( !(cpu_single_env->cpuid_features & CPUID_SEP) + || ( IS_LONG_MODE(s) + && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL)) +#endif + goto illegal_op; + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysexit(tcg_const_i32(dflag)); + gen_eob(s); + } + break; +#ifdef TARGET_X86_64 + case 0x105: /* syscall */ + /* XXX: is it usable in real mode ? */ + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_syscall(tcg_const_i32(s->pc - pc_start)); + gen_eob(s); + break; + case 0x107: /* sysret */ + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysret(tcg_const_i32(s->dflag)); + /* condition codes are modified only in long mode */ + if (s->lma) + s->cc_op = CC_OP_EFLAGS; + gen_eob(s); + } + break; +#endif + case 0x1a2: /* cpuid */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_cpuid(); + break; + case 0xf4: /* hlt */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_hlt(tcg_const_i32(s->pc - pc_start)); + s->is_jmp = DISAS_TB_JUMP; + } + break; + case 0x100: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* sldt */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector)); + ot = OT_WORD; + if (mod == 3) + ot += s->dflag; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + case 2: /* lldt */ + if (!s->pe || s->vm86) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lldt(cpu_tmp2_i32); + } + break; + case 1: /* str */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector)); + ot = OT_WORD; + if (mod == 3) + ot += s->dflag; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + case 3: /* ltr */ + if (!s->pe ||
s->vm86) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_ltr(cpu_tmp2_i32); + } + break; + case 4: /* verr */ + case 5: /* verw */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (op == 4) + gen_helper_verr(cpu_T[0]); + else + gen_helper_verw(cpu_T[0]); + s->cc_op = CC_OP_EFLAGS; + break; + default: + goto illegal_op; + } + break; + case 0x101: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + rm = modrm & 7; + switch(op) { + case 0: /* sgdt */ + if (mod == 3) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit)); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base)); + gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + break; + case 1: + if (mod == 3) { + switch (rm) { + case 0: /* monitor */ + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || + s->cpl != 0) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EAX); + } else +#endif + { + gen_op_movl_A0_reg(R_EAX); + if (s->aflag == 0) + gen_op_andl_A0_ffff(); + } + gen_add_A0_ds_seg(s); + gen_helper_monitor(cpu_A0); + break; + case 1: /* mwait */ + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || + s->cpl != 0) + goto illegal_op; + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_mwait(tcg_const_i32(s->pc - pc_start)); + gen_eob(s); + break; + default: + goto illegal_op; + } + } else { /* sidt */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit)); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base)); + gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + } + break; + case 2: /* lgdt */ + case 3: /* lidt */ + if (mod == 3) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + switch(rm) { + case 0: /* VMRUN */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmrun(tcg_const_i32(s->aflag), + tcg_const_i32(s->pc - pc_start)); + tcg_gen_exit_tb(0); + s->is_jmp = DISAS_TB_JUMP; + } + break; + case 1: /* VMMCALL */ + if (!(s->flags & HF_SVME_MASK)) + goto illegal_op; + gen_helper_vmmcall(); + break; + case 2: /* VMLOAD */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmload(tcg_const_i32(s->aflag)); + } + break; + case 3: /* VMSAVE */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmsave(tcg_const_i32(s->aflag)); + } + break; + case
4: /* STGI */ + if ((!(s->flags & HF_SVME_MASK) && + !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || + !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_stgi(); + } + break; + case 5: /* CLGI */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_clgi(); + } + break; + case 6: /* SKINIT */ + if ((!(s->flags & HF_SVME_MASK) && + !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || + !s->pe) + goto illegal_op; + gen_helper_skinit(); + break; + case 7: /* INVLPGA */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_invlpga(tcg_const_i32(s->aflag)); + } + break; + default: + goto illegal_op; + } + } else if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, + op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE); + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_op_ld_T1_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + if (!s->dflag) + gen_op_andl_T0_im(0xffffff); + if (op == 2) { + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base)); + tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit)); + } else { + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base)); + tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit)); + } + } + break; + case 4: /* smsw */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); +#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4); +#else + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0])); +#endif + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1); + break; + case 6: /* lmsw */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_helper_lmsw(cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 7: + if (mod != 3) { /* invlpg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_helper_invlpg(cpu_A0); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + } else { + switch (rm) { + case 0: /* swapgs */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + tcg_gen_ld_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[R_GS].base)); + tcg_gen_ld_tl(cpu_T[1], cpu_env, + offsetof(CPUX86State,kernelgsbase)); + tcg_gen_st_tl(cpu_T[1], cpu_env, + offsetof(CPUX86State,segs[R_GS].base)); + tcg_gen_st_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,kernelgsbase)); + } + } else +#endif + { + goto illegal_op; + } + break; + case 1: /* rdtscp */ + if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (use_icount) + gen_io_start(); + gen_helper_rdtscp(); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; +
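/* any other register encoding of 0F 01 /7 is undefined */ +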
default: + goto illegal_op; + } + } + break; + default: + goto illegal_op; + } + break; + case 0x108: /* invd */ + case 0x109: /* wbinvd */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD); + /* nothing to do */ + } + break; + case 0x63: /* arpl or movslq (x86_64) */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + int d_ot; + /* d_ot is the size of the destination */ + d_ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + + if (mod == 3) { + gen_op_mov_TN_reg(OT_LONG, 0, rm); + /* sign extend */ + if (d_ot == OT_QUAD) + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(d_ot, reg); + } else { + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + if (d_ot == OT_QUAD) { + gen_op_lds_T0_A0(OT_LONG + s->mem_index); + } else { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + } + gen_op_mov_reg_T0(d_ot, reg); + } + } else +#endif + { + int label1; + TCGv t0, t1, t2, a0; + + if (!s->pe || s->vm86) + goto illegal_op; + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + ot = OT_WORD; + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + if (mod != 3) { + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); + a0 = tcg_temp_local_new(); + tcg_gen_mov_tl(a0, cpu_A0); + } else { + gen_op_mov_v_reg(ot, t0, rm); + TCGV_UNUSED(a0); + } + gen_op_mov_v_reg(ot, t1, reg); + tcg_gen_andi_tl(cpu_tmp0, t0, 3); + tcg_gen_andi_tl(t1, t1, 3); + tcg_gen_movi_tl(t2, 0); + label1 = gen_new_label(); + tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1); + tcg_gen_andi_tl(t0, t0, ~3); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_movi_tl(t2, CC_Z); + gen_set_label(label1); + if (mod != 3) { + gen_op_st_v(ot + s->mem_index, t0, a0); + tcg_temp_free(a0); + } else { + gen_op_mov_reg_v(ot, rm, t0); + } + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); + s->cc_op = CC_OP_EFLAGS; + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + } + break; + case 0x102: /* lar */ + case 0x103: /* lsl */ + { + int label1; + TCGv t0; + if (!s->pe || s->vm86) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + t0 = tcg_temp_local_new(); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (b == 0x102) + gen_helper_lar(t0, cpu_T[0]); + else + gen_helper_lsl(t0, cpu_T[0]); + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z); + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); + gen_op_mov_reg_v(ot, reg, t0); + gen_set_label(label1); + s->cc_op = CC_OP_EFLAGS; + tcg_temp_free(t0); + } + break; + case 0x118: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* prefetchnta */ + case 1: /* prefetcht0 */ + case 2: /* prefetcht1 */ + case 3: /* prefetcht2 */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + /* nothing more to do */ + break; + default: /* nop (multi byte) */ + gen_nop_modrm(s, modrm); + break; + } + break; + case 0x119 ...
0x11f: /* nop (multi byte) */ + modrm = ldub_code(s->pc++); + gen_nop_modrm(s, modrm); + break; + case 0x120: /* mov reg, crN */ + case 0x122: /* mov crN, reg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + modrm = ldub_code(s->pc++); +#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */ + if ((modrm & 0xc0) != 0xc0) + goto illegal_op; +#endif + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | rex_r; + if (CODE64(s)) + ot = OT_QUAD; + else + ot = OT_LONG; + if ((prefixes & PREFIX_LOCK) && (reg == 0) && + (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) { + reg = 8; + } + switch(reg) { + case 0: + case 2: + case 3: + case 4: + case 8: + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (b & 2) { + gen_op_mov_TN_reg(ot, 0, rm); + gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg)); + gen_op_mov_reg_T0(ot, rm); + } + break; + default: + goto illegal_op; + } + } + break; + case 0x121: /* mov reg, drN */ + case 0x123: /* mov drN, reg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + modrm = ldub_code(s->pc++); +#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */ + if ((modrm & 0xc0) != 0xc0) + goto illegal_op; +#endif + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | rex_r; + if (CODE64(s)) + ot = OT_QUAD; + else + ot = OT_LONG; + /* XXX: do it dynamically with CR4.DE bit */ + if (reg == 4 || reg == 5 || reg >= 8) + goto illegal_op; + if (b & 2) { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg); + gen_op_mov_TN_reg(ot, 0, rm); + gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg])); + gen_op_mov_reg_T0(ot, rm); + } + } + break; + case 0x106: /* clts */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); + gen_helper_clts(); + /* abort block because static cpu state changed */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ + case 0x1c3: /* MOVNTI reg, mem */ + if (!(s->cpuid_features & CPUID_SSE2)) + goto illegal_op; + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + reg = ((modrm >> 3) & 7) | rex_r; + /* generate a generic store */ + gen_ldst_modrm(s, modrm, ot, reg, 1); + break; + case 0x1ae: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* fxsave */ + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || + (s->prefix & PREFIX_LOCK)) + goto illegal_op; + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2))); + break; + case 1: /* fxrstor */ + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || + (s->prefix & PREFIX_LOCK)) + goto illegal_op; + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2))); + break; + case 2: /* ldmxcsr */ + case 3: /* stmxcsr */ + if (s->flags & HF_TS_MASK) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) || + mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + if (op == 2) { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); + } else { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + } + break; + case 5: /* lfence */ + case 6: /* mfence */ + if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE)) + goto illegal_op; + break; + case 7: /* sfence / clflush */ + if ((modrm & 0xc7) == 0xc0) { + /* sfence */ + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ + if (!(s->cpuid_features & CPUID_SSE)) + goto illegal_op; + } else { + /* clflush */ + if (!(s->cpuid_features & CPUID_CLFLUSH)) + goto illegal_op; + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + } + break; + default: + goto illegal_op; + } + break; + case 0x10d: /* 3DNow! prefetch(w) */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + /* ignore for now */ + break; + case 0x1aa: /* rsm */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM); + if (!(s->flags & HF_SMM_MASK)) + goto illegal_op; + gen_update_cc_op(s); + gen_jmp_im(s->pc - s->cs_base); + gen_helper_rsm(); + gen_eob(s); + break; + case 0x1b8: /* SSE4.2 popcnt */ + if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) != + PREFIX_REPZ) + goto illegal_op; + if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT)) + goto illegal_op; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7); + + if (s->prefix & PREFIX_DATA) + ot = OT_WORD; + else if (s->dflag != 2) + ot = OT_LONG; + else + ot = OT_QUAD; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot)); + gen_op_mov_reg_T0(ot, reg); + + s->cc_op = CC_OP_EFLAGS; + break; + case 0x10e ... 0x10f: + /* 3DNow! instructions, ignore prefixes */ + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); + case 0x110 ... 0x117: + case 0x128 ...
0x12f: + case 0x138 ... 0x13a: + case 0x150 ... 0x179: + case 0x17c ... 0x17f: + case 0x1c2: + case 0x1c4 ... 0x1c6: + case 0x1d0 ... 0x1fe: + gen_sse(s, b, pc_start, rex_r); + break; + default: + goto illegal_op; + } + /* lock generation */ + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + return s->pc; + illegal_op: + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + /* XXX: ensure that no lock was generated */ + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + return s->pc; +} + +void optimize_flags_init(void) +{ +#if TCG_TARGET_REG_BITS == 32 + assert(sizeof(CCTable) == (1 << 3)); +#else + assert(sizeof(CCTable) == (1 << 4)); +#endif + cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); + cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, cc_op), "cc_op"); + cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src), + "cc_src"); + cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst), + "cc_dst"); + cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp), + "cc_tmp"); + +#ifdef TARGET_X86_64 + cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EAX]), "rax"); + cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ECX]), "rcx"); + cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EDX]), "rdx"); + cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EBX]), "rbx"); + cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ESP]), "rsp"); + cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EBP]), "rbp"); + cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ESI]), "rsi"); + cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EDI]), "rdi"); + cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[8]), "r8"); + cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[9]), "r9"); + cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[10]), "r10"); + cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[11]), "r11"); + cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[12]), "r12"); + cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[13]), "r13"); + cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[14]), "r14"); + cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[15]), "r15"); +#else + cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EAX]), "eax"); + cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ECX]), "ecx"); + cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EDX]), "edx"); + cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EBX]), "ebx"); + cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ESP]), "esp"); + cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EBP]), "ebp"); + cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ESI]), "esi"); + cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EDI]), "edi"); +#endif + + /* register helpers */ +#define GEN_HELPER 2 +#include "helper.h" +} + +/* generate intermediate code in gen_opc_buf and gen_opparam_buf for + basic block 'tb'. 
If search_pc is TRUE, also generate PC + information for each intermediate instruction. */ +static inline void gen_intermediate_code_internal(CPUState *env, + TranslationBlock *tb, + int search_pc) +{ + DisasContext dc1, *dc = &dc1; + target_ulong pc_ptr; + uint16_t *gen_opc_end; + CPUBreakpoint *bp; + int j, lj; + uint64_t flags; + target_ulong pc_start; + target_ulong cs_base; + int num_insns; + int max_insns; +#ifdef VBOX + int const singlestep = env->state & CPU_EMULATE_SINGLE_STEP; +#endif + + /* generate intermediate code */ + pc_start = tb->pc; + cs_base = tb->cs_base; + flags = tb->flags; + + dc->pe = (flags >> HF_PE_SHIFT) & 1; + dc->code32 = (flags >> HF_CS32_SHIFT) & 1; + dc->ss32 = (flags >> HF_SS32_SHIFT) & 1; + dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1; + dc->f_st = 0; + dc->vm86 = (flags >> VM_SHIFT) & 1; +#ifdef VBOX + dc->vme = !!(env->cr[4] & CR4_VME_MASK); + dc->pvi = !!(env->cr[4] & CR4_PVI_MASK); +# ifdef VBOX_WITH_CALL_RECORD + if ( !(env->state & CPU_RAW_RING0) + && (env->cr[0] & CR0_PG_MASK) + && !(env->eflags & X86_EFL_IF) + && dc->code32) + dc->record_call = 1; + else + dc->record_call = 0; +# endif +#endif /* VBOX */ + dc->cpl = (flags >> HF_CPL_SHIFT) & 3; + dc->iopl = (flags >> IOPL_SHIFT) & 3; + dc->tf = (flags >> TF_SHIFT) & 1; + dc->singlestep_enabled = env->singlestep_enabled; + dc->cc_op = CC_OP_DYNAMIC; + dc->cs_base = cs_base; + dc->tb = tb; + dc->popl_esp_hack = 0; + /* select memory access functions */ + dc->mem_index = 0; + if (flags & HF_SOFTMMU_MASK) { + if (dc->cpl == 3) + dc->mem_index = 2 * 4; + else + dc->mem_index = 1 * 4; + } + dc->cpuid_features = env->cpuid_features; + dc->cpuid_ext_features = env->cpuid_ext_features; + dc->cpuid_ext2_features = env->cpuid_ext2_features; + dc->cpuid_ext3_features = env->cpuid_ext3_features; +#ifdef TARGET_X86_64 + dc->lma = (flags >> HF_LMA_SHIFT) & 1; + dc->code64 = (flags >> HF_CS64_SHIFT) & 1; +#endif + dc->flags = flags; + dc->jmp_opt = !(dc->tf || env->singlestep_enabled || + (flags & HF_INHIBIT_IRQ_MASK) +#ifndef CONFIG_SOFTMMU + || (flags & HF_SOFTMMU_MASK) +#endif + ); +#if 0 + /* check addseg logic */ + if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32)) + printf("ERROR addseg\n"); +#endif + + cpu_T[0] = tcg_temp_new(); + cpu_T[1] = tcg_temp_new(); + cpu_A0 = tcg_temp_new(); + cpu_T3 = tcg_temp_new(); + + cpu_tmp0 = tcg_temp_new(); + cpu_tmp1_i64 = tcg_temp_new_i64(); + cpu_tmp2_i32 = tcg_temp_new_i32(); + cpu_tmp3_i32 = tcg_temp_new_i32(); + cpu_tmp4 = tcg_temp_new(); + cpu_tmp5 = tcg_temp_new(); + cpu_ptr0 = tcg_temp_new_ptr(); + cpu_ptr1 = tcg_temp_new_ptr(); + + gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; + + dc->is_jmp = DISAS_NEXT; + pc_ptr = pc_start; + lj = -1; + num_insns = 0; + max_insns = tb->cflags & CF_COUNT_MASK; + if (max_insns == 0) + max_insns = CF_COUNT_MASK; + + gen_icount_start(); + for(;;) { + if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { + QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (bp->pc == pc_ptr && + !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) { + gen_debug(dc, pc_ptr - dc->cs_base); + break; + } + } + } + if (search_pc) { + j = gen_opc_ptr - gen_opc_buf; + if (lj < j) { + lj++; + while (lj < j) + gen_opc_instr_start[lj++] = 0; + } + gen_opc_pc[lj] = pc_ptr; + gen_opc_cc_op[lj] = dc->cc_op; + gen_opc_instr_start[lj] = 1; + gen_opc_icount[lj] = num_insns; + } + if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) + gen_io_start(); + + pc_ptr = disas_insn(dc, pc_ptr); + num_insns++; + /* stop translation if indicated */ + if (dc->is_jmp) + 
break; +#ifdef VBOX +# ifdef DEBUG +/* + if(cpu_check_code_raw(env, pc_ptr, env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK))) == ERROR_SUCCESS) + { + //should never happen as the jump to the patch code terminates the translation block + dprintf(("QEmu is about to execute instructions in our patch block at %08X!!\n", pc_ptr)); + } +*/ +# endif /* DEBUG */ + if (env->state & CPU_EMULATE_SINGLE_INSTR) + { + env->state &= ~CPU_EMULATE_SINGLE_INSTR; + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } +#endif /* VBOX */ + + /* in single-step mode, we generate only one instruction and + then raise an exception */ + /* if irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear + the flag and abort the translation to give the irqs a + chance to happen */ + if (dc->tf || dc->singlestep_enabled || + (flags & HF_INHIBIT_IRQ_MASK)) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + /* if the translation gets too long, also stop generating */ + if (gen_opc_ptr >= gen_opc_end || + (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || + num_insns >= max_insns) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + if (singlestep) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + } + if (tb->cflags & CF_LAST_IO) + gen_io_end(); + gen_icount_end(tb, num_insns); + *gen_opc_ptr = INDEX_op_end; + /* don't forget to fill in the last values */ + if (search_pc) { + j = gen_opc_ptr - gen_opc_buf; + lj++; + while (lj <= j) + gen_opc_instr_start[lj++] = 0; + } + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + int disas_flags; + qemu_log("----------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); +#ifdef TARGET_X86_64 + if (dc->code64) + disas_flags = 2; + else +#endif + disas_flags = !dc->code32; + log_target_disas(pc_start, pc_ptr - pc_start, disas_flags); + qemu_log("\n"); + } +#endif + + if (!search_pc) { + tb->size = pc_ptr - pc_start; + tb->icount = num_insns; + } +} + +void gen_intermediate_code(CPUState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(env, tb, 0); +} + +void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(env, tb, 1); +} + +void gen_pc_load(CPUState *env, TranslationBlock *tb, + uintptr_t searched_pc, int pc_pos, void *puc) +{ + int cc_op; +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_OP)) { + int i; + qemu_log("RESTORE:\n"); + for(i = 0; i <= pc_pos; i++) { + if (gen_opc_instr_start[i]) { + qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]); + } + } + qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n", + searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base, + (uint32_t)tb->cs_base); + } +#endif + env->eip = gen_opc_pc[pc_pos] - tb->cs_base; + cc_op = gen_opc_cc_op[pc_pos]; + if (cc_op != CC_OP_DYNAMIC) + env->cc_op = cc_op; +} |