summaryrefslogtreecommitdiffstats
path: root/src/recompiler/target-i386
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 03:01:46 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 03:01:46 +0000
commitf8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch)
tree26484e9d7e2c67806c2d1760196ff01aaa858e8c /src/recompiler/target-i386
parentInitial commit. (diff)
downloadvirtualbox-upstream.tar.xz
virtualbox-upstream.zip
Adding upstream version 6.0.4-dfsg.upstream/6.0.4-dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/recompiler/target-i386/Makefile.kup0
-rw-r--r--src/recompiler/target-i386/TODO32
-rw-r--r--src/recompiler/target-i386/cpu.h1215
-rw-r--r--src/recompiler/target-i386/exec.h370
-rw-r--r--src/recompiler/target-i386/helper.c1227
-rw-r--r--src/recompiler/target-i386/helper.h253
-rw-r--r--src/recompiler/target-i386/helper_template.h344
-rw-r--r--src/recompiler/target-i386/op_helper.c7164
-rw-r--r--src/recompiler/target-i386/ops_sse.h2111
-rw-r--r--src/recompiler/target-i386/ops_sse_header.h359
-rw-r--r--src/recompiler/target-i386/svm.h222
-rw-r--r--src/recompiler/target-i386/translate.c8385
12 files changed, 21682 insertions, 0 deletions
diff --git a/src/recompiler/target-i386/Makefile.kup b/src/recompiler/target-i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/target-i386/Makefile.kup
diff --git a/src/recompiler/target-i386/TODO b/src/recompiler/target-i386/TODO
new file mode 100644
index 00000000..8dfd4633
--- /dev/null
+++ b/src/recompiler/target-i386/TODO
@@ -0,0 +1,32 @@
+Correctness issues:
+
+- some eflags manipulations incorrectly reset bit 0x2.
+- SVM: test, cpu save/restore, SMM save/restore.
+- x86_64: lcall/ljmp intel/amd differences ?
+- better code fetch (different exception handling + CS.limit support)
+- user/kernel PUSHL/POPL in helper.c
+- add missing cpuid tests
+- return UD exception if LOCK prefix incorrectly used
+- test ldt limit < 7 ?
+- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret)
+- full support of segment limit/rights
+- full x87 exception support
+- improve x87 bit exactness (use bochs code ?)
+- DRx register support
+- CR0.AC emulation
+- SSE alignment checks
+- fix SSE min/max with nans
+
+Optimizations/Features:
+
+- add SVM nested paging support
+- add VMX support
+- add AVX support
+- add SSE5 support
+- fxsave/fxrstor AMD extensions
+- improve monitor/mwait support
+- faster EFLAGS update: consider SZAP, C, O can be updated separately
+ with a bit field in CC_OP and more state variables.
+- evaluate x87 stack pointer statically
+- find a way to avoid translating the same TB several times when CR0.TS
+ is set or not.
diff --git a/src/recompiler/target-i386/cpu.h b/src/recompiler/target-i386/cpu.h
new file mode 100644
index 00000000..c643db8d
--- /dev/null
+++ b/src/recompiler/target-i386/cpu.h
@@ -0,0 +1,1215 @@
+/*
+ * i386 virtual CPU header
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#include "config.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+/* target supports implicit self modifying code */
+#define TARGET_HAS_SMC
+/* support for self modifying code even if the modified instruction is
+ close to the modifying instruction */
+#define TARGET_HAS_PRECISE_SMC
+
+#define TARGET_HAS_ICE 1
+
+#ifdef TARGET_X86_64
+#define ELF_MACHINE EM_X86_64
+#else
+#define ELF_MACHINE EM_386
+#endif
+
+#define CPUState struct CPUX86State
+
+#include "cpu-defs.h"
+
+#include "softfloat.h"
+
+#ifdef VBOX
+# include <iprt/critsect.h>
+# include <iprt/thread.h>
+# include <iprt/assert.h>
+# include <iprt/asm.h>
+# include <VBox/vmm/vmm.h>
+# include <VBox/vmm/stam.h>
+# include <VBox/vmm/cpumctx.h>
+# undef MSR_IA32_APICBASE_BSP
+#endif /* VBOX */
+
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL 0
+#define R_CL 1
+#define R_DL 2
+#define R_BL 3
+#define R_AH 4
+#define R_CH 5
+#define R_DH 6
+#define R_BH 7
+
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+/* segment descriptor fields */
+#define DESC_G_MASK (1 << 23)
+#define DESC_B_SHIFT 22
+#define DESC_B_MASK (1 << DESC_B_SHIFT)
+#define DESC_L_SHIFT 21 /* x86_64 only : 64 bit code segment */
+#define DESC_L_MASK (1 << DESC_L_SHIFT)
+#define DESC_AVL_MASK (1 << 20)
+#define DESC_P_MASK (1 << 15)
+#define DESC_DPL_SHIFT 13
+#define DESC_DPL_MASK (3 << DESC_DPL_SHIFT)
+#define DESC_S_MASK (1 << 12)
+#define DESC_TYPE_SHIFT 8
+#define DESC_TYPE_MASK (15 << DESC_TYPE_SHIFT)
+#define DESC_A_MASK (1 << 8)
+
+#define DESC_CS_MASK (1 << 11) /* 1=code segment 0=data segment */
+#define DESC_C_MASK (1 << 10) /* code: conforming */
+#define DESC_R_MASK (1 << 9) /* code: readable */
+
+#define DESC_E_MASK (1 << 10) /* data: expansion direction */
+#define DESC_W_MASK (1 << 9) /* data: writable */
+
+#define DESC_TSS_BUSY_MASK (1 << 9)
+#ifdef VBOX
+# define DESC_INTEL_UNUSABLE RT_BIT_32(16+8) /**< Internal VT-x bit for NULL selectors. */
+# define DESC_RAW_FLAG_BITS UINT32_C(0x00ffffff) /**< Flag bits we load from the descriptor. */
+#endif
+
+/* eflags masks */
+#define CC_C 0x0001
+#define CC_P 0x0004
+#define CC_A 0x0010
+#define CC_Z 0x0040
+#define CC_S 0x0080
+#define CC_O 0x0800
+
+#define TF_SHIFT 8
+#define IOPL_SHIFT 12
+#define VM_SHIFT 17
+
+#define TF_MASK 0x00000100
+#define IF_MASK 0x00000200
+#define DF_MASK 0x00000400
+#define IOPL_MASK 0x00003000
+#define NT_MASK 0x00004000
+#define RF_MASK 0x00010000
+#define VM_MASK 0x00020000
+#define AC_MASK 0x00040000
+#define VIF_MASK 0x00080000
+#define VIP_MASK 0x00100000
+#define ID_MASK 0x00200000
+
+/* hidden flags - used internally by qemu to represent additional cpu
+ states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
+ redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
+ position to ease oring with eflags. */
+/* current cpl */
+#define HF_CPL_SHIFT 0
+/* true if soft mmu is being used */
+#define HF_SOFTMMU_SHIFT 2
+/* true if hardware interrupts must be disabled for next instruction */
+#define HF_INHIBIT_IRQ_SHIFT 3
+/* 16 or 32 segments */
+#define HF_CS32_SHIFT 4
+#define HF_SS32_SHIFT 5
+/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */
+#define HF_ADDSEG_SHIFT 6
+/* copy of CR0.PE (protected mode) */
+#define HF_PE_SHIFT 7
+#define HF_TF_SHIFT 8 /* must be same as eflags */
+#define HF_MP_SHIFT 9 /* the order must be MP, EM, TS */
+#define HF_EM_SHIFT 10
+#define HF_TS_SHIFT 11
+#define HF_IOPL_SHIFT 12 /* must be same as eflags */
+#define HF_LMA_SHIFT 14 /* only used on x86_64: long mode active */
+#define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */
+#define HF_RF_SHIFT 16 /* must be same as eflags */
+#define HF_VM_SHIFT 17 /* must be same as eflags */
+#define HF_SMM_SHIFT 19 /* CPU in SMM mode */
+#define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */
+#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */
+#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */
+
+#define HF_CPL_MASK (3 << HF_CPL_SHIFT)
+#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT)
+#define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT)
+#define HF_CS32_MASK (1 << HF_CS32_SHIFT)
+#define HF_SS32_MASK (1 << HF_SS32_SHIFT)
+#define HF_ADDSEG_MASK (1 << HF_ADDSEG_SHIFT)
+#define HF_PE_MASK (1 << HF_PE_SHIFT)
+#define HF_TF_MASK (1 << HF_TF_SHIFT)
+#define HF_MP_MASK (1 << HF_MP_SHIFT)
+#define HF_EM_MASK (1 << HF_EM_SHIFT)
+#define HF_TS_MASK (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT)
+#define HF_LMA_MASK (1 << HF_LMA_SHIFT)
+#define HF_CS64_MASK (1 << HF_CS64_SHIFT)
+#define HF_RF_MASK (1 << HF_RF_SHIFT)
+#define HF_VM_MASK (1 << HF_VM_SHIFT)
+#define HF_SMM_MASK (1 << HF_SMM_SHIFT)
+#define HF_SVME_MASK (1 << HF_SVME_SHIFT)
+#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT)
+#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT)
+
+/* hflags2 */
+
+#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */
+#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */
+#define HF2_NMI_SHIFT 2 /* CPU serving NMI */
+#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */
+
+#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT)
+#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT)
+#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT)
+#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT)
+
+#define CR0_PE_SHIFT 0
+#define CR0_MP_SHIFT 1
+
+#define CR0_PE_MASK (1 << 0)
+#define CR0_MP_MASK (1 << 1)
+#define CR0_EM_MASK (1 << 2)
+#define CR0_TS_MASK (1 << 3)
+#define CR0_ET_MASK (1 << 4)
+#define CR0_NE_MASK (1 << 5)
+#define CR0_WP_MASK (1 << 16)
+#define CR0_AM_MASK (1 << 18)
+#define CR0_PG_MASK (1U << 31)
+
+#define CR4_VME_MASK (1 << 0)
+#define CR4_PVI_MASK (1 << 1)
+#define CR4_TSD_MASK (1 << 2)
+#define CR4_DE_MASK (1 << 3)
+#define CR4_PSE_MASK (1 << 4)
+#define CR4_PAE_MASK (1 << 5)
+#define CR4_MCE_MASK (1 << 6)
+#define CR4_PGE_MASK (1 << 7)
+#define CR4_PCE_MASK (1 << 8)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
+#define CR4_OSXMMEXCPT_MASK (1 << 10)
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_BT (1 << 15)
+#define DR6_FIXED_1 0xffff0ff0
+
+#define DR7_GD (1 << 13)
+#define DR7_TYPE_SHIFT 16
+#define DR7_LEN_SHIFT 18
+#define DR7_FIXED_1 0x00000400
+
+#define PG_PRESENT_BIT 0
+#define PG_RW_BIT 1
+#define PG_USER_BIT 2
+#define PG_PWT_BIT 3
+#define PG_PCD_BIT 4
+#define PG_ACCESSED_BIT 5
+#define PG_DIRTY_BIT 6
+#define PG_PSE_BIT 7
+#define PG_GLOBAL_BIT 8
+#define PG_NX_BIT 63
+
+#define PG_PRESENT_MASK (1 << PG_PRESENT_BIT)
+#define PG_RW_MASK (1 << PG_RW_BIT)
+#define PG_USER_MASK (1 << PG_USER_BIT)
+#define PG_PWT_MASK (1 << PG_PWT_BIT)
+#define PG_PCD_MASK (1 << PG_PCD_BIT)
+#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT)
+#define PG_DIRTY_MASK (1 << PG_DIRTY_BIT)
+#define PG_PSE_MASK (1 << PG_PSE_BIT)
+#define PG_GLOBAL_MASK (1 << PG_GLOBAL_BIT)
+#define PG_NX_MASK (1LL << PG_NX_BIT)
+
+#define PG_ERROR_W_BIT 1
+
+#define PG_ERROR_P_MASK 0x01
+#define PG_ERROR_W_MASK (1 << PG_ERROR_W_BIT)
+#define PG_ERROR_U_MASK 0x04
+#define PG_ERROR_RSVD_MASK 0x08
+#define PG_ERROR_I_D_MASK 0x10
+
+#define MCG_CTL_P (1UL<<8) /* MCG_CTL register available */
+
+#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_BANKS_DEF 10
+
+#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+
+#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
+#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+
+#define MSR_IA32_TSC 0x10
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_MTRRcap 0xfe
+#define MSR_MTRRcap_VCNT 8
+#define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8)
+#define MSR_MTRRcap_WC_SUPPORTED (1 << 10)
+
+#define MSR_IA32_SYSENTER_CS 0x174
+#define MSR_IA32_SYSENTER_ESP 0x175
+#define MSR_IA32_SYSENTER_EIP 0x176
+
+#define MSR_MCG_CAP 0x179
+#define MSR_MCG_STATUS 0x17a
+#define MSR_MCG_CTL 0x17b
+
+#define MSR_IA32_PERF_STATUS 0x198
+
+#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
+
+#define MSR_MTRRfix64K_00000 0x250
+#define MSR_MTRRfix16K_80000 0x258
+#define MSR_MTRRfix16K_A0000 0x259
+#define MSR_MTRRfix4K_C0000 0x268
+#define MSR_MTRRfix4K_C8000 0x269
+#define MSR_MTRRfix4K_D0000 0x26a
+#define MSR_MTRRfix4K_D8000 0x26b
+#define MSR_MTRRfix4K_E0000 0x26c
+#define MSR_MTRRfix4K_E8000 0x26d
+#define MSR_MTRRfix4K_F0000 0x26e
+#define MSR_MTRRfix4K_F8000 0x26f
+
+#define MSR_PAT 0x277
+
+#define MSR_MTRRdefType 0x2ff
+
+#define MSR_MC0_CTL 0x400
+#define MSR_MC0_STATUS 0x401
+#define MSR_MC0_ADDR 0x402
+#define MSR_MC0_MISC 0x403
+
+#define MSR_EFER 0xc0000080
+
+#define MSR_EFER_SCE (1 << 0)
+#define MSR_EFER_LME (1 << 8)
+#define MSR_EFER_LMA (1 << 10)
+#define MSR_EFER_NXE (1 << 11)
+#define MSR_EFER_SVME (1 << 12)
+#define MSR_EFER_FFXSR (1 << 14)
+
+#ifdef VBOX
+# define MSR_APIC_RANGE_START 0x800
+# define MSR_APIC_RANGE_END 0x900
+#endif
+
+#define MSR_STAR 0xc0000081
+#define MSR_LSTAR 0xc0000082
+#define MSR_CSTAR 0xc0000083
+#define MSR_FMASK 0xc0000084
+#define MSR_FSBASE 0xc0000100
+#define MSR_GSBASE 0xc0000101
+#define MSR_KERNELGSBASE 0xc0000102
+#define MSR_TSC_AUX 0xc0000103
+
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+/* cpuid_features bits */
+#define CPUID_FP87 (1 << 0)
+#define CPUID_VME (1 << 1)
+#define CPUID_DE (1 << 2)
+#define CPUID_PSE (1 << 3)
+#define CPUID_TSC (1 << 4)
+#define CPUID_MSR (1 << 5)
+#define CPUID_PAE (1 << 6)
+#define CPUID_MCE (1 << 7)
+#define CPUID_CX8 (1 << 8)
+#define CPUID_APIC (1 << 9)
+#define CPUID_SEP (1 << 11) /* sysenter/sysexit */
+#define CPUID_MTRR (1 << 12)
+#define CPUID_PGE (1 << 13)
+#define CPUID_MCA (1 << 14)
+#define CPUID_CMOV (1 << 15)
+#define CPUID_PAT (1 << 16)
+#define CPUID_PSE36 (1 << 17)
+#define CPUID_PN (1 << 18)
+#define CPUID_CLFLUSH (1 << 19)
+#define CPUID_DTS (1 << 21)
+#define CPUID_ACPI (1 << 22)
+#define CPUID_MMX (1 << 23)
+#define CPUID_FXSR (1 << 24)
+#define CPUID_SSE (1 << 25)
+#define CPUID_SSE2 (1 << 26)
+#define CPUID_SS (1 << 27)
+#define CPUID_HT (1 << 28)
+#define CPUID_TM (1 << 29)
+#define CPUID_IA64 (1 << 30)
+#define CPUID_PBE (1U << 31) /* unsigned: 1 << 31 would shift into the sign bit of int */
+
+#define CPUID_EXT_SSE3 (1 << 0)
+#define CPUID_EXT_DTES64 (1 << 2)
+#define CPUID_EXT_MONITOR (1 << 3)
+#define CPUID_EXT_DSCPL (1 << 4)
+#define CPUID_EXT_VMX (1 << 5)
+#define CPUID_EXT_SMX (1 << 6)
+#define CPUID_EXT_EST (1 << 7)
+#define CPUID_EXT_TM2 (1 << 8)
+#define CPUID_EXT_SSSE3 (1 << 9)
+#define CPUID_EXT_CID (1 << 10)
+#define CPUID_EXT_CX16 (1 << 13)
+#define CPUID_EXT_XTPR (1 << 14)
+#define CPUID_EXT_PDCM (1 << 15)
+#define CPUID_EXT_DCA (1 << 18)
+#define CPUID_EXT_SSE41 (1 << 19)
+#define CPUID_EXT_SSE42 (1 << 20)
+#define CPUID_EXT_X2APIC (1 << 21)
+#define CPUID_EXT_MOVBE (1 << 22)
+#define CPUID_EXT_POPCNT (1 << 23)
+#define CPUID_EXT_XSAVE (1 << 26)
+#define CPUID_EXT_OSXSAVE (1 << 27)
+#define CPUID_EXT_HYPERVISOR (1U << 31) /* unsigned: 1 << 31 would shift into the sign bit of int */
+
+#define CPUID_EXT2_SYSCALL (1 << 11)
+#define CPUID_EXT2_MP (1 << 19)
+#define CPUID_EXT2_NX (1 << 20)
+#define CPUID_EXT2_MMXEXT (1 << 22)
+#define CPUID_EXT2_FFXSR (1 << 25)
+#define CPUID_EXT2_PDPE1GB (1 << 26)
+#define CPUID_EXT2_RDTSCP (1 << 27)
+#define CPUID_EXT2_LM (1 << 29)
+#define CPUID_EXT2_3DNOWEXT (1 << 30)
+#define CPUID_EXT2_3DNOW (1U << 31) /* unsigned: 1 << 31 would shift into the sign bit of int */
+
+#define CPUID_EXT3_LAHF_LM (1 << 0)
+#define CPUID_EXT3_CMP_LEG (1 << 1)
+#define CPUID_EXT3_SVM (1 << 2)
+#define CPUID_EXT3_EXTAPIC (1 << 3)
+#define CPUID_EXT3_CR8LEG (1 << 4)
+#define CPUID_EXT3_ABM (1 << 5)
+#define CPUID_EXT3_SSE4A (1 << 6)
+#define CPUID_EXT3_MISALIGNSSE (1 << 7)
+#define CPUID_EXT3_3DNOWPREFETCH (1 << 8)
+#define CPUID_EXT3_OSVW (1 << 9)
+#define CPUID_EXT3_IBS (1 << 10)
+#define CPUID_EXT3_SKINIT (1 << 12)
+
+#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
+#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
+#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
+
+#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */
+#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */
+#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */
+
+#define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */
+#define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */
+
+#define EXCP00_DIVZ 0
+#define EXCP01_DB 1
+#define EXCP02_NMI 2
+#define EXCP03_INT3 3
+#define EXCP04_INTO 4
+#define EXCP05_BOUND 5
+#define EXCP06_ILLOP 6
+#define EXCP07_PREX 7
+#define EXCP08_DBLE 8
+#define EXCP09_XERR 9
+#define EXCP0A_TSS 10
+#define EXCP0B_NOSEG 11
+#define EXCP0C_STACK 12
+#define EXCP0D_GPF 13
+#define EXCP0E_PAGE 14
+#define EXCP10_COPR 16
+#define EXCP11_ALGN 17
+#define EXCP12_MCHK 18
+
+#define EXCP_SYSCALL 0x100 /* only happens in user only emulation
+ for syscall instruction */
+
+/* Condition-code operation identifiers (the CC_OP_xxx values; see the
+ cc_op, cc_src and cc_dst fields of CPUX86State). Each entry's comment
+ says how the flags relate to CC_SRC / CC_DST for that operation.
+ The B/W/L/Q suffixes presumably select the 8/16/32/64 bit operand
+ size variant -- confirm against the flag helpers. */
+enum {
+ CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
+ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
+
+ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+ CC_OP_MULW,
+ CC_OP_MULL,
+ CC_OP_MULQ,
+
+ CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_ADDW,
+ CC_OP_ADDL,
+ CC_OP_ADDQ,
+
+ CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_ADCW,
+ CC_OP_ADCL,
+ CC_OP_ADCQ,
+
+ CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_SUBW,
+ CC_OP_SUBL,
+ CC_OP_SUBQ,
+
+ CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_SBBW,
+ CC_OP_SBBL,
+ CC_OP_SBBQ,
+
+ CC_OP_LOGICB, /* modify all flags, CC_DST = res */
+ CC_OP_LOGICW,
+ CC_OP_LOGICL,
+ CC_OP_LOGICQ,
+
+ CC_OP_INCB, /* modify all flags except C, CC_DST = res, CC_SRC = C */
+ CC_OP_INCW,
+ CC_OP_INCL,
+ CC_OP_INCQ,
+
+ CC_OP_DECB, /* modify all flags except C, CC_DST = res, CC_SRC = C */
+ CC_OP_DECW,
+ CC_OP_DECL,
+ CC_OP_DECQ,
+
+ CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */
+ CC_OP_SHLW,
+ CC_OP_SHLL,
+ CC_OP_SHLQ,
+
+ CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
+ CC_OP_SARW,
+ CC_OP_SARL,
+ CC_OP_SARQ,
+
+ CC_OP_NB, /* number of CC_OP_xxx values above; keep last */
+};
+
+#ifdef FLOATX80
+#define USE_X86LDOUBLE
+#endif
+
+#ifdef USE_X86LDOUBLE
+typedef floatx80 CPU86_LDouble;
+#else
+typedef float64 CPU86_LDouble;
+#endif
+
+/* Cached state of one segment-like register. CPUX86State uses this type
+ for the six segment registers (segs[]) as well as ldt, tr, gdt and idt;
+ for gdt and idt only base and limit are used. */
+typedef struct SegmentCache {
+ uint32_t selector;
+#ifdef VBOX
+ /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+ uint16_t newselector;
+ uint16_t fVBoxFlags; /* set to CPUMSELREG_FLAGS_VALID by the segment cache loader */
+#endif
+ target_ulong base;
+ uint32_t limit;
+ uint32_t flags; /* descriptor attribute bits, tested with the DESC_xxx masks */
+} SegmentCache;
+
+typedef union {
+ uint8_t _b[16];
+ uint16_t _w[8];
+ uint32_t _l[4];
+ uint64_t _q[2];
+ float32 _s[4];
+ float64 _d[2];
+} XMMReg;
+
+typedef union {
+ uint8_t _b[8];
+ uint16_t _w[4];
+ uint32_t _l[2];
+ float32 _s[2];
+ uint64_t q;
+} MMXReg;
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define XMM_B(n) _b[15 - (n)]
+#define XMM_W(n) _w[7 - (n)]
+#define XMM_L(n) _l[3 - (n)]
+#define XMM_S(n) _s[3 - (n)]
+#define XMM_Q(n) _q[1 - (n)]
+#define XMM_D(n) _d[1 - (n)]
+
+#define MMX_B(n) _b[7 - (n)]
+#define MMX_W(n) _w[3 - (n)]
+#define MMX_L(n) _l[1 - (n)]
+#define MMX_S(n) _s[1 - (n)]
+#else
+#define XMM_B(n) _b[n]
+#define XMM_W(n) _w[n]
+#define XMM_L(n) _l[n]
+#define XMM_S(n) _s[n]
+#define XMM_Q(n) _q[n]
+#define XMM_D(n) _d[n]
+
+#define MMX_B(n) _b[n]
+#define MMX_W(n) _w[n]
+#define MMX_L(n) _l[n]
+#define MMX_S(n) _s[n]
+#endif
+#define MMX_Q(n) q
+
+typedef union {
+#ifdef USE_X86LDOUBLE
+ CPU86_LDouble d __attribute__((aligned(16)));
+#else
+ CPU86_LDouble d;
+#endif
+ MMXReg mmx;
+} FPReg;
+
+typedef struct {
+ uint64_t base;
+ uint64_t mask;
+} MTRRVar;
+
+#define CPU_NB_REGS64 16
+#define CPU_NB_REGS32 8
+
+#ifdef TARGET_X86_64
+#define CPU_NB_REGS CPU_NB_REGS64
+#else
+#define CPU_NB_REGS CPU_NB_REGS32
+#endif
+
+#define NB_MMU_MODES 2
+
+typedef struct CPUX86State {
+ /* standard registers */
+ target_ulong regs[CPU_NB_REGS];
+ target_ulong eip;
+ target_ulong eflags; /* eflags register. During CPU emulation, CC
+ flags and DF are set to zero because they are
+ stored elsewhere */
+
+ /* emulator internal eflags handling */
+ target_ulong cc_src;
+ target_ulong cc_dst;
+ uint32_t cc_op;
+ int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+ uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
+ are known at translation time. */
+ uint32_t hflags2; /* various other flags, see HF2_xxx constants. */
+
+ /* segments */
+ SegmentCache segs[6]; /* selector values */
+ SegmentCache ldt;
+ SegmentCache tr;
+ SegmentCache gdt; /* only base and limit are used */
+ SegmentCache idt; /* only base and limit are used */
+
+ target_ulong cr[5]; /* NOTE: cr1 is unused */
+ int32_t a20_mask;
+
+ /* FPU state */
+ unsigned int fpstt; /* top of stack index */
+ uint16_t fpus;
+ uint16_t fpuc;
+ uint8_t fptags[8]; /* 0 = valid, 1 = empty */
+ FPReg fpregs[8];
+
+ /* emulator internal variables */
+ float_status fp_status;
+#ifdef VBOX
+ uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */
+#endif
+ CPU86_LDouble ft0;
+#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+ uint32_t alignment4; /* long double is 12 byte, pad it to 16. */
+#endif
+
+ float_status mmx_status; /* for 3DNow! float ops */
+ float_status sse_status;
+ uint32_t mxcsr;
+ XMMReg xmm_regs[CPU_NB_REGS];
+ XMMReg xmm_t0;
+ MMXReg mmx_t0;
+ target_ulong cc_tmp; /* temporary for rcr/rcl */
+
+ /* sysenter registers */
+ uint32_t sysenter_cs;
+#ifdef VBOX
+ uint32_t alignment0;
+#endif
+ target_ulong sysenter_esp;
+ target_ulong sysenter_eip;
+ uint64_t efer;
+ uint64_t star;
+
+ uint64_t vm_hsave;
+ uint64_t vm_vmcb;
+ uint64_t tsc_offset;
+ uint64_t intercept;
+ uint16_t intercept_cr_read;
+ uint16_t intercept_cr_write;
+ uint16_t intercept_dr_read;
+ uint16_t intercept_dr_write;
+ uint32_t intercept_exceptions;
+ uint8_t v_tpr;
+
+#ifdef TARGET_X86_64
+ target_ulong lstar;
+ target_ulong cstar;
+ target_ulong fmask;
+ target_ulong kernelgsbase;
+#endif
+ uint64_t system_time_msr;
+ uint64_t wall_clock_msr;
+
+ uint64_t tsc;
+
+ uint64_t pat;
+
+ /* exception/interrupt handling */
+ int error_code;
+ int exception_is_int;
+#ifdef VBOX
+# define EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ 0x42 /**< Special CPUX86State::exception_is_int value indicating hardware irq. (HACK ALERT) */
+#endif
+ target_ulong exception_next_eip;
+ target_ulong dr[8]; /* debug registers */
+ union {
+ CPUBreakpoint *cpu_breakpoint[4];
+ CPUWatchpoint *cpu_watchpoint[4];
+ }; /* break/watchpoints for dr[0..3] */
+ uint32_t smbase;
+ int old_exception; /* exception in flight */
+
+ CPU_COMMON
+
+#ifdef VBOX
+ /** cpu state flags. (see defines below) */
+ uint32_t state;
+ /** The VM handle. */
+ PVM pVM;
+ /** The VMCPU handle. */
+ PVMCPU pVCpu;
+ /** code buffer for instruction emulation */
+ void *pvCodeBuffer;
+ /** code buffer size */
+ uint32_t cbCodeBuffer;
+#endif /* VBOX */
+
+ /* processor features (e.g. for CPUID insn) */
+#ifndef VBOX /* remR3CpuId deals with these */
+ uint32_t cpuid_level;
+ uint32_t cpuid_vendor1;
+ uint32_t cpuid_vendor2;
+ uint32_t cpuid_vendor3;
+ uint32_t cpuid_version;
+#endif /* !VBOX */
+ uint32_t cpuid_features;
+ uint32_t cpuid_ext_features;
+#ifndef VBOX
+ uint32_t cpuid_xlevel;
+ uint32_t cpuid_model[12];
+#endif /* !VBOX */
+ uint32_t cpuid_ext2_features;
+ uint32_t cpuid_ext3_features;
+ uint32_t cpuid_apic_id;
+#ifndef VBOX
+ int cpuid_vendor_override;
+
+ /* MTRRs */
+ uint64_t mtrr_fixed[11];
+ uint64_t mtrr_deftype;
+ MTRRVar mtrr_var[8];
+
+ /* For KVM */
+ uint32_t mp_state;
+ int32_t exception_injected;
+ int32_t interrupt_injected;
+ uint8_t soft_interrupt;
+ uint8_t nmi_injected;
+ uint8_t nmi_pending;
+ uint8_t has_error_code;
+ uint32_t sipi_vector;
+
+ uint32_t cpuid_kvm_features;
+
+ /* in order to simplify APIC support, we leave this pointer to the
+ user */
+ struct DeviceState *apic_state;
+
+ uint64 mcg_cap;
+ uint64 mcg_status;
+ uint64 mcg_ctl;
+ uint64 mce_banks[MCE_BANKS_DEF*4];
+
+ uint64_t tsc_aux;
+
+ /* vmstate */
+ uint16_t fpus_vmstate;
+ uint16_t fptag_vmstate;
+ uint16_t fpregs_format_vmstate;
+
+ uint64_t xstate_bv;
+ XMMReg ymmh_regs[CPU_NB_REGS];
+
+ uint64_t xcr0;
+#else /* VBOX */
+
+ /** Alignment padding. */
+# if HC_ARCH_BITS == 64 \
+ || ( HC_ARCH_BITS == 32 \
+ && !defined(RT_OS_WINDOWS) \
+ && ( (!defined(VBOX_ENABLE_VBOXREM64) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD)) \
+ || (defined(VBOX_ENABLE_VBOXREM64) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))) ) )
+ uint32_t alignment2[1];
+# endif
+
+ /** Profiling tb_flush. */
+ STAMPROFILE StatTbFlush;
+
+ /** Addends for HVA -> GPA translations. */
+ target_phys_addr_t phys_addends[NB_MMU_MODES][CPU_TLB_SIZE];
+#endif /* VBOX */
+} CPUX86State;
+
+#ifdef VBOX
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct SegmentCache_Ver16 {
+ uint32_t selector;
+ uint32_t base;
+ uint32_t limit;
+ uint32_t flags;
+ /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+ uint32_t newselector;
+} SegmentCache_Ver16;
+
+# define CPU_NB_REGS_VER16 8
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct CPUX86State_Ver16 {
+# if TARGET_LONG_BITS > HOST_LONG_BITS
+ /* temporaries if we cannot store them in host registers */
+ uint32_t t0, t1, t2;
+# endif
+
+ /* standard registers */
+ uint32_t regs[CPU_NB_REGS_VER16];
+ uint32_t eip;
+ uint32_t eflags; /* eflags register. During CPU emulation, CC
+ flags and DF are set to zero because they are
+ stored elsewhere */
+
+ /* emulator internal eflags handling */
+ uint32_t cc_src;
+ uint32_t cc_dst;
+ uint32_t cc_op;
+ int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+ uint32_t hflags; /* hidden flags, see HF_xxx constants */
+
+ /* segments */
+ SegmentCache_Ver16 segs[6]; /* selector values */
+ SegmentCache_Ver16 ldt;
+ SegmentCache_Ver16 tr;
+ SegmentCache_Ver16 gdt; /* only base and limit are used */
+ SegmentCache_Ver16 idt; /* only base and limit are used */
+
+ uint32_t cr[5]; /* NOTE: cr1 is unused */
+ uint32_t a20_mask;
+
+ /* FPU state */
+ unsigned int fpstt; /* top of stack index */
+ unsigned int fpus;
+ unsigned int fpuc;
+ uint8_t fptags[8]; /* 0 = valid, 1 = empty */
+ union {
+# ifdef USE_X86LDOUBLE
+ CPU86_LDouble d __attribute__((aligned(16)));
+# else
+ CPU86_LDouble d;
+# endif
+ MMXReg mmx;
+ } fpregs[8];
+
+ /* emulator internal variables */
+ float_status fp_status;
+# ifdef VBOX
+ uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */
+# endif
+ CPU86_LDouble ft0;
+# if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+ uint32_t alignment4; /* long double is 12 byte, pad it to 16. */
+# endif
+ union {
+ float f;
+ double d;
+ int i32;
+ int64_t i64;
+ } fp_convert;
+
+ float_status sse_status;
+ uint32_t mxcsr;
+ XMMReg xmm_regs[CPU_NB_REGS_VER16];
+ XMMReg xmm_t0;
+ MMXReg mmx_t0;
+
+ /* sysenter registers */
+ uint32_t sysenter_cs;
+ uint32_t sysenter_esp;
+ uint32_t sysenter_eip;
+# ifdef VBOX
+ uint32_t alignment0;
+# endif
+ uint64_t efer;
+ uint64_t star;
+
+ uint64_t pat;
+
+ /* temporary data for USE_CODE_COPY mode */
+# ifdef USE_CODE_COPY
+ uint32_t tmp0;
+ uint32_t saved_esp;
+ int native_fp_regs; /* if true, the FPU state is in the native CPU regs */
+# endif
+
+ /* exception/interrupt handling */
+ jmp_buf jmp_env;
+} CPUX86State_Ver16;
+
+/** CPUX86State state flags (bits of CPUX86State::state)
+ * @{ */
+# define CPU_RAW_RING0 0x0002 /* Set after first time RawR0 is executed, never cleared. */
+# define CPU_EMULATE_SINGLE_INSTR 0x0040 /* Execute a single instruction in emulation mode */
+# define CPU_EMULATE_SINGLE_STEP 0x0080 /* go into single step mode */
+# define CPU_RAW_HM 0x0100 /* Set after first time HWACC is executed, never cleared. */
+/** @} */
+#endif /* VBOX */
+
+#ifdef VBOX
+CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model);
+#else /* !VBOX */
+CPUX86State *cpu_x86_init(const char *cpu_model);
+#endif /* !VBOX */
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s);
+void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ const char *optarg);
+void x86_cpudef_setup(void);
+
+int cpu_get_pic_interrupt(CPUX86State *s);
+/* MSDOS compatibility mode FPU exception support */
+void cpu_set_ferr(CPUX86State *s);
+
+/* this function must always be used to load data in the segment
+ cache: it synchronizes the hflags with the segment cache values.
+
+ env: CPU state whose segs[seg_reg] entry is overwritten
+ seg_reg: index into env->segs[] (R_ES .. R_GS)
+ selector, base, limit, flags: values stored verbatim in the cache entry
+
+ After the store, the CS32/CS64 (only when seg_reg == R_CS), SS32 and
+ ADDSEG bits of env->hflags are recomputed from the cached CS/SS/DS/ES
+ state, CR0.PE and EFLAGS.VM.
+
+ In VBOX builds this body is named
+ cpu_x86_load_seg_cache_with_clean_flags(); otherwise it is
+ cpu_x86_load_seg_cache() itself. */
+#ifndef VBOX
+static inline void cpu_x86_load_seg_cache(CPUX86State *env,
+ int seg_reg, unsigned int selector,
+ target_ulong base,
+ unsigned int limit,
+ unsigned int flags)
+#else
+static inline void cpu_x86_load_seg_cache_with_clean_flags(CPUX86State *env,
+ int seg_reg, unsigned int selector,
+ target_ulong base,
+ unsigned int limit,
+ unsigned int flags)
+#endif
+{
+ SegmentCache *sc;
+ unsigned int new_hflags;
+
+ sc = &env->segs[seg_reg];
+ sc->selector = selector;
+ sc->base = base;
+ sc->limit = limit;
+ sc->flags = flags;
+#ifdef VBOX
+ /* the entry is now in sync: no pending selector, mark it valid */
+ sc->newselector = 0;
+ sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+#endif
+
+ /* update the hidden flags */
+ {
+ if (seg_reg == R_CS) {
+#ifdef TARGET_X86_64
+ if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
+ /* long mode */
+ env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+ env->hflags &= ~(HF_ADDSEG_MASK);
+ } else
+#endif
+ {
+ /* legacy / compatibility case */
+ /* move the descriptor B bit down to the HF_CS32 position */
+ new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
+ >> (DESC_B_SHIFT - HF_CS32_SHIFT);
+ env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
+ new_hflags;
+ }
+ }
+ /* SS32 is re-derived from the cached SS B bit on every call,
+ whichever segment register was loaded */
+ new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
+ >> (DESC_B_SHIFT - HF_SS32_SHIFT);
+ if (env->hflags & HF_CS64_MASK) {
+ /* zero base assumed for DS, ES and SS in long mode */
+ } else if (!(env->cr[0] & CR0_PE_MASK) ||
+ (env->eflags & VM_MASK) ||
+ !(env->hflags & HF_CS32_MASK)) {
+ /* XXX: try to avoid this test. The problem comes from the
+ fact that in real mode or vm86 mode we only modify the
+ 'base' and 'selector' fields of the segment cache to go
+ faster. A solution may be to force addseg to one in
+ translate-i386.c. */
+ new_hflags |= HF_ADDSEG_MASK;
+ } else {
+ /* ADDSEG is needed only if DS, ES or SS has a non-zero base */
+ new_hflags |= ((env->segs[R_DS].base |
+ env->segs[R_ES].base |
+ env->segs[R_SS].base) != 0) <<
+ HF_ADDSEG_SHIFT;
+ }
+ env->hflags = (env->hflags &
+ ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags;
+ }
+}
+
+#ifdef VBOX
+/*
+ * Raw-input variant of the segment cache loader: sanitizes the descriptor
+ * flags and then hands everything to
+ * cpu_x86_load_seg_cache_with_clean_flags().
+ *
+ * Only the DESC_RAW_FLAG_BITS portion of flags is kept. A present
+ * descriptor gets its accessed bit forced on; a non-present one loaded
+ * with a selector below 4 is tagged with the internal
+ * DESC_INTEL_UNUSABLE bit.
+ */
+static inline void cpu_x86_load_seg_cache(CPUX86State *env,
+                                          int seg_reg, unsigned int selector,
+                                          target_ulong base,
+                                          unsigned int limit,
+                                          unsigned int flags)
+{
+    unsigned int clean_flags = flags & DESC_RAW_FLAG_BITS;
+
+    if (clean_flags & DESC_P_MASK) {
+        /* Make sure the A bit is set to avoid trouble. */
+        clean_flags |= DESC_A_MASK;
+    } else if (selector < 4U) {
+        clean_flags |= DESC_INTEL_UNUSABLE;
+    }
+
+    cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, base,
+                                            limit, clean_flags);
+}
+#endif
+
+/*
+ * Restart execution at the address encoded by sipi_vector: EIP becomes 0
+ * and CS is reloaded with selector = vector << 8 and base = vector << 12,
+ * keeping the limit and flags currently cached for CS. The CPU is also
+ * taken out of the halted state.
+ */
+static inline void cpu_x86_load_seg_cache_sipi(CPUX86State *env,
+                                               int sipi_vector)
+{
+    const unsigned int cs_selector = sipi_vector << 8;
+    const target_ulong cs_base = sipi_vector << 12;
+    const unsigned int cs_limit = env->segs[R_CS].limit;
+    const unsigned int cs_flags = env->segs[R_CS].flags;
+
+    env->eip = 0;
+    cpu_x86_load_seg_cache(env, R_CS, cs_selector, cs_base,
+                           cs_limit, cs_flags);
+    env->halted = 0;
+}
+
+int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
+ target_ulong *base, unsigned int *limit,
+ unsigned int *flags);
+
+/* wrapper, just in case memory mappings must be changed.
+ Replaces the CPL field of s->hflags with cpl; cpl is not
+ range-checked here. */
+static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
+{
+#if HF_CPL_MASK != 3
+#error HF_CPL_MASK is hardcoded
+#endif
+    /* The CPL field sits at bit 0 (mask 3), so cpl is OR-ed in unshifted. */
+    s->hflags &= ~HF_CPL_MASK;
+    s->hflags |= cpl;
+}
+
+/* op_helper.c */
+/* used for debug or cpu save/restore */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f);
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper);
+
+/* cpu-exec.c */
+/* the following helpers are only usable in user mode simulation as
+ they can trigger unexpected exceptions */
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+
+/* you can call this signal handler from your SIGBUS and SIGSEGV
+ signal handlers to inform the virtual CPU of exceptions. non zero
+ is returned if the signal was handled by the virtual CPU. */
+int cpu_x86_signal_handler(int host_signum, void *pinfo,
+ void *puc);
+
+/* cpuid.c */
+void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
+int cpu_x86_register (CPUX86State *env, const char *cpu_model);
+void cpu_clear_apic_feature(CPUX86State *env);
+
+/* helper.c */
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+ int is_write, int mmu_idx, int is_softmmu);
+#define cpu_handle_mmu_fault cpu_x86_handle_mmu_fault
+void cpu_x86_set_a20(CPUX86State *env, int a20_state);
+
+/* Return the two-bit enable field that DR7 holds for breakpoint 'index'
+   (bits index*2 and index*2+1); 0 means the breakpoint is disabled. */
+static inline int hw_breakpoint_enabled(unsigned long dr7, int index)
+{
+    const int shift = index * 2;
+
+    return (dr7 >> shift) & 3;
+}
+
+/* Return the two-bit type field of breakpoint 'index'; the fields start at
+   DR7_TYPE_SHIFT and are spaced four bits apart. */
+static inline int hw_breakpoint_type(unsigned long dr7, int index)
+{
+    const int shift = DR7_TYPE_SHIFT + (index * 4);
+
+    return (dr7 >> shift) & 3;
+}
+
+/* Decode the two-bit length field of breakpoint 'index' into a byte count:
+   encoding 2 means 8 bytes, every other encoding means field + 1
+   (i.e. 1, 2 or 4). */
+static inline int hw_breakpoint_len(unsigned long dr7, int index)
+{
+    const int field = (dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3;
+
+    if (field == 2) {
+        return 8;
+    }
+    return field + 1;
+}
+
+void hw_breakpoint_insert(CPUX86State *env, int index);
+void hw_breakpoint_remove(CPUX86State *env, int index);
+int check_hw_breakpoints(CPUX86State *env, int force_dr6_update);
+
+/* will be suppressed */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+
+/* hw/pc.c */
+void cpu_smm_update(CPUX86State *env);
+uint64_t cpu_get_tsc(CPUX86State *env);
+
+/* used to debug */
+#define X86_DUMP_FPU 0x0001 /* dump FPU state too */
+#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
+
+#ifdef VBOX
+int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue);
+int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue);
+void cpu_trap_raw(CPUX86State *env1);
+
+/* in helper.c */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr);
+uint16_t read_word(CPUX86State *env1, target_ulong addr);
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val);
+uint32_t read_dword(CPUX86State *env1, target_ulong addr);
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val);
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val);
+/* in helper.c */
+int emulate_single_instr(CPUX86State *env1);
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl);
+
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+#endif /* VBOX */
+
+#define TARGET_PAGE_BITS 12
+
+#ifdef TARGET_X86_64
+#define TARGET_PHYS_ADDR_SPACE_BITS 52
+/* ??? This is really 48 bits, sign-extended, but the only thing
+ accessible to userland with bit 48 set is the VSYSCALL, and that
+ is handled via other mechanisms. */
+#define TARGET_VIRT_ADDR_SPACE_BITS 47
+#else
+#define TARGET_PHYS_ADDR_SPACE_BITS 36
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
+
+#define cpu_init cpu_x86_init
+#define cpu_exec cpu_x86_exec
+#define cpu_gen_code cpu_x86_gen_code
+#define cpu_signal_handler cpu_x86_signal_handler
+#define cpu_list_id x86_cpu_list
+#define cpudef_setup x86_cpudef_setup
+
+#define CPU_SAVE_VERSION 12
+
+/* MMU modes definitions */
+#define MMU_MODE0_SUFFIX _kernel
+#define MMU_MODE1_SUFFIX _user
+#define MMU_USER_IDX 1
+/* Pick the MMU mode for the current privilege level: index 1 when the CPL
+   stored in hflags is 3, index 0 for every other CPL. */
+static inline int cpu_mmu_index (CPUState *env)
+{
+    if ((env->hflags & HF_CPL_MASK) == 3) {
+        return 1;
+    }
+    return 0;
+}
+
+/* translate.c */
+void optimize_flags_init(void);
+
+/* Pair of flag-evaluation callbacks: one returns all the flags, the other
+   only the C flag. */
+typedef struct CCTable {
+ int (*compute_all)(void); /* return all the flags */
+ int (*compute_c)(void); /* return the C flag */
+} CCTable;
+
+#if defined(CONFIG_USER_ONLY)
+/* Adjust the registers of a cloned CPU state: EAX is forced to 0 and, when
+   'newsp' is non-zero, it becomes the new stack pointer. */
+static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
+{
+    env->regs[R_EAX] = 0;
+    if (newsp != 0) {
+        env->regs[R_ESP] = newsp;
+    }
+}
+#endif
+
+#include "cpu-all.h"
+#include "svm.h"
+
+#ifndef VBOX
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/apic.h"
+#endif
+#else /* VBOX */
+extern void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+extern uint8_t cpu_get_apic_tpr(CPUX86State *env);
+extern uint64_t cpu_get_apic_base(CPUX86State *env);
+#endif /* VBOX */
+
+/* Report the values that identify the current translation context:
+   *cs_base receives the CS base, *pc the linear address CS.base + EIP
+   (truncated to 32 bits unless HF_CS64_MASK is set), and *flags the hflags
+   merged with the IOPL, TF, RF and VM bits of eflags. */
+static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
+                                        target_ulong *cs_base, int *flags)
+{
+    target_ulong linear;
+
+    *cs_base = env->segs[R_CS].base;
+    linear = *cs_base + env->eip;
+    if (!(env->hflags & HF_CS64_MASK)) {
+        linear = (uint32_t)linear;
+    }
+    *pc = linear;
+    *flags = env->hflags |
+        (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
+}
+
+#ifndef VBOX
+void apic_init_reset(CPUState *env);
+void apic_sipi(CPUState *env);
+void do_cpu_init(CPUState *env);
+void do_cpu_sipi(CPUState *env);
+#endif /* !VBOX */
+#endif /* CPU_I386_H */
diff --git a/src/recompiler/target-i386/exec.h b/src/recompiler/target-i386/exec.h
new file mode 100644
index 00000000..355599fa
--- /dev/null
+++ b/src/recompiler/target-i386/exec.h
@@ -0,0 +1,370 @@
+/*
+ * i386 execution defines
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "config.h"
+#include "dyngen-exec.h"
+
+/* XXX: factorize this mess */
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+#include "cpu-defs.h"
+
+register struct CPUX86State *env asm(AREG0);
+
+#include "qemu-common.h"
+#include "qemu-log.h"
+
+#undef EAX
+#define EAX (env->regs[R_EAX])
+#undef ECX
+#define ECX (env->regs[R_ECX])
+#undef EDX
+#define EDX (env->regs[R_EDX])
+#undef EBX
+#define EBX (env->regs[R_EBX])
+#undef ESP
+#define ESP (env->regs[R_ESP])
+#undef EBP
+#define EBP (env->regs[R_EBP])
+#undef ESI
+#define ESI (env->regs[R_ESI])
+#undef EDI
+#define EDI (env->regs[R_EDI])
+#undef EIP
+#define EIP (env->eip)
+#define DF (env->df)
+
+#define CC_SRC (env->cc_src)
+#define CC_DST (env->cc_dst)
+#define CC_OP (env->cc_op)
+
+/* float macros */
+#define FT0 (env->ft0)
+#define ST0 (env->fpregs[env->fpstt].d)
+#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1 ST(1)
+
+#include "cpu.h"
+#include "exec-all.h"
+
+/* op_helper.c */
+void do_interrupt(int intno, int is_int, int error_code,
+ target_ulong next_eip, int is_hw);
+void do_interrupt_user(int intno, int is_int, int error_code,
+ target_ulong next_eip);
+void QEMU_NORETURN raise_exception_err(int exception_index, int error_code);
+void QEMU_NORETURN raise_exception(int exception_index);
+void QEMU_NORETURN raise_exception_env(int exception_index, CPUState *nenv);
+void do_smm_enter(void);
+
+/* Bidirectional shift: shift x left by n when n >= 0, right by -n otherwise.
+   n must be a constant to be efficient */
+static inline target_long lshift(target_long x, int n)
+{
+    return (n >= 0) ? (x << n) : (x >> (-n));
+}
+
+#include "helper.h"
+
+/* Convenience wrapper: run the SVM intercept check for 'type' with the
+   extra parameter fixed to 0. */
+static inline void svm_check_intercept(uint32_t type)
+{
+ helper_svm_check_intercept_param(type, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#ifdef USE_X86LDOUBLE
+/* use long double functions */
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define int32_to_floatx int32_to_floatx80
+#define int64_to_floatx int64_to_floatx80
+#define float32_to_floatx float32_to_floatx80
+#define float64_to_floatx float64_to_floatx80
+#define floatx_to_float32 floatx80_to_float32
+#define floatx_to_float64 floatx80_to_float64
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int floatx80_round_to_int
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#else
+#define floatx_to_int32 float64_to_int32
+#define floatx_to_int64 float64_to_int64
+#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
+#define int32_to_floatx int32_to_float64
+#define int64_to_floatx int64_to_float64
+#define float32_to_floatx float32_to_float64
+#define float64_to_floatx(x, e) (x)
+#define floatx_to_float32 float64_to_float32
+#define floatx_to_float64(x, e) (x)
+#define floatx_abs float64_abs
+#define floatx_chs float64_chs
+#define floatx_round_to_int float64_round_to_int
+#define floatx_compare float64_compare
+#define floatx_compare_quiet float64_compare_quiet
+#endif
+
+#ifdef VBOX
+# ifdef IPRT_NO_CRT
+# undef sin
+# undef cos
+# undef sqrt
+# undef pow
+# undef log
+# undef tan
+# undef atan2
+# undef floor
+# undef ceil
+# undef ldexp
+# define sin sinl
+# define cos cosl
+# define sqrt sqrtl
+# define pow powl
+# define log logl
+# define tan tanl
+# define atan2 atan2l
+# define floor floorl
+# define ceil ceill
+# define ldexp ldexpl
+# endif
+#endif
+
+#define RC_MASK 0xc00
+#define RC_NEAR 0x000
+#define RC_DOWN 0x400
+#define RC_UP 0x800
+#define RC_CHOP 0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+#ifdef USE_X86LDOUBLE
+
+/* only for x86 */
+/* Overlay giving integer access to the pieces of a host long double; the
+   EXPD/SIGND/MANTD macros below read the exponent, sign and mantissa
+   through it. */
+typedef union {
+ long double d;
+ struct {
+ unsigned long long lower; /* mantissa (see MANTD) */
+ unsigned short upper; /* bit 15: sign, bits 0..14: exponent (see SIGND/EXPD) */
+ } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp) (fp.l.upper & 0x7fff)
+#define SIGND(fp) ((fp.l.upper) & 0x8000)
+#define MANTD(fp) (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#else
+
+/* Overlay giving integer access to the two 32-bit halves of a host double.
+   The order of the halves follows the host word order.
+   NOTE: arm is horrible as double 32 bit words are stored in big endian ! */
+typedef union {
+ double d;
+#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__)
+ struct {
+ uint32_t lower;
+ int32_t upper;
+ } l;
+#else
+ struct {
+ int32_t upper;
+ uint32_t lower;
+ } l;
+#endif
+#ifndef __arm__
+ /* whole value as one 64-bit integer; not provided on arm, where the
+    macros below combine l.upper and l.lower instead */
+ int64_t ll;
+#endif
+} CPU86_LDoubleU;
+
+/* the following deal with IEEE double-precision numbers */
+#define MAXEXPD 0x7ff
+#define EXPBIAS 1023
+#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp) ((fp.l.upper) & 0x80000000)
+#ifdef __arm__
+#define MANTD(fp) (fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32))
+#else
+#define MANTD(fp) (fp.ll & ((1LL << 52) - 1))
+#endif
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20)
+#endif
+
+/* Push onto the FPU register stack: move the top-of-stack index down by one
+   (modulo 8) and mark the new top slot as valid. */
+static inline void fpush(void)
+{
+    const int new_top = (env->fpstt - 1) & 7;
+
+    env->fpstt = new_top;
+    env->fptags[new_top] = 0; /* validate stack entry */
+}
+
+/* Pop the FPU register stack: mark the current top slot as empty, then move
+   the top-of-stack index up by one (modulo 8). */
+static inline void fpop(void)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt += 1;
+    env->fpstt &= 7;
+}
+
+#ifndef USE_X86LDOUBLE
+/* Load a 10-byte extended-precision value from 'ptr' and convert it to the
+   host double representation (variant used when USE_X86LDOUBLE is not
+   defined).  The low 11 mantissa bits are dropped and the exponent is
+   rebiased without any range check. */
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+ int upper, e;
+ uint64_t ll;
+
+ /* sign + exponent word, read from ptr + 8 */
+ upper = lduw(ptr + 8);
+ /* XXX: handle overflow ? */
+ e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+ e |= (upper >> 4) & 0x800; /* sign */
+ /* mantissa: shift out the low 11 bits and keep 52 bits, which also
+    discards bit 63 of the stored value */
+ ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+ temp.l.upper = (e << 20) | (ll >> 32);
+ temp.l.lower = ll;
+#else
+ temp.ll = ll | ((uint64_t)e << 52);
+#endif
+ return temp.d;
+}
+
+/* Store the host double 'f' at 'ptr' in the 10-byte extended-precision
+   layout: a 64-bit mantissa with bit 63 set, followed at ptr + 8 by a 16-bit
+   sign/exponent word.
+   XXX: zero, denormals, infinities and NaNs are not special-cased. */
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+    CPU86_LDoubleU temp;
+    int e;
+
+    temp.d = f;
+    /* mantissa: move the 52-bit fraction up to bits 11..62 and set bit 63.
+       The constant must be unsigned: 1LL << 63 overflows a signed 64-bit
+       integer, which is undefined behaviour in C. */
+    stq(ptr, (MANTD(temp) << 11) | (1ULL << 63));
+    /* exponent + sign: rebias the exponent and move the sign to bit 15 */
+    e = EXPD(temp) - EXPBIAS + 16383;
+    e |= SIGND(temp) >> 16;
+    stw(ptr + 8, e);
+}
+#else
+
+/* we use memory access macros */
+
+/* Load a 10-byte value from 'ptr' straight into a host long double
+   (USE_X86LDOUBLE variant): the 64-bit part first, then the 16-bit word
+   at ptr + 8.  No conversion is performed. */
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+
+ temp.l.lower = ldq(ptr);
+ temp.l.upper = lduw(ptr + 8);
+ return temp.d;
+}
+
+/* Store the host long double 'f' at 'ptr' as 10 bytes (USE_X86LDOUBLE
+   variant): the 64-bit part first, then the 16-bit word at ptr + 8.
+   No conversion is performed. */
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+
+ temp.d = f;
+ stq(ptr, temp.l.lower);
+ stw(ptr + 8, temp.l.upper);
+}
+
+#endif /* USE_X86LDOUBLE */
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B (1 << 15)
+
+#define FPUC_EM 0x3f
+
+/* Rebuild the complete EFLAGS value: the bits kept in env->eflags, the
+   flags computed by helper_cc_compute_all() for the current CC_OP, and the
+   direction flag derived from DF. */
+static inline uint32_t compute_eflags(void)
+{
+    uint32_t flags = env->eflags;
+
+    flags |= helper_cc_compute_all(CC_OP);
+    flags |= DF & DF_MASK;
+    return flags;
+}
+
+/* Install 'eflags': the O/S/Z/A/P/C bits go to CC_SRC, bit 10 selects DF
+   (-1 when set, +1 when clear), and only the bits selected by 'update_mask'
+   are replaced in env->eflags, with bit 1 always forced to 1.
+   NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+static inline void load_eflags(int eflags, int update_mask)
+{
+    CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = ((eflags >> 10) & 1) ? -1 : 1;
+    env->eflags &= ~update_mask;
+    env->eflags |= (eflags & update_mask) | 0x2;
+}
+
+/* Return non-zero when an event is pending that the CPU must handle: a hard
+   interrupt while IF is set in eflags, or an NMI, INIT or SIPI request. */
+static inline int cpu_has_work(CPUState *env)
+{
+    int work = 0;
+
+    if ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
+        (env->eflags & IF_MASK)) {
+        work = 1;
+    }
+    work |= env->interrupt_request &
+            (CPU_INTERRUPT_NMI | CPU_INTERRUPT_INIT | CPU_INTERRUPT_SIPI);
+
+    return work;
+}
+
+/* Handle leaving the HALTED state.  Returns 0 when the CPU may run (it was
+   not halted, or pending work just cleared the halt) and EXCP_HALTED when
+   it must stay halted. */
+static inline int cpu_halted(CPUState *env) {
+    if (!env->halted) {
+        return 0;
+    }
+    if (!cpu_has_work(env)) {
+        return EXCP_HALTED;
+    }
+    /* pending work: disable halt condition */
+    env->halted = 0;
+    return 0;
+}
+
+/* Store 'val' in EFER and mirror its LMA and SVME bits into the matching
+   hflags.  XXX: do consistency checks with cpuid bits ? */
+static inline void cpu_load_efer(CPUState *env, uint64_t val)
+{
+    env->efer = val;
+    env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK);
+    env->hflags |= (val & MSR_EFER_LMA) ? HF_LMA_MASK : 0;
+    env->hflags |= (val & MSR_EFER_SVME) ? HF_SVME_MASK : 0;
+}
+
+/* Restore EIP from a translation block: the block stores pc and cs_base,
+   so EIP is their difference. */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+ env->eip = tb->pc - tb->cs_base;
+}
+
diff --git a/src/recompiler/target-i386/helper.c b/src/recompiler/target-i386/helper.c
new file mode 100644
index 00000000..a6f37ff6
--- /dev/null
+++ b/src/recompiler/target-i386/helper.c
@@ -0,0 +1,1227 @@
+/*
+ * i386 helpers (without register variable usage)
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifndef VBOX
+#include <inttypes.h>
+#include <signal.h>
+#endif /* !VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "kvm.h"
+
+//#define DEBUG_MMU
+
+/* Put the virtual CPU into its reset state.
+   Everything in CPUX86State located before the 'breakpoints' member is
+   zeroed, the TLB is flushed, and the registers are then loaded with their
+   reset values.
+   NOTE: must be called outside the CPU execute loop */
+void cpu_reset(CPUX86State *env)
+{
+ int i;
+
+ if (qemu_loglevel_mask(CPU_LOG_RESET)) {
+ qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
+ log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+ }
+
+ /* wipe the part of the state that precedes 'breakpoints'; members from
+    'breakpoints' onwards are left untouched */
+ memset(env, 0, offsetof(CPUX86State, breakpoints));
+
+ tlb_flush(env, 1);
+
+ env->old_exception = -1;
+
+ /* init to reset state */
+
+#ifdef CONFIG_SOFTMMU
+ env->hflags |= HF_SOFTMMU_MASK;
+#endif
+ env->hflags2 |= HF2_GIF_MASK;
+
+ cpu_x86_update_cr0(env, 0x60000010);
+ env->a20_mask = ~0x0;
+ env->smbase = 0x30000;
+
+ /* descriptor tables, LDT and task register: limit 0xffff */
+ env->idt.limit = 0xffff;
+ env->gdt.limit = 0xffff;
+ env->ldt.limit = 0xffff;
+ env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT);
+ env->tr.limit = 0xffff;
+ env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT);
+
+ /* CS gets selector 0xf000 and base 0xffff0000; together with the EIP
+    value 0xfff0 set below, execution starts at 0xfffffff0 */
+ cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
+ DESC_R_MASK | DESC_A_MASK);
+ /* the data segments all start out with selector 0, base 0, limit 0xffff */
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+
+ env->eip = 0xfff0;
+#ifndef VBOX /* We'll get the right value from CPUM. */
+ env->regs[R_EDX] = env->cpuid_version;
+#endif
+
+ env->eflags = 0x2;
+
+ /* FPU init */
+ for(i = 0;i < 8; i++)
+ env->fptags[i] = 1;
+ env->fpuc = 0x37f;
+
+ env->mxcsr = 0x1f80;
+
+ /* debug registers: clear them all, then load the fixed DR6/DR7 values
+    and drop every breakpoint/watchpoint carrying the BP_CPU flag */
+ memset(env->dr, 0, sizeof(env->dr));
+ env->dr[6] = DR6_FIXED_1;
+ env->dr[7] = DR7_FIXED_1;
+ cpu_breakpoint_remove_all(env, BP_CPU);
+ cpu_watchpoint_remove_all(env, BP_CPU);
+
+#ifndef VBOX
+ env->mcg_status = 0;
+#endif
+}
+
+/* Release a CPU state.  In non-VBOX builds the structure is freed with
+   qemu_free(); in VBOX builds this function does nothing. */
+void cpu_x86_close(CPUX86State *env)
+{
+#ifndef VBOX
+ qemu_free(env);
+#endif
+}
+
+/***********************************************************/
+/* x86 debug */
+
+/* Printable names for the cc_op values, indexed by env->cc_op when the CPU
+   state is dumped.  Apart from the first two entries, each operation has
+   four variants with a B/W/L/Q suffix. */
+static const char *cc_op_str[] = {
+ "DYNAMIC",
+ "EFLAGS",
+
+ "MULB",
+ "MULW",
+ "MULL",
+ "MULQ",
+
+ "ADDB",
+ "ADDW",
+ "ADDL",
+ "ADDQ",
+
+ "ADCB",
+ "ADCW",
+ "ADCL",
+ "ADCQ",
+
+ "SUBB",
+ "SUBW",
+ "SUBL",
+ "SUBQ",
+
+ "SBBB",
+ "SBBW",
+ "SBBL",
+ "SBBQ",
+
+ "LOGICB",
+ "LOGICW",
+ "LOGICL",
+ "LOGICQ",
+
+ "INCB",
+ "INCW",
+ "INCL",
+ "INCQ",
+
+ "DECB",
+ "DECW",
+ "DECL",
+ "DECQ",
+
+ "SHLB",
+ "SHLW",
+ "SHLL",
+ "SHLQ",
+
+ "SARB",
+ "SARW",
+ "SARL",
+ "SARQ",
+};
+
+/* Print one line describing the segment cache entry 'sc', labelled 'name':
+   selector, base, limit and raw flags, followed by a decoded summary (DPL
+   and segment kind) when protected mode is on and the segment is present.
+   In VBOX builds the cpu_fprintf parameter is shadowed by a macro that
+   sends the output to RTLogPrintf, so 'f' and the callback are unused. */
+static void
+cpu_x86_dump_seg_cache(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ const char *name, struct SegmentCache *sc)
+{
+#ifdef VBOX
+# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__)
+#endif
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
+ sc->selector, sc->base, sc->limit, sc->flags);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector,
+ (uint32_t)sc->base, sc->limit, sc->flags);
+ }
+
+ /* the decoded part only applies in protected mode to present segments */
+ if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK))
+ goto done;
+
+ cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT);
+ if (sc->flags & DESC_S_MASK) {
+ /* code or data segment */
+ if (sc->flags & DESC_CS_MASK) {
+ cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" :
+ ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16"));
+ cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-',
+ (sc->flags & DESC_R_MASK) ? 'R' : '-');
+ } else {
+ cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS " : "DS16");
+ cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-',
+ (sc->flags & DESC_W_MASK) ? 'W' : '-');
+ }
+ cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-');
+ } else {
+ /* system descriptor: look the 4-bit type up in the table selected by
+    whether HF_LMA_MASK is set */
+ static const char *sys_type_name[2][16] = {
+ { /* 32 bit mode */
+ "Reserved", "TSS16-avl", "LDT", "TSS16-busy",
+ "CallGate16", "TaskGate", "IntGate16", "TrapGate16",
+ "Reserved", "TSS32-avl", "Reserved", "TSS32-busy",
+ "CallGate32", "Reserved", "IntGate32", "TrapGate32"
+ },
+ { /* 64 bit mode */
+ "<hiword>", "Reserved", "LDT", "Reserved", "Reserved",
+ "Reserved", "Reserved", "Reserved", "Reserved",
+ "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64",
+ "Reserved", "IntGate64", "TrapGate64"
+ }
+ };
+ cpu_fprintf(f, "%s",
+ sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0]
+ [(sc->flags & DESC_TYPE_MASK)
+ >> DESC_TYPE_SHIFT]);
+ }
+done:
+ cpu_fprintf(f, "\n");
+#ifdef VBOX
+# undef cpu_fprintf
+#endif
+}
+
+/* Dump the CPU state through cpu_fprintf: general registers and flags, the
+   six segment registers plus LDT and TR, GDT/IDT, control and debug
+   registers and EFER.
+   'flags' selects optional sections: X86_DUMP_CCOP adds the lazy condition
+   code state, X86_DUMP_FPU adds the FPU and XMM registers.
+   In VBOX builds the cpu_fprintf parameter is shadowed by a macro that
+   sends the output to RTLogPrintf, so 'f' and the callback are unused. */
+void cpu_dump_state(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ int flags)
+{
+ int eflags, i, nb;
+ char cc_op_name[32];
+ static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
+
+#ifdef VBOX
+# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__)
+#endif
+ cpu_synchronize_state(env);
+
+ eflags = env->eflags;
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f,
+ "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n"
+ "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n"
+ "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n"
+ "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n"
+ "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+ env->regs[R_EAX],
+ env->regs[R_EBX],
+ env->regs[R_ECX],
+ env->regs[R_EDX],
+ env->regs[R_ESI],
+ env->regs[R_EDI],
+ env->regs[R_EBP],
+ env->regs[R_ESP],
+ env->regs[8],
+ env->regs[9],
+ env->regs[10],
+ env->regs[11],
+ env->regs[12],
+ env->regs[13],
+ env->regs[14],
+ env->regs[15],
+ env->eip, eflags,
+ eflags & DF_MASK ? 'D' : '-',
+ eflags & CC_O ? 'O' : '-',
+ eflags & CC_S ? 'S' : '-',
+ eflags & CC_Z ? 'Z' : '-',
+ eflags & CC_A ? 'A' : '-',
+ eflags & CC_P ? 'P' : '-',
+ eflags & CC_C ? 'C' : '-',
+ env->hflags & HF_CPL_MASK,
+ (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+ (env->a20_mask >> 20) & 1,
+ (env->hflags >> HF_SMM_SHIFT) & 1,
+ env->halted);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n"
+ "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n"
+ "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+ (uint32_t)env->regs[R_EAX],
+ (uint32_t)env->regs[R_EBX],
+ (uint32_t)env->regs[R_ECX],
+ (uint32_t)env->regs[R_EDX],
+ (uint32_t)env->regs[R_ESI],
+ (uint32_t)env->regs[R_EDI],
+ (uint32_t)env->regs[R_EBP],
+ (uint32_t)env->regs[R_ESP],
+ (uint32_t)env->eip, eflags,
+ eflags & DF_MASK ? 'D' : '-',
+ eflags & CC_O ? 'O' : '-',
+ eflags & CC_S ? 'S' : '-',
+ eflags & CC_Z ? 'Z' : '-',
+ eflags & CC_A ? 'A' : '-',
+ eflags & CC_P ? 'P' : '-',
+ eflags & CC_C ? 'C' : '-',
+ env->hflags & HF_CPL_MASK,
+ (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+ (env->a20_mask >> 20) & 1,
+ (env->hflags >> HF_SMM_SHIFT) & 1,
+ env->halted);
+ }
+
+ /* seg_name[] is indexed the same way as env->segs[] */
+ for(i = 0; i < 6; i++) {
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i],
+ &env->segs[i]);
+ }
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt);
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr);
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n",
+ env->gdt.base, env->gdt.limit);
+ cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n",
+ env->idt.base, env->idt.limit);
+ cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n",
+ (uint32_t)env->cr[0],
+ env->cr[2],
+ env->cr[3],
+ (uint32_t)env->cr[4]);
+ for(i = 0; i < 4; i++)
+ cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]);
+ cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n",
+ env->dr[6], env->dr[7]);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "GDT= %08x %08x\n",
+ (uint32_t)env->gdt.base, env->gdt.limit);
+ cpu_fprintf(f, "IDT= %08x %08x\n",
+ (uint32_t)env->idt.base, env->idt.limit);
+ cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n",
+ (uint32_t)env->cr[0],
+ (uint32_t)env->cr[2],
+ (uint32_t)env->cr[3],
+ (uint32_t)env->cr[4]);
+ /* This branch is also compiled for TARGET_X86_64 builds, where the
+    debug registers are 64 bits wide; cast them so the arguments match
+    the %08x conversions, as done for the control registers above. */
+ for(i = 0; i < 4; i++)
+ cpu_fprintf(f, "DR%d=%08x ", i, (uint32_t)env->dr[i]);
+ cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n",
+ (uint32_t)env->dr[6], (uint32_t)env->dr[7]);
+ }
+ if (flags & X86_DUMP_CCOP) {
+ /* print the symbolic cc_op name when it is in range, else the number */
+ if ((unsigned)env->cc_op < CC_OP_NB)
+ snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
+ else
+ snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n",
+ env->cc_src, env->cc_dst,
+ cc_op_name);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
+ (uint32_t)env->cc_src, (uint32_t)env->cc_dst,
+ cc_op_name);
+ }
+ }
+ cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
+ if (flags & X86_DUMP_FPU) {
+ int fptag;
+ /* build the tag byte: bit i is set when fptags[i] is 0 */
+ fptag = 0;
+ for(i = 0; i < 8; i++) {
+ fptag |= ((!env->fptags[i]) << i);
+ }
+ /* FSW is printed with the stack top (fpstt) inserted in bits 11..13 */
+ cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
+ env->fpuc,
+ (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
+ env->fpstt,
+ fptag,
+ env->mxcsr);
+ for(i=0;i<8;i++) {
+#if defined(USE_X86LDOUBLE)
+ union {
+ long double d;
+ struct {
+ uint64_t lower;
+ uint16_t upper;
+ } l;
+ } tmp;
+ tmp.d = env->fpregs[i].d;
+ cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
+ i, tmp.l.lower, tmp.l.upper);
+#else
+ cpu_fprintf(f, "FPR%d=%016" PRIx64,
+ i, env->fpregs[i].mmx.q);
+#endif
+ if ((i & 1) == 1)
+ cpu_fprintf(f, "\n");
+ else
+ cpu_fprintf(f, " ");
+ }
+ /* 16 XMM registers are dumped when HF_CS64_MASK is set, 8 otherwise */
+ if (env->hflags & HF_CS64_MASK)
+ nb = 16;
+ else
+ nb = 8;
+ for(i=0;i<nb;i++) {
+ cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
+ i,
+ env->xmm_regs[i].XMM_L(3),
+ env->xmm_regs[i].XMM_L(2),
+ env->xmm_regs[i].XMM_L(1),
+ env->xmm_regs[i].XMM_L(0));
+ if ((i & 1) == 1)
+ cpu_fprintf(f, "\n");
+ else
+ cpu_fprintf(f, " ");
+ }
+ }
+#ifdef VBOX
+# undef cpu_fprintf
+#endif
+}
+
+/***********************************************************/
+/* x86 mmu */
+/* XXX: add PGE support */
+
+/* Set the A20 gate: any non-zero 'a20_state' enables it.  Nothing happens
+   when bit 20 of a20_mask already matches the request; otherwise the
+   running translation blocks are unlinked, the TLB is flushed and a20_mask
+   is rebuilt. */
+void cpu_x86_set_a20(CPUX86State *env, int a20_state)
+{
+    const int enabled = (a20_state != 0);
+
+    if (enabled == ((env->a20_mask >> 20) & 1)) {
+        return; /* gate already in the requested state */
+    }
+#if defined(DEBUG_MMU)
+    printf("A20 update: a20=%d\n", enabled);
+#endif
+    /* if the cpu is currently executing code, we must unlink it and
+       all the potentially executing TB */
+    cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
+
+    /* when a20 is changed, all the MMU mappings are invalid, so
+       we must flush everything */
+    tlb_flush(env, 1);
+    env->a20_mask = ~(1 << 20) | (enabled << 20);
+}
+
+/* Install a new CR0 value and update the state derived from it: the TLB
+   (flushed when PG, WP or PE changes), long mode activation (x86_64 builds
+   only), and the PE, ADDSEG and FPU related bits of hflags. */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0)
+{
+ int pe_state;
+
+#if defined(DEBUG_MMU)
+ printf("CR0 update: CR0=0x%08x\n", new_cr0);
+#endif
+ /* a change of PG, WP or PE invalidates the cached translations */
+ if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
+ (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
+ tlb_flush(env, 1);
+ }
+
+#ifdef TARGET_X86_64
+ /* paging being switched on while EFER.LME is set */
+ if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
+ (env->efer & MSR_EFER_LME)) {
+ /* enter in long mode */
+ /* XXX: generate an exception */
+ /* NOTE: without CR4.PAE the function returns here, leaving CR0 and
+    hflags unchanged */
+ if (!(env->cr[4] & CR4_PAE_MASK))
+ return;
+ env->efer |= MSR_EFER_LMA;
+ env->hflags |= HF_LMA_MASK;
+ } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) &&
+ (env->efer & MSR_EFER_LMA)) {
+ /* exit long mode */
+ env->efer &= ~MSR_EFER_LMA;
+ env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+ env->eip &= 0xffffffff;
+ }
+#endif
+ /* the ET bit is always forced on in the stored value */
+ env->cr[0] = new_cr0 | CR0_ET_MASK;
+
+ /* update PE flag in hidden flags */
+ pe_state = (env->cr[0] & CR0_PE_MASK);
+ env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT);
+ /* ensure that ADDSEG is always set in real mode */
+ env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT);
+ /* update FPU flags */
+ env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
+ ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
+#ifdef VBOX
+ remR3ChangeCpuMode(env);
+#endif
+}
+
+/* Install a new CR3 value.  The TLB is flushed with tlb_flush(env, 0), and
+   only while paging is enabled in CR0.
+   XXX: in legacy PAE mode, generate a GPF if reserved bits are set in
+   the PDPT */
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
+{
+    env->cr[3] = new_cr3;
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        return;
+    }
+#if defined(DEBUG_MMU)
+    printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
+#endif
+    tlb_flush(env, 0);
+}
+
+/* Install a new CR4 value.  The TLB is flushed when PGE, PAE or PSE
+   changes.  OSFXSR is dropped from the value when the CPU does not
+   advertise SSE, and the resulting OSFXSR state is mirrored into hflags. */
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
+{
+#if defined(DEBUG_MMU)
+    /* trace the value being installed (not the old one), matching the
+       CR0 and CR3 update traces */
+    printf("CR4 update: CR4=%08x\n", new_cr4);
+#endif
+    if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
+        (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+        tlb_flush(env, 1);
+    }
+    /* SSE handling */
+    if (!(env->cpuid_features & CPUID_SSE))
+        new_cr4 &= ~CR4_OSFXSR_MASK;
+    if (new_cr4 & CR4_OSFXSR_MASK)
+        env->hflags |= HF_OSFXSR_MASK;
+    else
+        env->hflags &= ~HF_OSFXSR_MASK;
+
+    env->cr[4] = new_cr4;
+#ifdef VBOX
+    remR3ChangeCpuMode(env);
+#endif
+}
+
+#if defined(CONFIG_USER_ONLY)
+
+/* User-mode-only fault handler: every fault is reported as a page fault.
+   CR2 receives the faulting address, the error code carries the write bit
+   (taken from bit 0 of 'is_write') plus the user bit, and 1 is always
+   returned.  'mmu_idx' and 'is_softmmu' are not used. */
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+                             int is_write, int mmu_idx, int is_softmmu)
+{
+    /* user mode only emulation */
+    const int write_bit = is_write & 1;
+
+    env->cr[2] = addr;
+    env->error_code = (write_bit << PG_ERROR_W_BIT) | PG_ERROR_U_MASK;
+    env->exception_index = EXCP0E_PAGE;
+    return 1;
+}
+
+#else
+
+/* XXX: This value should match the one returned by CPUID
+ * and in exec.c */
+# if defined(TARGET_X86_64)
+# define PHYS_ADDR_MASK 0xfffffff000LL
+# else
+# define PHYS_ADDR_MASK 0xffffff000LL
+# endif
+
+/* Handle a softmmu fault on 'addr': walk the guest page tables (legacy
+ 32-bit, PAE or long mode), set accessed/dirty bits, and either install
+ the translation with tlb_set_page() or record an exception in env.
+ is_write1: bit 0 is the write flag; the value 2 is what the NX and I/D
+ checks below test for (presumably an instruction fetch - confirm with
+ the callers).
+ mmu_idx: MMU_USER_IDX selects the user-mode permission checks.
+ is_softmmu: not referenced in this function.
+ return value:
+ -1 = cannot handle fault (never returned by this implementation)
+ 0 = nothing more to do
+ 1 = generate PF fault (or GPF for a non-canonical long-mode address)
+*/
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+ int is_write1, int mmu_idx, int is_softmmu)
+{
+ uint64_t ptep, pte;
+ target_ulong pde_addr, pte_addr;
+ int error_code, is_dirty, prot, page_size, is_write, is_user;
+ target_phys_addr_t paddr;
+ uint32_t page_offset;
+ target_ulong vaddr, virt_addr;
+
+ is_user = mmu_idx == MMU_USER_IDX;
+#if defined(DEBUG_MMU)
+ printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n",
+ addr, is_write1, is_user, env->eip);
+#endif
+ is_write = is_write1 & 1; /* keep only the write bit; is_write1 == 2 is tested separately */
+
+ if (!(env->cr[0] & CR0_PG_MASK)) { /* paging disabled: map the page 1:1 with full access */
+ pte = addr;
+ virt_addr = addr & TARGET_PAGE_MASK;
+ prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ page_size = 4096;
+ goto do_mapping;
+ }
+
+ if (env->cr[4] & CR4_PAE_MASK) {
+ uint64_t pde, pdpe;
+ target_ulong pdpe_addr;
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint64_t pml4e_addr, pml4e;
+ int32_t sext;
+
+ /* test virtual address sign extension */
+ sext = (int64_t)addr >> 47;
+ if (sext != 0 && sext != -1) {
+ env->error_code = 0;
+ env->exception_index = EXCP0D_GPF;
+ return 1;
+ }
+
+ pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pml4e = ldq_phys(pml4e_addr);
+ if (!(pml4e & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) { /* NX bit set while EFER.NXE is clear is reported as a reserved-bit fault */
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ if (!(pml4e & PG_ACCESSED_MASK)) {
+ pml4e |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pml4e_addr, pml4e);
+ }
+ ptep = pml4e ^ PG_NX_MASK; /* NX is kept inverted in ptep so that ANDing the levels accumulates every restriction */
+ pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ ptep &= pdpe ^ PG_NX_MASK;
+ if (!(pdpe & PG_ACCESSED_MASK)) {
+ pdpe |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pdpe_addr, pdpe);
+ }
+ } else
+#endif
+ {
+ /* XXX: load them when cr3 is loaded ? */
+ pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; /* start from the most permissive combination (NX is inverted here) */
+ }
+
+ pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pde = ldq_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ ptep &= pde ^ PG_NX_MASK;
+ if (pde & PG_PSE_MASK) {
+ /* 2 MB page */
+ page_size = 2048 * 1024;
+ ptep ^= PG_NX_MASK; /* undo the inversion: from here on NX set means no-execute */
+ if ((ptep & PG_NX_MASK) && is_write1 == 2)
+ goto do_fault_protect;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pde & PG_DIRTY_MASK); /* a write to a not-yet-dirty page must set the dirty bit */
+ if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+ pde |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pde |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+ /* align to page_size */
+ pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
+ virt_addr = addr & ~(page_size - 1);
+ } else {
+ /* 4 KB page */
+ if (!(pde & PG_ACCESSED_MASK)) {
+ pde |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+ pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pte = ldq_phys(pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ /* combine pde and pte nx, user and rw protections */
+ ptep &= pte ^ PG_NX_MASK;
+ ptep ^= PG_NX_MASK;
+ if ((ptep & PG_NX_MASK) && is_write1 == 2)
+ goto do_fault_protect;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+ if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+ pte |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pte |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pte_addr, pte);
+ }
+ page_size = 4096;
+ virt_addr = addr & ~0xfff;
+ pte = pte & (PHYS_ADDR_MASK | 0xfff);
+ }
+ } else {
+ uint32_t pde;
+
+ /* page directory entry */
+ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
+ env->a20_mask;
+ pde = ldl_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ /* if PSE bit is set, then we use a 4MB page */
+ if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+ page_size = 4096 * 1024;
+ if (is_user) {
+ if (!(pde & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(pde & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(pde & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+ if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+ pde |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pde |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+
+ pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+ ptep = pte;
+ virt_addr = addr & ~(page_size - 1);
+ } else {
+ if (!(pde & PG_ACCESSED_MASK)) {
+ pde |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+
+ /* page table entry */
+ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
+ env->a20_mask;
+ pte = ldl_phys(pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ /* combine pde and pte user and rw protections */
+ ptep = pte & pde;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+ if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+ pte |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pte |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pte_addr, pte);
+ }
+ page_size = 4096;
+ virt_addr = addr & ~0xfff;
+ }
+ }
+ /* the page can be put in the TLB */
+ prot = PAGE_READ;
+ if (!(ptep & PG_NX_MASK))
+ prot |= PAGE_EXEC;
+ if (pte & PG_DIRTY_MASK) {
+ /* only set write access if already dirty... otherwise wait
+ for dirty access */
+ if (is_user) {
+ if (ptep & PG_RW_MASK)
+ prot |= PAGE_WRITE;
+ } else {
+ if (!(env->cr[0] & CR0_WP_MASK) ||
+ (ptep & PG_RW_MASK))
+ prot |= PAGE_WRITE;
+ }
+ }
+ do_mapping:
+#ifndef VBOX
+ pte = pte & env->a20_mask;
+#endif
+
+ /* Even if 4MB pages, we map only one 4KB page in the cache to
+ avoid filling it too fast */
+ page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+ paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+#ifdef VBOX
+ paddr &= env->a20_mask;
+#endif
+ vaddr = virt_addr + page_offset;
+
+ tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
+ return 0;
+ do_fault_protect:
+ error_code = PG_ERROR_P_MASK; /* protection violation on a present page; falls through to do_fault */
+ do_fault:
+ error_code |= (is_write << PG_ERROR_W_BIT);
+ if (is_user)
+ error_code |= PG_ERROR_U_MASK;
+ if (is_write1 == 2 &&
+ (env->efer & MSR_EFER_NXE) &&
+ (env->cr[4] & CR4_PAE_MASK))
+ error_code |= PG_ERROR_I_D_MASK;
+ if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) { /* page faults are intercepted: the address goes to the VMCB exit_info_2 field instead of CR2 */
+ /* cr2 is not modified in case of exceptions */
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+ addr);
+ } else {
+ env->cr[2] = addr;
+ }
+ env->error_code = error_code;
+ env->exception_index = EXCP0E_PAGE;
+ return 1;
+}
+
+/* Debug-only translation of the guest virtual address 'addr' to a guest
+ * physical page address.  The page tables are read with ldq_phys()/ldl_phys();
+ * no accessed/dirty bits are written and no exception state is touched.
+ *
+ * Returns the physical address of the 4 KB page containing 'addr', or -1 when
+ * the address is non-canonical (long mode) or a table entry is not present.
+ *
+ * The walk mirrors cpu_x86_handle_mmu_fault():
+ *  - CR0.PG is tested first, so CR4.PAE is ignored while paging is off;
+ *  - PAE/long-mode entries are masked with PHYS_ADDR_MASK so the NX bit and
+ *    other high bits never end up in a table address or in the result.
+ */
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+    target_ulong pde_addr, pte_addr;
+    uint64_t pte;
+    target_phys_addr_t paddr;
+    uint32_t page_offset;
+    int page_size;
+
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        /* paging disabled: the address maps 1:1, whatever CR4.PAE says */
+        pte = addr;
+        page_size = 4096;
+        pte = pte & env->a20_mask;
+    } else if (env->cr[4] & CR4_PAE_MASK) {
+        target_ulong pdpe_addr;
+        uint64_t pde, pdpe;
+
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            uint64_t pml4e_addr, pml4e;
+            int32_t sext;
+
+            /* test virtual address sign extension */
+            sext = (int64_t)addr >> 47;
+            if (sext != 0 && sext != -1)
+                return -1;
+
+            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pml4e = ldq_phys(pml4e_addr);
+            if (!(pml4e & PG_PRESENT_MASK))
+                return -1;
+
+            pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        } else
+#endif
+        {
+            pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+                env->a20_mask;
+            pdpe = ldq_phys(pdpe_addr);
+            if (!(pdpe & PG_PRESENT_MASK))
+                return -1;
+        }
+
+        pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+            env->a20_mask;
+        pde = ldq_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK)) {
+            return -1;
+        }
+        if (pde & PG_PSE_MASK) {
+            /* 2 MB page: keep the frame bits aligned to page_size plus the
+               low flag bits, exactly like the fault handler does */
+            page_size = 2048 * 1024;
+            pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
+        } else {
+            /* 4 KB page */
+            pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+                env->a20_mask;
+            page_size = 4096;
+            pte = ldq_phys(pte_addr);
+        }
+        if (!(pte & PG_PRESENT_MASK))
+            return -1;
+        /* drop NX and any other bit above the physical address range */
+        pte = pte & (PHYS_ADDR_MASK | 0xfff);
+    } else {
+        uint32_t pde;
+
+        /* page directory entry */
+        pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
+        pde = ldl_phys(pde_addr);
+        if (!(pde & PG_PRESENT_MASK))
+            return -1;
+        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+            pte = pde & ~0x003ff000; /* align to 4MB */
+            page_size = 4096 * 1024;
+        } else {
+            /* page table entry */
+            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
+            pte = ldl_phys(pte_addr);
+            if (!(pte & PG_PRESENT_MASK))
+                return -1;
+            page_size = 4096;
+        }
+        pte = pte & env->a20_mask;
+    }
+
+    /* only the 4 KB page containing addr is reported, even for large pages */
+    page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+    paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+    return paddr;
+}
+
+void hw_breakpoint_insert(CPUState *env, int index) /* Install the breakpoint or watchpoint
+ * described by debug register 'index' (address in env->dr[index], type and
+ * length decoded from env->dr[7]). */
+{
+ int type, err = 0;
+
+ switch (hw_breakpoint_type(env->dr[7], index)) {
+ case 0: /* type 0: code breakpoint, inserted only when enabled in DR7 */
+ if (hw_breakpoint_enabled(env->dr[7], index))
+ err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU,
+ &env->cpu_breakpoint[index]);
+ break;
+ case 1: /* type 1: watchpoint on writes */
+ type = BP_CPU | BP_MEM_WRITE;
+ goto insert_wp;
+ case 2:
+ /* No support for I/O watchpoints yet */
+ break;
+ case 3: /* type 3: watchpoint on any access; shares the insertion code with type 1 */
+ type = BP_CPU | BP_MEM_ACCESS;
+ insert_wp:
+ err = cpu_watchpoint_insert(env, env->dr[index],
+ hw_breakpoint_len(env->dr[7], index),
+ type, &env->cpu_watchpoint[index]);
+ break;
+ }
+ if (err) /* insertion failed: make sure no stale reference is kept for this slot */
+ env->cpu_breakpoint[index] = NULL;
+}
+
+/* Remove whatever hw_breakpoint_insert() registered for debug register
+ * 'index'.  Does nothing when the slot holds no reference. */
+void hw_breakpoint_remove(CPUState *env, int index)
+{
+    int bp_type;
+
+    if (env->cpu_breakpoint[index] == NULL)
+        return;
+
+    bp_type = hw_breakpoint_type(env->dr[7], index);
+    if (bp_type == 0) {
+        /* code breakpoint: only present if it was enabled in DR7 */
+        if (hw_breakpoint_enabled(env->dr[7], index))
+            cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]);
+    } else if (bp_type == 1 || bp_type == 3) {
+        /* write and access watchpoints */
+        cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]);
+    }
+    /* type 2: No support for I/O watchpoints yet */
+}
+
+/* Recompute the four low hit bits of DR6 from the current eip and from the
+ * watchpoints flagged BP_WATCHPOINT_HIT.  DR6 is written back when an enabled
+ * breakpoint hit or when force_dr6_update is non-zero.
+ * Returns 1 if at least one hit belongs to a breakpoint enabled in DR7. */
+int check_hw_breakpoints(CPUState *env, int force_dr6_update)
+{
+    target_ulong new_dr6 = env->dr[6] & ~0xf;
+    int any_enabled_hit = 0;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        int kind = hw_breakpoint_type(env->dr[7], i);
+        int matched;
+
+        if (kind == 0) {
+            /* code breakpoint: hit when its address is the current eip */
+            matched = env->dr[i] == env->eip;
+        } else {
+            /* odd types are data watchpoints: hit when flagged as such */
+            matched = (kind & 1) && env->cpu_watchpoint[i] &&
+                      (env->cpu_watchpoint[i]->flags & BP_WATCHPOINT_HIT);
+        }
+        if (!matched)
+            continue;
+
+        new_dr6 |= 1 << i;
+        if (hw_breakpoint_enabled(env->dr[7], i))
+            any_enabled_hit = 1;
+    }
+
+    if (any_enabled_hit || force_dr6_update)
+        env->dr[6] = new_dr6;
+    return any_enabled_hit;
+}
+
+static CPUDebugExcpHandler *prev_debug_excp_handler;
+
+void raise_exception_env(int exception_index, CPUState *env);
+
+static void breakpoint_handler(CPUState *env) /* Debug-exception hook registered from
+ * cpu_x86_init(); handles BP_CPU watchpoints and breakpoints, then chains to
+ * the previously installed handler. */
+{
+ CPUBreakpoint *bp;
+
+ if (env->watchpoint_hit) {
+ if (env->watchpoint_hit->flags & BP_CPU) { /* only BP_CPU-flagged watchpoints are handled here */
+ env->watchpoint_hit = NULL;
+ if (check_hw_breakpoints(env, 0))
+ raise_exception_env(EXCP01_DB, env);
+ else
+ cpu_resume_from_signal(env, NULL);
+ }
+ } else {
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry) /* look for a breakpoint located at the current eip */
+ if (bp->pc == env->eip) {
+ if (bp->flags & BP_CPU) {
+ check_hw_breakpoints(env, 1); /* force the DR6 update before raising the debug exception */
+ raise_exception_env(EXCP01_DB, env);
+ }
+ break;
+ }
+ }
+ if (prev_debug_excp_handler) /* chain to the handler that was installed before this one, if any */
+ prev_debug_excp_handler(env);
+}
+
+#ifndef VBOX
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc) /* Record a machine-check
+ * event in bank 'bank' of cenv->mce_banks and, for uncorrected errors, raise
+ * CPU_INTERRUPT_MCE.  Silently returns when the bank number is out of range,
+ * when 'status' lacks MCI_STATUS_VAL, or when reporting is disabled. */
+{
+ uint64_t mcg_cap = cenv->mcg_cap;
+ unsigned bank_num = mcg_cap & 0xff; /* the low byte of mcg_cap is used as the number of banks */
+ uint64_t *banks = cenv->mce_banks;
+
+ if (bank >= bank_num || !(status & MCI_STATUS_VAL))
+ return;
+
+ /*
+ * if MSR_MCG_CTL is not all 1s, the uncorrected error
+ * reporting is disabled
+ */
+ if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+ cenv->mcg_ctl != ~(uint64_t)0)
+ return;
+ banks += 4 * bank; /* four uint64_t slots per bank: [0] control, [1] status, [2] addr, [3] misc */
+ /*
+ * if MSR_MCi_CTL is not all 1s, the uncorrected error
+ * reporting is disabled for the bank
+ */
+ if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
+ return;
+ if (status & MCI_STATUS_UC) {
+ if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
+ !(cenv->cr[4] & CR4_MCE_MASK)) { /* a machine check is already in progress, or CR4.MCE is clear: request a system reset */
+ fprintf(stderr, "injects mce exception while previous "
+ "one is in progress!\n");
+ qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+ qemu_system_reset_request();
+ return;
+ }
+ if (banks[1] & MCI_STATUS_VAL) /* a valid record is being overwritten: flag the overflow */
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ cenv->mcg_status = mcg_status;
+ banks[1] = status;
+ cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+ } else if (!(banks[1] & MCI_STATUS_VAL)
+ || !(banks[1] & MCI_STATUS_UC)) { /* corrected error: stored unless the bank holds a valid uncorrected record */
+ if (banks[1] & MCI_STATUS_VAL)
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ banks[1] = status;
+ } else
+ banks[1] |= MCI_STATUS_OVER; /* keep the uncorrected record and only note the overflow */
+}
+#endif /* !VBOX */
+#endif /* !CONFIG_USER_ONLY */
+
+#ifndef VBOX
+
+/* Set up the machine-check state of a new CPU.  Nothing is done unless
+ * bits 11:8 of cpuid_version are at least 6 and both CPUID_MCE and CPUID_MCA
+ * are advertised; otherwise the capability and control registers get their
+ * defaults and the first slot of each of the MCE_BANKS_DEF banks is set to
+ * all ones. */
+static void mce_init(CPUX86State *cenv)
+{
+    const unsigned int n_banks = MCE_BANKS_DEF;
+    unsigned int i;
+
+    if (((cenv->cpuid_version >> 8) & 0xf) < 6)
+        return;
+    if ((cenv->cpuid_features & (CPUID_MCE | CPUID_MCA)) != (CPUID_MCE | CPUID_MCA))
+        return;
+
+    cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
+    cenv->mcg_ctl = ~(uint64_t)0;
+    for (i = 0; i < n_banks; i++)
+        cenv->mce_banks[i * 4] = ~(uint64_t)0;
+}
+
+/* Read the segment descriptor named by 'selector' through the debug memory
+ * accessor and decode it.  Bit 2 of the selector picks the LDT over the GDT.
+ * On success stores the segment base, the limit (scaled to bytes when the
+ * granularity bit is set) and the raw second descriptor word in the output
+ * parameters and returns 1.  Returns 0, leaving the outputs untouched, when
+ * the descriptor lies beyond the table limit or cannot be read. */
+int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
+                            target_ulong *base, unsigned int *limit,
+                            unsigned int *flags)
+{
+    SegmentCache *table = (selector & 0x4) ? &env->ldt : &env->gdt;
+    int offset = selector & ~7;
+    target_ulong desc_addr = table->base + offset;
+    uint32_t lo, hi;
+    unsigned int raw_limit;
+
+    if ((offset + 7) > table->limit)
+        return 0;
+    if (cpu_memory_rw_debug(env, desc_addr, (uint8_t *)&lo, sizeof(lo), 0) != 0)
+        return 0;
+    if (cpu_memory_rw_debug(env, desc_addr + 4, (uint8_t *)&hi, sizeof(hi), 0) != 0)
+        return 0;
+
+    *base = ((lo >> 16) | ((hi & 0xff) << 16) | (hi & 0xff000000));
+
+    raw_limit = (lo & 0xffff) | (hi & 0x000f0000);
+    if (hi & DESC_G_MASK)
+        raw_limit = (raw_limit << 12) | 0xfff;
+    *limit = raw_limit;
+
+    *flags = hi;
+    return 1;
+}
+
+#endif /* !VBOX */
+
+#ifndef VBOX
+CPUX86State *cpu_x86_init(const char *cpu_model)
+#else
+CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model)
+#endif
+{ /* Initialise an x86 CPU state and return it.  Non-VBOX builds allocate the
+ * state and return NULL if cpu_x86_register() fails; VBOX builds initialise
+ * the caller-provided 'env'. */
+#ifndef VBOX
+ CPUX86State *env;
+#endif
+ static int inited; /* static: the table setup below runs only on the first call */
+
+#ifndef VBOX
+ env = qemu_mallocz(sizeof(CPUX86State));
+#endif
+ cpu_exec_init(env);
+ env->cpu_model_str = cpu_model; /* the pointer is stored, the string is not copied */
+
+ /* init various static tables */
+ if (!inited) {
+ inited = 1;
+ optimize_flags_init();
+#ifndef CONFIG_USER_ONLY
+ prev_debug_excp_handler =
+ cpu_set_debug_excp_handler(breakpoint_handler); /* remember the previous handler so breakpoint_handler can chain to it */
+#endif
+ }
+#ifndef VBOX
+ if (cpu_x86_register(env, cpu_model) < 0) {
+ cpu_x86_close(env);
+ return NULL;
+ }
+ mce_init(env);
+#endif
+
+ qemu_init_vcpu(env);
+
+ return env;
+}
+
+#ifndef VBOX
+#if !defined(CONFIG_USER_ONLY)
+/* Reset the CPU, keeping only a pending CPU_INTERRUPT_SIPI request across
+ * the reset, then reset its APIC.  Every CPU for which cpu_is_bsp() returns
+ * zero is left halted. */
+void do_cpu_init(CPUState *env)
+{
+    const int pending_sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
+
+    cpu_reset(env);
+    env->interrupt_request = pending_sipi;
+    apic_init_reset(env->apic_state);
+    env->halted = cpu_is_bsp(env) ? 0 : 1;
+}
+
+void do_cpu_sipi(CPUState *env) /* forwards the request to apic_sipi() on this CPU's APIC state */
+{
+ apic_sipi(env->apic_state);
+}
+#else
+void do_cpu_init(CPUState *env) /* CONFIG_USER_ONLY variant: intentionally empty */
+{
+}
+void do_cpu_sipi(CPUState *env) /* CONFIG_USER_ONLY variant: intentionally empty */
+{
+}
+#endif
+#endif /* !VBOX */
diff --git a/src/recompiler/target-i386/helper.h b/src/recompiler/target-i386/helper.h
new file mode 100644
index 00000000..8307304a
--- /dev/null
+++ b/src/recompiler/target-i386/helper.h
@@ -0,0 +1,253 @@
+#include "def-helper.h" /* included a second time at the very end of this header */
+
+DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int) /* the two lazy-flags helpers are the only ones declared with TCG_CALL_PURE */
+DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int)
+
+DEF_HELPER_0(lock, void)
+DEF_HELPER_0(unlock, void)
+DEF_HELPER_2(write_eflags, void, tl, i32)
+DEF_HELPER_0(read_eflags, tl)
+DEF_HELPER_1(divb_AL, void, tl)
+DEF_HELPER_1(idivb_AL, void, tl)
+DEF_HELPER_1(divw_AX, void, tl)
+DEF_HELPER_1(idivw_AX, void, tl)
+DEF_HELPER_1(divl_EAX, void, tl)
+DEF_HELPER_1(idivl_EAX, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(mulq_EAX_T0, void, tl) /* the 'q' variants are declared only for x86-64 targets */
+DEF_HELPER_1(imulq_EAX_T0, void, tl)
+DEF_HELPER_2(imulq_T0_T1, tl, tl, tl)
+DEF_HELPER_1(divq_EAX, void, tl)
+DEF_HELPER_1(idivq_EAX, void, tl)
+#endif
+
+DEF_HELPER_1(aam, void, int)
+DEF_HELPER_1(aad, void, int)
+DEF_HELPER_0(aaa, void)
+DEF_HELPER_0(aas, void)
+DEF_HELPER_0(daa, void)
+DEF_HELPER_0(das, void)
+
+DEF_HELPER_1(lsl, tl, tl)
+DEF_HELPER_1(lar, tl, tl)
+DEF_HELPER_1(verr, void, tl)
+DEF_HELPER_1(verw, void, tl)
+DEF_HELPER_1(lldt, void, int)
+DEF_HELPER_1(ltr, void, int)
+DEF_HELPER_2(load_seg, void, int, int)
+DEF_HELPER_3(ljmp_protected, void, int, tl, int)
+DEF_HELPER_4(lcall_real, void, int, tl, int, int)
+DEF_HELPER_4(lcall_protected, void, int, tl, int, int)
+DEF_HELPER_1(iret_real, void, int)
+DEF_HELPER_2(iret_protected, void, int, int)
+DEF_HELPER_2(lret_protected, void, int, int)
+DEF_HELPER_1(read_crN, tl, int)
+DEF_HELPER_2(write_crN, void, int, tl)
+DEF_HELPER_1(lmsw, void, tl)
+DEF_HELPER_0(clts, void)
+DEF_HELPER_2(movl_drN_T0, void, int, tl)
+DEF_HELPER_1(invlpg, void, tl)
+
+DEF_HELPER_3(enter_level, void, int, int, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_3(enter64_level, void, int, int, tl)
+#endif
+DEF_HELPER_0(sysenter, void)
+DEF_HELPER_1(sysexit, void, int)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(syscall, void, int)
+DEF_HELPER_1(sysret, void, int)
+#endif
+DEF_HELPER_1(hlt, void, int)
+DEF_HELPER_1(monitor, void, tl)
+DEF_HELPER_1(mwait, void, int)
+DEF_HELPER_0(debug, void)
+DEF_HELPER_0(reset_rf, void)
+DEF_HELPER_2(raise_interrupt, void, int, int)
+DEF_HELPER_1(raise_exception, void, int)
+DEF_HELPER_0(cli, void)
+DEF_HELPER_0(sti, void)
+DEF_HELPER_0(set_inhibit_irq, void)
+DEF_HELPER_0(reset_inhibit_irq, void)
+DEF_HELPER_2(boundw, void, tl, int)
+DEF_HELPER_2(boundl, void, tl, int)
+DEF_HELPER_0(rsm, void)
+DEF_HELPER_1(into, void, int)
+DEF_HELPER_1(cmpxchg8b, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cmpxchg16b, void, tl)
+#endif
+DEF_HELPER_0(single_step, void)
+DEF_HELPER_0(cpuid, void)
+DEF_HELPER_0(rdtsc, void)
+DEF_HELPER_0(rdtscp, void)
+DEF_HELPER_0(rdpmc, void)
+DEF_HELPER_0(rdmsr, void)
+DEF_HELPER_0(wrmsr, void)
+
+DEF_HELPER_1(check_iob, void, i32) /* port I/O helpers: every argument is declared i32, the 'in' helpers return tl */
+DEF_HELPER_1(check_iow, void, i32)
+DEF_HELPER_1(check_iol, void, i32)
+DEF_HELPER_2(outb, void, i32, i32)
+DEF_HELPER_1(inb, tl, i32)
+DEF_HELPER_2(outw, void, i32, i32)
+DEF_HELPER_1(inw, tl, i32)
+DEF_HELPER_2(outl, void, i32, i32)
+DEF_HELPER_1(inl, tl, i32)
+
+DEF_HELPER_2(svm_check_intercept_param, void, i32, i64)
+DEF_HELPER_2(vmexit, void, i32, i64)
+DEF_HELPER_3(svm_check_io, void, i32, i32, i32)
+DEF_HELPER_2(vmrun, void, int, int)
+DEF_HELPER_0(vmmcall, void)
+DEF_HELPER_1(vmload, void, int)
+DEF_HELPER_1(vmsave, void, int)
+DEF_HELPER_0(stgi, void)
+DEF_HELPER_0(clgi, void)
+DEF_HELPER_0(skinit, void)
+DEF_HELPER_1(invlpga, void, int)
+
+/* x86 FPU */
+
+DEF_HELPER_1(flds_FT0, void, i32)
+DEF_HELPER_1(fldl_FT0, void, i64)
+DEF_HELPER_1(fildl_FT0, void, s32)
+DEF_HELPER_1(flds_ST0, void, i32)
+DEF_HELPER_1(fldl_ST0, void, i64)
+DEF_HELPER_1(fildl_ST0, void, s32)
+DEF_HELPER_1(fildll_ST0, void, s64)
+#ifndef VBOX
+DEF_HELPER_0(fsts_ST0, i32)
+DEF_HELPER_0(fstl_ST0, i64)
+DEF_HELPER_0(fist_ST0, s32)
+DEF_HELPER_0(fistl_ST0, s32)
+DEF_HELPER_0(fistll_ST0, s64)
+DEF_HELPER_0(fistt_ST0, s32)
+DEF_HELPER_0(fisttl_ST0, s32)
+DEF_HELPER_0(fisttll_ST0, s64)
+#else /* VBOX */
+DEF_HELPER_0(fsts_ST0, RTCCUINTREG) /* VBOX builds declare RTCCUINTREG/RTCCINTREG where the other branch uses i32/s32; the 64-bit returns are the same */
+DEF_HELPER_0(fstl_ST0, i64)
+DEF_HELPER_0(fist_ST0, RTCCINTREG)
+DEF_HELPER_0(fistl_ST0, RTCCINTREG)
+DEF_HELPER_0(fistll_ST0, s64)
+DEF_HELPER_0(fistt_ST0, RTCCINTREG)
+DEF_HELPER_0(fisttl_ST0, RTCCINTREG)
+DEF_HELPER_0(fisttll_ST0, s64)
+#endif /* VBOX */
+DEF_HELPER_1(fldt_ST0, void, tl)
+DEF_HELPER_1(fstt_ST0, void, tl)
+DEF_HELPER_0(fpush, void)
+DEF_HELPER_0(fpop, void)
+DEF_HELPER_0(fdecstp, void)
+DEF_HELPER_0(fincstp, void)
+DEF_HELPER_1(ffree_STN, void, int)
+DEF_HELPER_0(fmov_ST0_FT0, void)
+DEF_HELPER_1(fmov_FT0_STN, void, int)
+DEF_HELPER_1(fmov_ST0_STN, void, int)
+DEF_HELPER_1(fmov_STN_ST0, void, int)
+DEF_HELPER_1(fxchg_ST0_STN, void, int)
+DEF_HELPER_0(fcom_ST0_FT0, void)
+DEF_HELPER_0(fucom_ST0_FT0, void)
+DEF_HELPER_0(fcomi_ST0_FT0, void)
+DEF_HELPER_0(fucomi_ST0_FT0, void)
+DEF_HELPER_0(fadd_ST0_FT0, void)
+DEF_HELPER_0(fmul_ST0_FT0, void)
+DEF_HELPER_0(fsub_ST0_FT0, void)
+DEF_HELPER_0(fsubr_ST0_FT0, void)
+DEF_HELPER_0(fdiv_ST0_FT0, void)
+DEF_HELPER_0(fdivr_ST0_FT0, void)
+DEF_HELPER_1(fadd_STN_ST0, void, int)
+DEF_HELPER_1(fmul_STN_ST0, void, int)
+DEF_HELPER_1(fsub_STN_ST0, void, int)
+DEF_HELPER_1(fsubr_STN_ST0, void, int)
+DEF_HELPER_1(fdiv_STN_ST0, void, int)
+DEF_HELPER_1(fdivr_STN_ST0, void, int)
+DEF_HELPER_0(fchs_ST0, void)
+DEF_HELPER_0(fabs_ST0, void)
+DEF_HELPER_0(fxam_ST0, void)
+DEF_HELPER_0(fld1_ST0, void)
+DEF_HELPER_0(fldl2t_ST0, void)
+DEF_HELPER_0(fldl2e_ST0, void)
+DEF_HELPER_0(fldpi_ST0, void)
+DEF_HELPER_0(fldlg2_ST0, void)
+DEF_HELPER_0(fldln2_ST0, void)
+DEF_HELPER_0(fldz_ST0, void)
+DEF_HELPER_0(fldz_FT0, void)
+#ifndef VBOX
+DEF_HELPER_0(fnstsw, i32)
+DEF_HELPER_0(fnstcw, i32)
+#else /* VBOX */
+DEF_HELPER_0(fnstsw, RTCCUINTREG) /* same substitution as for the store helpers above */
+DEF_HELPER_0(fnstcw, RTCCUINTREG)
+#endif /* VBOX */
+DEF_HELPER_1(fldcw, void, i32)
+DEF_HELPER_0(fclex, void)
+DEF_HELPER_0(fwait, void)
+DEF_HELPER_0(fninit, void)
+DEF_HELPER_1(fbld_ST0, void, tl)
+DEF_HELPER_1(fbst_ST0, void, tl)
+DEF_HELPER_0(f2xm1, void)
+DEF_HELPER_0(fyl2x, void)
+DEF_HELPER_0(fptan, void)
+DEF_HELPER_0(fpatan, void)
+DEF_HELPER_0(fxtract, void)
+DEF_HELPER_0(fprem1, void)
+DEF_HELPER_0(fprem, void)
+DEF_HELPER_0(fyl2xp1, void)
+DEF_HELPER_0(fsqrt, void)
+DEF_HELPER_0(fsincos, void)
+DEF_HELPER_0(frndint, void)
+DEF_HELPER_0(fscale, void)
+DEF_HELPER_0(fsin, void)
+DEF_HELPER_0(fcos, void)
+DEF_HELPER_2(fstenv, void, tl, int)
+DEF_HELPER_2(fldenv, void, tl, int)
+DEF_HELPER_2(fsave, void, tl, int)
+DEF_HELPER_2(frstor, void, tl, int)
+DEF_HELPER_2(fxsave, void, tl, int)
+DEF_HELPER_2(fxrstor, void, tl, int)
+DEF_HELPER_1(bsf, tl, tl)
+DEF_HELPER_1(bsr, tl, tl)
+DEF_HELPER_2(lzcnt, tl, tl, int)
+
+/* MMX/SSE */
+
+DEF_HELPER_0(enter_mmx, void)
+DEF_HELPER_0(emms, void)
+DEF_HELPER_2(movq, void, ptr, ptr)
+
+#define SHIFT 0 /* ops_sse_header.h is included twice, once per SHIFT value */
+#include "ops_sse_header.h"
+#define SHIFT 1 /* NOTE(review): redefining SHIFT here presumably relies on ops_sse_header.h undefining it - confirm */
+#include "ops_sse_header.h"
+
+DEF_HELPER_2(rclb, tl, tl, tl) /* rotate-through-carry helpers, one per operand-size suffix */
+DEF_HELPER_2(rclw, tl, tl, tl)
+DEF_HELPER_2(rcll, tl, tl, tl)
+DEF_HELPER_2(rcrb, tl, tl, tl)
+DEF_HELPER_2(rcrw, tl, tl, tl)
+DEF_HELPER_2(rcrl, tl, tl, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(rclq, tl, tl, tl) /* the 'q' variants are declared only for x86-64 targets */
+DEF_HELPER_2(rcrq, tl, tl, tl)
+#endif
+
+#ifdef VBOX
+DEF_HELPER_1(write_eflags_vme, void, tl) /* declarations in this section exist only in VBOX builds */
+DEF_HELPER_0(read_eflags_vme, tl)
+DEF_HELPER_0(cli_vme, void)
+DEF_HELPER_0(sti_vme, void)
+DEF_HELPER_0(check_external_event, void)
+DEF_HELPER_0(dump_state, void)
+DEF_HELPER_1(sync_seg, void, i32)
+
+void helper_external_event(void); /* plain C prototypes, not declared through the DEF_HELPER macros */
+void helper_record_call(void);
+
+/* in op_helper.c */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector);
+void sync_ldtr(CPUX86State *env1, int selector);
+#endif /* VBOX */
+
+#include "def-helper.h" /* second inclusion; the first one is at the top of this header */
diff --git a/src/recompiler/target-i386/helper_template.h b/src/recompiler/target-i386/helper_template.h
new file mode 100644
index 00000000..193b3274
--- /dev/null
+++ b/src/recompiler/target-i386/helper_template.h
@@ -0,0 +1,344 @@
+/*
+ * i386 helpers
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#define DATA_BITS (1 << (3 + SHIFT)) /* operand width in bits: 8, 16, 32 or 64 for SHIFT 0..3 */
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) /* most significant bit of a DATA_BITS-wide operand */
+#if DATA_BITS <= 32
+#define SHIFT1_MASK 0x1f /* mask applied to rotate counts: 5 bits for operands up to 32 bits wide */
+#else
+#define SHIFT1_MASK 0x3f /* 6 bits for 64-bit operands */
+#endif
+
+#if DATA_BITS == 8
+#define SUFFIX b /* SUFFIX is pasted onto function names with glue() */
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#elif DATA_BITS == 64
+#define SUFFIX q
+#define DATA_TYPE uint64_t
+#define DATA_STYPE int64_t
+#define DATA_MASK 0xffffffffffffffffULL
+#else
+#error unhandled operand size
+#endif
+
+/* dynamic flags computation */
+
+/* All six arithmetic flags after an addition.  CC_SRC is one operand and
+ * CC_DST the result; the other operand is recovered as CC_DST - CC_SRC. */
+static int glue(compute_all_add, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+    const target_long op2 = CC_DST - CC_SRC;
+    const int carry = (DATA_TYPE)CC_DST < (DATA_TYPE)op1;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ op1 ^ op2) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = lshift((op1 ^ op2 ^ -1) & (op1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+/* Carry flag only, after an addition: set when the truncated result is
+ * smaller than the operand saved in CC_SRC. */
+static int glue(compute_c_add, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+
+    return (DATA_TYPE)CC_DST < (DATA_TYPE)op1;
+}
+
+/* All six arithmetic flags after an add-with-carry whose carry-in was 1:
+ * the second operand is CC_DST - CC_SRC - 1 and the carry test is '<='. */
+static int glue(compute_all_adc, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+    const target_long op2 = CC_DST - CC_SRC - 1;
+    const int carry = (DATA_TYPE)CC_DST <= (DATA_TYPE)op1;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ op1 ^ op2) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = lshift((op1 ^ op2 ^ -1) & (op1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+/* Carry flag only, after an add-with-carry: set when the truncated result
+ * is not greater than the operand saved in CC_SRC. */
+static int glue(compute_c_adc, SUFFIX)(void)
+{
+    const target_long op1 = CC_SRC;
+
+    return (DATA_TYPE)CC_DST <= (DATA_TYPE)op1;
+}
+
+/* All six arithmetic flags after a subtraction.  CC_SRC is the subtrahend
+ * and CC_DST the result, so the minuend is CC_DST + CC_SRC. */
+static int glue(compute_all_sub, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC;
+    const target_long subtrahend = CC_SRC;
+    const int carry = (DATA_TYPE)minuend < (DATA_TYPE)subtrahend;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ minuend ^ subtrahend) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = lshift((minuend ^ subtrahend) & (minuend ^ CC_DST), 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+/* Carry (borrow) flag only, after a subtraction: set when the minuend
+ * CC_DST + CC_SRC is smaller than the subtrahend CC_SRC. */
+static int glue(compute_c_sub, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC;
+    const target_long subtrahend = CC_SRC;
+
+    return (DATA_TYPE)minuend < (DATA_TYPE)subtrahend;
+}
+
+/* All six arithmetic flags after a subtract-with-borrow whose borrow-in was
+ * 1: the minuend is CC_DST + CC_SRC + 1 and the carry test is '<='. */
+static int glue(compute_all_sbb, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC + 1;
+    const target_long subtrahend = CC_SRC;
+    const int carry = (DATA_TYPE)minuend <= (DATA_TYPE)subtrahend;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ minuend ^ subtrahend) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = lshift((minuend ^ subtrahend) & (minuend ^ CC_DST), 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+/* Carry (borrow) flag only, after a subtract-with-borrow: set when the
+ * minuend CC_DST + CC_SRC + 1 is not greater than the subtrahend CC_SRC. */
+static int glue(compute_c_sbb, SUFFIX)(void)
+{
+    const target_long minuend = CC_DST + CC_SRC + 1;
+    const target_long subtrahend = CC_SRC;
+
+    return (DATA_TYPE)minuend <= (DATA_TYPE)subtrahend;
+}
+
+/* Flags after a logical operation: carry, adjust and overflow are always
+ * clear, so only parity, zero and sign are derived from the result CC_DST. */
+static int glue(compute_all_logic, SUFFIX)(void)
+{
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+
+    return parity | zero | sign;
+}
+
+static int glue(compute_c_logic, SUFFIX)(void) /* carry after a logical operation: always clear */
+{
+ return 0;
+}
+
+/* Flags after an increment.  The carry flag is not produced by the
+ * operation: its value is taken unchanged from CC_SRC.  Overflow is set
+ * exactly when the result equals SIGN_MASK. */
+static int glue(compute_all_inc, SUFFIX)(void)
+{
+    const target_long before = CC_DST - 1;
+    const target_long step = 1;
+    const int carry = CC_SRC;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ before ^ step) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_inc, SUFFIX)(void) /* carry after inc: the value saved in CC_SRC; only compiled for DATA_BITS == 32 */
+{
+ return CC_SRC;
+}
+#endif
+
+/* Flags after a decrement.  The carry flag is not produced by the
+ * operation: its value is taken unchanged from CC_SRC.  Overflow is set
+ * exactly when the result equals SIGN_MASK - 1. */
+static int glue(compute_all_dec, SUFFIX)(void)
+{
+    const target_long before = CC_DST + 1;
+    const target_long step = 1;
+    const int carry = CC_SRC;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int adjust = (CC_DST ^ before ^ step) & 0x10;
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+
+    return carry | parity | adjust | zero | sign | overflow;
+}
+
+/* Flags after a left shift.  The carry is the top operand-width bit of
+ * CC_SRC; the adjust flag is undefined and reported as clear. */
+static int glue(compute_all_shl, SUFFIX)(void)
+{
+    const int carry = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* of is defined if shift count == 1 */
+    const int overflow = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | zero | sign | overflow;
+}
+
+static int glue(compute_c_shl, SUFFIX)(void) /* carry after a left shift: the top operand-width bit of CC_SRC */
+{
+ return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_sar, SUFFIX)(void) /* carry after a right shift: bit 0 of CC_SRC; only compiled for DATA_BITS == 32 */
+{
+ return CC_SRC & 1;
+}
+#endif
+
+/* Flags after a right shift.  The carry is bit 0 of CC_SRC; the adjust
+ * flag is undefined and reported as clear. */
+static int glue(compute_all_sar, SUFFIX)(void)
+{
+    const int carry = CC_SRC & 1;
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* of is defined if shift count == 1 */
+    const int overflow = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+
+    return carry | parity | zero | sign | overflow;
+}
+
+#if DATA_BITS == 32
+/* Carry flag only, after a multiplication: set when CC_SRC is non-zero.
+ * Only compiled for DATA_BITS == 32. */
+static int glue(compute_c_mul, SUFFIX)(void)
+{
+    return CC_SRC != 0;
+}
+#endif
+
+/* NOTE: we compute the flags like the P4. On older CPUs, only OF and
+   CF are modified and it is slower to do that.
+   Carry and overflow are both set when CC_SRC is non-zero; the adjust flag
+   is undefined and reported as clear. */
+static int glue(compute_all_mul, SUFFIX)(void)
+{
+    const int carry = (CC_SRC != 0);
+    const int parity = parity_table[(uint8_t)CC_DST];
+    const int zero = ((DATA_TYPE)CC_DST == 0) << 6;
+    const int sign = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    const int overflow = carry << 11;
+
+    return carry | parity | zero | sign | overflow;
+}
+
+/* shifts */
+
+target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1) /* Rotate t0 left
+ * through the carry flag by the count in t1.  Returns the rotated value (t0
+ * unchanged when the effective count is 0) and leaves the resulting flags,
+ * or -1 for a zero count, in env->cc_tmp. */
+{
+ int count, eflags;
+ target_ulong src;
+ target_long res;
+
+ count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+ count = rclw_table[count]; /* NOTE(review): presumably reduces the count modulo 17 - confirm against rclw_table */
+#elif DATA_BITS == 8
+ count = rclb_table[count]; /* NOTE(review): presumably reduces the count modulo 9 - confirm against rclb_table */
+#endif
+ if (count) {
+ eflags = helper_cc_compute_all(CC_OP);
+ t0 &= DATA_MASK;
+ src = t0;
+ res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); /* the old carry lands at bit count - 1 */
+ if (count > 1) /* for count == 1 the shift below would be by DATA_BITS */
+ res |= t0 >> (DATA_BITS + 1 - count);
+ t0 = res;
+ env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+ ((src >> (DATA_BITS - count)) & CC_C); /* new carry: the last bit rotated out of the top */
+ } else {
+ env->cc_tmp = -1; /* NOTE(review): -1 presumably tells the caller that the flags are unchanged - confirm in translate.c */
+ }
+ return t0;
+}
+
+target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1) /* Rotate t0 right
+ * through the carry flag by the count in t1.  Returns the rotated value (t0
+ * unchanged when the effective count is 0) and leaves the resulting flags,
+ * or -1 for a zero count, in env->cc_tmp. */
+{
+ int count, eflags;
+ target_ulong src;
+ target_long res;
+
+ count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+ count = rclw_table[count]; /* the rcl tables are used for rcr as well */
+#elif DATA_BITS == 8
+ count = rclb_table[count];
+#endif
+ if (count) {
+ eflags = helper_cc_compute_all(CC_OP);
+ t0 &= DATA_MASK;
+ src = t0;
+ res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count)); /* the old carry lands at bit DATA_BITS - count */
+ if (count > 1) /* for count == 1 the shift below would be by DATA_BITS */
+ res |= t0 << (DATA_BITS + 1 - count);
+ t0 = res;
+ env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+ ((src >> (count - 1)) & CC_C); /* new carry: the last bit rotated out of the bottom */
+ } else {
+ env->cc_tmp = -1; /* NOTE(review): -1 presumably tells the caller that the flags are unchanged - confirm in translate.c */
+ }
+ return t0;
+}
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SHIFT1_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
diff --git a/src/recompiler/target-i386/op_helper.c b/src/recompiler/target-i386/op_helper.c
new file mode 100644
index 00000000..07b58f8d
--- /dev/null
+++ b/src/recompiler/target-i386/op_helper.c
@@ -0,0 +1,7164 @@
+/*
+ * i386 helpers
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "exec.h"
+#include "exec-all.h"
+#include "host-utils.h"
+#include "ioport.h"
+
+#ifdef VBOX
+# include "qemu-common.h"
+# include <math.h>
+# include "tcg.h"
+# include <VBox/err.h>
+#endif /* VBOX */
+
+//#define DEBUG_PCALL
+
+
+/*
+ * Protected-mode call/interrupt tracing. With DEBUG_PCALL defined the macros
+ * forward to qemu_log_mask() / log_cpu_state_mask() under CPU_LOG_PCALL;
+ * otherwise they expand to empty statements.
+ */
+#ifdef DEBUG_PCALL
+# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
+# define LOG_PCALL_STATE(env) \
+ log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+#else
+# define LOG_PCALL(...) do { } while (0)
+# define LOG_PCALL_STATE(env) do { } while (0)
+#endif
+
+
+#if 0
+#define raise_exception_err(a, b)\
+do {\
+ qemu_log("raise_exception line=%d\n", __LINE__);\
+ (raise_exception_err)(a, b);\
+} while (0)
+#endif
+
+/*
+ * Parity lookup indexed by a byte value: the entry is CC_P when the byte has
+ * an even number of set bits and 0 otherwise.
+ */
+static const uint8_t parity_table[256] = {
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
+
+/*
+ * modulo 17 table: entry i is (i % 17) for a 5-bit rotate count. The 16-bit
+ * rcl/rcr helpers use it to reduce the count (16 data bits plus the carry).
+ */
+static const uint8_t rclw_table[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9,10,11,12,13,14,15,
+ 16, 0, 1, 2, 3, 4, 5, 6,
+ 7, 8, 9,10,11,12,13,14,
+};
+
+/*
+ * modulo 9 table: entry i is (i % 9) for a 5-bit rotate count. The 8-bit
+ * rcl/rcr helpers use it to reduce the count (8 data bits plus the carry).
+ */
+static const uint8_t rclb_table[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 0, 1, 2, 3, 4, 5, 6,
+ 7, 8, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 0, 1, 2, 3, 4,
+};
+
+/*
+ * Floating point constants: 0, 1, pi, log10(2), ln(2), log2(e), log2(10).
+ * NOTE(review): presumably the values loaded by the x87 constant-load
+ * instructions; the users of this table are outside this part of the file.
+ */
+static const CPU86_LDouble f15rk[7] =
+{
+ 0.00000000000000000000L,
+ 1.00000000000000000000L,
+ 3.14159265358979323851L, /*pi*/
+ 0.30102999566398119523L, /*lg2*/
+ 0.69314718055994530943L, /*ln2*/
+ 1.44269504088896340739L, /*l2e*/
+ 3.32192809488736234781L, /*l2t*/
+};
+
+/* broken thread support */
+
+/* Single file-wide spinlock taken by helper_lock() and released by helper_unlock(). */
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+/* Acquires global_cpu_lock (presumably emitted around LOCK-prefixed instructions -- confirm in the translator). */
+void helper_lock(void)
+{
+ spin_lock(&global_cpu_lock);
+}
+
+/* Releases global_cpu_lock; counterpart of helper_lock(). */
+void helper_unlock(void)
+{
+ spin_unlock(&global_cpu_lock);
+}
+
+/*
+ * Writes t0 into EFLAGS by forwarding to load_eflags(); update_mask is passed
+ * through unchanged and selects which bits may be updated.
+ */
+void helper_write_eflags(target_ulong t0, uint32_t update_mask)
+{
+ load_eflags(t0, update_mask);
+}
+
+/*
+ * Builds the EFLAGS value seen by the guest: the lazily evaluated condition
+ * codes, the direction flag, and the bits kept in env->eflags with VM and RF
+ * masked out.
+ */
+target_ulong helper_read_eflags(void)
+{
+    uint32_t cc_bits = helper_cc_compute_all(CC_OP);
+    uint32_t df_bit = (DF & DF_MASK);
+    /* narrowed to 32 bits exactly like the original accumulator */
+    uint32_t stored_bits = env->eflags & ~(VM_MASK | RF_MASK);
+
+    return (uint32_t)(cc_bits | df_bit | stored_bits);
+}
+
+#ifdef VBOX
+
+/*
+ * EFLAGS write for the VME case; asserts that the VM flag is set in
+ * env->eflags.
+ *
+ * Raises #GP when the new value would set IF while VIP is pending, or would
+ * set TF. Otherwise the low 16 bits of TF/AC/ID/NT are loaded through
+ * load_eflags() and the requested IF value is mirrored into VIF instead of IF.
+ */
+void helper_write_eflags_vme(target_ulong t0)
+{
+ unsigned int new_eflags = t0;
+
+ assert(env->eflags & (1<<VM_SHIFT));
+
+ /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+ /* if TF will be set -> #GP */
+ if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK))
+ || (new_eflags & TF_MASK)) {
+ raise_exception(EXCP0D_GPF);
+ } else {
+ load_eflags(new_eflags,
+ (TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff);
+
+ /* the guest's IF request is tracked in VIF; IF itself is not in the update mask */
+ if (new_eflags & IF_MASK) {
+ env->eflags |= VIF_MASK;
+ } else {
+ env->eflags &= ~VIF_MASK;
+ }
+ }
+}
+
+/*
+ * EFLAGS read for the VME case: same composition as helper_read_eflags(), but
+ * IF is replaced by the current VIF, IOPL is reported as 3, and only the low
+ * 16 bits are returned.
+ */
+target_ulong helper_read_eflags_vme(void)
+{
+ uint32_t eflags;
+ eflags = helper_cc_compute_all(CC_OP);
+ eflags |= (DF & DF_MASK);
+ eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+ /* report the virtual interrupt flag in place of the real one */
+ if (env->eflags & VIF_MASK)
+ eflags |= IF_MASK;
+ else
+ eflags &= ~IF_MASK;
+
+ /* According to AMD manual, should be read with IOPL == 3 */
+ eflags |= (3 << IOPL_SHIFT);
+
+ /* We only use helper_read_eflags_vme() in 16-bits mode */
+ return eflags & 0xffff;
+}
+
+/**
+ * Writes the CS base, EIP, EFLAGS and the eight general purpose registers to
+ * the release log.
+ *
+ * Every value is printed with a 32-bit "%08x" conversion, so every argument
+ * is explicitly narrowed to uint32_t. The first line used to pass the fields
+ * uncast, unlike the two register lines; if any of them is wider than 32 bits
+ * in a given build (target_ulong-typed fields presumably are on 64-bit
+ * targets -- confirm against cpu.h) that mismatches the format string.
+ * Only the low 32 bits are logged.
+ */
+void helper_dump_state()
+{
+    LogRel(("CS:EIP=%08x:%08x, FLAGS=%08x\n",
+            (uint32_t)env->segs[R_CS].base, (uint32_t)env->eip,
+            (uint32_t)env->eflags));
+    LogRel(("EAX=%08x\tECX=%08x\tEDX=%08x\tEBX=%08x\n",
+            (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX],
+            (uint32_t)env->regs[R_EDX], (uint32_t)env->regs[R_EBX]));
+    LogRel(("ESP=%08x\tEBP=%08x\tESI=%08x\tEDI=%08x\n",
+            (uint32_t)env->regs[R_ESP], (uint32_t)env->regs[R_EBP],
+            (uint32_t)env->regs[R_ESI], (uint32_t)env->regs[R_EDI]));
+}
+
+/**
+ * Sets DESC_A_MASK in e2, writes the result to the high DWORD (offset 4) of
+ * the selector's descriptor table entry, and returns the updated e2.
+ *
+ * The table is the LDT when X86_SEL_LDT is set in the selector, the GDT
+ * otherwise. No limit check is done here.
+ *
+ * @returns e2 with A set.
+ * @param selector The selector whose descriptor is updated.
+ * @param e2 The 2nd (high) descriptor DWORD.
+ */
+static uint32_t set_segment_accessed(int selector, uint32_t e2)
+{
+ SegmentCache *dt = selector & X86_SEL_LDT ? &env->ldt : &env->gdt;
+ target_ulong ptr = dt->base + (selector & X86_SEL_MASK);
+
+ e2 |= DESC_A_MASK;
+ stl_kernel(ptr + 4, e2);
+ return e2;
+}
+
+#endif /* VBOX */
+
+/*
+ * Reads the two descriptor DWORDs for 'selector' into *e1_ptr (low) and
+ * *e2_ptr (high). Returns 0 on success, or -1 without touching the outputs
+ * when the 8-byte entry does not fit within the table limit.
+ */
+static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr,
+                               int selector)
+{
+    /* selector bit 2 picks the LDT, otherwise the GDT is used */
+    SegmentCache *table = (selector & 0x4) ? &env->ldt : &env->gdt;
+    int offset = selector & ~7;
+    target_ulong desc_addr;
+
+    if ((offset + 7) > table->limit)
+        return -1;
+
+    desc_addr = table->base + offset;
+    *e1_ptr = ldl_kernel(desc_addr);
+    *e2_ptr = ldl_kernel(desc_addr + 4);
+    return 0;
+}
+
+/*
+ * Extracts the segment limit from a descriptor: bits 0..15 of e1 combined
+ * with bits 16..19 of e2. With DESC_G_MASK set in e2 the value is scaled by
+ * 4096 and the low 12 bits are filled with ones.
+ */
+static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
+{
+    unsigned int raw_limit = (e1 & 0xffff) | (e2 & 0x000f0000);
+
+    return (e2 & DESC_G_MASK) ? ((raw_limit << 12) | 0xfff) : raw_limit;
+}
+
+/*
+ * Reassembles the 32-bit segment base scattered over a descriptor:
+ * base[15:0] from e1[31:16], base[23:16] from e2[7:0], base[31:24] from
+ * e2[31:24].
+ */
+static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
+{
+    uint32_t base_low = e1 >> 16;
+    uint32_t base_mid = (e2 & 0xff) << 16;
+    uint32_t base_high = e2 & 0xff000000;
+
+    return base_low | base_mid | base_high;
+}
+
+/*
+ * Fills a SegmentCache from the raw descriptor DWORDs e1/e2: base and limit
+ * are decoded, flags are taken from e2. In VBOX builds the flags are masked
+ * with DESC_RAW_FLAG_BITS, newselector is cleared and the entry is marked
+ * CPUMSELREG_FLAGS_VALID. The selector field is not touched.
+ */
+static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
+{
+ sc->base = get_seg_base(e1, e2);
+ sc->limit = get_seg_limit(e1, e2);
+#ifndef VBOX
+ sc->flags = e2;
+#else
+ sc->flags = e2 & DESC_RAW_FLAG_BITS;
+ sc->newselector = 0;
+ sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+#endif
+}
+
+/*
+ * init the segment cache in vm86 mode: the selector is truncated to 16 bits,
+ * the base is selector << 4 and the limit is 0xffff. VBOX builds load
+ * present/writable/accessed data-segment flags with DPL 3; other builds load
+ * flags 0.
+ */
+static inline void load_seg_vm(int seg, int selector)
+{
+ selector &= 0xffff;
+#ifdef VBOX
+ /* flags must be 0xf3; expand-up read/write accessed data segment with DPL=3. (VT-x) */
+ unsigned flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK;
+ flags |= (3 << DESC_DPL_SHIFT);
+
+ cpu_x86_load_seg_cache(env, seg, selector,
+ (selector << 4), 0xffff, flags);
+#else /* VBOX */
+ cpu_x86_load_seg_cache(env, seg, selector,
+ (selector << 4), 0xffff, 0);
+#endif /* VBOX */
+}
+
+/*
+ * Reads the stack pointer and stack segment stored for privilege level 'dpl'
+ * in the current TSS (env->tr) into *esp_ptr and *ss_ptr.
+ *
+ * Aborts via cpu_abort() when TR is not present or is not a TSS type, and
+ * raises #TS with the TR selector when the entry lies beyond the TSS limit.
+ */
+static inline void get_ss_esp_from_tss(uint32_t *ss_ptr,
+ uint32_t *esp_ptr, int dpl)
+{
+#ifndef VBOX
+ int type, index, shift;
+#else
+ unsigned int type, index, shift;
+#endif
+
+#if 0
+ {
+ int i;
+ printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
+ for(i=0;i<env->tr.limit;i++) {
+ printf("%02x ", env->tr.base[i]);
+ if ((i & 7) == 7) printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
+ if (!(env->tr.flags & DESC_P_MASK))
+ cpu_abort(env, "invalid tss");
+ type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ if ((type & 7) != 3)
+ cpu_abort(env, "invalid tss type");
+ /* shift is 0 for a 16-bit TSS and 1 for a 32-bit TSS (type bit 3) */
+ shift = type >> 3;
+ /* each level's entry is 4 bytes (16-bit TSS) or 8 bytes (32-bit TSS), starting at offset 2 resp. 4 */
+ index = (dpl * 4 + 2) << shift;
+ if (index + (4 << shift) - 1 > env->tr.limit)
+ raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+ if (shift == 0) {
+ *esp_ptr = lduw_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 2);
+ } else {
+ *esp_ptr = ldl_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 4);
+ }
+}
+
+/* XXX: merge with load_seg() */
+/*
+ * Loads segment register seg_reg with 'selector' as part of a task switch,
+ * validating the descriptor first.
+ *
+ * Failed checks raise #TS with the selector as error code, except for a
+ * not-present descriptor which raises #NP. A null selector raises #TS for CS
+ * and SS; for the other registers nothing is loaded here (the VBOX load of a
+ * null entry is compiled out).
+ */
+static void tss_load_seg(int seg_reg, int selector)
+{
+ uint32_t e1, e2;
+ int rpl, dpl, cpl;
+
+#ifdef VBOX
+ e1 = e2 = 0; /* gcc warning? */
+ cpl = env->hflags & HF_CPL_MASK;
+ /* Trying to load a selector with CPL=1? */
+ if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ Log(("RPL 1 -> sel %04X -> %04X (tss_load_seg)\n", selector, selector & 0xfffc));
+ selector = selector & 0xfffc;
+ }
+#endif /* VBOX */
+
+ if ((selector & 0xfffc) != 0) {
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ /* system descriptors cannot be loaded into a segment register */
+ if (!(e2 & DESC_S_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (seg_reg == R_CS) {
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ /* XXX: is it correct ? */
+ if (dpl != rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ if ((e2 & DESC_C_MASK) && dpl > rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ } else if (seg_reg == R_SS) {
+ /* SS must be writable data */
+ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ if (dpl != cpl || dpl != rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ } else {
+ /* not readable code */
+ if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ /* if data or non conforming code, checks the rights */
+ if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) {
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ }
+ }
+ /* the present bit is checked last and reports #NP rather than #TS */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ } else {
+ if (seg_reg == R_SS || seg_reg == R_CS)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+#ifdef VBOX
+# if 0 /** @todo now we ignore loading 0 selectors, need to check what is correct once */
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ 0, 0, 0);
+# endif
+#endif /* VBOX */
+ }
+}
+
+/* Values for the 'source' argument of switch_tss(): what triggered the task switch. */
+#define SWITCH_TSS_JMP 0
+#define SWITCH_TSS_IRET 1
+#define SWITCH_TSS_CALL 2
+
+/* XXX: restore CPU state in registers (PowerPC case) */
+/*
+ * Performs a hardware task switch to the TSS named by tss_selector.
+ *
+ * tss_selector  selector of the target TSS, or of a task gate (descriptor
+ *               type 5), in which case the referenced TSS descriptor is
+ *               loaded first
+ * e1, e2        low/high descriptor DWORDs belonging to tss_selector
+ * source        SWITCH_TSS_JMP, SWITCH_TSS_IRET or SWITCH_TSS_CALL; selects
+ *               the busy-bit, NT and back-link handling
+ * next_eip      EIP value saved into the outgoing TSS
+ *
+ * Sequence: validate the new TSS, save the current register state into the
+ * old TSS, read the new state, update TR/CR3/EFLAGS/registers, then load the
+ * LDT and the segment registers with full checks. Failures raise #NP, #TS or
+ * #GP as coded below.
+ *
+ * The outgoing state is saved in the layout of the OLD TSS (old_type), not of
+ * the new one; the two differ when switching between 16-bit and 32-bit TSSs.
+ */
+static void switch_tss(int tss_selector,
+                       uint32_t e1, uint32_t e2, int source,
+                       uint32_t next_eip)
+{
+    int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
+    target_ulong tss_base;
+    uint32_t new_regs[8], new_segs[6];
+    uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap;
+    uint32_t old_eflags, eflags_mask;
+    SegmentCache *dt;
+#ifndef VBOX
+    int index;
+#else
+    unsigned int index;
+#endif
+    target_ulong ptr;
+
+    type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+    LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
+
+    /* if task gate, we read the TSS segment and we load it */
+    if (type == 5) {
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+        tss_selector = e1 >> 16;
+        if (tss_selector & 4)
+            raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+        if (load_segment(&e1, &e2, tss_selector) != 0)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        if (e2 & DESC_S_MASK)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if ((type & 7) != 1)
+            raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+    }
+
+    if (!(e2 & DESC_P_MASK))
+        raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+
+    /* minimum limit: 103 for a 32-bit TSS (type bit 3 set), 43 for a 16-bit one */
+    if (type & 8)
+        tss_limit_max = 103;
+    else
+        tss_limit_max = 43;
+    tss_limit = get_seg_limit(e1, e2);
+    tss_base = get_seg_base(e1, e2);
+    if ((tss_selector & 4) != 0 ||
+        tss_limit < tss_limit_max)
+        raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+    old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if (old_type & 8)
+        old_tss_limit_max = 103;
+    else
+        old_tss_limit_max = 43;
+
+#ifndef VBOX /* The old TSS is written first... */
+    /* read all the registers from the new TSS */
+    if (type & 8) {
+        /* 32 bit */
+        new_cr3 = ldl_kernel(tss_base + 0x1c);
+        new_eip = ldl_kernel(tss_base + 0x20);
+        new_eflags = ldl_kernel(tss_base + 0x24);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+        for(i = 0; i < 6; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+        new_ldt = lduw_kernel(tss_base + 0x60);
+        new_trap = ldl_kernel(tss_base + 0x64);
+    } else {
+        /* 16 bit */
+        new_cr3 = 0;
+        new_eip = lduw_kernel(tss_base + 0x0e);
+        new_eflags = lduw_kernel(tss_base + 0x10);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+        /* selectors are consecutive 16-bit words, matching the store path below */
+        for(i = 0; i < 4; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2));
+        new_ldt = lduw_kernel(tss_base + 0x2a);
+        new_segs[R_FS] = 0;
+        new_segs[R_GS] = 0;
+        new_trap = 0;
+    }
+#endif
+
+    /* NOTE: we must avoid memory exceptions during the task switch,
+       so we make dummy accesses before */
+    /* XXX: it can still fail in some cases, so a bigger hack is
+       necessary to validate the TLB after having done the accesses */
+
+    v1 = ldub_kernel(env->tr.base);
+    v2 = ldub_kernel(env->tr.base + old_tss_limit_max);
+    stb_kernel(env->tr.base, v1);
+    stb_kernel(env->tr.base + old_tss_limit_max, v2);
+
+    /* clear busy bit (it is restartable) */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (env->tr.selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 &= ~DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+    old_eflags = compute_eflags();
+    if (source == SWITCH_TSS_IRET)
+        old_eflags &= ~NT_MASK;
+
+    /* save the current state in the old TSS, using the old TSS's own layout */
+    if (old_type & 8) {
+        /* 32 bit */
+        stl_kernel(env->tr.base + 0x20, next_eip);
+        stl_kernel(env->tr.base + 0x24, old_eflags);
+        stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX);
+        stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX);
+        stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX);
+        stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX);
+        stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP);
+        stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP);
+        stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI);
+        stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI);
+        for(i = 0; i < 6; i++)
+            stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector);
+#if defined(VBOX) && defined(DEBUG)
+        printf("TSS 32 bits switch\n");
+        printf("Saving CS=%08X\n", env->segs[R_CS].selector);
+#endif
+    } else {
+        /* 16 bit */
+        stw_kernel(env->tr.base + 0x0e, next_eip);
+        stw_kernel(env->tr.base + 0x10, old_eflags);
+        stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX);
+        stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX);
+        stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX);
+        stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX);
+        stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP);
+        stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP);
+        stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI);
+        stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI);
+        for(i = 0; i < 4; i++)
+            stw_kernel(env->tr.base + (0x22 + i * 2), env->segs[i].selector);
+    }
+
+#ifdef VBOX
+    /* read all the registers from the new TSS - may be the same as the old one */
+    if (type & 8) {
+        /* 32 bit */
+        new_cr3 = ldl_kernel(tss_base + 0x1c);
+        new_eip = ldl_kernel(tss_base + 0x20);
+        new_eflags = ldl_kernel(tss_base + 0x24);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+        for(i = 0; i < 6; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+        new_ldt = lduw_kernel(tss_base + 0x60);
+        new_trap = ldl_kernel(tss_base + 0x64);
+    } else {
+        /* 16 bit */
+        new_cr3 = 0;
+        new_eip = lduw_kernel(tss_base + 0x0e);
+        new_eflags = lduw_kernel(tss_base + 0x10);
+        for(i = 0; i < 8; i++)
+            new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+        for(i = 0; i < 4; i++)
+            new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2));
+        new_ldt = lduw_kernel(tss_base + 0x2a);
+        new_segs[R_FS] = 0;
+        new_segs[R_GS] = 0;
+        new_trap = 0;
+    }
+#endif
+
+    /* now if an exception occurs, it will occur in the next task
+       context */
+
+    if (source == SWITCH_TSS_CALL) {
+        /* nested task: store the back link and flag the new task as nested */
+        stw_kernel(tss_base, env->tr.selector);
+        new_eflags |= NT_MASK;
+    }
+
+    /* set busy bit */
+    if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
+        target_ulong ptr;
+        uint32_t e2;
+        ptr = env->gdt.base + (tss_selector & ~7);
+        e2 = ldl_kernel(ptr + 4);
+        e2 |= DESC_TSS_BUSY_MASK;
+        stl_kernel(ptr + 4, e2);
+    }
+
+    /* set the new CPU state */
+    /* from this point, any exception which occurs can give problems */
+    env->cr[0] |= CR0_TS_MASK;
+    env->hflags |= HF_TS_MASK;
+    env->tr.selector = tss_selector;
+    env->tr.base = tss_base;
+    env->tr.limit = tss_limit;
+#ifndef VBOX
+    env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
+#else
+    env->tr.flags = (e2 | DESC_TSS_BUSY_MASK) & DESC_RAW_FLAG_BITS;
+    env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+    env->tr.newselector = 0;
+#endif
+
+    if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
+        cpu_x86_update_cr3(env, new_cr3);
+    }
+
+    /* load all registers without an exception, then reload them with
+       possible exception */
+    env->eip = new_eip;
+    eflags_mask = TF_MASK | AC_MASK | ID_MASK |
+        IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
+    if (!(type & 8))
+        eflags_mask &= 0xffff;
+    load_eflags(new_eflags, eflags_mask);
+    /* XXX: what to do in 16 bit case ? */
+    EAX = new_regs[0];
+    ECX = new_regs[1];
+    EDX = new_regs[2];
+    EBX = new_regs[3];
+    ESP = new_regs[4];
+    EBP = new_regs[5];
+    ESI = new_regs[6];
+    EDI = new_regs[7];
+    if (new_eflags & VM_MASK) {
+        for(i = 0; i < 6; i++)
+            load_seg_vm(i, new_segs[i]);
+        /* in vm86, CPL is always 3 */
+        cpu_x86_set_cpl(env, 3);
+    } else {
+        /* CPL is set the RPL of CS */
+        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
+        /* first just selectors as the rest may trigger exceptions */
+        for(i = 0; i < 6; i++)
+            cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
+    }
+
+    env->ldt.selector = new_ldt & ~4;
+    env->ldt.base = 0;
+    env->ldt.limit = 0;
+    env->ldt.flags = 0;
+#ifdef VBOX
+    env->ldt.flags = DESC_INTEL_UNUSABLE;
+    env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+    env->ldt.newselector = 0;
+#endif
+
+    /* load the LDT */
+    if (new_ldt & 4)
+        raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+
+    if ((new_ldt & 0xfffc) != 0) {
+        dt = &env->gdt;
+        index = new_ldt & ~7;
+        if ((index + 7) > dt->limit)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        ptr = dt->base + index;
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
+        /* must be a system descriptor of type 2 (LDT) */
+        if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        if (!(e2 & DESC_P_MASK))
+            raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+        load_seg_cache_raw_dt(&env->ldt, e1, e2);
+    }
+
+    /* load the segments */
+    if (!(new_eflags & VM_MASK)) {
+        tss_load_seg(R_CS, new_segs[R_CS]);
+        tss_load_seg(R_SS, new_segs[R_SS]);
+        tss_load_seg(R_ES, new_segs[R_ES]);
+        tss_load_seg(R_DS, new_segs[R_DS]);
+        tss_load_seg(R_FS, new_segs[R_FS]);
+        tss_load_seg(R_GS, new_segs[R_GS]);
+    }
+
+    /* check that EIP is in the CS segment limits */
+    if (new_eip > env->segs[R_CS].limit) {
+        /* XXX: different exception if CALL ? */
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+
+#ifndef CONFIG_USER_ONLY
+    /* reset local breakpoints */
+    if (env->dr[7] & 0x55) {
+        for (i = 0; i < 4; i++) {
+            if (hw_breakpoint_enabled(env->dr[7], i) == 0x1)
+                hw_breakpoint_remove(env, i);
+        }
+        env->dr[7] &= ~0x55;
+    }
+#endif
+}
+
+/*
+ * check if Port I/O is allowed in TSS
+ *
+ * addr is the first port and size the number of consecutive ports (1, 2 or 4
+ * as used by the helper_check_io* wrappers). Raises #GP(0) when TR is not a
+ * present busy 32-bit TSS (type 11) with a limit of at least 103, when the
+ * bitmap bytes lie beyond the TSS limit, or when any of the 'size' permission
+ * bits is set.
+ */
+static inline void check_io(int addr, int size)
+{
+#ifndef VBOX
+ int io_offset, val, mask;
+#else
+ int val, mask;
+ unsigned int io_offset;
+#endif /* VBOX */
+
+ /* TSS must be a valid 32 bit one */
+ if (!(env->tr.flags & DESC_P_MASK) ||
+ ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 ||
+ env->tr.limit < 103)
+ goto fail;
+ /* the 16-bit word at TSS offset 0x66 holds the offset of the I/O bitmap */
+ io_offset = lduw_kernel(env->tr.base + 0x66);
+ io_offset += (addr >> 3);
+ /* Note: the check needs two bytes */
+ if ((io_offset + 1) > env->tr.limit)
+ goto fail;
+ val = lduw_kernel(env->tr.base + io_offset);
+ val >>= (addr & 7);
+ mask = (1 << size) - 1;
+ /* all bits must be zero to allow the I/O */
+ if ((val & mask) != 0) {
+ fail:
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+}
+
+#ifdef VBOX
+
+/* Keep in sync with gen_check_external_event() */
+/*
+ * Calls helper_external_event() when an external flush-TLB, exit, timer or
+ * DMA request is pending, or when an external hard interrupt is pending while
+ * IF is set and interrupts are not inhibited (HF_INHIBIT_IRQ_MASK clear).
+ */
+void helper_check_external_event()
+{
+ if ( (env->interrupt_request & ( CPU_INTERRUPT_EXTERNAL_FLUSH_TLB
+ | CPU_INTERRUPT_EXTERNAL_EXIT
+ | CPU_INTERRUPT_EXTERNAL_TIMER
+ | CPU_INTERRUPT_EXTERNAL_DMA))
+ || ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+ && (env->eflags & IF_MASK)
+ && !(env->hflags & HF_INHIBIT_IRQ_MASK) ) )
+ {
+ helper_external_event();
+ }
+
+}
+
+/*
+ * If segment register 'reg' has a non-zero newselector recorded, passes it to
+ * sync_seg() (presumably a deferred segment load -- confirm against
+ * sync_seg()).
+ */
+void helper_sync_seg(uint32_t reg)
+{
+ if (env->segs[reg].newselector)
+ sync_seg(env, reg, env->segs[reg].newselector);
+}
+
+#endif /* VBOX */
+
+/* I/O permission check for a 1-byte access to port t0; see check_io(). */
+void helper_check_iob(uint32_t t0)
+{
+ check_io(t0, 1);
+}
+
+/* I/O permission check for a 2-byte access starting at port t0; see check_io(). */
+void helper_check_iow(uint32_t t0)
+{
+ check_io(t0, 2);
+}
+
+/* I/O permission check for a 4-byte access starting at port t0; see check_io(). */
+void helper_check_iol(uint32_t t0)
+{
+ check_io(t0, 4);
+}
+
+/* Writes the low 8 bits of data to I/O port 'port'; VBOX builds pass env to cpu_outb(). */
+void helper_outb(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outb(port, data & 0xff);
+#else
+ cpu_outb(env, port, data & 0xff);
+#endif
+}
+
+/* Returns the result of cpu_inb() for I/O port 'port'; VBOX builds pass env as well. */
+target_ulong helper_inb(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inb(port);
+#else
+ return cpu_inb(env, port);
+#endif
+}
+
+/* Writes the low 16 bits of data to I/O port 'port'; VBOX builds pass env to cpu_outw(). */
+void helper_outw(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outw(port, data & 0xffff);
+#else
+ cpu_outw(env, port, data & 0xffff);
+#endif
+}
+
+/* Returns the result of cpu_inw() for I/O port 'port'; VBOX builds pass env as well. */
+target_ulong helper_inw(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inw(port);
+#else
+ return cpu_inw(env, port);
+#endif
+}
+
+/* Writes the 32-bit data to I/O port 'port'; VBOX builds pass env to cpu_outl(). */
+void helper_outl(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outl(port, data);
+#else
+ cpu_outl(env, port, data);
+#endif
+}
+
+/* Returns the result of cpu_inl() for I/O port 'port'; VBOX builds pass env as well. */
+target_ulong helper_inl(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inl(port);
+#else
+ return cpu_inl(env, port);
+#endif
+}
+
+/*
+ * Returns the stack pointer mask for a stack segment whose high descriptor
+ * DWORD (or cached flags) is e2: 32 bits wide when DESC_B_MASK is set,
+ * 16 bits wide otherwise.
+ */
+static inline unsigned int get_sp_mask(unsigned int e2)
+{
+    return (e2 & DESC_B_MASK) ? 0xffffffff : 0xffff;
+}
+
+/*
+ * Returns 1 for the exception vectors that carry an error code
+ * (8, 10 through 14, and 17) and 0 for every other vector.
+ */
+static int exeption_has_error_code(int intno)
+{
+    if (intno == 8)
+        return 1;
+    if (intno >= 10 && intno <= 14)
+        return 1;
+    if (intno == 17)
+        return 1;
+    return 0;
+}
+
+/*
+ * SET_ESP(val, sp_mask): stores val into ESP under sp_mask.
+ * 64-bit target: a 0xffff mask replaces only the low 16 bits, a 0xffffffff
+ * mask stores val truncated to 32 bits, any other mask stores val as is.
+ * 32-bit target: only the bits selected by sp_mask are replaced.
+ * Note: val and sp_mask are evaluated more than once.
+ */
+#ifdef TARGET_X86_64
+#define SET_ESP(val, sp_mask)\
+do {\
+ if ((sp_mask) == 0xffff)\
+ ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
+ else if ((sp_mask) == 0xffffffffLL)\
+ ESP = (uint32_t)(val);\
+ else\
+ ESP = (val);\
+} while (0)
+#else
+#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
+#endif
+
+/* in 64-bit machines, this can overflow. So this segment addition macro
+ * can be used to trim the value to 32-bit whenever needed.
+ * Computes ssp + (sp & sp_mask) and truncates the sum to uint32_t.
+ * Note: 'sp' is not parenthesized in the expansion, so pass a plain lvalue. */
+#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask))))
+
+/* XXX: add a is_user flag to have proper security support */
+/*
+ * Stack access macros. 'sp' must be a modifiable lvalue: it is decremented
+ * before a push and incremented after a pop, while the address actually used
+ * is ssp + (sp & sp_mask). The 32-bit variants go through SEG_ADDL and
+ * therefore truncate the address to 32 bits; the 16-bit variants do not.
+ * All accesses use the *_kernel memory accessors.
+ */
+
+/* push the 16-bit value val */
+#define PUSHW(ssp, sp, sp_mask, val)\
+{\
+ sp -= 2;\
+ stw_kernel((ssp) + (sp & (sp_mask)), (val));\
+}
+
+/* push val truncated to 32 bits */
+#define PUSHL(ssp, sp, sp_mask, val)\
+{\
+ sp -= 4;\
+ stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\
+}
+
+/* pop a 16-bit value into val */
+#define POPW(ssp, sp, sp_mask, val)\
+{\
+ val = lduw_kernel((ssp) + (sp & (sp_mask)));\
+ sp += 2;\
+}
+
+/* pop a 32-bit value into val */
+#define POPL(ssp, sp, sp_mask, val)\
+{\
+ val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\
+ sp += 4;\
+}
+
+/*
+ * protected mode interrupt
+ *
+ * Delivers interrupt/exception 'intno' through the IDT.
+ *
+ * intno       vector number
+ * is_int      non-zero for a software interrupt: the gate DPL is checked
+ *             against CPL and next_eip is the saved return address
+ * error_code  pushed only for vectors that define one, and only when the
+ *             event is neither a software nor a hardware interrupt
+ * next_eip    address following the instruction (used when is_int is set;
+ *             otherwise env->eip is saved)
+ * is_hw       non-zero for a hardware interrupt
+ *
+ * Task gates go through switch_tss(); interrupt and trap gates push the
+ * return frame (switching to the inner stack from the TSS when privilege
+ * changes) and then load CS:EIP from the gate.
+ */
+static void do_interrupt_protected(int intno, int is_int, int error_code,
+ unsigned int next_eip, int is_hw)
+{
+ SegmentCache *dt;
+ target_ulong ptr, ssp;
+ int type, dpl, selector, ss_dpl, cpl;
+ int has_error_code, new_stack, shift;
+ uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
+ uint32_t old_eip, sp_mask;
+
+#ifdef VBOX
+ if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+ cpu_loop_exit();
+#endif
+
+ has_error_code = 0;
+ if (!is_int && !is_hw)
+ has_error_code = exeption_has_error_code(intno);
+ if (is_int)
+ old_eip = next_eip;
+ else
+ old_eip = env->eip;
+
+ dt = &env->idt;
+ /* each IDT entry is 8 bytes; the VBOX variant compares as unsigned */
+#ifndef VBOX
+ if (intno * 8 + 7 > dt->limit)
+#else
+ if ((unsigned)intno * 8 + 7 > dt->limit)
+#endif
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ ptr = dt->base + intno * 8;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ switch(type) {
+ case 5: /* task gate */
+#ifdef VBOX
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+#endif
+ /* must do that check here to return the correct error code */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+ switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
+ if (has_error_code) {
+ int type;
+ uint32_t mask;
+ /* push the error code */
+ /* width of the push follows the new task's TSS type (16 or 32 bit) */
+ type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ shift = type >> 3;
+ if (env->segs[R_SS].flags & DESC_B_MASK)
+ mask = 0xffffffff;
+ else
+ mask = 0xffff;
+ esp = (ESP - (2 << shift)) & mask;
+ ssp = env->segs[R_SS].base + esp;
+ if (shift)
+ stl_kernel(ssp, error_code);
+ else
+ stw_kernel(ssp, error_code);
+ SET_ESP(esp, mask);
+ }
+ return;
+ case 6: /* 286 interrupt gate */
+ case 7: /* 286 trap gate */
+ case 14: /* 386 interrupt gate */
+ case 15: /* 386 trap gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ break;
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+ /* target code segment selector and entry offset come from the gate */
+ selector = e1 >> 16;
+ offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ /* from here on e1/e2 describe the target code segment, not the gate */
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+#ifdef VBOX /** @todo figure out when this is done one day... */
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(selector, e2);
+#endif
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+ /* to inner privilege */
+ get_ss_esp_from_tss(&ss, &esp, dpl);
+ if ((ss & 0xfffc) == 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if ((ss & 3) != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#ifdef VBOX /** @todo figure out when this is done one day... */
+ if (!(ss_e2 & DESC_A_MASK))
+ ss_e2 = set_segment_accessed(ss, ss_e2);
+#endif
+ ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (ss_dpl != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ /* the new stack segment must be a writable data segment */
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-477 of 253666.pdf */
+ raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+ new_stack = 1;
+ sp_mask = get_sp_mask(ss_e2);
+ ssp = get_seg_base(ss_e1, ss_e2);
+#if defined(VBOX) && defined(DEBUG)
+ printf("new stack %04X:%08X gate dpl=%d\n", ss, esp, dpl);
+#endif
+ } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+ /* to same privilege */
+ if (env->eflags & VM_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ esp = ESP;
+ dpl = cpl;
+ } else {
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0; /* avoid warning */
+ sp_mask = 0; /* avoid warning */
+ ssp = 0; /* avoid warning */
+ esp = 0; /* avoid warning */
+ }
+
+ /* 'type' is still the gate type: shift is 1 for 386 gates (14/15), 0 for 286 gates (6/7) */
+ shift = type >> 3;
+
+#if 0
+ /* XXX: check that enough room is available */
+ push_size = 6 + (new_stack << 2) + (has_error_code << 1);
+ if (env->eflags & VM_MASK)
+ push_size += 8;
+ push_size <<= shift;
+#endif
+ /* build the return frame on the selected stack using the local esp copy */
+ if (shift == 1) {
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
+ }
+ PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
+ PUSHL(ssp, esp, sp_mask, ESP);
+ }
+ PUSHL(ssp, esp, sp_mask, compute_eflags());
+ PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, esp, sp_mask, old_eip);
+ if (has_error_code) {
+ PUSHL(ssp, esp, sp_mask, error_code);
+ }
+ } else {
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
+ }
+ PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
+ PUSHW(ssp, esp, sp_mask, ESP);
+ }
+ PUSHW(ssp, esp, sp_mask, compute_eflags());
+ PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, esp, sp_mask, old_eip);
+ if (has_error_code) {
+ PUSHW(ssp, esp, sp_mask, error_code);
+ }
+ }
+
+ /* commit: nothing below performs guest memory accesses through the push macros */
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
+ }
+ ss = (ss & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_SS, ss,
+ ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
+ }
+ SET_ESP(esp, sp_mask);
+
+ /* the RPL of the loaded CS selector is forced to the new privilege level */
+ selector = (selector & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ env->eip = offset;
+
+ /* interrupt gate clear IF mask */
+ if ((type & 1) == 0) {
+ env->eflags &= ~IF_MASK;
+ }
+#ifndef VBOX
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+#else
+ /*
+ * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD
+ * gets confused by seemingly changed EFLAGS. See #3491 and
+ * public bug #2341.
+ */
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+#endif
+}
+
+#ifdef VBOX
+
+/*
+ * check if VME interrupt redirection is enabled in TSS
+ *
+ * Returns true when the bit for 'intno' in the interrupt redirection bitmap
+ * is clear, false when it is set. Raises #GP(0) when TR is not a present busy
+ * 32-bit TSS (type 11) with a limit of at least 103, or when the bitmap byte
+ * lies beyond the TSS limit.
+ */
+DECLINLINE(bool) is_vme_irq_redirected(int intno)
+{
+ unsigned int io_offset, intredir_offset;
+ unsigned char val, mask;
+
+ /* TSS must be a valid 32 bit one */
+ if (!(env->tr.flags & DESC_P_MASK) ||
+ ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 ||
+ env->tr.limit < 103)
+ goto fail;
+ io_offset = lduw_kernel(env->tr.base + 0x66);
+ /* Make sure the io bitmap offset is valid; anything less than sizeof(VBOXTSS) means there's none. */
+ if (io_offset < 0x68 + 0x20)
+ io_offset = 0x68 + 0x20;
+ /* the virtual interrupt redirection bitmap is located below the io bitmap */
+ intredir_offset = io_offset - 0x20;
+
+ /* one bit per vector: select the byte here and the bit within it below */
+ intredir_offset += (intno >> 3);
+ if ((intredir_offset) > env->tr.limit)
+ goto fail;
+
+ val = ldub_kernel(env->tr.base + intredir_offset);
+ mask = 1 << (unsigned char)(intno & 7);
+
+ /* bit set means no redirection. */
+ if ((val & mask) != 0) {
+ return false;
+ }
+ return true;
+
+fail:
+ raise_exception_err(EXCP0D_GPF, 0);
+ return true;
+}
+
+/*
+ * V86 mode software interrupt with CR4.VME=1.
+ *
+ * If the redirection bitmap does not redirect intno, the interrupt goes to the
+ * protected mode handler when IOPL is 3 and raises #GP(0) otherwise.  When it
+ * is redirected, it is delivered real-mode style through the vector table at
+ * linear address 0: FLAGS, CS and next_eip are pushed as 16-bit words and
+ * CS:IP is loaded from the table entry.
+ */
+static void do_soft_interrupt_vme(int intno, int error_code, unsigned int next_eip)
+{
+    const uint32_t iopl = (env->eflags >> IOPL_SHIFT) & 3;
+    target_ulong vector_addr, stack_base;
+    uint32_t handler_ip, sp, saved_cs, pushed_eflags;
+    int handler_cs;
+
+    if (!is_vme_irq_redirected(intno)) {
+        if (iopl != 3)
+            raise_exception_err(EXCP0D_GPF, 0);
+        do_interrupt_protected(intno, 1, error_code, next_eip, 0);
+        return;
+    }
+
+    /* virtual mode idt is at linear address 0 */
+    vector_addr = 0 + intno * 4;
+    handler_ip  = lduw_kernel(vector_addr);
+    handler_cs  = lduw_kernel(vector_addr + 2);
+    sp          = ESP;
+    stack_base  = env->segs[R_SS].base;
+    saved_cs    = env->segs[R_CS].selector;
+
+    pushed_eflags = compute_eflags();
+    if (iopl < 3) {
+        /* the pushed image carries VIF in the IF position and IOPL = 3 */
+        pushed_eflags = (env->eflags & VIF_MASK) ? (pushed_eflags | IF_MASK)
+                                                 : (pushed_eflags & ~IF_MASK);
+        pushed_eflags |= (3 << IOPL_SHIFT);
+    }
+
+    /* XXX: use SS segment size ? */
+    PUSHW(stack_base, sp, 0xffff, pushed_eflags);
+    PUSHW(stack_base, sp, 0xffff, saved_cs);
+    PUSHW(stack_base, sp, 0xffff, next_eip);
+
+    /* commit the new processor state; only the low 16 bits of ESP change */
+    ESP = (ESP & ~0xffff) | (sp & 0xffff);
+    env->eip = handler_ip;
+    env->segs[R_CS].selector = handler_cs;
+    env->segs[R_CS].base = (handler_cs << 4);
+    env->eflags &= ~(TF_MASK | RF_MASK);
+
+    /* with IOPL < 3 the virtual interrupt flag is cleared, else the real one */
+    env->eflags &= ~((iopl < 3) ? VIF_MASK : IF_MASK);
+}
+
+#endif /* VBOX */
+
+#ifdef TARGET_X86_64
+
+#define PUSHQ(sp, val) /* 64-bit push: lower sp by 8, then store val at sp with stq_kernel; sp must be a modifiable lvalue */\
+{\
+ sp -= 8;\
+ stq_kernel(sp, (val));\
+}
+
+#define POPQ(sp, val) /* 64-bit pop: load val from sp with ldq_kernel, then raise sp by 8; both arguments must be modifiable lvalues */\
+{\
+ val = ldq_kernel(sp);\
+ sp += 8;\
+}
+
+/*
+ * Read a 64-bit stack pointer from the current TSS.  Entry `level` is located
+ * at byte offset 8 * level + 4 from the TSS base.
+ *
+ * Calls cpu_abort() when TR is not marked present, and raises #TS with the TR
+ * selector when the 8-byte entry extends past the TSS limit.
+ */
+static inline target_ulong get_rsp_from_tss(int level)
+{
+    const int slot = 8 * level + 4;
+
+#if 0
+    printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
+           env->tr.base, env->tr.limit);
+#endif
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    if ((slot + 7) > env->tr.limit)
+        raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+    return ldq_kernel(env->tr.base + slot);
+}
+
+/* 64 bit interrupt
+ *
+ * Delivers vector intno through a 16-byte IDT gate.  The gate and the code
+ * segment it names are validated (raising #GP or #NP with a matching error
+ * code on failure), a stack is chosen (a TSS entry selected by the gate's IST
+ * field or by the target DPL, or the current ESP), and then SS, ESP, EFLAGS,
+ * CS, the return address and optionally the error code are pushed as 64-bit
+ * values before CS and eip are loaded from the gate.
+ *
+ * is_int:   non-zero for a software interrupt; the pushed return address is
+ *           then next_eip instead of env->eip, and the gate DPL is checked
+ *           against the current CPL.
+ * is_hw:    non-zero suppresses the error code (as does is_int).
+ *
+ * In VBOX builds remR3NotifyTrap() is consulted first; anything other than
+ * VINF_SUCCESS leaves the CPU loop without delivering the interrupt.
+ */
+static void do_interrupt64(int intno, int is_int, int error_code,
+ target_ulong next_eip, int is_hw)
+{
+ SegmentCache *dt;
+ target_ulong ptr;
+ int type, dpl, selector, cpl, ist;
+ int has_error_code, new_stack;
+ uint32_t e1, e2, e3, ss;
+ target_ulong old_eip, esp, offset;
+
+#ifdef VBOX
+ if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+ cpu_loop_exit();
+#endif
+
+ has_error_code = 0;
+ if (!is_int && !is_hw)
+ has_error_code = exeption_has_error_code(intno);
+ if (is_int)
+ old_eip = next_eip;
+ else
+ old_eip = env->eip;
+
+ dt = &env->idt;
+ if (intno * 16 + 15 > dt->limit) /* each gate occupies 16 bytes here */
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ ptr = dt->base + intno * 16;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ e3 = ldl_kernel(ptr + 8);
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ switch(type) {
+ case 14: /* 386 interrupt gate */
+ case 15: /* 386 trap gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ break;
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2);
+ selector = e1 >> 16;
+ offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+ ist = e2 & 7; /* non-zero: the stack pointer comes from TSS entry ist + 3 */
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ if (load_segment(&e1, &e2, selector) != 0) /* from here on e1/e2 describe the target code segment, not the gate */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK)) /* the handler segment must have L set and B clear */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
+ /* to inner privilege */
+ if (ist != 0)
+ esp = get_rsp_from_tss(ist + 3);
+ else
+ esp = get_rsp_from_tss(dpl);
+ esp &= ~0xfLL; /* align stack */
+ ss = 0;
+ new_stack = 1;
+ } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+ /* to same privilege */
+ if (env->eflags & VM_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0;
+ if (ist != 0) /* ist is always 0 here: a non-zero ist takes the first branch above */
+ esp = get_rsp_from_tss(ist + 3);
+ else
+ esp = ESP;
+ esp &= ~0xfLL; /* align stack */
+ dpl = cpl;
+ } else {
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0; /* avoid warning */
+ esp = 0; /* avoid warning */
+ }
+
+ PUSHQ(esp, env->segs[R_SS].selector);
+ PUSHQ(esp, ESP);
+ PUSHQ(esp, compute_eflags());
+ PUSHQ(esp, env->segs[R_CS].selector);
+ PUSHQ(esp, old_eip);
+ if (has_error_code) {
+ PUSHQ(esp, error_code);
+ }
+
+ if (new_stack) {
+ ss = 0 | dpl; /* null selector whose low two bits carry the new privilege level */
+#ifndef VBOX
+ cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+#else
+ cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, dpl << DESC_DPL_SHIFT);
+#endif
+ }
+ ESP = esp;
+
+ selector = (selector & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ env->eip = offset;
+
+ /* interrupt gate clear IF mask */
+ if ((type & 1) == 0) {
+ env->eflags &= ~IF_MASK;
+ }
+#ifndef VBOX
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+#else /* VBOX */
+ /*
+ * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD
+ * gets confused by seemingly changed EFLAGS. See #3491 and
+ * public bug #2341.
+ */
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+#endif /* VBOX */
+}
+#endif
+
+#ifdef TARGET_X86_64
+#if defined(CONFIG_USER_ONLY)
+/*
+ * SYSCALL for user-only emulation: record EXCP_SYSCALL together with the
+ * address of the following instruction and leave the CPU loop.
+ */
+void helper_syscall(int next_eip_addend)
+{
+    env->exception_next_eip = env->eip + next_eip_addend;
+    env->exception_index = EXCP_SYSCALL;
+    cpu_loop_exit();
+}
+#else
+/*
+ * SYSCALL for system emulation.
+ *
+ * Raises #UD when EFER.SCE is clear.  Otherwise switches to CPL 0 and loads
+ * flat CS/SS descriptors whose selectors derive from bits 47:32 of the STAR
+ * MSR (SS selector = CS selector + 8).
+ *
+ * With HF_LMA_MASK set: ECX receives the return address, R11 the current
+ * EFLAGS, the bits in env->fmask are cleared from EFLAGS and execution
+ * continues at LSTAR (caller was 64-bit code) or CSTAR (otherwise).
+ * Without it: ECX receives the 32-bit return address, IF/RF/VM are cleared
+ * and execution continues at the low 32 bits of STAR.
+ */
+void helper_syscall(int next_eip_addend)
+{
+    /* descriptor attributes common to the two modes */
+    const uint32_t code_attrs  = DESC_G_MASK | DESC_P_MASK | DESC_S_MASK |
+                                 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK;
+    const uint32_t stack_attrs = DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                 DESC_S_MASK | DESC_W_MASK | DESC_A_MASK;
+    int star_sel;
+    int from_64bit_code;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    star_sel = (env->star >> 32) & 0xffff;
+
+    if (!(env->hflags & HF_LMA_MASK)) {
+        /* legacy mode */
+        ECX = (uint32_t)(env->eip + next_eip_addend);
+
+        cpu_x86_set_cpl(env, 0);
+        cpu_x86_load_seg_cache(env, R_CS, star_sel & 0xfffc,
+                               0, 0xffffffff, code_attrs | DESC_B_MASK);
+        cpu_x86_load_seg_cache(env, R_SS, (star_sel + 8) & 0xfffc,
+                               0, 0xffffffff, stack_attrs);
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
+        env->eip = (uint32_t)env->star;
+        return;
+    }
+
+    /* long mode */
+    ECX = env->eip + next_eip_addend;
+    env->regs[11] = compute_eflags();
+
+    /* sample the caller's code size before CS is replaced below */
+    from_64bit_code = env->hflags & HF_CS64_MASK;
+
+    cpu_x86_set_cpl(env, 0);
+    cpu_x86_load_seg_cache(env, R_CS, star_sel & 0xfffc,
+                           0, 0xffffffff, code_attrs | DESC_L_MASK);
+    cpu_x86_load_seg_cache(env, R_SS, (star_sel + 8) & 0xfffc,
+                           0, 0xffffffff, stack_attrs);
+    env->eflags &= ~env->fmask;
+    load_eflags(env->eflags, 0);
+    env->eip = from_64bit_code ? env->lstar : env->cstar;
+}
+#endif
+#endif
+
+#ifdef TARGET_X86_64
+/*
+ * SYSRET: return to CPL 3 with flat CS/SS descriptors whose selectors derive
+ * from bits 63:48 of the STAR MSR.
+ *
+ * Raises #UD when EFER.SCE is clear and #GP(0) when CR0.PE is clear or the
+ * current CPL is not 0.  With HF_LMA_MASK set and dflag == 2 the target is a
+ * 64-bit code segment (selector + 16) and eip is the full ECX; in every other
+ * case the target is a 32-bit code segment and eip is ECX truncated to 32
+ * bits.  With HF_LMA_MASK set EFLAGS is reloaded from R11, otherwise only IF
+ * is set.
+ */
+void helper_sysret(int dflag)
+{
+    /* descriptor attributes shared by all ring-3 targets */
+    const uint32_t ring3_attrs = DESC_G_MASK | DESC_P_MASK | DESC_S_MASK |
+                                 (3 << DESC_DPL_SHIFT);
+    const uint32_t code_attrs  = ring3_attrs | DESC_CS_MASK | DESC_R_MASK |
+                                 DESC_A_MASK;
+    const uint32_t stack_attrs = ring3_attrs | DESC_B_MASK | DESC_W_MASK |
+                                 DESC_A_MASK;
+    int star_sel, long_mode;
+
+    if (!(env->efer & MSR_EFER_SCE)) {
+        raise_exception_err(EXCP06_ILLOP, 0);
+    }
+    if (!(env->cr[0] & CR0_PE_MASK) || (env->hflags & HF_CPL_MASK) != 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    star_sel  = (env->star >> 48) & 0xffff;
+    long_mode = (env->hflags & HF_LMA_MASK) != 0;
+
+    if (long_mode && dflag == 2) {
+        cpu_x86_load_seg_cache(env, R_CS, (star_sel + 16) | 3,
+                               0, 0xffffffff, code_attrs | DESC_L_MASK);
+        env->eip = ECX;
+    } else {
+        cpu_x86_load_seg_cache(env, R_CS, star_sel | 3,
+                               0, 0xffffffff, code_attrs | DESC_B_MASK);
+        env->eip = (uint32_t)ECX;
+    }
+
+    cpu_x86_load_seg_cache(env, R_SS, star_sel + 8,
+                           0, 0xffffffff, stack_attrs);
+
+    if (long_mode) {
+        load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
+                    IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
+    } else {
+        env->eflags |= IF_MASK;
+    }
+    cpu_x86_set_cpl(env, 3);
+}
+#endif
+
+#ifdef VBOX
+
+/**
+ * Checks and processes external VMM events.
+ * Called by op_check_external_event() when any of the flags is set and can be serviced.
+ *
+ * Each pending CPU_INTERRUPT_EXTERNAL_* request bit is atomically cleared from
+ * env->interrupt_request and then serviced:
+ *   - EXTERNAL_HARD      -> cpu_interrupt(env, CPU_INTERRUPT_HARD)
+ *   - EXTERNAL_EXIT      -> cpu_exit(env)
+ *   - EXTERNAL_DMA       -> remR3DmaRun(env)
+ *   - EXTERNAL_TIMER     -> remR3TimersRun(env)
+ *   - EXTERNAL_FLUSH_TLB -> tlb_flush(env, 1)
+ */
+void helper_external_event(void)
+{
+# if defined(RT_OS_DARWIN) && defined(VBOX_STRICT)
+    uintptr_t uSP;
+#  ifdef RT_ARCH_AMD64
+    __asm__ __volatile__("movq %%rsp, %0" : "=r" (uSP));
+#  else
+    __asm__ __volatile__("movl %%esp, %0" : "=r" (uSP));
+#  endif
+    AssertMsg(!(uSP & 15), ("xSP=%#p\n", uSP));
+# endif
+    /* Keep in sync with flags checked by gen_check_external_event() */
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+    {
+        ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+                        ~CPU_INTERRUPT_EXTERNAL_HARD);
+        cpu_interrupt(env, CPU_INTERRUPT_HARD);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_EXIT)
+    {
+        ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+                        ~CPU_INTERRUPT_EXTERNAL_EXIT);
+        cpu_exit(env);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_DMA)
+    {
+        ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+                        ~CPU_INTERRUPT_EXTERNAL_DMA);
+        remR3DmaRun(env);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER)
+    {
+        ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+                        ~CPU_INTERRUPT_EXTERNAL_TIMER);
+        remR3TimersRun(env);
+    }
+    if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB)
+    {
+        /* Acknowledge the FLUSH_TLB request itself (not EXTERNAL_HARD), so the
+           request does not stay pending and a concurrently posted hard
+           interrupt request is not dropped. */
+        ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+                        ~CPU_INTERRUPT_EXTERNAL_FLUSH_TLB);
+        /* NOTE(review): assumes a full (global) flush is the intended action
+           for this request - confirm against whoever posts the flag. */
+        tlb_flush(env, 1);
+    }
+}
+
+/*
+ * Record the address of a call instruction for later scanning.  Only done
+ * when CPU_RAW_RING0 is not set in env->state, paging is enabled and
+ * interrupts are disabled.
+ */
+void helper_record_call()
+{
+    if (env->state & CPU_RAW_RING0)
+        return;
+    if (!(env->cr[0] & CR0_PG_MASK))
+        return;
+    if (env->eflags & X86_EFL_IF)
+        return;
+
+    remR3RecordCall(env);
+}
+
+#endif /* VBOX */
+
+/*
+ * Real mode interrupt delivery.
+ *
+ * Reads the 4-byte vector (offset, segment) for intno from the table described
+ * by env->idt, pushes FLAGS, CS and the return IP as 16-bit words on the
+ * current stack and continues at the vector target with IF, TF, AC and RF
+ * cleared.  The return IP is next_eip for software interrupts (is_int) and
+ * env->eip otherwise.  Raises #GP when the vector lies beyond the table limit.
+ * error_code is not used.
+ */
+static void do_interrupt_real(int intno, int is_int, int error_code,
+                              unsigned int next_eip)
+{
+    SegmentCache *ivt = &env->idt;
+    target_ulong vector_addr, stack_base;
+    uint32_t handler_ip, sp, saved_cs, return_ip;
+    int handler_cs;
+
+    /* real mode (simpler !) */
+#ifndef VBOX
+    if (intno * 4 + 3 > ivt->limit)
+#else
+    if ((unsigned)intno * 4 + 3 > ivt->limit)
+#endif
+        raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+
+    vector_addr = ivt->base + intno * 4;
+    handler_ip  = lduw_kernel(vector_addr);
+    handler_cs  = lduw_kernel(vector_addr + 2);
+    sp          = ESP;
+    stack_base  = env->segs[R_SS].base;
+    return_ip   = is_int ? next_eip : env->eip;
+    saved_cs    = env->segs[R_CS].selector;
+
+    /* XXX: use SS segment size ? */
+    PUSHW(stack_base, sp, 0xffff, compute_eflags());
+    PUSHW(stack_base, sp, 0xffff, saved_cs);
+    PUSHW(stack_base, sp, 0xffff, return_ip);
+
+    /* commit the new processor state; only the low 16 bits of ESP change */
+    ESP = (ESP & ~0xffff) | (sp & 0xffff);
+    env->eip = handler_ip;
+    env->segs[R_CS].selector = handler_cs;
+    env->segs[R_CS].base = (handler_cs << 4);
+    env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
+}
+
+/*
+ * Fake user mode interrupt.
+ *
+ * Only the gate privilege check is performed: for a software interrupt
+ * (is_int) whose gate DPL is below the current CPL, #GP is raised with the
+ * IDT-relative error code.  Since only user space is emulated, nothing more
+ * can be done than leaving the emulation with the suitable exception and
+ * error code; for software interrupts EIP is advanced to next_eip.
+ * error_code is not used.
+ */
+void do_interrupt_user(int intno, int is_int, int error_code,
+                       target_ulong next_eip)
+{
+    /* gates are 16 bytes with HF_LMA_MASK set, 8 bytes otherwise */
+    const int entry_shift = (env->hflags & HF_LMA_MASK) ? 4 : 3;
+    SegmentCache *idt = &env->idt;
+    uint32_t gate_hi;
+    int gate_dpl, cur_pl;
+
+    gate_hi  = ldl_kernel(idt->base + (intno << entry_shift) + 4);
+    gate_dpl = (gate_hi >> DESC_DPL_SHIFT) & 3;
+    cur_pl   = env->hflags & HF_CPL_MASK;
+
+    if (is_int) {
+        /* check privilege if software int */
+        if (gate_dpl < cur_pl)
+            raise_exception_err(EXCP0D_GPF, (intno << entry_shift) + 2);
+        EIP = next_eip;
+    }
+}
+
+#if !defined(CONFIG_USER_ONLY)
+/*
+ * Record the event being delivered in the VMCB's event_inj field, unless that
+ * field already holds a valid event.  The type is SOFT for software
+ * interrupts and EXEPT otherwise.  Outside real mode (rm == 0), vectors that
+ * carry an error code also get event_inj_err filled in and the VALID_ERR bit
+ * set.  is_hw is not used.
+ */
+static void handle_even_inj(int intno, int is_int, int error_code,
+                            int is_hw, int rm)
+{
+    uint32_t injected = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+
+    if (injected & SVM_EVTINJ_VALID)
+        return;
+
+    injected = intno
+             | (is_int ? SVM_EVTINJ_TYPE_SOFT : SVM_EVTINJ_TYPE_EXEPT)
+             | SVM_EVTINJ_VALID;
+    if (!rm && exeption_has_error_code(intno)) {
+        injected |= SVM_EVTINJ_VALID_ERR;
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
+    }
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), injected);
+}
+#endif
+
+/*
+ * Begin execution of an interruption. is_int is TRUE if coming from
+ * the int instruction. next_eip is the EIP value AFTER the interrupt
+ * instruction. It is only relevant if is_int is TRUE.
+ *
+ * Dispatches to the delivery routine matching the current CPU mode:
+ * do_interrupt_real() when CR0.PE is clear, do_interrupt64() when
+ * HF_LMA_MASK is set (TARGET_X86_64 only), do_soft_interrupt_vme() for
+ * multi-byte software interrupts from V86 code with CR4.VME set (VBOX only),
+ * and do_interrupt_protected() otherwise.
+ *
+ * While HF_SVMI_MASK is set (system emulation only) the event is recorded in
+ * the VMCB event_inj field before delivery and its VALID bit is cleared again
+ * once delivery has completed.
+ */
+void do_interrupt(int intno, int is_int, int error_code,
+                  target_ulong next_eip, int is_hw)
+{
+    if (qemu_loglevel_mask(CPU_LOG_INT)) {
+        if ((env->cr[0] & CR0_PE_MASK)) {
+            static int count;
+            qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
+                     count, intno, error_code, is_int,
+                     env->hflags & HF_CPL_MASK,
+                     env->segs[R_CS].selector, EIP,
+                     (int)env->segs[R_CS].base + EIP,
+                     env->segs[R_SS].selector, ESP);
+            if (intno == 0x0e) {
+                qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]);
+            } else {
+                qemu_log(" EAX=" TARGET_FMT_lx, EAX);
+            }
+            qemu_log("\n");
+            log_cpu_state(env, X86_DUMP_CCOP);
+#if 0
+            {
+                int i;
+                uint8_t *ptr;
+                qemu_log(" code=");
+                ptr = env->segs[R_CS].base + env->eip;
+                for(i = 0; i < 16; i++) {
+                    qemu_log(" %02x", ldub(ptr + i));
+                }
+                qemu_log("\n");
+            }
+#endif
+            count++;
+        }
+    }
+#ifdef VBOX
+    if (RT_UNLIKELY(env->state & CPU_EMULATE_SINGLE_STEP)) {
+        /* next_eip is only meaningful for software interrupts, so it is
+           printed for is_int and omitted otherwise. */
+        if (is_int) {
+            RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv next=%#RGv%s\n",
+                        intno, error_code, (RTGCPTR)env->eip, (RTGCPTR)next_eip, is_hw ? " hw" : "");
+        } else {
+            RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv%s\n",
+                        intno, error_code, (RTGCPTR)env->eip, is_hw ? " hw" : "");
+        }
+    }
+#endif
+    if (env->cr[0] & CR0_PE_MASK) {
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 0);
+#endif
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
+        } else
+#endif
+        {
+#ifdef VBOX
+            /* int xx *, v86 code and VME enabled? */
+            if (   (env->eflags & VM_MASK)
+                && (env->cr[4] & CR4_VME_MASK)
+                && is_int
+                && !is_hw
+                && env->eip + 1 != next_eip /* single byte int 3 goes straight to the protected mode handler */
+               )
+                do_soft_interrupt_vme(intno, error_code, next_eip);
+            else
+#endif /* VBOX */
+                do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
+        }
+    } else {
+#if !defined(CONFIG_USER_ONLY)
+        if (env->hflags & HF_SVMI_MASK)
+            handle_even_inj(intno, is_int, error_code, is_hw, 1);
+#endif
+        do_interrupt_real(intno, is_int, error_code, next_eip);
+    }
+
+#if !defined(CONFIG_USER_ONLY)
+    if (env->hflags & HF_SVMI_MASK) {
+        /* delivery completed: the recorded event is no longer pending */
+        uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+    }
+#endif
+}
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+/*
+ * Check nested exceptions and change to double or triple fault if
+ * needed. It should only be called, if this is not an interrupt.
+ * Returns the new exception number.
+ *
+ * Vectors 0 and 10..13 count as contributory.  A contributory exception on
+ * top of a contributory one, or a contributory exception or page fault on top
+ * of a page fault, becomes a double fault with error code 0.  Any exception
+ * while the previous one was a double fault is a triple fault (system
+ * emulation only): SVM shutdown exit, or reset request / VINF_EM_TRIPLE_FAULT.
+ * env->old_exception is updated for contributory exceptions, page faults and
+ * double faults.
+ */
+static int check_exception(int intno, int *error_code)
+{
+    const int prev = env->old_exception;
+    const int prev_contributory = prev == 0 || (prev >= 10 && prev <= 13);
+    const int new_contributory  = intno == 0 || (intno >= 10 && intno <= 13);
+
+    qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n",
+                  prev, intno);
+
+#if !defined(CONFIG_USER_ONLY)
+    if (prev == EXCP08_DBLE) {
+        if (env->hflags & HF_SVMI_MASK)
+            helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */
+
+        qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+
+# ifndef VBOX
+        qemu_system_reset_request();
+        return EXCP_HLT;
+# else
+        remR3RaiseRC(env->pVM, VINF_EM_TRIPLE_FAULT);
+        return EXCP_RC;
+# endif
+    }
+#endif
+
+    if (   (prev_contributory && new_contributory)
+        || (   prev == EXCP0E_PAGE
+            && (new_contributory || intno == EXCP0E_PAGE))) {
+        intno = EXCP08_DBLE;
+        *error_code = 0;
+    }
+
+    /* remember only the vectors that take part in the escalation rules */
+    if (   new_contributory
+        || intno == EXCP0E_PAGE
+        || intno == EXCP08_DBLE)
+        env->old_exception = intno;
+
+    return intno;
+}
+
+/*
+ * Signal an interruption. It is executed in the main CPU loop.
+ * is_int is TRUE if coming from the int instruction. next_eip is the
+ * EIP value AFTER the interrupt instruction. It is only relevant if
+ * is_int is TRUE.
+ *
+ * The SVM intercept check is performed first (software interrupt or exception
+ * intercept); exceptions are additionally run through check_exception(),
+ * which may turn them into a double fault.  The event is then stored in env
+ * and the CPU loop is left; this function does not return.
+ */
+static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code,
+                                          int next_eip_addend)
+{
+#if defined(VBOX) && defined(DEBUG)
+    Log2(("raise_interrupt: %x %x %x %RGv\n", intno, is_int, error_code, (RTGCPTR)env->eip + next_eip_addend));
+#endif
+    if (is_int) {
+        helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0);
+    } else {
+        helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code);
+        intno = check_exception(intno, &error_code);
+    }
+
+    env->exception_next_eip = env->eip + next_eip_addend;
+    env->exception_is_int = is_int;
+    env->error_code = error_code;
+    env->exception_index = intno;
+    cpu_loop_exit();
+}
+
+/* shortcuts to generate exceptions */
+
+void raise_exception_err(int exception_index, int error_code)
+{
+ raise_interrupt(exception_index, 0, error_code, 0);
+}
+
+void raise_exception(int exception_index)
+{
+ raise_interrupt(exception_index, 0, 0, 0);
+}
+
+/*
+ * Make nenv the current CPU state and raise exception exception_index with an
+ * error code of 0; does not return.
+ */
+void raise_exception_env(int exception_index, CPUState *nenv)
+{
+    env = nenv;
+    raise_interrupt(exception_index, 0, 0, 0);
+}
+/* SMM support */
+
+#if defined(CONFIG_USER_ONLY)
+
+void do_smm_enter(void)
+{
+    /* CONFIG_USER_ONLY build: entering SMM is a no-op */
+}
+
+void helper_rsm(void)
+{
+    /* CONFIG_USER_ONLY build: RSM is a no-op */
+}
+
+#else
+
+#ifdef TARGET_X86_64
+#define SMM_REVISION_ID 0x00020064
+#else
+#define SMM_REVISION_ID 0x00020000
+#endif
+
+void do_smm_enter(void)
+{ /*
+ * Enter system management mode: set HF_SMM_MASK, write the current CPU
+ * state into the save area at env->smbase + 0x8000 (the layout differs
+ * between TARGET_X86_64 and 32-bit builds), then load the SMM start-up
+ * state: EFER = 0 (64-bit builds), EFLAGS cleared, eip = 0x8000, CS based
+ * at smbase, the other segments based at 0 with a 4 GiB limit, CR0 with
+ * PE/EM/TS/PG cleared, CR4 = 0 and DR7 = 0x400.
+ */
+ target_ulong sm_state;
+ SegmentCache *dt;
+ int i, offset;
+
+ qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
+ log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+
+ env->hflags |= HF_SMM_MASK;
+ cpu_smm_update(env);
+
+ sm_state = env->smbase + 0x8000;
+
+#ifdef TARGET_X86_64
+ for(i = 0; i < 6; i++) { /* the six segment registers, 16 bytes each from 0x7e00 */
+ dt = &env->segs[i];
+ offset = 0x7e00 + i * 16;
+ stw_phys(sm_state + offset, dt->selector);
+ stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
+ stl_phys(sm_state + offset + 4, dt->limit);
+ stq_phys(sm_state + offset + 8, dt->base);
+ }
+
+ stq_phys(sm_state + 0x7e68, env->gdt.base);
+ stl_phys(sm_state + 0x7e64, env->gdt.limit);
+
+ stw_phys(sm_state + 0x7e70, env->ldt.selector);
+ stq_phys(sm_state + 0x7e78, env->ldt.base);
+ stl_phys(sm_state + 0x7e74, env->ldt.limit);
+ stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
+
+ stq_phys(sm_state + 0x7e88, env->idt.base);
+ stl_phys(sm_state + 0x7e84, env->idt.limit);
+
+ stw_phys(sm_state + 0x7e90, env->tr.selector);
+ stq_phys(sm_state + 0x7e98, env->tr.base);
+ stl_phys(sm_state + 0x7e94, env->tr.limit);
+ stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
+
+ stq_phys(sm_state + 0x7ed0, env->efer);
+
+ stq_phys(sm_state + 0x7ff8, EAX);
+ stq_phys(sm_state + 0x7ff0, ECX);
+ stq_phys(sm_state + 0x7fe8, EDX);
+ stq_phys(sm_state + 0x7fe0, EBX);
+ stq_phys(sm_state + 0x7fd8, ESP);
+ stq_phys(sm_state + 0x7fd0, EBP);
+ stq_phys(sm_state + 0x7fc8, ESI);
+ stq_phys(sm_state + 0x7fc0, EDI);
+ for(i = 8; i < 16; i++) /* regs[8..15] continue downwards from 0x7fb8 */
+ stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
+ stq_phys(sm_state + 0x7f78, env->eip);
+ stl_phys(sm_state + 0x7f70, compute_eflags());
+ stl_phys(sm_state + 0x7f68, env->dr[6]);
+ stl_phys(sm_state + 0x7f60, env->dr[7]);
+
+ stl_phys(sm_state + 0x7f48, env->cr[4]);
+ stl_phys(sm_state + 0x7f50, env->cr[3]);
+ stl_phys(sm_state + 0x7f58, env->cr[0]);
+
+ stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+ stl_phys(sm_state + 0x7f00, env->smbase);
+#else
+ stl_phys(sm_state + 0x7ffc, env->cr[0]);
+ stl_phys(sm_state + 0x7ff8, env->cr[3]);
+ stl_phys(sm_state + 0x7ff4, compute_eflags());
+ stl_phys(sm_state + 0x7ff0, env->eip);
+ stl_phys(sm_state + 0x7fec, EDI);
+ stl_phys(sm_state + 0x7fe8, ESI);
+ stl_phys(sm_state + 0x7fe4, EBP);
+ stl_phys(sm_state + 0x7fe0, ESP);
+ stl_phys(sm_state + 0x7fdc, EBX);
+ stl_phys(sm_state + 0x7fd8, EDX);
+ stl_phys(sm_state + 0x7fd4, ECX);
+ stl_phys(sm_state + 0x7fd0, EAX);
+ stl_phys(sm_state + 0x7fcc, env->dr[6]);
+ stl_phys(sm_state + 0x7fc8, env->dr[7]);
+
+ stl_phys(sm_state + 0x7fc4, env->tr.selector);
+ stl_phys(sm_state + 0x7f64, env->tr.base);
+ stl_phys(sm_state + 0x7f60, env->tr.limit);
+ stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
+
+ stl_phys(sm_state + 0x7fc0, env->ldt.selector);
+ stl_phys(sm_state + 0x7f80, env->ldt.base);
+ stl_phys(sm_state + 0x7f7c, env->ldt.limit);
+ stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
+
+ stl_phys(sm_state + 0x7f74, env->gdt.base);
+ stl_phys(sm_state + 0x7f70, env->gdt.limit);
+
+ stl_phys(sm_state + 0x7f58, env->idt.base);
+ stl_phys(sm_state + 0x7f54, env->idt.limit);
+
+ for(i = 0; i < 6; i++) { /* selectors form one array at 0x7fa8; descriptors are split into two groups of three */
+ dt = &env->segs[i];
+ if (i < 3)
+ offset = 0x7f84 + i * 12;
+ else
+ offset = 0x7f2c + (i - 3) * 12;
+ stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
+ stl_phys(sm_state + offset + 8, dt->base);
+ stl_phys(sm_state + offset + 4, dt->limit);
+ stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
+ }
+ stl_phys(sm_state + 0x7f14, env->cr[4]);
+
+ stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+ stl_phys(sm_state + 0x7ef8, env->smbase);
+#endif
+ /* init SMM cpu state */
+
+#ifdef TARGET_X86_64
+ cpu_load_efer(env, 0);
+#endif
+ load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->eip = 0x00008000;
+ cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
+ 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+
+ cpu_x86_update_cr0(env,
+ env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
+ cpu_x86_update_cr4(env, 0);
+ env->dr[7] = 0x00000400;
+ CC_OP = CC_OP_EFLAGS;
+}
+
+void helper_rsm(void)
+{ /*
+ * RSM: leave system management mode.  VBOX builds do not implement it and
+ * abort via cpu_abort().  Otherwise the CPU state is reloaded from the
+ * save area at env->smbase + 0x8000 (layout depends on TARGET_X86_64),
+ * env->smbase is updated from the save area when bit 17 of the stored
+ * revision ID is set, and HF_SMM_MASK is cleared.
+ */
+#ifdef VBOX
+ cpu_abort(env, "helper_rsm");
+#else /* !VBOX */
+ target_ulong sm_state;
+ int i, offset;
+ uint32_t val;
+
+ sm_state = env->smbase + 0x8000;
+#ifdef TARGET_X86_64
+ cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0));
+
+ for(i = 0; i < 6; i++) {
+ offset = 0x7e00 + i * 16;
+ cpu_x86_load_seg_cache(env, i,
+ lduw_phys(sm_state + offset),
+ ldq_phys(sm_state + offset + 8),
+ ldl_phys(sm_state + offset + 4),
+ (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
+ }
+
+ env->gdt.base = ldq_phys(sm_state + 0x7e68);
+ env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+
+ env->ldt.selector = lduw_phys(sm_state + 0x7e70);
+ env->ldt.base = ldq_phys(sm_state + 0x7e78);
+ env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+ env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
+#ifdef VBOX /* never compiled: this whole branch is inside the !VBOX part */
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+
+ env->idt.base = ldq_phys(sm_state + 0x7e88);
+ env->idt.limit = ldl_phys(sm_state + 0x7e84);
+
+ env->tr.selector = lduw_phys(sm_state + 0x7e90);
+ env->tr.base = ldq_phys(sm_state + 0x7e98);
+ env->tr.limit = ldl_phys(sm_state + 0x7e94);
+ env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->tr.newselector = 0;
+#endif
+
+ EAX = ldq_phys(sm_state + 0x7ff8);
+ ECX = ldq_phys(sm_state + 0x7ff0);
+ EDX = ldq_phys(sm_state + 0x7fe8);
+ EBX = ldq_phys(sm_state + 0x7fe0);
+ ESP = ldq_phys(sm_state + 0x7fd8);
+ EBP = ldq_phys(sm_state + 0x7fd0);
+ ESI = ldq_phys(sm_state + 0x7fc8);
+ EDI = ldq_phys(sm_state + 0x7fc0);
+ for(i = 8; i < 16; i++)
+ env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
+ env->eip = ldq_phys(sm_state + 0x7f78);
+ load_eflags(ldl_phys(sm_state + 0x7f70),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->dr[6] = ldl_phys(sm_state + 0x7f68);
+ env->dr[7] = ldl_phys(sm_state + 0x7f60);
+
+ cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
+ cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
+ cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+
+ val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+ if (val & 0x20000) { /* bit 17 set: take the new smbase from the save area, low 15 bits masked off */
+ env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+ }
+#else
+ cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
+ cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
+ load_eflags(ldl_phys(sm_state + 0x7ff4),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->eip = ldl_phys(sm_state + 0x7ff0);
+ EDI = ldl_phys(sm_state + 0x7fec);
+ ESI = ldl_phys(sm_state + 0x7fe8);
+ EBP = ldl_phys(sm_state + 0x7fe4);
+ ESP = ldl_phys(sm_state + 0x7fe0);
+ EBX = ldl_phys(sm_state + 0x7fdc);
+ EDX = ldl_phys(sm_state + 0x7fd8);
+ ECX = ldl_phys(sm_state + 0x7fd4);
+ EAX = ldl_phys(sm_state + 0x7fd0);
+ env->dr[6] = ldl_phys(sm_state + 0x7fcc);
+ env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+
+ env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
+ env->tr.base = ldl_phys(sm_state + 0x7f64);
+ env->tr.limit = ldl_phys(sm_state + 0x7f60);
+ env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->tr.newselector = 0;
+#endif
+
+ env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
+ env->ldt.base = ldl_phys(sm_state + 0x7f80);
+ env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
+ env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+
+ env->gdt.base = ldl_phys(sm_state + 0x7f74);
+ env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+
+ env->idt.base = ldl_phys(sm_state + 0x7f58);
+ env->idt.limit = ldl_phys(sm_state + 0x7f54);
+
+ for(i = 0; i < 6; i++) {
+ if (i < 3)
+ offset = 0x7f84 + i * 12;
+ else
+ offset = 0x7f2c + (i - 3) * 12;
+ cpu_x86_load_seg_cache(env, i,
+ ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+ ldl_phys(sm_state + offset + 8),
+ ldl_phys(sm_state + offset + 4),
+ (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+ }
+ cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+
+ val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+ if (val & 0x20000) { /* bit 17 set: take the new smbase from the save area, low 15 bits masked off */
+ env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+ }
+#endif
+ CC_OP = CC_OP_EFLAGS;
+ env->hflags &= ~HF_SMM_MASK;
+ cpu_smm_update(env);
+
+ qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
+ log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+#endif /* !VBOX */
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+
+/* division, flags are undefined */
+
+/*
+ * DIV r/m8: unsigned AX / t0[7:0]; quotient to AL, remainder to AH.
+ * Raises #DE for a zero divisor or a quotient above 0xff.
+ */
+void helper_divb_AL(target_ulong t0)
+{
+    const unsigned int dividend = (EAX & 0xffff);
+    const unsigned int divisor  = (t0 & 0xff);
+    unsigned int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot > 0xff)
+        raise_exception(EXCP00_DIVZ);
+    rem = dividend % divisor;
+    EAX = (EAX & ~0xffff) | ((rem & 0xff) << 8) | (quot & 0xff);
+}
+
+/*
+ * IDIV r/m8: signed AX / (int8_t)t0; quotient to AL, remainder to AH.
+ * Raises #DE for a zero divisor or a quotient that does not fit in 8 bits.
+ */
+void helper_idivb_AL(target_ulong t0)
+{
+    const int dividend = (int16_t)EAX;
+    const int divisor  = (int8_t)t0;
+    int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot != (int8_t)quot)
+        raise_exception(EXCP00_DIVZ);
+    rem = dividend % divisor;
+    EAX = (EAX & ~0xffff) | ((rem & 0xff) << 8) | (quot & 0xff);
+}
+
+/*
+ * DIV r/m16: unsigned DX:AX / t0[15:0]; quotient to AX, remainder to DX.
+ * Raises #DE for a zero divisor or a quotient above 0xffff.
+ */
+void helper_divw_AX(target_ulong t0)
+{
+    const unsigned int dividend = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    const unsigned int divisor  = (t0 & 0xffff);
+    unsigned int quot, rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    if (quot > 0xffff)
+        raise_exception(EXCP00_DIVZ);
+    rem = dividend % divisor;
+    EAX = (EAX & ~0xffff) | (quot & 0xffff);
+    EDX = (EDX & ~0xffff) | (rem & 0xffff);
+}
+
+/*
+ * IDIV r/m16: signed DX:AX / (int16_t)t0; quotient to AX, remainder to DX.
+ * Raises #DE for a zero divisor or a quotient that does not fit in 16 bits.
+ */
+void helper_idivw_AX(target_ulong t0)
+{
+    int num, den, q, r;
+
+    num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+    den = (int16_t)t0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    /* DX:AX = 0x80000000 divided by -1 is INT_MIN / -1 on the host, which
+       is undefined behaviour and traps on x86 hosts.  The quotient cannot
+       fit in 16 bits anyway, so deliver the guest #DE before dividing. */
+    if (den == -1 && num == (-2147483647 - 1)) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    if (q != (int16_t)q)
+        raise_exception(EXCP00_DIVZ);
+    q &= 0xffff;
+    r = (num % den) & 0xffff;
+    EAX = (EAX & ~0xffff) | q;
+    EDX = (EDX & ~0xffff) | r;
+}
+
+/*
+ * DIV r/m32: unsigned EDX:EAX / t0[31:0]; quotient to EAX, remainder to EDX.
+ * Raises #DE for a zero divisor or a quotient above 0xffffffff.
+ */
+void helper_divl_EAX(target_ulong t0)
+{
+    const uint64_t dividend = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    const unsigned int divisor = t0;
+    uint64_t quot;
+    unsigned int rem;
+
+    if (divisor == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    quot = dividend / divisor;
+    rem  = dividend % divisor;
+    if (quot > 0xffffffff)
+        raise_exception(EXCP00_DIVZ);
+    EAX = (uint32_t)quot;
+    EDX = (uint32_t)rem;
+}
+
+/*
+ * IDIV r/m32: signed EDX:EAX / (int32_t)t0; quotient to EAX, remainder to EDX.
+ * Raises #DE for a zero divisor or a quotient that does not fit in 32 bits.
+ */
+void helper_idivl_EAX(target_ulong t0)
+{
+    int den, r;
+    int64_t num, q;
+
+    num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+    den = t0;
+    if (den == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    /* EDX:EAX = 0x8000000000000000 divided by -1 is INT64_MIN / -1 on the
+       host, which is undefined behaviour and traps on x86 hosts.  The
+       quotient cannot fit in 32 bits anyway, so deliver the guest #DE
+       before dividing. */
+    if (den == -1 && num == (-0x7fffffffffffffffLL - 1)) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    q = (num / den);
+    r = (num % den);
+    if (q != (int32_t)q)
+        raise_exception(EXCP00_DIVZ);
+    EAX = (uint32_t)q;
+    EDX = (uint32_t)r;
+}
+
+/* bcd */
+
+/*
+ * AAM: AH = AL / base, AL = AL % base; CC_DST receives the new AL.
+ * XXX: exception - base == 0 is not checked here; presumably the caller
+ * filters it out (TODO confirm).
+ */
+void helper_aam(int base)
+{
+    const int value = EAX & 0xff;
+    const int hi = value / base;
+    const int lo = value % base;
+
+    EAX = (EAX & ~0xffff) | lo | (hi << 8);
+    CC_DST = lo;
+}
+
+/* AAD: AL = (AH * base + AL) & 0xff, AH = 0; CC_DST receives the new AL. */
+void helper_aad(int base)
+{
+    const int lo = EAX & 0xff;
+    const int hi = (EAX >> 8) & 0xff;
+    const int folded = ((hi * base) + lo) & 0xff;
+
+    EAX = (EAX & ~0xffff) | folded;
+    CC_DST = folded;
+}
+
+/*
+ * AAA: ASCII adjust AL after addition.  When the low nibble of AL exceeds 9
+ * or AF is set, 6 is added to AL, AH is incremented (plus the carry out of
+ * AL) and CF/AF are set; otherwise CF/AF are cleared.  AL is reduced to its
+ * low nibble in both cases.  The resulting flags are written to CC_SRC.
+ */
+void helper_aaa(void)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    int lo = EAX & 0xff;
+    int hi = (EAX >> 8) & 0xff;
+    const int carry_into_ah = (lo > 0xf9);
+
+    if (((lo & 0x0f) > 9) || (flags & CC_A)) {
+        lo = (lo + 6) & 0x0f;
+        hi = (hi + 1 + carry_into_ah) & 0xff;
+        flags |= CC_C | CC_A;
+    } else {
+        flags &= ~(CC_C | CC_A);
+        lo &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | lo | (hi << 8);
+    CC_SRC = flags;
+}
+
+/*
+ * AAS: ASCII adjust AL after subtraction.  When the low nibble of AL exceeds
+ * 9 or AF is set, 6 is subtracted from AL, AH is decremented (minus the
+ * borrow out of AL) and CF/AF are set; otherwise CF/AF are cleared.  AL is
+ * reduced to its low nibble in both cases.  The resulting flags are written
+ * to CC_SRC.
+ */
+void helper_aas(void)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    int lo = EAX & 0xff;
+    int hi = (EAX >> 8) & 0xff;
+    const int borrow_from_ah = (lo < 6);
+
+    if (((lo & 0x0f) > 9) || (flags & CC_A)) {
+        lo = (lo - 6) & 0x0f;
+        hi = (hi - 1 - borrow_from_ah) & 0xff;
+        flags |= CC_C | CC_A;
+    } else {
+        flags &= ~(CC_C | CC_A);
+        lo &= 0x0f;
+    }
+    EAX = (EAX & ~0xffff) | lo | (hi << 8);
+    CC_SRC = flags;
+}
+
+/*
+ * DAA: decimal adjust AL after addition.  Adds 6 when the low nibble exceeds
+ * 9 or AF was set (setting AF), then adds 0x60 when the result exceeds 0x9f
+ * or CF was set (setting CF).  ZF, PF and SF are derived from the final AL;
+ * the new flags are written to CC_SRC.
+ */
+void helper_daa(void)
+{
+    const int old_flags = helper_cc_compute_all(CC_OP);
+    int al = EAX & 0xff;
+    int new_flags = 0;
+
+    if (((al & 0x0f) > 9) || (old_flags & CC_A)) {
+        al = (al + 6) & 0xff;
+        new_flags |= CC_A;
+    }
+    if ((al > 0x9f) || (old_flags & CC_C)) {
+        al = (al + 0x60) & 0xff;
+        new_flags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    new_flags |= (al == 0) << 6; /* zf */
+    new_flags |= parity_table[al]; /* pf */
+    new_flags |= (al & 0x80); /* sf */
+    CC_SRC = new_flags;
+}
+
+/*
+ * DAS: decimal adjust AL after subtraction.  Subtracts 6 when the low nibble
+ * exceeds 9 or AF was set (setting AF, and CF on borrow or previous CF), then
+ * subtracts 0x60 when the original AL exceeded 0x99 or CF was set (setting
+ * CF).  ZF, PF and SF are derived from the final AL; the new flags are
+ * written to CC_SRC.
+ */
+void helper_das(void)
+{
+    const int old_flags = helper_cc_compute_all(CC_OP);
+    const int had_carry = old_flags & CC_C;
+    const int original_al = EAX & 0xff;
+    int al = original_al;
+    int new_flags = 0;
+
+    if (((al & 0x0f) > 9) || (old_flags & CC_A)) {
+        new_flags |= CC_A;
+        if (al < 6 || had_carry)
+            new_flags |= CC_C;
+        al = (al - 6) & 0xff;
+    }
+    if ((original_al > 0x99) || had_carry) {
+        al = (al - 0x60) & 0xff;
+        new_flags |= CC_C;
+    }
+    EAX = (EAX & ~0xff) | al;
+
+    /* well, speed is not an issue here, so we compute the flags by hand */
+    new_flags |= (al == 0) << 6; /* zf */
+    new_flags |= parity_table[al]; /* pf */
+    new_flags |= (al & 0x80); /* sf */
+    CC_SRC = new_flags;
+}
+
+/*
+ * INTO: when the overflow flag is set in the currently computed flags,
+ * raise EXCP04_INTO through raise_interrupt(), forwarding next_eip_addend.
+ * Does nothing when the overflow flag is clear.
+ */
+void helper_into(int next_eip_addend)
+{
+    if (!(helper_cc_compute_all(CC_OP) & CC_O)) {
+        return;
+    }
+    raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend);
+}
+
+/*
+ * CMPXCHG8B: compare the 64-bit value at a0 with EDX:EAX.
+ *
+ * On a match ECX:EBX is stored at a0 and ZF is set.  On a mismatch the value
+ * just read is written back unchanged, copied into EDX:EAX, and ZF is
+ * cleared.  The updated flag word is written to CC_SRC.
+ */
+void helper_cmpxchg8b(target_ulong a0)
+{
+    int flags = helper_cc_compute_all(CC_OP);
+    const uint64_t current = ldq(a0);
+    const uint64_t expected = ((uint64_t)EDX << 32) | (uint32_t)EAX;
+
+    if (current != expected) {
+        /* the store is performed even when the comparison fails */
+        stq(a0, current);
+        EDX = (uint32_t)(current >> 32);
+        EAX = (uint32_t)current;
+        flags &= ~CC_Z;
+    } else {
+        stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX);
+        flags |= CC_Z;
+    }
+    CC_SRC = flags;
+}
+
+#ifdef TARGET_X86_64
+/*
+ * CMPXCHG16B: compare the two 64-bit words at a0 / a0+8 with EAX / EDX.
+ *
+ * a0 must be 16-byte aligned, otherwise EXCP0D_GPF is raised.  On a match
+ * EBX / ECX are stored and ZF is set.  On a mismatch the values just read
+ * are written back unchanged, copied into EAX / EDX, and ZF is cleared.
+ * The updated flag word is written to CC_SRC.
+ */
+void helper_cmpxchg16b(target_ulong a0)
+{
+    uint64_t low_word, high_word;
+    int flags;
+
+    if (a0 & 0xf) {
+        raise_exception(EXCP0D_GPF);
+    }
+    flags = helper_cc_compute_all(CC_OP);
+    low_word = ldq(a0);
+    high_word = ldq(a0 + 8);
+    if (low_word != EAX || high_word != EDX) {
+        /* the store is performed even when the comparison fails */
+        stq(a0, low_word);
+        stq(a0 + 8, high_word);
+        EDX = high_word;
+        EAX = low_word;
+        flags &= ~CC_Z;
+    } else {
+        stq(a0, EBX);
+        stq(a0 + 8, ECX);
+        flags |= CC_Z;
+    }
+    CC_SRC = flags;
+}
+#endif
+
+/*
+ * Single-step trap: raise EXCP01_DB.
+ *
+ * Unless built with CONFIG_USER_ONLY, check_hw_breakpoints() is called
+ * first and DR6_BS is set in debug register 6 before the exception is
+ * raised.
+ */
+void helper_single_step(void)
+{
+#ifndef CONFIG_USER_ONLY
+    check_hw_breakpoints(env, 1);
+    env->dr[6] = env->dr[6] | DR6_BS;
+#endif
+    raise_exception(EXCP01_DB);
+}
+
+/*
+ * CPUID: run the SVM intercept check for SVM_EXIT_CPUID, then query
+ * cpu_x86_cpuid() with the low 32 bits of EAX and ECX and write the four
+ * result words back to EAX, EBX, ECX and EDX.
+ */
+void helper_cpuid(void)
+{
+    uint32_t out_a, out_b, out_c, out_d;
+
+    helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0);
+
+    cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX,
+                  &out_a, &out_b, &out_c, &out_d);
+    EAX = out_a;
+    EBX = out_b;
+    ECX = out_c;
+    EDX = out_d;
+}
+
+/*
+ * ENTER with a non-zero nesting level (16/32-bit stack).
+ *
+ * Working on local copies of ESP and EBP, one slot is skipped, then
+ * (level - 1) frame pointers are copied from below EBP to below ESP, and
+ * finally t1 is stored.  Slots are 4 bytes wide when data32 is non-zero and
+ * 2 bytes otherwise.  All addresses are SS.base plus the masked offset.
+ * ESP and EBP themselves are not written here.
+ * NOTE(review): the skipped first slot is presumably the one the caller
+ * used for pushing EBP - confirm against the translator.
+ */
+void helper_enter_level(int level, int data32, target_ulong t1)
+{
+    uint32_t mask = get_sp_mask(env->segs[R_SS].flags);
+    target_ulong stack_base = env->segs[R_SS].base;
+    uint32_t frame = EBP;
+    uint32_t top = ESP;
+    const int width = data32 ? 4 : 2;
+
+    top -= width;
+    for (level--; level != 0; level--) {
+        top -= width;
+        frame -= width;
+        if (data32) {
+            stl(stack_base + (top & mask), ldl(stack_base + (frame & mask)));
+        } else {
+            stw(stack_base + (top & mask), lduw(stack_base + (frame & mask)));
+        }
+    }
+    top -= width;
+    if (data32) {
+        stl(stack_base + (top & mask), t1);
+    } else {
+        stw(stack_base + (top & mask), t1);
+    }
+}
+
+#ifdef TARGET_X86_64
+/*
+ * ENTER with a non-zero nesting level, flat (unsegmented) stack variant.
+ *
+ * Working on local copies of ESP and EBP, one slot is skipped, then
+ * (level - 1) frame pointers are copied from below EBP to below ESP, and
+ * finally t1 is stored.  Slots are 8 bytes wide when data64 is non-zero and
+ * 2 bytes otherwise.  ESP and EBP themselves are not written here.
+ */
+void helper_enter64_level(int level, int data64, target_ulong t1)
+{
+    target_ulong frame = EBP;
+    target_ulong top = ESP;
+    const int width = data64 ? 8 : 2;
+
+    top -= width;
+    for (level--; level != 0; level--) {
+        top -= width;
+        frame -= width;
+        if (data64) {
+            stq(top, ldq(frame));
+        } else {
+            stw(top, lduw(frame));
+        }
+    }
+    top -= width;
+    if (data64) {
+        stq(top, t1);
+    } else {
+        stw(top, t1);
+    }
+}
+#endif
+/*
+ * LLDT: load the local descriptor table register from 'selector'.
+ *
+ * Only the low 16 bits of 'selector' are used.  A selector whose bits 2..15
+ * are all zero yields an LDT with base 0 and limit 0 (under VBOX the cached
+ * flags are additionally set to DESC_INTEL_UNUSABLE).  Otherwise the
+ * descriptor is read from the GDT and checked in this order:
+ *   - bit 2 of the selector must be clear                   -> EXCP0D_GPF
+ *   - the descriptor must lie within the GDT limit (8 bytes,
+ *     or 16 bytes when HF_LMA_MASK is set)                  -> EXCP0D_GPF
+ *   - it must be a system descriptor of type 2              -> EXCP0D_GPF
+ *   - it must be present                                    -> EXCP0B_NOSEG
+ * The error code is the selector with its two low bits cleared.  With
+ * HF_LMA_MASK set, the third descriptor word supplies bits 32..63 of the
+ * base.  The selector is stored in env->ldt.selector in every non-faulting
+ * case.
+ */
+void helper_lldt(int selector)
+{
+ SegmentCache *dt;
+ uint32_t e1, e2; /* first and second 32-bit word of the descriptor */
+#ifndef VBOX
+ int index, entry_limit;
+#else
+ unsigned int index, entry_limit;
+#endif
+ target_ulong ptr;
+
+#ifdef VBOX
+ Log(("helper_lldt_T0: old ldtr=%RTsel {.base=%RGv, .limit=%RGv} new=%RTsel\n",
+ (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit, (RTSEL)(selector & 0xffff)));
+#endif
+
+ selector &= 0xffff;
+ if ((selector & 0xfffc) == 0) {
+ /* XXX: NULL selector case: invalid LDT */
+ env->ldt.base = 0;
+ env->ldt.limit = 0;
+#ifdef VBOX
+ env->ldt.flags = DESC_INTEL_UNUSABLE;
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+ } else {
+ if (selector & 0x4) /* bit 2 set: the selector does not refer to the GDT */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dt = &env->gdt;
+ index = selector & ~7; /* byte offset of the descriptor inside the table */
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK)
+ entry_limit = 15; /* offset of the last byte of a 16-byte descriptor */
+ else
+#endif
+ entry_limit = 7; /* offset of the last byte of an 8-byte descriptor */
+ if ((index + entry_limit) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2) /* only system descriptors of type 2 are accepted */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint32_t e3; /* third descriptor word: upper half of the base */
+ e3 = ldl_kernel(ptr + 8);
+ load_seg_cache_raw_dt(&env->ldt, e1, e2);
+ env->ldt.base |= (target_ulong)e3 << 32;
+ } else
+#endif
+ {
+ load_seg_cache_raw_dt(&env->ldt, e1, e2);
+ }
+ }
+ env->ldt.selector = selector;
+#ifdef VBOX
+ Log(("helper_lldt_T0: new ldtr=%RTsel {.base=%RGv, .limit=%RGv}\n",
+ (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit));
+#endif
+}
+/*
+ * LTR: load the task register from 'selector'.
+ *
+ * Only the low 16 bits of 'selector' are used.  For a selector whose bits
+ * 2..15 are all zero, the VBOX build raises EXCP0A_TSS with error code 0,
+ * while the non-VBOX build clears the cached base, limit and flags.
+ * Otherwise the descriptor is read from the GDT and checked in this order:
+ *   - bit 2 of the selector must be clear                   -> EXCP0D_GPF
+ *   - the descriptor must lie within the GDT limit (8 bytes,
+ *     or 16 bytes when HF_LMA_MASK is set)                  -> EXCP0D_GPF
+ *   - it must be a system descriptor of type 1 or 9         -> EXCP0D_GPF
+ *   - it must be present                                    -> EXCP0B_NOSEG
+ *   - with HF_LMA_MASK set, the type field of the fourth
+ *     descriptor word must be zero                          -> EXCP0D_GPF
+ * The error code is the selector with its two low bits cleared.  On success
+ * the descriptor is marked busy both in the cached flags and in memory, and
+ * the selector is stored in env->tr.selector.
+ */
+void helper_ltr(int selector)
+{
+ SegmentCache *dt;
+ uint32_t e1, e2; /* first and second 32-bit word of the descriptor */
+#ifndef VBOX
+ int index, type, entry_limit;
+#else
+ unsigned int index;
+ int type, entry_limit;
+#endif
+ target_ulong ptr;
+
+#ifdef VBOX
+ Log(("helper_ltr: pc=%RGv old tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n",
+ (RTGCPTR)env->eip, (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+ env->tr.flags, (RTSEL)(selector & 0xffff)));
+#endif
+ selector &= 0xffff;
+ if ((selector & 0xfffc) == 0) {
+ /* NULL selector case: invalid TR */
+#ifdef VBOX
+ raise_exception_err(EXCP0A_TSS, 0);
+#else
+ env->tr.base = 0;
+ env->tr.limit = 0;
+ env->tr.flags = 0;
+#endif
+ } else {
+ if (selector & 0x4) /* bit 2 set: the selector does not refer to the GDT */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dt = &env->gdt;
+ index = selector & ~7; /* byte offset of the descriptor inside the table */
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK)
+ entry_limit = 15; /* offset of the last byte of a 16-byte descriptor */
+ else
+#endif
+ entry_limit = 7; /* offset of the last byte of an 8-byte descriptor */
+ if ((index + entry_limit) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ if ((e2 & DESC_S_MASK) ||
+ (type != 1 && type != 9)) /* only system descriptors of type 1 or 9 are accepted */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint32_t e3, e4; /* third and fourth descriptor word */
+ e3 = ldl_kernel(ptr + 8);
+ e4 = ldl_kernel(ptr + 12);
+ if ((e4 >> DESC_TYPE_SHIFT) & 0xf) /* type field of the upper half must be zero */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ load_seg_cache_raw_dt(&env->tr, e1, e2);
+ env->tr.base |= (target_ulong)e3 << 32;
+ } else
+#endif
+ {
+ load_seg_cache_raw_dt(&env->tr, e1, e2);
+ }
+ env->tr.flags |= DESC_TSS_BUSY_MASK; /* mark busy in the cached copy ... */
+ e2 |= DESC_TSS_BUSY_MASK;
+ stl_kernel(ptr + 4, e2); /* ... and in the descriptor table itself */
+ }
+ env->tr.selector = selector;
+#ifdef VBOX
+ Log(("helper_ltr: new tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n",
+ (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+ env->tr.flags, (RTSEL)(selector & 0xffff)));
+#endif
+}
+
+/*
+ * Load segment register 'seg_reg' from 'selector' with descriptor checks.
+ * Only works if protected mode and not VM86. seg_reg must be != R_CS.
+ *
+ * Null selector (bits 2..15 zero): faults with EXCP0D_GPF(0) for R_SS
+ * (except in 64-bit code at CPL != 3), otherwise loads an empty cache entry
+ * (flagged DESC_INTEL_UNUSABLE under VBOX).
+ *
+ * Non-null selector: the descriptor is read from the LDT or GDT according
+ * to bit 2 of the selector and checked in this order:
+ *   - must lie within the table limit                      -> EXCP0D_GPF
+ *   - must be a code/data (non-system) descriptor          -> EXCP0D_GPF
+ *   - R_SS: writable data segment with RPL == DPL == CPL   -> EXCP0D_GPF
+ *   - others: readable; unless conforming code, DPL must be
+ *     >= CPL and >= RPL                                    -> EXCP0D_GPF
+ *   - must be present          -> EXCP0C_STACK (R_SS) / EXCP0B_NOSEG
+ * The error code is the selector with its two low bits cleared.  On success
+ * the accessed bit is set in the in-memory descriptor if it was clear, and
+ * the segment cache is loaded.
+ */
+void helper_load_seg(int seg_reg, int selector)
+{
+ uint32_t e1, e2; /* first and second 32-bit word of the descriptor */
+ int cpl, dpl, rpl;
+ SegmentCache *dt;
+#ifndef VBOX
+ int index;
+#else
+ unsigned int index;
+#endif
+ target_ulong ptr;
+
+ selector &= 0xffff;
+ cpl = env->hflags & HF_CPL_MASK;
+#ifdef VBOX
+
+ /* Trying to load a selector with CPL=1? */
+ if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ Log(("RPL 1 -> sel %04X -> %04X (helper_load_seg)\n", selector, selector & 0xfffc));
+ selector = selector & 0xfffc; /* the RPL bits are cleared before any further check */
+ }
+#endif /* VBOX */
+ if ((selector & 0xfffc) == 0) {
+ /* null selector case */
+#ifndef VBOX
+ if (seg_reg == R_SS
+#ifdef TARGET_X86_64
+ && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+#endif
+ )
+ raise_exception_err(EXCP0D_GPF, 0);
+ cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0);
+#else
+ if (seg_reg == R_SS) {
+ if (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+ raise_exception_err(EXCP0D_GPF, 0);
+ e2 = (cpl << DESC_DPL_SHIFT) | DESC_INTEL_UNUSABLE; /* a null SS keeps the current CPL in its DPL field */
+ } else {
+ e2 = DESC_INTEL_UNUSABLE;
+ }
+ cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, 0, 0, e2);
+#endif
+ } else {
+
+ if (selector & 0x4) /* bit 2 of the selector chooses LDT over GDT */
+ dt = &env->ldt;
+ else
+ dt = &env->gdt;
+ index = selector & ~7; /* byte offset of the descriptor inside the table */
+ if ((index + 7) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+
+ if (!(e2 & DESC_S_MASK)) /* system descriptors cannot be loaded here */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (seg_reg == R_SS) {
+ /* must be writable segment */
+ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (rpl != cpl || dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ } else {
+ /* must be readable segment */
+ if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+
+ if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+ /* if not conforming code, test rights */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ }
+ }
+
+ if (!(e2 & DESC_P_MASK)) {
+ if (seg_reg == R_SS)
+ raise_exception_err(EXCP0C_STACK, selector & 0xfffc);
+ else
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ }
+
+ /* set the access bit if not already set */
+ if (!(e2 & DESC_A_MASK)) {
+ e2 |= DESC_A_MASK;
+ stl_kernel(ptr + 4, e2);
+ }
+
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+#if 0
+ qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
+ selector, (unsigned long)sc->base, sc->limit, sc->flags);
+#endif
+ }
+}
+
+/*
+ * protected mode jump
+ *
+ * Far jump to new_cs:new_eip.  A null new_cs raises EXCP0D_GPF(0); a
+ * descriptor that load_segment() cannot fetch raises EXCP0D_GPF(new_cs).
+ *
+ * Code segment descriptor: conforming code requires DPL <= CPL;
+ * non-conforming code requires RPL <= CPL and DPL == CPL.  The segment must
+ * be present (EXCP0B_NOSEG) and, outside long mode / for non-L segments,
+ * new_eip must be within the limit.  CS is then loaded with the RPL bits
+ * replaced by the current CPL and EIP is set.
+ *
+ * System descriptor:
+ *   - types 1, 9, 5 (TSS / task gate): task switch via switch_tss() with
+ *     env->eip + next_eip_addend as the return address;
+ *   - types 4, 12 (call gate): the gate supplies the target selector and
+ *     offset (32-bit offset only for type 12); the target must be a present
+ *     code segment passing the DPL checks and the offset must be within its
+ *     limit;
+ *   - any other type raises EXCP0D_GPF.
+ */
+void helper_ljmp_protected(int new_cs, target_ulong new_eip,
+ int next_eip_addend)
+{
+ int gate_cs, type;
+ uint32_t e1, e2, cpl, dpl, rpl, limit;
+ target_ulong next_eip;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ e1 = e2 = 0;
+#endif
+ if ((new_cs & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_S_MASK) { /* code/data descriptor: direct jump */
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (e2 & DESC_C_MASK) {
+ /* conforming code segment */
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ } else {
+ /* non conforming code segment */
+ rpl = new_cs & 3;
+ if (rpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit &&
+ !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK)) /* limit check is skipped in long mode or for L segments */
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ } else {
+ /* jump to call or task gate */
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ rpl = new_cs & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ switch(type) {
+ case 1: /* 286 TSS */
+ case 9: /* 386 TSS */
+ case 5: /* task gate */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ next_eip = env->eip + next_eip_addend;
+ switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
+ CC_OP = CC_OP_EFLAGS;
+ break;
+ case 4: /* 286 call gate */
+ case 12: /* 386 call gate */
+ if ((dpl < cpl) || (dpl < rpl))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ gate_cs = e1 >> 16; /* target selector stored in the gate */
+ new_eip = (e1 & 0xffff); /* low 16 bits of the target offset */
+ if (type == 12)
+ new_eip |= (e2 & 0xffff0000); /* type 12 gates also carry the high 16 bits */
+ if (load_segment(&e1, &e2, gate_cs) != 0) /* e1/e2 now describe the target code segment */
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ /* must be code segment */
+ if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
+ (DESC_S_MASK | DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
+ (!(e2 & DESC_C_MASK) && (dpl != cpl)))
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-514 of 253666.pdf */
+ raise_exception_err(EXCP0B_NOSEG, gate_cs & 0xfffc);
+#else
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+#endif
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit)
+ raise_exception_err(EXCP0D_GPF, 0);
+ cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ break;
+ }
+ }
+}
+
+/*
+ * Real-mode far call.
+ *
+ * Pushes the current CS selector and next_eip on the stack (32-bit slots
+ * when shift is non-zero, 16-bit slots otherwise), commits the new stack
+ * pointer, then sets EIP to the target offset and CS to new_cs with a base
+ * of new_cs << 4.
+ */
+void helper_lcall_real(int new_cs, target_ulong new_eip1,
+                       int shift, int next_eip)
+{
+    /* The target offset is narrowed to int before being stored in EIP. */
+    int target_ip = new_eip1;
+    uint32_t sp = ESP;
+    uint32_t sp_mask = get_sp_mask(env->segs[R_SS].flags);
+    target_ulong stack_base = env->segs[R_SS].base;
+
+    if (!shift) {
+        PUSHW(stack_base, sp, sp_mask, env->segs[R_CS].selector);
+        PUSHW(stack_base, sp, sp_mask, next_eip);
+    } else {
+        PUSHL(stack_base, sp, sp_mask, env->segs[R_CS].selector);
+        PUSHL(stack_base, sp, sp_mask, next_eip);
+    }
+
+    SET_ESP(sp, sp_mask);
+    env->eip = target_ip;
+    env->segs[R_CS].selector = new_cs;
+    env->segs[R_CS].base = (new_cs << 4);
+}
+
+/*
+ * protected mode call
+ *
+ * Far call to new_cs:new_eip.  'shift' selects the operand size of the
+ * pushed return frame (0 = 16 bit, non-zero = 32 bit, 2 = 64 bit on
+ * TARGET_X86_64); the return address is env->eip + next_eip_addend.
+ *
+ * Code segment descriptor: same DPL/RPL/present checks as a far jump, then
+ * CS and the return address are pushed on the current stack and CS:EIP are
+ * loaded (RPL bits replaced by the current CPL).
+ *
+ * System descriptor:
+ *   - types 1, 9, 5 (TSS / task gate): task switch via switch_tss();
+ *   - types 4, 12 (call gate): the gate supplies target selector, offset and
+ *     parameter count, and the gate type overrides 'shift'.  When the target
+ *     is non-conforming code with DPL < CPL, a new stack is taken from the
+ *     TSS, the old SS:ESP and 'param_count' parameters are copied onto it,
+ *     and CPL becomes the target DPL; otherwise the current stack is kept.
+ *   - any other type raises EXCP0D_GPF.
+ * Faults raised before the "not restartable" markers leave CS, SS and ESP
+ * unmodified.
+ */
+void helper_lcall_protected(int new_cs, target_ulong new_eip,
+ int shift, int next_eip_addend)
+{
+ int new_stack, i;
+ uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
+ uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
+ uint32_t val, limit, old_sp_mask;
+ target_ulong ssp, old_ssp, next_eip;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ e1 = e2 = 0;
+#endif
+ next_eip = env->eip + next_eip_addend;
+ LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
+ LOG_PCALL_STATE(env);
+ if ((new_cs & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ cpl = env->hflags & HF_CPL_MASK;
+ LOG_PCALL("desc=%08x:%08x\n", e1, e2);
+ if (e2 & DESC_S_MASK) { /* code/data descriptor: direct call */
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (e2 & DESC_C_MASK) {
+ /* conforming code segment */
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ } else {
+ /* non conforming code segment */
+ rpl = new_cs & 3;
+ if (rpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+
+#ifdef TARGET_X86_64
+ /* XXX: check 16/32 bit cases in long mode */
+ if (shift == 2) {
+ target_ulong rsp;
+ /* 64 bit case */
+ rsp = ESP;
+ PUSHQ(rsp, env->segs[R_CS].selector);
+ PUSHQ(rsp, next_eip);
+ /* from this point, not restartable */
+ ESP = rsp;
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2), e2);
+ EIP = new_eip;
+ } else
+#endif
+ {
+ sp = ESP;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, sp, sp_mask, next_eip);
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, sp, sp_mask, next_eip);
+ }
+
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ /* from this point, not restartable */
+ SET_ESP(sp, sp_mask);
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ }
+ } else {
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ rpl = new_cs & 3;
+ switch(type) {
+ case 1: /* available 286 TSS */
+ case 9: /* available 386 TSS */
+ case 5: /* task gate */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
+ CC_OP = CC_OP_EFLAGS;
+ return;
+ case 4: /* 286 call gate */
+ case 12: /* 386 call gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ break;
+ }
+ shift = type >> 3; /* gate type decides the frame size: 12 -> 1 (32 bit), 4 -> 0 (16 bit) */
+
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ selector = e1 >> 16; /* target code selector stored in the gate */
+ offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); /* target offset stored in the gate */
+ param_count = e2 & 0x1f; /* number of stack parameters to copy on a stack switch */
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ if (load_segment(&e1, &e2, selector) != 0) /* e1/e2 now describe the target code segment */
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+
+ if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+ /* to inner privilege */
+ get_ss_esp_from_tss(&ss, &sp, dpl);
+ LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
+ ss, sp, param_count, ESP);
+ if ((ss & 0xfffc) == 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if ((ss & 3) != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (ss_dpl != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK)) /* the new stack must be a writable data segment */
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-99 of 253666.pdf */
+ raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+
+ // push_size = ((param_count * 2) + 8) << shift;
+
+ old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ old_ssp = env->segs[R_SS].base;
+
+ sp_mask = get_sp_mask(ss_e2);
+ ssp = get_seg_base(ss_e1, ss_e2);
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
+ PUSHL(ssp, sp, sp_mask, ESP);
+ for(i = param_count - 1; i >= 0; i--) { /* copy parameters from the old stack, last one first */
+ val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+ PUSHL(ssp, sp, sp_mask, val);
+ }
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector);
+ PUSHW(ssp, sp, sp_mask, ESP);
+ for(i = param_count - 1; i >= 0; i--) { /* copy parameters from the old stack, last one first */
+ val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask));
+ PUSHW(ssp, sp, sp_mask, val);
+ }
+ }
+ new_stack = 1;
+ } else {
+ /* to same privilege */
+ sp = ESP;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ // push_size = (4 << shift);
+ new_stack = 0;
+ }
+
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, sp, sp_mask, next_eip);
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, sp, sp_mask, next_eip);
+ }
+
+ /* from this point, not restartable */
+
+ if (new_stack) {
+ ss = (ss & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_SS, ss,
+ ssp,
+ get_seg_limit(ss_e1, ss_e2),
+ ss_e2);
+ }
+
+ selector = (selector & ~3) | dpl; /* RPL of the loaded CS becomes the target DPL */
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ SET_ESP(sp, sp_mask);
+ EIP = offset;
+ }
+}
+
+/*
+ * real and vm86 mode iret
+ *
+ * Pops EIP, CS and EFLAGS from the stack (32-bit slots when shift == 1,
+ * 16-bit slots otherwise) using a fixed 16-bit stack-pointer mask, loads
+ * CS with base = selector << 4, and reloads EFLAGS through a mask:
+ *   - VBOX with VME active (VM_MASK set, IOPL != 3, CR4_VME_MASK set):
+ *     IF and IOPL are not reloaded; instead VIF is set or cleared from the
+ *     popped IF, and EXCP0D_GPF is raised beforehand when the popped flags
+ *     would set TF, or would set IF while VIP is pending;
+ *   - VM_MASK set otherwise: IOPL is not reloaded;
+ *   - VM_MASK clear: IF and IOPL are both reloaded.
+ * For 16-bit frames only the low 16 flag bits are affected.  HF2_NMI_MASK
+ * is cleared in hflags2 at the end.
+ */
+void helper_iret_real(int shift)
+{
+ uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
+ target_ulong ssp;
+ int eflags_mask;
+#ifdef VBOX
+ bool fVME = false;
+
+ remR3TrapClear(env->pVM);
+#endif /* VBOX */
+
+ sp_mask = 0xffff; /* XXXX: use SS segment size ? */
+ sp = ESP;
+ ssp = env->segs[R_SS].base;
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_eip);
+ POPL(ssp, sp, sp_mask, new_cs);
+ new_cs &= 0xffff;
+ POPL(ssp, sp, sp_mask, new_eflags);
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_eip);
+ POPW(ssp, sp, sp_mask, new_cs);
+ POPW(ssp, sp, sp_mask, new_eflags);
+ }
+#ifdef VBOX
+ if ( (env->eflags & VM_MASK)
+ && ((env->eflags >> IOPL_SHIFT) & 3) != 3
+ && (env->cr[4] & CR4_VME_MASK)) /* implied or else we would fault earlier */
+ {
+ fVME = true;
+ /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+ /* if TF will be set -> #GP */
+ if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK))
+ || (new_eflags & TF_MASK))
+ raise_exception(EXCP0D_GPF);
+ }
+#endif /* VBOX */
+ ESP = (ESP & ~sp_mask) | (sp & sp_mask); /* only the masked low part of ESP is updated */
+ env->segs[R_CS].selector = new_cs;
+ env->segs[R_CS].base = (new_cs << 4);
+ env->eip = new_eip;
+#ifdef VBOX
+ if (fVME)
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+ else
+#endif
+ if (env->eflags & VM_MASK)
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
+ else
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
+ if (shift == 0)
+ eflags_mask &= 0xffff; /* a 16-bit frame can only change the low 16 flag bits */
+ load_eflags(new_eflags, eflags_mask);
+ env->hflags2 &= ~HF2_NMI_MASK;
+#ifdef VBOX
+ if (fVME)
+ {
+ if (new_eflags & IF_MASK)
+ env->eflags |= VIF_MASK;
+ else
+ env->eflags &= ~VIF_MASK;
+ }
+#endif /* VBOX */
+}
+
+/*
+ * Invalidate data segment register 'seg_reg' if it may not stay loaded at
+ * privilege level 'cpl'.
+ *
+ * A data or non-conforming code segment whose DPL is below 'cpl' is replaced
+ * by an all-zero cache entry; conforming code segments are left alone.
+ * FS and GS holding a null selector are skipped entirely.
+ */
+static inline void validate_seg(int seg_reg, int cpl)
+{
+    uint32_t flags;
+    int seg_dpl;
+
+    /* XXX: on x86_64, we do not want to nullify FS and GS because
+       they may still contain a valid base. I would be interested to
+       know how a real x86_64 CPU behaves */
+    if ((seg_reg == R_FS || seg_reg == R_GS) &&
+        (env->segs[seg_reg].selector & 0xfffc) == 0) {
+        return;
+    }
+
+    flags = env->segs[seg_reg].flags;
+    seg_dpl = (flags >> DESC_DPL_SHIFT) & 3;
+
+    /* conforming code segments are exempt from the privilege test */
+    if ((flags & DESC_CS_MASK) && (flags & DESC_C_MASK)) {
+        return;
+    }
+    if (seg_dpl < cpl) {
+        cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+    }
+}
+
+/*
+ * protected mode iret
+ *
+ * Common worker for protected-mode far return and interrupt return.
+ *   shift   - size of the popped stack slots: 0 = 16 bit, 1 = 32 bit,
+ *             2 = 64 bit (TARGET_X86_64 only).
+ *   is_iret - non-zero: EFLAGS is popped as well and reloaded at the end.
+ *   addend  - added to the stack pointer after the return frame has been
+ *             popped (and once more after switching to the outer stack).
+ *
+ * Sequence: pop EIP and CS (and EFLAGS for iret); a 32-bit iret whose popped
+ * EFLAGS has VM_MASK set branches to the vm86 return path.  Otherwise the
+ * new CS is validated (non-null, loadable, code segment, RPL >= CPL, DPL
+ * consistent with RPL, present).  If RPL == CPL (and this is not an iret
+ * from 64-bit code) only CS is reloaded.  Otherwise ESP and SS are popped
+ * too, SS is validated and loaded, CS is loaded, CPL becomes RPL, and
+ * ES/DS/FS/GS are passed through validate_seg().  Finally ESP and EIP are
+ * written and, for iret, EFLAGS is reloaded through a mask that depends on
+ * the old CPL and IOPL.
+ *
+ * Faults: EXCP0D_GPF for failed CS/SS checks, EXCP0B_NOSEG for a
+ * non-present CS or SS.
+ */
+static inline void helper_ret_protected(int shift, int is_iret, int addend)
+{
+ uint32_t new_cs, new_eflags, new_ss;
+ uint32_t new_es, new_ds, new_fs, new_gs;
+ uint32_t e1, e2, ss_e1, ss_e2; /* descriptor words of the new CS and the new SS */
+ int cpl, dpl, rpl, eflags_mask, iopl;
+ target_ulong ssp, sp, new_eip, new_esp, sp_mask;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ ss_e1 = ss_e2 = e1 = e2 = 0;
+#endif
+
+#ifdef TARGET_X86_64
+ if (shift == 2)
+ sp_mask = -1;
+ else
+#endif
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ sp = ESP;
+ ssp = env->segs[R_SS].base;
+ new_eflags = 0; /* avoid warning */
+#ifdef TARGET_X86_64
+ if (shift == 2) {
+ POPQ(sp, new_eip);
+ POPQ(sp, new_cs);
+ new_cs &= 0xffff;
+ if (is_iret) {
+ POPQ(sp, new_eflags);
+ }
+ } else
+#endif
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_eip);
+ POPL(ssp, sp, sp_mask, new_cs);
+ new_cs &= 0xffff;
+ if (is_iret) {
+ POPL(ssp, sp, sp_mask, new_eflags);
+#define LOG_GROUP LOG_GROUP_REM
+#if defined(VBOX) && defined(DEBUG)
+ Log(("iret: new CS %04X (old=%x)\n", new_cs, env->segs[R_CS].selector));
+ Log(("iret: new EIP %08X\n", (uint32_t)new_eip));
+ Log(("iret: new EFLAGS %08X\n", new_eflags));
+ Log(("iret: EAX=%08x\n", (uint32_t)EAX));
+#endif
+ if (new_eflags & VM_MASK) /* popped flags request a return to vm86 mode */
+ goto return_to_vm86;
+ }
+#ifdef VBOX
+ if ((new_cs & 0x3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ if ( !EMIsRawRing1Enabled(env->pVM)
+ || env->segs[R_CS].selector == (new_cs & 0xfffc))
+ {
+ Log(("RPL 1 -> new_cs %04X -> %04X\n", new_cs, new_cs & 0xfffc));
+ new_cs = new_cs & 0xfffc; /* RPL 1 is rewritten to RPL 0 */
+ }
+ else
+ {
+ /* Ugly assumption: assume a genuine switch to ring-1. */
+ Log(("Genuine switch to ring-1 (iret)\n"));
+ }
+ }
+ else if ((new_cs & 0x3) == 2 && (env->state & CPU_RAW_RING0) && EMIsRawRing1Enabled(env->pVM))
+ {
+ Log(("RPL 2 -> new_cs %04X -> %04X\n", new_cs, (new_cs & 0xfffc) | 1));
+ new_cs = (new_cs & 0xfffc) | 1; /* RPL 2 is rewritten to RPL 1 */
+ }
+#endif
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_eip);
+ POPW(ssp, sp, sp_mask, new_cs);
+ if (is_iret)
+ POPW(ssp, sp, sp_mask, new_eflags);
+ }
+ LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
+ new_cs, new_eip, shift, addend);
+ LOG_PCALL_STATE(env);
+ if ((new_cs & 0xfffc) == 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_cs & 0xfffc) == 0\n"));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("load_segment failed\n"));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_S_MASK) ||
+ !(e2 & DESC_CS_MASK)) /* the return target must be a code segment */
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("e2 mask %08x\n", e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ cpl = env->hflags & HF_CPL_MASK;
+ rpl = new_cs & 3;
+ if (rpl < cpl) /* returning to a more privileged level is refused */
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("rpl < cpl (%d vs %d)\n", rpl, cpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+
+ if (e2 & DESC_C_MASK) { /* conforming code: DPL must not exceed RPL */
+ if (dpl > rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("dpl > rpl (%d vs %d)\n", dpl, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ } else { /* non-conforming code: DPL must equal RPL */
+ if (dpl != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("dpl != rpl (%d vs %d) e1=%x e2=%x\n", dpl, rpl, e1, e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ }
+ if (!(e2 & DESC_P_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("DESC_P_MASK e2=%08x\n", e2));
+#endif
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ }
+
+ sp += addend;
+ if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
+ ((env->hflags & HF_CS64_MASK) && !is_iret))) {
+ /* return to same privilege level */
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_CS, new_cs,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ } else {
+ /* return to different privilege level */
+#ifdef TARGET_X86_64
+ if (shift == 2) {
+ POPQ(sp, new_esp);
+ POPQ(sp, new_ss);
+ new_ss &= 0xffff;
+ } else
+#endif
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_esp);
+ POPL(ssp, sp, sp_mask, new_ss);
+ new_ss &= 0xffff;
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_esp);
+ POPW(ssp, sp, sp_mask, new_ss);
+ }
+ LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n",
+ new_ss, new_esp);
+ if ((new_ss & 0xfffc) == 0) {
+#ifdef TARGET_X86_64
+ /* NULL ss is allowed in long mode if cpl != 3*/
+# ifndef VBOX
+ /* XXX: test CS64 ? */
+ if ((env->hflags & HF_LMA_MASK) && rpl != 3) {
+ cpu_x86_load_seg_cache(env, R_SS, new_ss,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
+ } else
+# else /* VBOX */
+ if ((env->hflags & HF_LMA_MASK) && rpl != 3 && (e2 & DESC_L_MASK)) {
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+ cpu_x86_load_seg_cache_with_clean_flags(env, R_SS, new_ss,
+ 0, 0xffffffff,
+ DESC_INTEL_UNUSABLE | (rpl << DESC_DPL_SHIFT) );
+ ss_e2 = DESC_B_MASK; /* not really used */
+ } else
+# endif
+#endif
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("NULL ss, rpl=%d\n", rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+ } else {
+ if ((new_ss & 3) != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x != rpl=%d\n", new_ss, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (load_segment(&ss_e1, &ss_e2, new_ss) != 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x load error\n", new_ss));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK)) /* the new stack must be a writable data segment */
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x ss_e2=%#x bad type\n", new_ss, ss_e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("SS.dpl=%u != rpl=%u\n", dpl, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (!(ss_e2 & DESC_P_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%#x #NP\n", new_ss));
+#endif
+ raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc);
+ }
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+ if (!(ss_e2 & DESC_A_MASK))
+ ss_e2 = set_segment_accessed(new_ss, ss_e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_SS, new_ss,
+ get_seg_base(ss_e1, ss_e2),
+ get_seg_limit(ss_e1, ss_e2),
+ ss_e2);
+ }
+
+ cpu_x86_load_seg_cache(env, R_CS, new_cs,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, rpl);
+ sp = new_esp; /* continue on the stack that was just popped */
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK)
+ sp_mask = -1;
+ else
+#endif
+ sp_mask = get_sp_mask(ss_e2);
+
+ /* validate data segments */
+ validate_seg(R_ES, rpl);
+ validate_seg(R_DS, rpl);
+ validate_seg(R_FS, rpl);
+ validate_seg(R_GS, rpl);
+
+ sp += addend;
+ }
+ SET_ESP(sp, sp_mask);
+ env->eip = new_eip;
+ if (is_iret) {
+ /* NOTE: 'cpl' is the _old_ CPL */
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+ if (cpl == 0) /* only the old ring 0 may change IOPL (and VIF/VIP under VBOX) */
+#ifdef VBOX
+ eflags_mask |= IOPL_MASK | VIF_MASK | VIP_MASK;
+#else
+ eflags_mask |= IOPL_MASK;
+#endif
+ iopl = (env->eflags >> IOPL_SHIFT) & 3;
+ if (cpl <= iopl) /* IF is reloaded only when the old CPL is within IOPL */
+ eflags_mask |= IF_MASK;
+ if (shift == 0)
+ eflags_mask &= 0xffff; /* a 16-bit frame can only change the low 16 flag bits */
+ load_eflags(new_eflags, eflags_mask);
+ }
+ return;
+
+ return_to_vm86: /* reached only from the 32-bit iret path above */
+ POPL(ssp, sp, sp_mask, new_esp);
+ POPL(ssp, sp, sp_mask, new_ss);
+ POPL(ssp, sp, sp_mask, new_es);
+ POPL(ssp, sp, sp_mask, new_ds);
+ POPL(ssp, sp, sp_mask, new_fs);
+ POPL(ssp, sp, sp_mask, new_gs);
+
+ /* modify processor state */
+ load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK |
+ IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+ load_seg_vm(R_CS, new_cs & 0xffff);
+ cpu_x86_set_cpl(env, 3);
+ load_seg_vm(R_SS, new_ss & 0xffff);
+ load_seg_vm(R_ES, new_es & 0xffff);
+ load_seg_vm(R_DS, new_ds & 0xffff);
+ load_seg_vm(R_FS, new_fs & 0xffff);
+ load_seg_vm(R_GS, new_gs & 0xffff);
+
+ env->eip = new_eip & 0xffff;
+ ESP = new_esp;
+}
+
+/*
+ * Protected-mode IRET entry point.
+ *
+ * With NT clear in EFLAGS this is an ordinary interrupt return handled by
+ * helper_ret_protected().  With NT set, the 16-bit word at the start of the
+ * current TSS is used as the selector of the task to return to: it must
+ * refer to the GDT and name a busy TSS descriptor, otherwise EXCP0A_TSS is
+ * raised; the switch itself is done by switch_tss().  On TARGET_X86_64 an
+ * IRET with NT set while HF_LMA_MASK is set raises EXCP0D_GPF(0).
+ * HF2_NMI_MASK is cleared in hflags2 on the way out.
+ */
+void helper_iret_protected(int shift, int next_eip)
+{
+    uint32_t desc_lo, desc_hi;
+    int back_link, desc_type;
+
+#ifdef VBOX
+    Log(("iret (shift=%d new_eip=%#x)\n", shift, next_eip));
+    desc_lo = desc_hi = 0; /** @todo Why do we do this? */
+    remR3TrapClear(env->pVM);
+#endif
+
+    if (!(env->eflags & NT_MASK)) {
+        helper_ret_protected(shift, 1, 0);
+    } else {
+        /* specific case for TSS */
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+#if defined(VBOX) && defined(DEBUG)
+            Log(("eflags.NT=1 on iret in long mode\n"));
+#endif
+            raise_exception_err(EXCP0D_GPF, 0);
+        }
+#endif
+        back_link = lduw_kernel(env->tr.base + 0);
+        if ((back_link & 4) ||
+            load_segment(&desc_lo, &desc_hi, back_link) != 0) {
+            raise_exception_err(EXCP0A_TSS, back_link & 0xfffc);
+        }
+        /*
+         * NOTE: we check both segment and busy TSS.  The 0x17 mask keeps
+         * the bit above the 4-bit type field and drops type bit 3, so the
+         * value 3 matches types 3 and 11 of a system descriptor.
+         */
+        desc_type = (desc_hi >> DESC_TYPE_SHIFT) & 0x17;
+        if (desc_type != 3) {
+            raise_exception_err(EXCP0A_TSS, back_link & 0xfffc);
+        }
+        switch_tss(back_link, desc_lo, desc_hi, SWITCH_TSS_IRET, next_eip);
+    }
+    env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+/*
+ * Protected-mode far return: helper_ret_protected() without the EFLAGS
+ * pop, forwarding 'shift' and 'addend' unchanged.
+ */
+void helper_lret_protected(int shift, int addend)
+{
+    const int is_iret = 0;
+
+    helper_ret_protected(shift, is_iret, addend);
+}
+
+/*
+ * SYSENTER: fast transition to privilege level 0.
+ *
+ * Raises EXCP0D_GPF(0) when env->sysenter_cs is zero.  Otherwise clears
+ * VM, IF and RF in EFLAGS, sets CPL to 0 and loads flat segments: CS from
+ * sysenter_cs (with DESC_L_MASK when HF_LMA_MASK is set on TARGET_X86_64)
+ * and SS from sysenter_cs + 8, both with base 0 and limit 0xffffffff.
+ * ESP and EIP are taken from env->sysenter_esp / env->sysenter_eip.
+ */
+void helper_sysenter(void)
+{
+    unsigned int code_flags = DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                              DESC_S_MASK |
+                              DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK;
+    const unsigned int stack_flags = DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                                     DESC_S_MASK |
+                                     DESC_W_MASK | DESC_A_MASK;
+
+    if (env->sysenter_cs == 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+    cpu_x86_set_cpl(env, 0);
+
+#ifdef TARGET_X86_64
+    /* in long mode the target code segment is a 64-bit one */
+    if (env->hflags & HF_LMA_MASK) {
+        code_flags |= DESC_L_MASK;
+    }
+#endif
+    cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                           0, 0xffffffff, code_flags);
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+                           0, 0xffffffff, stack_flags);
+    ESP = env->sysenter_esp;
+    EIP = env->sysenter_eip;
+}
+
+/*
+ * SYSEXIT.
+ *
+ * Raises #GP(0) unless sysenter_cs is non-zero and the current CPL is 0.
+ * Otherwise switches to CPL 3 and loads flat, DPL-3 CS/SS caches whose
+ * selectors are sysenter_cs + 16 / + 24 (or + 32 / + 40 with the L bit set
+ * on CS when dflag == 2 on x86-64 builds), then ESP <- ECX and EIP <- EDX.
+ */
+void helper_sysexit(int dflag)
+{
+    unsigned int common = DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                          DESC_S_MASK | (3 << DESC_DPL_SHIFT);
+    unsigned int cs_attrs = common | DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK;
+    unsigned int ss_attrs = common | DESC_W_MASK | DESC_A_MASK;
+    unsigned int cs_delta = 16;     /* offset of the user CS from sysenter_cs */
+    int cpl = env->hflags & HF_CPL_MASK;
+
+    if (env->sysenter_cs == 0 || cpl != 0) {
+        raise_exception_err(EXCP0D_GPF, 0);
+    }
+    cpu_x86_set_cpl(env, 3);
+#ifdef TARGET_X86_64
+    if (dflag == 2) {
+        cs_delta = 32;
+        cs_attrs |= DESC_L_MASK;
+    }
+#endif
+    /* the stack selector always follows the code selector by 8 */
+    cpu_x86_load_seg_cache(env, R_CS,
+                           ((env->sysenter_cs + cs_delta) & 0xfffc) | 3,
+                           0, 0xffffffff, cs_attrs);
+    cpu_x86_load_seg_cache(env, R_SS,
+                           ((env->sysenter_cs + cs_delta + 8) & 0xfffc) | 3,
+                           0, 0xffffffff, ss_attrs);
+    ESP = ECX;
+    EIP = EDX;
+}
+
+#if defined(CONFIG_USER_ONLY)
+target_ulong helper_read_crN(int reg)
+{ /* CONFIG_USER_ONLY stub: control registers are not modelled, always 0. */
+ return 0;
+}
+
+void helper_write_crN(int reg, target_ulong t0)
+{ /* CONFIG_USER_ONLY stub: the write is ignored. */
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{ /* CONFIG_USER_ONLY stub: debug-register writes are ignored. */
+}
+#else
+/*
+ * Read control register 'reg'.
+ *
+ * The SVM read-CR intercept is checked first.  CR8 is served from the APIC
+ * TPR unless HF2_VINTR_MASK is set, in which case the cached v_tpr is
+ * returned; every other register comes straight from env->cr[].
+ */
+target_ulong helper_read_crN(int reg)
+{
+    target_ulong result;
+
+    helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0);
+
+    if (reg != 8) {
+        result = env->cr[reg];
+    } else if (env->hflags2 & HF2_VINTR_MASK) {
+        result = env->v_tpr;
+    } else {
+#ifndef VBOX
+        result = cpu_get_apic_tpr(env->apic_state);
+#else /* VBOX */
+        result = cpu_get_apic_tpr(env);
+#endif /* VBOX */
+    }
+    return result;
+}
+
+/*
+ * Write t0 to control register 'reg'.
+ *
+ * The SVM write-CR intercept is checked first.  CR0/CR3/CR4 go through their
+ * cpu_x86_update_crN() routines; CR8 updates the APIC TPR (unless
+ * HF2_VINTR_MASK is set) and always caches the low four bits in v_tpr; any
+ * other register is stored directly into env->cr[].
+ */
+void helper_write_crN(int reg, target_ulong t0)
+{
+    helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0);
+
+    if (reg == 0) {
+        cpu_x86_update_cr0(env, t0);
+    } else if (reg == 3) {
+        cpu_x86_update_cr3(env, t0);
+    } else if (reg == 4) {
+        cpu_x86_update_cr4(env, t0);
+    } else if (reg == 8) {
+        if (!(env->hflags2 & HF2_VINTR_MASK)) {
+#ifndef VBOX
+            cpu_set_apic_tpr(env->apic_state, t0);
+#else /* VBOX */
+            cpu_set_apic_tpr(env, t0);
+#endif /* VBOX */
+        }
+        env->v_tpr = t0 & 0x0f;
+    } else {
+        env->cr[reg] = t0;
+    }
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{ /* Write t0 to debug register 'reg'.
+   *  - DR0..DR3: the matching hardware breakpoint is removed, the register
+   *    updated and the breakpoint re-inserted.
+   *  - DR7 (and, in VBOX builds, its alias DR5): all four breakpoints are
+   *    removed, DR7 is updated and all four are re-inserted.
+   *  - anything else: non-VBOX builds store into env->dr[reg]; VBOX builds
+   *    treat the write as a DR6 write.
+   * VBOX builds additionally raise #GP(0) when must-be-zero bits are set and
+   * force the read-as-one / read-as-zero bits before storing DR6/DR7. */
+ int i;
+
+ if (reg < 4) {
+ hw_breakpoint_remove(env, reg);
+ env->dr[reg] = t0;
+ hw_breakpoint_insert(env, reg);
+# ifndef VBOX
+ } else if (reg == 7) {
+# else
+ } else if (reg == 7 || reg == 5) { /* (DR5 is an alias for DR7.) */
+ if (t0 & X86_DR7_MBZ_MASK)
+ raise_exception_err(EXCP0D_GPF, 0);
+ t0 |= X86_DR7_RA1_MASK;
+ t0 &= ~X86_DR7_RAZ_MASK;
+# endif
+ for (i = 0; i < 4; i++) /* drop breakpoints armed under the old DR7 */
+ hw_breakpoint_remove(env, i);
+ env->dr[7] = t0;
+ for (i = 0; i < 4; i++) /* re-arm according to the new DR7 */
+ hw_breakpoint_insert(env, i);
+ } else {
+# ifndef VBOX
+ env->dr[reg] = t0;
+# else
+ if (t0 & X86_DR6_MBZ_MASK)
+ raise_exception_err(EXCP0D_GPF, 0);
+ t0 |= X86_DR6_RA1_MASK;
+ t0 &= ~X86_DR6_RAZ_MASK;
+ env->dr[6] = t0; /* (DR4 is an alias for DR6.) */
+# endif
+ }
+}
+#endif
+
+/*
+ * LMSW: only the four low bits of CR0 are affected.  Bit 0 (PE) of the old
+ * CR0 is kept in the mask, so it can be set but never cleared here.
+ */
+void helper_lmsw(target_ulong t0)
+{
+    target_ulong preserved = env->cr[0] & ~0xe;   /* everything but bits 1..3 */
+    target_ulong requested = t0 & 0xf;            /* low nibble of the operand */
+
+    helper_write_crN(0, preserved | requested);
+}
+
+/* CLTS: clear the task-switched flag in both CR0 and the cached hflags. */
+void helper_clts(void)
+{
+    env->hflags &= ~HF_TS_MASK;
+    env->cr[0] &= ~CR0_TS_MASK;
+}
+
+void helper_invlpg(target_ulong addr)
+{ /* INVLPG: check the SVM INVLPG intercept, then flush the TLB entry for
+   * the page containing addr. */
+ helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0);
+ tlb_flush_page(env, addr);
+}
+
+/*
+ * RDTSC: raises #GP when CR4.TSD is set and CPL != 0; otherwise (after the
+ * SVM RDTSC intercept check) returns cpu_get_tsc() + tsc_offset in EDX:EAX.
+ */
+void helper_rdtsc(void)
+{
+    uint64_t tsc;
+    int cpl = env->hflags & HF_CPL_MASK;
+
+    if (cpl != 0 && (env->cr[4] & CR4_TSD_MASK)) {
+        raise_exception(EXCP0D_GPF);
+    }
+    helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0);
+
+    tsc = cpu_get_tsc(env) + env->tsc_offset;
+    EDX = (uint32_t)(tsc >> 32);
+    EAX = (uint32_t)(tsc);
+}
+
+/*
+ * RDTSCP: same as RDTSC, plus ECX receives the TSC_AUX value.  VBOX builds
+ * fetch it through cpu_rdmsr(MSR_K8_TSC_AUX) and fall back to 0 on failure.
+ */
+void helper_rdtscp(void)
+{
+#ifdef VBOX
+    uint64_t aux;
+#endif
+
+    helper_rdtsc();
+#ifndef VBOX
+    ECX = (uint32_t)(env->tsc_aux);
+#else /* VBOX */
+    if (cpu_rdmsr(env, MSR_K8_TSC_AUX, &aux) != 0)
+        aux = 0;
+    ECX = (uint32_t)(aux);
+#endif /* VBOX */
+}
+
+/*
+ * RDPMC.
+ *
+ * If CR4.PCE is *not* set, RDPMC may only be executed at CPL 0; otherwise
+ * #GP is raised.  (The non-VBOX branch used to test the opposite PCE
+ * polarity and therefore faulted exactly when user-mode access had been
+ * enabled; both configurations now share the correct check.)
+ *
+ * VBOX builds then return 0 in EDX:EAX; non-VBOX builds check the SVM RDPMC
+ * intercept and raise #UD because the instruction is not implemented.
+ */
+void helper_rdpmc(void)
+{
+    if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+        raise_exception(EXCP0D_GPF);
+    }
+#ifdef VBOX
+    /* Just return zero here; rather tricky to properly emulate this, especially as the specs are a mess. */
+    EAX = 0;
+    EDX = 0;
+#else /* !VBOX */
+    helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0);
+
+    /* currently unimplemented */
+    raise_exception_err(EXCP06_ILLOP, 0);
+#endif /* !VBOX */
+}
+
+#if defined(CONFIG_USER_ONLY)
+void helper_wrmsr(void)
+{ /* CONFIG_USER_ONLY stub: MSR writes are ignored. */
+}
+
+void helper_rdmsr(void)
+{ /* CONFIG_USER_ONLY stub: does nothing (EAX/EDX are left untouched). */
+}
+#else
+/*
+ * WRMSR: write EDX:EAX to the MSR selected by ECX.
+ *
+ * The SVM MSR intercept is checked first (parameter 1 = write).  MSRs that
+ * are cached in env are updated here.  In VBOX builds the MTRR / MCG /
+ * TSC_AUX cases are compiled out and, after the switch, the write is always
+ * forwarded to cpu_wrmsr(); a failure there is currently ignored.
+ *
+ * For MSR_EFER only the bits whose feature is advertised through CPUID may
+ * be changed by the guest; all other bits keep their current value.
+ */
+void helper_wrmsr(void)
+{
+    uint64_t val;
+
+    helper_svm_check_intercept_param(SVM_EXIT_MSR, 1);
+
+    val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+
+    switch((uint32_t)ECX) {
+    case MSR_IA32_SYSENTER_CS:
+        env->sysenter_cs = val & 0xffff;
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        env->sysenter_esp = val;
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        env->sysenter_eip = val;
+        break;
+    case MSR_IA32_APICBASE:
+# ifndef VBOX /* The CPUMSetGuestMsr call below does this now. */
+        cpu_set_apic_base(env->apic_state, val);
+# endif
+        break;
+    case MSR_EFER:
+        {
+            /* collect the EFER bits the guest is allowed to modify */
+            uint64_t update_mask;
+            update_mask = 0;
+            if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
+                update_mask |= MSR_EFER_SCE;
+            if (env->cpuid_ext2_features & CPUID_EXT2_LM)
+                update_mask |= MSR_EFER_LME;
+            if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+                update_mask |= MSR_EFER_FFXSR;
+            if (env->cpuid_ext2_features & CPUID_EXT2_NX)
+                update_mask |= MSR_EFER_NXE;
+            if (env->cpuid_ext3_features & CPUID_EXT3_SVM)
+                update_mask |= MSR_EFER_SVME;
+            cpu_load_efer(env, (env->efer & ~update_mask) |
+                          (val & update_mask));
+        }
+        break;
+    case MSR_STAR:
+        env->star = val;
+        break;
+    case MSR_PAT:
+        env->pat = val;
+        break;
+    case MSR_VM_HSAVE_PA:
+        env->vm_hsave = val;
+        break;
+#ifdef TARGET_X86_64
+    case MSR_LSTAR:
+        env->lstar = val;
+        break;
+    case MSR_CSTAR:
+        env->cstar = val;
+        break;
+    case MSR_FMASK:
+        env->fmask = val;
+        break;
+    case MSR_FSBASE:
+        env->segs[R_FS].base = val;
+        break;
+    case MSR_GSBASE:
+        env->segs[R_GS].base = val;
+        break;
+    case MSR_KERNELGSBASE:
+        env->kernelgsbase = val;
+        break;
+#endif
+# ifndef VBOX
+    case MSR_MTRRphysBase(0):
+    case MSR_MTRRphysBase(1):
+    case MSR_MTRRphysBase(2):
+    case MSR_MTRRphysBase(3):
+    case MSR_MTRRphysBase(4):
+    case MSR_MTRRphysBase(5):
+    case MSR_MTRRphysBase(6):
+    case MSR_MTRRphysBase(7):
+        /* base/mask MSRs alternate, hence the division by two */
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val;
+        break;
+    case MSR_MTRRphysMask(0):
+    case MSR_MTRRphysMask(1):
+    case MSR_MTRRphysMask(2):
+    case MSR_MTRRphysMask(3):
+    case MSR_MTRRphysMask(4):
+    case MSR_MTRRphysMask(5):
+    case MSR_MTRRphysMask(6):
+    case MSR_MTRRphysMask(7):
+        env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val;
+        break;
+    case MSR_MTRRfix64K_00000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val;
+        break;
+    case MSR_MTRRfix4K_C0000:
+    case MSR_MTRRfix4K_C8000:
+    case MSR_MTRRfix4K_D0000:
+    case MSR_MTRRfix4K_D8000:
+    case MSR_MTRRfix4K_E0000:
+    case MSR_MTRRfix4K_E8000:
+    case MSR_MTRRfix4K_F0000:
+    case MSR_MTRRfix4K_F8000:
+        env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val;
+        break;
+    case MSR_MTRRdefType:
+        env->mtrr_deftype = val;
+        break;
+    case MSR_MCG_STATUS:
+        env->mcg_status = val;
+        break;
+    case MSR_MCG_CTL:
+        /* only all-zeroes or all-ones are accepted */
+        if ((env->mcg_cap & MCG_CTL_P)
+            && (val == 0 || val == ~(uint64_t)0))
+            env->mcg_ctl = val;
+        break;
+    case MSR_TSC_AUX:
+        env->tsc_aux = val;
+        break;
+# endif /* !VBOX */
+    default:
+# ifndef VBOX
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            /* offsets that are a multiple of four only accept 0 or ~0 */
+            if ((offset & 0x3) != 0
+                || (val == 0 || val == ~(uint64_t)0))
+                env->mce_banks[offset] = val;
+            break;
+        }
+        /* XXX: exception ? */
+# endif
+        break;
+    }
+
+# ifdef VBOX
+    /* call CPUM. */
+    if (cpu_wrmsr(env, (uint32_t)ECX, val) != 0)
+    {
+        /** @todo be a brave man and raise a \#GP(0) here as we should... */
+    }
+# endif
+}
+
+void helper_rdmsr(void)
+{ /* RDMSR: read the MSR selected by ECX into EDX:EAX.
+   *
+   * The SVM MSR intercept is checked first (parameter 0 = read).  MSRs that
+   * are cached in env are answered from there.  In VBOX builds the
+   * PERF_STATUS / TSC_AUX / MTRR / MCG cases are compiled out and every MSR
+   * not handled by a case is fetched through cpu_rdmsr(), yielding 0 when
+   * that call fails.  VBOX_STRICT builds additionally re-read the MSR through
+   * cpu_rdmsr() and assert that it matches what was returned (the TSC is
+   * excluded from that comparison). */
+ uint64_t val;
+
+ helper_svm_check_intercept_param(SVM_EXIT_MSR, 0);
+
+ switch((uint32_t)ECX) {
+ case MSR_IA32_SYSENTER_CS:
+ val = env->sysenter_cs;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ val = env->sysenter_esp;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ val = env->sysenter_eip;
+ break;
+ case MSR_IA32_APICBASE:
+#ifndef VBOX
+ val = cpu_get_apic_base(env->apic_state);
+#else /* VBOX */
+ val = cpu_get_apic_base(env);
+#endif /* VBOX */
+ break;
+ case MSR_EFER:
+ val = env->efer;
+ break;
+ case MSR_STAR:
+ val = env->star;
+ break;
+ case MSR_PAT:
+ val = env->pat;
+ break;
+ case MSR_VM_HSAVE_PA:
+ val = env->vm_hsave;
+ break;
+# ifndef VBOX /* forward to CPUMQueryGuestMsr. */
+ case MSR_IA32_PERF_STATUS:
+ /* tsc_increment_by_tick */
+ val = 1000ULL;
+ /* CPU multiplier */
+ val |= (((uint64_t)4ULL) << 40);
+ break;
+# endif /* !VBOX */
+#ifdef TARGET_X86_64
+ case MSR_LSTAR:
+ val = env->lstar;
+ break;
+ case MSR_CSTAR:
+ val = env->cstar;
+ break;
+ case MSR_FMASK:
+ val = env->fmask;
+ break;
+ case MSR_FSBASE:
+ val = env->segs[R_FS].base;
+ break;
+ case MSR_GSBASE:
+ val = env->segs[R_GS].base;
+ break;
+ case MSR_KERNELGSBASE:
+ val = env->kernelgsbase;
+ break;
+# ifndef VBOX
+ case MSR_TSC_AUX:
+ val = env->tsc_aux;
+ break;
+# endif /*!VBOX*/
+#endif
+# ifndef VBOX
+ case MSR_MTRRphysBase(0):
+ case MSR_MTRRphysBase(1):
+ case MSR_MTRRphysBase(2):
+ case MSR_MTRRphysBase(3):
+ case MSR_MTRRphysBase(4):
+ case MSR_MTRRphysBase(5):
+ case MSR_MTRRphysBase(6):
+ case MSR_MTRRphysBase(7):
+ val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base; /* base/mask MSRs alternate */
+ break;
+ case MSR_MTRRphysMask(0):
+ case MSR_MTRRphysMask(1):
+ case MSR_MTRRphysMask(2):
+ case MSR_MTRRphysMask(3):
+ case MSR_MTRRphysMask(4):
+ case MSR_MTRRphysMask(5):
+ case MSR_MTRRphysMask(6):
+ case MSR_MTRRphysMask(7):
+ val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask;
+ break;
+ case MSR_MTRRfix64K_00000:
+ val = env->mtrr_fixed[0];
+ break;
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
+ val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1];
+ break;
+ case MSR_MTRRfix4K_C0000:
+ case MSR_MTRRfix4K_C8000:
+ case MSR_MTRRfix4K_D0000:
+ case MSR_MTRRfix4K_D8000:
+ case MSR_MTRRfix4K_E0000:
+ case MSR_MTRRfix4K_E8000:
+ case MSR_MTRRfix4K_F0000:
+ case MSR_MTRRfix4K_F8000:
+ val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3];
+ break;
+ case MSR_MTRRdefType:
+ val = env->mtrr_deftype;
+ break;
+ case MSR_MTRRcap:
+ if (env->cpuid_features & CPUID_MTRR)
+ val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED;
+ else
+ /* XXX: exception ? */
+ val = 0;
+ break;
+ case MSR_MCG_CAP:
+ val = env->mcg_cap;
+ break;
+ case MSR_MCG_CTL:
+ if (env->mcg_cap & MCG_CTL_P)
+ val = env->mcg_ctl;
+ else
+ val = 0;
+ break;
+ case MSR_MCG_STATUS:
+ val = env->mcg_status;
+ break;
+# endif /* !VBOX */
+ default:
+# ifndef VBOX
+ if ((uint32_t)ECX >= MSR_MC0_CTL
+ && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+ uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+ val = env->mce_banks[offset];
+ break;
+ }
+ /* XXX: exception ? */
+ val = 0;
+# else /* VBOX */
+ if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0)
+ {
+ /** @todo be a brave man and raise a \#GP(0) here as we should... */
+ val = 0;
+ }
+# endif /* VBOX */
+ break;
+ }
+ EAX = (uint32_t)(val); /* low half */
+ EDX = (uint32_t)(val >> 32); /* high half */
+
+# ifdef VBOX_STRICT
+ if ((uint32_t)ECX != MSR_IA32_TSC) {
+ if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0)
+ val = 0;
+ AssertMsg(val == RT_MAKE_U64(EAX, EDX), ("idMsr=%#x val=%#llx eax:edx=%#llx\n", (uint32_t)ECX, val, RT_MAKE_U64(EAX, EDX)));
+ }
+# endif
+}
+#endif
+
+/*
+ * LSL: load segment limit.
+ *
+ * Returns the limit of the descriptor named by the low 16 bits of selector1
+ * and sets ZF (through CC_SRC) on success.  On failure ZF is cleared and 0 is
+ * returned.  Failure cases: null selector, descriptor not loadable, a system
+ * descriptor whose type is not one of 1, 2, 3, 9, 11, or DPL below CPL/RPL
+ * (the privilege check is skipped for conforming code segments).
+ */
+target_ulong helper_lsl(target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+    int check_privilege = 1;
+    unsigned int limit;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+
+    if (e2 & DESC_S_MASK) {
+        /* code/data segment: conforming code needs no privilege check */
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK))
+            check_privilege = 0;
+    } else {
+        /* system segment: only a few types have a limit worth reporting */
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        if (type != 1 && type != 2 && type != 3 && type != 9 && type != 11)
+            goto fail;
+    }
+    if (check_privilege && (dpl < cpl || dpl < rpl))
+        goto fail;
+
+    limit = get_seg_limit(e1, e2);
+    CC_SRC = eflags | CC_Z;
+    return limit;
+
+fail:
+    CC_SRC = eflags & ~CC_Z;
+    return 0;
+}
+
+/*
+ * LAR: load access rights.
+ *
+ * Returns the masked second descriptor word for the selector in the low 16
+ * bits of selector1 and sets ZF (through CC_SRC) on success; clears ZF and
+ * returns 0 on failure.  Failure cases: null selector, descriptor not
+ * loadable, a system descriptor whose type is not one of 1, 2, 3, 4, 5, 9,
+ * 11, 12, or DPL below CPL/RPL (conforming code segments are exempt from the
+ * privilege check).
+ */
+target_ulong helper_lar(target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl, type;
+    int check_privilege = 1;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+
+    if (e2 & DESC_S_MASK) {
+        /* code/data segment: conforming code needs no privilege check */
+        if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK))
+            check_privilege = 0;
+    } else {
+        type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+        switch(type) {
+        case 1: case 2: case 3: case 4: case 5:
+        case 9: case 11: case 12:
+            break;
+        default:
+            goto fail;
+        }
+    }
+    if (check_privilege && (dpl < cpl || dpl < rpl))
+        goto fail;
+
+    CC_SRC = eflags | CC_Z;
+#ifdef VBOX /* AMD says 0x00ffff00, while intel says 0x00fxff00. Bochs and IEM does like AMD says (x=f). */
+    return e2 & 0x00ffff00;
+#else
+    return e2 & 0x00f0ff00;
+#endif
+
+fail:
+    CC_SRC = eflags & ~CC_Z;
+    return 0;
+}
+
+/*
+ * VERR: set ZF (through CC_SRC) when the segment named by the low 16 bits of
+ * selector1 is readable at the current privilege level, clear it otherwise.
+ *
+ * The selector must be non-null, loadable and refer to a code/data segment.
+ * Code segments must be readable; the DPL >= CPL and DPL >= RPL check is
+ * applied to everything except conforming code segments.
+ */
+void helper_verr(target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+    int is_code, is_conforming_code;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+    is_code = (e2 & DESC_CS_MASK) != 0;
+    is_conforming_code = is_code && (e2 & DESC_C_MASK) != 0;
+
+    if (is_code && !(e2 & DESC_R_MASK))
+        goto fail;
+    if (!is_conforming_code && (dpl < cpl || dpl < rpl))
+        goto fail;
+
+    CC_SRC = eflags | CC_Z;
+    return;
+
+fail:
+    CC_SRC = eflags & ~CC_Z;
+}
+
+/*
+ * VERW: set ZF (through CC_SRC) when the segment named by the low 16 bits of
+ * selector1 is writable at the current privilege level, clear it otherwise.
+ *
+ * The selector must be non-null, loadable and refer to a data segment (code
+ * segments always fail) with DPL >= CPL, DPL >= RPL and the W bit set.
+ */
+void helper_verw(target_ulong selector1)
+{
+    uint32_t e1, e2, eflags, selector;
+    int rpl, dpl, cpl;
+
+    selector = selector1 & 0xffff;
+    eflags = helper_cc_compute_all(CC_OP);
+    if ((selector & 0xfffc) == 0)
+        goto fail;
+    if (load_segment(&e1, &e2, selector) != 0)
+        goto fail;
+    if (!(e2 & DESC_S_MASK))
+        goto fail;
+
+    rpl = selector & 3;
+    dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+    cpl = env->hflags & HF_CPL_MASK;
+
+    if (e2 & DESC_CS_MASK)
+        goto fail;
+    if (dpl < cpl || dpl < rpl)
+        goto fail;
+    if (!(e2 & DESC_W_MASK))
+        goto fail;
+
+    CC_SRC = eflags | CC_Z;
+    return;
+
+fail:
+    CC_SRC = eflags & ~CC_Z;
+}
+
+/* x87 FPU helpers */
+
+/*
+ * Record the exception bits in 'mask' in the FPU status word.  If any status
+ * bit selected by (~fpuc & FPUC_EM) is then set, the summary (FPUS_SE) and
+ * busy (FPUS_B) bits are raised as well.
+ */
+static void fpu_set_exception(int mask)
+{
+    int unmasked;
+
+    env->fpus |= mask;
+    unmasked = env->fpus & (~env->fpuc & FPUC_EM);
+    if (unmasked) {
+        env->fpus |= FPUS_SE | FPUS_B;
+    }
+}
+
+/* Return a / b, flagging a zero-divide (FPUS_ZE) first when b is zero. */
+static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b)
+{
+    if (b == 0.0) {
+        fpu_set_exception(FPUS_ZE);
+    }
+    return a / b;
+}
+
+/*
+ * Deliver a pending FPU exception: with CR0.NE set it is raised as
+ * EXCP10_COPR; otherwise (system emulation only) it is signalled through
+ * cpu_set_ferr().
+ */
+static void fpu_raise_exception(void)
+{
+    if (env->cr[0] & CR0_NE_MASK) {
+        raise_exception(EXCP10_COPR);
+        return;
+    }
+#if !defined(CONFIG_USER_ONLY)
+    cpu_set_ferr(env);
+#endif
+}
+
+/* Load FT0 from the 32-bit float whose raw bit pattern is 'val'. */
+void helper_flds_FT0(uint32_t val)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } bits;
+
+    bits.i = val;    /* reinterpret the integer bits as a float32 */
+    FT0 = float32_to_floatx(bits.f, &env->fp_status);
+}
+
+/* Load FT0 from the 64-bit float whose raw bit pattern is 'val'. */
+void helper_fldl_FT0(uint64_t val)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } bits;
+
+    bits.i = val;    /* reinterpret the integer bits as a float64 */
+    FT0 = float64_to_floatx(bits.f, &env->fp_status);
+}
+
+void helper_fildl_FT0(int32_t val)
+{ /* Load FT0 with the signed 32-bit integer val converted to FPU format. */
+ FT0 = int32_to_floatx(val, &env->fp_status);
+}
+
+/*
+ * Push the 32-bit float with raw bit pattern 'val' onto the FPU stack: the
+ * top-of-stack index is decremented modulo 8 and the new slot is tagged valid.
+ */
+void helper_flds_ST0(uint32_t val)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } bits;
+    int top = (env->fpstt - 1) & 7;
+
+    bits.i = val;
+    env->fpregs[top].d = float32_to_floatx(bits.f, &env->fp_status);
+    env->fpstt = top;
+    env->fptags[top] = 0; /* validate stack entry */
+}
+
+/*
+ * Push the 64-bit float with raw bit pattern 'val' onto the FPU stack: the
+ * top-of-stack index is decremented modulo 8 and the new slot is tagged valid.
+ */
+void helper_fldl_ST0(uint64_t val)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } bits;
+    int top = (env->fpstt - 1) & 7;
+
+    bits.i = val;
+    env->fpregs[top].d = float64_to_floatx(bits.f, &env->fp_status);
+    env->fpstt = top;
+    env->fptags[top] = 0; /* validate stack entry */
+}
+
+/* Push the signed 32-bit integer 'val' onto the FPU stack. */
+void helper_fildl_ST0(int32_t val)
+{
+    int top = (env->fpstt - 1) & 7;
+
+    env->fpregs[top].d = int32_to_floatx(val, &env->fp_status);
+    env->fpstt = top;
+    env->fptags[top] = 0; /* validate stack entry */
+}
+
+/* Push the signed 64-bit integer 'val' onto the FPU stack. */
+void helper_fildll_ST0(int64_t val)
+{
+    int top = (env->fpstt - 1) & 7;
+
+    env->fpregs[top].d = int64_to_floatx(val, &env->fp_status);
+    env->fpstt = top;
+    env->fptags[top] = 0; /* validate stack entry */
+}
+
+/* Convert ST0 to a 32-bit float and return its raw bit pattern. */
+#ifndef VBOX
+uint32_t helper_fsts_ST0(void)
+#else
+RTCCUINTREG helper_fsts_ST0(void)
+#endif
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } bits;
+
+    bits.f = floatx_to_float32(ST0, &env->fp_status);
+    return bits.i;
+}
+
+/* Convert ST0 to a 64-bit float and return its raw bit pattern. */
+uint64_t helper_fstl_ST0(void)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } bits;
+
+    bits.f = floatx_to_float64(ST0, &env->fp_status);
+    return bits.i;
+}
+
+/*
+ * Convert ST0 to a 16-bit integer using the current rounding mode.  Results
+ * that do not fit in an int16_t are replaced by -32768.
+ */
+#ifndef VBOX
+int32_t helper_fist_ST0(void)
+#else
+RTCCINTREG helper_fist_ST0(void)
+#endif
+{
+    int32_t converted = floatx_to_int32(ST0, &env->fp_status);
+
+    if (converted != (int16_t)converted) {
+        converted = -32768;
+    }
+    return converted;
+}
+
+/* Convert ST0 to a 32-bit integer using the current rounding mode. */
+#ifndef VBOX
+int32_t helper_fistl_ST0(void)
+#else
+RTCCINTREG helper_fistl_ST0(void)
+#endif
+{
+    int32_t converted = floatx_to_int32(ST0, &env->fp_status);
+
+    return converted;
+}
+
+/* Convert ST0 to a 64-bit integer using the current rounding mode. */
+int64_t helper_fistll_ST0(void)
+{
+    int64_t converted = floatx_to_int64(ST0, &env->fp_status);
+
+    return converted;
+}
+
+/*
+ * Convert ST0 to a 16-bit integer with truncation toward zero.  Results that
+ * do not fit in an int16_t are replaced by -32768.
+ */
+#ifndef VBOX
+int32_t helper_fistt_ST0(void)
+#else
+RTCCINTREG helper_fistt_ST0(void)
+#endif
+{
+    int32_t converted = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+
+    if (converted != (int16_t)converted) {
+        converted = -32768;
+    }
+    return converted;
+}
+
+/* Convert ST0 to a 32-bit integer with truncation toward zero. */
+#ifndef VBOX
+int32_t helper_fisttl_ST0(void)
+#else
+RTCCINTREG helper_fisttl_ST0(void)
+#endif
+{
+    int32_t converted = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+
+    return converted;
+}
+
+/* Convert ST0 to a 64-bit integer with truncation toward zero. */
+int64_t helper_fisttll_ST0(void)
+{
+    int64_t converted = floatx_to_int64_round_to_zero(ST0, &env->fp_status);
+
+    return converted;
+}
+
+/*
+ * Push the extended-precision value stored at guest address 'ptr' onto the
+ * FPU stack.  The value is fetched before any stack state is modified.
+ */
+void helper_fldt_ST0(target_ulong ptr)
+{
+    int top = (env->fpstt - 1) & 7;
+
+    env->fpregs[top].d = helper_fldt(ptr);
+    env->fpstt = top;
+    env->fptags[top] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0(target_ulong ptr)
+{ /* Store ST0 at guest address ptr via helper_fstt(). */
+ helper_fstt(ST0, ptr);
+}
+
+void helper_fpush(void)
+{ /* Out-of-line entry point that simply invokes fpush(). */
+ fpush();
+}
+
+void helper_fpop(void)
+{ /* Out-of-line entry point that simply invokes fpop(). */
+ fpop();
+}
+
+/*
+ * FDECSTP: decrement the top-of-stack index modulo 8 and clear the condition
+ * codes (mask 0x4700).
+ */
+void helper_fdecstp(void)
+{
+    env->fpus &= (~0x4700);
+    env->fpstt = (env->fpstt - 1) & 7;
+}
+
+/*
+ * FINCSTP: increment the top-of-stack index modulo 8 and clear the condition
+ * codes (mask 0x4700).
+ */
+void helper_fincstp(void)
+{
+    env->fpus &= (~0x4700);
+    env->fpstt = (env->fpstt + 1) & 7;
+}
+
+/* FPU move */
+
+/* FFREE: tag the physical register behind ST(st_index) as empty. */
+void helper_ffree_STN(int st_index)
+{
+    unsigned int slot = (env->fpstt + st_index) & 7;
+
+    env->fptags[slot] = 1;
+}
+
+void helper_fmov_ST0_FT0(void)
+{ /* ST0 <- FT0 */
+ ST0 = FT0;
+}
+
+void helper_fmov_FT0_STN(int st_index)
+{ /* FT0 <- ST(st_index) */
+ FT0 = ST(st_index);
+}
+
+void helper_fmov_ST0_STN(int st_index)
+{ /* ST0 <- ST(st_index) */
+ ST0 = ST(st_index);
+}
+
+void helper_fmov_STN_ST0(int st_index)
+{ /* ST(st_index) <- ST0 */
+ ST(st_index) = ST0;
+}
+
+/* FXCH: exchange the contents of ST0 and ST(st_index). */
+void helper_fxchg_ST0_STN(int st_index)
+{
+    CPU86_LDouble *other = &ST(st_index);
+    CPU86_LDouble saved = *other;
+
+    *other = ST0;
+    ST0 = saved;
+}
+
+/* FPU operations */
+
+/*
+ * Status-word condition codes indexed by (comparison result + 1).
+ * NOTE(review): the four entries presumably correspond to less / equal /
+ * greater / unordered (results -1, 0, 1, 2) - confirm against the softfloat
+ * compare routines.
+ */
+static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
+
+/* FCOM: compare ST0 with FT0 and store the outcome in C3/C2/C0 (mask 0x4500). */
+void helper_fcom_ST0_FT0(void)
+{
+    int relation = floatx_compare(ST0, FT0, &env->fp_status);
+    int cc = fcom_ccval[relation + 1];
+
+    env->fpus = (env->fpus & ~0x4500) | cc;
+}
+
+/*
+ * FUCOM: like FCOM but uses the quiet comparison routine; the outcome is
+ * stored in C3/C2/C0 (mask 0x4500) via the fcom_ccval table.
+ */
+void helper_fucom_ST0_FT0(void)
+{
+    int relation = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+    int cc = fcom_ccval[relation + 1];
+
+    env->fpus = (env->fpus & ~0x4500) | cc;
+}
+
+/*
+ * EFLAGS bits indexed by (comparison result + 1).
+ * NOTE(review): the four entries presumably correspond to less / equal /
+ * greater / unordered - confirm against the softfloat compare routines.
+ */
+static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+/* FCOMI: compare ST0 with FT0 and store the outcome in ZF/PF/CF. */
+void helper_fcomi_ST0_FT0(void)
+{
+    int relation = floatx_compare(ST0, FT0, &env->fp_status);
+    int flags = helper_cc_compute_all(CC_OP);
+
+    flags &= ~(CC_Z | CC_P | CC_C);
+    flags |= fcomi_ccval[relation + 1];
+    CC_SRC = flags;
+}
+
+/*
+ * FUCOMI: like FCOMI but uses the quiet comparison routine; the outcome is
+ * stored in ZF/PF/CF via the fcomi_ccval table.
+ */
+void helper_fucomi_ST0_FT0(void)
+{
+    int relation = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+    int flags = helper_cc_compute_all(CC_OP);
+
+    flags &= ~(CC_Z | CC_P | CC_C);
+    flags |= fcomi_ccval[relation + 1];
+    CC_SRC = flags;
+}
+
+/* ST0 <- ST0 + FT0 */
+void helper_fadd_ST0_FT0(void)
+{
+    ST0 = ST0 + FT0;
+}
+
+/* ST0 <- ST0 * FT0 */
+void helper_fmul_ST0_FT0(void)
+{
+    ST0 = ST0 * FT0;
+}
+
+/* ST0 <- ST0 - FT0 */
+void helper_fsub_ST0_FT0(void)
+{
+    ST0 = ST0 - FT0;
+}
+
+/* Reverse subtract: ST0 <- FT0 - ST0 */
+void helper_fsubr_ST0_FT0(void)
+{
+    CPU86_LDouble *top = &ST0;
+
+    *top = FT0 - *top;
+}
+
+/* ST0 <- ST0 / FT0 (zero divisors are flagged by helper_fdiv) */
+void helper_fdiv_ST0_FT0(void)
+{
+    CPU86_LDouble *top = &ST0;
+
+    *top = helper_fdiv(*top, FT0);
+}
+
+/* Reverse divide: ST0 <- FT0 / ST0 (zero divisors are flagged by helper_fdiv) */
+void helper_fdivr_ST0_FT0(void)
+{
+    CPU86_LDouble *top = &ST0;
+
+    *top = helper_fdiv(FT0, *top);
+}
+
+/* fp operations between STN and ST0 */
+
+/* ST(st_index) <- ST(st_index) + ST0 */
+void helper_fadd_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+
+    *dst = *dst + ST0;
+}
+
+/* ST(st_index) <- ST(st_index) * ST0 */
+void helper_fmul_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+
+    *dst = *dst * ST0;
+}
+
+/* ST(st_index) <- ST(st_index) - ST0 */
+void helper_fsub_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+
+    *dst = *dst - ST0;
+}
+
+/* Reverse subtract: ST(st_index) <- ST0 - ST(st_index) */
+void helper_fsubr_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+    CPU86_LDouble old = *dst;
+
+    *dst = ST0 - old;
+}
+
+/* ST(st_index) <- ST(st_index) / ST0 (zero divisors are flagged by helper_fdiv) */
+void helper_fdiv_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+    CPU86_LDouble quotient = helper_fdiv(*dst, ST0);
+
+    *dst = quotient;
+}
+
+/* Reverse divide: ST(st_index) <- ST0 / ST(st_index) */
+void helper_fdivr_STN_ST0(int st_index)
+{
+    CPU86_LDouble *dst = &ST(st_index);
+    CPU86_LDouble quotient = helper_fdiv(ST0, *dst);
+
+    *dst = quotient;
+}
+
+/* misc FPU operations */
+void helper_fchs_ST0(void)
+{ /* FCHS: ST0 <- floatx_chs(ST0), i.e. ST0 with its sign changed. */
+ ST0 = floatx_chs(ST0);
+}
+
+void helper_fabs_ST0(void)
+{ /* FABS: ST0 <- floatx_abs(ST0). */
+ ST0 = floatx_abs(ST0);
+}
+
+void helper_fld1_ST0(void)
+{ /* FLD1: ST0 <- constant table entry 1 (presumably 1.0 - see f15rk). */
+ ST0 = f15rk[1];
+}
+
+void helper_fldl2t_ST0(void)
+{ /* FLDL2T: ST0 <- constant table entry 6 (presumably log2(10) - see f15rk). */
+ ST0 = f15rk[6];
+}
+
+void helper_fldl2e_ST0(void)
+{ /* FLDL2E: ST0 <- constant table entry 5 (presumably log2(e) - see f15rk). */
+ ST0 = f15rk[5];
+}
+
+void helper_fldpi_ST0(void)
+{ /* FLDPI: ST0 <- constant table entry 2 (presumably pi - see f15rk). */
+ ST0 = f15rk[2];
+}
+
+void helper_fldlg2_ST0(void)
+{ /* FLDLG2: ST0 <- constant table entry 3 (presumably log10(2) - see f15rk). */
+ ST0 = f15rk[3];
+}
+
+void helper_fldln2_ST0(void)
+{ /* FLDLN2: ST0 <- constant table entry 4 (presumably ln(2) - see f15rk). */
+ ST0 = f15rk[4];
+}
+
+void helper_fldz_ST0(void)
+{ /* FLDZ: ST0 <- constant table entry 0 (presumably 0.0 - see f15rk). */
+ ST0 = f15rk[0];
+}
+
+void helper_fldz_FT0(void)
+{ /* FT0 <- constant table entry 0 (presumably 0.0 - see f15rk). */
+ FT0 = f15rk[0];
+}
+
+/*
+ * FNSTSW: return the status word with the current top-of-stack index merged
+ * into bits 11..13.
+ */
+#ifndef VBOX
+uint32_t helper_fnstsw(void)
+#else
+RTCCUINTREG helper_fnstsw(void)
+#endif
+{
+    unsigned int top_field = (env->fpstt & 0x7) << 11;
+
+    return (env->fpus & ~0x3800) | top_field;
+}
+
+#ifndef VBOX
+uint32_t helper_fnstcw(void)
+#else
+RTCCUINTREG helper_fnstcw(void)
+#endif
+{ /* FNSTCW: return the FPU control word. */
+ return env->fpuc;
+}
+
+/*
+ * Propagate the FPU control word into env->fp_status: the rounding-control
+ * field selects the rounding mode (nearest-even is the fallback) and, when
+ * FLOATX80 is defined, bits 8..9 select a rounding precision of 32, 64 or 80
+ * bits (80 is the fallback).
+ */
+static void update_fp_status(void)
+{
+    int rc = env->fpuc & RC_MASK;
+    int setting;
+
+    /* set rounding mode */
+    if (rc == RC_DOWN) {
+        setting = float_round_down;
+    } else if (rc == RC_UP) {
+        setting = float_round_up;
+    } else if (rc == RC_CHOP) {
+        setting = float_round_to_zero;
+    } else {
+        /* RC_NEAR and anything unexpected */
+        setting = float_round_nearest_even;
+    }
+    set_float_rounding_mode(setting, &env->fp_status);
+#ifdef FLOATX80
+    {
+        int pc = (env->fpuc >> 8) & 3;
+
+        if (pc == 0) {
+            setting = 32;
+        } else if (pc == 2) {
+            setting = 64;
+        } else {
+            setting = 80;
+        }
+        set_floatx80_rounding_precision(setting, &env->fp_status);
+    }
+#endif
+}
+
+void helper_fldcw(uint32_t val)
+{ /* FLDCW: install val as the control word, then re-derive the rounding
+   * settings from it via update_fp_status(). */
+ env->fpuc = val;
+ update_fp_status();
+}
+
+void helper_fclex(void)
+{ /* FCLEX: keep only status-word bits 8..14 (mask 0x7f00); the low byte and
+   * bit 15 are cleared. */
+ env->fpus &= 0x7f00;
+}
+
+/* FWAIT: deliver a pending FPU exception if the summary bit is set. */
+void helper_fwait(void)
+{
+    if (!(env->fpus & FPUS_SE)) {
+        return;
+    }
+    fpu_raise_exception();
+}
+
+/*
+ * FNINIT: status word and top-of-stack index are zeroed, the control word is
+ * set to 0x37f and all eight registers are tagged empty.
+ */
+void helper_fninit(void)
+{
+    int slot;
+
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    for (slot = 0; slot < 8; slot++) {
+        env->fptags[slot] = 1;
+    }
+}
+
+/* BCD ops */
+
+/*
+ * FBLD: push the packed-BCD value at guest address 'ptr'.  Bytes 8..0 hold
+ * two decimal digits each (most significant byte last in memory); bit 7 of
+ * byte 9 is the sign.
+ */
+void helper_fbld_ST0(target_ulong ptr)
+{
+    CPU86_LDouble result;
+    uint64_t magnitude = 0;
+    unsigned int packed;
+    int idx;
+
+    /* accumulate from the most significant digit pair downwards */
+    for (idx = 8; idx >= 0; idx--) {
+        packed = ldub(ptr + idx);
+        magnitude = (magnitude * 100) + ((packed >> 4) * 10) + (packed & 0xf);
+    }
+    result = magnitude;
+    if (ldub(ptr + 9) & 0x80) {
+        result = -result;
+    }
+    fpush();
+    ST0 = result;
+}
+
+/*
+ * FBST: store ST0, converted to a 64-bit integer, as packed BCD at guest
+ * address 'ptr'.  Byte 9 receives the sign (0x80 when negative, 0x00
+ * otherwise); bytes 0..8 receive two decimal digits each, least significant
+ * pair first, with unused high bytes written as zero.
+ */
+void helper_fbst_ST0(target_ulong ptr)
+{
+    target_ulong cursor = ptr;
+    target_ulong sign_addr = ptr + 9;
+    int64_t remaining;
+    int pair;
+
+    remaining = floatx_to_int64(ST0, &env->fp_status);
+    if (remaining < 0) {
+        stb(sign_addr, 0x80);
+        remaining = -remaining;
+    } else {
+        stb(sign_addr, 0x00);
+    }
+    /* once 'remaining' reaches zero every further digit pair is zero too */
+    for (; cursor < sign_addr; cursor++) {
+        pair = remaining % 100;
+        remaining = remaining / 100;
+        stb(cursor, ((pair / 10) << 4) | (pair % 10));
+    }
+}
+
+void helper_f2xm1(void)
+{ /* F2XM1: ST0 <- 2^ST0 - 1, computed with the host pow(). */
+ ST0 = pow(2.0,ST0) - 1.0;
+}
+
+/*
+ * FYL2X: for ST0 > 0, ST1 <- ST1 * log2(ST0) followed by a pop.  Otherwise
+ * the stack is left alone, the condition codes are cleared and C2 (0x400)
+ * is set.
+ */
+void helper_fyl2x(void)
+{
+    CPU86_LDouble fptemp = ST0;
+
+    if (fptemp>0.0) {
+        fptemp = log(fptemp)/log(2.0); /* log2(ST) */
+        ST1 *= fptemp;
+        fpop();
+        return;
+    }
+    env->fpus &= (~0x4700);
+    env->fpus |= 0x400;
+}
+
+/*
+ * FPTAN: when |ST0| exceeds MAXTAN only C2 (0x400) is set.  Otherwise ST0 is
+ * replaced by tan(ST0), 1.0 is pushed on top and C2 is cleared.
+ */
+void helper_fptan(void)
+{
+    CPU86_LDouble arg = ST0;
+
+    if ((arg > MAXTAN)||(arg < -MAXTAN)) {
+        env->fpus |= 0x400;
+        return;
+    }
+    ST0 = tan(arg);
+    fpush();
+    ST0 = 1.0;
+    env->fpus &= (~0x400); /* C2 <-- 0 */
+    /* the above code is for |arg| < 2**52 only */
+}
+
+/* FPATAN: ST1 <- atan2(ST1, ST0), then pop. */
+void helper_fpatan(void)
+{
+    CPU86_LDouble y = ST1;
+    CPU86_LDouble x = ST0;
+
+    ST1 = atan2(y,x);
+    fpop();
+}
+
+/*
+ * FXTRACT: split ST0 into exponent and significand.  The unbiased exponent
+ * replaces ST0, then the significand (the original value with its exponent
+ * field rewritten by BIASEXPONENT) is pushed on top.
+ *
+ * The unbiased exponent is held in a signed int: values with magnitude
+ * below 1.0 have a negative exponent, which an unsigned variable would
+ * turn into a huge positive number on conversion to floating point.
+ */
+void helper_fxtract(void)
+{
+    CPU86_LDoubleU temp;
+    int expdif;
+
+    temp.d = ST0;
+    expdif = EXPD(temp) - EXPBIAS;
+    /*DP exponent bias*/
+    ST0 = expdif;
+    fpush();
+    BIASEXPONENT(temp);
+    ST0 = temp.d;
+}
+
+void helper_fprem1(void)
+{ /* FPREM1: partial remainder of ST0 / ST1 with the quotient rounded to
+   * the nearest integer (rint).
+   *  - NaN operands or ST1 == 0 (non-VBOX builds also: infinite ST0) give a
+   *    NaN result with the condition codes cleared.
+   *  - If the exponent of ST0 is smaller than that of ST1, ST0 is left
+   *    unchanged and the condition codes are cleared.
+   *  - For an exponent difference below 53 the reduction completes in one
+   *    step and the three low quotient bits go to C0/C3/C1.
+   *  - Otherwise a partial reduction is performed and C2 is set. */
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ signed long long int q;
+
+#ifndef VBOX /* Unfortunately, we cannot handle isinf/isnan easily in wrapper */
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+#else
+ if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { /* x != x is true only for NaN */
+#endif
+ ST0 = 0.0 / 0.0; /* NaN */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ return;
+ }
+
+ fpsrcop = ST0;
+ fptemp = ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+ if (expdif < 0) {
+ /* optimisation? taken from the AMD docs */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* ST0 is unchanged */
+ return;
+ }
+
+ if (expdif < 53) {
+ dblq = fpsrcop / fptemp;
+ /* round dblq towards nearest integer */
+ dblq = rint(dblq);
+ ST0 = fpsrcop - fptemp * dblq;
+
+ /* convert dblq to q by truncating towards zero */
+ if (dblq < 0.0)
+ q = (signed long long int)(-dblq);
+ else
+ q = (signed long long int)dblq;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
+ env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
+ } else {
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, expdif - 50);
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0) ?
+ -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+void helper_fprem(void)
+{ /* FPREM: partial remainder of ST0 / ST1 with the quotient truncated
+   * towards zero.
+   *  - NaN operands or ST1 == 0 (non-VBOX builds also: infinite ST0) give a
+   *    NaN result with the condition codes cleared.
+   *  - If the exponent of ST0 is smaller than that of ST1, ST0 is left
+   *    unchanged and the condition codes are cleared.
+   *  - For an exponent difference below 53 the reduction completes in one
+   *    step and the three low quotient bits go to C0/C3/C1.
+   *  - Otherwise a partial reduction is performed and C2 is set. */
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ signed long long int q;
+
+#ifndef VBOX /* Unfortunately, we cannot easily handle isinf/isnan in wrapper */
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+#else
+ if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { /* x != x is true only for NaN */
+#endif
+ ST0 = 0.0 / 0.0; /* NaN */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ return;
+ }
+
+ fpsrcop = (CPU86_LDouble)ST0;
+ fptemp = (CPU86_LDouble)ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+ if (expdif < 0) {
+ /* optimisation? taken from the AMD docs */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* ST0 is unchanged */
+ return;
+ }
+
+ if ( expdif < 53 ) {
+ dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
+ /* round dblq towards zero */
+ dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
+ ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
+
+ /* convert dblq to q by truncating towards zero */
+ if (dblq < 0.0)
+ q = (signed long long int)(-dblq);
+ else
+ q = (signed long long int)dblq;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
+ env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
+ } else {
+ int N = 32 + (expdif % 32); /* as per AMD docs */
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, (double)(expdif - N));
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0) ?
+ -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+/*
+ * FYL2XP1: for ST0 + 1 > 0, ST1 <- ST1 * log2(ST0 + 1) followed by a pop.
+ * Otherwise the stack is left alone, the condition codes are cleared and C2
+ * (0x400) is set.
+ */
+void helper_fyl2xp1(void)
+{
+    CPU86_LDouble fptemp = ST0;
+
+    if ((fptemp+1.0)>0.0) {
+        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+        ST1 *= fptemp;
+        fpop();
+        return;
+    }
+    env->fpus &= (~0x4700);
+    env->fpus |= 0x400;
+}
+
+/*
+ * FSQRT: ST0 <- sqrt(ST0).  For a negative operand the condition codes are
+ * first cleared and C2 (0x400) is set; the host sqrt() is applied in any case.
+ */
+void helper_fsqrt(void)
+{
+    CPU86_LDouble operand = ST0;
+
+    if (operand<0.0) {
+        /* (C3,C2,C1,C0) <-- 0100 */
+        env->fpus = (env->fpus & (~0x4700)) | 0x400;
+    }
+    ST0 = sqrt(operand);
+}
+
+/*
+ * FSINCOS: when |ST0| exceeds MAXTAN only C2 (0x400) is set.  Otherwise ST0
+ * is replaced by its sine, the cosine is pushed on top and C2 is cleared.
+ */
+void helper_fsincos(void)
+{
+    CPU86_LDouble arg = ST0;
+
+    if ((arg > MAXTAN)||(arg < -MAXTAN)) {
+        env->fpus |= 0x400;
+        return;
+    }
+    ST0 = sin(arg);
+    fpush();
+    ST0 = cos(arg);
+    env->fpus &= (~0x400); /* C2 <-- 0 */
+    /* the above code is for |arg| < 2**63 only */
+}
+
+void helper_frndint(void)
+{ /* FRNDINT: round ST0 to an integral value using env->fp_status. */
+ ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(void)
+{ /* FSCALE: ST0 <- ST0 * 2^n, where n is ST1 converted to int by a C cast
+   * (i.e. truncated towards zero). */
+ ST0 = ldexp (ST0, (int)(ST1));
+}
+
+/*
+ * FSIN: when |ST0| exceeds MAXTAN only C2 (0x400) is set.  Otherwise ST0 is
+ * replaced by its sine and C2 is cleared.
+ */
+void helper_fsin(void)
+{
+    CPU86_LDouble arg = ST0;
+
+    if ((arg > MAXTAN)||(arg < -MAXTAN)) {
+        env->fpus |= 0x400;
+        return;
+    }
+    ST0 = sin(arg);
+    env->fpus &= (~0x400); /* C2 <-- 0 */
+    /* the above code is for |arg| < 2**53 only */
+}
+
+/*
+ * FCOS: when |ST0| exceeds MAXTAN only C2 (0x400) is set.  Otherwise ST0 is
+ * replaced by its cosine and C2 is cleared.
+ */
+void helper_fcos(void)
+{
+    CPU86_LDouble arg = ST0;
+
+    if ((arg > MAXTAN)||(arg < -MAXTAN)) {
+        env->fpus |= 0x400;
+        return;
+    }
+    ST0 = cos(arg);
+    env->fpus &= (~0x400); /* C2 <-- 0 */
+    /* the above code is for |arg| < 2**63 only */
+}
+
+/*
+ * FXAM: classify ST0 into the condition codes.
+ *   C1 (0x200)  set when the sign bit is set
+ *   0x500       infinity         0x100   NaN
+ *   0x4000      zero             0x4400  denormal
+ *   0x400       anything else (finite, non-zero exponent)
+ */
+void helper_fxam_ST0(void)
+{
+    CPU86_LDoubleU temp;
+    int biased_exp;
+    int cc = 0;
+
+    temp.d = ST0;
+
+    if (SIGND(temp))
+        cc |= 0x200; /* C1 <-- 1 */
+
+    /* XXX: test fptags too */
+    biased_exp = EXPD(temp);
+    if (biased_exp == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+        if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+        if (MANTD(temp) == 0)
+#endif
+            cc |= 0x500 /*Infinity*/;
+        else
+            cc |= 0x100 /*NaN*/;
+    } else if (biased_exp == 0) {
+        if (MANTD(temp) == 0)
+            cc |= 0x4000 /*Zero*/;
+        else
+            cc |= 0x4400 /*Denormal*/;
+    } else {
+        cc |= 0x400;
+    }
+
+    /* (C3,C2,C1,C0) <-- classification */
+    env->fpus = (env->fpus & (~0x4700)) | cc;
+}
+
+/*
+ * FSTENV: store the FPU environment at guest address 'ptr'.
+ *
+ * data32 selects the layout: seven 32-bit fields when non-zero, seven 16-bit
+ * fields otherwise.  Only the control word, status word (with the
+ * top-of-stack index merged into bits 11..13) and the tag word carry
+ * information; the instruction/operand pointer fields are written as zero.
+ *
+ * The tag word holds two bits per register, register 7 in the top bits:
+ * 3 = empty, 1 = zero, 2 = special (NaN, infinity, denormal), 0 = valid.
+ */
+void helper_fstenv(target_ulong ptr, int data32)
+{
+    int fpus, fptag, exp, i;
+    uint64_t mant;
+    CPU86_LDoubleU tmp;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for (i=7; i>=0; i--) {
+        fptag <<= 2;
+        if (env->fptags[i]) {
+            fptag |= 3;
+        } else {
+            tmp.d = env->fpregs[i].d;
+            exp = EXPD(tmp);
+            mant = MANTD(tmp);
+            if (exp == 0 && mant == 0) {
+                /* zero */
+                fptag |= 1;
+            } else if (exp == 0 || exp == MAXEXPD
+#ifdef USE_X86LDOUBLE
+                       /* explicit integer bit clear; unsigned constant so
+                          the shift into bit 63 is well defined */
+                       || (mant & (1ULL << 63)) == 0
+#endif
+                       ) {
+                /* NaNs, infinity, denormal */
+                fptag |= 2;
+            }
+        }
+    }
+    if (data32) {
+        /* 32 bit */
+        stl(ptr, env->fpuc);
+        stl(ptr + 4, fpus);
+        stl(ptr + 8, fptag);
+        stl(ptr + 12, 0); /* fpip */
+        stl(ptr + 16, 0); /* fpcs */
+        stl(ptr + 20, 0); /* fpoo */
+        stl(ptr + 24, 0); /* fpos */
+    } else {
+        /* 16 bit */
+        stw(ptr, env->fpuc);
+        stw(ptr + 2, fpus);
+        stw(ptr + 4, fptag);
+        stw(ptr + 6, 0);
+        stw(ptr + 8, 0);
+        stw(ptr + 10, 0);
+        stw(ptr + 12, 0);
+    }
+}
+
+/* FLDENV: reload the x87 control, status and tag words from the environment
+   image at guest address ptr.  data32 selects the layout with 4-byte slots,
+   otherwise the slots are 2 bytes wide; in both cases only 16 bits are read
+   from each slot. */
+void helper_fldenv(target_ulong ptr, int data32)
+{
+    const int slot = data32 ? 4 : 2;
+    int reg, status, tagword;
+
+    env->fpuc = lduw(ptr);
+    status = lduw(ptr + slot);
+    tagword = lduw(ptr + 2 * slot);
+
+    /* split the status word into top-of-stack index and remaining flags */
+    env->fpstt = (status >> 11) & 7;
+    env->fpus = status & ~0x3800;
+
+    /* fptags[reg] is set only when the register's 2-bit tag equals 3 */
+    for (reg = 0; reg < 8; reg++) {
+        env->fptags[reg] = (((tagword >> (2 * reg)) & 3) == 3);
+    }
+}
+
+/* FSAVE: store the environment (via helper_fstenv) followed by the eight
+   stack registers, 10 bytes each, then reset the FPU like FNINIT. */
+void helper_fsave(target_ulong ptr, int data32)
+{
+    CPU86_LDouble value;
+    target_ulong dst;
+    int reg;
+
+    helper_fstenv(ptr, data32);
+
+    /* register images start right after the 14- or 28-byte environment */
+    dst = ptr + (14 << data32);
+    for (reg = 0; reg < 8; reg++) {
+        value = ST(reg);
+        helper_fstt(value, dst);
+        dst += 10;
+    }
+
+    /* fninit */
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    for (reg = 0; reg < 8; reg++) {
+        env->fptags[reg] = 1;
+    }
+}
+
+/* FRSTOR: reload the environment (via helper_fldenv) and then the eight
+   stack registers, 10 bytes each, from the image at guest address ptr. */
+void helper_frstor(target_ulong ptr, int data32)
+{
+    CPU86_LDouble value;
+    target_ulong src;
+    int reg;
+
+    helper_fldenv(ptr, data32);
+
+    /* register images start right after the 14- or 28-byte environment */
+    src = ptr + (14 << data32);
+    for (reg = 0; reg < 8; reg++) {
+        value = helper_fldt(src);
+        ST(reg) = value;
+        src += 10;
+    }
+}
+
+/* FXSAVE: write the x87 state and, when CR4.OSFXSR is set, MXCSR and the XMM
+   registers to the save area at guest address ptr.
+   ptr    - guest address of the save area; must be 16-byte aligned, else #GP.
+   data64 - on TARGET_X86_64 builds, non-zero selects the layout with two
+            64-bit pointer fields (rip/rdp) instead of four 32-bit fields. */
+void helper_fxsave(target_ulong ptr, int data64)
+{
+ int fpus, fptag, i, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ target_ulong addr;
+
+ /* The operand must be 16 byte aligned */
+ if (ptr & 0xf) {
+ raise_exception(EXCP0D_GPF);
+ }
+
+ /* status word with the top-of-stack index folded into bits 11..13 */
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ /* one bit per register, built from fptags[]; stored inverted below */
+ fptag = 0;
+ for(i = 0; i < 8; i++) {
+ fptag |= (env->fptags[i] << i);
+ }
+ stw(ptr, env->fpuc);
+ stw(ptr + 2, fpus);
+ stw(ptr + 4, fptag ^ 0xff);
+ /* instruction and data pointer fields are always written as zero */
+#ifdef TARGET_X86_64
+ if (data64) {
+ stq(ptr + 0x08, 0); /* rip */
+ stq(ptr + 0x10, 0); /* rdp */
+ } else
+#endif
+ {
+ stl(ptr + 0x08, 0); /* eip */
+ stl(ptr + 0x0c, 0); /* sel */
+ stl(ptr + 0x10, 0); /* dp */
+ stl(ptr + 0x14, 0); /* sel */
+ }
+
+ /* the eight stack registers, one per 16-byte slot starting at +0x20 */
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt(tmp, addr);
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it */
+ stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+ stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+ /* 16 XMM registers when HF_CS64_MASK is set in hflags, otherwise 8 */
+ if (env->hflags & HF_CS64_MASK)
+ nb_xmm_regs = 16;
+ else
+ nb_xmm_regs = 8;
+ addr = ptr + 0xa0;
+ /* Fast FXSAVE leaves out the XMM registers */
+ /* i.e. the XMM registers are skipped only when EFER.FFXSR is set, the
+    CPL bits in hflags are zero and HF_LMA_MASK is set */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ for(i = 0; i < nb_xmm_regs; i++) {
+ stq(addr, env->xmm_regs[i].XMM_Q(0));
+ stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+ addr += 16;
+ }
+ }
+ }
+}
+
+/* FXRSTOR: reload the x87 state and, when CR4.OSFXSR is set, MXCSR and the
+   XMM registers from the save area at guest address ptr.
+   ptr    - guest address of the save area; must be 16-byte aligned, else #GP.
+   data64 - not consulted by this function. */
+void helper_fxrstor(target_ulong ptr, int data64)
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    CPU86_LDouble tmp;
+    target_ulong addr;
+
+    /* The operand must be 16 byte aligned */
+    if (ptr & 0xf) {
+        raise_exception(EXCP0D_GPF);
+    }
+
+    env->fpuc = lduw(ptr);
+    fpus = lduw(ptr + 2);
+    fptag = lduw(ptr + 4);
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    /* the image holds one bit per register, inverted relative to fptags[] */
+    fptag ^= 0xff;
+    for(i = 0;i < 8; i++) {
+        env->fptags[i] = ((fptag >> i) & 1);
+    }
+
+    /* the eight stack registers, one per 16-byte slot starting at +0x20 */
+    addr = ptr + 0x20;
+    for(i = 0;i < 8; i++) {
+        tmp = helper_fldt(addr);
+        ST(i) = tmp;
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        env->mxcsr = ldl(ptr + 0x18);
+        //ldl(ptr + 0x1c);
+        if (env->hflags & HF_CS64_MASK)
+            nb_xmm_regs = 16;
+        else
+            nb_xmm_regs = 8;
+        addr = ptr + 0xa0;
+        /* Fast FXRESTORE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR)
+          || (env->hflags & HF_CPL_MASK)
+          || !(env->hflags & HF_LMA_MASK)) {
+            for(i = 0; i < nb_xmm_regs; i++) {
+#if !defined(VBOX) || __GNUC__ < 4
+                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+# if 1
+                env->xmm_regs[i].XMM_L(0) = ldl(addr);
+                env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+                env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+                env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+# else
+                /* this works fine on Mac OS X, gcc 4.0.1 */
+                /* Disabled fallback, kept consistent with the other paths:
+                   low quadword from addr, high quadword from addr + 8. */
+                uint64_t u64 = ldq(addr);
+                env->xmm_regs[i].XMM_Q(0) = u64;
+                u64 = ldq(addr + 8);
+                env->xmm_regs[i].XMM_Q(1) = u64;
+# endif
+#endif
+                addr += 16;
+            }
+        }
+    }
+}
+
+#ifndef USE_X86LDOUBLE
+
+/* Split f (host double representation) into the 64-bit mantissa and the
+   16-bit sign+exponent word of the 80-bit format. */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+    CPU86_LDoubleU src;
+    int sign_exp;
+
+    src.d = f;
+    /* exponent + sign: re-bias from EXPBIAS to 16383, then merge the sign */
+    sign_exp = EXPD(src) - EXPBIAS + 16383;
+    sign_exp |= SIGND(src) >> 16;
+    *pexp = sign_exp;
+    /* mantissa: shifted up by 11 bits with bit 63 forced to one */
+    *pmant = (MANTD(src) << 11) | (1LL << 63);
+}
+
+/* Build a CPU86_LDouble (host double representation) from the 64-bit
+   mantissa and 16-bit sign+exponent word of an 80-bit value. */
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU86_LDoubleU result;
+    uint64_t frac;
+    int sign_exp;
+
+    /* keep 52 mantissa bits, dropping the low 11 and bit 63 */
+    frac = (mant >> 11) & ((1LL << 52) - 1);
+    /* XXX: handle overflow ? */
+    sign_exp = ((upper & 0x7fff) - 16383 + EXPBIAS) /* exponent */
+             | ((upper >> 4) & 0x800);              /* sign */
+#ifdef __arm__
+    result.l.upper = (sign_exp << 20) | (frac >> 32);
+    result.l.lower = frac;
+#else
+    result.ll = frac | ((uint64_t)sign_exp << 52);
+#endif
+    return result.d;
+}
+
+#else
+
+/* Split f into its 64-bit mantissa (*pmant) and 16-bit upper word (*pexp)
+   by reading the two halves of the CPU86_LDoubleU overlay. */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+    CPU86_LDoubleU src;
+
+    src.d = f;
+    *pexp = src.l.upper;
+    *pmant = src.l.lower;
+}
+
+/* Assemble a CPU86_LDouble from a 64-bit mantissa and 16-bit upper word by
+   filling the two halves of the CPU86_LDoubleU overlay. */
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU86_LDoubleU result;
+
+    result.l.lower = mant;
+    result.l.upper = upper;
+    return result.d;
+}
+#endif
+
+#ifdef TARGET_X86_64
+
+//#define DEBUG_MULDIV
+
+/* 128-bit add: (*phigh:*plow) += (b:a), propagating the carry out of the
+   low half into the high half. */
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    uint64_t sum_lo = *plow + a;
+    /* unsigned wrap-around means the low half carried */
+    uint64_t carry = (sum_lo < a) ? 1 : 0;
+
+    *plow = sum_lo;
+    *phigh = *phigh + carry + b;
+}
+
+/* 128-bit two's-complement negation of (*phigh:*plow): invert both halves,
+   then add one. */
+static void neg128(uint64_t *plow, uint64_t *phigh)
+{
+    uint64_t inv_lo = ~*plow;
+    uint64_t inv_hi = ~*phigh;
+
+    *plow = inv_lo;
+    *phigh = inv_hi;
+    add128(plow, phigh, 1, 0);
+}
+
+/* Unsigned 128-by-64 division of (*phigh:*plow) by b.  On success the
+   quotient is left in *plow and the remainder in *phigh.
+   return TRUE if overflow (the quotient does not fit in 64 bits) */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
+{
+    uint64_t lo = *plow;
+    uint64_t hi = *phigh;
+    int step, carry_out, bit;
+
+    /* plain 64-bit case */
+    if (hi == 0) {
+        *plow = lo / b;
+        *phigh = lo % b;
+        return 0;
+    }
+    if (hi >= b)
+        return 1;
+
+    /* XXX: use a better algorithm */
+    /* shift-and-subtract, one quotient bit per iteration */
+    for (step = 64; step > 0; step--) {
+        carry_out = hi >> 63;
+        hi = (hi << 1) | (lo >> 63);
+        bit = (carry_out || hi >= b) ? 1 : 0;
+        if (bit)
+            hi -= b;
+        lo = (lo << 1) | bit;
+    }
+#if defined(DEBUG_MULDIV)
+    printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+           *phigh, *plow, b, lo, hi);
+#endif
+    *plow = lo;
+    *phigh = hi;
+    return 0;
+}
+
+/* Signed 128-by-64 division of (*phigh:*plow) by b.  On success the
+   quotient is left in *plow and the remainder in *phigh; the remainder
+   takes the sign of the dividend.
+   return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
+{
+    int sa, sb;
+    uint64_t divisor;
+
+    sa = ((int64_t)*phigh < 0);
+    if (sa)
+        neg128(plow, phigh);
+    sb = (b < 0);
+    /* Take |b| in unsigned arithmetic: "-b" on the signed value would be a
+       signed overflow (undefined behaviour) for b == INT64_MIN. */
+    divisor = sb ? (uint64_t)0 - (uint64_t)b : (uint64_t)b;
+    if (div64(plow, phigh, divisor) != 0)
+        return 1;
+    if (sa ^ sb) {
+        /* negative quotient: magnitude may be at most 2**63 */
+        if (*plow > (1ULL << 63))
+            return 1;
+        *plow = - *plow;
+    } else {
+        /* non-negative quotient: must be below 2**63 */
+        if (*plow >= (1ULL << 63))
+            return 1;
+    }
+    if (sa)
+        *phigh = - *phigh;
+    return 0;
+}
+
+/* 64-bit unsigned multiply of EAX by t0: low half of the product goes to EAX
+   and CC_DST, high half to EDX and CC_SRC. */
+void helper_mulq_EAX_T0(target_ulong t0)
+{
+    uint64_t lo, hi;
+
+    mulu64(&lo, &hi, EAX, t0);
+    CC_DST = lo;
+    CC_SRC = hi;
+    EAX = lo;
+    EDX = hi;
+}
+
+/* 64-bit signed multiply of EAX by t0: low half of the product goes to EAX
+   and CC_DST, high half to EDX.  CC_SRC becomes 1 when the high half is not
+   the sign extension of the low half, 0 otherwise. */
+void helper_imulq_EAX_T0(target_ulong t0)
+{
+    uint64_t lo, hi;
+    int64_t sign_fill;
+
+    muls64(&lo, &hi, EAX, t0);
+    sign_fill = (int64_t)lo >> 63;
+    CC_DST = lo;
+    CC_SRC = ((int64_t)hi != sign_fill);
+    EAX = lo;
+    EDX = hi;
+}
+
+/* 64-bit signed multiply of t0 by t1; returns the low half of the product.
+   CC_DST receives the low half and CC_SRC becomes 1 when the high half is
+   not the sign extension of the low half, 0 otherwise. */
+target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1)
+{
+    uint64_t lo, hi;
+    int64_t sign_fill;
+
+    muls64(&lo, &hi, t0, t1);
+    sign_fill = (int64_t)lo >> 63;
+    CC_DST = lo;
+    CC_SRC = ((int64_t)hi != sign_fill);
+    return lo;
+}
+
+/* 64-bit unsigned divide of EDX:EAX by t0; quotient to EAX, remainder to
+   EDX.  A zero divisor or a quotient overflow raises EXCP00_DIVZ. */
+void helper_divq_EAX(target_ulong t0)
+{
+    uint64_t lo, hi;
+
+    if (t0 == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    lo = EAX;
+    hi = EDX;
+    if (div64(&lo, &hi, t0) != 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    EAX = lo;
+    EDX = hi;
+}
+
+/* 64-bit signed divide of EDX:EAX by t0; quotient to EAX, remainder to EDX.
+   A zero divisor or a quotient overflow raises EXCP00_DIVZ. */
+void helper_idivq_EAX(target_ulong t0)
+{
+    uint64_t lo, hi;
+
+    if (t0 == 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    lo = EAX;
+    hi = EDX;
+    if (idiv64(&lo, &hi, t0) != 0) {
+        raise_exception(EXCP00_DIVZ);
+    }
+    EAX = lo;
+    EDX = hi;
+}
+#endif
+
+/* Mark the CPU halted, record EXCP_HLT as the pending exception index and
+   leave the execution loop through cpu_loop_exit(). */
+static void do_hlt(void)
+{
+    env->exception_index = EXCP_HLT;
+    env->halted = 1;
+    /* needed if sti is just before */
+    env->hflags = env->hflags & ~HF_INHIBIT_IRQ_MASK;
+    cpu_loop_exit();
+}
+
+/* HLT: run the SVM intercept check, advance EIP by next_eip_addend and then
+   halt via do_hlt(). */
+void helper_hlt(int next_eip_addend)
+{
+    helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
+    EIP = EIP + next_eip_addend;
+    do_hlt();
+}
+
+/* MONITOR: raise #GP when the low 32 bits of ECX exceed the accepted value
+   (1 in VBOX builds, 0 otherwise), then run the SVM intercept check.  The
+   address operand ptr is not used. */
+void helper_monitor(target_ulong ptr)
+{
+#ifdef VBOX
+    const uint32_t max_ecx = 1;
+#else /* !VBOX */
+    const uint32_t max_ecx = 0;
+#endif /* !VBOX */
+
+    if ((uint32_t)ECX > max_ecx) {
+        raise_exception(EXCP0D_GPF);
+    }
+    /* XXX: store address ? */
+    helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
+}
+
+/* MWAIT: raise #GP when the low 32 bits of ECX are non-zero.  VBOX builds
+   then behave exactly like helper_hlt(); other builds run the MWAIT
+   intercept check, advance EIP and halt only when this is the sole CPU. */
+void helper_mwait(int next_eip_addend)
+{
+    if ((uint32_t)ECX != 0) {
+        raise_exception(EXCP0D_GPF);
+    }
+#ifdef VBOX
+    helper_hlt(next_eip_addend);
+#else /* !VBOX */
+    helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
+    EIP = EIP + next_eip_addend;
+
+    /* XXX: not complete but not completely erroneous */
+    if (env->cpu_index == 0 && env->next_cpu == NULL) {
+        do_hlt();
+    }
+    /* otherwise, more than one CPU: do not sleep because another CPU may
+       wake this one */
+#endif /* !VBOX */
+}
+
+/* Record EXCP_DEBUG as the pending exception index and leave the execution
+   loop. */
+void helper_debug(void)
+{
+    env->exception_index = EXCP_DEBUG;
+
+    cpu_loop_exit();
+}
+
+/* Clear the RF_MASK bit in eflags. */
+void helper_reset_rf(void)
+{
+    env->eflags = env->eflags & ~RF_MASK;
+}
+
+/* Forward to raise_interrupt() for vector intno with is_int = 1 and
+   error_code = 0, passing next_eip_addend through unchanged. */
+void helper_raise_interrupt(int intno, int next_eip_addend)
+{
+    const int is_int = 1;
+    const int error_code = 0;
+
+    raise_interrupt(intno, is_int, error_code, next_eip_addend);
+}
+
+/* Helper entry point that simply forwards to raise_exception(). */
+void helper_raise_exception(int exception_index)
+{
+    raise_exception(exception_index);
+}
+
+/* CLI: clear the IF_MASK bit in eflags. */
+void helper_cli(void)
+{
+    env->eflags = env->eflags & ~IF_MASK;
+}
+
+/* STI: set the IF_MASK bit in eflags. */
+void helper_sti(void)
+{
+    env->eflags = env->eflags | IF_MASK;
+}
+
+#ifdef VBOX
+/* CLI variant that clears the VIF_MASK bit in eflags instead of IF. */
+void helper_cli_vme(void)
+{
+    env->eflags = env->eflags & ~VIF_MASK;
+}
+
+/* STI variant that sets the VIF_MASK bit in eflags instead of IF.  Raises
+   #GP, leaving eflags unmodified, when VIP_MASK is already set. */
+void helper_sti_vme(void)
+{
+    /* First check, then change eflags according to the AMD manual */
+    const int vip_pending = (env->eflags & VIP_MASK) != 0;
+
+    if (vip_pending) {
+        raise_exception(EXCP0D_GPF);
+    }
+    env->eflags = env->eflags | VIF_MASK;
+}
+#endif /* VBOX */
+
+/* The two helpers below are compiled out by the "#if 0" guard. */
+#if 0
+/* vm86plus instructions */
+/* Clears the VIF_MASK bit in eflags. */
+void helper_cli_vm(void)
+{
+ env->eflags &= ~VIF_MASK;
+}
+
+/* Sets the VIF_MASK bit in eflags, then raises #GP if VIP_MASK is set.
+   Note that, unlike helper_sti_vme(), the flag is modified before the
+   check. */
+void helper_sti_vm(void)
+{
+ env->eflags |= VIF_MASK;
+ if (env->eflags & VIP_MASK) {
+ raise_exception(EXCP0D_GPF);
+ }
+}
+#endif
+
+/* Set the HF_INHIBIT_IRQ_MASK bit in hflags. */
+void helper_set_inhibit_irq(void)
+{
+    env->hflags = env->hflags | HF_INHIBIT_IRQ_MASK;
+}
+
+/* Clear the HF_INHIBIT_IRQ_MASK bit in hflags. */
+void helper_reset_inhibit_irq(void)
+{
+    env->hflags = env->hflags & ~HF_INHIBIT_IRQ_MASK;
+}
+
+/* BOUND, 16-bit form: read the signed lower and upper bounds from a0 and
+   a0 + 2 and raise EXCP05_BOUND when the low 16 bits of v, taken as a
+   signed value, fall outside [lower, upper]. */
+void helper_boundw(target_ulong a0, int v)
+{
+    int lower = ldsw(a0);
+    int upper = ldsw(a0 + 2);
+    int index = (int16_t)v;
+
+    if (!(index >= lower && index <= upper)) {
+        raise_exception(EXCP05_BOUND);
+    }
+}
+
+/* BOUND, 32-bit form: read the lower and upper bounds from a0 and a0 + 4
+   and raise EXCP05_BOUND when v falls outside [lower, upper]. */
+void helper_boundl(target_ulong a0, int v)
+{
+    int lower = ldl(a0);
+    int upper = ldl(a0 + 4);
+
+    if (!(v >= lower && v <= upper)) {
+        raise_exception(EXCP05_BOUND);
+    }
+}
+
+/* Reciprocal square root of a, evaluated in double precision and narrowed
+   to float on return. */
+static float approx_rsqrt(float a)
+{
+    double root = sqrt(a);
+
+    return 1.0 / root;
+}
+
+/* Reciprocal of a, evaluated in double precision and narrowed to float on
+   return. */
+static float approx_rcp(float a)
+{
+    double reciprocal = 1.0 / a;
+
+    return reciprocal;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#endif
+
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+/* This code assumes real physical address always fit into host CPU reg,
+ which is wrong in general, but true for our current use cases. */
+/* Each function below is a one-line forwarder to the remR3Phys* accessor
+   named in its body; the address (and value, for stores) is passed through
+   unchanged. */
+/* Load via remR3PhysReadS8. */
+RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS8(addr);
+}
+/* Load via remR3PhysReadU8. */
+RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU8(addr);
+}
+/* Store via remR3PhysWriteU8. */
+void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU8(addr, val);
+}
+/* Load via remR3PhysReadS16. */
+RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS16(addr);
+}
+/* Load via remR3PhysReadU16. */
+RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU16(addr);
+}
+/* Store via remR3PhysWriteU16. */
+void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU16(addr, val);
+}
+/* Load via remR3PhysReadS32. */
+RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS32(addr);
+}
+/* Load via remR3PhysReadU32. */
+RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU32(addr);
+}
+/* Store via remR3PhysWriteU32. */
+void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU32(addr, val);
+}
+/* Load via remR3PhysReadU64; the result is always a uint64_t. */
+uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU64(addr);
+}
+/* Store via remR3PhysWriteU64; the value is always a uint64_t. */
+void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val)
+{
+ remR3PhysWriteU64(addr, val);
+}
+#endif /* VBOX && REM_PHYS_ADDR_IN_TLB */
+
+#if !defined(CONFIG_USER_ONLY)
+/* try to fill the TLB and return an exception if error. If retaddr is
+ NULL, it means that the function was called in C code (i.e. not
+ from generated code or from helper.c) */
+/* XXX: fix it to restore all registers */
+/* addr, is_write and mmu_idx are handed to cpu_x86_handle_mmu_fault()
+   unchanged; retaddr is the host return address used to locate the
+   translation block that triggered the access. */
+void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+{
+ TranslationBlock *tb;
+ int ret;
+ uintptr_t pc;
+ CPUX86State *saved_env;
+
+ /* XXX: hack to restore env in all cases, even if not called from
+ generated code */
+ saved_env = env;
+ env = cpu_single_env;
+
+ /* a non-zero result means the access faulted */
+ ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+ if (ret) {
+ if (retaddr) {
+ /* now we have a real cpu fault */
+ pc = (uintptr_t)retaddr;
+ tb = tb_find_pc(pc);
+ if (tb) {
+ /* the PC is inside the translated code. It means that we have
+ a virtual CPU fault */
+ cpu_restore_state(tb, env, pc, NULL);
+ }
+ }
+ /* NOTE(review): presumably does not return, in which case env is not
+    switched back to saved_env on this path - confirm */
+ raise_exception_err(env->exception_index, env->error_code);
+ }
+ env = saved_env;
+}
+#endif
+
+#ifdef VBOX
+
+/**
+ * Computes the eflags for the given CPU environment by temporarily making
+ * it the current one around a call to compute_eflags().
+ * @returns eflags.
+ * @param   env1    CPU environment.
+ */
+uint32_t raw_compute_eflags(CPUX86State *env1)
+{
+    CPUX86State *prev_env = env;
+    uint32_t flags;
+
+    env = env1;
+    flags = compute_eflags();
+    env = prev_env;
+
+    return flags;
+}
+
+/**
+ * Reads a byte from a virtual address in guest memory.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns the byte read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *prev_env = env;
+    uint8_t value;
+
+    env = env1;
+    value = ldub_kernel(addr);
+    env = prev_env;
+
+    return value;
+}
+
+/**
+ * Reads a 16-bit word from a virtual address in guest memory.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns the word read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint16_t read_word(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *prev_env = env;
+    uint16_t value;
+
+    env = env1;
+    value = lduw_kernel(addr);
+    env = prev_env;
+
+    return value;
+}
+
+/**
+ * Reads a 32-bit dword from a virtual address in guest memory.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns the dword read.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ */
+uint32_t read_dword(CPUX86State *env1, target_ulong addr)
+{
+    CPUX86State *prev_env = env;
+    uint32_t value;
+
+    env = env1;
+    value = ldl_kernel(addr);
+    env = prev_env;
+
+    return value;
+}
+
+/**
+ * Writes a byte to a virtual address in guest memory.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ * @param   val     byte value
+ */
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val)
+{
+    CPUX86State *prev_env = env;
+
+    env = env1;
+    stb(addr, val);
+    env = prev_env;
+}
+
+/**
+ * Writes a 16-bit word to a virtual address in guest memory.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ * @param   val     word value
+ */
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val)
+{
+    CPUX86State *prev_env = env;
+
+    env = env1;
+    stw(addr, val);
+    env = prev_env;
+}
+
+/**
+ * Writes a 32-bit dword to a virtual address in guest memory.
+ * @param   env1    CPU environment.
+ * @param   addr    GC Virtual address.
+ * @param   val     dword value
+ */
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val)
+{
+    CPUX86State *prev_env = env;
+
+    env = env1;
+    stl(addr, val);
+    env = prev_env;
+}
+
+/**
+ * Correctly loads selector into segment register with updating internal
+ * qemu data/caches.
+ *
+ * If the protected-mode load fails (control comes back through the setjmp
+ * on env1->jmp_env), the selector is recorded in both .selector and
+ * .newselector and the pending exception state in env1 is cleared.
+ *
+ * @param env1 CPU environment.
+ * @param seg_reg Segment register.
+ * @param selector Selector to load.
+ */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector)
+{
+ CPUX86State *savedenv = env;
+#ifdef FORCE_SEGMENT_SYNC
+ jmp_buf old_buf;
+#endif
+
+ /* make env1 the current environment for the loaders called below */
+ env = env1;
+
+ /* X86_EFL_VM set or CR0.PE clear: use the load_seg_vm() loader */
+ if ( env->eflags & X86_EFL_VM
+ || !(env->cr[0] & X86_CR0_PE))
+ {
+ load_seg_vm(seg_reg, selector);
+
+ env = savedenv;
+
+ /* Successful sync. */
+ Assert(env1->segs[seg_reg].newselector == 0);
+ }
+ else
+ {
+ /* For some reasons, it works even w/o save/restore of the jump buffer, so as code is
+ time critical - let's not do that */
+#ifdef FORCE_SEGMENT_SYNC
+ memcpy(&old_buf, &env1->jmp_env, sizeof(old_buf));
+#endif
+ /* setjmp returns 0 on the direct call; a non-zero return means one of
+    the loaders below jumped back here */
+ if (setjmp(env1->jmp_env) == 0)
+ {
+ if (seg_reg == R_CS)
+ {
+ /* CS is loaded by reading the descriptor words and filling the
+    segment cache directly rather than through helper_load_seg() */
+ uint32_t e1, e2;
+ e1 = e2 = 0;
+ load_segment(&e1, &e2, selector);
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ }
+ else
+ helper_load_seg(seg_reg, selector);
+ /* We used to use tss_load_seg(seg_reg, selector); which, for some reasons ignored
+ loading 0 selectors, what, in order, lead to subtle problems like #3588 */
+
+ env = savedenv;
+
+ /* Successful sync. */
+ Assert(env1->segs[seg_reg].newselector == 0);
+ }
+ else
+ {
+ env = savedenv;
+
+ /* Postpone sync until the guest uses the selector. */
+ env1->segs[seg_reg].selector = selector; /* hidden values are now incorrect, but will be resynced when this register is accessed. */
+ env1->segs[seg_reg].newselector = selector;
+ Log(("sync_seg: out of sync seg_reg=%d selector=%#x\n", seg_reg, selector));
+ /* discard the exception state left behind by the failed load */
+ env1->exception_index = -1;
+ env1->error_code = 0;
+ env1->old_exception = -1;
+ }
+#ifdef FORCE_SEGMENT_SYNC
+ memcpy(&env1->jmp_env, &old_buf, sizeof(old_buf));
+#endif
+ }
+
+}
+
+/* Point jump slot n of tb at the address tc_ptr + tb_next_offset[n] inside
+   tb's own code. */
+DECLINLINE(void) tb_reset_jump(TranslationBlock *tb, int n)
+{
+    uintptr_t target = (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]);
+
+    tb_set_jmp_target(tb, n, target);
+}
+
+
+/* Translates and executes the instruction at the current CS:EIP of env1 in
+   a temporary translation block, which is invalidated and freed afterwards.
+   If HF_INHIBIT_IRQ_MASK is set after execution, the flag is cleared and the
+   following instruction is emulated as well (recursively).
+   Always returns 0. */
+int emulate_single_instr(CPUX86State *env1)
+{
+ TranslationBlock *tb;
+ TranslationBlock *current;
+ int flags;
+ uint8_t *tc_ptr;
+ target_ulong old_eip;
+
+ /* ensures env is loaded! */
+ CPUX86State *savedenv = env;
+ env = env1;
+
+ RAWEx_ProfileStart(env, STATS_EMULATE_SINGLE_INSTR);
+
+ /* remember the current TB so it can be restored after the emulation */
+ current = env->current_tb;
+ env->current_tb = NULL;
+ flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+
+ /*
+ * Translate only one instruction.
+ */
+ /* CPU_EMULATE_SINGLE_INSTR is set in env->state only for the duration of
+    the tb_gen_code() call */
+ ASMAtomicOrU32(&env->state, CPU_EMULATE_SINGLE_INSTR);
+ tb = tb_gen_code(env, env->eip + env->segs[R_CS].base,
+ env->segs[R_CS].base, flags, 0);
+
+ ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+
+
+ /* tb_link_phys: */
+ tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2);
+ tb->jmp_next[0] = NULL;
+ tb->jmp_next[1] = NULL;
+ Assert(tb->jmp_next[0] == NULL);
+ Assert(tb->jmp_next[1] == NULL);
+ /* 0xffff in tb_next_offset[] means the slot is not reset */
+ if (tb->tb_next_offset[0] != 0xffff)
+ tb_reset_jump(tb, 0);
+ if (tb->tb_next_offset[1] != 0xffff)
+ tb_reset_jump(tb, 1);
+
+ /*
+ * Execute it using emulation
+ */
+ old_eip = env->eip;
+ env->current_tb = tb;
+
+ /*
+ * eip remains the same for repeated instructions; no idea why qemu doesn't do a jump inside the generated code
+ * perhaps not a very safe hack
+ */
+ while (old_eip == env->eip)
+ {
+ tc_ptr = tb->tc_ptr;
+
+#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM)
+ int fake_ret;
+ tcg_qemu_tb_exec(tc_ptr, fake_ret);
+#else
+ tcg_qemu_tb_exec(tc_ptr);
+#endif
+
+ /*
+ * Exit once we detect an external interrupt and interrupts are enabled
+ */
+ /* EXTERNAL_EXIT / EXTERNAL_TIMER requests break out unconditionally;
+    EXTERNAL_HARD only when IF is set and IRQs are not inhibited */
+ if ( (env->interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER))
+ || ( (env->eflags & IF_MASK)
+ && !(env->hflags & HF_INHIBIT_IRQ_MASK)
+ && (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) )
+ )
+ {
+ break;
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) {
+ tlb_flush(env, true);
+ }
+ }
+ env->current_tb = current;
+
+ /* the temporary TB is single-use: invalidate it and release it */
+ tb_phys_invalidate(tb, -1);
+ tb_free(tb);
+/*
+ Assert(tb->tb_next_offset[0] == 0xffff);
+ Assert(tb->tb_next_offset[1] == 0xffff);
+ Assert(tb->tb_next[0] == 0xffff);
+ Assert(tb->tb_next[1] == 0xffff);
+ Assert(tb->jmp_next[0] == NULL);
+ Assert(tb->jmp_next[1] == NULL);
+ Assert(tb->jmp_first == NULL); */
+
+ RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+
+ /*
+ * Execute the next instruction when we encounter instruction fusing.
+ */
+ if (env->hflags & HF_INHIBIT_IRQ_MASK)
+ {
+ Log(("REM: Emulating next instruction due to instruction fusing (HF_INHIBIT_IRQ_MASK) at %RGv\n", env->eip));
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ emulate_single_instr(env);
+ }
+
+ env = savedenv;
+ return 0;
+}
+
+/**
+ * Correctly loads a new ldtr selector.
+ *
+ * The load is done by helper_lldt() with env1 as the current environment.
+ * If control comes back through the setjmp on env1->jmp_env, the failure is
+ * ignored except in VBOX_STRICT builds, which call cpu_abort().
+ *
+ * @param env1 CPU environment.
+ * @param selector Selector to load.
+ */
+void sync_ldtr(CPUX86State *env1, int selector)
+{
+ CPUX86State *saved_env = env;
+ /* setjmp returns 0 on the direct call */
+ if (setjmp(env1->jmp_env) == 0)
+ {
+ env = env1;
+ helper_lldt(selector);
+ env = saved_env;
+ }
+ else
+ {
+ /* helper_lldt() jumped back here: restore env before reporting */
+ env = saved_env;
+#ifdef VBOX_STRICT
+ cpu_abort(env1, "sync_ldtr: selector=%#x\n", selector);
+#endif
+ }
+}
+
+/* Fetch the stack selector and stack pointer stored for privilege level dpl
+   in the TSS described by env1->tr.
+   Returns 1 with *ss_ptr / *esp_ptr filled in, or 0 (outputs untouched)
+   when the entry would lie beyond the TSS limit.  Calls cpu_abort() when
+   the TSS is not present or has an unexpected type. */
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr,
+                            uint32_t *esp_ptr, int dpl)
+{
+    CPUX86State *prev_env = env;
+    int tss_type, wide, entry, in_limit;
+
+    env = env1;
+
+    if (!(env->tr.flags & DESC_P_MASK))
+        cpu_abort(env, "invalid tss");
+    tss_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+    if ((tss_type & 7) != 3)
+        cpu_abort(env, "invalid tss type %d", tss_type);
+
+    /* bit 3 of the type doubles the entry size */
+    wide = tss_type >> 3;
+    entry = (dpl * 4 + 2) << wide;
+    in_limit = (index_past_limit_check:
+                0, !(entry + (4 << wide) - 1 > env->tr.limit));
+    //raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+
+    if (in_limit) {
+        if (wide == 0) {
+            *esp_ptr = lduw_kernel(env->tr.base + entry);
+            *ss_ptr = lduw_kernel(env->tr.base + entry + 2);
+        } else {
+            *esp_ptr = ldl_kernel(env->tr.base + entry);
+            *ss_ptr = lduw_kernel(env->tr.base + entry + 4);
+        }
+    }
+
+    env = prev_env;
+    return in_limit;
+}
+
+//*****************************************************************************
+// Needs to be at the bottom of the file (overriding macros)
+
+/* Read an 80-bit value straight from host memory at ptr: 64-bit lower part
+   at offset 0, 16-bit upper part at offset 8. */
+static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr)
+{
+#ifdef USE_X86LDOUBLE
+    CPU86_LDoubleU value;
+
+    value.l.upper = *(uint16_t const *)(ptr + 8);
+    value.l.lower = *(uint64_t const *)ptr;
+    return value.d;
+#else
+# error "Busted FPU saving/restoring!"
+    return *(CPU86_LDouble *)ptr;
+#endif
+}
+
+/* Write f straight to host memory at ptr as a 16-byte slot: 64-bit lower
+   part at offset 0, 16-bit upper part at offset 8, zeros in bytes 10..15. */
+static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr)
+{
+#ifdef USE_X86LDOUBLE
+    CPU86_LDoubleU value;
+
+    value.d = f;
+    *(uint64_t *)(ptr + 0) = value.l.lower;
+    *(uint16_t *)(ptr + 8) = value.l.upper;
+    /* clear the padding of the slot */
+    *(uint16_t *)(ptr + 10) = 0;
+    *(uint32_t *)(ptr + 12) = 0;
+    AssertCompile(sizeof(long double) > 8);
+#else
+# error "Busted FPU saving/restoring!"
+    *(CPU86_LDouble *)ptr = f;
+#endif
+}
+
+#undef stw
+#undef stl
+#undef stq
+#define stw(a,b) *(uint16_t *)(a) = (uint16_t)(b)
+#define stl(a,b) *(uint32_t *)(a) = (uint32_t)(b)
+#define stq(a,b) *(uint64_t *)(a) = (uint64_t)(b)
+
+//*****************************************************************************
+/* Writes the FPU/SSE state held in env to the host buffer at ptr.
+   stw/stl/stq are redefined just above this function as plain host-memory
+   stores, so ptr is a host pointer here, not a guest address.
+   With CPUID_FXSR the buffer uses the same offsets as helper_fxsave();
+   otherwise it is treated as an X86FPUSTATE structure. */
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+ int fpus, fptag, i, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ uint8_t *addr;
+ /* 1 when HF_LMA_MASK is set in hflags, else 0; selects 16 vs 8 XMM regs */
+ int data64 = !!(env->hflags & HF_LMA_MASK);
+
+ if (env->cpuid_features & CPUID_FXSR)
+ {
+ /* status word with the top-of-stack index folded into bits 11..13 */
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
+ for(i = 0; i < 8; i++) {
+ fptag |= (env->fptags[i] << i);
+ }
+ stw(ptr, env->fpuc);
+ stw(ptr + 2, fpus);
+ /* the tag byte is stored inverted relative to fptags[] */
+ stw(ptr + 4, fptag ^ 0xff);
+
+ /* the eight stack registers, one per 16-byte slot starting at +0x20 */
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt_raw(tmp, addr);
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it */
+ stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+ stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+ nb_xmm_regs = 8 << data64;
+ addr = ptr + 0xa0;
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if __GNUC__ < 4
+ stq(addr, env->xmm_regs[i].XMM_Q(0));
+ stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+ stl(addr, env->xmm_regs[i].XMM_L(0));
+ stl(addr + 4, env->xmm_regs[i].XMM_L(1));
+ stl(addr + 8, env->xmm_regs[i].XMM_L(2));
+ stl(addr + 12, env->xmm_regs[i].XMM_L(3));
+#endif
+ addr += 16;
+ }
+ }
+ }
+ else
+ {
+ PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+ /* note: shadows the function-level fptag */
+ int fptag;
+
+ fp->FCW = env->fpuc;
+ fp->FSW = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ /* two tag bits per register: 3 when fptags[i] is set, otherwise 0 */
+ fptag = 0;
+ for (i=7; i>=0; i--) {
+ fptag <<= 2;
+ if (env->fptags[i]) {
+ fptag |= 3;
+ } else {
+ /* the FPU automatically computes it */
+ }
+ }
+ fp->FTW = fptag;
+
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt_raw(tmp, &fp->regs[i].au8[0]);
+ }
+ }
+}
+
+//*****************************************************************************
+#undef lduw
+#undef ldl
+#undef ldq
+#define lduw(a) *(uint16_t *)(a)
+#define ldl(a) *(uint32_t *)(a)
+#define ldq(a) *(uint64_t *)(a)
+//*****************************************************************************
+/* Loads the FPU/SSE state in env from the host buffer at ptr.
+   lduw/ldl/ldq are redefined just above this function as plain host-memory
+   loads, so ptr is a host pointer here, not a guest address.
+   With CPUID_FXSR the buffer uses the same offsets as helper_fxrstor();
+   otherwise it is treated as an X86FPUSTATE structure. */
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+ int i, fpus, fptag, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ uint8_t *addr;
+ int data64 = !!(env->hflags & HF_LMA_MASK); /* don't use HF_CS64_MASK here as cs hasn't been synced when this function is called. */
+
+ if (env->cpuid_features & CPUID_FXSR)
+ {
+ env->fpuc = lduw(ptr);
+ fpus = lduw(ptr + 2);
+ fptag = lduw(ptr + 4);
+ /* split the status word into top-of-stack index and remaining flags */
+ env->fpstt = (fpus >> 11) & 7;
+ env->fpus = fpus & ~0x3800;
+ /* the tag byte is stored inverted relative to fptags[] */
+ fptag ^= 0xff;
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag >> i) & 1);
+ }
+
+ /* the eight stack registers, one per 16-byte slot starting at +0x20 */
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt_raw(addr);
+ ST(i) = tmp;
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it, endianness */
+ env->mxcsr = ldl(ptr + 0x18);
+ //ldl(ptr + 0x1c);
+ nb_xmm_regs = 8 << data64;
+ addr = ptr + 0xa0;
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if HC_ARCH_BITS == 32
+ /* this is a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35135 */
+ env->xmm_regs[i].XMM_L(0) = ldl(addr);
+ env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+ env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+ env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+#else
+ env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+ env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#endif
+ addr += 16;
+ }
+ }
+ }
+ else
+ {
+ PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+ /* note: fptag shadows the function-level variable of the same name */
+ int fptag, j;
+
+ env->fpuc = fp->FCW;
+ env->fpstt = (fp->FSW >> 11) & 7;
+ env->fpus = fp->FSW & ~0x3800;
+ fptag = fp->FTW;
+ /* fptags[i] is set only when the register's 2-bit tag equals 3 */
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag & 3) == 3);
+ fptag >>= 2;
+ }
+ /* NOTE(review): j is assigned here but never read afterwards */
+ j = env->fpstt;
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt_raw(&fp->regs[i].au8[0]);
+ ST(i) = tmp;
+ }
+ }
+}
+//*****************************************************************************
+//*****************************************************************************
+
+#endif /* VBOX */
+
+/* Secure Virtual Machine helpers */
+
+#if defined(CONFIG_USER_ONLY)
+
+/* CONFIG_USER_ONLY build: every SVM helper below is an empty no-op that
+   ignores its arguments. */
+void helper_vmrun(int aflag, int next_eip_addend)
+{
+}
+void helper_vmmcall(void)
+{
+}
+void helper_vmload(int aflag)
+{
+}
+void helper_vmsave(int aflag)
+{
+}
+void helper_stgi(void)
+{
+}
+void helper_clgi(void)
+{
+}
+void helper_skinit(void)
+{
+}
+void helper_invlpga(int aflag)
+{
+}
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
+}
+/* No-op: no intercept is ever reported in this configuration. */
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
+{
+}
+
+/* No-op counterpart of the I/O intercept check. */
+void helper_svm_check_io(uint32_t port, uint32_t param,
+ uint32_t next_eip_addend)
+{
+}
+#else
+
+/* Store the segment cache entry *sc into the vmcb_seg structure located at
+   physical address addr. */
+static inline void svm_save_seg(target_phys_addr_t addr,
+                                const SegmentCache *sc)
+{
+    /* pack the cached flags into the attrib layout: flag bits 8..15 go to
+       attrib bits 0..7, flag bits 20..23 to attrib bits 8..11 */
+    const uint32_t attrib = ((sc->flags >> 8) & 0xff)
+                          | ((sc->flags >> 12) & 0x0f00);
+
+    stw_phys(addr + offsetof(struct vmcb_seg, selector), sc->selector);
+    stq_phys(addr + offsetof(struct vmcb_seg, base), sc->base);
+    stl_phys(addr + offsetof(struct vmcb_seg, limit), sc->limit);
+    stw_phys(addr + offsetof(struct vmcb_seg, attrib), attrib);
+}
+
+/* Fill the segment cache entry *sc from the vmcb_seg structure located at
+   physical address addr. */
+static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc)
+{
+    unsigned int attrib;
+
+    sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector));
+    sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base));
+    sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit));
+    attrib = lduw_phys(addr + offsetof(struct vmcb_seg, attrib));
+    /* unpack attrib: bits 0..7 go to flag bits 8..15, bits 8..11 to flag
+       bits 20..23 */
+    sc->flags = ((attrib & 0x0f00) << 12) | ((attrib & 0xff) << 8);
+}
+
+/* Read the vmcb_seg structure at physical address addr and install it as
+   segment register seg_reg of env through cpu_x86_load_seg_cache(). */
+static inline void svm_load_seg_cache(target_phys_addr_t addr,
+                                      CPUState *env, int seg_reg)
+{
+    SegmentCache seg;
+
+    svm_load_seg(addr, &seg);
+    cpu_x86_load_seg_cache(env, seg_reg, seg.selector,
+                           seg.base, seg.limit, seg.flags);
+}
+
+void helper_vmrun(int aflag, int next_eip_addend)
+{
+ target_ulong addr;
+ uint32_t event_inj;
+ uint32_t int_ctl;
+
+ helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0);
+
+ if (aflag == 2)
+ addr = EAX;
+ else
+ addr = (uint32_t)EAX;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr);
+
+ env->vm_vmcb = addr;
+
+ /* save the current CPU state in the hsave page */
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+ stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+ stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags());
+
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es),
+ &env->segs[R_ES]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs),
+ &env->segs[R_CS]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss),
+ &env->segs[R_SS]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds),
+ &env->segs[R_DS]);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
+ EIP + next_eip_addend);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX);
+
+ /* load the interception bitmaps so we do not need to access the
+ vmcb in svm mode */
+ env->intercept = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept));
+ env->intercept_cr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
+ env->intercept_cr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
+ env->intercept_dr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
+ env->intercept_dr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
+ env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+
+ /* enable intercepts */
+ env->hflags |= HF_SVMI_MASK;
+
+ env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset));
+
+ env->gdt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
+ env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+
+ env->idt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
+ env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+
+ /* clear exit_info_2 so we behave like the real hardware */
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
+
+ cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
+ cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
+ cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
+ env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
+ int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+ env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+ if (int_ctl & V_INTR_MASKING_MASK) {
+ env->v_tpr = int_ctl & V_TPR_MASK;
+ env->hflags2 |= HF2_VINTR_MASK;
+ if (env->eflags & IF_MASK)
+ env->hflags2 |= HF2_HIF_MASK;
+ }
+
+ cpu_load_efer(env,
+ ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
+ env->eflags = 0;
+ load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ CC_OP = CC_OP_EFLAGS;
+
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es),
+ env, R_ES);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+ env, R_CS);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+ env, R_SS);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+ env, R_DS);
+
+ EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
+ env->eip = EIP;
+ ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
+ EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
+ env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+ env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+ cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
+
+ /* FIXME: guest state consistency checks */
+
+ switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) {
+ case TLB_CONTROL_DO_NOTHING:
+ break;
+ case TLB_CONTROL_FLUSH_ALL_ASID:
+ /* FIXME: this is not 100% correct but should work for now */
+ tlb_flush(env, 1);
+ break;
+ }
+
+ env->hflags2 |= HF2_GIF_MASK;
+
+ if (int_ctl & V_IRQ_MASK) {
+ env->interrupt_request |= CPU_INTERRUPT_VIRQ;
+ }
+
+ /* maybe we need to inject an event */
+ event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+ if (event_inj & SVM_EVTINJ_VALID) {
+ uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
+ uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
+ uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
+ /* FIXME: need to implement valid_err */
+ switch (event_inj & SVM_EVTINJ_TYPE_MASK) {
+ case SVM_EVTINJ_TYPE_INTR:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = -1;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR");
+ /* XXX: is it always correct ? */
+ do_interrupt(vector, 0, 0, 0, 1);
+ break;
+ case SVM_EVTINJ_TYPE_NMI:
+ env->exception_index = EXCP02_NMI;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = EIP;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI");
+ cpu_loop_exit();
+ break;
+ case SVM_EVTINJ_TYPE_EXEPT:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = -1;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT");
+ cpu_loop_exit();
+ break;
+ case SVM_EVTINJ_TYPE_SOFT:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 1;
+ env->exception_next_eip = EIP;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT");
+ cpu_loop_exit();
+ break;
+ }
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code);
+ }
+}
+
+/*
+ * VMMCALL helper: runs the SVM intercept check for SVM_EXIT_VMMCALL and, if
+ * that check returns, raises an illegal-opcode exception (EXCP06_ILLOP).
+ */
+void helper_vmmcall(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0);
+ raise_exception(EXCP06_ILLOP);
+}
+
+/*
+ * VMLOAD helper.
+ *
+ * After the SVM intercept check, loads FS, GS, TR and LDTR plus the STAR and
+ * SYSENTER values (and, on TARGET_X86_64, kernel_gs_base/LSTAR/CSTAR/SFMASK)
+ * from the VMCB found at the physical address held in EAX.
+ *
+ * aflag: when equal to 2 the full EAX value is used as the VMCB address,
+ *        otherwise only its low 32 bits.
+ */
+void helper_vmload(int aflag)
+{
+    target_ulong addr;
+
+    helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0);
+
+    addr = (aflag == 2) ? EAX : (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                  addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                  env->segs[R_FS].base);
+
+    /* segment registers that live in env->segs[] go through the cache loader */
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs), env, R_FS);
+    svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs), env, R_GS);
+    /* TR and LDTR are stored outside env->segs[] */
+    svm_load_seg(addr + offsetof(struct vmcb, save.tr), &env->tr);
+    svm_load_seg(addr + offsetof(struct vmcb, save.ldtr), &env->ldt);
+
+#ifdef TARGET_X86_64
+    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
+    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
+    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
+    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
+#endif
+    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
+    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
+    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
+    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
+}
+
+/*
+ * VMSAVE helper.
+ *
+ * After the SVM intercept check, stores FS, GS, TR and LDTR plus the STAR and
+ * SYSENTER values (and, on TARGET_X86_64, kernel_gs_base/LSTAR/CSTAR/SFMASK)
+ * into the VMCB found at the physical address held in EAX.
+ *
+ * aflag: when equal to 2 the full EAX value is used as the VMCB address,
+ *        otherwise only its low 32 bits.
+ */
+void helper_vmsave(int aflag)
+{
+    target_ulong addr;
+
+    helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0);
+
+    addr = (aflag == 2) ? EAX : (uint32_t)EAX;
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+                  addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                  env->segs[R_FS].base);
+
+    svm_save_seg(addr + offsetof(struct vmcb, save.fs), &env->segs[R_FS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.gs), &env->segs[R_GS]);
+    svm_save_seg(addr + offsetof(struct vmcb, save.tr), &env->tr);
+    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), &env->ldt);
+
+#ifdef TARGET_X86_64
+    stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase);
+    stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar);
+    stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar);
+    stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask);
+#endif
+    stq_phys(addr + offsetof(struct vmcb, save.star), env->star);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp);
+    stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip);
+}
+
+/*
+ * STGI helper: runs the SVM intercept check for SVM_EXIT_STGI, then sets
+ * HF2_GIF_MASK in env->hflags2.
+ */
+void helper_stgi(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_STGI, 0);
+ env->hflags2 |= HF2_GIF_MASK;
+}
+
+/*
+ * CLGI helper: runs the SVM intercept check for SVM_EXIT_CLGI, then clears
+ * HF2_GIF_MASK in env->hflags2.
+ */
+void helper_clgi(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0);
+ env->hflags2 &= ~HF2_GIF_MASK;
+}
+
+/*
+ * SKINIT helper: only the SVM intercept check is performed; the instruction
+ * itself is not implemented and is reported as an illegal opcode.
+ */
+void helper_skinit(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0);
+ /* XXX: not implemented */
+ raise_exception(EXCP06_ILLOP);
+}
+
+/*
+ * INVLPGA helper: after the SVM intercept check, flushes the TLB entry for
+ * the page whose address is held in EAX.
+ *
+ * aflag: when equal to 2 the full EAX value is used, otherwise only its low
+ *        32 bits.
+ */
+void helper_invlpga(int aflag)
+{
+    target_ulong page_addr;
+
+    helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0);
+
+    page_addr = (aflag == 2) ? EAX : (uint32_t)EAX;
+
+    /* XXX: the ASID could be consulted to decide whether the flush is
+       needed at all */
+    tlb_flush_page(env, page_addr);
+}
+
+/*
+ * Check whether the SVM exit reason `type` is intercepted and, if it is,
+ * perform a #VMEXIT through helper_vmexit(type, param).
+ *
+ * Nothing is done unless HF_SVMI_MASK is set in env->hflags.  In VBOX builds
+ * the intercept logic is compiled out and reaching this point asserts.
+ *
+ * type:  SVM exit code being checked (CR/DR access, exception, MSR, or any
+ *        other code that maps onto a bit of env->intercept).
+ * param: value passed on as exit_info_1; for SVM_EXIT_MSR it also selects
+ *        which of the two permission bits of the MSR is tested.
+ */
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
+{
+    if (likely(!(env->hflags & HF_SVMI_MASK)))
+        return;
+#ifndef VBOX
+    switch(type) {
+    case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
+        if (env->intercept_cr_read & (1U << (type - SVM_EXIT_READ_CR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
+        if (env->intercept_cr_write & (1U << (type - SVM_EXIT_WRITE_CR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
+        if (env->intercept_dr_read & (1U << (type - SVM_EXIT_READ_DR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
+        if (env->intercept_dr_write & (1U << (type - SVM_EXIT_WRITE_DR0))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
+        /* unsigned constant: the shift count can reach 31, which would
+           overflow a signed int */
+        if (env->intercept_exceptions & (1U << (type - SVM_EXIT_EXCP_BASE))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    case SVM_EXIT_MSR:
+        if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
+            /* FIXME: this should be read in at vmrun (faster this way?) */
+            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
+            /* only the low 32 bits of ECX select the MSR, matching the
+               switch below */
+            uint32_t msr = (uint32_t)ECX;
+            uint32_t t0, t1;
+
+            /* t0 = bit offset of the MSR's entry in the permission map;
+               every MSR occupies two consecutive bits */
+            switch(msr) {
+            case 0 ... 0x1fff:
+                t0 = msr * 2;
+                break;
+            case 0xc0000000 ... 0xc0001fff:
+                t0 = (8192 + msr - 0xc0000000) * 2;
+                break;
+            case 0xc0010000 ... 0xc0011fff:
+                t0 = (16384 + msr - 0xc0010000) * 2;
+                break;
+            default:
+                /* MSR not covered by the map */
+                helper_vmexit(type, param);
+                t0 = 0;
+                break;
+            }
+            /* split into byte index (t1) and bit-within-byte (t0); the first
+               range previously used ECX / 8 as byte index, i.e. half the
+               correct offset */
+            t1 = t0 / 8;
+            t0 %= 8;
+            if (ldub_phys(addr + t1) & ((1 << param) << t0))
+                helper_vmexit(type, param);
+        }
+        break;
+    default:
+        if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) {
+            helper_vmexit(type, param);
+        }
+        break;
+    }
+#else /* VBOX */
+    AssertMsgFailed(("We shouldn't be here, HM supported differently!"));
+#endif /* VBOX */
+}
+
+/*
+ * Check an I/O port access against the SVM I/O permission map and perform a
+ * #VMEXIT (SVM_EXIT_IOIO) when one of the tested bits is set.
+ *
+ * port:            I/O port being accessed.
+ * param:           I/O exit information; bits 4..6 give the number of
+ *                  consecutive permission bits that are tested.
+ * next_eip_addend: added to env->eip and stored in exit_info_2 before the
+ *                  exit.
+ */
+void helper_svm_check_io(uint32_t port, uint32_t param,
+                         uint32_t next_eip_addend)
+{
+    uint64_t iopm_base;
+    uint16_t width_mask;
+
+    if (!(env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))))
+        return;
+
+    /* FIXME: this should be read in at vmrun (faster this way?) */
+    iopm_base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
+    width_mask = (1 << ((param >> 4) & 7)) - 1;
+
+    if (!(lduw_phys(iopm_base + port / 8) & (width_mask << (port & 7))))
+        return;
+
+    /* next EIP */
+    stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+             env->eip + next_eip_addend);
+    helper_vmexit(SVM_EXIT_IOIO, param | (port << 16));
+}
+
+/*
+ * #VMEXIT: save the current state into the VMCB at env->vm_vmcb, reload the
+ * host state from the save area at env->vm_hsave, record exit_code and
+ * exit_info_1 in the VMCB control area, drop any pending exception and leave
+ * through cpu_loop_exit().
+ *
+ * Note: currently only 32 bits of exit_code are used
+ */
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
+ uint32_t int_ctl;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
+ exit_code, exit_info_1,
+ ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
+ EIP);
+
+ /* report a pending interrupt shadow in int_state and clear it locally */
+ if(env->hflags & HF_INHIBIT_IRQ_MASK) {
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ } else {
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
+ }
+
+ /* Save the VM state in the vmcb */
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es),
+ &env->segs[R_ES]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+ &env->segs[R_CS]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+ &env->segs[R_SS]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+ &env->segs[R_DS]);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
+
+ /* write the current virtual TPR and pending-virtual-IRQ state back into int_ctl */
+ int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+ int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK);
+ int_ctl |= env->v_tpr & V_TPR_MASK;
+ if (env->interrupt_request & CPU_INTERRUPT_VIRQ)
+ int_ctl |= V_IRQ_MASK;
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags());
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]);
+ stb_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK);
+
+ /* Reload the host state from vm_hsave */
+ env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+ env->hflags &= ~HF_SVMI_MASK;
+ env->intercept = 0;
+ env->intercept_exceptions = 0;
+ env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+ env->tsc_offset = 0;
+
+ env->gdt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
+ env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+
+ env->idt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
+ env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+
+ cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
+ cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
+ cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
+ /* we need to set the efer after the crs so the hidden flags get
+ set properly */
+ cpu_load_efer(env,
+ ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
+ env->eflags = 0;
+ load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ CC_OP = CC_OP_EFLAGS;
+
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es),
+ env, R_ES);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs),
+ env, R_CS);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss),
+ env, R_SS);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds),
+ env, R_DS);
+
+ EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
+ ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
+ EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
+
+ env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
+ env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+ /* other setups */
+ cpu_x86_set_cpl(env, 0);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
+
+ /* copy the event-injection fields into exit_int_info(_err), then clear event_inj */
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
+ ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
+ ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0);
+
+ env->hflags2 &= ~HF2_GIF_MASK;
+ /* FIXME: Resets the current ASID register to zero (host ASID). */
+
+ /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
+
+ /* Clears the TSC_OFFSET inside the processor. */
+
+ /* If the host is in PAE mode, the processor reloads the host's PDPEs
+ from the page table indicated the host's CR3. If the PDPEs contain
+ illegal state, the processor causes a shutdown. */
+
+ /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
+ env->cr[0] |= CR0_PE_MASK;
+ env->eflags &= ~VM_MASK;
+
+ /* Disables all breakpoints in the host DR7 register. */
+
+ /* Checks the reloaded host state for consistency. */
+
+ /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
+ host's code segment or non-canonical (in the case of long mode), a
+ #GP fault is delivered inside the host.) */
+
+ /* remove any pending exception */
+ env->exception_index = -1;
+ env->error_code = 0;
+ env->old_exception = -1;
+
+ cpu_loop_exit();
+}
+
+#endif
+
+/* MMX/SSE */
+/* XXX: optimize by storing fpstt and fptags in the static cpu state */
+/*
+ * Resets env->fpstt to 0 and zeroes the first eight bytes of env->fptags
+ * using two 32-bit stores.
+ * NOTE(review): assumes env->fptags is an array of eight one-byte tags -
+ * confirm against the CPU state declaration.
+ */
+void helper_enter_mmx(void)
+{
+ env->fpstt = 0;
+ *(uint32_t *)(env->fptags) = 0;
+ *(uint32_t *)(env->fptags + 4) = 0;
+}
+
+/*
+ * EMMS helper: writes 0x01 into each of the first eight bytes of env->fptags
+ * (two 32-bit stores of 0x01010101), which marks them as empty.
+ * NOTE(review): assumes env->fptags is an array of eight one-byte tags -
+ * confirm against the CPU state declaration.
+ */
+void helper_emms(void)
+{
+ /* set to empty state */
+ *(uint32_t *)(env->fptags) = 0x01010101;
+ *(uint32_t *)(env->fptags + 4) = 0x01010101;
+}
+
+/* XXX: suppress */
+/*
+ * Copies one 64-bit value from *s to *d.  Both pointers are dereferenced as
+ * uint64_t, so they must refer to storage that can be accessed that way.
+ */
+void helper_movq(void *d, void *s)
+{
+ *(uint64_t *)d = *(uint64_t *)s;
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
+
+#define SHIFT 0
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "helper_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+
+#define SHIFT 3
+#include "helper_template.h"
+#undef SHIFT
+
+#endif
+
+/* bit operations */
+/*
+ * Bit scan forward: returns the index of the least significant set bit of
+ * t0.
+ *
+ * The scan has no answer for t0 == 0 (the BSF destination is undefined in
+ * that case), so 0 is returned rather than looping forever; callers that
+ * care must test for a zero source themselves.
+ */
+target_ulong helper_bsf(target_ulong t0)
+{
+    int count;
+    target_ulong res;
+
+    if (t0 == 0) {
+        /* no bit set: the loop below would never terminate */
+        return 0;
+    }
+
+    res = t0;
+    count = 0;
+    while ((res & 1) == 0) {
+        count++;
+        res >>= 1;
+    }
+    return count;
+}
+
+/*
+ * Leading-zero count / bit scan reverse.
+ *
+ * t0:       value to scan.
+ * wordsize: when > 0, the operand width in bits; the function then returns
+ *           the number of leading zero bits within that width (wordsize
+ *           itself for t0 == 0).  When <= 0, the index of the most
+ *           significant set bit of t0 is returned instead.
+ *
+ * For t0 == 0 with wordsize <= 0 there is no set bit to report; 0 is
+ * returned instead of looping forever.
+ */
+target_ulong helper_lzcnt(target_ulong t0, int wordsize)
+{
+    int count;
+    target_ulong res, mask;
+
+    if (t0 == 0) {
+        /* the scan loop below only terminates when a bit is set */
+        return wordsize > 0 ? wordsize : 0;
+    }
+
+    /* shift left until the top bit of target_ulong is set; count ends up as
+       the index of the highest set bit of t0 */
+    res = t0;
+    count = TARGET_LONG_BITS - 1;
+    mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
+    while ((res & mask) == 0) {
+        count--;
+        res <<= 1;
+    }
+    if (wordsize > 0) {
+        return wordsize - 1 - count;
+    }
+    return count;
+}
+
+/*
+ * Bit scan reverse: delegates to helper_lzcnt() with wordsize 0, which
+ * yields the index of the most significant set bit of t0.
+ * NOTE(review): t0 is presumably never 0 here - confirm the caller checks.
+ */
+target_ulong helper_bsr(target_ulong t0)
+{
+ return helper_lzcnt(t0, 0);
+}
+
+/* Flags for CC_OP_EFLAGS: CC_SRC is returned unchanged. */
+static int compute_all_eflags(void)
+{
+ return CC_SRC;
+}
+
+/* Carry for CC_OP_EFLAGS: the CC_C bit of CC_SRC. */
+static int compute_c_eflags(void)
+{
+ return CC_SRC & CC_C;
+}
+
+/*
+ * Dispatch on the condition-code operation `op` to the matching
+ * compute_all_<op>() routine and return its result.  Unknown values of `op`
+ * yield 0.  The 64-bit (…Q) operations are only available when
+ * TARGET_X86_64 is defined.
+ */
+uint32_t helper_cc_compute_all(int op)
+{
+ switch (op) {
+ default: /* should never happen */ return 0;
+
+ case CC_OP_EFLAGS: return compute_all_eflags();
+
+ case CC_OP_MULB: return compute_all_mulb();
+ case CC_OP_MULW: return compute_all_mulw();
+ case CC_OP_MULL: return compute_all_mull();
+
+ case CC_OP_ADDB: return compute_all_addb();
+ case CC_OP_ADDW: return compute_all_addw();
+ case CC_OP_ADDL: return compute_all_addl();
+
+ case CC_OP_ADCB: return compute_all_adcb();
+ case CC_OP_ADCW: return compute_all_adcw();
+ case CC_OP_ADCL: return compute_all_adcl();
+
+ case CC_OP_SUBB: return compute_all_subb();
+ case CC_OP_SUBW: return compute_all_subw();
+ case CC_OP_SUBL: return compute_all_subl();
+
+ case CC_OP_SBBB: return compute_all_sbbb();
+ case CC_OP_SBBW: return compute_all_sbbw();
+ case CC_OP_SBBL: return compute_all_sbbl();
+
+ case CC_OP_LOGICB: return compute_all_logicb();
+ case CC_OP_LOGICW: return compute_all_logicw();
+ case CC_OP_LOGICL: return compute_all_logicl();
+
+ case CC_OP_INCB: return compute_all_incb();
+ case CC_OP_INCW: return compute_all_incw();
+ case CC_OP_INCL: return compute_all_incl();
+
+ case CC_OP_DECB: return compute_all_decb();
+ case CC_OP_DECW: return compute_all_decw();
+ case CC_OP_DECL: return compute_all_decl();
+
+ case CC_OP_SHLB: return compute_all_shlb();
+ case CC_OP_SHLW: return compute_all_shlw();
+ case CC_OP_SHLL: return compute_all_shll();
+
+ case CC_OP_SARB: return compute_all_sarb();
+ case CC_OP_SARW: return compute_all_sarw();
+ case CC_OP_SARL: return compute_all_sarl();
+
+#ifdef TARGET_X86_64
+ case CC_OP_MULQ: return compute_all_mulq();
+
+ case CC_OP_ADDQ: return compute_all_addq();
+
+ case CC_OP_ADCQ: return compute_all_adcq();
+
+ case CC_OP_SUBQ: return compute_all_subq();
+
+ case CC_OP_SBBQ: return compute_all_sbbq();
+
+ case CC_OP_LOGICQ: return compute_all_logicq();
+
+ case CC_OP_INCQ: return compute_all_incq();
+
+ case CC_OP_DECQ: return compute_all_decq();
+
+ case CC_OP_SHLQ: return compute_all_shlq();
+
+ case CC_OP_SARQ: return compute_all_sarq();
+#endif
+ }
+}
+
+/*
+ * Dispatch on the condition-code operation `op` to the routine computing
+ * only the carry flag and return its result.  Unknown values of `op` yield
+ * 0.  Unlike helper_cc_compute_all(), several operation families share one
+ * routine for every operand size: all MUL variants use compute_c_mull(),
+ * all INC and DEC variants use compute_c_incl(), and all SAR variants use
+ * compute_c_sarl().
+ */
+uint32_t helper_cc_compute_c(int op)
+{
+ switch (op) {
+ default: /* should never happen */ return 0;
+
+ case CC_OP_EFLAGS: return compute_c_eflags();
+
+ case CC_OP_MULB: return compute_c_mull();
+ case CC_OP_MULW: return compute_c_mull();
+ case CC_OP_MULL: return compute_c_mull();
+
+ case CC_OP_ADDB: return compute_c_addb();
+ case CC_OP_ADDW: return compute_c_addw();
+ case CC_OP_ADDL: return compute_c_addl();
+
+ case CC_OP_ADCB: return compute_c_adcb();
+ case CC_OP_ADCW: return compute_c_adcw();
+ case CC_OP_ADCL: return compute_c_adcl();
+
+ case CC_OP_SUBB: return compute_c_subb();
+ case CC_OP_SUBW: return compute_c_subw();
+ case CC_OP_SUBL: return compute_c_subl();
+
+ case CC_OP_SBBB: return compute_c_sbbb();
+ case CC_OP_SBBW: return compute_c_sbbw();
+ case CC_OP_SBBL: return compute_c_sbbl();
+
+ case CC_OP_LOGICB: return compute_c_logicb();
+ case CC_OP_LOGICW: return compute_c_logicw();
+ case CC_OP_LOGICL: return compute_c_logicl();
+
+ /* INC and DEC share one carry routine for every operand size */
+ case CC_OP_INCB: return compute_c_incl();
+ case CC_OP_INCW: return compute_c_incl();
+ case CC_OP_INCL: return compute_c_incl();
+
+ case CC_OP_DECB: return compute_c_incl();
+ case CC_OP_DECW: return compute_c_incl();
+ case CC_OP_DECL: return compute_c_incl();
+
+ case CC_OP_SHLB: return compute_c_shlb();
+ case CC_OP_SHLW: return compute_c_shlw();
+ case CC_OP_SHLL: return compute_c_shll();
+
+ case CC_OP_SARB: return compute_c_sarl();
+ case CC_OP_SARW: return compute_c_sarl();
+ case CC_OP_SARL: return compute_c_sarl();
+
+#ifdef TARGET_X86_64
+ case CC_OP_MULQ: return compute_c_mull();
+
+ case CC_OP_ADDQ: return compute_c_addq();
+
+ case CC_OP_ADCQ: return compute_c_adcq();
+
+ case CC_OP_SUBQ: return compute_c_subq();
+
+ case CC_OP_SBBQ: return compute_c_sbbq();
+
+ case CC_OP_LOGICQ: return compute_c_logicq();
+
+ case CC_OP_INCQ: return compute_c_incl();
+
+ case CC_OP_DECQ: return compute_c_incl();
+
+ case CC_OP_SHLQ: return compute_c_shlq();
+
+ case CC_OP_SARQ: return compute_c_sarl();
+#endif
+ }
+}
diff --git a/src/recompiler/target-i386/ops_sse.h b/src/recompiler/target-i386/ops_sse.h
new file mode 100644
index 00000000..4c8b89e2
--- /dev/null
+++ b/src/recompiler/target-i386/ops_sse.h
@@ -0,0 +1,2111 @@
+/*
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ * Copyright (c) 2008 Intel Corporation <andrew.zaborowski@intel.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/*
+ * Per-inclusion configuration.  This header is included once per value of
+ * SHIFT; the macros below select the register type, the element accessors
+ * and the function-name suffix used by every helper that follows:
+ *   SHIFT == 0 -> MMXReg, MMX_* accessors, "_mmx" suffix, XMM_ONLY() drops
+ *                 its arguments;
+ *   otherwise  -> XMMReg, XMM_* accessors, "_xmm" suffix, XMM_ONLY() keeps
+ *                 its arguments.
+ * Note that the MMX variant of Q(n) ignores n and always names the member q.
+ */
+#if SHIFT == 0
+#define Reg MMXReg
+#define XMM_ONLY(...)
+#define B(n) MMX_B(n)
+#define W(n) MMX_W(n)
+#define L(n) MMX_L(n)
+#define Q(n) q
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define XMM_ONLY(...) __VA_ARGS__
+#define B(n) XMM_B(n)
+#define W(n) XMM_W(n)
+#define L(n) XMM_L(n)
+#define Q(n) XMM_Q(n)
+#define SUFFIX _xmm
+#endif
+
+/*
+ * Logical right shift of every 16-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 15 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 15) {
+        count = s->B(0);
+        for (lane = 0; lane < (4 << SHIFT); lane++) {
+            d->W(lane) >>= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+/*
+ * Right shift of every 16-bit element of *d, treated as int16_t, by the
+ * count in *s.  Counts above 15 are clamped to 15.
+ */
+void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    count = (s->Q(0) > 15) ? 15 : s->B(0);
+
+    for (lane = 0; lane < (4 << SHIFT); lane++) {
+        d->W(lane) = (int16_t)d->W(lane) >> count;
+    }
+}
+
+/*
+ * Left shift of every 16-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 15 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 15) {
+        count = s->B(0);
+        for (lane = 0; lane < (4 << SHIFT); lane++) {
+            d->W(lane) <<= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+/*
+ * Logical right shift of every 32-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 31 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 31) {
+        count = s->B(0);
+        for (lane = 0; lane < (2 << SHIFT); lane++) {
+            d->L(lane) >>= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+/*
+ * Right shift of every 32-bit element of *d, treated as int32_t, by the
+ * count in *s.  Counts above 31 are clamped to 31.
+ */
+void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    count = (s->Q(0) > 31) ? 31 : s->B(0);
+
+    for (lane = 0; lane < (2 << SHIFT); lane++) {
+        d->L(lane) = (int32_t)d->L(lane) >> count;
+    }
+}
+
+/*
+ * Left shift of every 32-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 31 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 31) {
+        count = s->B(0);
+        for (lane = 0; lane < (2 << SHIFT); lane++) {
+            d->L(lane) <<= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+/*
+ * Logical right shift of every 64-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 63 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 63) {
+        count = s->B(0);
+        for (lane = 0; lane < (1 << SHIFT); lane++) {
+            d->Q(lane) >>= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+/*
+ * Left shift of every 64-bit element of *d by the count in *s.
+ * When Q(0) of *s exceeds 63 the whole destination is cleared; otherwise
+ * the low byte of the count is applied to each element.
+ */
+void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
+{
+    int count, lane;
+
+    if (s->Q(0) <= 63) {
+        count = s->B(0);
+        for (lane = 0; lane < (1 << SHIFT); lane++) {
+            d->Q(lane) <<= count;
+        }
+    } else {
+        d->Q(0) = 0;
+#if SHIFT == 1
+        d->Q(1) = 0;
+#endif
+    }
+}
+
+#if SHIFT == 1
+/*
+ * Byte-wise right shift of the 16-byte register *d: byte i receives byte
+ * i + shift and the vacated top bytes are zeroed.  The shift count is taken
+ * from L(0) of *s and limited to 16.
+ */
+void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
+{
+    int shift, i;
+
+    /* clamp while the count is still unsigned: a count of 2^31 or more
+       would otherwise turn negative in `shift`, slip past the limit check
+       and index outside the register */
+    if ((uint32_t)s->L(0) > 16)
+        shift = 16;
+    else
+        shift = (int)s->L(0);
+    for(i = 0; i < 16 - shift; i++)
+        d->B(i) = d->B(i + shift);
+    for(i = 16 - shift; i < 16; i++)
+        d->B(i) = 0;
+}
+
+/*
+ * Byte-wise left shift of the 16-byte register *d: byte i receives byte
+ * i - shift and the vacated low bytes are zeroed.  The shift count is taken
+ * from L(0) of *s and limited to 16.
+ */
+void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
+{
+    int shift, i;
+
+    /* clamp while the count is still unsigned: a count of 2^31 or more
+       would otherwise turn negative in `shift`, slip past the limit check
+       and index outside the register */
+    if ((uint32_t)s->L(0) > 16)
+        shift = 16;
+    else
+        shift = (int)s->L(0);
+    /* walk downwards so source bytes are read before they are overwritten */
+    for(i = 15; i >= shift; i--)
+        d->B(i) = d->B(i - shift);
+    for(i = 0; i < shift; i++)
+        d->B(i) = 0;
+}
+#endif
+
+/*
+ * Defines `name`SUFFIX(Reg *d, Reg *s), which applies the two-operand
+ * operator macro F to each pair of byte elements and stores the result in
+ * *d: 8 bytes when SHIFT == 0, 16 bytes when SHIFT == 1.
+ */
+#define SSE_HELPER_B(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    int idx;\
+\
+    for (idx = 0; idx < (8 << SHIFT); idx++) {\
+        d->B(idx) = F(d->B(idx), s->B(idx));\
+    }\
+}
+
+/*
+ * Defines `name`SUFFIX(Reg *d, Reg *s), which applies the two-operand
+ * operator macro F to each pair of 16-bit elements and stores the result in
+ * *d: 4 elements when SHIFT == 0, 8 when SHIFT == 1.
+ */
+#define SSE_HELPER_W(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    int idx;\
+\
+    for (idx = 0; idx < (4 << SHIFT); idx++) {\
+        d->W(idx) = F(d->W(idx), s->W(idx));\
+    }\
+}
+
+/*
+ * Defines `name`SUFFIX(Reg *d, Reg *s), which applies the two-operand
+ * operator macro F to each pair of 32-bit elements and stores the result in
+ * *d: 2 elements when SHIFT == 0, 4 when SHIFT == 1.
+ */
+#define SSE_HELPER_L(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    int idx;\
+\
+    for (idx = 0; idx < (2 << SHIFT); idx++) {\
+        d->L(idx) = F(d->L(idx), s->L(idx));\
+    }\
+}
+
+/*
+ * Defines `name`SUFFIX(Reg *d, Reg *s), which applies the two-operand
+ * operator macro F to each pair of 64-bit elements and stores the result in
+ * *d: 1 element when SHIFT == 0, 2 when SHIFT == 1.
+ */
+#define SSE_HELPER_Q(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    int idx;\
+\
+    for (idx = 0; idx < (1 << SHIFT); idx++) {\
+        d->Q(idx) = F(d->Q(idx), s->Q(idx));\
+    }\
+}
+
+#if SHIFT == 0
+/* Clamp x to the unsigned 8-bit range [0, 255]. */
+static inline int satub(int x)
+{
+    return x < 0 ? 0 : (x > 255 ? 255 : x);
+}
+
+/* Clamp x to the unsigned 16-bit range [0, 65535]. */
+static inline int satuw(int x)
+{
+    return x < 0 ? 0 : (x > 65535 ? 65535 : x);
+}
+
+/* Clamp x to the signed 8-bit range [-128, 127]. */
+static inline int satsb(int x)
+{
+    return x < -128 ? -128 : (x > 127 ? 127 : x);
+}
+
+/* Clamp x to the signed 16-bit range [-32768, 32767]. */
+static inline int satsw(int x)
+{
+    return x < -32768 ? -32768 : (x > 32767 ? 32767 : x);
+}
+
+/*
+ * Element operators handed to the SSE_HELPER_* generators.  Every expansion
+ * is fully parenthesised so that it stays a single expression wherever it
+ * is used.
+ */
+
+/* addition: wrapping, then unsigned/signed saturating byte and word forms */
+#define FADD(a, b) ((a) + (b))
+#define FADDUB(a, b) satub((a) + (b))
+#define FADDUW(a, b) satuw((a) + (b))
+#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
+#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
+
+/* subtraction: same variants as above */
+#define FSUB(a, b) ((a) - (b))
+#define FSUBUB(a, b) satub((a) - (b))
+#define FSUBUW(a, b) satuw((a) - (b))
+#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
+#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
+/* minimum / maximum: unsigned bytes and signed words */
+#define FMINUB(a, b) (((a) < (b)) ? (a) : (b))
+#define FMINSW(a, b) (((int16_t)(a) < (int16_t)(b)) ? (a) : (b))
+#define FMAXUB(a, b) (((a) > (b)) ? (a) : (b))
+#define FMAXSW(a, b) (((int16_t)(a) > (int16_t)(b)) ? (a) : (b))
+
+/* bitwise logic */
+#define FAND(a, b) ((a) & (b))
+#define FANDN(a, b) ((~(a)) & (b))
+#define FOR(a, b) ((a) | (b))
+#define FXOR(a, b) ((a) ^ (b))
+
+/* comparisons: all ones (-1) when true, 0 otherwise */
+#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
+#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
+#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
+#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)
+
+/* 16-bit multiplies.  The unsigned forms multiply as uint32_t: two 16-bit
+   operands promoted to int can overflow a signed int (e.g. 0xffff * 0xffff),
+   which is undefined behaviour. */
+#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b))
+#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
+#define FMULHUW(a, b) (((uint32_t)(a) * (uint32_t)(b)) >> 16)
+#define FMULHW(a, b) (((int16_t)(a) * (int16_t)(b)) >> 16)
+
+/* rounded average */
+#define FAVG(a, b) (((a) + (b) + 1) >> 1)
+#endif
+
+/*
+ * Instantiate the element-wise helpers.  Each line defines one function
+ * named <first argument>SUFFIX that applies the given operator macro to
+ * every byte (B), word (W), doubleword (L) or quadword (Q) element.
+ */
+
+/* wrapping add */
+SSE_HELPER_B(helper_paddb, FADD)
+SSE_HELPER_W(helper_paddw, FADD)
+SSE_HELPER_L(helper_paddl, FADD)
+SSE_HELPER_Q(helper_paddq, FADD)
+
+/* wrapping subtract */
+SSE_HELPER_B(helper_psubb, FSUB)
+SSE_HELPER_W(helper_psubw, FSUB)
+SSE_HELPER_L(helper_psubl, FSUB)
+SSE_HELPER_Q(helper_psubq, FSUB)
+
+/* saturating add/subtract on bytes */
+SSE_HELPER_B(helper_paddusb, FADDUB)
+SSE_HELPER_B(helper_paddsb, FADDSB)
+SSE_HELPER_B(helper_psubusb, FSUBUB)
+SSE_HELPER_B(helper_psubsb, FSUBSB)
+
+/* saturating add/subtract on words */
+SSE_HELPER_W(helper_paddusw, FADDUW)
+SSE_HELPER_W(helper_paddsw, FADDSW)
+SSE_HELPER_W(helper_psubusw, FSUBUW)
+SSE_HELPER_W(helper_psubsw, FSUBSW)
+
+/* unsigned byte min/max */
+SSE_HELPER_B(helper_pminub, FMINUB)
+SSE_HELPER_B(helper_pmaxub, FMAXUB)
+
+/* signed word min/max */
+SSE_HELPER_W(helper_pminsw, FMINSW)
+SSE_HELPER_W(helper_pmaxsw, FMAXSW)
+
+/* bitwise logic, done one quadword at a time */
+SSE_HELPER_Q(helper_pand, FAND)
+SSE_HELPER_Q(helper_pandn, FANDN)
+SSE_HELPER_Q(helper_por, FOR)
+SSE_HELPER_Q(helper_pxor, FXOR)
+
+/* signed greater-than compare */
+SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
+SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
+SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
+
+/* equality compare */
+SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
+SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
+SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
+
+/* word multiplies; pmulhrw is only generated for the SHIFT == 0 pass */
+SSE_HELPER_W(helper_pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(helper_pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(helper_pmulhuw, FMULHUW)
+SSE_HELPER_W(helper_pmulhw, FMULHW)
+
+/* rounded average */
+SSE_HELPER_B(helper_pavgb, FAVG)
+SSE_HELPER_W(helper_pavgw, FAVG)
+
+/*
+ * For each 64-bit element k of *d, stores the 64-bit product of the 32-bit
+ * elements at index 2*k of *s and *d.
+ */
+void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
+{
+    int k;
+
+    for (k = 0; k < (1 << SHIFT); k++) {
+        d->Q(k) = (uint64_t)s->L(2 * k) * (uint64_t)d->L(2 * k);
+    }
+}
+
+/*
+ * Multiply-add of signed words: each 32-bit element i of *d receives
+ *   s.W(2i) * d.W(2i) + s.W(2i+1) * d.W(2i+1)
+ * with the words treated as int16_t.
+ *
+ * Each product fits in 32 bits, but the sum does not when all four inputs
+ * are 0x8000 (2^30 + 2^30).  The two products are therefore added as
+ * uint32_t so that the result wraps to 0x80000000 instead of overflowing a
+ * signed int.
+ */
+void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
+{
+    int i;
+
+    for(i = 0; i < (2 << SHIFT); i++) {
+        int32_t lo = (int16_t)s->W(2*i) * (int16_t)d->W(2*i);
+        int32_t hi = (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1);
+
+        d->L(i) = (uint32_t)lo + (uint32_t)hi;
+    }
+}
+
+#if SHIFT == 0
+/* Absolute value of a. */
+static inline int abs1(int a)
+{
+    return (a < 0) ? -a : a;
+}
+#endif
+/*
+ * Sum of absolute differences: for every group of eight bytes, adds up
+ * |d.B(i) - s.B(i)| and stores the total in the 64-bit element of *d that
+ * covers the group.
+ */
+void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
+{
+    unsigned int sum;
+    int group, i;
+
+    for (group = 0; group < (1 << SHIFT); group++) {
+        sum = 0;
+        for (i = 8 * group; i < 8 * group + 8; i++) {
+            sum += abs1(d->B(i) - s->B(i));
+        }
+        d->Q(group) = sum;
+    }
+}
+
+/*
+ * Masked byte store: byte i of *d is written to address a0 + i via stb()
+ * whenever bit 7 of byte i of *s is set.
+ */
+void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0)
+{
+    int idx;
+
+    for (idx = 0; idx < (8 << SHIFT); idx++) {
+        if (!(s->B(idx) & 0x80))
+            continue;
+        stb(a0 + idx, d->B(idx));
+    }
+}
+
+/*
+ * Loads a 32-bit value into element L(0) of *d and zeroes the rest of the
+ * register (L(1), plus Q(1) for the SHIFT == 1 variant).
+ */
+void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
+{
+ d->L(0) = val;
+ d->L(1) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+}
+
+#ifdef TARGET_X86_64
+/*
+ * Loads a 64-bit value into element Q(0) of *d; the SHIFT == 1 variant also
+ * zeroes Q(1).
+ */
+void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
+{
+ d->Q(0) = val;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+}
+#endif
+
+#if SHIFT == 0
+/*
+ * Word shuffle: word i of the result is the word of *s selected by bits
+ * 2i..2i+1 of `order`.  The result is built in a temporary so that d and s
+ * may refer to the same register.
+ */
+void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        shuffled.W(i) = s->W((order >> (2 * i)) & 3);
+    }
+    *d = shuffled;
+}
+#else
+/* Dword shuffle: the two low result dwords are selected from d, the two
+   high ones from s, each by a 2-bit field of order. */
+void helper_shufps(Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        Reg *from = (i < 2) ? d : s;
+
+        shuffled.L(i) = from->L((order >> (2 * i)) & 3);
+    }
+    *d = shuffled;
+}
+
+/* Qword shuffle: bit 0 of order picks the low result qword from d,
+   bit 1 picks the high result qword from s. */
+void helper_shufpd(Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int sel_lo = order & 1;
+    int sel_hi = (order >> 1) & 1;
+
+    shuffled.Q(0) = d->Q(sel_lo);
+    shuffled.Q(1) = s->Q(sel_hi);
+    *d = shuffled;
+}
+
+/* Dword shuffle: result dword i is the dword of s selected by bits
+   [2i+1:2i] of order. */
+void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        shuffled.L(i) = s->L((order >> (2 * i)) & 3);
+    }
+    *d = shuffled;
+}
+
+/* Shuffle the four low words of s according to order; the high qword of
+   s is copied through unchanged. */
+void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int i;
+
+    shuffled.Q(1) = s->Q(1);
+    for (i = 0; i < 4; i++) {
+        shuffled.W(i) = s->W((order >> (2 * i)) & 3);
+    }
+    *d = shuffled;
+}
+
+/* Shuffle the four high words of s according to order; the low qword of
+   s is copied through unchanged. */
+void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+    Reg shuffled;
+    int i;
+
+    shuffled.Q(0) = s->Q(0);
+    for (i = 0; i < 4; i++) {
+        shuffled.W(4 + i) = s->W(4 + ((order >> (2 * i)) & 3));
+    }
+    *d = shuffled;
+}
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+/*
+ * Generate the four variants of a floating-point helper from one element
+ * operation F(size, a, b):
+ *   helper_<name>ps - F(32, ...) on all four single-precision lanes
+ *   helper_<name>ss - F(32, ...) on lane 0 only
+ *   helper_<name>pd - F(64, ...) on both double-precision lanes
+ *   helper_<name>sd - F(64, ...) on lane 0 only
+ * Each lane of d is replaced by F applied to that lane of d and of s.
+ */
+#define SSE_HELPER_S(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+ d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+ d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+ d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+ d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+/* Element operations for SSE_HELPER_S; every one goes through the
+   softfloat routines using env->sse_status. */
+#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
+#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
+#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
+#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
+/* MIN/MAX compare with the softfloat "less than" routine rather than the
+   C relational operators, which are not meaningful when float32/float64
+   are integer-typed softfloat values.  When the comparison is false
+   (including the unordered case) the second operand is returned. */
+#define FPU_MIN(size, a, b) \
+    (float ## size ## _lt(a, b, &env->sse_status) ? (a) : (b))
+#define FPU_MAX(size, a, b) \
+    (float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b))
+/* Unary: only the source operand b is used. */
+#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+/* float to float conversions */
+/* Widen the two low single-precision lanes of s to double precision.
+   Both inputs are read before d is written because d may be s. */
+void helper_cvtps2pd(Reg *d, Reg *s)
+{
+    float32 lo = s->XMM_S(0);
+    float32 hi = s->XMM_S(1);
+
+    d->XMM_D(0) = float32_to_float64(lo, &env->sse_status);
+    d->XMM_D(1) = float32_to_float64(hi, &env->sse_status);
+}
+
+/* Narrow both double-precision lanes of s to single precision in the low
+   half of d and clear the high qword. */
+void helper_cvtpd2ps(Reg *d, Reg *s)
+{
+    float32 lo = float64_to_float32(s->XMM_D(0), &env->sse_status);
+    float32 hi = float64_to_float32(s->XMM_D(1), &env->sse_status);
+
+    d->XMM_S(0) = lo;
+    d->XMM_S(1) = hi;
+    d->Q(1) = 0;
+}
+
+/* Widen single-precision lane 0 of s into double-precision lane 0 of d. */
+void helper_cvtss2sd(Reg *d, Reg *s)
+{
+    float32 src = s->XMM_S(0);
+
+    d->XMM_D(0) = float32_to_float64(src, &env->sse_status);
+}
+
+/* Narrow double-precision lane 0 of s into single-precision lane 0 of d. */
+void helper_cvtsd2ss(Reg *d, Reg *s)
+{
+    float64 src = s->XMM_D(0);
+
+    d->XMM_S(0) = float64_to_float32(src, &env->sse_status);
+}
+
+/* integer to float */
+/* Convert the four dwords of s to single precision, lane by lane. */
+void helper_cvtdq2ps(Reg *d, Reg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        d->XMM_S(lane) = int32_to_float32(s->XMM_L(lane), &env->sse_status);
+    }
+}
+
+/* Convert the two low dwords of s to double precision.  Both inputs are
+   read before d is written because d may be s. */
+void helper_cvtdq2pd(Reg *d, Reg *s)
+{
+    int32_t lo = (int32_t)s->XMM_L(0);
+    int32_t hi = (int32_t)s->XMM_L(1);
+
+    d->XMM_D(0) = int32_to_float64(lo, &env->sse_status);
+    d->XMM_D(1) = int32_to_float64(hi, &env->sse_status);
+}
+
+/* Convert the two dwords of MMX register s into the two low
+   single-precision lanes of d. */
+void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->XMM_S(lane) = int32_to_float32(s->MMX_L(lane), &env->sse_status);
+    }
+}
+
+/* Convert the two dwords of MMX register s into the two
+   double-precision lanes of d. */
+void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->XMM_D(lane) = int32_to_float64(s->MMX_L(lane), &env->sse_status);
+    }
+}
+
+/* Convert a 32-bit integer to single precision in lane 0 of d. */
+void helper_cvtsi2ss(XMMReg *d, uint32_t val)
+{
+    float32 converted = int32_to_float32(val, &env->sse_status);
+
+    d->XMM_S(0) = converted;
+}
+
+/* Convert a 32-bit integer to double precision in lane 0 of d. */
+void helper_cvtsi2sd(XMMReg *d, uint32_t val)
+{
+    float64 converted = int32_to_float64(val, &env->sse_status);
+
+    d->XMM_D(0) = converted;
+}
+
+#ifdef TARGET_X86_64
+/* Convert a 64-bit integer to single precision in lane 0 of d. */
+void helper_cvtsq2ss(XMMReg *d, uint64_t val)
+{
+    float32 converted = int64_to_float32(val, &env->sse_status);
+
+    d->XMM_S(0) = converted;
+}
+
+/* Convert a 64-bit integer to double precision in lane 0 of d. */
+void helper_cvtsq2sd(XMMReg *d, uint64_t val)
+{
+    float64 converted = int64_to_float64(val, &env->sse_status);
+
+    d->XMM_D(0) = converted;
+}
+#endif
+
+/* float to integer */
+/* Convert the four single-precision lanes of s to 32-bit integers using
+   the rounding configured in sse_status. */
+void helper_cvtps2dq(XMMReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        d->XMM_L(lane) = float32_to_int32(s->XMM_S(lane), &env->sse_status);
+    }
+}
+
+/* Convert both double-precision lanes of s to 32-bit integers in the low
+   half of d and clear the high qword. */
+void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
+{
+    int32_t lo = float64_to_int32(s->XMM_D(0), &env->sse_status);
+    int32_t hi = float64_to_int32(s->XMM_D(1), &env->sse_status);
+
+    d->XMM_L(0) = lo;
+    d->XMM_L(1) = hi;
+    d->XMM_Q(1) = 0;
+}
+
+/* Convert the two low single-precision lanes of s to the two dwords of
+   MMX register d. */
+void helper_cvtps2pi(MMXReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_to_int32(s->XMM_S(lane), &env->sse_status);
+    }
+}
+
+/* Convert both double-precision lanes of s to the two dwords of MMX
+   register d. */
+void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float64_to_int32(s->XMM_D(lane), &env->sse_status);
+    }
+}
+
+/* Return single-precision lane 0 of s converted to a 32-bit integer. */
+int32_t helper_cvtss2si(XMMReg *s)
+{
+    int32_t result = float32_to_int32(s->XMM_S(0), &env->sse_status);
+
+    return result;
+}
+
+/* Return double-precision lane 0 of s converted to a 32-bit integer. */
+int32_t helper_cvtsd2si(XMMReg *s)
+{
+    int32_t result = float64_to_int32(s->XMM_D(0), &env->sse_status);
+
+    return result;
+}
+
+#ifdef TARGET_X86_64
+/* Return single-precision lane 0 of s converted to a 64-bit integer. */
+int64_t helper_cvtss2sq(XMMReg *s)
+{
+    int64_t result = float32_to_int64(s->XMM_S(0), &env->sse_status);
+
+    return result;
+}
+
+/* Return double-precision lane 0 of s converted to a 64-bit integer. */
+int64_t helper_cvtsd2sq(XMMReg *s)
+{
+    int64_t result = float64_to_int64(s->XMM_D(0), &env->sse_status);
+
+    return result;
+}
+#endif
+
+/* float to integer truncated */
+/* Convert the four single-precision lanes of s to 32-bit integers,
+   rounding toward zero. */
+void helper_cvttps2dq(XMMReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        d->XMM_L(lane) = float32_to_int32_round_to_zero(s->XMM_S(lane),
+                                                        &env->sse_status);
+    }
+}
+
+/* Convert both double-precision lanes of s to 32-bit integers, rounding
+   toward zero, into the low half of d; the high qword is cleared. */
+void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
+{
+    int32_t lo = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    int32_t hi = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+
+    d->XMM_L(0) = lo;
+    d->XMM_L(1) = hi;
+    d->XMM_Q(1) = 0;
+}
+
+/* Convert the two low single-precision lanes of s to the dwords of MMX
+   register d, rounding toward zero. */
+void helper_cvttps2pi(MMXReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_to_int32_round_to_zero(s->XMM_S(lane),
+                                                        &env->sse_status);
+    }
+}
+
+/* Convert both double-precision lanes of s to the dwords of MMX
+   register d, rounding toward zero. */
+void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float64_to_int32_round_to_zero(s->XMM_D(lane),
+                                                        &env->sse_status);
+    }
+}
+
+/* Return single-precision lane 0 of s truncated to a 32-bit integer. */
+int32_t helper_cvttss2si(XMMReg *s)
+{
+    int32_t result;
+
+    result = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+    return result;
+}
+
+/* Return double-precision lane 0 of s truncated to a 32-bit integer. */
+int32_t helper_cvttsd2si(XMMReg *s)
+{
+    int32_t result;
+
+    result = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+    return result;
+}
+
+#ifdef TARGET_X86_64
+/* Return single-precision lane 0 of s truncated to a 64-bit integer. */
+int64_t helper_cvttss2sq(XMMReg *s)
+{
+    int64_t result;
+
+    result = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
+    return result;
+}
+
+/* Return double-precision lane 0 of s truncated to a 64-bit integer. */
+int64_t helper_cvttsd2sq(XMMReg *s)
+{
+    int64_t result;
+
+    result = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
+    return result;
+}
+#endif
+
+/* Apply approx_rsqrt() to each of the four single-precision lanes of s. */
+void helper_rsqrtps(XMMReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        d->XMM_S(lane) = approx_rsqrt(s->XMM_S(lane));
+    }
+}
+
+/* Apply approx_rsqrt() to single-precision lane 0 of s only. */
+void helper_rsqrtss(XMMReg *d, XMMReg *s)
+{
+    float32 src = s->XMM_S(0);
+
+    d->XMM_S(0) = approx_rsqrt(src);
+}
+
+/* Apply approx_rcp() to each of the four single-precision lanes of s. */
+void helper_rcpps(XMMReg *d, XMMReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        d->XMM_S(lane) = approx_rcp(s->XMM_S(lane));
+    }
+}
+
+/* Apply approx_rcp() to single-precision lane 0 of s only. */
+void helper_rcpss(XMMReg *d, XMMReg *s)
+{
+    float32 src = s->XMM_S(0);
+
+    d->XMM_S(0) = approx_rcp(src);
+}
+
+/*
+ * Extract a bit field from src: shift right by `shift` and keep the low
+ * `len` bits.  A length of 0 selects all 64 bits.
+ *
+ * Only the low six bits of shift and len are used.  helper_extrq_r passes
+ * raw register bytes (0..255), and shifting a 64-bit value by 64 or more
+ * is undefined in C.
+ */
+static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
+{
+    uint64_t mask;
+
+    shift &= 63;
+    len &= 63;
+
+    if (len == 0) {
+        mask = ~0ULL;
+    } else {
+        mask = (1ULL << len) - 1;
+    }
+    return (src >> shift) & mask;
+}
+
+/* Register form of the bit-field extract: byte 0 of s is the field
+   length, byte 1 the bit index; the field is taken from qword 0 of d. */
+void helper_extrq_r(XMMReg *d, XMMReg *s)
+{
+    int field_len = s->XMM_B(0);
+    int bit_index = s->XMM_B(1);
+
+    d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), bit_index, field_len);
+}
+
+/* Immediate form of the bit-field extract: index and length are supplied
+   by the caller; the field is taken from qword 0 of d. */
+void helper_extrq_i(XMMReg *d, int index, int length)
+{
+    uint64_t field = helper_extrq(d->XMM_Q(0), index, length);
+
+    d->XMM_Q(0) = field;
+}
+
+/*
+ * Insert the low `len` bits of src into dest at bit position `shift`,
+ * leaving every other bit of dest unchanged.  A length of 0 selects all
+ * 64 bits.  Only the low six bits of shift and len are used, so raw
+ * register bytes can be passed without an out-of-range shift.
+ */
+static inline uint64_t helper_insertq_field(uint64_t dest, uint64_t src,
+                                            int shift, int len)
+{
+    uint64_t mask;
+
+    shift &= 63;
+    len &= 63;
+
+    if (len == 0) {
+        mask = ~0ULL;
+    } else {
+        mask = (1ULL << len) - 1;
+    }
+    return (dest & ~(mask << shift)) | ((src & mask) << shift);
+}
+
+/* Single-operand form kept for existing callers: the field is taken from
+   and inserted into the same value. */
+static inline uint64_t helper_insertq(uint64_t src, int shift, int len)
+{
+    return helper_insertq_field(src, src, shift, len);
+}
+
+/*
+ * Register form of the bit-field insert: byte 8 of s is the field
+ * length, byte 9 the bit index.  The field comes from qword 0 of s and
+ * is merged into qword 0 of d.  Previously d was overwritten with s
+ * merged into itself, discarding the destination's other bits.
+ */
+void helper_insertq_r(XMMReg *d, XMMReg *s)
+{
+    d->XMM_Q(0) = helper_insertq_field(d->XMM_Q(0), s->XMM_Q(0),
+                                       s->XMM_B(9), s->XMM_B(8));
+}
+
+/* Immediate form of the bit-field insert, operating on qword 0 of d with
+   the caller-supplied index and length.
+   NOTE(review): this helper receives no source register, so the inserted
+   bits come from d itself; confirm against the instruction definition
+   and the translator call site. */
+void helper_insertq_i(XMMReg *d, int index, int length)
+{
+ d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length);
+}
+
+/*
+ * Horizontal add, single precision: the low two result lanes are the
+ * pairwise sums of d, the high two the pairwise sums of s.
+ *
+ * The sums go through float32_add() with sse_status, like the other
+ * arithmetic helpers in this file, instead of the C '+' operator, which
+ * is not a floating-point add when float32 is an integer-typed
+ * softfloat value.
+ */
+void helper_haddps(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+
+    r.XMM_S(0) = float32_add(d->XMM_S(0), d->XMM_S(1), &env->sse_status);
+    r.XMM_S(1) = float32_add(d->XMM_S(2), d->XMM_S(3), &env->sse_status);
+    r.XMM_S(2) = float32_add(s->XMM_S(0), s->XMM_S(1), &env->sse_status);
+    r.XMM_S(3) = float32_add(s->XMM_S(2), s->XMM_S(3), &env->sse_status);
+    *d = r;
+}
+
+/*
+ * Horizontal add, double precision: lane 0 is the sum of d's lanes,
+ * lane 1 the sum of s's lanes.  Uses float64_add() with sse_status
+ * rather than the C '+' operator so the softfloat representation and
+ * status flags are handled correctly.
+ */
+void helper_haddpd(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+
+    r.XMM_D(0) = float64_add(d->XMM_D(0), d->XMM_D(1), &env->sse_status);
+    r.XMM_D(1) = float64_add(s->XMM_D(0), s->XMM_D(1), &env->sse_status);
+    *d = r;
+}
+
+/*
+ * Horizontal subtract, single precision: the low two result lanes are
+ * the pairwise differences of d, the high two those of s.  Uses
+ * float32_sub() with sse_status rather than the C '-' operator so the
+ * softfloat representation and status flags are handled correctly.
+ */
+void helper_hsubps(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+
+    r.XMM_S(0) = float32_sub(d->XMM_S(0), d->XMM_S(1), &env->sse_status);
+    r.XMM_S(1) = float32_sub(d->XMM_S(2), d->XMM_S(3), &env->sse_status);
+    r.XMM_S(2) = float32_sub(s->XMM_S(0), s->XMM_S(1), &env->sse_status);
+    r.XMM_S(3) = float32_sub(s->XMM_S(2), s->XMM_S(3), &env->sse_status);
+    *d = r;
+}
+
+/*
+ * Horizontal subtract, double precision: lane 0 is d's lane 0 minus
+ * lane 1, lane 1 is s's lane 0 minus lane 1.  Uses float64_sub() with
+ * sse_status rather than the C '-' operator.
+ */
+void helper_hsubpd(XMMReg *d, XMMReg *s)
+{
+    XMMReg r;
+
+    r.XMM_D(0) = float64_sub(d->XMM_D(0), d->XMM_D(1), &env->sse_status);
+    r.XMM_D(1) = float64_sub(s->XMM_D(0), s->XMM_D(1), &env->sse_status);
+    *d = r;
+}
+
+/*
+ * Alternating subtract/add, single precision: even lanes get d - s, odd
+ * lanes get d + s.  Uses the softfloat routines with sse_status rather
+ * than the C '+'/'-' operators.
+ */
+void helper_addsubps(XMMReg *d, XMMReg *s)
+{
+    d->XMM_S(0) = float32_sub(d->XMM_S(0), s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_add(d->XMM_S(1), s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_sub(d->XMM_S(2), s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_add(d->XMM_S(3), s->XMM_S(3), &env->sse_status);
+}
+
+/*
+ * Alternating subtract/add, double precision: lane 0 gets d - s, lane 1
+ * gets d + s.  Uses the softfloat routines with sse_status rather than
+ * the C '+'/'-' operators.
+ */
+void helper_addsubpd(XMMReg *d, XMMReg *s)
+{
+    d->XMM_D(0) = float64_sub(d->XMM_D(0), s->XMM_D(0), &env->sse_status);
+    d->XMM_D(1) = float64_add(d->XMM_D(1), s->XMM_D(1), &env->sse_status);
+}
+
+/* XXX: unordered */
+/*
+ * Generate the ps/ss/pd/sd variants of a floating-point compare helper.
+ * F(size, a, b) yields the per-lane result, which is stored as an
+ * integer mask: a dword (XMM_L) for the 32-bit forms, a qword (XMM_Q)
+ * for the 64-bit forms.  The ss/sd variants touch lane 0 only.
+ */
+#define SSE_HELPER_CMP(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+ d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+ d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+ d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+ d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+ d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+ d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+ d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+ d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+/* Per-lane predicates for SSE_HELPER_CMP: each yields -1 (all bits set)
+   when the predicate holds and 0 otherwise.  The N* and ORD forms are
+   the negations of EQ/LT/LE and UNORD. */
+#define FPU_CMPEQ(size, a, b) \
+    (float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPLT(size, a, b) \
+    (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPLE(size, a, b) \
+    (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPUNORD(size, a, b) \
+    (float ## size ## _unordered(a, b, &env->sse_status) ? -1 : 0)
+#define FPU_CMPNEQ(size, a, b) \
+    (float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPNLT(size, a, b) \
+    (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPNLE(size, a, b) \
+    (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
+#define FPU_CMPORD(size, a, b) \
+    (float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1)
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
+/* Flag values for the (u)comis* helpers, indexed by the softfloat
+   compare result plus one. */
+static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+/* Quiet single-precision compare of lane 0 of d and s; the outcome is
+   written to CC_SRC as flags. */
+void helper_ucomiss(Reg *d, Reg *s)
+{
+    float32 lhs = d->XMM_S(0);
+    float32 rhs = s->XMM_S(0);
+    int relation = float32_compare_quiet(lhs, rhs, &env->sse_status);
+
+    CC_SRC = comis_eflags[relation + 1];
+}
+
+/* Single-precision compare of lane 0 of d and s using float32_compare();
+   the outcome is written to CC_SRC as flags. */
+void helper_comiss(Reg *d, Reg *s)
+{
+    float32 lhs = d->XMM_S(0);
+    float32 rhs = s->XMM_S(0);
+    int relation = float32_compare(lhs, rhs, &env->sse_status);
+
+    CC_SRC = comis_eflags[relation + 1];
+}
+
+/* Quiet double-precision compare of lane 0 of d and s; the outcome is
+   written to CC_SRC as flags. */
+void helper_ucomisd(Reg *d, Reg *s)
+{
+    float64 lhs = d->XMM_D(0);
+    float64 rhs = s->XMM_D(0);
+    int relation = float64_compare_quiet(lhs, rhs, &env->sse_status);
+
+    CC_SRC = comis_eflags[relation + 1];
+}
+
+/* Double-precision compare of lane 0 of d and s using float64_compare();
+   the outcome is written to CC_SRC as flags. */
+void helper_comisd(Reg *d, Reg *s)
+{
+    float64 lhs = d->XMM_D(0);
+    float64 rhs = s->XMM_D(0);
+    int relation = float64_compare(lhs, rhs, &env->sse_status);
+
+    CC_SRC = comis_eflags[relation + 1];
+}
+
+/* Gather bit 31 of each of the four dwords of s into bits 0..3 of the
+   result. */
+uint32_t helper_movmskps(Reg *s)
+{
+    int mask = 0;
+    int lane;
+
+    for (lane = 0; lane < 4; lane++) {
+        int top_bit = s->XMM_L(lane) >> 31;
+
+        mask |= top_bit << lane;
+    }
+    return mask;
+}
+
+/* Gather bit 31 of dwords 1 and 3 of s (the top bit of each qword) into
+   bits 0 and 1 of the result. */
+uint32_t helper_movmskpd(Reg *s)
+{
+    int low_top = s->XMM_L(1) >> 31;
+    int high_top = s->XMM_L(3) >> 31;
+
+    return low_top | (high_top << 1);
+}
+
+#endif
+
+/* Gather bit 7 of every byte of s into the result: byte i supplies
+   bit i. */
+uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
+{
+    uint32_t mask = 0;
+    int i;
+
+    for (i = 0; i < (8 << SHIFT); i++) {
+        if (s->B(i) & 0x80) {
+            mask |= (uint32_t)1 << i;
+        }
+    }
+    return mask;
+}
+
+/* Pack signed words to bytes with satsb(): the low half of the result
+   comes from d, the high half from s. */
+void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg packed;
+    const int half = 4 << SHIFT;
+    int i;
+
+    for (i = 0; i < half; i++) {
+        packed.B(i) = satsb((int16_t)d->W(i));
+        packed.B(half + i) = satsb((int16_t)s->W(i));
+    }
+    *d = packed;
+}
+
+/* Pack signed words to bytes with satub(): the low half of the result
+   comes from d, the high half from s. */
+void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg packed;
+    const int half = 4 << SHIFT;
+    int i;
+
+    for (i = 0; i < half; i++) {
+        packed.B(i) = satub((int16_t)d->W(i));
+        packed.B(half + i) = satub((int16_t)s->W(i));
+    }
+    *d = packed;
+}
+
+/* Pack dwords to words with satsw(): the low half of the result comes
+   from d, the high half from s. */
+void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg packed;
+    const int half = 2 << SHIFT;
+    int i;
+
+    for (i = 0; i < half; i++) {
+        packed.W(i) = satsw(d->L(i));
+        packed.W(half + i) = satsw(s->L(i));
+    }
+    *d = packed;
+}
+
+/*
+ * Generate the interleave ("unpack") helpers for one half of the
+ * register.  base selects the half: 0 reads from the start of d and s,
+ * 1 from the middle (the start offset is base scaled by the element
+ * count of half a register).  Each helper alternates d and s elements
+ * into a temporary and then copies it to d, so d and s may be the same
+ * register.  The qdq variant and the upper result elements exist only
+ * inside XMM_ONLY().
+ */
+#define UNPCK_OP(base_name, base) \
+ \
+void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.B(0) = d->B((base << (SHIFT + 2)) + 0); \
+ r.B(1) = s->B((base << (SHIFT + 2)) + 0); \
+ r.B(2) = d->B((base << (SHIFT + 2)) + 1); \
+ r.B(3) = s->B((base << (SHIFT + 2)) + 1); \
+ r.B(4) = d->B((base << (SHIFT + 2)) + 2); \
+ r.B(5) = s->B((base << (SHIFT + 2)) + 2); \
+ r.B(6) = d->B((base << (SHIFT + 2)) + 3); \
+ r.B(7) = s->B((base << (SHIFT + 2)) + 3); \
+XMM_ONLY( \
+ r.B(8) = d->B((base << (SHIFT + 2)) + 4); \
+ r.B(9) = s->B((base << (SHIFT + 2)) + 4); \
+ r.B(10) = d->B((base << (SHIFT + 2)) + 5); \
+ r.B(11) = s->B((base << (SHIFT + 2)) + 5); \
+ r.B(12) = d->B((base << (SHIFT + 2)) + 6); \
+ r.B(13) = s->B((base << (SHIFT + 2)) + 6); \
+ r.B(14) = d->B((base << (SHIFT + 2)) + 7); \
+ r.B(15) = s->B((base << (SHIFT + 2)) + 7); \
+) \
+ *d = r; \
+} \
+ \
+void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.W(0) = d->W((base << (SHIFT + 1)) + 0); \
+ r.W(1) = s->W((base << (SHIFT + 1)) + 0); \
+ r.W(2) = d->W((base << (SHIFT + 1)) + 1); \
+ r.W(3) = s->W((base << (SHIFT + 1)) + 1); \
+XMM_ONLY( \
+ r.W(4) = d->W((base << (SHIFT + 1)) + 2); \
+ r.W(5) = s->W((base << (SHIFT + 1)) + 2); \
+ r.W(6) = d->W((base << (SHIFT + 1)) + 3); \
+ r.W(7) = s->W((base << (SHIFT + 1)) + 3); \
+) \
+ *d = r; \
+} \
+ \
+void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.L(0) = d->L((base << SHIFT) + 0); \
+ r.L(1) = s->L((base << SHIFT) + 0); \
+XMM_ONLY( \
+ r.L(2) = d->L((base << SHIFT) + 1); \
+ r.L(3) = s->L((base << SHIFT) + 1); \
+) \
+ *d = r; \
+} \
+ \
+XMM_ONLY( \
+void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.Q(0) = d->Q(base); \
+ r.Q(1) = s->Q(base); \
+ *d = r; \
+} \
+)
+
+/* Instantiate the low-half (l) and high-half (h) families. */
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+/* Convert both dwords of s to single precision. */
+void helper_pi2fd(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = int32_to_float32(s->MMX_L(lane), &env->mmx_status);
+    }
+}
+
+/* Convert the signed words 0 and 2 of s (the low word of each dword) to
+   single precision. */
+void helper_pi2fw(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = int32_to_float32((int16_t)s->MMX_W(2 * lane),
+                                          &env->mmx_status);
+    }
+}
+
+/* Convert both single-precision lanes of s to 32-bit integers, rounding
+   toward zero. */
+void helper_pf2id(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_to_int32_round_to_zero(s->MMX_S(lane),
+                                                        &env->mmx_status);
+    }
+}
+
+/* Convert both single-precision lanes of s to integers, rounding toward
+   zero, and pass each result through satsw() before storing the dword. */
+void helper_pf2iw(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = satsw(float32_to_int32_round_to_zero(s->MMX_S(lane),
+                                                              &env->mmx_status));
+    }
+}
+
+/* Accumulate: lane 0 is the sum of d's two lanes, lane 1 the sum of s's
+   two lanes. */
+void helper_pfacc(MMXReg *d, MMXReg *s)
+{
+    MMXReg acc;
+
+    acc.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    acc.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = acc;
+}
+
+/* Lane-wise single-precision add: d = d + s. */
+void helper_pfadd(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = float32_add(d->MMX_S(lane), s->MMX_S(lane),
+                                     &env->mmx_status);
+    }
+}
+
+/* Lane-wise equality compare: each dword becomes -1 when d == s, else 0. */
+void helper_pfcmpeq(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_eq(d->MMX_S(lane), s->MMX_S(lane),
+                                    &env->mmx_status) ? -1 : 0;
+    }
+}
+
+/* Lane-wise d >= s compare, evaluated as s <= d: each dword becomes -1
+   when it holds, else 0. */
+void helper_pfcmpge(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_le(s->MMX_S(lane), d->MMX_S(lane),
+                                    &env->mmx_status) ? -1 : 0;
+    }
+}
+
+/* Lane-wise d > s compare, evaluated as s < d: each dword becomes -1
+   when it holds, else 0. */
+void helper_pfcmpgt(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_L(lane) = float32_lt(s->MMX_S(lane), d->MMX_S(lane),
+                                    &env->mmx_status) ? -1 : 0;
+    }
+}
+
+/* Lane-wise maximum: a lane of d is replaced by s's lane when d < s. */
+void helper_pfmax(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        if (float32_lt(d->MMX_S(lane), s->MMX_S(lane), &env->mmx_status)) {
+            d->MMX_S(lane) = s->MMX_S(lane);
+        }
+    }
+}
+
+/* Lane-wise minimum: a lane of d is replaced by s's lane when s < d. */
+void helper_pfmin(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        if (float32_lt(s->MMX_S(lane), d->MMX_S(lane), &env->mmx_status)) {
+            d->MMX_S(lane) = s->MMX_S(lane);
+        }
+    }
+}
+
+/* Lane-wise single-precision multiply: d = d * s. */
+void helper_pfmul(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = float32_mul(d->MMX_S(lane), s->MMX_S(lane),
+                                     &env->mmx_status);
+    }
+}
+
+/* Negative accumulate: lane 0 is d's lane 0 minus lane 1, lane 1 is s's
+   lane 0 minus lane 1. */
+void helper_pfnacc(MMXReg *d, MMXReg *s)
+{
+    MMXReg acc;
+
+    acc.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    acc.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = acc;
+}
+
+/* Mixed accumulate: lane 0 is d's lane 0 minus lane 1, lane 1 is the sum
+   of s's two lanes. */
+void helper_pfpnacc(MMXReg *d, MMXReg *s)
+{
+    MMXReg acc;
+
+    acc.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+    acc.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+    *d = acc;
+}
+
+/* Apply approx_rcp() to lane 0 of s and replicate the result into both
+   lanes of d. */
+void helper_pfrcp(MMXReg *d, MMXReg *s)
+{
+    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
+    /* Duplicate the freshly written lane rather than recomputing it. */
+    d->MMX_S(1) = d->MMX_S(0);
+}
+
+/* Apply approx_rsqrt() to the magnitude of lane 0 of s, put the original
+   sign bit back on the result, and replicate it into both lanes of d.
+   Lane 1 of d is used as scratch, so lane 0 of s is still intact for the
+   sign step even when d and s are the same register. */
+void helper_pfrsqrt(MMXReg *d, MMXReg *s)
+{
+ /* Clear the sign bit so the approximation sees a non-negative value. */
+ d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
+ d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
+ /* Restore the sign of the input. */
+ d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
+ d->MMX_L(0) = d->MMX_L(1);
+}
+
+/* Lane-wise single-precision subtract: d = d - s. */
+void helper_pfsub(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = float32_sub(d->MMX_S(lane), s->MMX_S(lane),
+                                     &env->mmx_status);
+    }
+}
+
+/* Lane-wise reversed subtract: d = s - d. */
+void helper_pfsubr(MMXReg *d, MMXReg *s)
+{
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        d->MMX_S(lane) = float32_sub(s->MMX_S(lane), d->MMX_S(lane),
+                                     &env->mmx_status);
+    }
+}
+
+/* Store the two dwords of s into d in swapped order. */
+void helper_pswapd(MMXReg *d, MMXReg *s)
+{
+    MMXReg swapped;
+
+    swapped.MMX_L(1) = s->MMX_L(0);
+    swapped.MMX_L(0) = s->MMX_L(1);
+    *d = swapped;
+}
+#endif
+
+/* SSSE3 op helpers */
+/* Byte shuffle: each control byte in s either zeroes the result byte
+   (bit 7 set) or selects a byte of d by its low index bits. */
+void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg shuffled;
+    int i;
+
+    for (i = 0; i < (8 << SHIFT); i++) {
+        if (s->B(i) & 0x80) {
+            shuffled.B(i) = 0;
+        } else {
+            shuffled.B(i) = d->B(s->B(i) & ((8 << SHIFT) - 1));
+        }
+    }
+
+    *d = shuffled;
+}
+
+/*
+ * Horizontal word add: the low half of the result holds the pairwise
+ * sums of d, the high half the pairwise sums of s.
+ *
+ * The result is built in a temporary.  Writing d in place overwrote
+ * words that are read later through s when both operands are the same
+ * register.
+ */
+void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
+    r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
+    XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
+    XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
+    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
+    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
+    XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
+    XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
+
+    *d = r;
+}
+
+/*
+ * Horizontal dword add: the low half of the result holds the pairwise
+ * sums of d, the high half the pairwise sums of s.
+ *
+ * The result is built in a temporary so that d and s may be the same
+ * register, and the additions are done on unsigned values so that
+ * wrap-around is well defined.
+ */
+void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.L(0) = (uint32_t)d->L(0) + (uint32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (uint32_t)d->L(2) + (uint32_t)d->L(3));
+    r.L((1 << SHIFT) + 0) = (uint32_t)s->L(0) + (uint32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (uint32_t)s->L(2) + (uint32_t)s->L(3));
+
+    *d = r;
+}
+
+/*
+ * Horizontal word add with satsw() saturation: low half from d, high
+ * half from s.  The result is built in a temporary so that d and s may
+ * be the same register.
+ */
+void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
+    r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
+    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
+    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
+    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
+    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
+    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
+    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
+
+    *d = r;
+}
+
+/* For each word: multiply the signed bytes of s by the matching unsigned
+   bytes of d, add the two products of the byte pair, and store the sum
+   through satsw(). */
+void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s)
+{
+    int i;
+
+    for (i = 0; i < (4 << SHIFT); i++) {
+        d->W(i) = satsw((int8_t)s->B(2 * i) * (uint8_t)d->B(2 * i) +
+                        (int8_t)s->B(2 * i + 1) * (uint8_t)d->B(2 * i + 1));
+    }
+}
+
+/*
+ * Horizontal word subtract: each result word is an even word minus the
+ * following odd word; low half from d, high half from s.  The result is
+ * built in a temporary so that d and s may be the same register.
+ */
+void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = (int16_t)d->W(0) - (int16_t)d->W(1);
+    r.W(1) = (int16_t)d->W(2) - (int16_t)d->W(3);
+    XMM_ONLY(r.W(2) = (int16_t)d->W(4) - (int16_t)d->W(5));
+    XMM_ONLY(r.W(3) = (int16_t)d->W(6) - (int16_t)d->W(7));
+    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1);
+    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3);
+    XMM_ONLY(r.W(6) = (int16_t)s->W(4) - (int16_t)s->W(5));
+    XMM_ONLY(r.W(7) = (int16_t)s->W(6) - (int16_t)s->W(7));
+
+    *d = r;
+}
+
+/*
+ * Horizontal dword subtract: each result dword is an even dword minus
+ * the following odd dword; low half from d, high half from s.  The
+ * result is built in a temporary so that d and s may be the same
+ * register, and the subtraction is done on unsigned values so that
+ * wrap-around is well defined.
+ */
+void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.L(0) = (uint32_t)d->L(0) - (uint32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (uint32_t)d->L(2) - (uint32_t)d->L(3));
+    r.L((1 << SHIFT) + 0) = (uint32_t)s->L(0) - (uint32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (uint32_t)s->L(2) - (uint32_t)s->L(3));
+
+    *d = r;
+}
+
+/*
+ * Horizontal word subtract with satsw() saturation: low half from d,
+ * high half from s.  The result is built in a temporary so that d and s
+ * may be the same register.
+ */
+void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1));
+    r.W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3));
+    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5)));
+    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7)));
+    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1));
+    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3));
+    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5)));
+    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7)));
+
+    *d = r;
+}
+
+/* Absolute value of the source element x; the first (destination)
+   argument is ignored.  An element above the signed maximum is a
+   negative value and is negated.  The dword form negates in unsigned
+   arithmetic because negating INT32_MIN as a signed int overflows. */
+#define FABSB(_, x) ((x) > INT8_MAX ? -(int8_t)(x) : (x))
+#define FABSW(_, x) ((x) > INT16_MAX ? -(int16_t)(x) : (x))
+#define FABSL(_, x) ((x) > INT32_MAX ? 0U - (uint32_t)(x) : (x))
+SSE_HELPER_B(helper_pabsb, FABSB)
+SSE_HELPER_W(helper_pabsw, FABSW)
+SSE_HELPER_L(helper_pabsd, FABSL)
+
+/* Signed word multiply, add the rounding constant 0x4000, and keep the
+   product shifted right by 15. */
+#define FMULHRSW(d, s) (((int16_t)(d) * (int16_t)(s) + 0x4000) >> 15)
+SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
+
+/* Apply the sign of s to d: d when s is positive, 0 when s is zero, and
+   -d when s is negative (above the signed maximum).  The dword form
+   negates in unsigned arithmetic because negating INT32_MIN as a signed
+   int overflows. */
+#define FSIGNB(d, s) ((s) <= INT8_MAX ? ((s) ? (d) : 0) : -(int8_t)(d))
+#define FSIGNW(d, s) ((s) <= INT16_MAX ? ((s) ? (d) : 0) : -(int16_t)(d))
+#define FSIGNL(d, s) ((s) <= INT32_MAX ? ((s) ? (d) : 0) : 0U - (uint32_t)(d))
+SSE_HELPER_B(helper_psignb, FSIGNB)
+SSE_HELPER_W(helper_psignw, FSIGNW)
+SSE_HELPER_L(helper_psignd, FSIGNL)
+
+/*
+ * Treat d:s as one double-width value (d in the upper half), shift it
+ * right by `shift` bytes and keep the low register-width part in d.
+ * A shift of at least twice the register size in bytes gives zero.
+ */
+void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift)
+{
+ Reg r;
+
+ /* XXX could be checked during translation */
+ if (shift >= (16 << SHIFT)) {
+ r.Q(0) = 0;
+ XMM_ONLY(r.Q(1) = 0);
+ } else {
+ /* Convert the byte count into a bit count. */
+ shift <<= 3;
+/* SHR shifts v right by i bits for positive i and left by -i bits
+   otherwise; it yields 0 once |i| reaches 64, so each qword contributes
+   only where it overlaps the result qword being assembled. */
+#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0)
+#if SHIFT == 0
+ r.Q(0) = SHR(s->Q(0), shift - 0) |
+ SHR(d->Q(0), shift - 64);
+#else
+ r.Q(0) = SHR(s->Q(0), shift - 0) |
+ SHR(s->Q(1), shift - 64) |
+ SHR(d->Q(0), shift - 128) |
+ SHR(d->Q(1), shift - 192);
+ r.Q(1) = SHR(s->Q(0), shift + 64) |
+ SHR(s->Q(1), shift - 0) |
+ SHR(d->Q(0), shift - 64) |
+ SHR(d->Q(1), shift - 128);
+#endif
+#undef SHR
+ }
+
+ *d = r;
+}
+
+/* Shorthand for XMM register 0, read by the SSE_HELPER_V helpers as
+   their third operand. */
+#define XMM0 env->xmm_regs[0]
+
+#if SHIFT == 1
+/*
+ * Generate a helper that applies F(d_elem, s_elem, xmm0_elem) to each of
+ * the first `num` elements, storing the result in d.  `elem` is the
+ * accessor (B, L, Q, ...) and `num` the element count; the nested
+ * `num >` tests cover counts of 2, 4, 8 and 16.
+ */
+#define SSE_HELPER_V(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\
+ d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\
+ if (num > 2) {\
+ d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\
+ d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\
+ if (num > 4) {\
+ d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\
+ d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\
+ d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\
+ d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\
+ if (num > 8) {\
+ d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\
+ d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\
+ d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\
+ d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\
+ d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\
+ d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\
+ d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\
+ d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\
+ }\
+ }\
+ }\
+}
+
+/*
+ * Generate a helper that applies F(d_elem, s_elem, bit) to each of the
+ * first `num` elements, where `bit` is bit i of the immediate for
+ * element i.  `elem` is the accessor and `num` the element count; the
+ * nested `num >` tests cover counts of 2, 4, 8 and 16.
+ */
+#define SSE_HELPER_I(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\
+{\
+ d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\
+ d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\
+ if (num > 2) {\
+ d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\
+ d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\
+ if (num > 4) {\
+ d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\
+ d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\
+ d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\
+ d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\
+ if (num > 8) {\
+ d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\
+ d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\
+ d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\
+ d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\
+ d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\
+ d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\
+ d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\
+ d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\
+ }\
+ }\
+ }\
+}
+
+/* SSE4.1 op helpers */
+/* Variable blends: an element is taken from s when the top bit of the
+   matching mask element m is set, otherwise it stays d. */
+#define FBLENDVB(d, s, m) (((m) & 0x80) ? (s) : (d))
+#define FBLENDVPS(d, s, m) (((m) & 0x80000000) ? (s) : (d))
+#define FBLENDVPD(d, s, m) (((m) & 0x8000000000000000LL) ? (s) : (d))
+SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)
+SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)
+SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
+
+/* Bitwise test: CC_Z is set when (s AND d) is all zero, CC_C when
+   (s AND NOT d) is all zero; the flags are written to CC_SRC. */
+void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s)
+{
+    uint64_t common = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1));
+    uint64_t only_in_s = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
+    int flags = 0;
+
+    if (!common) {
+        flags |= CC_Z;
+    }
+    if (!only_in_s) {
+        flags |= CC_C;
+    }
+    CC_SRC = flags;
+}
+
+/*
+ * Generate a helper that sets element i of d to F(i) for the first `num`
+ * elements (2, 4 or 8).  F is an accessor expression on s, e.g.
+ * "(int8_t) s->B".
+ *
+ * Elements are written from the highest index down.  The instantiations
+ * widen a narrow source element i into a wider destination element i,
+ * so when d and s are the same register an ascending store would
+ * overwrite source elements that have not been read yet.  In descending
+ * order every store lands above the source elements still to be read.
+ */
+#define SSE_HELPER_F(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+    if (num > 2) {\
+        if (num > 4) {\
+            d->elem(7) = F(7);\
+            d->elem(6) = F(6);\
+            d->elem(5) = F(5);\
+            d->elem(4) = F(4);\
+        }\
+        d->elem(3) = F(3);\
+        d->elem(2) = F(2);\
+    }\
+    d->elem(1) = F(1);\
+    d->elem(0) = F(0);\
+}
+
+/* Sign-extending moves: the source element is cast to its signed type
+   before being stored in the wider destination element. */
+SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L)
+/* Zero-extending moves: the source element is stored without a cast. */
+SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B)
+SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B)
+SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B)
+SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W)
+SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
+SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
+
+/* Multiply the even dwords of d and s as signed values, storing each
+   full 64-bit product in the corresponding qword of d. */
+void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s)
+{
+    int64_t prod_lo = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0);
+    int64_t prod_hi = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2);
+
+    d->Q(0) = prod_lo;
+    d->Q(1) = prod_hi;
+}
+
+/* Qword equality: all bits set when the elements match, else 0. */
+#define FCMPEQQ(d, s) ((d) == (s) ? -1 : 0)
+SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
+
+/*
+ * Pack signed dwords to words with satuw(): the low four words come from
+ * d, the high four from s.
+ *
+ * The result is built in a temporary.  Writing d in place overwrote
+ * dwords that are read later through s when both operands are the same
+ * register.
+ */
+void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = satuw((int32_t) d->L(0));
+    r.W(1) = satuw((int32_t) d->L(1));
+    r.W(2) = satuw((int32_t) d->L(2));
+    r.W(3) = satuw((int32_t) d->L(3));
+    r.W(4) = satuw((int32_t) s->L(0));
+    r.W(5) = satuw((int32_t) s->L(1));
+    r.W(6) = satuw((int32_t) s->L(2));
+    r.W(7) = satuw((int32_t) s->L(3));
+
+    *d = r;
+}
+
+/* Signed min/max element operations cast both operands to the signed
+   type first; the unsigned forms use MIN/MAX on the raw elements. */
+#define FMINSB(d, s) MIN((int8_t)(d), (int8_t)(s))
+#define FMINSD(d, s) MIN((int32_t)(d), (int32_t)(s))
+#define FMAXSB(d, s) MAX((int8_t)(d), (int8_t)(s))
+#define FMAXSD(d, s) MAX((int32_t)(d), (int32_t)(s))
+SSE_HELPER_B(helper_pminsb, FMINSB)
+SSE_HELPER_L(helper_pminsd, FMINSD)
+SSE_HELPER_W(helper_pminuw, MIN)
+SSE_HELPER_L(helper_pminud, MIN)
+SSE_HELPER_B(helper_pmaxsb, FMAXSB)
+SSE_HELPER_L(helper_pmaxsd, FMAXSD)
+SSE_HELPER_W(helper_pmaxuw, MAX)
+SSE_HELPER_L(helper_pmaxud, MAX)
+
+/* Low 32 bits of the dword product.  The multiply is done on unsigned
+   operands: the low 32 bits are the same as for the signed product, and
+   unsigned wrap-around is well defined whereas signed overflow is not. */
+#define FMULLD(d, s) ((uint32_t)(d) * (uint32_t)(s))
+SSE_HELPER_L(helper_pmulld, FMULLD)
+
+/*
+ * Find the smallest of the eight words of s (the lowest index wins on
+ * ties).  Word 0 of d receives the value, word 1 its index, and the rest
+ * of d is cleared.
+ *
+ * The minimum is copied out of s before any other field of d is written.
+ * With d and s the same register, the previous order (clear, store
+ * index, then read s->W(idx)) read back an already overwritten word.
+ */
+void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
+{
+    int idx = 0;
+    int i;
+
+    for (i = 1; i < 8; i++) {
+        if (s->W(i) < s->W(idx)) {
+            idx = i;
+        }
+    }
+
+    d->W(0) = s->W(idx);
+    d->W(1) = idx;
+    d->L(1) = 0;
+    d->Q(1) = 0;
+}
+
+/*
+ * ROUNDPS: round the four single-precision lanes of s to integral values
+ * and store them in d.
+ *
+ * mode bit 2 clear: rounding mode is taken from mode bits 1:0
+ *                   (0 nearest-even, 1 down, 2 up, 3 toward zero).
+ * mode bit 2 set:   the rounding mode already held in env->sse_status is used.
+ * mode bit 3 (suppress the inexact flag) is not implemented; see the
+ * disabled block below.
+ *
+ * The previous rounding mode is restored before returning.
+ */
+void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    /* The lanes are 32-bit floats, so the float32 rounding routine is needed. */
+    d->L(0) = float32_round_to_int(s->L(0), &env->sse_status);
+    d->L(1) = float32_round_to_int(s->L(1), &env->sse_status);
+    d->L(2) = float32_round_to_int(s->L(2), &env->sse_status);
+    d->L(3) = float32_round_to_int(s->L(3), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+/*
+ * ROUNDPD: round both double-precision lanes of s to integral values in d.
+ * When mode bit 2 is clear the rounding mode comes from mode bits 1:0,
+ * otherwise the mode currently in env->sse_status is used. The original
+ * rounding mode is put back before returning.
+ */
+void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char saved_rc = env->sse_status.float_rounding_mode;
+
+    if ((mode & (1 << 2)) == 0) {
+        int rc = float_round_nearest_even;
+
+        switch (mode & 3) {
+        case 1:
+            rc = float_round_down;
+            break;
+        case 2:
+            rc = float_round_up;
+            break;
+        case 3:
+            rc = float_round_to_zero;
+            break;
+        default:
+            break;
+        }
+        set_float_rounding_mode(rc, &env->sse_status);
+    }
+
+    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+    d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = saved_rc;
+}
+
+/*
+ * ROUNDSS: round the low single-precision lane of s to an integral value
+ * and store it in d->L(0); the other lanes of d are not written.
+ *
+ * mode bit 2 clear: rounding mode is taken from mode bits 1:0
+ *                   (0 nearest-even, 1 down, 2 up, 3 toward zero).
+ * mode bit 2 set:   the rounding mode already held in env->sse_status is used.
+ * mode bit 3 (suppress the inexact flag) is not implemented; see the
+ * disabled block below.
+ */
+void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char prev_rounding_mode;
+
+    prev_rounding_mode = env->sse_status.float_rounding_mode;
+    if (!(mode & (1 << 2)))
+        switch (mode & 3) {
+        case 0:
+            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+            break;
+        case 1:
+            set_float_rounding_mode(float_round_down, &env->sse_status);
+            break;
+        case 2:
+            set_float_rounding_mode(float_round_up, &env->sse_status);
+            break;
+        case 3:
+            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+            break;
+        }
+
+    /* The lane is a 32-bit float, so the float32 rounding routine is needed. */
+    d->L(0) = float32_round_to_int(s->L(0), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+/*
+ * ROUNDSD: round the low double-precision lane of s to an integral value
+ * in d->Q(0); d->Q(1) is not written. When mode bit 2 is clear the rounding
+ * mode comes from mode bits 1:0, otherwise the mode currently in
+ * env->sse_status is used. The original rounding mode is put back on exit.
+ */
+void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+    signed char saved_rc = env->sse_status.float_rounding_mode;
+
+    if ((mode & (1 << 2)) == 0) {
+        int rc = float_round_nearest_even;
+
+        switch (mode & 3) {
+        case 1:
+            rc = float_round_down;
+            break;
+        case 2:
+            rc = float_round_up;
+            break;
+        case 3:
+            rc = float_round_to_zero;
+            break;
+        default:
+            break;
+        }
+        set_float_rounding_mode(rc, &env->sse_status);
+    }
+
+    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+
+#if 0 /* TODO */
+    if (mode & (1 << 3))
+        set_float_exception_flags(
+                        get_float_exception_flags(&env->sse_status) &
+                        ~float_flag_inexact,
+                        &env->sse_status);
+#endif
+    env->sse_status.float_rounding_mode = saved_rc;
+}
+
+/*
+ * Blend operator: yields s when the selector m is non-zero, otherwise d.
+ * NOTE(review): how SSE_HELPER_I derives m for each element is defined
+ * outside this excerpt - presumably one immediate bit per element.
+ */
+#define FBLENDP(d, s, m) m ? s : d
+SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
+SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
+SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
+
+/*
+ * DPPS: conditional single-precision dot product.
+ * mask bits 7:4 choose which lane products d[i]*s[i] enter the sum;
+ * mask bits 3:0 choose which lanes of d receive the sum (others get 0).
+ */
+void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+    float32 acc = 0 /*float32_zero*/;
+    int lane;
+
+    /* Accumulate in ascending lane order so rounding matches lane by lane. */
+    for (lane = 0; lane < 4; lane++) {
+        if (mask & (1 << (4 + lane))) {
+            acc = float32_add(acc,
+                              float32_mul(d->L(lane), s->L(lane),
+                                          &env->sse_status),
+                              &env->sse_status);
+        }
+    }
+
+    for (lane = 0; lane < 4; lane++) {
+        if (mask & (1 << lane))
+            d->L(lane) = acc;
+        else
+            d->L(lane) = 0 /*float32_zero*/;
+    }
+}
+
+/*
+ * DPPD: conditional double-precision dot product.
+ * mask bits 5:4 choose which lane products d[i]*s[i] enter the sum;
+ * mask bits 1:0 choose which lanes of d receive the sum (others get 0).
+ */
+void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+    float64 acc = 0 /*float64_zero*/;
+    int lane;
+
+    for (lane = 0; lane < 2; lane++) {
+        if (mask & (1 << (4 + lane))) {
+            acc = float64_add(acc,
+                              float64_mul(d->Q(lane), s->Q(lane),
+                                          &env->sse_status),
+                              &env->sse_status);
+        }
+    }
+
+    for (lane = 0; lane < 2; lane++) {
+        if (mask & (1 << lane))
+            d->Q(lane) = acc;
+        else
+            d->Q(lane) = 0 /*float64_zero*/;
+    }
+}
+
+/*
+ * MPSADBW: eight sums of absolute byte differences.
+ * A fixed 4-byte group of s, starting at byte (offset & 3) * 4, is compared
+ * against eight overlapping 4-byte windows of d that start at byte
+ * (offset & 4) and advance by one byte per result word.
+ */
+void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
+{
+    int src_base = (offset & 3) << 2;
+    int dst_base = (offset & 4) << 0;
+    int win, k;
+    Reg r;
+
+    for (win = 0; win < 8; win++) {
+        int sad = 0;
+
+        for (k = 0; k < 4; k++)
+            sad += abs1(d->B(dst_base + win + k) - s->B(src_base + k));
+        r.W(win) = sad;
+    }
+
+    /* Results are built in r so d can be read for every window. */
+    *d = r;
+}
+
+/* SSE4.2 op helpers */
+/* PCMPGTQ is a signed compare: cast so the result does not depend on the element type */
+#define FCMPGTQ(d, s) ((int64_t) (d) > (int64_t) (s) ? -1 : 0)
+SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
+
+/*
+ * Explicit string length for PCMPESTRI/PCMPESTRM.
+ *
+ * Reads env->regs[reg] as a signed 64-bit value when any bit above bit 7 of
+ * ctrl is set (REX.W), otherwise as a signed 32-bit value, and returns its
+ * magnitude saturated to the element count: 8 when ctrl bit 0 is set,
+ * 16 otherwise.
+ *
+ * The clamp is applied before narrowing to int so that large 64-bit lengths
+ * saturate instead of being truncated, and so the most negative value is
+ * never negated.
+ */
+static inline int pcmp_elen(int reg, uint32_t ctrl)
+{
+    int64_t val;
+    int limit = (ctrl & 1) ? 8 : 16;
+
+    /* Presence of REX.W is indicated by a bit higher than 7 set */
+    if (ctrl >> 8)
+        val = (int64_t) env->regs[reg];
+    else
+        val = (int32_t) env->regs[reg];
+
+    if (val > limit || val < -limit)
+        return limit;
+
+    return (int) (val < 0 ? -val : val);
+}
+
+/*
+ * Implicit string length: number of leading non-zero elements in r.
+ * Elements are words (at most 8) when ctrl bit 0 is set, bytes (at most 16)
+ * otherwise.
+ */
+static inline int pcmp_ilen(Reg *r, uint8_t ctrl)
+{
+    const int words = ctrl & 1;
+    const int limit = words ? 8 : 16;
+    int n;
+
+    for (n = 0; n < limit; n++) {
+        if (words) {
+            if (!r->W(n))
+                break;
+        } else if (!r->B(n)) {
+            break;
+        }
+    }
+
+    return n;
+}
+
+/*
+ * Fetch element i of r in the format selected by ctrl bits 1:0:
+ * 0 = byte as stored, 1 = word as stored, 2 = byte cast to int8_t,
+ * 3 = word cast to int16_t.
+ */
+static inline int pcmp_val(Reg *r, uint8_t ctrl, int i)
+{
+    const int fmt = ctrl & 3;
+
+    if (fmt == 0)
+        return r->B(i);
+    if (fmt == 1)
+        return r->W(i);
+    if (fmt == 2)
+        return (int8_t) r->B(i);
+    return (int16_t) r->W(i);
+}
+
+/*
+ * Shared core of PCMPESTRI/PCMPESTRM/PCMPISTRI/PCMPISTRM.
+ *
+ * d, s    the two operand registers
+ * ctrl    immediate control byte:
+ *           bits 1:0  element format (see pcmp_val)
+ *           bits 3:2  aggregation: 0 equal-any, 1 ranges, 2 equal-each,
+ *                     3 equal-ordered
+ *           bits 5:4  polarity applied to the aggregated mask
+ * valids  number of valid elements in s
+ * validd  number of valid elements in d
+ *
+ * Returns the result bit mask (bit j corresponds to element j of s) and
+ * sets CC_SRC: CC_Z if s is shorter than a full register, CC_S if d is,
+ * CC_C if the mask is non-zero, CC_O if bit 0 of the mask is set.
+ */
+static inline unsigned pcmpxstrx(Reg *d, Reg *s,
+                                 int8_t ctrl, int valids, int validd)
+{
+    unsigned int res = 0;
+    int v;
+    int j, i;
+    int upper = (ctrl & 1) ? 7 : 15;
+
+    /* From here on these are "index of the last valid element" (-1 = none). */
+    valids--;
+    validd--;
+
+    CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0);
+
+    switch ((ctrl >> 2) & 3) {
+    case 0:
+        /* Equal any: s[j] matches if it equals any valid element of d. */
+        for (j = valids; j >= 0; j--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, j);
+            for (i = validd; i >= 0; i--)
+                res |= (v == pcmp_val(d, ctrl, i));
+        }
+        break;
+    case 1:
+        /*
+         * Ranges: d holds (lower, upper) pairs at (even, odd) indices;
+         * s[j] matches if lower <= s[j] <= upper for some complete pair.
+         * i walks the odd (upper-bound) indices.
+         */
+        for (j = valids; j >= 0; j--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, j);
+            for (i = ((validd - 1) | 1); i >= 0; i -= 2)
+                res |= (pcmp_val(d, ctrl, i - 0) >= v &&
+                        pcmp_val(d, ctrl, i - 1) <= v);
+        }
+        break;
+    case 2:
+        /* Equal each: element-wise comparison of s[i] with d[i]. */
+        res = (2 << (upper - MAX(valids, validd))) - 1;
+        res <<= MAX(valids, validd) - MIN(valids, validd);
+        for (i = MIN(valids, validd); i >= 0; i--) {
+            res <<= 1;
+            v = pcmp_val(s, ctrl, i);
+            res |= (v == pcmp_val(d, ctrl, i));
+        }
+        break;
+    case 3:
+        /* Equal ordered: bit j is set if d occurs in s starting at j. */
+        for (j = valids - validd; j >= 0; j--) {
+            res <<= 1;
+            res |= 1;
+            for (i = MIN(upper - j, validd); i >= 0; i--)
+                res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
+        }
+        break;
+    }
+
+    /* Polarity: 1 inverts every bit, 3 inverts only the bits of valid s. */
+    switch ((ctrl >> 4) & 3) {
+    case 1:
+        res ^= (2 << upper) - 1;
+        break;
+    case 3:
+        res ^= (2 << valids) - 1;
+        break;
+    }
+
+    if (res)
+        CC_SRC |= CC_C;
+    if (res & 1)
+        CC_SRC |= CC_O;
+
+    return res;
+}
+
+/*
+ * 1-based position of the most significant set bit of val, found by
+ * halving the search window each step. Returns 1 when val is 0.
+ */
+static inline int rffs1(unsigned int val)
+{
+    int pos = 1;
+    int step = sizeof(val) * 4;
+
+    while (step) {
+        if (val >> step) {
+            val >>= step;
+            pos += step;
+        }
+        step /= 2;
+    }
+
+    return pos;
+}
+
+/*
+ * 1-based position of the least significant set bit of val, or 0 when val
+ * is 0 (same convention as POSIX ffs()).
+ *
+ * Binary search: whenever the low `hi` bits are all clear, the lowest set
+ * bit lies above them, so drop those bits and add `hi` to the position.
+ */
+static inline int ffs1(unsigned int val)
+{
+    int ret = 1, hi;
+
+    if (!val)
+        return 0;
+
+    for (hi = sizeof(val) * 4; hi; hi /= 2)
+        if (!(val & ((1u << hi) - 1))) {
+            val >>= hi;
+            ret += hi;
+        }
+
+    return ret;
+}
+
+/*
+ * PCMPESTRI: explicit-length string compare that returns an index in ECX.
+ * The length of s is taken from EDX and the length of d from EAX (both via
+ * pcmp_elen). If the result mask is non-zero, ECX = rffs1(res) - 1 when
+ * ctrl bit 6 is set, else ffs1(res) - 1; if the mask is zero, ECX is 16,
+ * or 8 when ctrl bit 0 is set.
+ * The VBOX variant calls the selected helper directly instead of calling
+ * through a conditionally chosen function.
+ */
+void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_elen(R_EDX, ctrl),
+ pcmp_elen(R_EAX, ctrl));
+
+ if (res)
+#ifndef VBOX
+ env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1;
+#else
+ env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1(res) : ffs1(res)) - 1;
+#endif
+ else
+ env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
+}
+
+/*
+ * PCMPESTRM: explicit-length string compare that returns a mask in d.
+ * The length of s is taken from EDX and the length of d from EAX (both via
+ * pcmp_elen).
+ * ctrl bit 6 set:   each result bit is expanded to an all-ones or all-zero
+ *                   element (8 words if ctrl bit 0 is set, else 16 bytes).
+ * ctrl bit 6 clear: the raw bit mask is stored in d->Q(0), d->Q(1) is zeroed.
+ * NOTE(review): the architectural destination of PCMPxSTRM is XMM0 - confirm
+ * that the translator passes the XMM0 register as d.
+ */
+void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ int i;
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_elen(R_EDX, ctrl),
+ pcmp_elen(R_EAX, ctrl));
+
+ if ((ctrl >> 6) & 1) {
+#ifndef VBOX
+ /* NOTE: these loops decrement i while testing i <= N, so they index
+ * out of range and do not terminate; the VBOX branch is the fixed form. */
+ if (ctrl & 1)
+ for (i = 0; i <= 8; i--, res >>= 1)
+ d->W(i) = (res & 1) ? ~0 : 0;
+ else
+ for (i = 0; i <= 16; i--, res >>= 1)
+ d->B(i) = (res & 1) ? ~0 : 0;
+#else
+ if (ctrl & 1)
+ for (i = 0; i < 8; i++, res >>= 1) {
+ d->W(i) = (res & 1) ? ~0 : 0;
+ }
+ else
+ for (i = 0; i < 16; i++, res >>= 1) {
+ d->B(i) = (res & 1) ? ~0 : 0;
+ }
+#endif
+ } else {
+ d->Q(1) = 0;
+ d->Q(0) = res;
+ }
+}
+
+/*
+ * PCMPISTRI: implicit-length (zero-terminated) string compare that returns
+ * an index in ECX. Lengths of s and d are measured with pcmp_ilen. A zero
+ * result mask yields 16 (8 when ctrl bit 0 is set); otherwise ECX is
+ * rffs1(mask) - 1 when ctrl bit 6 is set, else ffs1(mask) - 1.
+ */
+void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+    int len_s = pcmp_ilen(s, ctrl);
+    int len_d = pcmp_ilen(d, ctrl);
+    unsigned int mask = pcmpxstrx(d, s, ctrl, len_s, len_d);
+
+    if (!mask) {
+        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
+    } else if (ctrl & (1 << 6)) {
+        env->regs[R_ECX] = rffs1(mask) - 1;
+    } else {
+        env->regs[R_ECX] = ffs1(mask) - 1;
+    }
+}
+
+/*
+ * PCMPISTRM: implicit-length (zero-terminated) string compare that returns
+ * a mask in d. Lengths of s and d are measured with pcmp_ilen.
+ * ctrl bit 6 set:   each result bit is expanded to an all-ones or all-zero
+ *                   element (8 words if ctrl bit 0 is set, else 16 bytes).
+ * ctrl bit 6 clear: the raw bit mask is stored in d->Q(0), d->Q(1) is zeroed.
+ * NOTE(review): the architectural destination of PCMPxSTRM is XMM0 - confirm
+ * that the translator passes the XMM0 register as d.
+ */
+void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ int i;
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_ilen(s, ctrl),
+ pcmp_ilen(d, ctrl));
+
+ if ((ctrl >> 6) & 1) {
+#ifndef VBOX
+ /* NOTE: these loops decrement i while testing i <= N, so they index
+ * out of range and do not terminate; the VBOX branch is the fixed form. */
+ if (ctrl & 1)
+ for (i = 0; i <= 8; i--, res >>= 1)
+ d->W(i) = (res & 1) ? ~0 : 0;
+ else
+ for (i = 0; i <= 16; i--, res >>= 1)
+ d->B(i) = (res & 1) ? ~0 : 0;
+#else
+ if (ctrl & 1)
+ for (i = 0; i < 8; i++, res >>= 1) {
+ d->W(i) = (res & 1) ? ~0 : 0;
+ }
+ else
+ for (i = 0; i < 16; i++, res >>= 1) {
+ d->B(i) = (res & 1) ? ~0 : 0;
+ }
+#endif
+ } else {
+ d->Q(1) = 0;
+ d->Q(0) = res;
+ }
+}
+
+#define CRCPOLY 0x1edc6f41
+#define CRCPOLY_BITREV 0x82f63b78
+/*
+ * CRC32 instruction: fold the low `len` bits of msg into the running CRC
+ * crc1. The message bits are XORed into the CRC, then the register is
+ * shifted right once per message bit, XORing in CRCPOLY_BITREV whenever
+ * the bit shifted out is 1.
+ */
+target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
+{
+    target_ulong keep = (target_ulong) -1 >> (TARGET_LONG_BITS - len);
+    target_ulong crc = (msg & keep) ^ crc1;
+    uint32_t bit;
+
+    for (bit = 0; bit < len; bit++) {
+        if (crc & 1)
+            crc = (crc >> 1) ^ CRCPOLY_BITREV;
+        else
+            crc >>= 1;
+    }
+
+    return crc;
+}
+
+/*
+ * POPMASK(i): all-ones divided by (2^(2^i) + 1), i.e. a mask of alternating
+ * 2^i-bit groups (i = 0 gives 0x55.., i = 1 gives 0x33.., and so on).
+ * POPCOUNT(n, i): adds each 2^i-bit field of n to its upper neighbour, so
+ * applying it for i = 0, 1, 2, ... accumulates the bit count in wider fields.
+ */
+#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
+#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))
+/*
+ * POPCNT: returns the number of set bits in n and sets CC_SRC to CC_Z when
+ * n is zero (0 otherwise).
+ * type == 1 stops after four folding steps (16 bits summed) and returns the
+ * low byte; on TARGET_X86_64, type == 2 stops after five steps (32 bits),
+ * anything else runs the sixth step (64 bits). Without TARGET_X86_64 the
+ * five-step result is returned as is.
+ * NOTE(review): presumably type encodes the operand size chosen by the
+ * translator - confirm against the caller.
+ */
+target_ulong helper_popcnt(target_ulong n, uint32_t type)
+{
+ CC_SRC = n ? 0 : CC_Z;
+
+ n = POPCOUNT(n, 0);
+ n = POPCOUNT(n, 1);
+ n = POPCOUNT(n, 2);
+ n = POPCOUNT(n, 3);
+ if (type == 1)
+ return n & 0xff;
+
+ n = POPCOUNT(n, 4);
+#ifndef TARGET_X86_64
+ return n;
+#else
+ if (type == 2)
+ return n & 0xff;
+
+ return POPCOUNT(n, 5);
+#endif
+}
+#endif
+
+#undef SHIFT
+#undef XMM_ONLY
+#undef Reg
+#undef B
+#undef W
+#undef L
+#undef Q
+#undef SUFFIX
diff --git a/src/recompiler/target-i386/ops_sse_header.h b/src/recompiler/target-i386/ops_sse_header.h
new file mode 100644
index 00000000..efa3e905
--- /dev/null
+++ b/src/recompiler/target-i386/ops_sse_header.h
@@ -0,0 +1,359 @@
+/*
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/*
+ * This header is parameterised by SHIFT, which the includer must define:
+ * SHIFT == 0 declares the MMX flavour (Reg = MMXReg, names end in _mmx),
+ * any other value the XMM flavour (Reg = XMMReg, names end in _xmm).
+ */
+#if SHIFT == 0
+#define Reg MMXReg
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define SUFFIX _xmm
+#endif
+
+/*
+ * Describe the register types to the DEF_HELPER_* machinery: each is
+ * aliased to "ptr" and its C type is a pointer to the register struct.
+ * NOTE(review): the exact meaning of the dh_* hooks is defined by the
+ * helper-definition header, which is not part of this excerpt.
+ */
+#define dh_alias_Reg ptr
+#define dh_alias_XMMReg ptr
+#define dh_alias_MMXReg ptr
+#define dh_ctype_Reg Reg *
+#define dh_ctype_XMMReg XMMReg *
+#define dh_ctype_MMXReg MMXReg *
+#define dh_is_signed_Reg dh_is_signed_ptr
+#define dh_is_signed_XMMReg dh_is_signed_ptr
+#define dh_is_signed_MMXReg dh_is_signed_ptr
+
+DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg)
+#endif
+
+#define SSE_HELPER_B(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_W(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_L(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_Q(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+SSE_HELPER_B(paddb, FADD)
+SSE_HELPER_W(paddw, FADD)
+SSE_HELPER_L(paddl, FADD)
+SSE_HELPER_Q(paddq, FADD)
+
+SSE_HELPER_B(psubb, FSUB)
+SSE_HELPER_W(psubw, FSUB)
+SSE_HELPER_L(psubl, FSUB)
+SSE_HELPER_Q(psubq, FSUB)
+
+SSE_HELPER_B(paddusb, FADDUB)
+SSE_HELPER_B(paddsb, FADDSB)
+SSE_HELPER_B(psubusb, FSUBUB)
+SSE_HELPER_B(psubsb, FSUBSB)
+
+SSE_HELPER_W(paddusw, FADDUW)
+SSE_HELPER_W(paddsw, FADDSW)
+SSE_HELPER_W(psubusw, FSUBUW)
+SSE_HELPER_W(psubsw, FSUBSW)
+
+SSE_HELPER_B(pminub, FMINUB)
+SSE_HELPER_B(pmaxub, FMAXUB)
+
+SSE_HELPER_W(pminsw, FMINSW)
+SSE_HELPER_W(pmaxsw, FMAXSW)
+
+SSE_HELPER_Q(pand, FAND)
+SSE_HELPER_Q(pandn, FANDN)
+SSE_HELPER_Q(por, FOR)
+SSE_HELPER_Q(pxor, FXOR)
+
+SSE_HELPER_B(pcmpgtb, FCMPGTB)
+SSE_HELPER_W(pcmpgtw, FCMPGTW)
+SSE_HELPER_L(pcmpgtl, FCMPGTL)
+
+SSE_HELPER_B(pcmpeqb, FCMPEQ)
+SSE_HELPER_W(pcmpeqw, FCMPEQ)
+SSE_HELPER_L(pcmpeql, FCMPEQ)
+
+SSE_HELPER_W(pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(pmulhuw, FMULHUW)
+SSE_HELPER_W(pmulhw, FMULHW)
+
+SSE_HELPER_B(pavgb, FAVG)
+SSE_HELPER_W(pavgw, FAVG)
+
+DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg)
+
+DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl)
+DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
+#endif
+
+#if SHIFT == 0
+DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int)
+#else
+DEF_HELPER_3(shufps, void, Reg, Reg, int)
+DEF_HELPER_3(shufpd, void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int)
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+#define SSE_HELPER_S(name, F)\
+ DEF_HELPER_2(name ## ps , void, Reg, Reg) \
+ DEF_HELPER_2(name ## ss , void, Reg, Reg) \
+ DEF_HELPER_2(name ## pd , void, Reg, Reg) \
+ DEF_HELPER_2(name ## sd , void, Reg, Reg)
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+DEF_HELPER_2(cvtps2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpd2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtss2sd, void, Reg, Reg)
+DEF_HELPER_2(cvtsd2ss, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32)
+DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32)
+
+#ifdef TARGET_X86_64
+DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64)
+DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64)
+#endif
+
+DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvtss2si, s32, XMMReg)
+DEF_HELPER_1(cvtsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvtss2sq, s64, XMMReg)
+DEF_HELPER_1(cvtsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvttss2si, s32, XMMReg)
+DEF_HELPER_1(cvttsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvttss2sq, s64, XMMReg)
+DEF_HELPER_1(cvttsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpss, void, XMMReg, XMMReg)
+DEF_HELPER_2(extrq_r, void, XMMReg, XMMReg)
+DEF_HELPER_3(extrq_i, void, XMMReg, int, int)
+DEF_HELPER_2(insertq_r, void, XMMReg, XMMReg)
+DEF_HELPER_3(insertq_i, void, XMMReg, int, int)
+DEF_HELPER_2(haddps, void, XMMReg, XMMReg)
+DEF_HELPER_2(haddpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg)
+
+#define SSE_HELPER_CMP(name, F)\
+ DEF_HELPER_2( name ## ps , void, Reg, Reg) \
+ DEF_HELPER_2( name ## ss , void, Reg, Reg) \
+ DEF_HELPER_2( name ## pd , void, Reg, Reg) \
+ DEF_HELPER_2( name ## sd , void, Reg, Reg)
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
+DEF_HELPER_2(ucomiss, void, Reg, Reg)
+DEF_HELPER_2(comiss, void, Reg, Reg)
+DEF_HELPER_2(ucomisd, void, Reg, Reg)
+DEF_HELPER_2(comisd, void, Reg, Reg)
+DEF_HELPER_1(movmskps, i32, Reg)
+DEF_HELPER_1(movmskpd, i32, Reg)
+#endif
+
+DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg)
+DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg)
+#define UNPCK_OP(base_name, base) \
+ DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \
+ DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \
+ DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg)
+
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg)
+#endif
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2id, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfadd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmax, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmin, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmul, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsub, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg)
+DEF_HELPER_2(pswapd, void, MMXReg, MMXReg)
+#endif
+
+/* SSSE3 op helpers */
+DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32)
+
+/* SSE4.1 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32)
+#endif
+
+/* SSE4.2 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(crc32, tl, i32, tl, i32)
+DEF_HELPER_2(popcnt, tl, tl, i32)
+#endif
+
+#undef SHIFT
+#undef Reg
+#undef SUFFIX
+
+#undef SSE_HELPER_B
+#undef SSE_HELPER_W
+#undef SSE_HELPER_L
+#undef SSE_HELPER_Q
+#undef SSE_HELPER_S
+#undef SSE_HELPER_CMP
+#undef UNPCK_OP
diff --git a/src/recompiler/target-i386/svm.h b/src/recompiler/target-i386/svm.h
new file mode 100644
index 00000000..a224aead
--- /dev/null
+++ b/src/recompiler/target-i386/svm.h
@@ -0,0 +1,222 @@
+#ifndef __SVM_H
+#define __SVM_H
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+/* only included in documentation, maybe wrong */
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_NPF 0x400
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1 << 1) /* TS (bit 3) and MP (bit 1) */
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ uint16_t intercept_cr_read;
+ uint16_t intercept_cr_write;
+ uint16_t intercept_dr_read;
+ uint16_t intercept_dr_write;
+ uint32_t intercept_exceptions;
+ uint64_t intercept;
+ uint8_t reserved_1[44];
+ uint64_t iopm_base_pa;
+ uint64_t msrpm_base_pa;
+ uint64_t tsc_offset;
+ uint32_t asid;
+ uint8_t tlb_ctl;
+ uint8_t reserved_2[3];
+ uint32_t int_ctl;
+ uint32_t int_vector;
+ uint32_t int_state;
+ uint8_t reserved_3[4];
+ uint64_t exit_code;
+ uint64_t exit_info_1;
+ uint64_t exit_info_2;
+ uint32_t exit_int_info;
+ uint32_t exit_int_info_err;
+ uint64_t nested_ctl;
+ uint8_t reserved_4[16];
+ uint32_t event_inj;
+ uint32_t event_inj_err;
+ uint64_t nested_cr3;
+ uint64_t lbr_ctl;
+ uint8_t reserved_5[832];
+};
+
+/*
+ * One segment register record as stored in the VMCB save area.
+ * Packed, so the layout is exactly 2 + 2 + 4 + 8 = 16 bytes.
+ */
+struct __attribute__ ((__packed__)) vmcb_seg {
+ uint16_t selector;
+ uint16_t attrib;
+ uint32_t limit;
+ uint64_t base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ uint8_t reserved_1[43];
+ uint8_t cpl;
+ uint8_t reserved_2[4];
+ uint64_t efer;
+ uint8_t reserved_3[112];
+ uint64_t cr4;
+ uint64_t cr3;
+ uint64_t cr0;
+ uint64_t dr7;
+ uint64_t dr6;
+ uint64_t rflags;
+ uint64_t rip;
+ uint8_t reserved_4[88];
+ uint64_t rsp;
+ uint8_t reserved_5[24];
+ uint64_t rax;
+ uint64_t star;
+ uint64_t lstar;
+ uint64_t cstar;
+ uint64_t sfmask;
+ uint64_t kernel_gs_base;
+ uint64_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ uint64_t cr2;
+ uint8_t reserved_6[32];
+ uint64_t g_pat;
+ uint64_t dbgctl;
+ uint64_t br_from;
+ uint64_t br_to;
+ uint64_t last_excp_from;
+ uint64_t last_excp_to;
+};
+
+/*
+ * Complete virtual machine control block: the control area immediately
+ * followed by the guest state save area, with no padding between them.
+ */
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#endif
diff --git a/src/recompiler/target-i386/translate.c b/src/recompiler/target-i386/translate.c
new file mode 100644
index 00000000..1b82f3f5
--- /dev/null
+++ b/src/recompiler/target-i386/translate.c
@@ -0,0 +1,8385 @@
+/*
+ * i386 translation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifndef VBOX
+#include <inttypes.h>
+#include <signal.h>
+#endif /* !VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+#include "tcg-op.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+/* Instruction prefix flags; each is a distinct bit so they can be OR-ed
+   together (see DisasContext.prefix). */
+#define PREFIX_REPZ 0x01
+#define PREFIX_REPNZ 0x02
+#define PREFIX_LOCK 0x04
+#define PREFIX_DATA 0x08
+#define PREFIX_ADR 0x10
+
+/* Helpers that differ between 64-bit and 32-bit targets. On TARGET_X86_64
+   they read the matching DisasContext field or keep their argument; on
+   32-bit targets they collapse to constants (0 / NULL / nothing), so common
+   code can use them without further #ifdefs. */
+#ifdef TARGET_X86_64
+#define X86_64_ONLY(x) x
+#define X86_64_DEF(...) __VA_ARGS__
+#define CODE64(s) ((s)->code64)
+#define REX_X(s) ((s)->rex_x)
+#define REX_B(s) ((s)->rex_b)
+# ifdef VBOX
+# define IS_LONG_MODE(s) ((s)->lma)
+# endif
+/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
+/* Note: BUGGY_64 is only defined in this (64-bit) branch. */
+#if 1
+#define BUGGY_64(x) NULL
+#endif
+#else
+#define X86_64_ONLY(x) NULL
+#define X86_64_DEF(...)
+#define CODE64(s) 0
+#define REX_X(s) 0
+#define REX_B(s) 0
+# ifdef VBOX
+# define IS_LONG_MODE(s) 0
+# endif
+#endif
+
+//#define MACRO_TEST 1
+
+/* global register indexes */
+static TCGv_ptr cpu_env;
+static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv_i32 cpu_cc_op;
+static TCGv cpu_regs[CPU_NB_REGS];
+/* local temps */
+static TCGv cpu_T[2], cpu_T3;
+/* local register indexes (only used inside old micro ops) */
+static TCGv cpu_tmp0, cpu_tmp4;
+static TCGv_ptr cpu_ptr0, cpu_ptr1;
+static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static TCGv_i64 cpu_tmp1_i64;
+static TCGv cpu_tmp5;
+
+static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+
+#include "gen-icount.h"
+
+#ifdef TARGET_X86_64
+static int x86_64_hregs;
+#endif
+
+#ifdef VBOX
+
+/* Special/override code readers to hide patched code. */
+
+/*
+ * Fetch one code byte at 'pc' for the translator.
+ *
+ * With VBOX_WITH_RAW_MODE, remR3GetOpcode() is tried first and ldub_code()
+ * is only used when it returns zero; without it ldub_code() is always used.
+ * NOTE(review): presumably remR3GetOpcode() stores the byte in 'b' whenever
+ * it returns non-zero - confirm against its definition.
+ *
+ * The ldub_code() called in the body is the original reader: the macro that
+ * redirects ldub_code to this function is only defined after the body, so
+ * there is no recursion.
+ */
+uint8_t ldub_code_raw(target_ulong pc)
+{
+ uint8_t b;
+
+# ifdef VBOX_WITH_RAW_MODE
+ if (!remR3GetOpcode(cpu_single_env, pc, &b))
+# endif
+ b = ldub_code(pc);
+ return b;
+}
+# define ldub_code(a) ldub_code_raw(a)
+
+/* Read a 16-bit code value at 'pc' as two single-byte fetches through
+   ldub_code_raw(), low byte first, and combine them little-endian. */
+uint16_t lduw_code_raw(target_ulong pc)
+{
+    const uint16_t lo = (uint16_t)ldub_code_raw(pc);
+    const uint16_t hi = (uint16_t)ldub_code_raw(pc + 1);
+
+    return (uint16_t)(lo | (hi << 8));
+}
+# define lduw_code(a) lduw_code_raw(a)
+
+
+/* Read a 32-bit code value at 'pc' as four single-byte fetches through
+   ldub_code_raw(), lowest address first, and combine them little-endian. */
+uint32_t ldl_code_raw(target_ulong pc)
+{
+    uint32_t value = 0;
+    unsigned int i;
+
+    for (i = 0; i < 4; i++) {
+        value |= (uint32_t)ldub_code_raw(pc + i) << (8 * i);
+    }
+    return value;
+}
+# define ldl_code(a) ldl_code_raw(a)
+
+#endif /* VBOX */
+
+/* Translation-time state passed to every code generator in this file.
+   The first group of fields describes the instruction currently being
+   decoded, the rest describes the block being translated. */
+typedef struct DisasContext {
+ /* current insn context */
+ int override; /* -1 if no override */
+ int prefix;
+ int aflag, dflag;
+ target_ulong pc; /* pc = eip + cs_base */
+ int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
+ static state change (stop translation) */
+ /* current block context */
+ target_ulong cs_base; /* base of CS segment */
+ int pe; /* protected mode */
+ int code32; /* 32 bit code segment */
+#ifdef TARGET_X86_64
+ int lma; /* long mode active */
+ int code64; /* 64 bit code segment */
+ int rex_x, rex_b;
+#endif
+ int ss32; /* 32 bit stack segment */
+ int cc_op; /* current CC operation */
+ int addseg; /* non zero if either DS/ES/SS have a non zero base */
+ int f_st; /* currently unused */
+ int vm86; /* vm86 mode */
+#ifdef VBOX
+ int vme; /* CR4.VME */
+ int pvi; /* CR4.PVI */
+ int record_call; /* record calls for CSAM or not? */
+#endif
+ int cpl;
+ int iopl;
+ int tf; /* TF cpu flag */
+ int singlestep_enabled; /* "hardware" single step enabled */
+ int jmp_opt; /* use direct block chaining for direct jumps */
+ int mem_index; /* select memory access functions */
+ uint64_t flags; /* all execution flags */
+ struct TranslationBlock *tb;
+ int popl_esp_hack; /* for correct popl with esp base handling */
+ int rip_offset; /* only used in x86_64, but left for simplicity */
+ int cpuid_features;
+ int cpuid_ext_features;
+ int cpuid_ext2_features;
+ int cpuid_ext3_features;
+} DisasContext;
+
+static void gen_eob(DisasContext *s);
+static void gen_jmp(DisasContext *s, target_ulong eip);
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+
+#ifdef VBOX
+static void gen_check_external_event(void);
+#endif
+
+/* i386 arith/logic operations; the enumerators take the values 0..7 in
+   declaration order.
+   NOTE(review): presumably these match the 3-bit operation field of the
+   x86 ALU opcode encodings - confirm against the decoder. */
+enum {
+ OP_ADDL,
+ OP_ORL,
+ OP_ADCL,
+ OP_SBBL,
+ OP_ANDL,
+ OP_SUBL,
+ OP_XORL,
+ OP_CMPL,
+};
+
+/* i386 shift ops, values 0..7. OP_SAR is given its value explicitly, but 7
+   is also what it would get implicitly since OP_SHL1 is 6. */
+enum {
+ OP_ROL,
+ OP_ROR,
+ OP_RCL,
+ OP_RCR,
+ OP_SHL,
+ OP_SHR,
+ OP_SHL1, /* undocumented */
+ OP_SAR = 7,
+};
+
+/* Condition selectors, values 0..7. gen_jcc1() and is_fast_jcc_case()
+   extract them from a jump opcode value 'b' as (b >> 1) & 7; bit 0 of 'b'
+   selects the inverted condition. */
+enum {
+ JCC_O,
+ JCC_B,
+ JCC_Z,
+ JCC_BE,
+ JCC_S,
+ JCC_P,
+ JCC_L,
+ JCC_LE,
+};
+
+/* operand size: the value is log2 of the size in bytes (0 = 1 byte ...
+   3 = 8 bytes), which is why code in this file can use it directly as a
+   shift count (gen_op_movl_T0_Dshift) or add it to a CC_OP_xxxB base. */
+enum {
+ OT_BYTE = 0,
+ OT_WORD,
+ OT_LONG,
+ OT_QUAD,
+};
+
+/* Operand register numbers: 0..7 are the integer registers, 16 and up are
+   pseudo registers for temporaries. */
+enum {
+ /* I386 int registers */
+ OR_EAX, /* MUST be even numbered */
+ OR_ECX,
+ OR_EDX,
+ OR_EBX,
+ OR_ESP,
+ OR_EBP,
+ OR_ESI,
+ OR_EDI,
+
+ OR_TMP0 = 16, /* temporary operand register */
+ OR_TMP1,
+ OR_A0, /* temporary register used when doing address evaluation */
+};
+
+/* Emit: T0 = 0. */
+static inline void gen_op_movl_T0_0(void)
+{
+ tcg_gen_movi_tl(cpu_T[0], 0);
+}
+
+/* Emit: T0 = val. 'val' is a signed 32-bit value, so a negative immediate
+   stays negative when widened to the target word. */
+static inline void gen_op_movl_T0_im(int32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+/* Emit: T0 = val, with 'val' taken as an unsigned 32-bit value. */
+static inline void gen_op_movl_T0_imu(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+/* Emit: T1 = val (signed 32-bit immediate). */
+static inline void gen_op_movl_T1_im(int32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+/* Emit: T1 = val (unsigned 32-bit immediate). */
+static inline void gen_op_movl_T1_imu(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+/* Emit: A0 = val (unsigned 32-bit immediate). */
+static inline void gen_op_movl_A0_im(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_A0, val);
+}
+
+#ifdef TARGET_X86_64
+/* Emit: A0 = val (64-bit immediate, x86_64 targets only). */
+static inline void gen_op_movq_A0_im(int64_t val)
+{
+ tcg_gen_movi_tl(cpu_A0, val);
+}
+#endif
+
+/* Emit: T0 = val, full target-word immediate. */
+static inline void gen_movtl_T0_im(target_ulong val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+/* Emit: T1 = val, full target-word immediate. */
+static inline void gen_movtl_T1_im(target_ulong val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+/* Emit: T0 &= 0xffff (keep the low 16 bits). */
+static inline void gen_op_andl_T0_ffff(void)
+{
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+}
+
+/* Emit: T0 &= val, with 'val' an unsigned 32-bit mask. */
+static inline void gen_op_andl_T0_im(uint32_t val)
+{
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
+}
+
+/* Emit: T0 = T1. */
+static inline void gen_op_movl_T0_T1(void)
+{
+ tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
+}
+
+/* Emit: A0 &= 0xffff (used for 16-bit addressing). */
+static inline void gen_op_andl_A0_ffff(void)
+{
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
+}
+
+/* Number of operand sizes: byte/word/long, plus quad on x86_64. */
+#ifdef TARGET_X86_64
+
+#define NB_OP_SIZES 4
+
+#else /* !TARGET_X86_64 */
+
+#define NB_OP_SIZES 3
+
+#endif /* !TARGET_X86_64 */
+
+/* Byte offsets of the sub-register views inside a target_ulong register
+   slot as laid out in host memory: B = low byte, H = second byte,
+   W = low 16 bits, L = low 32 bits, LH = the 32 bits above L.
+   NOTE(review): with a 4-byte target_ulong the big-endian REG_LH_OFFSET
+   expression wraps around (sizeof yields an unsigned type); presumably it
+   is only used when target_ulong is 8 bytes - confirm. */
+#if defined(HOST_WORDS_BIGENDIAN)
+#define REG_B_OFFSET (sizeof(target_ulong) - 1)
+#define REG_H_OFFSET (sizeof(target_ulong) - 2)
+#define REG_W_OFFSET (sizeof(target_ulong) - 2)
+#define REG_L_OFFSET (sizeof(target_ulong) - 4)
+#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
+#else
+#define REG_B_OFFSET 0
+#define REG_H_OFFSET 1
+#define REG_W_OFFSET 0
+#define REG_L_OFFSET 0
+#define REG_LH_OFFSET 4
+#endif
+
+/*
+ * Emit code storing t0 into guest register 'reg' with operand size 'ot'.
+ *
+ * OT_BYTE: only one byte of the destination changes. Registers 0..3 (and,
+ * on x86_64, registers >= 8 or any register while x86_64_hregs is set) get
+ * their low byte replaced; otherwise 'reg' is 4..7 and bits 8..15 of
+ * register reg - 4 are replaced.
+ * OT_WORD: the low 16 bits are replaced, the rest is preserved.
+ * OT_LONG: the register becomes the zero-extended low 32 bits of t0.
+ * OT_QUAD: (x86_64 only) plain move.
+ * Any other value of 'ot' is handled like OT_LONG.
+ */
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+{
+ TCGv tmp;
+
+ switch(ot) {
+ case OT_BYTE:
+ tmp = tcg_temp_new();
+ tcg_gen_ext8u_tl(tmp, t0);
+ if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ } else {
+ /* high-byte register: merge into bits 8..15 of reg - 4 */
+ tcg_gen_shli_tl(tmp, tmp, 8);
+ tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
+ tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
+ }
+ tcg_temp_free(tmp);
+ break;
+ case OT_WORD:
+ tmp = tcg_temp_new();
+ tcg_gen_ext16u_tl(tmp, t0);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ break;
+ default: /* XXX this shouldn't be reached; abort? */
+ case OT_LONG:
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a mov. */
+ tcg_gen_ext32u_tl(cpu_regs[reg], t0);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ break;
+#endif
+ }
+}
+
+/* Store T0 into guest register 'reg' with operand size 'ot'
+   (see gen_op_mov_reg_v for the per-size rules). */
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+}
+
+/* Store T1 into guest register 'reg' with operand size 'ot'. */
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+}
+
+/*
+ * Emit code storing A0 into guest register 'reg'.
+ *
+ * 'size' is an address-size style selector, not an OT_xxx value:
+ *   0 - replace the low 16 bits of the register, keep the rest;
+ *   2 - (x86_64 only) copy the whole value;
+ *   1 or anything else - store the zero-extended low 32 bits.
+ */
+static inline void gen_op_mov_reg_A0(int size, int reg)
+{
+    if (size == 0) {
+        TCGv low16 = tcg_temp_new();
+
+        tcg_gen_ext16u_tl(low16, cpu_A0);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], low16);
+        tcg_temp_free(low16);
+        return;
+    }
+#ifdef TARGET_X86_64
+    if (size == 2) {
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+        return;
+    }
+#endif
+    /* Size 1 and every unexpected size: on x86_64 this clears the upper
+       half of the register, on i386 it is a plain move. */
+    tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
+}
+
+/*
+ * Emit code loading guest register 'reg' into t0.
+ *
+ * Only the high-byte registers need special treatment: for OT_BYTE with
+ * 'reg' in 4..7 (and, on x86_64, x86_64_hregs clear) the value is bits
+ * 8..15 of register reg - 4. In every other case the whole register is
+ * copied and no truncation to 'ot' takes place.
+ */
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
+{
+    const int is_high_byte =
+        ot == OT_BYTE && !(reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs));
+
+    if (is_high_byte) {
+        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+        tcg_gen_ext8u_tl(t0, t0);
+        return;
+    }
+    tcg_gen_mov_tl(t0, cpu_regs[reg]);
+}
+
+/* Load guest register 'reg' into T[t_index] (t_index is 0 or 1, the size
+   of cpu_T); see gen_op_mov_v_reg for how 'ot' is interpreted. */
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+ gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
+/* Emit: A0 = regs[reg], copying the full register without masking. */
+static inline void gen_op_movl_A0_reg(int reg)
+{
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+}
+
+/* Emit: A0 += val for 32-bit addressing. On x86_64 the sum is masked back
+   to 32 bits so the address wraps at 4 GiB. */
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+/* Emit: A0 += val for 64-bit addressing (no masking). */
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+
+/* Emit A0 += val, picking the 64-bit add when translating 64-bit code
+   (CODE64) and the wrapping 32-bit add otherwise. */
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_addq_A0_im(val);
+        return;
+    }
+#endif
+    gen_op_addl_A0_im(val);
+}
+
+/* Emit: T0 += T1. */
+static inline void gen_op_addl_T0_T1(void)
+{
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+/* Emit: env->eip = T0 (stores T0 into the eip field of CPUState). */
+static inline void gen_op_jmp_T0(void)
+{
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+/*
+ * Emit code adding the immediate 'val' to guest register 'reg'.
+ *
+ * 'size' selects the width of the update:
+ *   0 - 16 bit: only the low 16 bits of the register change;
+ *   1 - 32 bit: the register becomes the zero-extended 32-bit sum;
+ *   2 - 64 bit (x86_64 only): full-width add.
+ * Any other value emits nothing. cpu_tmp0 is clobbered for sizes 0 and 1.
+ */
+static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
+{
+#ifdef TARGET_X86_64
+    if (size == 2) {
+        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
+        return;
+    }
+#endif
+    if (size != 0 && size != 1) {
+        return;
+    }
+
+    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
+    if (size == 0) {
+        tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+    } else {
+        /* On x86_64 this clears the upper half of the register; on i386
+           the extension is a no-op. */
+        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+    }
+}
+
+/*
+ * Emit code adding T0 to guest register 'reg'.
+ *
+ * 'size' selects the width of the update:
+ *   0 - 16 bit: only the low 16 bits of the register change;
+ *   1 - 32 bit: the register becomes the zero-extended 32-bit sum;
+ *   2 - 64 bit (x86_64 only): full-width add.
+ * Any other value emits nothing. cpu_tmp0 is clobbered for sizes 0 and 1.
+ */
+static inline void gen_op_add_reg_T0(int size, int reg)
+{
+#ifdef TARGET_X86_64
+    if (size == 2) {
+        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
+        return;
+    }
+#endif
+    if (size != 0 && size != 1) {
+        return;
+    }
+
+    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+    if (size == 0) {
+        tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+    } else {
+        /* On x86_64 this clears the upper half of the register; on i386
+           the extension is a no-op. */
+        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+    }
+}
+
+/* Emit code that writes the constant 'val' to the run-time cc_op global.
+   This does not touch the translation-time copy in DisasContext. */
+static inline void gen_op_set_cc_op(int32_t val)
+{
+ tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
+/* Emit: A0 = (uint32_t)(A0 + (regs[reg] << shift)), the scaled-index add
+   for 32-bit addressing. Clobbers cpu_tmp0. */
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ if (shift != 0)
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a nop. */
+ tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+}
+
+#ifdef VBOX
+/*
+ * Optionally emit a call that re-synchronises segment register 'reg'
+ * before its base is used.
+ *
+ * Unless FORCE_SEGMENT_SYNC is defined this function emits nothing. With
+ * it, the active variant (#if 1) unconditionally calls helper_sync_seg for
+ * 'reg'. The disabled variant only handles R_GS and skips the call when the
+ * segment's newselector field is zero or VM_MASK is set in eflags; it saves
+ * and restores cpu_A0 around the call when 'keepA0' is true.
+ *
+ * NOTE(review): the conditional code uses tcg_temp_local_new(TCG_TYPE_TL),
+ * tcg_gen_helper_0_1() and tcg_gen_brcondi_i32() on a target-long temp,
+ * which differs from the TCG calls used elsewhere in this file - it may
+ * not build as-is; confirm before enabling FORCE_SEGMENT_SYNC.
+ */
+DECLINLINE(void) gen_op_seg_check(int reg, bool keepA0)
+{
+ /* Segments do not seem to get out of sync; if they in fact do, enable the code below. */
+# ifdef FORCE_SEGMENT_SYNC
+# if 1
+ TCGv t0;
+
+ /* Considering poor quality of TCG optimizer - better call directly */
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ tcg_gen_movi_tl(t0, reg);
+ tcg_gen_helper_0_1(helper_sync_seg, t0);
+ tcg_temp_free(t0);
+# else
+ /* Our segments could be outdated, thus check the newselector field to see whether an update is really needed */
+ int skip_label;
+ TCGv t0, a0;
+
+ /* For other segments this check is a waste of time, and TCG is also unable to cope with this code
+ for data/stack segments, as it expects cpu_T[0] to be alive */
+ if (reg != R_GS)
+ return;
+
+ if (keepA0)
+ {
+ /* we need to store old cpu_A0 */
+ a0 = tcg_temp_local_new(TCG_TYPE_TL);
+ tcg_gen_mov_tl(a0, cpu_A0);
+ }
+
+ skip_label = gen_new_label();
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, segs[reg].newselector) + REG_L_OFFSET);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, eflags) + REG_L_OFFSET);
+ tcg_gen_andi_tl(t0, t0, VM_MASK);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, skip_label);
+ tcg_gen_movi_tl(t0, reg);
+
+ tcg_gen_helper_0_1(helper_sync_seg, t0);
+
+ tcg_temp_free(t0);
+
+ gen_set_label(skip_label);
+ if (keepA0)
+ {
+ tcg_gen_mov_tl(cpu_A0, a0);
+ tcg_temp_free(a0);
+ }
+# endif /* 0 */
+# endif /* FORCE_SEGMENT_SYNC */
+}
+#endif /* VBOX */
+
+/* Emit: A0 = low 32 bits of segs[reg].base, zero-extended. REG_L_OFFSET
+   selects the low half of the field on big-endian hosts. */
+static inline void gen_op_movl_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, false);
+#endif
+ tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+}
+
+/* Emit: A0 += segs[reg].base for 32-bit addressing; on x86_64 the sum is
+   masked back to 32 bits. Clobbers cpu_tmp0. */
+static inline void gen_op_addl_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, true);
+#endif
+ tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+/* 64-bit addressing variants of the A0 helpers: same operations as the
+   'l' versions but without truncating the result to 32 bits. */
+
+/* Emit: A0 = segs[reg].base (full width). */
+static inline void gen_op_movq_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, false);
+#endif
+ tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
+}
+
+/* Emit: A0 += segs[reg].base. Clobbers cpu_tmp0. */
+static inline void gen_op_addq_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, true);
+#endif
+ tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+
+/* Emit: A0 = regs[reg]. */
+static inline void gen_op_movq_A0_reg(int reg)
+{
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+}
+
+/* Emit: A0 += regs[reg] << shift. Clobbers cpu_tmp0. */
+static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ if (shift != 0)
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+#endif
+
+/*
+ * Emit a sign-extending load from address A0 into T0.
+ *
+ * 'idx' packs two values: bits 0..1 are the operand size (0 = 8 bit,
+ * 1 = 16 bit, anything else = 32 bit) and (idx >> 2) - 1 is the memory
+ * index handed to the TCG load op.
+ */
+static inline void gen_op_lds_T0_A0(int idx)
+{
+    const int size = idx & 3;
+    const int mem_index = (idx >> 2) - 1;
+
+    if (size == 0) {
+        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
+    } else if (size == 1) {
+        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
+    } else {
+        /* sizes 2 and 3 both use the 32-bit signed load */
+        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
+    }
+}
+
+/*
+ * Emit a zero-extending load of the value at address a0 into t0.
+ *
+ * 'idx' packs two values: bits 0..1 are the operand size (0 = 8 bit,
+ * 1 = 16 bit, 2 = 32 bit, 3 = 64 bit) and (idx >> 2) - 1 is the memory
+ * index handed to the TCG load op. Size 3 emits nothing unless
+ * TARGET_X86_64 is defined.
+ */
+static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
+{
+    const int size = idx & 3;
+    const int mem_index = (idx >> 2) - 1;
+
+    if (size == 0) {
+        tcg_gen_qemu_ld8u(t0, a0, mem_index);
+    } else if (size == 1) {
+        tcg_gen_qemu_ld16u(t0, a0, mem_index);
+    } else if (size == 2) {
+        tcg_gen_qemu_ld32u(t0, a0, mem_index);
+    } else {
+        /* Should never happen on 32-bit targets. */
+#ifdef TARGET_X86_64
+        tcg_gen_qemu_ld64(t0, a0, mem_index);
+#endif
+    }
+}
+
+/* XXX: always use ldu or lds */
+/* Load (zero-extending) from address A0 into T0; 'idx' is the packed
+   size/memory-index value described at gen_op_ld_v. */
+static inline void gen_op_ld_T0_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+/* Same operation as gen_op_ld_T0_A0 under an explicitly "unsigned" name. */
+static inline void gen_op_ldu_T0_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+/* Load (zero-extending) from address A0 into T1. */
+static inline void gen_op_ld_T1_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[1], cpu_A0);
+}
+
+/*
+ * Emit a store of t0 to the address in a0.
+ *
+ * 'idx' packs two values: bits 0..1 are the operand size (0 = 8 bit,
+ * 1 = 16 bit, 2 = 32 bit, 3 = 64 bit) and (idx >> 2) - 1 is the memory
+ * index handed to the TCG store op. Size 3 emits nothing unless
+ * TARGET_X86_64 is defined.
+ */
+static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
+{
+    const int size = idx & 3;
+    const int mem_index = (idx >> 2) - 1;
+
+    if (size == 0) {
+        tcg_gen_qemu_st8(t0, a0, mem_index);
+    } else if (size == 1) {
+        tcg_gen_qemu_st16(t0, a0, mem_index);
+    } else if (size == 2) {
+        tcg_gen_qemu_st32(t0, a0, mem_index);
+    } else {
+        /* Should never happen on 32-bit targets. */
+#ifdef TARGET_X86_64
+        tcg_gen_qemu_st64(t0, a0, mem_index);
+#endif
+    }
+}
+
+/* Store T0 to address A0; 'idx' is the packed size/memory-index value
+   described at gen_op_st_v. */
+static inline void gen_op_st_T0_A0(int idx)
+{
+ gen_op_st_v(idx, cpu_T[0], cpu_A0);
+}
+
+/* Store T1 to address A0. */
+static inline void gen_op_st_T1_A0(int idx)
+{
+ gen_op_st_v(idx, cpu_T[1], cpu_A0);
+}
+
+#ifdef VBOX
+
+/*
+ * Emit a check for pending external events.
+ *
+ * The active variant (#if 1) simply emits a call to
+ * helper_check_external_event(). The disabled variant would test the
+ * CPU_INTERRUPT_EXTERNAL_* bits of interrupt_request inline and only call
+ * the helper when one of them is set.
+ * NOTE(review): the disabled variant uses tcg_temp_local_new(TCG_TYPE_TL)
+ * and an _i32 branch on a target-long temp, unlike the rest of this file;
+ * it may need updating before it can be enabled.
+ */
+static void gen_check_external_event(void)
+{
+# if 1
+ /** @todo once TCG codegen improves, we may want to use version
+ from else version */
+ gen_helper_check_external_event();
+# else
+ int skip_label;
+ TCGv t0;
+
+ skip_label = gen_new_label();
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ /* t0 = cpu_tmp0; */
+
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, interrupt_request));
+ /* Keep in sync with helper_check_external_event() */
+ tcg_gen_andi_tl(t0, t0,
+ CPU_INTERRUPT_EXTERNAL_EXIT
+ | CPU_INTERRUPT_EXTERNAL_TIMER
+ | CPU_INTERRUPT_EXTERNAL_DMA
+ | CPU_INTERRUPT_EXTERNAL_HARD);
+ /** @todo predict branch as taken */
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+ tcg_temp_free(t0);
+
+ gen_helper_check_external_event();
+
+ gen_set_label(skip_label);
+# endif
+}
+
+#endif /* VBOX */
+
+/* Emit: env->eip = pc (constant). Clobbers cpu_tmp0. */
+static inline void gen_jmp_im(target_ulong pc)
+{
+ tcg_gen_movi_tl(cpu_tmp0, pc);
+ tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
+}
+
+#ifdef VBOX
+/* Emit: env->eip = pc, and additionally a call to helper_dump_state()
+   when built with VBOX_DUMP_STATE. */
+DECLINLINE(void) gen_update_eip(target_ulong pc)
+{
+ gen_jmp_im(pc);
+# ifdef VBOX_DUMP_STATE
+ gen_helper_dump_state();
+# endif
+}
+#endif /* VBOX */
+
+/*
+ * Emit code computing the source address of a string instruction into A0:
+ * ESI, plus a segment base where one applies.
+ *
+ * s->aflag selects the address size (0 = 16 bit, non-zero = 32 bit,
+ * 2 = 64 bit on x86_64). s->override (>= 0) names an explicit segment;
+ * without one, DS is used for 16-bit addressing always and for 32-bit
+ * addressing only when s->addseg is set. In 64-bit mode a base is added
+ * only for an explicit override.
+ */
+static inline void gen_string_movl_A0_ESI(DisasContext *s)
+{
+ int override;
+
+ override = s->override;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ if (override >= 0) {
+ gen_op_movq_A0_seg(override);
+ gen_op_addq_A0_reg_sN(0, R_ESI);
+ } else {
+ gen_op_movq_A0_reg(R_ESI);
+ }
+ } else
+#endif
+ if (s->aflag) {
+ /* 32 bit address */
+ if (s->addseg && override < 0)
+ override = R_DS;
+ if (override >= 0) {
+ gen_op_movl_A0_seg(override);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ } else {
+ gen_op_movl_A0_reg(R_ESI);
+ }
+ } else {
+ /* 16 bit address, always add a segment base (DS by default) */
+ if (override < 0)
+ override = R_DS;
+ gen_op_movl_A0_reg(R_ESI);
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(override);
+ }
+}
+
+/*
+ * Emit code computing the destination address of a string instruction
+ * into A0: EDI relative to ES. s->override is not consulted here.
+ *
+ * 64-bit addressing uses EDI alone; 32-bit addressing adds the ES base
+ * only when s->addseg is set; 16-bit addressing masks EDI to 16 bits and
+ * always adds the ES base.
+ */
+static inline void gen_string_movl_A0_EDI(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EDI);
+ } else
+#endif
+ if (s->aflag) {
+ if (s->addseg) {
+ gen_op_movl_A0_seg(R_ES);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ } else {
+ gen_op_movl_A0_reg(R_EDI);
+ }
+ } else {
+ gen_op_movl_A0_reg(R_EDI);
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(R_ES);
+ }
+}
+
+/*
+ * Emit: T0 = env->df << ot.
+ *
+ * The 32-bit 'df' field of CPUState is loaded sign-extended and shifted
+ * left by the operand size 'ot' (an OT_xxx value), giving the amount the
+ * string instructions add to ESI/EDI per element.
+ * NOTE(review): presumably df holds +1 or -1 according to the direction
+ * flag, so the result is +/- the element size in bytes - confirm.
+ */
+static inline void gen_op_movl_T0_Dshift(int ot)
+{
+    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
+    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
+}
+
+/* Zero-extend 'reg' in place from the width given by 'ot' (OT_BYTE,
+   OT_WORD or OT_LONG). Any other size leaves the value untouched. */
+static void gen_extu(int ot, TCGv reg)
+{
+    if (ot == OT_BYTE) {
+        tcg_gen_ext8u_tl(reg, reg);
+    } else if (ot == OT_WORD) {
+        tcg_gen_ext16u_tl(reg, reg);
+    } else if (ot == OT_LONG) {
+        tcg_gen_ext32u_tl(reg, reg);
+    }
+}
+
+/* Sign-extend 'reg' in place from the width given by 'ot' (OT_BYTE,
+   OT_WORD or OT_LONG). Any other size leaves the value untouched. */
+static void gen_exts(int ot, TCGv reg)
+{
+    if (ot == OT_BYTE) {
+        tcg_gen_ext8s_tl(reg, reg);
+    } else if (ot == OT_WORD) {
+        tcg_gen_ext16s_tl(reg, reg);
+    } else if (ot == OT_LONG) {
+        tcg_gen_ext32s_tl(reg, reg);
+    }
+}
+
+/* Emit a branch to 'label1' taken when ECX, truncated to the address size,
+   is non-zero. 'size' is an aflag value; size + 1 converts it to the OT_xxx
+   width passed to gen_extu (0 -> 16 bit, 1 -> 32 bit, 2 -> no truncation).
+   Clobbers cpu_tmp0. */
+static inline void gen_op_jnz_ecx(int size, int label1)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+ gen_extu(size + 1, cpu_tmp0);
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+}
+
+/* Same as gen_op_jnz_ecx but the branch is taken when the truncated ECX
+   is zero. Clobbers cpu_tmp0. */
+static inline void gen_op_jz_ecx(int size, int label1)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+ gen_extu(size + 1, cpu_tmp0);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+}
+
+/* Emit a call to the port-input helper matching operand size 'ot'
+   (0 = inb, 1 = inw, 2 = inl) with arguments (v, n); gen_ins() passes the
+   destination value as 'v' and the port number as 'n'. Other sizes emit
+   nothing. */
+static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
+{
+    if (ot == 0) {
+        gen_helper_inb(v, n);
+    } else if (ot == 1) {
+        gen_helper_inw(v, n);
+    } else if (ot == 2) {
+        gen_helper_inl(v, n);
+    }
+}
+
+/* Emit a call to the port-output helper matching operand size 'ot'
+   (0 = outb, 1 = outw, 2 = outl) with arguments (v, n). Note that
+   gen_outs() passes the port number as 'v' and the data as 'n'. Other
+   sizes emit nothing. */
+static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
+{
+    if (ot == 0) {
+        gen_helper_outb(v, n);
+    } else if (ot == 1) {
+        gen_helper_outw(v, n);
+    } else if (ot == 2) {
+        gen_helper_outl(v, n);
+    }
+}
+
+/*
+ * Emit the permission checks that precede an I/O instruction whose port
+ * number is in T0.
+ *
+ * ot        operand size (0/1/2 = byte/word/long)
+ * cur_eip   eip of the instruction, stored to env before a helper runs
+ * svm_flags extra flags for the SVM check; bit (4 + ot) is added here
+ *
+ * Two independent checks may be emitted:
+ *  - in protected mode with CPL > IOPL, or in vm86 mode, a call to
+ *    helper_check_io{b,w,l};
+ *  - when HF_SVMI_MASK is set in s->flags, a call to helper_svm_check_io,
+ *    which also receives the instruction length (next_eip - cur_eip).
+ * Before the first helper call cc_op (if statically known) and eip are
+ * written back to env; 'state_saved' keeps that from being done twice.
+ */
+static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
+ uint32_t svm_flags)
+{
+ int state_saved;
+ target_ulong next_eip;
+
+ state_saved = 0;
+ if (s->pe && (s->cpl > s->iopl || s->vm86)) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ state_saved = 1;
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ switch (ot) {
+ case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
+ case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
+ case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
+ }
+ }
+ if(s->flags & HF_SVMI_MASK) {
+ if (!state_saved) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ }
+ svm_flags |= (1 << (4 + ot));
+ /* s->pc already points past the instruction at this point */
+ next_eip = s->pc - s->cs_base;
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
+ tcg_const_i32(next_eip - cur_eip));
+ }
+}
+
+/* Emit one MOVS iteration: load an element of size 'ot' from the source
+   address (ESI based), store it to the destination address (EDI based),
+   then advance ESI and EDI by the direction step (see
+   gen_op_movl_T0_Dshift) using the current address size. */
+static inline void gen_movs(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_string_movl_A0_EDI(s);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+/* Flush the translation-time cc_op to the run-time cc_op variable and mark
+   it as dynamic afterwards. Nothing is emitted when it is already
+   CC_OP_DYNAMIC. */
+static inline void gen_update_cc_op(DisasContext *s)
+{
+    if (s->cc_op == CC_OP_DYNAMIC) {
+        return;
+    }
+    gen_op_set_cc_op(s->cc_op);
+    s->cc_op = CC_OP_DYNAMIC;
+}
+
+/* The helpers below emit code that fills cc_src / cc_dst, the operands
+   from which the flags are computed later according to cc_op. */
+
+/* cc_dst = T0; cc_src is marked as unused (discarded). */
+static void gen_op_update1_cc(void)
+{
+ tcg_gen_discard_tl(cpu_cc_src);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* cc_src = T1, cc_dst = T0. */
+static void gen_op_update2_cc(void)
+{
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* Compare: cc_src = T1, cc_dst = T0 - T1. T0 and T1 are left unchanged. */
+static inline void gen_op_cmpl_T0_T1_cc(void)
+{
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+/* Test: cc_dst = T0 & T1; cc_src is discarded. T0 and T1 are left
+   unchanged. */
+static inline void gen_op_testl_T0_T1_cc(void)
+{
+ tcg_gen_discard_tl(cpu_cc_src);
+ tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+/* Negate: cc_src = -T0, cc_dst = T0. */
+static void gen_op_update_neg_cc(void)
+{
+ tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* Compute eflags.C into 'reg': calls the cc_compute_c helper with the
+   run-time cc_op and zero-extends its 32-bit result. Clobbers
+   cpu_tmp2_i32. */
+static void gen_compute_eflags_c(TCGv reg)
+{
+ gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
+ tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
+
+/* Compute all eflags into 'reg' (not into cc_src): calls the
+   cc_compute_all helper with the run-time cc_op and zero-extends its
+   32-bit result. Clobbers cpu_tmp2_i32. */
+static void gen_compute_eflags(TCGv reg)
+{
+ gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
+ tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
+
+/*
+ * Emit code leaving the truth value (0 or 1) of condition 'jcc_op' in T0,
+ * using the generic flag computation helpers.
+ *
+ * The statically known cc_op is first written back so the helpers see it.
+ * Each case then extracts the relevant flag bit(s) from the computed
+ * eflags value by shifting it down to bit 0: bit 11 = O, bit 7 = S,
+ * bit 6 = Z, bit 2 = P, bit 0 = C.
+ * Clobbers cpu_tmp0 (and cpu_tmp4 for JCC_LE). Unknown jcc_op values are
+ * treated as JCC_LE.
+ */
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ switch(jcc_op) {
+ case JCC_O:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_B:
+ gen_compute_eflags_c(cpu_T[0]);
+ break;
+ case JCC_Z:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_BE:
+ /* Z | C: Z is shifted down to bit 0, C already is bit 0 */
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_S:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_P:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_L:
+ /* O ^ S */
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ default:
+ case JCC_LE:
+ /* (O ^ S) | Z */
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ }
+}
+
+/* Return 1 if gen_jcc1() can test the condition encoded in opcode value
+   'b' without going through gen_setcc_slow_T0(), given the statically
+   known s->cc_op; return 0 otherwise.
+   WARNING: must be kept in sync with gen_jcc1. The set of cc_op values
+   accepted here is deliberately left as it was: gen_jcc1 additionally has
+   fast paths for the ADC, SBB and SAR groups, for which this function
+   answers 0. */
+static int is_fast_jcc_case(DisasContext *s, int b)
+{
+    const int jcc_op = (b >> 1) & 7;
+
+    switch (s->cc_op) {
+    /* cmp/sub: everything except overflow and parity is fast */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+    case CC_OP_SUBQ:
+        return jcc_op != JCC_O && jcc_op != JCC_P;
+
+    /* for these only the zero and sign tests are fast */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_ADDQ:
+
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
+
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+    case CC_OP_SHLQ:
+        return jcc_op == JCC_Z || jcc_op == JCC_S;
+
+    default:
+        return 0;
+    }
+}
+
+/* generate a conditional jump to label 'l1' according to jump opcode
+   value 'b'. In the fast case, T0 is guaranteed not to be used.
+
+   Bit 0 of 'b' inverts the condition and bits 1..3 select the JCC_xxx
+   test. 'cc_op' is the statically known flags operation; when it is one
+   the code below understands, the branch is computed directly from
+   cc_dst / cc_src, otherwise (label slow_jcc) the condition is
+   materialised in T0 by gen_setcc_slow_T0() and tested against zero.
+   'size' is the operand width index (0 = 8 bit ... 3 = 64 bit) derived
+   from cc_op. Clobbers cpu_tmp0 and cpu_tmp4. */
+static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+{
+ int inv, jcc_op, size, cond;
+ TCGv t0;
+
+ inv = b & 1;
+ jcc_op = (b >> 1) & 7;
+
+ switch(cc_op) {
+ /* we optimize the cmp/jcc case */
+ case CC_OP_SUBB:
+ case CC_OP_SUBW:
+ case CC_OP_SUBL:
+ case CC_OP_SUBQ:
+
+ size = cc_op - CC_OP_SUBB;
+ switch(jcc_op) {
+ case JCC_Z:
+ fast_jcc_z:
+ /* zero test on the result (cc_dst) truncated to the operand size */
+ switch(size) {
+ case 0:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
+ t0 = cpu_tmp0;
+ break;
+ case 1:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
+ t0 = cpu_tmp0;
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
+ t0 = cpu_tmp0;
+ break;
+#endif
+ default:
+ t0 = cpu_cc_dst;
+ break;
+ }
+ tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
+ break;
+ case JCC_S:
+ fast_jcc_s:
+ /* sign test: the top bit of the result at the operand size */
+ switch(size) {
+ case 0:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+ case 1:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+#endif
+ default:
+ tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
+ 0, l1);
+ break;
+ }
+ break;
+
+ case JCC_B:
+ cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
+ goto fast_jcc_b;
+ case JCC_BE:
+ cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
+ fast_jcc_b:
+ /* unsigned compare: cc_dst + cc_src rebuilds the first operand of
+ the subtraction (cc_dst = op1 - op2, cc_src = op2) */
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ switch(size) {
+ case 0:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
+ break;
+ case 1:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
+ break;
+#endif
+ default:
+ t0 = cpu_cc_src;
+ break;
+ }
+ tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ break;
+
+ case JCC_L:
+ cond = inv ? TCG_COND_GE : TCG_COND_LT;
+ goto fast_jcc_l;
+ case JCC_LE:
+ cond = inv ? TCG_COND_GT : TCG_COND_LE;
+ fast_jcc_l:
+ /* signed compare: same operand reconstruction, sign-extended */
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ switch(size) {
+ case 0:
+ t0 = cpu_tmp0;
+ tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext8s_tl(t0, cpu_cc_src);
+ break;
+ case 1:
+ t0 = cpu_tmp0;
+ tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext16s_tl(t0, cpu_cc_src);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ t0 = cpu_tmp0;
+ tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext32s_tl(t0, cpu_cc_src);
+ break;
+#endif
+ default:
+ t0 = cpu_cc_src;
+ break;
+ }
+ tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ break;
+
+ default:
+ goto slow_jcc;
+ }
+ break;
+
+ /* some jumps are easy to compute */
+ case CC_OP_ADDB:
+ case CC_OP_ADDW:
+ case CC_OP_ADDL:
+ case CC_OP_ADDQ:
+
+ case CC_OP_ADCB:
+ case CC_OP_ADCW:
+ case CC_OP_ADCL:
+ case CC_OP_ADCQ:
+
+ case CC_OP_SBBB:
+ case CC_OP_SBBW:
+ case CC_OP_SBBL:
+ case CC_OP_SBBQ:
+
+ case CC_OP_LOGICB:
+ case CC_OP_LOGICW:
+ case CC_OP_LOGICL:
+ case CC_OP_LOGICQ:
+
+ case CC_OP_INCB:
+ case CC_OP_INCW:
+ case CC_OP_INCL:
+ case CC_OP_INCQ:
+
+ case CC_OP_DECB:
+ case CC_OP_DECW:
+ case CC_OP_DECL:
+ case CC_OP_DECQ:
+
+ case CC_OP_SHLB:
+ case CC_OP_SHLW:
+ case CC_OP_SHLL:
+ case CC_OP_SHLQ:
+
+ case CC_OP_SARB:
+ case CC_OP_SARW:
+ case CC_OP_SARL:
+ case CC_OP_SARQ:
+ /* only Z and S can be taken from cc_dst here; the "& 3" recovers the
+ size index from the cc_op number */
+ switch(jcc_op) {
+ case JCC_Z:
+ size = (cc_op - CC_OP_ADDB) & 3;
+ goto fast_jcc_z;
+ case JCC_S:
+ size = (cc_op - CC_OP_ADDB) & 3;
+ goto fast_jcc_s;
+ default:
+ goto slow_jcc;
+ }
+ break;
+ default:
+ slow_jcc:
+ gen_setcc_slow_T0(s, jcc_op);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
+ cpu_T[0], 0, l1);
+ break;
+ }
+}
+
+/* Emit the "ECX == 0 ? leave : continue" prologue of a REP string
+   instruction and return the label of the exit path, so the caller can
+   branch to it again after the iteration. The exit path jumps to
+   'next_eip'.
+   XXX: does not work with gdbstub "ice" single step - not a serious
+   problem. */
+static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
+{
+    const int body_label = gen_new_label();
+    const int exit_label = gen_new_label();
+
+    /* ECX != 0: skip the exit path and fall into the string operation */
+    gen_op_jnz_ecx(s->aflag, body_label);
+
+    gen_set_label(exit_label);
+    gen_jmp_tb(s, next_eip, 1);
+
+    gen_set_label(body_label);
+    return exit_label;
+}
+
+/* Emit one STOS iteration: store EAX (size 'ot') to the destination
+   address (EDI based), then advance EDI by the direction step. */
+static inline void gen_stos(DisasContext *s, int ot)
+{
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ gen_string_movl_A0_EDI(s);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+/* Emit one LODS iteration: load an element of size 'ot' from the source
+   address (ESI based) into EAX, then advance ESI by the direction step. */
+static inline void gen_lods(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T0(ot, R_EAX);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+}
+
+/* Emit one SCAS iteration: compare EAX (T0) with the element at the
+   destination address (T1), recording the operands in cc_src/cc_dst, then
+   advance EDI by the direction step. The caller is responsible for
+   setting cc_op. */
+static inline void gen_scas(DisasContext *s, int ot)
+{
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ gen_string_movl_A0_EDI(s);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_cmpl_T0_T1_cc();
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+/* Emit one CMPS iteration: compare the element at the source address (T0)
+   with the one at the destination address (T1), recording the operands in
+   cc_src/cc_dst, then advance ESI and EDI by the direction step. The
+   caller is responsible for setting cc_op. */
+static inline void gen_cmps(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_string_movl_A0_EDI(s);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_cmpl_T0_T1_cc();
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+/* Emit one INS iteration: read a value of size 'ot' from the I/O port
+   given by the low 16 bits of EDX and store it to the destination address
+   (EDI based), then advance EDI by the direction step. With use_icount
+   the sequence is bracketed by gen_io_start()/gen_io_end(). */
+static inline void gen_ins(DisasContext *s, int ot)
+{
+ if (use_icount)
+ gen_io_start();
+ gen_string_movl_A0_EDI(s);
+ /* Note: we must do this dummy write first to be restartable in
+ case of page fault. */
+ gen_op_movl_T0_0();
+ gen_op_st_T0_A0(ot + s->mem_index);
+ /* port number: EDX masked to 16 bits */
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+ if (use_icount)
+ gen_io_end();
+}
+
+/*
+ * Emit one OUTS step: load a value of operand size 'ot' from the address
+ * set up by gen_string_movl_A0_ESI() and write it to the I/O port held in
+ * the low 16 bits of EDX, then add the Dshift value to ESI.
+ * When use_icount is set, the sequence is bracketed by gen_io_start() and
+ * gen_io_end().
+ */
+static inline void gen_outs(DisasContext *s, int ot)
+{
+    if (use_icount) {
+        gen_io_start();
+    }
+
+    /* T0 <- value to send */
+    gen_string_movl_A0_ESI(s);
+    gen_op_ld_T0_A0(ot + s->mem_index);
+
+    /* tmp2 <- port number (DX, masked to 16 bits), tmp3 <- value */
+    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
+    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+
+    gen_op_movl_T0_Dshift(ot);
+    gen_op_add_reg_T0(s->aflag, R_ESI);
+
+    if (use_icount) {
+        gen_io_end();
+    }
+}
+
+/* same method as Valgrind : we generate jumps to current or next
+ instruction
+
+ GEN_REPZ(op) defines gen_repz_<op>(s, ot, cur_eip, next_eip), which emits
+ one iteration of a REP-prefixed string instruction around gen_<op>():
+ leave to next_eip if ECX is zero (gen_jz_ecx_string), otherwise run one
+ gen_<op>() step, decrement ECX and jump back to cur_eip.  When
+ s->jmp_opt is clear, an extra "ECX == 0" test branches straight to the
+ exit label instead of looping once more. */
+#define GEN_REPZ(op) \
+static inline void gen_repz_ ## op(DisasContext *s, int ot, \
+ target_ulong cur_eip, target_ulong next_eip) \
+{ \
+ int l2;\
+ gen_update_cc_op(s); \
+ l2 = gen_jz_ecx_string(s, next_eip); \
+ gen_ ## op(s, ot); \
+ gen_op_add_reg_im(s->aflag, R_ECX, -1); \
+ /* a loop would cause two single step exceptions if ECX = 1 \
+ before rep string_insn */ \
+ if (!s->jmp_opt) \
+ gen_op_jz_ecx(s->aflag, l2); \
+ gen_jmp(s, cur_eip); \
+}
+
+#define GEN_REPZ2(op) /* Variant of GEN_REPZ for the comparing string ops: \
+ the generated gen_repz_<op>() takes an extra 'nz' argument.  After each \
+ step it sets cc_op to CC_OP_SUBB + ot and branches to the exit label l2 \
+ on the jcc condition (JCC_Z << 1) | (nz ^ 1), in addition to the ECX \
+ handling done by GEN_REPZ. */ \
+static inline void gen_repz_ ## op(DisasContext *s, int ot, \
+ target_ulong cur_eip, \
+ target_ulong next_eip, \
+ int nz) \
+{ \
+ int l2;\
+ gen_update_cc_op(s); \
+ l2 = gen_jz_ecx_string(s, next_eip); \
+ gen_ ## op(s, ot); \
+ gen_op_add_reg_im(s->aflag, R_ECX, -1); \
+ gen_op_set_cc_op(CC_OP_SUBB + ot); \
+ gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \
+ if (!s->jmp_opt) \
+ gen_op_jz_ecx(s->aflag, l2); \
+ gen_jmp(s, cur_eip); \
+}
+
+GEN_REPZ(movs) /* defines gen_repz_movs(), built on gen_movs() */
+GEN_REPZ(stos) /* defines gen_repz_stos(), built on gen_stos() */
+GEN_REPZ(lods) /* defines gen_repz_lods(), built on gen_lods() */
+GEN_REPZ(ins) /* defines gen_repz_ins(), built on gen_ins() */
+GEN_REPZ(outs) /* defines gen_repz_outs(), built on gen_outs() */
+GEN_REPZ2(scas) /* defines gen_repz_scas() with the extra 'nz' argument */
+GEN_REPZ2(cmps) /* defines gen_repz_cmps() with the extra 'nz' argument */
+
+/*
+ * Emit the FPU helper call selected by 'op' (0..7) that operates on ST0
+ * and FT0.  Selectors 2 and 3 both map to the compare helper; any other
+ * value outside 0..7 emits nothing.
+ */
+static void gen_helper_fp_arith_ST0_FT0(int op)
+{
+    switch (op) {
+    case 0:
+        gen_helper_fadd_ST0_FT0();
+        break;
+    case 1:
+        gen_helper_fmul_ST0_FT0();
+        break;
+    case 2:
+    case 3:
+        gen_helper_fcom_ST0_FT0();
+        break;
+    case 4:
+        gen_helper_fsub_ST0_FT0();
+        break;
+    case 5:
+        gen_helper_fsubr_ST0_FT0();
+        break;
+    case 6:
+        gen_helper_fdiv_ST0_FT0();
+        break;
+    case 7:
+        gen_helper_fdivr_ST0_FT0();
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * Emit the FPU helper call selected by 'op' that operates on ST(opreg)
+ * and ST0.  Selectors 2 and 3 are not handled here and emit no call.
+ *
+ * NOTE the exception in "r" op ordering: selectors 4/5 map to fsubr/fsub
+ * and 6/7 to fdivr/fdiv, the reverse of the ST0_FT0 table.
+ */
+static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
+{
+    /* the register index is always materialised, even if no call follows */
+    TCGv_i32 st_index = tcg_const_i32(opreg);
+
+    switch (op) {
+    case 0:
+        gen_helper_fadd_STN_ST0(st_index);
+        break;
+    case 1:
+        gen_helper_fmul_STN_ST0(st_index);
+        break;
+    case 4:
+        gen_helper_fsubr_STN_ST0(st_index);
+        break;
+    case 5:
+        gen_helper_fsub_STN_ST0(st_index);
+        break;
+    case 6:
+        gen_helper_fdivr_STN_ST0(st_index);
+        break;
+    case 7:
+        gen_helper_fdiv_STN_ST0(st_index);
+        break;
+    default:
+        break;
+    }
+}
+
+/* Store T0 into the destination of an ALU operation: the memory operand
+   addressed by A0 when d == OR_TMP0, register d otherwise. */
+static void gen_alu_writeback_T0(DisasContext *s1, int ot, int d)
+{
+    if (d == OR_TMP0) {
+        gen_op_st_T0_A0(ot + s1->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, d);
+    }
+}
+
+/*
+ * Emit the two-operand ALU operation 'op' with operand size 'ot'.
+ *
+ * The first operand is register d, or the memory operand addressed by A0
+ * if d == OR_TMP0; it is loaded into T0 here.  The second operand is
+ * expected in T1.  Except for OP_CMPL the result is written back to the
+ * first operand.  s1->cc_op is updated to describe the resulting flags
+ * (CC_OP_DYNAMIC for ADC/SBB, whose cc_op is computed at run time from
+ * the incoming carry).  Unknown 'op' values are treated as OP_ANDL.
+ */
+static void gen_op(DisasContext *s1, int op, int ot, int d)
+{
+    if (d == OR_TMP0) {
+        gen_op_ld_T0_A0(ot + s1->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, d);
+    }
+
+    switch (op) {
+    case OP_ADCL:
+    case OP_SBBL: {
+        const int is_adc = (op == OP_ADCL);
+
+        if (s1->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s1->cc_op);
+        }
+        /* tmp4 <- incoming carry */
+        gen_compute_eflags_c(cpu_tmp4);
+        if (is_adc) {
+            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        } else {
+            tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        }
+        gen_alu_writeback_T0(s1, ot, d);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+        /* run-time cc_op = (ADDB or SUBB) + ot + 4 * carry */
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
+        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
+        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32,
+                         (is_adc ? CC_OP_ADDB : CC_OP_SUBB) + ot);
+        s1->cc_op = CC_OP_DYNAMIC;
+        break;
+    }
+
+    case OP_ADDL:
+        gen_op_addl_T0_T1();
+        gen_alu_writeback_T0(s1, ot, d);
+        gen_op_update2_cc();
+        s1->cc_op = CC_OP_ADDB + ot;
+        break;
+
+    case OP_SUBL:
+        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        gen_alu_writeback_T0(s1, ot, d);
+        gen_op_update2_cc();
+        s1->cc_op = CC_OP_SUBB + ot;
+        break;
+
+    case OP_CMPL:
+        /* compare only: nothing is written back */
+        gen_op_cmpl_T0_T1_cc();
+        s1->cc_op = CC_OP_SUBB + ot;
+        break;
+
+    default:
+    case OP_ANDL:
+    case OP_ORL:
+    case OP_XORL:
+        /* bitwise ops share write-back and flag handling */
+        if (op == OP_ORL) {
+            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else if (op == OP_XORL) {
+            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        gen_alu_writeback_T0(s1, ot, d);
+        gen_op_update1_cc();
+        s1->cc_op = CC_OP_LOGICB + ot;
+        break;
+    }
+}
+
+/*
+ * Emit INC (c > 0) or DEC (otherwise) of size 'ot' on register d, or on
+ * the memory operand addressed by A0 if d == OR_TMP0.
+ *
+ * The current flags state is flushed first; after the write-back the
+ * carry computed from that state is kept in cc_src and the result in
+ * cc_dst, with s1->cc_op set to CC_OP_INCB/CC_OP_DECB + ot.
+ */
+static void gen_inc(DisasContext *s1, int ot, int d, int c)
+{
+    const int to_memory = (d == OR_TMP0);
+    const int increment = (c > 0);
+
+    if (to_memory) {
+        gen_op_ld_T0_A0(ot + s1->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, d);
+    }
+
+    if (s1->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s1->cc_op);
+    }
+
+    tcg_gen_addi_tl(cpu_T[0], cpu_T[0], increment ? 1 : -1);
+    s1->cc_op = (increment ? CC_OP_INCB : CC_OP_DECB) + ot;
+
+    if (to_memory) {
+        gen_op_st_T0_A0(ot + s1->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, d);
+    }
+
+    gen_compute_eflags_c(cpu_cc_src);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right, int is_arith)
+{ /* Emit a shift of operand op1 (memory at A0 if op1 == OR_TMP0, else a
+ * register) by the run-time count held in T1.
+ * is_right selects a right shift, is_arith an arithmetic (sign
+ * preserving) one.  The count is masked to 5 bits (6 for OT_QUAD).
+ * The flag inputs are only rewritten when the masked count is non-zero,
+ * so s->cc_op is left as CC_OP_DYNAMIC. */
+ target_ulong mask;
+ int shift_label;
+ TCGv t0, t1;
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_TN_reg(ot, 0, op1);
+
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+
+ tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1); /* count - 1, used for the value that ends up in cc_src */
+
+ if (is_right) {
+ if (is_arith) {
+ gen_exts(ot, cpu_T[0]);
+ tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else {
+ gen_extu(ot, cpu_T[0]);
+ tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+ } else {
+ tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+
+ /* store */
+ if (op1 == OR_TMP0)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, op1);
+
+ /* update eflags if non zero shift */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ /* XXX: inefficient */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+
+ tcg_gen_mov_tl(t0, cpu_T[0]); /* result, copied to a local temp before the branch below */
+ tcg_gen_mov_tl(t1, cpu_T3); /* value shifted by count - 1 */
+
+ shift_label = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label); /* count == 0: leave the flag state untouched */
+
+ tcg_gen_mov_tl(cpu_cc_src, t1);
+ tcg_gen_mov_tl(cpu_cc_dst, t0);
+ if (is_right)
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ else
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+
+ gen_set_label(shift_label);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+}
+
+/*
+ * Emit a shift of operand op1 (memory at A0 if op1 == OR_TMP0, else a
+ * register) by the immediate count op2.
+ *
+ * is_right selects a right shift, is_arith an arithmetic one.  The count
+ * is masked to 5 bits (6 for OT_QUAD).  With a zero count the operand is
+ * still loaded and stored back, but neither the value nor the flag state
+ * changes.
+ */
+static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
+                            int is_right, int is_arith)
+{
+    const int in_memory = (op1 == OR_TMP0);
+
+    /* load */
+    if (in_memory) {
+        gen_op_ld_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_TN_reg(ot, 0, op1);
+    }
+
+    op2 &= (ot == OT_QUAD) ? 0x3f : 0x1f;
+
+    /* tmp4 receives the operand shifted by count - 1 (flag source),
+       T0 the final result */
+    if (op2 != 0) {
+        if (!is_right) {
+            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
+        } else if (is_arith) {
+            gen_exts(ot, cpu_T[0]);
+            tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
+        } else {
+            gen_extu(ot, cpu_T[0]);
+            tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
+        }
+    }
+
+    /* store */
+    if (in_memory) {
+        gen_op_st_T0_A0(ot + s->mem_index);
+    } else {
+        gen_op_mov_reg_T0(ot, op1);
+    }
+
+    /* update eflags if non zero shift */
+    if (op2 != 0) {
+        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+        s->cc_op = (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot;
+    }
+}
+
+/*
+ * Emit a logical shift of arg1 into ret by a signed immediate amount:
+ * left by arg2 when arg2 >= 0, right by -arg2 otherwise.
+ */
+static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
+{
+    if (arg2 < 0) {
+        tcg_gen_shri_tl(ret, arg1, -arg2);
+        return;
+    }
+    tcg_gen_shli_tl(ret, arg1, arg2);
+}
+
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right)
+{ /* Emit a rotate of operand op1 (memory at A0 if op1 == OR_TMP0, else a
+ * register) by the run-time count held in T1; is_right selects the
+ * direction.  The count is masked to 5 bits (6 for OT_QUAD).  When the
+ * masked count is zero the operand is written back unchanged and the
+ * flags are left alone; otherwise O and C are recomputed into cc_src
+ * with CC_OP_EFLAGS.  s->cc_op is CC_OP_DYNAMIC afterwards. */
+ target_ulong mask;
+ int label1, label2, data_bits;
+ TCGv t0, t1, t2, a0;
+
+ /* XXX: inefficient, but we must use local temps */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0); /* keep the address in a local temp for the store after the label */
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ tcg_gen_mov_tl(t1, cpu_T[1]);
+
+ tcg_gen_andi_tl(t1, t1, mask);
+
+ /* Must test zero case to avoid using undefined behaviour in TCG
+ shifts. */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
+
+ if (ot <= OT_WORD)
+ tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); /* reduce the count modulo the operand width for 8/16-bit operands */
+ else
+ tcg_gen_mov_tl(cpu_tmp0, t1);
+
+ gen_extu(ot, t0);
+ tcg_gen_mov_tl(t2, t0); /* t2 keeps the original value for the overflow flag computation */
+
+ data_bits = 8 << ot;
+ /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
+ fix TCG definition) */
+ if (is_right) {
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
+ tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
+ tcg_gen_shl_tl(t0, t0, cpu_tmp0);
+ } else {
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
+ tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
+ tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+ }
+ tcg_gen_or_tl(t0, t0, cpu_tmp4);
+
+ gen_set_label(label1);
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ label2 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); /* zero count: flags unchanged */
+
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+ tcg_gen_xor_tl(cpu_tmp0, t2, t0);
+ tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); /* move the top operand bit of (old ^ new) to bit 11, then mask with CC_O */
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ if (is_right) {
+ tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ }
+ tcg_gen_andi_tl(t0, t0, CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+
+ gen_set_label(label2);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+}
+
+static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
+ int is_right)
+{ /* Emit a rotate of operand op1 (memory at A0 if op1 == OR_TMP0, else a
+ * register) by the immediate count op2; is_right selects the direction.
+ * The count is masked to 5 bits (6 for OT_QUAD).  With a zero count the
+ * operand is only loaded and stored back; otherwise O and C are
+ * recomputed into cc_src and s->cc_op becomes CC_OP_EFLAGS. */
+ int mask;
+ int data_bits;
+ TCGv t0, t1, a0;
+
+ /* XXX: inefficient, but we must use local temps */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ gen_extu(ot, t0);
+ tcg_gen_mov_tl(t1, t0); /* t1 keeps the original value for the overflow flag computation */
+
+ op2 &= mask;
+ data_bits = 8 << ot;
+ if (op2 != 0) {
+ int shift = op2 & ((1 << (3 + ot)) - 1); /* count reduced modulo the operand width */
+ if (is_right) {
+ tcg_gen_shri_tl(cpu_tmp4, t0, shift);
+ tcg_gen_shli_tl(t0, t0, data_bits - shift);
+ }
+ else {
+ tcg_gen_shli_tl(cpu_tmp4, t0, shift);
+ tcg_gen_shri_tl(t0, t0, data_bits - shift);
+ }
+ tcg_gen_or_tl(t0, t0, cpu_tmp4);
+ }
+
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ if (op2 != 0) {
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+ tcg_gen_xor_tl(cpu_tmp0, t1, t0);
+ tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); /* move the top operand bit of (old ^ new) to bit 11, then mask with CC_O */
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ if (is_right) {
+ tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ }
+ tcg_gen_andi_tl(t0, t0, CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+ s->cc_op = CC_OP_EFLAGS;
+ }
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(a0);
+}
+
+/* Emit a rotate-through-carry (RCL, or RCR when is_right is set) of
+ operand op1 (memory at A0 if op1 == OR_TMP0, else a register) by the
+ count in T1.  The rotation is done by a size-specific helper (the
+ 64-bit one only exists with TARGET_X86_64).  The flags are taken from
+ cc_tmp unless it holds -1, so s->cc_op is CC_OP_DYNAMIC afterwards.
+
+ XXX: add faster immediate = 1 case */
+static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right)
+{
+ int label1;
+
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ /* load */
+ if (op1 == OR_TMP0)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_TN_reg(ot, 0, op1);
+
+ if (is_right) {
+ switch (ot) {
+ case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#ifdef TARGET_X86_64
+ case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#endif
+ }
+ } else {
+ switch (ot) {
+ case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#ifdef TARGET_X86_64
+ case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#endif
+ }
+ }
+ /* store */
+ if (op1 == OR_TMP0)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, op1);
+
+ /* update eflags */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); /* cc_tmp == -1: presumably the helper's "flags unchanged" marker - confirm in the helper */
+
+ tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+
+ gen_set_label(label1);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+}
+
+/* Emit a double-operand shift (the SHLD/SHRD family, judging by how the
+ two operands are combined): operand op1 (memory at A0 if
+ op1 == OR_TMP0, else a register) is shifted by the count in T3, with
+ the vacated bits filled from T1.  The count is masked to 5 bits (6 for
+ OT_QUAD).  A zero count writes the operand back unchanged and leaves
+ the flags alone, so s->cc_op is CC_OP_DYNAMIC afterwards.
+
+ XXX: add faster immediate case */
+static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
+ int is_right)
+{
+ int label1, label2, data_bits;
+ target_ulong mask;
+ TCGv t0, t1, t2, a0;
+
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
+
+ tcg_gen_mov_tl(t1, cpu_T[1]); /* t1: the operand supplying the fill bits */
+ tcg_gen_mov_tl(t2, cpu_T3); /* t2: the masked shift count */
+
+ /* Must test zero case to avoid using undefined behaviour in TCG
+ shifts. */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+
+ tcg_gen_addi_tl(cpu_tmp5, t2, -1); /* count - 1, used for the value that ends up in cc_src */
+ if (ot == OT_WORD) {
+ /* Note: we implement the Intel behaviour for shift count > 16 */
+ if (is_right) {
+ tcg_gen_andi_tl(t0, t0, 0xffff);
+ tcg_gen_shli_tl(cpu_tmp0, t1, 16);
+ tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ tcg_gen_ext32u_tl(t0, t0);
+
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ /* only needed if count > 16, but a test would complicate */
+ tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
+ tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
+
+ tcg_gen_shr_tl(t0, t0, t2);
+
+ tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ } else {
+ /* XXX: not optimal */
+ tcg_gen_andi_tl(t0, t0, 0xffff);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(t1, t1, t0);
+ tcg_gen_ext32u_tl(t1, t1);
+
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+ tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
+ tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
+ tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
+ tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+ }
+ } else {
+ data_bits = 8 << ot;
+ if (is_right) {
+ if (ot == OT_LONG)
+ tcg_gen_ext32u_tl(t0, t0);
+
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ tcg_gen_shr_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
+ tcg_gen_shl_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+
+ } else {
+ if (ot == OT_LONG)
+ tcg_gen_ext32u_tl(t1, t1);
+
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
+ tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+ }
+ }
+ tcg_gen_mov_tl(t1, cpu_tmp4); /* t1 now carries the flag source across the label */
+
+ gen_set_label(label1);
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ label2 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); /* zero count: flags unchanged */
+
+ tcg_gen_mov_tl(cpu_cc_src, t1);
+ tcg_gen_mov_tl(cpu_cc_dst, t0);
+ if (is_right) {
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ } else {
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+ }
+ gen_set_label(label2);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+}
+
+/*
+ * Emit the shift/rotate 'op' of size 'ot' on destination d with a
+ * run-time count.  The count is taken from register s, unless s is
+ * OR_TMP1, in which case T1 is assumed to hold it already.
+ * Unknown 'op' values emit nothing beyond the optional count load.
+ */
+static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+{
+    if (s != OR_TMP1) {
+        gen_op_mov_TN_reg(ot, 1, s);
+    }
+
+    switch (op) {
+    case OP_ROL:
+    case OP_ROR:
+        gen_rot_rm_T1(s1, ot, d, op == OP_ROR);
+        break;
+    case OP_RCL:
+    case OP_RCR:
+        gen_rotc_rm_T1(s1, ot, d, op == OP_RCR);
+        break;
+    case OP_SHL:
+    case OP_SHL1:
+        gen_shift_rm_T1(s1, ot, d, 0, 0);
+        break;
+    case OP_SHR:
+    case OP_SAR:
+        /* right shifts; SAR is the arithmetic one */
+        gen_shift_rm_T1(s1, ot, d, 1, op == OP_SAR);
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * Emit the shift/rotate 'op' of size 'ot' on destination d with the
+ * immediate count c.  Plain shifts and rotates use the immediate
+ * emitters; every other op loads c into T1 and goes through gen_shift().
+ */
+static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+{
+    switch (op) {
+    case OP_ROL:
+    case OP_ROR:
+        gen_rot_rm_im(s1, ot, d, c, op == OP_ROR);
+        break;
+    case OP_SHL:
+    case OP_SHL1:
+        gen_shift_rm_im(s1, ot, d, c, 0, 0);
+        break;
+    case OP_SHR:
+    case OP_SAR:
+        /* right shifts; SAR is the arithmetic one */
+        gen_shift_rm_im(s1, ot, d, c, 1, op == OP_SAR);
+        break;
+    default:
+        /* currently not optimized */
+        gen_op_movl_T1_im(c);
+        gen_shift(s1, op, ot, d, OR_TMP1);
+        break;
+    }
+}
+
+static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+{ /* Decode the memory operand described by 'modrm' and emit code that
+ * leaves its address in A0.
+ * Any SIB byte and displacement are fetched from the instruction stream
+ * at s->pc, which is advanced past them.  The segment base is added when
+ * s->addseg is set or a segment override prefix is active.
+ * On return *reg_ptr is always OR_A0 and *offset_ptr is always 0. */
+ target_long disp;
+ int havesib;
+ int base;
+ int index;
+ int scale;
+ int opreg;
+ int mod, rm, code, override, must_add_seg;
+
+ override = s->override;
+ must_add_seg = s->addseg;
+ if (override >= 0)
+ must_add_seg = 1;
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+
+ if (s->aflag) { /* non-16-bit addressing; aflag == 2 selects the 64-bit forms below */
+
+ havesib = 0;
+ base = rm;
+ index = 0;
+ scale = 0;
+
+ if (base == 4) { /* rm == 4: a SIB byte follows */
+ havesib = 1;
+ code = ldub_code(s->pc++);
+ scale = (code >> 6) & 3;
+ index = ((code >> 3) & 7) | REX_X(s);
+ base = (code & 7);
+ }
+ base |= REX_B(s);
+
+ switch (mod) {
+ case 0:
+ if ((base & 7) == 5) { /* no base register: 32-bit displacement only */
+ base = -1;
+ disp = (int32_t)ldl_code(s->pc);
+ s->pc += 4;
+ if (CODE64(s) && !havesib) {
+ disp += s->pc + s->rip_offset; /* 64-bit code without SIB: displacement is relative to s->pc + s->rip_offset */
+ }
+ } else {
+ disp = 0;
+ }
+ break;
+ case 1:
+ disp = (int8_t)ldub_code(s->pc++);
+ break;
+ default:
+ case 2:
+#ifdef VBOX
+ disp = (int32_t)ldl_code(s->pc);
+#else
+ disp = ldl_code(s->pc);
+#endif
+ s->pc += 4;
+ break;
+ }
+
+ if (base >= 0) {
+ /* for correct popl handling with esp */
+ if (base == 4 && s->popl_esp_hack)
+ disp += s->popl_esp_hack;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(base);
+ if (disp != 0) {
+ gen_op_addq_A0_im(disp);
+ }
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(base);
+ if (disp != 0)
+ gen_op_addl_A0_im(disp);
+ }
+ } else {
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_im(disp);
+ } else
+#endif
+ {
+ gen_op_movl_A0_im(disp);
+ }
+ }
+ /* index == 4 means no index */
+ if (havesib && (index != 4)) {
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_addq_A0_reg_sN(scale, index);
+ } else
+#endif
+ {
+ gen_op_addl_A0_reg_sN(scale, index);
+ }
+ }
+ if (must_add_seg) {
+ if (override < 0) { /* no prefix: SS for EBP/ESP based addresses, DS otherwise */
+ if (base == R_EBP || base == R_ESP)
+ override = R_SS;
+ else
+ override = R_DS;
+ }
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_addq_A0_seg(override);
+ } else
+#endif
+ {
+ gen_op_addl_A0_seg(override);
+ }
+ }
+ } else { /* 16-bit addressing */
+ switch (mod) {
+ case 0:
+ if (rm == 6) { /* 16-bit displacement only, no base register */
+ disp = lduw_code(s->pc);
+ s->pc += 2;
+ gen_op_movl_A0_im(disp);
+ rm = 0; /* avoid SS override */
+ goto no_rm;
+ } else {
+ disp = 0;
+ }
+ break;
+ case 1:
+ disp = (int8_t)ldub_code(s->pc++);
+ break;
+ default:
+ case 2:
+ disp = lduw_code(s->pc);
+ s->pc += 2;
+ break;
+ }
+ switch(rm) {
+ case 0:
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ break;
+ case 1:
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ break;
+ case 2:
+ gen_op_movl_A0_reg(R_EBP);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ break;
+ case 3:
+ gen_op_movl_A0_reg(R_EBP);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ break;
+ case 4:
+ gen_op_movl_A0_reg(R_ESI);
+ break;
+ case 5:
+ gen_op_movl_A0_reg(R_EDI);
+ break;
+ case 6:
+ gen_op_movl_A0_reg(R_EBP);
+ break;
+ default:
+ case 7:
+ gen_op_movl_A0_reg(R_EBX);
+ break;
+ }
+ if (disp != 0)
+ gen_op_addl_A0_im(disp);
+ gen_op_andl_A0_ffff(); /* 16-bit offsets wrap at 64K */
+ no_rm:
+ if (must_add_seg) {
+ if (override < 0) { /* no prefix: SS for the EBP based forms, DS otherwise */
+ if (rm == 2 || rm == 3 || rm == 6)
+ override = R_SS;
+ else
+ override = R_DS;
+ }
+ gen_op_addl_A0_seg(override);
+ }
+ }
+
+ opreg = OR_A0;
+ disp = 0;
+ *reg_ptr = opreg;
+ *offset_ptr = disp;
+}
+
+/*
+ * Skip over the addressing bytes (SIB and displacement) that follow the
+ * given modrm byte, advancing s->pc, without emitting any code.
+ * A register operand (mod == 3) has no extra bytes.
+ */
+static void gen_nop_modrm(DisasContext *s, int modrm)
+{
+    const int mod = (modrm >> 6) & 3;
+    int rm;
+
+    if (mod == 3) {
+        return;
+    }
+    rm = modrm & 7;
+
+    if (!s->aflag) {
+        /* 16-bit addressing: disp8 for mod 1, disp16 for mod 2 and for
+           the displacement-only form (mod 0, rm 6) */
+        if (mod == 1) {
+            s->pc++;
+        } else if (mod == 2 || rm == 6) {
+            s->pc += 2;
+        }
+        return;
+    }
+
+    /* 32/64-bit addressing: rm 4 announces a SIB byte whose low three
+       bits replace the base */
+    if (rm == 4) {
+        rm = ldub_code(s->pc++) & 7;
+    }
+
+    /* disp8 for mod 1, disp32 for mod 2 and for mod 0 with base 5 */
+    if (mod == 1) {
+        s->pc++;
+    } else if (mod == 2 || rm == 5) {
+        s->pc += 4;
+    }
+}
+
+/*
+ * Add a data segment base to A0: the overriding segment if a segment
+ * prefix is active, DS otherwise.  Nothing is emitted when there is no
+ * override and s->addseg is clear.
+ *
+ * used for LEA and MOV AX, mem
+ */
+static void gen_add_A0_ds_seg(DisasContext *s)
+{
+    const int has_override = (s->override >= 0);
+    const int seg = has_override ? s->override : R_DS;
+
+    if (!has_override && !s->addseg) {
+        return;
+    }
+
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_addq_A0_seg(seg);
+        return;
+    }
+#endif
+    gen_op_addl_A0_seg(seg);
+}
+
+/*
+ * Emit a load from, or a store to, the operand described by 'modrm'.
+ *
+ * The operand is a register when mod == 3 and memory otherwise (the
+ * address is then computed into A0 by gen_lea_modrm()).  For a store the
+ * value comes from register 'reg'; for a load it ends up in 'reg'.  When
+ * reg == OR_TMP0 the value is passed in / left in T0 instead.
+ */
+static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+{
+    const int is_memory = (((modrm >> 6) & 3) != 3);
+    const int via_reg = (reg != OR_TMP0);
+    const int rm = (modrm & 7) | REX_B(s);
+    int opreg, disp;
+
+    if (is_memory) {
+        gen_lea_modrm(s, modrm, &opreg, &disp);
+    }
+
+    if (is_store) {
+        /* source value into T0, then out to the operand */
+        if (via_reg) {
+            gen_op_mov_TN_reg(ot, 0, reg);
+        }
+        if (is_memory) {
+            gen_op_st_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_reg_T0(ot, rm);
+        }
+    } else {
+        /* operand into T0, then on to the destination register */
+        if (is_memory) {
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+        if (via_reg) {
+            gen_op_mov_reg_T0(ot, reg);
+        }
+    }
+}
+
+/*
+ * Fetch an immediate of size 'ot' from the instruction stream at s->pc
+ * and advance s->pc past it.  Any size other than OT_BYTE or OT_WORD is
+ * read as a 32-bit value.
+ */
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+    uint32_t value;
+
+    if (ot == OT_BYTE) {
+        value = ldub_code(s->pc);
+        s->pc++;
+    } else if (ot == OT_WORD) {
+        value = lduw_code(s->pc);
+        s->pc += 2;
+    } else {
+        value = ldl_code(s->pc);
+        s->pc += 4;
+    }
+    return value;
+}
+
+/*
+ * Size in bytes of an immediate for operand size 'ot': 1 << ot up to
+ * OT_LONG, and 4 for anything larger.
+ */
+static inline int insn_const_size(unsigned int ot)
+{
+    return (ot <= OT_LONG) ? (1 << ot) : 4;
+}
+
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{ /* Emit a jump to 'eip' that ends the current translation block.
+ * If the target (s->cs_base + eip) lies in the same guest page as the
+ * start of the TB or as the last translated byte, a direct goto_tb /
+ * exit_tb pair using slot tb_num is emitted; otherwise EIP is set and the
+ * block is ended through gen_eob(). */
+ TranslationBlock *tb;
+ target_ulong pc;
+
+ pc = s->cs_base + eip;
+ tb = s->tb;
+ /* NOTE: we handle the case where the TB spans two pages here */
+ if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
+ (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
+#ifdef VBOX
+ gen_check_external_event();
+#endif /* VBOX */
+ /* jump to same page: we can use a direct jump */
+ tcg_gen_goto_tb(tb_num);
+ gen_jmp_im(eip);
+ tcg_gen_exit_tb((intptr_t)tb + tb_num); /* the exit value combines the TB pointer with the jump slot number */
+ } else {
+ /* jump to another page: currently not optimized */
+ gen_jmp_im(eip);
+ gen_eob(s);
+ }
+}
+
+/*
+ * Emit the conditional jump with condition code 'b': control goes to
+ * 'val' when the condition holds and to 'next_eip' otherwise.
+ *
+ * With s->jmp_opt both outcomes use gen_goto_tb() (slots 1 and 0);
+ * without it EIP is set on either path and the block is ended with
+ * gen_eob().
+ */
+static inline void gen_jcc(DisasContext *s, int b,
+                           target_ulong val, target_ulong next_eip)
+{
+    /* sampled before gen_update_cc_op() runs, as gen_jcc1() needs it */
+    const int saved_cc_op = s->cc_op;
+    int taken_label, done_label;
+
+    gen_update_cc_op(s);
+    taken_label = gen_new_label();
+
+    if (!s->jmp_opt) {
+        done_label = gen_new_label();
+        gen_jcc1(s, saved_cc_op, b, taken_label);
+
+        /* not taken */
+        gen_jmp_im(next_eip);
+        tcg_gen_br(done_label);
+
+        /* taken */
+        gen_set_label(taken_label);
+        gen_jmp_im(val);
+
+        gen_set_label(done_label);
+        gen_eob(s);
+        return;
+    }
+
+    gen_jcc1(s, saved_cc_op, b, taken_label);
+
+    /* not taken */
+    gen_goto_tb(s, 0, next_eip);
+
+    /* taken */
+    gen_set_label(taken_label);
+    gen_goto_tb(s, 1, val);
+    s->is_jmp = DISAS_TB_JUMP;
+}
+
+/*
+ * Emit code that leaves 1 in T0 when the condition encoded by 'b' holds
+ * and 0 otherwise.  Bit 0 of 'b' inverts the condition selected by bits
+ * 1..3.
+ */
+static void gen_setcc(DisasContext *s, int b)
+{
+    TCGv result;
+    int skip_label;
+
+    if (!is_fast_jcc_case(s, b)) {
+        /* slow case: it is more efficient not to generate a jump,
+           although it is questionable whether this optimization is
+           worth it */
+        const int negate = b & 1;
+
+        gen_setcc_slow_T0(s, (b >> 1) & 7);
+        if (negate) {
+            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+        }
+        return;
+    }
+
+    /* nominal case: we use a jump */
+    /* XXX: make it faster by adding new instructions in TCG */
+    result = tcg_temp_local_new();
+    tcg_gen_movi_tl(result, 0);
+    skip_label = gen_new_label();
+    /* branch on the inverted condition: the "1" below is skipped when
+       the requested condition does not hold */
+    gen_jcc1(s, s->cc_op, b ^ 1, skip_label);
+    tcg_gen_movi_tl(result, 1);
+    gen_set_label(skip_label);
+    tcg_gen_mov_tl(cpu_T[0], result);
+    tcg_temp_free(result);
+}
+
+static inline void gen_op_movl_T0_seg(int seg_reg)
+{ /* T0 <- segs[seg_reg].selector from the CPU state, read as an unsigned
+ * 32-bit value. */
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].selector));
+}
+
+static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+{ /* Load segment register seg_reg from T0 without any descriptor lookup:
+ * selector = T0 & 0xffff, base = selector << 4.  T0 is clobbered (it
+ * holds the base afterwards).  gen_movl_seg_T0() uses this when
+ * protected mode is off or vm86 mode is on. */
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].selector));
+ tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
+ tcg_gen_st_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].base));
+}
+
+/*
+ * Move T0 to segment register seg_reg and decide whether translation
+ * must stop afterwards because the CPU state may have changed.
+ * Never call this function with seg_reg == R_CS.
+ *
+ * cur_eip is the EIP stored before the protected-mode helper is called.
+ */
+static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
+{
+    if (!s->pe || s->vm86) {
+        /* real mode or vm86: plain selector/base update */
+        gen_op_movl_seg_T0_vm(seg_reg);
+        if (seg_reg == R_SS) {
+            s->is_jmp = DISAS_TB_JUMP;
+        }
+        return;
+    }
+
+    /* XXX: optimize by finding processor state dynamically */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+    gen_jmp_im(cur_eip);
+    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+    gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
+
+    /* abort translation because the addseg value may change or
+       because ss32 may change. For R_SS, translation must always
+       stop as a special handling must be done to disable hardware
+       interrupts for the next instruction */
+    if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
+        s->is_jmp = DISAS_TB_JUMP;
+    }
+}
+
+/*
+ * Return 8 when 'prefixes' contains a REPZ or REPNZ prefix, 0 otherwise.
+ */
+static inline int svm_is_rep(int prefixes)
+{
+    if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+        return 8;
+    }
+    return 0;
+}
+
+static inline void
+gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
+ uint32_t type, uint64_t param)
+{ /* Emit a call to the SVM intercept check helper for intercept 'type'
+ * with argument 'param'.  Nothing is emitted unless HF_SVMI_MASK is set
+ * in s->flags.  Before the call the flag state is flushed and EIP is set
+ * to pc_start - s->cs_base. */
+ /* no SVM activated; fast case */
+ if (likely(!(s->flags & HF_SVMI_MASK)))
+ return;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_svm_check_intercept_param(tcg_const_i32(type),
+ tcg_const_i64(param));
+}
+
+static inline void
+gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
+{ /* Convenience wrapper: intercept check for 'type' with a zero parameter.
+ * Note that 'type' is narrowed to the callee's uint32_t parameter. */
+ gen_svm_check_intercept_param(s, pc_start, type, 0);
+}
+
+/*
+ * Emit "stack pointer += addend".  The size selector passed to
+ * gen_op_add_reg_im() is 2 in 64-bit code, 1 with a 32-bit stack segment
+ * (s->ss32) and 0 otherwise.
+ */
+static inline void gen_stack_update(DisasContext *s, int addend)
+{
+    int size;
+
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        size = 2;
+    } else
+#endif
+    {
+        size = s->ss32 ? 1 : 0;
+    }
+    gen_op_add_reg_im(size, R_ESP, addend);
+}
+
+/* generate a push. It depends on ss32, addseg and dflag
+
+ Emits a push of T0: the new stack offset is computed in A0, T0 is
+ stored there and ESP is updated only after the store.  In 64-bit code
+ the slot is 8 bytes (2 when dflag is 0); otherwise it is 4 bytes (2 when
+ dflag is 0).  T1 is used as scratch for the new stack offset whenever a
+ segment base has to be added to A0. */
+static void gen_push_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_movq_A0_reg(R_ESP);
+ if (s->dflag) {
+ gen_op_addq_A0_im(-8);
+ gen_op_st_T0_A0(OT_QUAD + s->mem_index);
+ } else {
+ gen_op_addq_A0_im(-2);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ }
+ gen_op_mov_reg_A0(2, R_ESP);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->dflag)
+ gen_op_addl_A0_im(-2);
+ else
+ gen_op_addl_A0_im(-4);
+ if (s->ss32) {
+ if (s->addseg) {
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0); /* keep the offset: A0 becomes a linear address below */
+ gen_op_addl_A0_seg(R_SS);
+ }
+ } else {
+ gen_op_andl_A0_ffff(); /* 16-bit stack: the offset wraps at 64K */
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ gen_op_addl_A0_seg(R_SS);
+ }
+ gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
+ if (s->ss32 && !s->addseg)
+ gen_op_mov_reg_A0(1, R_ESP); /* A0 is still the plain offset here */
+ else
+ gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
+ }
+}
+
+/*
+ * generate a push of T1. It depends on ss32, addseg and dflag.
+ * slower version for T1, only used for call Ev
+ *
+ * The new stack offset is computed in A0, T1 is stored there and ESP is
+ * updated only after the store.  Unlike gen_push_T0(), no temporary is
+ * available to remember the offset (T1 holds the value being pushed), so
+ * when A0 had to be turned into a linear address ESP is adjusted with
+ * gen_stack_update() instead.
+ */
+static void gen_push_T1(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        gen_op_movq_A0_reg(R_ESP);
+        if (s->dflag) {
+            gen_op_addq_A0_im(-8);
+            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
+        } else {
+            gen_op_addq_A0_im(-2);
+            /* 16-bit operand size: the value to push is still T1
+               (this branch used to store T0 by mistake) */
+            gen_op_st_T1_A0(OT_WORD + s->mem_index);
+        }
+        gen_op_mov_reg_A0(2, R_ESP);
+    } else
+#endif
+    {
+        gen_op_movl_A0_reg(R_ESP);
+        if (!s->dflag)
+            gen_op_addl_A0_im(-2);
+        else
+            gen_op_addl_A0_im(-4);
+        if (s->ss32) {
+            if (s->addseg) {
+                gen_op_addl_A0_seg(R_SS);
+            }
+        } else {
+            /* 16-bit stack: the offset wraps at 64K */
+            gen_op_andl_A0_ffff();
+            gen_op_addl_A0_seg(R_SS);
+        }
+        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
+
+        if (s->ss32 && !s->addseg)
+            /* A0 is still the plain offset here */
+            gen_op_mov_reg_A0(1, R_ESP);
+        else
+            gen_stack_update(s, -(2 << s->dflag));
+    }
+}
+
+/* two step pop is necessary for precise exceptions
+
+ First step: load the value at the top of the stack into T0 without
+ touching ESP.  The stack pointer is adjusted separately (see
+ gen_pop_update) once the load can no longer fault.  The load is 8 bytes
+ in 64-bit code (2 when dflag is 0), otherwise 4 bytes (2 when dflag is
+ 0). */
+static void gen_pop_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_movq_A0_reg(R_ESP);
+ gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_ESP);
+ if (s->ss32) {
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+ } else {
+ gen_op_andl_A0_ffff(); /* 16-bit stack: the offset wraps at 64K */
+ gen_op_addl_A0_seg(R_SS);
+ }
+ gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
+ }
+}
+
+/*
+ * Second step of a pop: advance the stack pointer past the popped value.
+ * The step is 2 << dflag bytes, except in 64-bit code with a non-zero
+ * dflag where it is always 8.
+ */
+static void gen_pop_update(DisasContext *s)
+{
+    int step = 2 << s->dflag;
+
+#ifdef TARGET_X86_64
+    if (CODE64(s) && s->dflag) {
+        step = 8;
+    }
+#endif
+    gen_stack_update(s, step);
+}
+
+static void gen_stack_A0(DisasContext *s)
+{ /* A0 <- address of the top of the stack: ESP (masked to 16 bits when
+ * the stack segment is not 32-bit), plus the SS base when s->addseg is
+ * set.  T1 is left holding the stack offset without the segment base. */
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->ss32)
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+}
+
+/*
+ * Emit PUSHA: store registers 7 down to 0 into eight consecutive slots
+ * of 2 << dflag bytes starting at the new stack top, then write the new
+ * stack offset (kept in T1) to ESP.
+ *
+ * NOTE: wrap around in 16 bit not fully handled
+ */
+static void gen_pusha(DisasContext *s)
+{
+    int reg;
+
+    /* A0 <- new stack top, T1 <- the same offset without segment base */
+    gen_op_movl_A0_reg(R_ESP);
+    gen_op_addl_A0_im(-(16 << s->dflag));
+    if (!s->ss32) {
+        gen_op_andl_A0_ffff();
+    }
+    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+    if (s->addseg) {
+        gen_op_addl_A0_seg(R_SS);
+    }
+
+    for (reg = 7; reg >= 0; reg--) {
+        gen_op_mov_TN_reg(OT_LONG, 0, reg);
+        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
+        gen_op_addl_A0_im(2 << s->dflag);
+    }
+
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+}
+
+/*
+ * Emit POPA: reload registers 7 down to 0 from eight consecutive stack
+ * slots of 2 << dflag bytes, skipping the slot that corresponds to ESP,
+ * then write the stack offset past those slots (kept in T1) to ESP.
+ *
+ * NOTE: wrap around in 16 bit not fully handled
+ */
+static void gen_popa(DisasContext *s)
+{
+    int reg;
+
+    /* A0 <- stack top, T1 <- stack offset after the eight slots */
+    gen_op_movl_A0_reg(R_ESP);
+    if (!s->ss32) {
+        gen_op_andl_A0_ffff();
+    }
+    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag);
+    if (s->addseg) {
+        gen_op_addl_A0_seg(R_SS);
+    }
+
+    for (reg = 7; reg >= 0; reg--) {
+        /* ESP is not reloaded (fourth slot, register number 4) */
+        if (reg != 4) {
+            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
+            gen_op_mov_reg_T0(OT_WORD + s->dflag, reg);
+        }
+        gen_op_addl_A0_im(2 << s->dflag);
+    }
+
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+}
+
+/*
+ * Emit the ENTER instruction: push EBP, let the enter_level helper
+ * handle the nested frame pointers when 'level' is non-zero, load EBP
+ * with the new frame pointer and finally lower the stack pointer by
+ * esp_addend plus one slot per nesting level.
+ *
+ * esp_addend: number of bytes to reserve for locals
+ * level:      nesting level; only the low 5 bits are used
+ *
+ * T1 holds the new frame pointer (stack offset after the push of EBP)
+ * throughout; A0 is the address used for the store.
+ */
+static void gen_enter(DisasContext *s, int esp_addend, int level)
+{
+    int ot, opsize;
+
+    level &= 0x1f;
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        ot = s->dflag ? OT_QUAD : OT_WORD;
+        opsize = 1 << ot;
+
+        /* Load the stack pointer with the 64-bit accessor, as
+           gen_push_T0() and gen_pop_T0() do in 64-bit code.  This path
+           previously used the 32-bit load; the value is written back to
+           EBP and ESP below, so that would lose the upper half of RSP. */
+        gen_op_movq_A0_reg(R_ESP);
+        gen_op_addq_A0_im(-opsize);
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+
+        /* push bp */
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
+        if (level) {
+            /* XXX: must save state */
+            gen_helper_enter64_level(tcg_const_i32(level),
+                                     tcg_const_i32((ot == OT_QUAD)),
+                                     cpu_T[1]);
+        }
+        gen_op_mov_reg_T1(ot, R_EBP);
+        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
+        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
+    } else
+#endif
+    {
+        ot = s->dflag + OT_WORD;
+        opsize = 2 << s->dflag;
+
+        gen_op_movl_A0_reg(R_ESP);
+        gen_op_addl_A0_im(-opsize);
+        if (!s->ss32)
+            gen_op_andl_A0_ffff();
+        /* T1 keeps the offset; A0 may get the SS base added */
+        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+        if (s->addseg)
+            gen_op_addl_A0_seg(R_SS);
+        /* push bp */
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
+        if (level) {
+            /* XXX: must save state */
+            gen_helper_enter_level(tcg_const_i32(level),
+                                   tcg_const_i32(s->dflag),
+                                   cpu_T[1]);
+        }
+        gen_op_mov_reg_T1(ot, R_EBP);
+        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
+        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+    }
+}
+
+/* Emit code that raises exception 'trapno' with the instruction pointer
+   set to 'cur_eip'. Translation state is_jmp becomes DISAS_TB_JUMP. */
+static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+{
+    /* store the statically known cc_op before leaving generated code */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+
+    gen_jmp_im(cur_eip);
+    gen_helper_raise_exception(tcg_const_i32(trapno));
+
+    s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* Emit code that raises software interrupt 'intno'. An interrupt differs
+   from an exception because of the privilege checks; the helper is given
+   the interrupt number and the distance from cur_eip to next_eip. */
+static void gen_interrupt(DisasContext *s, int intno,
+                          target_ulong cur_eip, target_ulong next_eip)
+{
+    target_ulong insn_len = next_eip - cur_eip;
+
+    /* store the statically known cc_op before leaving generated code */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+
+    gen_jmp_im(cur_eip);
+    gen_helper_raise_interrupt(tcg_const_i32(intno),
+                               tcg_const_i32(insn_len));
+
+    s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* Emit code that sets the instruction pointer to 'cur_eip' and calls the
+   debug helper. Translation state is_jmp becomes DISAS_TB_JUMP. */
+static void gen_debug(DisasContext *s, target_ulong cur_eip)
+{
+    /* store the statically known cc_op before leaving generated code */
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
+
+    gen_jmp_im(cur_eip);
+    gen_helper_debug();
+
+    s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* Generate a generic end of block. A debug or single step (trace)
+   exception is also generated if needed; otherwise the TB is left with
+   tcg_gen_exit_tb(0). */
+static void gen_eob(DisasContext *s)
+{
+ /* store the statically known cc_op before leaving generated code */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ /* the IRQ inhibit and RF flags are reset by helpers when they were set
+ in the flags this TB was translated with */
+ if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
+ gen_helper_reset_inhibit_irq();
+ }
+ if (s->tb->flags & HF_RF_MASK) {
+ gen_helper_reset_rf();
+ }
+ if ( s->singlestep_enabled
+#ifdef VBOX
+ /* VBOX: the debug helper is not called when CPU_EMULATE_SINGLE_STEP is
+ set and the instruction carries a REPZ/REPNZ prefix */
+ && ( !(cpu_single_env->state & CPU_EMULATE_SINGLE_STEP)
+ || !(s->prefix & (PREFIX_REPNZ | PREFIX_REPZ) ))
+#endif
+ ) {
+ gen_helper_debug();
+ } else if (s->tf) {
+ gen_helper_single_step();
+ } else {
+ tcg_gen_exit_tb(0);
+ }
+ s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* Generate a jump to 'eip'. No segment change must happen before, as a
+   direct call to the next block may occur. 'tb_num' is handed through to
+   gen_goto_tb() when jmp_opt is set; otherwise the jump is done with
+   gen_jmp_im() followed by a generic end of block. */
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
+{
+    if (!s->jmp_opt) {
+        gen_jmp_im(eip);
+        gen_eob(s);
+        return;
+    }
+
+    gen_update_cc_op(s);
+    gen_goto_tb(s, tb_num, eip);
+    s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* Jump to 'eip' via gen_jmp_tb(), always passing 0 as tb_num. */
+static void gen_jmp(DisasContext *s, target_ulong eip)
+{
+    const int tb_num = 0;
+
+    gen_jmp_tb(s, eip, tb_num);
+}
+
+/* Load 64 bits from guest memory at A0 and store them into the CPU state
+   at byte offset 'offset'. The memory index handed to the TCG load is
+   derived from 'idx' as (idx >> 2) - 1. */
+static inline void gen_ldq_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mmu_idx);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+}
+
+/* Read 64 bits from the CPU state at byte offset 'offset' and store them
+   to guest memory at A0. The memory index handed to the TCG store is
+   derived from 'idx' as (idx >> 2) - 1. */
+static inline void gen_stq_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mmu_idx);
+}
+
+/* Load 128 bits from guest memory at A0 into the XMMReg located at byte
+   offset 'offset' in the CPU state. The transfer is done as two 64 bit
+   loads: A0 goes to XMM_Q(0), A0 + 8 goes to XMM_Q(1). */
+static inline void gen_ldo_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+
+    /* low quadword */
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mmu_idx);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+
+    /* high quadword, addressed through tmp0 so that A0 stays unchanged */
+    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mmu_idx);
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+}
+
+/* Store the XMMReg located at byte offset 'offset' in the CPU state to
+   guest memory at A0. The transfer is done as two 64 bit stores: XMM_Q(0)
+   goes to A0, XMM_Q(1) goes to A0 + 8. */
+static inline void gen_sto_env_A0(int idx, int offset)
+{
+    const int mmu_idx = (idx >> 2) - 1;
+
+    /* low quadword */
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mmu_idx);
+
+    /* high quadword, addressed through tmp0 so that A0 stays unchanged */
+    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mmu_idx);
+}
+
+/* Copy 16 bytes inside the CPU state from byte offset 's_offset' to byte
+   offset 'd_offset', as two consecutive 64 bit load/store pairs (low half
+   first). */
+static inline void gen_op_movo(int d_offset, int s_offset)
+{
+    int half;
+
+    for (half = 0; half < 16; half += 8) {
+        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + half);
+        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + half);
+    }
+}
+
+/* Copy 8 bytes inside the CPU state from byte offset 's_offset' to byte
+   offset 'd_offset', going through the 64 bit temporary. */
+static inline void gen_op_movq(int d_offset, int s_offset)
+{
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+/* Copy 4 bytes inside the CPU state from byte offset 's_offset' to byte
+   offset 'd_offset', going through the 32 bit temporary. */
+static inline void gen_op_movl(int d_offset, int s_offset)
+{
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+}
+
+/* Store a 64 bit zero into the CPU state at byte offset 'd_offset'. */
+static inline void gen_op_movq_env_0(int d_offset)
+{
+ tcg_gen_movi_i64(cpu_tmp1_i64, 0);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+/* Marker values stored in the SSE dispatch tables in place of a helper
+   pointer. Both are non-NULL, so they pass the "entry present" check in
+   gen_sse():
+   - SSE_SPECIAL: the instruction is decoded by the switch in gen_sse()
+   - SSE_DUMMY: used for the entries commented femms, pf... and emms */
+#define SSE_SPECIAL ((void *)1)
+#define SSE_DUMMY ((void *)2)
+
+/* MMX_OP2(x): initializer pair { MMX helper, XMM helper } for operation x */
+#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
+/* SSE_FOP(x): initializer for the four variants of a floating point
+   operation x, in the order ps, pd, ss, sd */
+#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
+ gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
+
+/* Main MMX/SSE dispatch table, used by gen_sse() as sse_op_table1[b][b1]:
+   - b is the low 8 bits of the opcode
+   - b1 selects the prefix column: 0 = no prefix, 1 = PREFIX_DATA,
+     2 = PREFIX_REPZ, 3 = PREFIX_REPNZ
+   A NULL entry makes gen_sse() raise an illegal opcode exception. */
+static void *sse_op_table1[256][4] = {
+ /* 3DNow! extensions */
+ [0x0e] = { SSE_DUMMY }, /* femms */
+ [0x0f] = { SSE_DUMMY }, /* pf... */
+ /* pure SSE operations */
+ [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+ [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+ [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
+ [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
+ [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
+ [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
+ [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
+ [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
+
+ [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+ [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+ [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
+ [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
+ [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
+ [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
+ [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
+ [0x2f] = { gen_helper_comiss, gen_helper_comisd },
+ [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
+ [0x51] = SSE_FOP(sqrt),
+ [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
+ [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
+ [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
+ [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
+ [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
+ [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
+ [0x58] = SSE_FOP(add),
+ [0x59] = SSE_FOP(mul),
+ [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
+ gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
+ [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
+ [0x5c] = SSE_FOP(sub),
+ [0x5d] = SSE_FOP(min),
+ [0x5e] = SSE_FOP(div),
+ [0x5f] = SSE_FOP(max),
+
+ [0xc2] = SSE_FOP(cmpeq),
+ [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+
+ [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+
+ /* MMX ops and their SSE extensions */
+ [0x60] = MMX_OP2(punpcklbw),
+ [0x61] = MMX_OP2(punpcklwd),
+ [0x62] = MMX_OP2(punpckldq),
+ [0x63] = MMX_OP2(packsswb),
+ [0x64] = MMX_OP2(pcmpgtb),
+ [0x65] = MMX_OP2(pcmpgtw),
+ [0x66] = MMX_OP2(pcmpgtl),
+ [0x67] = MMX_OP2(packuswb),
+ [0x68] = MMX_OP2(punpckhbw),
+ [0x69] = MMX_OP2(punpckhwd),
+ [0x6a] = MMX_OP2(punpckhdq),
+ [0x6b] = MMX_OP2(packssdw),
+ [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
+ [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
+ [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
+ [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+ [0x70] = { gen_helper_pshufw_mmx,
+ gen_helper_pshufd_xmm,
+ gen_helper_pshufhw_xmm,
+ gen_helper_pshuflw_xmm },
+ [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
+ [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
+ [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
+ [0x74] = MMX_OP2(pcmpeqb),
+ [0x75] = MMX_OP2(pcmpeqw),
+ [0x76] = MMX_OP2(pcmpeql),
+ [0x77] = { SSE_DUMMY }, /* emms */
+ [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
+ [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
+ [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
+ [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
+ [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
+ [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+ [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
+ [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
+ [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
+ [0xd1] = MMX_OP2(psrlw),
+ [0xd2] = MMX_OP2(psrld),
+ [0xd3] = MMX_OP2(psrlq),
+ [0xd4] = MMX_OP2(paddq),
+ [0xd5] = MMX_OP2(pmullw),
+ [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* -, movq ea xmm, movq2dq, movdq2q */
+ [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
+ [0xd8] = MMX_OP2(psubusb),
+ [0xd9] = MMX_OP2(psubusw),
+ [0xda] = MMX_OP2(pminub),
+ [0xdb] = MMX_OP2(pand),
+ [0xdc] = MMX_OP2(paddusb),
+ [0xdd] = MMX_OP2(paddusw),
+ [0xde] = MMX_OP2(pmaxub),
+ [0xdf] = MMX_OP2(pandn),
+ [0xe0] = MMX_OP2(pavgb),
+ [0xe1] = MMX_OP2(psraw),
+ [0xe2] = MMX_OP2(psrad),
+ [0xe3] = MMX_OP2(pavgw),
+ [0xe4] = MMX_OP2(pmulhuw),
+ [0xe5] = MMX_OP2(pmulhw),
+ [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
+ [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntdq */
+ [0xe8] = MMX_OP2(psubsb),
+ [0xe9] = MMX_OP2(psubsw),
+ [0xea] = MMX_OP2(pminsw),
+ [0xeb] = MMX_OP2(por),
+ [0xec] = MMX_OP2(paddsb),
+ [0xed] = MMX_OP2(paddsw),
+ [0xee] = MMX_OP2(pmaxsw),
+ [0xef] = MMX_OP2(pxor),
+ [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
+ [0xf1] = MMX_OP2(psllw),
+ [0xf2] = MMX_OP2(pslld),
+ [0xf3] = MMX_OP2(psllq),
+ [0xf4] = MMX_OP2(pmuludq),
+ [0xf5] = MMX_OP2(pmaddwd),
+ [0xf6] = MMX_OP2(psadbw),
+ [0xf7] = MMX_OP2(maskmov),
+ [0xf8] = MMX_OP2(psubb),
+ [0xf9] = MMX_OP2(psubw),
+ [0xfa] = MMX_OP2(psubl),
+ [0xfb] = MMX_OP2(psubq),
+ [0xfc] = MMX_OP2(paddb),
+ [0xfd] = MMX_OP2(paddw),
+ [0xfe] = MMX_OP2(paddl),
+};
+
+/* Shift-by-immediate helpers for opcodes 0x71, 0x72 and 0x73. gen_sse()
+   indexes the first dimension with ((b - 1) & 3) * 8 + ((modrm >> 3) & 7),
+   i.e. a group of 8 per opcode selected by the modrm reg field, and the
+   second dimension with b1 (0 = MMX, 1 = XMM). NULL entries are rejected
+   as illegal opcodes. */
+static void *sse_op_table2[3 * 8][2] = {
+ [0 + 2] = MMX_OP2(psrlw),
+ [0 + 4] = MMX_OP2(psraw),
+ [0 + 6] = MMX_OP2(psllw),
+ [8 + 2] = MMX_OP2(psrld),
+ [8 + 4] = MMX_OP2(psrad),
+ [8 + 6] = MMX_OP2(pslld),
+ [16 + 2] = MMX_OP2(psrlq),
+ [16 + 3] = { NULL, gen_helper_psrldq_xmm },
+ [16 + 6] = MMX_OP2(psllq),
+ [16 + 7] = { NULL, gen_helper_pslldq_xmm },
+};
+
+/* Scalar integer <-> floating point conversion helpers, in three groups of
+   four. Within a group gen_sse() selects the entry with
+   (s->dflag == 2) * 2 + ((b >> 8) - 2): the first pair holds the 32 bit
+   integer variants (ss, sd), the second pair the 64 bit ones.
+   Group 0 is used for cvtsi2ss/cvtsi2sd; for the opcodes 0x2c/0x2d the
+   group is chosen with 4 + (b & 1) * 4, giving the truncating (cvtt...)
+   and the non-truncating conversions respectively. */
+static void *sse_op_table3[4 * 3] = {
+ gen_helper_cvtsi2ss,
+ gen_helper_cvtsi2sd,
+ X86_64_ONLY(gen_helper_cvtsq2ss),
+ X86_64_ONLY(gen_helper_cvtsq2sd),
+
+ gen_helper_cvttss2si,
+ gen_helper_cvttsd2si,
+ X86_64_ONLY(gen_helper_cvttss2sq),
+ X86_64_ONLY(gen_helper_cvttsd2sq),
+
+ gen_helper_cvtss2si,
+ gen_helper_cvtsd2si,
+ X86_64_ONLY(gen_helper_cvtss2sq),
+ X86_64_ONLY(gen_helper_cvtsd2sq),
+};
+
+/* Floating point compare helpers: one row per comparison, each row holding
+   the ps, pd, ss and sd variants (see SSE_FOP).
+   NOTE(review): the row is presumably selected by the immediate of the
+   0xc2 compare instruction and the column by the prefix index b1 - the
+   lookup is not part of this section, confirm in gen_sse(). */
+static void *sse_op_table4[8][4] = {
+ SSE_FOP(cmpeq),
+ SSE_FOP(cmplt),
+ SSE_FOP(cmple),
+ SSE_FOP(cmpunord),
+ SSE_FOP(cmpneq),
+ SSE_FOP(cmpnlt),
+ SSE_FOP(cmpnle),
+ SSE_FOP(cmpord),
+};
+
+/* Helpers for the 3DNow! operations (the "pf..." entry 0x0f of
+   sse_op_table1). Unlisted entries are NULL.
+   NOTE(review): the index is presumably the trailing 3DNow! opcode byte -
+   the lookup is not part of this section, confirm in gen_sse(). */
+static void *sse_op_table5[256] = {
+ [0x0c] = gen_helper_pi2fw,
+ [0x0d] = gen_helper_pi2fd,
+ [0x1c] = gen_helper_pf2iw,
+ [0x1d] = gen_helper_pf2id,
+ [0x8a] = gen_helper_pfnacc,
+ [0x8e] = gen_helper_pfpnacc,
+ [0x90] = gen_helper_pfcmpge,
+ [0x94] = gen_helper_pfmin,
+ [0x96] = gen_helper_pfrcp,
+ [0x97] = gen_helper_pfrsqrt,
+ [0x9a] = gen_helper_pfsub,
+ [0x9e] = gen_helper_pfadd,
+ [0xa0] = gen_helper_pfcmpgt,
+ [0xa4] = gen_helper_pfmax,
+ [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
+ [0xa7] = gen_helper_movq, /* pfrsqit1 */
+ [0xaa] = gen_helper_pfsubr,
+ [0xae] = gen_helper_pfacc,
+ [0xb0] = gen_helper_pfcmpeq,
+ [0xb4] = gen_helper_pfmul,
+ [0xb6] = gen_helper_movq, /* pfrcpit2 */
+ [0xb7] = gen_helper_pmulhrw_mmx,
+ [0xbb] = gen_helper_pswapd,
+ [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
+};
+
+/* Entry type of sse_op_table6 and sse_op_table7.
+   - op[0] / op[1]: helper for the MMX / XMM form (NULL when the form does
+     not exist), following the { mmx, xmm } order of MMX_OP2
+   - ext_mask: CPUID_EXT_* feature bit associated with the instruction
+     (NOTE(review): presumably tested against the guest CPUID features by
+     gen_sse() - the check is not part of this section) */
+struct sse_op_helper_s {
+ void *op[2]; uint32_t ext_mask;
+};
+/* Initializers: SSSE3 operations have both an MMX and an XMM form, the
+   SSE4.1/SSE4.2 ones only an XMM form. SSE41_SPECIAL marks an SSE4.1
+   instruction whose XMM slot holds the SSE_SPECIAL marker instead of a
+   helper. */
+#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
+#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
+#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
+#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+/* SSSE3/SSE4 operations without immediate operand.
+   NOTE(review): presumably indexed by the opcode byte following the 0x38
+   escape (entry 0x38 of sse_op_table1) - the lookup is not part of this
+   section, confirm in gen_sse(). */
+static struct sse_op_helper_s sse_op_table6[256] = {
+ [0x00] = SSSE3_OP(pshufb),
+ [0x01] = SSSE3_OP(phaddw),
+ [0x02] = SSSE3_OP(phaddd),
+ [0x03] = SSSE3_OP(phaddsw),
+ [0x04] = SSSE3_OP(pmaddubsw),
+ [0x05] = SSSE3_OP(phsubw),
+ [0x06] = SSSE3_OP(phsubd),
+ [0x07] = SSSE3_OP(phsubsw),
+ [0x08] = SSSE3_OP(psignb),
+ [0x09] = SSSE3_OP(psignw),
+ [0x0a] = SSSE3_OP(psignd),
+ [0x0b] = SSSE3_OP(pmulhrsw),
+ [0x10] = SSE41_OP(pblendvb),
+ [0x14] = SSE41_OP(blendvps),
+ [0x15] = SSE41_OP(blendvpd),
+ [0x17] = SSE41_OP(ptest),
+ [0x1c] = SSSE3_OP(pabsb),
+ [0x1d] = SSSE3_OP(pabsw),
+ [0x1e] = SSSE3_OP(pabsd),
+ [0x20] = SSE41_OP(pmovsxbw),
+ [0x21] = SSE41_OP(pmovsxbd),
+ [0x22] = SSE41_OP(pmovsxbq),
+ [0x23] = SSE41_OP(pmovsxwd),
+ [0x24] = SSE41_OP(pmovsxwq),
+ [0x25] = SSE41_OP(pmovsxdq),
+ [0x28] = SSE41_OP(pmuldq),
+ [0x29] = SSE41_OP(pcmpeqq),
+ [0x2a] = SSE41_SPECIAL, /* movntdqa */
+ [0x2b] = SSE41_OP(packusdw),
+ [0x30] = SSE41_OP(pmovzxbw),
+ [0x31] = SSE41_OP(pmovzxbd),
+ [0x32] = SSE41_OP(pmovzxbq),
+ [0x33] = SSE41_OP(pmovzxwd),
+ [0x34] = SSE41_OP(pmovzxwq),
+ [0x35] = SSE41_OP(pmovzxdq),
+ [0x37] = SSE42_OP(pcmpgtq),
+ [0x38] = SSE41_OP(pminsb),
+ [0x39] = SSE41_OP(pminsd),
+ [0x3a] = SSE41_OP(pminuw),
+ [0x3b] = SSE41_OP(pminud),
+ [0x3c] = SSE41_OP(pmaxsb),
+ [0x3d] = SSE41_OP(pmaxsd),
+ [0x3e] = SSE41_OP(pmaxuw),
+ [0x3f] = SSE41_OP(pmaxud),
+ [0x40] = SSE41_OP(pmulld),
+ [0x41] = SSE41_OP(phminposuw),
+};
+
+/* SSSE3/SSE4 operations taking an immediate operand.
+   NOTE(review): presumably indexed by the opcode byte following the 0x3a
+   escape (entry 0x3a of sse_op_table1) - the lookup is not part of this
+   section, confirm in gen_sse(). */
+static struct sse_op_helper_s sse_op_table7[256] = {
+ [0x08] = SSE41_OP(roundps),
+ [0x09] = SSE41_OP(roundpd),
+ [0x0a] = SSE41_OP(roundss),
+ [0x0b] = SSE41_OP(roundsd),
+ [0x0c] = SSE41_OP(blendps),
+ [0x0d] = SSE41_OP(blendpd),
+ [0x0e] = SSE41_OP(pblendw),
+ [0x0f] = SSSE3_OP(palignr),
+ [0x14] = SSE41_SPECIAL, /* pextrb */
+ [0x15] = SSE41_SPECIAL, /* pextrw */
+ [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
+ [0x17] = SSE41_SPECIAL, /* extractps */
+ [0x20] = SSE41_SPECIAL, /* pinsrb */
+ [0x21] = SSE41_SPECIAL, /* insertps */
+ [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
+ [0x40] = SSE41_OP(dpps),
+ [0x41] = SSE41_OP(dppd),
+ [0x42] = SSE41_OP(mpsadbw),
+ [0x60] = SSE42_OP(pcmpestrm),
+ [0x61] = SSE42_OP(pcmpestri),
+ [0x62] = SSE42_OP(pcmpistrm),
+ [0x63] = SSE42_OP(pcmpistri),
+};
+
+static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+{
+ int b1, op1_offset, op2_offset, is_xmm, val, ot;
+ int modrm, mod, rm, reg, reg_addr, offset_addr;
+ void *sse_op2;
+
+ b &= 0xff;
+ if (s->prefix & PREFIX_DATA)
+ b1 = 1;
+ else if (s->prefix & PREFIX_REPZ)
+ b1 = 2;
+ else if (s->prefix & PREFIX_REPNZ)
+ b1 = 3;
+ else
+ b1 = 0;
+ sse_op2 = sse_op_table1[b][b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
+ is_xmm = 1;
+ } else {
+ if (b1 == 0) {
+ /* MMX case */
+ is_xmm = 0;
+ } else {
+ is_xmm = 1;
+ }
+ }
+ /* simple MMX/SSE operation */
+ if (s->flags & HF_TS_MASK) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ return;
+ }
+ if (s->flags & HF_EM_MASK) {
+ illegal_op:
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return;
+ }
+ if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
+ if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
+ goto illegal_op;
+ if (b == 0x0e) {
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+ goto illegal_op;
+ /* femms */
+ gen_helper_emms();
+ return;
+ }
+ if (b == 0x77) {
+ /* emms */
+ gen_helper_emms();
+ return;
+ }
+ /* prepare MMX state (XXX: optimize by storing fptt and fptags in
+ the static cpu state) */
+ if (!is_xmm) {
+ gen_helper_enter_mmx();
+ }
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7);
+ if (is_xmm)
+ reg |= rex_r;
+ mod = (modrm >> 6) & 3;
+ if (sse_op2 == SSE_SPECIAL) {
+ b |= (b1 << 8);
+ switch(b) {
+ case 0x0e7: /* movntq */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ break;
+ case 0x1e7: /* movntdq */
+ case 0x02b: /* movntps */
+ case 0x12b: /* movntps */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ break;
+ case 0x3f0: /* lddqu */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ break;
+ case 0x22b: /* movntss */
+ case 0x32b: /* movntsd */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (b1 & 1) {
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,
+ xmm_regs[reg]));
+ } else {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(0)));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ }
+ break;
+ case 0x6e: /* movd mm, ea */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else
+#endif
+ {
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+ }
+ break;
+ case 0x16e: /* movd xmm, ea */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
+ } else
+#endif
+ {
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+ }
+ break;
+ case 0x6f: /* movq mm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else {
+ rm = (modrm & 7);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,fpregs[rm].mmx));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ }
+ break;
+ case 0x010: /* movups */
+ case 0x110: /* movupd */
+ case 0x028: /* movaps */
+ case 0x128: /* movapd */
+ case 0x16f: /* movdqa xmm, ea */
+ case 0x26f: /* movdqu xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+ offsetof(CPUX86State,xmm_regs[rm]));
+ }
+ break;
+ case 0x210: /* movss xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+ }
+ break;
+ case 0x310: /* movsd xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ break;
+ case 0x012: /* movlps */
+ case 0x112: /* movlpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ /* movhlps */
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+ }
+ break;
+ case 0x212: /* movsldup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+ }
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ break;
+ case 0x312: /* movddup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ break;
+ case 0x016: /* movhps */
+ case 0x116: /* movhpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ } else {
+ /* movlhps */
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ break;
+ case 0x216: /* movshdup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
+ }
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ break;
+ case 0x178:
+ case 0x378:
+ {
+ int bit_index, field_length;
+
+ if (b1 == 1 && reg != 0)
+ goto illegal_op;
+ field_length = ldub_code(s->pc++) & 0x3F;
+ bit_index = ldub_code(s->pc++) & 0x3F;
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ if (b1 == 1)
+ gen_helper_extrq_i(cpu_ptr0, tcg_const_i32(bit_index),
+ tcg_const_i32(field_length));
+ else
+ gen_helper_insertq_i(cpu_ptr0, tcg_const_i32(bit_index),
+ tcg_const_i32(field_length));
+ }
+ break;
+ case 0x7e: /* movd ea, mm */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+ } else
+#endif
+ {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+ }
+ break;
+ case 0x17e: /* movd ea, xmm */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+ } else
+#endif
+ {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+ }
+ break;
+ case 0x27e: /* movq xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ break;
+ case 0x7f: /* movq ea, mm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else {
+ rm = (modrm & 7);
+ gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ }
+ break;
+ case 0x011: /* movups */
+ case 0x111: /* movupd */
+ case 0x029: /* movaps */
+ case 0x129: /* movapd */
+ case 0x17f: /* movdqa ea, xmm */
+ case 0x27f: /* movdqu ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+ offsetof(CPUX86State,xmm_regs[reg]));
+ }
+ break;
+ case 0x211: /* movss ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ }
+ break;
+ case 0x311: /* movsd ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ }
+ break;
+ case 0x013: /* movlps */
+ case 0x113: /* movlpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ goto illegal_op;
+ }
+ break;
+ case 0x017: /* movhps */
+ case 0x117: /* movhpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ } else {
+ goto illegal_op;
+ }
+ break;
+ case 0x71: /* shift mm, im */
+ case 0x72:
+ case 0x73:
+ case 0x171: /* shift xmm, im */
+ case 0x172:
+ case 0x173:
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+ val = ldub_code(s->pc++);
+ if (is_xmm) {
+ gen_op_movl_T0_im(val);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
+ op1_offset = offsetof(CPUX86State,xmm_t0);
+ } else {
+ gen_op_movl_T0_im(val);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+ op1_offset = offsetof(CPUX86State,mmx_t0);
+ }
+ sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (is_xmm) {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x050: /* movmskps */
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x150: /* movmskpd */
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x02a: /* cvtpi2ps */
+ case 0x12a: /* cvtpi2pd */
+ gen_helper_enter_mmx();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ switch(b >> 8) {
+ case 0x0:
+ gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
+ break;
+ default:
+ case 0x1:
+ gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ break;
+ case 0x22a: /* cvtsi2ss */
+ case 0x32a: /* cvtsi2sd */
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
+ if (ot == OT_LONG) {
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
+ } else {
+ ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
+ }
+ break;
+ case 0x02c: /* cvttps2pi */
+ case 0x12c: /* cvttpd2pi */
+ case 0x02d: /* cvtps2pi */
+ case 0x12d: /* cvtpd2pi */
+ gen_helper_enter_mmx();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ switch(b) {
+ case 0x02c:
+ gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x12c:
+ gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x02d:
+ gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x12d:
+ gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ break;
+ case 0x22c: /* cvttss2si */
+ case 0x32c: /* cvttsd2si */
+ case 0x22d: /* cvtss2si */
+ case 0x32d: /* cvtsd2si */
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if ((b >> 8) & 1) {
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+ } else {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ }
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
+ (b & 1) * 4];
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+ if (ot == OT_LONG) {
+ ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ } else {
+ ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0xc4: /* pinsrw */
+ case 0x1c4:
+ s->rip_offset = 1;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ val = ldub_code(s->pc++);
+ if (b1) {
+ val &= 7;
+ tcg_gen_st16_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
+ } else {
+ val &= 3;
+ tcg_gen_st16_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
+ }
+ break;
+ case 0xc5: /* pextrw */
+ case 0x1c5:
+ if (mod != 3)
+ goto illegal_op;
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ val = ldub_code(s->pc++);
+ if (b1) {
+ val &= 7;
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
+ } else {
+ val &= 3;
+ rm = (modrm & 7);
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
+ }
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x1d6: /* movq ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+ }
+ break;
+ case 0x2d6: /* movq2dq */
+ gen_helper_enter_mmx();
+ rm = (modrm & 7);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,fpregs[rm].mmx));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ break;
+ case 0x3d6: /* movdq2q */
+ gen_helper_enter_mmx();
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ break;
+ case 0xd7: /* pmovmskb */
+ case 0x1d7:
+ if (mod != 3)
+ goto illegal_op;
+ if (b1) {
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
+ } else {
+ rm = (modrm & 7);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
+ gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x138:
+ if (s->prefix & PREFIX_REPNZ)
+ goto crc32;
+ case 0x038:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+
+ sse_op2 = sse_op_table6[b].op[b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
+ goto illegal_op;
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ switch (b) {
+ case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
+ case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
+ case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
+ gen_ldq_env_A0(s->mem_index, op2_offset +
+ offsetof(XMMReg, XMM_Q(0)));
+ break;
+ case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
+ case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+ offsetof(XMMReg, XMM_L(0)));
+ break;
+ case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
+ tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+ offsetof(XMMReg, XMM_W(0)));
+ break;
+ case 0x2a: /* movntqda */
+ gen_ldo_env_A0(s->mem_index, op1_offset);
+ return;
+ default:
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ if (sse_op2 == SSE_SPECIAL)
+ goto illegal_op;
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+
+ if (b == 0x17)
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x338: /* crc32 */
+ crc32:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ if (b != 0xf0 && b != 0xf1)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
+ goto illegal_op;
+
+ if (b == 0xf0)
+ ot = OT_BYTE;
+ else if (b == 0xf1 && s->dflag != 2)
+ if (s->prefix & PREFIX_DATA)
+ ot = OT_WORD;
+ else
+ ot = OT_LONG;
+ else
+ ot = OT_QUAD;
+
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+ cpu_T[0], tcg_const_i32(8 << ot));
+
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x03a:
+ case 0x13a:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+
+ sse_op2 = sse_op_table7[b].op[b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
+ goto illegal_op;
+
+ if (sse_op2 == SSE_SPECIAL) {
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3)
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ val = ldub_code(s->pc++);
+ switch (b) {
+ case 0x14: /* pextrb */
+ tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_B(val & 15)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x15: /* pextrw */
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_W(val & 7)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x16:
+ if (ot == OT_LONG) { /* pextrd */
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ if (mod == 3)
+ gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+ else
+ tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ } else { /* pextrq */
+#ifdef TARGET_X86_64
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_Q(val & 1)));
+ if (mod == 3)
+ gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
+ else
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+#else
+ goto illegal_op;
+#endif
+ }
+ break;
+ case 0x17: /* extractps */
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x20: /* pinsrb */
+ if (mod == 3)
+ gen_op_mov_TN_reg(OT_LONG, 0, rm);
+ else
+ tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_B(val & 15)));
+ break;
+ case 0x21: /* insertps */
+ if (mod == 3) {
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]
+ .XMM_L((val >> 6) & 3)));
+ } else {
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ }
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]
+ .XMM_L((val >> 4) & 3)));
+ if ((val >> 0) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(0)));
+ if ((val >> 1) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(1)));
+ if ((val >> 2) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(2)));
+ if ((val >> 3) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(3)));
+ break;
+ case 0x22:
+ if (ot == OT_LONG) { /* pinsrd */
+ if (mod == 3)
+ gen_op_mov_v_reg(ot, cpu_tmp0, rm);
+ else
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ } else { /* pinsrq */
+#ifdef TARGET_X86_64
+ if (mod == 3)
+ gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+ else
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_Q(val & 1)));
+#else
+ goto illegal_op;
+#endif
+ }
+ break;
+ }
+ return;
+ }
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ val = ldub_code(s->pc++);
+
+ if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
+ s->cc_op = CC_OP_EFLAGS;
+
+ if (s->dflag == 2)
+ /* The helper must use entire 64-bit gp registers */
+ val |= 1 << 8;
+ }
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else {
+ /* generic MMX or SSE operation */
+ switch(b) {
+ case 0x70: /* pshufx insn */
+ case 0xc6: /* pshufx insn */
+ case 0xc2: /* compare insns */
+ s->rip_offset = 1;
+ break;
+ default:
+ break;
+ }
+ if (is_xmm) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
+ b == 0xc2)) {
+ /* specific case for SSE single instructions */
+ if (b1 == 2) {
+ /* 32 bit access */
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ } else {
+ /* 64 bit access */
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
+ }
+ } else {
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ }
+ switch(b) {
+ case 0x0f: /* 3DNow! data insns */
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ sse_op2 = sse_op_table5[val];
+ if (!sse_op2)
+ goto illegal_op;
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x70: /* pshufx insn */
+ case 0xc6: /* pshufx insn */
+ val = ldub_code(s->pc++);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+ break;
+ case 0xc2:
+ /* compare insns */
+ val = ldub_code(s->pc++);
+ if (val >= 8)
+ goto illegal_op;
+ sse_op2 = sse_op_table4[val][b1];
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0xf7:
+ /* maskmov : we must prepare A0 */
+ if (mod != 3)
+ goto illegal_op;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EDI);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EDI);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ }
+ gen_add_A0_ds_seg(s);
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+ break;
+ default:
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ if (b == 0x2e || b == 0x2f) {
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ }
+}
+
+#ifdef VBOX
+ /* Checks if it's an invalid lock sequence.  Only a few instructions
+    can be used together with the lock prefix, and of those only the
+    forms that write a memory operand.  So, this is kind of annoying
+    work to do...
+    The AMD manual lists the following instructions.
+         ADC
+         ADD
+         AND
+         BTC
+         BTR
+         BTS
+         CMPXCHG
+         CMPXCHG8B
+         CMPXCHG16B
+         DEC
+         INC
+         NEG
+         NOT
+         OR
+         SBB
+         SUB
+         XADD
+         XCHG
+         XOR
+
+    Parameters:
+      s        - translation context; s->pc must already point at the byte
+                 following the opcode byte 'b' (this is how disas_insn calls
+                 it).  s->pc itself is not modified, the following bytes are
+                 peeked through a local copy.
+      pc_start - address of the first byte of the instruction, only used
+                 for logging.
+      b        - first opcode byte following the prefixes.
+
+    Returns true if the lock prefix is not allowed for this instruction
+    form (the caller is expected to raise an invalid opcode exception),
+    false if the sequence is a lockable read-modify-write of memory. */
+ static bool is_invalid_lock_sequence(DisasContext *s, target_ulong pc_start, int b)
+ {
+     target_ulong pc = s->pc;
+     int modrm, mod, op;
+
+     /* X={8,16,32,64}  Y={16,32,64} */
+     switch (b)
+     {
+         /* /2: ADC reg/memX, immX */
+         /* /0: ADD reg/memX, immX */
+         /* /4: AND reg/memX, immX */
+         /* /1: OR  reg/memX, immX */
+         /* /3: SBB reg/memX, immX */
+         /* /5: SUB reg/memX, immX */
+         /* /6: XOR reg/memX, immX */
+         case 0x80:
+         case 0x81:
+         case 0x83:
+             modrm = ldub_code(pc++);
+             op = (modrm >> 3) & 7;
+             if (op == 7) /* /7: CMP */
+                 break;
+             mod = (modrm >> 6) & 3;
+             if (mod == 3) /* register destination */
+                 break;
+             return false;
+
+         case 0x10: /* /r: ADC reg/mem8, reg8 */
+         case 0x11: /* /r: ADC reg/memX, regY */
+         case 0x00: /* /r: ADD reg/mem8, reg8 */
+         case 0x01: /* /r: ADD reg/memX, regY */
+         case 0x20: /* /r: AND reg/mem8, reg8 */
+         case 0x21: /* /r: AND reg/memY, regY */
+         case 0x08: /* /r: OR  reg/mem8, reg8 */
+         case 0x09: /* /r: OR  reg/memY, regY */
+         case 0x18: /* /r: SBB reg/mem8, reg8 */
+         case 0x19: /* /r: SBB reg/memY, regY */
+         case 0x28: /* /r: SUB reg/mem8, reg8 */
+         case 0x29: /* /r: SUB reg/memY, regY */
+         case 0x86: /* /r: XCHG reg/mem8, reg8 or XCHG reg8, reg/mem8 */
+         case 0x87: /* /r: XCHG reg/memY, regY or XCHG regY, reg/memY */
+         case 0x30: /* /r: XOR reg/mem8, reg8 */
+         case 0x31: /* /r: XOR reg/memY, regY */
+             modrm = ldub_code(pc++);
+             mod = (modrm >> 6) & 3;
+             if (mod == 3) /* register destination */
+                 break;
+             return false;
+
+         /* /1: DEC reg/memX */
+         /* /0: INC reg/memX */
+         case 0xfe:
+         case 0xff:
+             modrm = ldub_code(pc++);
+             op = (modrm >> 3) & 7;
+             if (op > 1) /* /2../7: call, lcall, jmp, ljmp, push or undefined - not lockable */
+                 break;
+             mod = (modrm >> 6) & 3;
+             if (mod == 3) /* register destination */
+                 break;
+             return false;
+
+         /* /3: NEG reg/memX */
+         /* /2: NOT reg/memX */
+         case 0xf6:
+         case 0xf7:
+             modrm = ldub_code(pc++);
+             op = (modrm >> 3) & 7;
+             if (op != 2 && op != 3) /* test, mul, imul, div, idiv - not lockable */
+                 break;
+             mod = (modrm >> 6) & 3;
+             if (mod == 3) /* register destination */
+                 break;
+             return false;
+
+         case 0x0f:
+             /* two byte opcode: from here on 'b' holds the second opcode
+                byte, which is also what gets logged below. */
+             b = ldub_code(pc++);
+             switch (b)
+             {
+                 /* /7: BTC reg/memY, imm8 */
+                 /* /6: BTR reg/memY, imm8 */
+                 /* /5: BTS reg/memY, imm8 */
+                 case 0xba:
+                     modrm = ldub_code(pc++);
+                     op = (modrm >> 3) & 7;
+                     if (op < 5) /* /4: BT only reads, /0../3 are undefined */
+                         break;
+                     mod = (modrm >> 6) & 3;
+                     if (mod == 3) /* register destination */
+                         break;
+                     return false;
+
+                 case 0xbb: /* /r: BTC reg/memY, regY */
+                 case 0xb3: /* /r: BTR reg/memY, regY */
+                 case 0xab: /* /r: BTS reg/memY, regY */
+                 case 0xb0: /* /r: CMPXCHG reg/mem8, reg8 */
+                 case 0xb1: /* /r: CMPXCHG reg/memY, regY */
+                 case 0xc0: /* /r: XADD reg/mem8, reg8 */
+                 case 0xc1: /* /r: XADD reg/memY, regY */
+                     modrm = ldub_code(pc++);
+                     mod = (modrm >> 6) & 3;
+                     if (mod == 3) /* register destination */
+                         break;
+                     return false;
+
+                 /* /1: CMPXCHG8B mem64 or CMPXCHG16B mem128 */
+                 case 0xc7:
+                     modrm = ldub_code(pc++);
+                     op = (modrm >> 3) & 7;
+                     if (op != 1)
+                         break;
+                     mod = (modrm >> 6) & 3;
+                     if (mod == 3) /* only the memory form exists / is lockable */
+                         break;
+                     return false;
+
+                 default:
+                     break;
+             }
+             break;
+
+         default:
+             break;
+     }
+
+     /* illegal sequence. The s->pc is past the lock prefix and that
+        is sufficient for the TB, I think. */
+     Log(("illegal lock sequence %RGv (b=%#x)\n", pc_start, b));
+     return true;
+ }
+#endif /* VBOX */
+
+/* convert one instruction. s->is_jmp is set if the translation must
+ be stopped. Return the next pc value */
+static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+{
+ int b, prefixes, aflag, dflag;
+ int shift, ot;
+ int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+ target_ulong next_eip, tval;
+ int rex_w, rex_r;
+
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+ tcg_gen_debug_insn_start(pc_start);
+ s->pc = pc_start;
+ prefixes = 0;
+ aflag = s->code32;
+ dflag = s->code32;
+ s->override = -1;
+ rex_w = -1;
+ rex_r = 0;
+#ifdef TARGET_X86_64
+ s->rex_x = 0;
+ s->rex_b = 0;
+ x86_64_hregs = 0;
+#endif
+ s->rip_offset = 0; /* for relative ip address */
+#ifdef VBOX
+ /* nike: seems only slow down things */
+# if 0
+ /* Always update EIP. Otherwise one must be very careful with generated code that can raise exceptions. */
+
+ gen_update_eip(pc_start - s->cs_base);
+# endif
+#endif /* VBOX */
+
+ next_byte:
+ b = ldub_code(s->pc);
+ s->pc++;
+ /* check prefixes */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ switch (b) {
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ goto next_byte;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ goto next_byte;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ goto next_byte;
+ case 0x2e:
+ s->override = R_CS;
+ goto next_byte;
+ case 0x36:
+ s->override = R_SS;
+ goto next_byte;
+ case 0x3e:
+ s->override = R_DS;
+ goto next_byte;
+ case 0x26:
+ s->override = R_ES;
+ goto next_byte;
+ case 0x64:
+ s->override = R_FS;
+ goto next_byte;
+ case 0x65:
+ s->override = R_GS;
+ goto next_byte;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ goto next_byte;
+ case 0x67:
+ prefixes |= PREFIX_ADR;
+ goto next_byte;
+ case 0x40 ... 0x4f:
+ /* REX prefix */
+ rex_w = (b >> 3) & 1;
+ rex_r = (b & 0x4) << 1;
+ s->rex_x = (b & 0x2) << 2;
+ REX_B(s) = (b & 0x1) << 3;
+ x86_64_hregs = 1; /* select uniform byte register addressing */
+ goto next_byte;
+ }
+ if (rex_w == 1) {
+ /* 0x66 is ignored if rex.w is set */
+ dflag = 2;
+ } else {
+ if (prefixes & PREFIX_DATA)
+ dflag ^= 1;
+ }
+ if (!(prefixes & PREFIX_ADR))
+ aflag = 2;
+ } else
+#endif
+ {
+ switch (b) {
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ goto next_byte;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ goto next_byte;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ goto next_byte;
+ case 0x2e:
+ s->override = R_CS;
+ goto next_byte;
+ case 0x36:
+ s->override = R_SS;
+ goto next_byte;
+ case 0x3e:
+ s->override = R_DS;
+ goto next_byte;
+ case 0x26:
+ s->override = R_ES;
+ goto next_byte;
+ case 0x64:
+ s->override = R_FS;
+ goto next_byte;
+ case 0x65:
+ s->override = R_GS;
+ goto next_byte;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ goto next_byte;
+ case 0x67:
+ prefixes |= PREFIX_ADR;
+ goto next_byte;
+ }
+ if (prefixes & PREFIX_DATA)
+ dflag ^= 1;
+ if (prefixes & PREFIX_ADR)
+ aflag ^= 1;
+ }
+
+ s->prefix = prefixes;
+ s->aflag = aflag;
+ s->dflag = dflag;
+
+ /* lock generation */
+#ifndef VBOX
+ if (prefixes & PREFIX_LOCK)
+ gen_helper_lock();
+#else /* VBOX */
+ if (prefixes & PREFIX_LOCK) {
+ if (is_invalid_lock_sequence(s, pc_start, b)) {
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return s->pc;
+ }
+ gen_helper_lock();
+ }
+#endif /* VBOX */
+
+ /* now check op code */
+ reswitch:
+ switch(b) {
+ case 0x0f:
+ /**************************/
+ /* extended op code */
+ b = ldub_code(s->pc++) | 0x100;
+ goto reswitch;
+
+ /**************************/
+ /* arith & logic */
+ case 0x00 ... 0x05:
+ case 0x08 ... 0x0d:
+ case 0x10 ... 0x15:
+ case 0x18 ... 0x1d:
+ case 0x20 ... 0x25:
+ case 0x28 ... 0x2d:
+ case 0x30 ... 0x35:
+ case 0x38 ... 0x3d:
+ {
+ int op, f, val;
+ op = (b >> 3) & 7;
+ f = (b >> 1) & 3;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ switch(f) {
+ case 0: /* OP Ev, Gv */
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else if (op == OP_XORL && rm == reg) {
+ xor_zero:
+ /* xor reg, reg optimisation */
+ gen_op_movl_T0_0();
+ s->cc_op = CC_OP_LOGICB + ot;
+ gen_op_mov_reg_T0(ot, reg);
+ gen_op_update1_cc();
+ break;
+ } else {
+ opreg = rm;
+ }
+ gen_op_mov_TN_reg(ot, 1, reg);
+ gen_op(s, op, ot, opreg);
+ break;
+ case 1: /* OP Gv, Ev */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ } else if (op == OP_XORL && rm == reg) {
+ goto xor_zero;
+ } else {
+ gen_op_mov_TN_reg(ot, 1, rm);
+ }
+ gen_op(s, op, ot, reg);
+ break;
+ case 2: /* OP A, Iv */
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ gen_op(s, op, ot, OR_EAX);
+ break;
+ }
+ }
+ break;
+
+ case 0x82:
+ if (CODE64(s))
+ goto illegal_op;
+ case 0x80: /* GRP1 */
+ case 0x81:
+ case 0x83:
+ {
+ int val;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+
+ if (mod != 3) {
+ if (b == 0x83)
+ s->rip_offset = 1;
+ else
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = rm;
+ }
+
+ switch(b) {
+ default:
+ case 0x80:
+ case 0x81:
+ case 0x82:
+ val = insn_get(s, ot);
+ break;
+ case 0x83:
+ val = (int8_t)insn_get(s, OT_BYTE);
+ break;
+ }
+ gen_op_movl_T1_im(val);
+ gen_op(s, op, ot, opreg);
+ }
+ break;
+
+ /**************************/
+ /* inc, dec, and other misc arith */
+ case 0x40 ... 0x47: /* inc Gv */
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_inc(s, ot, OR_EAX + (b & 7), 1);
+ break;
+ case 0x48 ... 0x4f: /* dec Gv */
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_inc(s, ot, OR_EAX + (b & 7), -1);
+ break;
+ case 0xf6: /* GRP3 */
+ case 0xf7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+ if (mod != 3) {
+ if (op == 0)
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+
+ switch(op) {
+ case 0: /* test */
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+ case 2: /* not */
+ tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+ if (mod != 3) {
+ gen_op_st_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ break;
+ case 3: /* neg */
+ tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+ if (mod != 3) {
+ gen_op_st_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ gen_op_update_neg_cc();
+ s->cc_op = CC_OP_SUBB + ot;
+ break;
+ case 4: /* mul */
+ switch(ot) {
+ case OT_BYTE:
+ gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
+ s->cc_op = CC_OP_MULB;
+ break;
+ case OT_WORD:
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ s->cc_op = CC_OP_MULW;
+ break;
+ default:
+ case OT_LONG:
+#ifdef TARGET_X86_64
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+ tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ }
+#endif
+ s->cc_op = CC_OP_MULL;
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_helper_mulq_EAX_T0(cpu_T[0]);
+ s->cc_op = CC_OP_MULQ;
+ break;
+#endif
+ }
+ break;
+ case 5: /* imul */
+ switch(ot) {
+ case OT_BYTE:
+ gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ s->cc_op = CC_OP_MULB;
+ break;
+ case OT_WORD:
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ s->cc_op = CC_OP_MULW;
+ break;
+ default:
+ case OT_LONG:
+#ifdef TARGET_X86_64
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+ tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ }
+#endif
+ s->cc_op = CC_OP_MULL;
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_helper_imulq_EAX_T0(cpu_T[0]);
+ s->cc_op = CC_OP_MULQ;
+ break;
+#endif
+ }
+ break;
+ case 6: /* div */
+ switch(ot) {
+ case OT_BYTE:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divb_AL(cpu_T[0]);
+ break;
+ case OT_WORD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divw_AX(cpu_T[0]);
+ break;
+ default:
+ case OT_LONG:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divl_EAX(cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divq_EAX(cpu_T[0]);
+ break;
+#endif
+ }
+ break;
+ case 7: /* idiv */
+ switch(ot) {
+ case OT_BYTE:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivb_AL(cpu_T[0]);
+ break;
+ case OT_WORD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivw_AX(cpu_T[0]);
+ break;
+ default:
+ case OT_LONG:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivl_EAX(cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivq_EAX(cpu_T[0]);
+ break;
+#endif
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+
+ case 0xfe: /* GRP4 */
+ case 0xff: /* GRP5 */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+ if (op >= 2 && b == 0xfe) {
+ goto illegal_op;
+ }
+ if (CODE64(s)) {
+ if (op == 2 || op == 4) {
+ /* operand size for jumps is 64 bit */
+ ot = OT_QUAD;
+ } else if (op == 3 || op == 5) {
+ ot = dflag ? OT_LONG + (rex_w == 1) : OT_WORD;
+ } else if (op == 6) {
+ /* default push size is 64 bit */
+ ot = dflag ? OT_QUAD : OT_WORD;
+ }
+ }
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (op >= 2 && op != 3 && op != 5)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+
+ switch(op) {
+ case 0: /* inc Ev */
+ if (mod != 3)
+ opreg = OR_TMP0;
+ else
+ opreg = rm;
+ gen_inc(s, ot, opreg, 1);
+ break;
+ case 1: /* dec Ev */
+ if (mod != 3)
+ opreg = OR_TMP0;
+ else
+ opreg = rm;
+ gen_inc(s, ot, opreg, -1);
+ break;
+ case 2: /* call Ev */
+ /* XXX: optimize if memory (no 'and' is necessary) */
+#ifdef VBOX_WITH_CALL_RECORD
+ if (s->record_call)
+ gen_op_record_call();
+#endif
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ next_eip = s->pc - s->cs_base;
+ gen_movtl_T1_im(next_eip);
+ gen_push_T1(s);
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 3: /* lcall Ev */
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ do_lcall:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(dflag),
+ tcg_const_i32(s->pc - pc_start));
+ } else {
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(dflag),
+ tcg_const_i32(s->pc - s->cs_base));
+ }
+ gen_eob(s);
+ break;
+ case 4: /* jmp Ev */
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 5: /* ljmp Ev */
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ do_ljmp:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(s->pc - pc_start));
+ } else {
+ gen_op_movl_seg_T0_vm(R_CS);
+ gen_op_movl_T0_T1();
+ gen_op_jmp_T0();
+ }
+ gen_eob(s);
+ break;
+ case 6: /* push Ev */
+ gen_push_T0(s);
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+
+ case 0x84: /* test Ev, Gv */
+ case 0x85:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_op_mov_TN_reg(ot, 1, reg);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+
+ case 0xa8: /* test eAX, Iv */
+ case 0xa9:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ val = insn_get(s, ot);
+
+ gen_op_mov_TN_reg(ot, 0, OR_EAX);
+ gen_op_movl_T1_im(val);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+
+ case 0x98: /* CWDE/CBW */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_QUAD, R_EAX);
+ } else
+#endif
+ if (dflag == 1) {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ } else {
+ gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ }
+ break;
+ case 0x99: /* CDQ/CWD */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
+ gen_op_mov_reg_T0(OT_QUAD, R_EDX);
+ } else
+#endif
+ if (dflag == 1) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ } else {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ }
+ break;
+ case 0x1af: /* imul Gv, Ev */
+ case 0x69: /* imul Gv, Ev, I */
+ case 0x6b:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (b == 0x69)
+ s->rip_offset = insn_const_size(ot);
+ else if (b == 0x6b)
+ s->rip_offset = 1;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ if (b == 0x69) {
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ } else if (b == 0x6b) {
+ val = (int8_t)insn_get(s, OT_BYTE);
+ gen_op_movl_T1_im(val);
+ } else {
+ gen_op_mov_TN_reg(ot, 1, reg);
+ }
+
+#ifdef TARGET_X86_64
+ if (ot == OT_QUAD) {
+ gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else
+#endif
+ if (ot == OT_LONG) {
+#ifdef TARGET_X86_64
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+ tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+ }
+#endif
+ } else {
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ s->cc_op = CC_OP_MULB + ot;
+ break;
+ case 0x1c0:
+ case 0x1c1: /* xadd Ev, Gv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_mov_TN_reg(ot, 1, rm);
+ gen_op_addl_T0_T1();
+ gen_op_mov_reg_T1(ot, reg);
+ gen_op_mov_reg_T0(ot, rm);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_addl_T0_T1();
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T1(ot, reg);
+ }
+ gen_op_update2_cc();
+ s->cc_op = CC_OP_ADDB + ot;
+ break;
+ case 0x1b0:
+ case 0x1b1: /* cmpxchg Ev, Gv: compare the accumulator with Ev; when equal Ev receives Gv, otherwise the accumulator receives Ev */
+ {
+ int label1, label2;
+ TCGv t0, t1, t2, a0; /* t0 = value of Ev, t1 = value of Gv, t2 = accumulator - Ev, a0 = saved memory address */
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+ gen_op_mov_v_reg(ot, t1, reg);
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_v_reg(ot, t0, rm);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_mov_tl(a0, cpu_A0); /* keep a private copy of the address for the store further down */
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ rm = 0; /* rm is unused on the memory path; assigned only to avoid a compiler warning */
+ }
+ label1 = gen_new_label();
+ tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0);
+ gen_extu(ot, t2); /* NOTE(review): presumably zero-extends t2 to the operand size - confirm against gen_extu */
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); /* difference of zero means the values matched: go to label1 */
+ if (mod == 3) {
+ label2 = gen_new_label();
+ gen_op_mov_reg_v(ot, R_EAX, t0); /* mismatch: accumulator receives the destination value */
+ tcg_gen_br(label2);
+ gen_set_label(label1);
+ gen_op_mov_reg_v(ot, rm, t1); /* match: destination register receives Gv */
+ gen_set_label(label2);
+ } else {
+ tcg_gen_mov_tl(t1, t0); /* mismatch: the store below writes the old memory value back */
+ gen_op_mov_reg_v(ot, R_EAX, t0);
+ gen_set_label(label1);
+ /* the store is emitted on both paths: Gv on a match, the unchanged old value on a mismatch */
+ gen_op_st_v(ot + s->mem_index, t1, a0);
+ }
+ tcg_gen_mov_tl(cpu_cc_src, t0);
+ tcg_gen_mov_tl(cpu_cc_dst, t2);
+ s->cc_op = CC_OP_SUBB + ot; /* flags are derived as for a subtraction of this operand size */
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+ }
+ break;
+ case 0x1c7: /* cmpxchg8b */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if ((mod == 3) || ((modrm & 0x38) != 0x8))
+ goto illegal_op;
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
+ goto illegal_op;
+ gen_jmp_im(pc_start - s->cs_base);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_cmpxchg16b(cpu_A0);
+ } else
+#endif
+ {
+ if (!(s->cpuid_features & CPUID_CX8))
+ goto illegal_op;
+ gen_jmp_im(pc_start - s->cs_base);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_cmpxchg8b(cpu_A0);
+ }
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+
+ /**************************/
+ /* push/pop */
+ case 0x50 ... 0x57: /* push */
+ gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
+ gen_push_T0(s);
+ break;
+ case 0x58 ... 0x5f: /* pop */
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ gen_pop_T0(s);
+ /* NOTE: order is important for pop %sp */
+ gen_pop_update(s);
+ gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
+ break;
+ case 0x60: /* pusha */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_pusha(s);
+ break;
+ case 0x61: /* popa */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_popa(s);
+ break;
+ case 0x68: /* push Iv */
+ case 0x6a:
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ if (b == 0x68)
+ val = insn_get(s, ot);
+ else
+ val = (int8_t)insn_get(s, OT_BYTE);
+ gen_op_movl_T0_im(val);
+ gen_push_T0(s);
+ break;
+ case 0x8f: /* pop Ev */
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ gen_pop_T0(s);
+ if (mod == 3) {
+ /* NOTE: order is important for pop %sp */
+ gen_pop_update(s);
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_reg_T0(ot, rm);
+ } else {
+ /* NOTE: order is important too for MMU exceptions */
+ s->popl_esp_hack = 1 << ot;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ s->popl_esp_hack = 0;
+ gen_pop_update(s);
+ }
+ break;
+ case 0xc8: /* enter */
+ {
+ int level;
+ val = lduw_code(s->pc);
+ s->pc += 2;
+ level = ldub_code(s->pc++);
+ gen_enter(s, val, level);
+ }
+ break;
+ case 0xc9: /* leave */
+ /* XXX: exception not precise (ESP is updated before potential exception) */
+ if (CODE64(s)) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_QUAD, R_ESP);
+ } else if (s->ss32) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_LONG, R_ESP);
+ } else {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_WORD, R_ESP);
+ }
+ gen_pop_T0(s);
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ gen_op_mov_reg_T0(ot, R_EBP);
+ gen_pop_update(s);
+ break;
+ case 0x06: /* push es */
+ case 0x0e: /* push cs */
+ case 0x16: /* push ss */
+ case 0x1e: /* push ds */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_op_movl_T0_seg(b >> 3);
+ gen_push_T0(s);
+ break;
+ case 0x1a0: /* push fs */
+ case 0x1a8: /* push gs */
+ gen_op_movl_T0_seg((b >> 3) & 7);
+ gen_push_T0(s);
+ break;
+ case 0x07: /* pop es */
+ case 0x17: /* pop ss */
+ case 0x1f: /* pop ds */
+ if (CODE64(s))
+ goto illegal_op;
+ reg = b >> 3;
+ gen_pop_T0(s);
+ gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+ gen_pop_update(s);
+ if (reg == R_SS) {
+ /* if reg == SS, inhibit interrupts/trace. */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ s->tf = 0;
+ }
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x1a1: /* pop fs */
+ case 0x1a9: /* pop gs */
+ gen_pop_T0(s);
+ gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
+ gen_pop_update(s);
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+
+ /**************************/
+ /* mov */
+ case 0x88:
+ case 0x89: /* mov Ev, Gv (register stored into the r/m operand) */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ /* generate a generic store of register 'reg' to the modrm operand */
+ gen_ldst_modrm(s, modrm, ot, reg, 1);
+ break;
+ case 0xc6:
+ case 0xc7: /* mov Ev, Iv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod != 3) {
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ }
+ val = insn_get(s, ot);
+ gen_op_movl_T0_im(val);
+ if (mod != 3)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
+ break;
+ case 0x8a:
+ case 0x8b: /* mov Gv, Ev (r/m operand loaded into the register) */
+#ifdef VBOX /* dtrace hot fix: a LOCK prefix on this opcode is rejected as an illegal instruction */
+ if (prefixes & PREFIX_LOCK)
+ goto illegal_op;
+#endif
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = OT_WORD + dflag;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); /* load the modrm operand into T0 */
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x8e: /* mov seg, Gv */
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ if (reg >= 6 || reg == R_CS)
+ goto illegal_op;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+ if (reg == R_SS) {
+ /* if reg == SS, inhibit interrupts/trace */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ s->tf = 0;
+ }
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x8c: /* mov Gv, seg */
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ if (reg >= 6)
+ goto illegal_op;
+ gen_op_movl_T0_seg(reg);
+ if (mod == 3)
+ ot = OT_WORD + dflag;
+ else
+ ot = OT_WORD;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+
+ case 0x1b6: /* movzbS Gv, Eb */
+ case 0x1b7: /* movzwS Gv, Ew */
+ case 0x1be: /* movsbS Gv, Eb */
+ case 0x1bf: /* movswS Gv, Ew */
+ {
+ int d_ot;
+ /* d_ot is the size of destination */
+ d_ot = dflag + OT_WORD;
+ /* ot is the size of source: opcode bit 0 picks byte (clear) or word (set) */
+ ot = (b & 1) + OT_BYTE;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+
+ if (mod == 3) {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ switch(ot | (b & 8)) { /* opcode bit 3 selects sign extension (0x1be/0x1bf) instead of zero extension */
+ case OT_BYTE:
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ break;
+ case OT_BYTE | 8:
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ break;
+ case OT_WORD:
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+ break;
+ default:
+ case OT_WORD | 8:
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ break;
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (b & 8) {
+ gen_op_lds_T0_A0(ot + s->mem_index); /* memory source of the sign-extending forms */
+ } else {
+ gen_op_ldu_T0_A0(ot + s->mem_index); /* memory source of the zero-extending forms */
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ }
+ }
+ break;
+
+ case 0x8d: /* lea */
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ /* we must ensure that no segment is added */
+ s->override = -1;
+ val = s->addseg;
+ s->addseg = 0;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ s->addseg = val;
+ gen_op_mov_reg_A0(ot - OT_WORD, reg);
+ break;
+
+ case 0xa0: /* mov EAX, Ov */
+ case 0xa1:
+ case 0xa2: /* mov Ov, EAX */
+ case 0xa3:
+ {
+ target_ulong offset_addr;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ offset_addr = ldq_code(s->pc);
+ s->pc += 8;
+ gen_op_movq_A0_im(offset_addr);
+ } else
+#endif
+ {
+ if (s->aflag) {
+ offset_addr = insn_get(s, OT_LONG);
+ } else {
+ offset_addr = insn_get(s, OT_WORD);
+ }
+ gen_op_movl_A0_im(offset_addr);
+ }
+ gen_add_A0_ds_seg(s);
+ if ((b & 2) == 0) {
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T0(ot, R_EAX);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, R_EAX);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ }
+ }
+ break;
+ case 0xd7: /* xlat */
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EBX);
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ else
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+ }
+ gen_add_A0_ds_seg(s);
+ gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
+ gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+ break;
+ case 0xb0 ... 0xb7: /* mov R, Ib */
+ val = insn_get(s, OT_BYTE);
+ gen_op_movl_T0_im(val);
+ gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
+ break;
+ case 0xb8 ... 0xbf: /* mov R, Iv */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ uint64_t tmp;
+ /* 64 bit case */
+ tmp = ldq_code(s->pc);
+ s->pc += 8;
+ reg = (b & 7) | REX_B(s);
+ gen_movtl_T0_im(tmp);
+ gen_op_mov_reg_T0(OT_QUAD, reg);
+ } else
+#endif
+ {
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = insn_get(s, ot);
+ reg = (b & 7) | REX_B(s);
+ gen_op_movl_T0_im(val);
+ gen_op_mov_reg_T0(ot, reg);
+ }
+ break;
+
+ case 0x91 ... 0x97: /* xchg R, EAX */
+ do_xchg_reg_eax: /* NOTE(review): no goto to this label is visible in this part of the file; presumably used from another case */
+ ot = dflag + OT_WORD;
+ reg = (b & 7) | REX_B(s);
+ rm = R_EAX;
+ goto do_xchg_reg; /* shares the register-to-register path of 0x86/0x87 below */
+ case 0x86:
+ case 0x87: /* xchg Ev, Gv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ do_xchg_reg:
+ gen_op_mov_TN_reg(ot, 0, reg); /* T0 = reg, T1 = rm, then write them back crosswise */
+ gen_op_mov_TN_reg(ot, 1, rm);
+ gen_op_mov_reg_T0(ot, rm);
+ gen_op_mov_reg_T1(ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ /* for xchg, lock is implicit: the lock helper is emitted here only when no LOCK prefix was given */
+ if (!(prefixes & PREFIX_LOCK))
+ gen_helper_lock();
+ gen_op_ld_T1_A0(ot + s->mem_index); /* T1 = old memory value */
+ gen_op_st_T0_A0(ot + s->mem_index); /* memory = register value */
+ if (!(prefixes & PREFIX_LOCK))
+ gen_helper_unlock();
+ gen_op_mov_reg_T1(ot, reg); /* register = old memory value */
+ }
+ break;
+ case 0xc4: /* les Gv (rejected in 64-bit code) */
+ if (CODE64(s))
+ goto illegal_op;
+ op = R_ES;
+ goto do_lxx;
+ case 0xc5: /* lds Gv (rejected in 64-bit code) */
+ if (CODE64(s))
+ goto illegal_op;
+ op = R_DS;
+ goto do_lxx;
+ case 0x1b2: /* lss Gv */
+ op = R_SS;
+ goto do_lxx;
+ case 0x1b4: /* lfs Gv */
+ op = R_FS;
+ goto do_lxx;
+ case 0x1b5: /* lgs Gv */
+ op = R_GS;
+ do_lxx: /* common tail: 'op' holds the segment register to load */
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* a register operand is not accepted, only a memory operand */
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(ot + s->mem_index); /* T1 = offset part of the far pointer */
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); /* advance A0 past the offset: 2 bytes for OT_WORD, doubled for the next size up */
+ /* load the segment first to handle exceptions properly */
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+ /* then put the data (the offset kept in T1) into the destination register */
+ gen_op_mov_reg_T1(ot, reg);
+ if (s->is_jmp) { /* when is_jmp is set after the segment load, set the next instruction address and end the block */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+
+ /************************/
+ /* shifts */
+ case 0xc0:
+ case 0xc1:
+ /* shift Ev,Ib: shift == 2 marks a count that comes from an immediate byte after the operand */
+ shift = 2;
+ grp2:
+ {
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7; /* the modrm reg field selects the operation passed to gen_shift/gen_shifti */
+
+ if (mod != 3) {
+ if (shift == 2) {
+ s->rip_offset = 1; /* one immediate byte still follows the memory operand */
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = (modrm & 7) | REX_B(s);
+ }
+
+ /* simpler op: shift == 0 takes the count from ECX, anything else is an immediate count */
+ if (shift == 0) {
+ gen_shift(s, op, ot, opreg, OR_ECX);
+ } else {
+ if (shift == 2) {
+ shift = ldub_code(s->pc++); /* replace the marker with the actual immediate count */
+ }
+ gen_shifti(s, op, ot, opreg, shift);
+ }
+ }
+ break;
+ case 0xd0:
+ case 0xd1:
+ /* shift Ev,1: the value 1 is used directly as the immediate count */
+ shift = 1;
+ goto grp2;
+ case 0xd2:
+ case 0xd3:
+ /* shift Ev,cl: shift == 0 selects the count-in-ECX path */
+ shift = 0;
+ goto grp2;
+
+ case 0x1a4: /* shld imm */
+ op = 0;
+ shift = 1;
+ goto do_shiftd;
+ case 0x1a5: /* shld cl */
+ op = 0;
+ shift = 0;
+ goto do_shiftd;
+ case 0x1ac: /* shrd imm */
+ op = 1;
+ shift = 1;
+ goto do_shiftd;
+ case 0x1ad: /* shrd cl */
+ op = 1;
+ shift = 0;
+ do_shiftd:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = rm;
+ }
+ gen_op_mov_TN_reg(ot, 1, reg);
+
+ if (shift) {
+ val = ldub_code(s->pc++);
+ tcg_gen_movi_tl(cpu_T3, val);
+ } else {
+ tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+ }
+ gen_shiftd_rm_T1_T3(s, ot, opreg, op);
+ break;
+
+ /************************/
+ /* floats */
+ case 0xd8 ... 0xdf:
+ if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+ /* if CR0.EM or CR0.TS are set, generate an FPU exception */
+ /* XXX: what to do if illegal op ? */
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+ op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+ if (mod != 3) {
+ /* memory op */
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ switch(op) {
+ case 0x00 ... 0x07: /* fxxxs */
+ case 0x10 ... 0x17: /* fixxxl */
+ case 0x20 ... 0x27: /* fxxxl */
+ case 0x30 ... 0x37: /* fixxx */
+ {
+ int op1;
+ op1 = op & 7;
+
+ switch(op >> 4) {
+ case 0:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_flds_FT0(cpu_tmp2_i32);
+ break;
+ case 1:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_FT0(cpu_tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fldl_FT0(cpu_tmp1_i64);
+ break;
+ case 3:
+ default:
+ gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_FT0(cpu_tmp2_i32);
+ break;
+ }
+
+ gen_helper_fp_arith_ST0_FT0(op1);
+ if (op1 == 3) {
+ /* fcomp needs pop */
+ gen_helper_fpop();
+ }
+ }
+ break;
+ case 0x08: /* flds */
+ case 0x0a: /* fsts */
+ case 0x0b: /* fstps */
+ case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+ case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+ case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+ switch(op & 7) {
+ case 0:
+ switch(op >> 4) {
+ case 0:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_flds_ST0(cpu_tmp2_i32);
+ break;
+ case 1:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_ST0(cpu_tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fldl_ST0(cpu_tmp1_i64);
+ break;
+ case 3:
+ default:
+ gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_ST0(cpu_tmp2_i32);
+ break;
+ }
+ break;
+ case 1:
+ /* XXX: the corresponding CPUID bit must be tested ! */
+ switch(op >> 4) {
+ case 1:
+ gen_helper_fisttl_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 2:
+ gen_helper_fisttll_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 3:
+ default:
+ gen_helper_fistt_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ }
+ gen_helper_fpop();
+ break;
+ default:
+ switch(op >> 4) {
+ case 0:
+ gen_helper_fsts_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 1:
+ gen_helper_fistl_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 2:
+ gen_helper_fstl_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 3:
+ default:
+ gen_helper_fist_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ }
+ if ((op & 7) == 3)
+ gen_helper_fpop();
+ break;
+ }
+ break;
+ case 0x0c: /* fldenv mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fldenv(
+ cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x0d: /* fldcw mem */
+ gen_op_ld_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fldcw(cpu_tmp2_i32);
+ break;
+ case 0x0e: /* fnstenv mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x0f: /* fnstcw mem */
+ gen_helper_fnstcw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ case 0x1d: /* fldt mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fldt_ST0(cpu_A0);
+ break;
+ case 0x1f: /* fstpt mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fstt_ST0(cpu_A0);
+ gen_helper_fpop();
+ break;
+ case 0x2c: /* frstor mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x2e: /* fnsave mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x2f: /* fnstsw mem */
+ gen_helper_fnstsw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ case 0x3c: /* fbld */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fbld_ST0(cpu_A0);
+ break;
+ case 0x3e: /* fbstp */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fbst_ST0(cpu_A0);
+ gen_helper_fpop();
+ break;
+ case 0x3d: /* fildll */
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fildll_ST0(cpu_tmp1_i64);
+ break;
+ case 0x3f: /* fistpll */
+ gen_helper_fistll_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else {
+ /* register float ops */
+ opreg = rm;
+
+ switch(op) {
+ case 0x08: /* fld sti */
+ gen_helper_fpush();
+ gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7));
+ break;
+ case 0x09: /* fxchg sti */
+ case 0x29: /* fxchg4 sti, undocumented op */
+ case 0x39: /* fxchg7 sti, undocumented op */
+ gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg));
+ break;
+ case 0x0a: /* grp d9/2 */
+ switch(rm) {
+ case 0: /* fnop */
+ /* check exceptions (FreeBSD FPU probe) */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fwait();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x0c: /* grp d9/4 */
+ switch(rm) {
+ case 0: /* fchs */
+ gen_helper_fchs_ST0();
+ break;
+ case 1: /* fabs */
+ gen_helper_fabs_ST0();
+ break;
+ case 4: /* ftst */
+ gen_helper_fldz_FT0();
+ gen_helper_fcom_ST0_FT0();
+ break;
+ case 5: /* fxam */
+ gen_helper_fxam_ST0();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x0d: /* grp d9/5 */
+ {
+ switch(rm) {
+ case 0:
+ gen_helper_fpush();
+ gen_helper_fld1_ST0();
+ break;
+ case 1:
+ gen_helper_fpush();
+ gen_helper_fldl2t_ST0();
+ break;
+ case 2:
+ gen_helper_fpush();
+ gen_helper_fldl2e_ST0();
+ break;
+ case 3:
+ gen_helper_fpush();
+ gen_helper_fldpi_ST0();
+ break;
+ case 4:
+ gen_helper_fpush();
+ gen_helper_fldlg2_ST0();
+ break;
+ case 5:
+ gen_helper_fpush();
+ gen_helper_fldln2_ST0();
+ break;
+ case 6:
+ gen_helper_fpush();
+ gen_helper_fldz_ST0();
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ case 0x0e: /* grp d9/6 */
+ switch(rm) {
+ case 0: /* f2xm1 */
+ gen_helper_f2xm1();
+ break;
+ case 1: /* fyl2x */
+ gen_helper_fyl2x();
+ break;
+ case 2: /* fptan */
+ gen_helper_fptan();
+ break;
+ case 3: /* fpatan */
+ gen_helper_fpatan();
+ break;
+ case 4: /* fxtract */
+ gen_helper_fxtract();
+ break;
+ case 5: /* fprem1 */
+ gen_helper_fprem1();
+ break;
+ case 6: /* fdecstp */
+ gen_helper_fdecstp();
+ break;
+ default:
+ case 7: /* fincstp */
+ gen_helper_fincstp();
+ break;
+ }
+ break;
+ case 0x0f: /* grp d9/7 */
+ switch(rm) {
+ case 0: /* fprem */
+ gen_helper_fprem();
+ break;
+ case 1: /* fyl2xp1 */
+ gen_helper_fyl2xp1();
+ break;
+ case 2: /* fsqrt */
+ gen_helper_fsqrt();
+ break;
+ case 3: /* fsincos */
+ gen_helper_fsincos();
+ break;
+ case 5: /* fscale */
+ gen_helper_fscale();
+ break;
+ case 4: /* frndint */
+ gen_helper_frndint();
+ break;
+ case 6: /* fsin */
+ gen_helper_fsin();
+ break;
+ default:
+ case 7: /* fcos */
+ gen_helper_fcos();
+ break;
+ }
+ break;
+ case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+ case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+ case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+ {
+ int op1;
+
+ op1 = op & 7;
+ if (op >= 0x20) {
+ gen_helper_fp_arith_STN_ST0(op1, opreg);
+ if (op >= 0x30)
+ gen_helper_fpop();
+ } else {
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fp_arith_ST0_FT0(op1);
+ }
+ }
+ break;
+ case 0x02: /* fcom */
+ case 0x22: /* fcom2, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcom_ST0_FT0();
+ break;
+ case 0x03: /* fcomp */
+ case 0x23: /* fcomp3, undocumented op */
+ case 0x32: /* fcomp5, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcom_ST0_FT0();
+ gen_helper_fpop();
+ break;
+ case 0x15: /* da/5 */
+ switch(rm) {
+ case 1: /* fucompp */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+ gen_helper_fucom_ST0_FT0();
+ gen_helper_fpop();
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x1c:
+ switch(rm) {
+ case 0: /* feni (287 only, just do nop here) */
+ break;
+ case 1: /* fdisi (287 only, just do nop here) */
+ break;
+ case 2: /* fclex */
+ gen_helper_fclex();
+ break;
+ case 3: /* fninit */
+ gen_helper_fninit();
+ break;
+ case 4: /* fsetpm (287 only, just do nop here) */
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x1d: /* fucomi */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucomi_ST0_FT0();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x1e: /* fcomi */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcomi_ST0_FT0();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x28: /* ffree sti */
+ gen_helper_ffree_STN(tcg_const_i32(opreg));
+ break;
+ case 0x2a: /* fst sti */
+ gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+ break;
+ case 0x2b: /* fstp sti */
+ case 0x0b: /* fstp1 sti, undocumented op */
+ case 0x3a: /* fstp8 sti, undocumented op */
+ case 0x3b: /* fstp9 sti, undocumented op */
+ gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+ gen_helper_fpop();
+ break;
+ case 0x2c: /* fucom st(i) */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucom_ST0_FT0();
+ break;
+ case 0x2d: /* fucomp st(i) */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucom_ST0_FT0();
+ gen_helper_fpop();
+ break;
+ case 0x33: /* de/3 */
+ switch(rm) {
+ case 1: /* fcompp */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+ gen_helper_fcom_ST0_FT0();
+ gen_helper_fpop();
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x38: /* ffreep sti, undocumented op */
+ gen_helper_ffree_STN(tcg_const_i32(opreg));
+ gen_helper_fpop();
+ break;
+ case 0x3c: /* df/4 */
+ switch(rm) {
+ case 0:
+ gen_helper_fnstsw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x3d: /* fucomip */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucomi_ST0_FT0();
+ gen_helper_fpop();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x3e: /* fcomip */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcomi_ST0_FT0();
+ gen_helper_fpop();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x10 ... 0x13: /* fcmovxx */
+ case 0x18 ... 0x1b: /* fcmovxx, second group (bit 3 of op set) */
+ {
+ int op1, l1;
+ static const uint8_t fcmov_cc[8] = { /* only the first four entries are initialised; the index below is op & 3 */
+ (JCC_B << 1),
+ (JCC_Z << 1),
+ (JCC_BE << 1),
+ (JCC_P << 1),
+ };
+ op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); /* low bit is set for 0x10-0x13 and clear for 0x18-0x1b; NOTE(review): presumably the "inverted condition" flag of gen_jcc1 - confirm */
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, op1, l1); /* branch to l1, skipping the move, when condition op1 holds */
+ gen_helper_fmov_ST0_STN(tcg_const_i32(opreg));
+ gen_set_label(l1);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ /************************/
+ /* string ops */
+
+ case 0xa4: /* movsS */
+ case 0xa5:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_movs(s, ot);
+ }
+ break;
+
+ case 0xaa: /* stosS */
+ case 0xab:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_stos(s, ot);
+ }
+ break;
+ case 0xac: /* lodsS */
+ case 0xad:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_lods(s, ot);
+ }
+ break;
+ case 0xae: /* scasS */
+ case 0xaf:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & PREFIX_REPNZ) {
+ gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+ } else if (prefixes & PREFIX_REPZ) {
+ gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+ } else {
+ gen_scas(s, ot);
+ s->cc_op = CC_OP_SUBB + ot;
+ }
+ break;
+
+ case 0xa6: /* cmpsS */
+ case 0xa7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & PREFIX_REPNZ) {
+ gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+ } else if (prefixes & PREFIX_REPZ) {
+ gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+ } else {
+ gen_cmps(s, ot);
+ s->cc_op = CC_OP_SUBB + ot;
+ }
+ break;
+ case 0x6c: /* insS */
+ case 0x6d:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_ins(s, ot);
+ if (use_icount) {
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ }
+ break;
+ case 0x6e: /* outsS */
+ case 0x6f:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes) | 4);
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_outs(s, ot);
+ if (use_icount) {
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ }
+ break;
+
+ /************************/
+ /* port I/O */
+
+ case 0xe4:
+ case 0xe5:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = ldub_code(s->pc++);
+ gen_op_movl_T0_im(val);
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+ gen_op_mov_reg_T1(ot, R_EAX);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xe6:
+ case 0xe7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = ldub_code(s->pc++);
+ gen_op_movl_T0_im(val);
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes));
+ gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xec:
+ case 0xed:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+ gen_op_mov_reg_T1(ot, R_EAX);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xee:
+ case 0xef:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes));
+ gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+
+ /************************/
+ /* control */
+ case 0xc2: /* ret im */
+ val = ldsw_code(s->pc);
+ s->pc += 2;
+ gen_pop_T0(s);
+ if (CODE64(s) && s->dflag)
+ s->dflag = 2;
+ gen_stack_update(s, val + (2 << s->dflag));
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 0xc3: /* ret */
+ gen_pop_T0(s);
+ gen_pop_update(s);
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 0xca: /* lret im */
+ val = ldsw_code(s->pc);
+ s->pc += 2;
+ do_lret:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_lret_protected(tcg_const_i32(s->dflag),
+ tcg_const_i32(val));
+ } else {
+ gen_stack_A0(s);
+ /* pop offset */
+ gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ /* NOTE: keeping EIP updated is not a problem in case of
+ exception */
+ gen_op_jmp_T0();
+ /* pop selector */
+ gen_op_addl_A0_im(2 << s->dflag);
+ gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+ gen_op_movl_seg_T0_vm(R_CS);
+ /* add stack offset */
+ gen_stack_update(s, val + (4 << s->dflag));
+ }
+ gen_eob(s);
+ break;
+ case 0xcb: /* lret */
+ val = 0;
+ goto do_lret;
+ case 0xcf: /* iret */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
+ if (!s->pe) {
+ /* real mode */
+ gen_helper_iret_real(tcg_const_i32(s->dflag));
+ s->cc_op = CC_OP_EFLAGS;
+ } else if (s->vm86) {
+#ifdef VBOX
+ if (s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_helper_iret_real(tcg_const_i32(s->dflag));
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_iret_protected(tcg_const_i32(s->dflag),
+ tcg_const_i32(s->pc - s->cs_base));
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ gen_eob(s);
+ break;
+ case 0xe8: /* call im */
+ {
+ if (dflag)
+ tval = (int32_t)insn_get(s, OT_LONG);
+ else
+ tval = (int16_t)insn_get(s, OT_WORD);
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ else if(!CODE64(s))
+ tval &= 0xffffffff;
+ gen_movtl_T0_im(next_eip);
+ gen_push_T0(s);
+ gen_jmp(s, tval);
+ }
+ break;
+ case 0x9a: /* lcall im */
+ {
+ unsigned int selector, offset;
+
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ offset = insn_get(s, ot);
+ selector = insn_get(s, OT_WORD);
+
+ gen_op_movl_T0_im(selector);
+ gen_op_movl_T1_imu(offset);
+ }
+ goto do_lcall;
+ case 0xe9: /* jmp im */
+ if (dflag)
+ tval = (int32_t)insn_get(s, OT_LONG);
+ else
+ tval = (int16_t)insn_get(s, OT_WORD);
+ tval += s->pc - s->cs_base;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ else if(!CODE64(s))
+ tval &= 0xffffffff;
+ gen_jmp(s, tval);
+ break;
+ case 0xea: /* ljmp im */
+ {
+ unsigned int selector, offset;
+
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ offset = insn_get(s, ot);
+ selector = insn_get(s, OT_WORD);
+
+ gen_op_movl_T0_im(selector);
+ gen_op_movl_T1_imu(offset);
+ }
+ goto do_ljmp;
+ case 0xeb: /* jmp Jb */
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ tval += s->pc - s->cs_base;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ gen_jmp(s, tval);
+ break;
+ case 0x70 ... 0x7f: /* jcc Jb */
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ goto do_jcc;
+ case 0x180 ... 0x18f: /* jcc Jv */
+ if (dflag) {
+ tval = (int32_t)insn_get(s, OT_LONG);
+ } else {
+ tval = (int16_t)insn_get(s, OT_WORD);
+ }
+ do_jcc:
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ gen_jcc(s, b, tval, next_eip);
+ break;
+
+ case 0x190 ... 0x19f: /* setcc Gv */
+ modrm = ldub_code(s->pc++);
+ gen_setcc(s, b);
+ gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+ break;
+ case 0x140 ... 0x14f: /* cmov Gv, Ev */
+ {
+ int l1;
+ TCGv t0;
+
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ t0 = tcg_temp_local_new();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_v_reg(ot, t0, rm);
+ }
+#ifdef TARGET_X86_64
+ if (ot == OT_LONG) {
+ /* XXX: specific Intel behaviour ? */
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ gen_set_label(l1);
+ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
+ } else
+#endif
+ {
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ gen_op_mov_reg_v(ot, reg, t0);
+ gen_set_label(l1);
+ }
+ tcg_temp_free(t0);
+ }
+ break;
+
+ /************************/
+ /* flags */
+ case 0x9c: /* pushf */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+#ifdef VBOX
+ if (s->vm86 && s->vme && s->iopl != 3)
+ gen_helper_read_eflags_vme(cpu_T[0]);
+ else
+#endif
+ gen_helper_read_eflags(cpu_T[0]);
+ gen_push_T0(s);
+ }
+ break;
+ case 0x9d: /* popf */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_pop_T0(s);
+ if (s->cpl == 0) {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK)));
+ } else {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff));
+ }
+ } else {
+ if (s->cpl <= s->iopl) {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK)));
+ } else {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff));
+ }
+ } else {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK)));
+ } else {
+#ifdef VBOX
+ if (s->vm86 && s->vme)
+ gen_helper_write_eflags_vme(cpu_T[0]);
+ else
+#endif
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff));
+ }
+ }
+ }
+ gen_pop_update(s);
+ s->cc_op = CC_OP_EFLAGS;
+ /* abort translation because TF flag may change */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x9e: /* sahf */
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+ goto illegal_op;
+ gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x9f: /* lahf */
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_T[0]);
+ /* Note: gen_compute_eflags() only gives the condition codes */
+ tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+ gen_op_mov_reg_T0(OT_BYTE, R_AH);
+ break;
+ case 0xf5: /* cmc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xf8: /* clc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xf9: /* stc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xfc: /* cld */
+ tcg_gen_movi_i32(cpu_tmp2_i32, 1);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+ break;
+ case 0xfd: /* std */
+ tcg_gen_movi_i32(cpu_tmp2_i32, -1);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+ break;
+
+ /************************/
+ /* bit operations */
+ case 0x1ba: /* bt/bts/btr/btc Gv, im */
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ op = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ s->rip_offset = 1;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+ /* load shift */
+ val = ldub_code(s->pc++);
+ gen_op_movl_T1_im(val);
+ if (op < 4)
+ goto illegal_op;
+ op -= 4;
+ goto bt_op;
+ case 0x1a3: /* bt Gv, Ev */
+ op = 0;
+ goto do_btx;
+ case 0x1ab: /* bts */
+ op = 1;
+ goto do_btx;
+ case 0x1b3: /* btr */
+ op = 2;
+ goto do_btx;
+ case 0x1bb: /* btc */
+ op = 3;
+ do_btx:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_TN_reg(OT_LONG, 1, reg);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* specific case: we need to add a displacement */
+ gen_exts(ot, cpu_T[1]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+ bt_op:
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+ switch(op) {
+ case 0:
+ tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ break;
+ case 1:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ case 2:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ default:
+ case 3:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ }
+ s->cc_op = CC_OP_SARB + ot;
+ if (op != 0) {
+ if (mod != 3)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, rm);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ }
+ break;
+ case 0x1bc: /* bsf */
+ case 0x1bd: /* bsr */
+ {
+ int label1;
+ TCGv t0;
+
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(s,modrm, ot, OR_TMP0, 0);
+ gen_extu(ot, cpu_T[0]);
+ t0 = tcg_temp_local_new();
+ tcg_gen_mov_tl(t0, cpu_T[0]);
+ if ((b & 1) && (prefixes & PREFIX_REPZ) &&
+ (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+ switch(ot) {
+ case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(16)); break;
+ case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(32)); break;
+ case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(64)); break;
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ } else {
+ label1 = gen_new_label();
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
+ if (b & 1) {
+ gen_helper_bsr(cpu_T[0], t0);
+ } else {
+ gen_helper_bsf(cpu_T[0], t0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ tcg_gen_movi_tl(cpu_cc_dst, 1);
+ gen_set_label(label1);
+ tcg_gen_discard_tl(cpu_cc_src);
+ s->cc_op = CC_OP_LOGICB + ot;
+ }
+ tcg_temp_free(t0);
+ }
+ break;
+ /************************/
+ /* bcd */
+ case 0x27: /* daa */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_daa();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x2f: /* das */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_das();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x37: /* aaa */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_aaa();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x3f: /* aas */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_aas();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xd4: /* aam */
+ if (CODE64(s))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ if (val == 0) {
+ gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+ } else {
+ gen_helper_aam(tcg_const_i32(val));
+ s->cc_op = CC_OP_LOGICB;
+ }
+ break;
+ case 0xd5: /* aad */
+ if (CODE64(s))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ gen_helper_aad(tcg_const_i32(val));
+ s->cc_op = CC_OP_LOGICB;
+ break;
+ /************************/
+ /* misc */
+ case 0x90: /* nop */
+ /* XXX: correct lock test for all insn */
+ if (prefixes & PREFIX_LOCK) {
+ goto illegal_op;
+ }
+ /* If REX_B is set, then this is xchg eax, r8d, not a nop. */
+ if (REX_B(s)) {
+ goto do_xchg_reg_eax;
+ }
+ if (prefixes & PREFIX_REPZ) {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE);
+ }
+ break;
+ case 0x9b: /* fwait */
+ if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
+ (HF_MP_MASK | HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fwait();
+ }
+ break;
+ case 0xcc: /* int3 */
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && !s->vme) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else
+#endif
+ gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
+ break;
+ case 0xcd: /* int N */
+ val = ldub_code(s->pc++);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && !s->vme) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
+ }
+ break;
+ case 0xce: /* into */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_into(tcg_const_i32(s->pc - pc_start));
+ break;
+#ifdef WANT_ICEBP
+ case 0xf1: /* icebp (undocumented, exits to external debugger) */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
+#if 1
+ gen_debug(s, pc_start - s->cs_base);
+#else
+ /* start debug */
+ tb_flush(cpu_single_env);
+ cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+#endif
+ break;
+#endif
+ case 0xfa: /* cli */
+ if (!s->vm86) {
+ if (s->cpl <= s->iopl) {
+ gen_helper_cli();
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ } else {
+ if (s->iopl == 3) {
+ gen_helper_cli();
+#ifdef VBOX
+ } else if (s->iopl != 3 && s->vme) {
+ gen_helper_cli_vme();
+#endif
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ }
+ break;
+ case 0xfb: /* sti */
+ if (!s->vm86) {
+ if (s->cpl <= s->iopl) {
+ gen_sti:
+ gen_helper_sti();
+ /* interruptions are enabled only the first insn after sti */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ /* give a chance to handle pending irqs */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ } else {
+ if (s->iopl == 3) {
+ goto gen_sti;
+#ifdef VBOX
+ } else if (s->iopl != 3 && s->vme) {
+ gen_helper_sti_vme();
+ /* give a chance to handle pending irqs */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+#endif
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ }
+ break;
+ case 0x62: /* bound */
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ if (ot == OT_WORD)
+ gen_helper_boundw(cpu_A0, cpu_tmp2_i32);
+ else
+ gen_helper_boundl(cpu_A0, cpu_tmp2_i32);
+ break;
+ case 0x1c8 ... 0x1cf: /* bswap reg */
+ reg = (b & 7) | REX_B(s);
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, reg);
+ tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_QUAD, reg);
+ } else
+#endif
+ {
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ }
+ break;
+ case 0xd6: /* salc */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags_c(cpu_T[0]);
+ tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+ break;
+ case 0xe0: /* loopnz */
+ case 0xe1: /* loopz */
+ case 0xe2: /* loop */
+ case 0xe3: /* jecxz */
+ {
+ int l1, l2, l3;
+
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+
+ l1 = gen_new_label();
+ l2 = gen_new_label();
+ l3 = gen_new_label();
+ b &= 3;
+ switch(b) {
+ case 0: /* loopnz */
+ case 1: /* loopz */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_op_add_reg_im(s->aflag, R_ECX, -1);
+ gen_op_jz_ecx(s->aflag, l3);
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
+ if (b == 0) {
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
+ } else {
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
+ }
+ break;
+ case 2: /* loop */
+ gen_op_add_reg_im(s->aflag, R_ECX, -1);
+ gen_op_jnz_ecx(s->aflag, l1);
+ break;
+ default:
+ case 3: /* jcxz */
+ gen_op_jz_ecx(s->aflag, l1);
+ break;
+ }
+
+ gen_set_label(l3);
+ gen_jmp_im(next_eip);
+ tcg_gen_br(l2);
+
+ gen_set_label(l1);
+ gen_jmp_im(tval);
+ gen_set_label(l2);
+ gen_eob(s);
+ }
+ break;
+ case 0x130: /* wrmsr */
+ case 0x132: /* rdmsr */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (b & 2) {
+ gen_helper_rdmsr();
+ } else {
+ gen_helper_wrmsr();
+ }
+ }
+ break;
+ case 0x131: /* rdtsc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (use_icount)
+ gen_io_start();
+ gen_helper_rdtsc();
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0x133: /* rdpmc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_rdpmc();
+ break;
+ case 0x134: /* sysenter */
+#ifndef VBOX
+ /* For Intel SYSENTER is valid on 64-bit */
+ if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+#else
+ if ( !(cpu_single_env->cpuid_features & CPUID_SEP)
+ || ( IS_LONG_MODE(s)
+ && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL))
+#endif
+ goto illegal_op;
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysenter();
+ gen_eob(s);
+ }
+ break;
+ case 0x135: /* sysexit */
+#ifndef VBOX
+ /* For Intel SYSEXIT is valid on 64-bit */
+ if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+#else
+ if ( !(cpu_single_env->cpuid_features & CPUID_SEP)
+ || ( IS_LONG_MODE(s)
+ && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL))
+#endif
+ goto illegal_op;
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysexit(tcg_const_i32(dflag));
+ gen_eob(s);
+ }
+ break;
+#ifdef TARGET_X86_64
+ case 0x105: /* syscall */
+ /* XXX: is it usable in real mode ? */
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_syscall(tcg_const_i32(s->pc - pc_start));
+ gen_eob(s);
+ break;
+ case 0x107: /* sysret */
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysret(tcg_const_i32(s->dflag));
+ /* condition codes are modified only in long mode */
+ if (s->lma)
+ s->cc_op = CC_OP_EFLAGS;
+ gen_eob(s);
+ }
+ break;
+#endif
+ case 0x1a2: /* cpuid */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_cpuid();
+ break;
+ case 0xf4: /* hlt */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_hlt(tcg_const_i32(s->pc - pc_start));
+ s->is_jmp = DISAS_TB_JUMP;
+ }
+ break;
+ case 0x100:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* sldt */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
+ ot = OT_WORD;
+ if (mod == 3)
+ ot += s->dflag;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+ case 2: /* lldt */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lldt(cpu_tmp2_i32);
+ }
+ break;
+ case 1: /* str */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
+ ot = OT_WORD;
+ if (mod == 3)
+ ot += s->dflag;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+ case 3: /* ltr */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_ltr(cpu_tmp2_i32);
+ }
+ break;
+ case 4: /* verr */
+ case 5: /* verw */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ if (op == 4)
+ gen_helper_verr(cpu_T[0]);
+ else
+ gen_helper_verw(cpu_T[0]);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x101:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ rm = modrm & 7;
+ switch(op) {
+ case 0: /* sgdt */
+ if (mod == 3)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
+ gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ break;
+ case 1:
+ if (mod == 3) {
+ switch (rm) {
+ case 0: /* monitor */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+ s->cpl != 0)
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EAX);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EAX);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ }
+ gen_add_A0_ds_seg(s);
+ gen_helper_monitor(cpu_A0);
+ break;
+ case 1: /* mwait */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+ s->cpl != 0)
+ goto illegal_op;
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_mwait(tcg_const_i32(s->pc - pc_start));
+ gen_eob(s);
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else { /* sidt */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
+ gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ }
+ break;
+ case 2: /* lgdt */
+ case 3: /* lidt */
+ if (mod == 3) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ switch(rm) {
+ case 0: /* VMRUN */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmrun(tcg_const_i32(s->aflag),
+ tcg_const_i32(s->pc - pc_start));
+ tcg_gen_exit_tb(0);
+ s->is_jmp = DISAS_TB_JUMP;
+ }
+ break;
+ case 1: /* VMMCALL */
+ if (!(s->flags & HF_SVME_MASK))
+ goto illegal_op;
+ gen_helper_vmmcall();
+ break;
+ case 2: /* VMLOAD */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmload(tcg_const_i32(s->aflag));
+ }
+ break;
+ case 3: /* VMSAVE */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmsave(tcg_const_i32(s->aflag));
+ }
+ break;
+ case 4: /* STGI */
+ if ((!(s->flags & HF_SVME_MASK) &&
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_stgi();
+ }
+ break;
+ case 5: /* CLGI */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_clgi();
+ }
+ break;
+ case 6: /* SKINIT */
+ if ((!(s->flags & HF_SVME_MASK) &&
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ !s->pe)
+ goto illegal_op;
+ gen_helper_skinit();
+ break;
+ case 7: /* INVLPGA */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_invlpga(tcg_const_i32(s->aflag));
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start,
+ op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ if (!s->dflag)
+ gen_op_andl_T0_im(0xffffff);
+ if (op == 2) {
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
+ tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
+ } else {
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
+ tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
+ }
+ }
+ break;
+ case 4: /* smsw */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
+#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
+#else
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
+#endif
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+ break;
+ case 6: /* lmsw */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_helper_lmsw(cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 7:
+ if (mod != 3) { /* invlpg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_invlpg(cpu_A0);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ } else {
+ switch (rm) {
+ case 0: /* swapgs */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ tcg_gen_ld_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[R_GS].base));
+ tcg_gen_ld_tl(cpu_T[1], cpu_env,
+ offsetof(CPUX86State,kernelgsbase));
+ tcg_gen_st_tl(cpu_T[1], cpu_env,
+ offsetof(CPUX86State,segs[R_GS].base));
+ tcg_gen_st_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,kernelgsbase));
+ }
+ } else
+#endif
+ {
+ goto illegal_op;
+ }
+ break;
+ case 1: /* rdtscp */
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (use_icount)
+ gen_io_start();
+ gen_helper_rdtscp();
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x108: /* invd */
+ case 0x109: /* wbinvd */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
+ /* nothing to do */
+ }
+ break;
+ case 0x63: /* arpl or movslS (x86_64) */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ int d_ot;
+ /* d_ot is the size of destination */
+ d_ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+
+ if (mod == 3) {
+ gen_op_mov_TN_reg(OT_LONG, 0, rm);
+ /* sign extend */
+ if (d_ot == OT_QUAD)
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(d_ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (d_ot == OT_QUAD) {
+ gen_op_lds_T0_A0(OT_LONG + s->mem_index);
+ } else {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ }
+ } else
+#endif
+ {
+ int label1;
+ TCGv t0, t1, t2, a0;
+
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ ot = OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+ a0 = tcg_temp_local_new();
+ tcg_gen_mov_tl(a0, cpu_A0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, rm);
+ TCGV_UNUSED(a0);
+ }
+ gen_op_mov_v_reg(ot, t1, reg);
+ tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+ tcg_gen_andi_tl(t1, t1, 3);
+ tcg_gen_movi_tl(t2, 0);
+ label1 = gen_new_label();
+ tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+ tcg_gen_andi_tl(t0, t0, ~3);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_movi_tl(t2, CC_Z);
+ gen_set_label(label1);
+ if (mod != 3) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ tcg_temp_free(a0);
+ } else {
+ gen_op_mov_reg_v(ot, rm, t0);
+ }
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
+ s->cc_op = CC_OP_EFLAGS;
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ }
+ break;
+ case 0x102: /* lar */
+ case 0x103: /* lsl */
+ {
+ int label1;
+ TCGv t0;
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ t0 = tcg_temp_local_new();
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ if (b == 0x102)
+ gen_helper_lar(t0, cpu_T[0]);
+ else
+ gen_helper_lsl(t0, cpu_T[0]);
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+ gen_op_mov_reg_v(ot, reg, t0);
+ gen_set_label(label1);
+ s->cc_op = CC_OP_EFLAGS;
+ tcg_temp_free(t0);
+ }
+ break;
+ case 0x118:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* prefetchnta */
+ case 1: /* prefetchnt0 */
+ case 2: /* prefetchnt0 */
+ case 3: /* prefetchnt0 */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* nothing more to do */
+ break;
+ default: /* nop (multi byte) */
+ gen_nop_modrm(s, modrm);
+ break;
+ }
+ break;
+ case 0x119 ... 0x11f: /* nop (multi byte) */
+ modrm = ldub_code(s->pc++);
+ gen_nop_modrm(s, modrm);
+ break;
+ case 0x120: /* mov reg, crN */
+ case 0x122: /* mov crN, reg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ modrm = ldub_code(s->pc++);
+#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */
+ if ((modrm & 0xc0) != 0xc0)
+ goto illegal_op;
+#endif
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (CODE64(s))
+ ot = OT_QUAD;
+ else
+ ot = OT_LONG;
+ if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
+ (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
+ reg = 8;
+ }
+ switch(reg) {
+ case 0:
+ case 2:
+ case 3:
+ case 4:
+ case 8:
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (b & 2) {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg));
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ case 0x121: /* mov reg, drN */
+ case 0x123: /* mov drN, reg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ modrm = ldub_code(s->pc++);
+#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */
+ if ((modrm & 0xc0) != 0xc0)
+ goto illegal_op;
+#endif
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (CODE64(s))
+ ot = OT_QUAD;
+ else
+ ot = OT_LONG;
+ /* XXX: do it dynamically with CR4.DE bit */
+ if (reg == 4 || reg == 5 || reg >= 8)
+ goto illegal_op;
+ if (b & 2) {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
+ gen_op_mov_TN_reg(ot, 0, rm);
+ gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ }
+ break;
+ case 0x106: /* clts */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+ gen_helper_clts();
+ /* abort block because static cpu state changed */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
+ case 0x1c3: /* MOVNTI reg, mem */
+ if (!(s->cpuid_features & CPUID_SSE2))
+ goto illegal_op;
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ /* generate a generic store */
+ gen_ldst_modrm(s, modrm, ot, reg, 1);
+ break;
+ case 0x1ae:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* fxsave */
+ if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+ (s->prefix & PREFIX_LOCK))
+ goto illegal_op;
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2)));
+ break;
+ case 1: /* fxrstor */
+ if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+ (s->prefix & PREFIX_LOCK))
+ goto illegal_op;
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2)));
+ break;
+ case 2: /* ldmxcsr */
+ case 3: /* stmxcsr */
+ if (s->flags & HF_TS_MASK) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
+ mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (op == 2) {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+ } else {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ }
+ break;
+ case 5: /* lfence */
+ case 6: /* mfence */
+ if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
+ goto illegal_op;
+ break;
+ case 7: /* sfence / clflush */
+ if ((modrm & 0xc7) == 0xc0) {
+ /* sfence */
+ /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
+ if (!(s->cpuid_features & CPUID_SSE))
+ goto illegal_op;
+ } else {
+ /* clflush */
+ if (!(s->cpuid_features & CPUID_CLFLUSH))
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x10d: /* 3DNow! prefetch(w) */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* ignore for now */
+ break;
+ case 0x1aa: /* rsm */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
+ if (!(s->flags & HF_SMM_MASK))
+ goto illegal_op;
+ gen_update_cc_op(s);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_helper_rsm();
+ gen_eob(s);
+ break;
+ case 0x1b8: /* SSE4.2 popcnt */
+ if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
+ PREFIX_REPZ)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
+ goto illegal_op;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7);
+
+ if (s->prefix & PREFIX_DATA)
+ ot = OT_WORD;
+ else if (s->dflag != 2)
+ ot = OT_LONG;
+ else
+ ot = OT_QUAD;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot));
+ gen_op_mov_reg_T0(ot, reg);
+
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x10e ... 0x10f:
+ /* 3DNow! instructions, ignore prefixes */
+ s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
+ case 0x110 ... 0x117:
+ case 0x128 ... 0x12f:
+ case 0x138 ... 0x13a:
+ case 0x150 ... 0x179:
+ case 0x17c ... 0x17f:
+ case 0x1c2:
+ case 0x1c4 ... 0x1c6:
+ case 0x1d0 ... 0x1fe:
+ gen_sse(s, b, pc_start, rex_r);
+ break;
+ default:
+ goto illegal_op;
+ }
+ /* lock generation */
+ if (s->prefix & PREFIX_LOCK)
+ gen_helper_unlock();
+ return s->pc;
+ illegal_op:
+ if (s->prefix & PREFIX_LOCK)
+ gen_helper_unlock();
+ /* XXX: ensure that no lock was generated */
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return s->pc;
+}
+
+ /* Create the TCG globals used by this translator: the env pointer
+    global bound to TCG_AREG0, the cc_op / cc_src / cc_dst / cc_tmp
+    fields of CPUState, and one global per CPUState.regs[] slot.
+    Finally expands helper.h with GEN_HELPER defined as 2 to register
+    the helpers. */
+ void optimize_flags_init(void)
+ {
+ /* X86_GLOBAL_REG(idx, name): back cpu_regs[idx] with
+    CPUState.regs[idx] under the given name, using the 64-bit global
+    constructor on TARGET_X86_64 builds and the 32-bit one otherwise. */
+#ifdef TARGET_X86_64
+#define X86_GLOBAL_REG(idx, name) cpu_regs[idx] = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUState, regs[idx]), name)
+#else
+#define X86_GLOBAL_REG(idx, name) cpu_regs[idx] = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUState, regs[idx]), name)
+#endif
+
+     /* CCTable must be 8 bytes when TCG_TARGET_REG_BITS is 32 and
+        16 bytes otherwise. */
+#if TCG_TARGET_REG_BITS == 32
+     assert(sizeof(CCTable) == (1 << 3));
+#else
+     assert(sizeof(CCTable) == (1 << 4));
+#endif
+
+     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+
+     /* condition-code state kept in CPUState */
+     cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUState, cc_op),
+                                        "cc_op");
+     cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src),
+                                     "cc_src");
+     cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst),
+                                     "cc_dst");
+     cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
+                                     "cc_tmp");
+
+     /* general-purpose registers; creation order is kept as-is */
+#ifdef TARGET_X86_64
+     X86_GLOBAL_REG(R_EAX, "rax");
+     X86_GLOBAL_REG(R_ECX, "rcx");
+     X86_GLOBAL_REG(R_EDX, "rdx");
+     X86_GLOBAL_REG(R_EBX, "rbx");
+     X86_GLOBAL_REG(R_ESP, "rsp");
+     X86_GLOBAL_REG(R_EBP, "rbp");
+     X86_GLOBAL_REG(R_ESI, "rsi");
+     X86_GLOBAL_REG(R_EDI, "rdi");
+     X86_GLOBAL_REG(8, "r8");
+     X86_GLOBAL_REG(9, "r9");
+     X86_GLOBAL_REG(10, "r10");
+     X86_GLOBAL_REG(11, "r11");
+     X86_GLOBAL_REG(12, "r12");
+     X86_GLOBAL_REG(13, "r13");
+     X86_GLOBAL_REG(14, "r14");
+     X86_GLOBAL_REG(15, "r15");
+#else
+     X86_GLOBAL_REG(R_EAX, "eax");
+     X86_GLOBAL_REG(R_ECX, "ecx");
+     X86_GLOBAL_REG(R_EDX, "edx");
+     X86_GLOBAL_REG(R_EBX, "ebx");
+     X86_GLOBAL_REG(R_ESP, "esp");
+     X86_GLOBAL_REG(R_EBP, "ebp");
+     X86_GLOBAL_REG(R_ESI, "esi");
+     X86_GLOBAL_REG(R_EDI, "edi");
+#endif
+
+     /* drop the local macro before helper.h is expanded below */
+#undef X86_GLOBAL_REG
+
+     /* register helpers */
+#define GEN_HELPER 2
+#include "helper.h"
+ }
+
+ /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
+ basic block 'tb'. If search_pc is TRUE, also generate PC
+ information for each intermediate instruction.
+
+ env: CPU state. Read here for the cpuid feature words,
+ singlestep_enabled, the breakpoint list and (VBOX builds) CR0/CR4,
+ EFLAGS and env->state bits; CPU_EMULATE_SINGLE_INSTR is cleared in
+ env->state when it ends the block.
+ tb: block to translate; tb->pc, tb->cs_base, tb->flags and
+ tb->cflags are inputs. tb->size and tb->icount are written only
+ when search_pc is 0.
+ search_pc: when non-zero, gen_opc_pc[], gen_opc_cc_op[],
+ gen_opc_instr_start[] and gen_opc_icount[] are filled, indexed by op
+ position in gen_opc_buf; positions that do not start a guest
+ instruction get gen_opc_instr_start[] = 0.
+
+ Translation proceeds one guest instruction at a time through
+ disas_insn() and stops when that sets dc->is_jmp or when one of the
+ end-of-block checks in the loop below triggers. */
+ static inline void gen_intermediate_code_internal(CPUState *env,
+ TranslationBlock *tb,
+ int search_pc)
+ {
+ DisasContext dc1, *dc = &dc1;
+ target_ulong pc_ptr;
+ uint16_t *gen_opc_end;
+ CPUBreakpoint *bp;
+ int j, lj; /* j: current op index in gen_opc_buf; lj: last gen_opc_* index filled (-1 = none) */
+ uint64_t flags;
+ target_ulong pc_start;
+ target_ulong cs_base;
+ int num_insns;
+ int max_insns;
+#ifdef VBOX
+ int const singlestep = env->state & CPU_EMULATE_SINGLE_STEP; /* NOTE(review): without VBOX the 'singlestep' tested in the loop must come from a declaration not visible here */
+#endif
+
+ /* generate intermediate code */
+ pc_start = tb->pc;
+ cs_base = tb->cs_base;
+ flags = tb->flags;
+
+ dc->pe = (flags >> HF_PE_SHIFT) & 1;
+ dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
+ dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
+ dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
+ dc->f_st = 0;
+ dc->vm86 = (flags >> VM_SHIFT) & 1;
+#ifdef VBOX
+ dc->vme = !!(env->cr[4] & CR4_VME_MASK);
+ dc->pvi = !!(env->cr[4] & CR4_PVI_MASK);
+# ifdef VBOX_WITH_CALL_RECORD
+ if ( !(env->state & CPU_RAW_RING0)
+ && (env->cr[0] & CR0_PG_MASK)
+ && !(env->eflags & X86_EFL_IF)
+ && dc->code32)
+ dc->record_call = 1;
+ else
+ dc->record_call = 0;
+# endif
+#endif /* VBOX */
+ dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
+ dc->iopl = (flags >> IOPL_SHIFT) & 3;
+ dc->tf = (flags >> TF_SHIFT) & 1;
+ dc->singlestep_enabled = env->singlestep_enabled;
+ dc->cc_op = CC_OP_DYNAMIC;
+ dc->cs_base = cs_base;
+ dc->tb = tb;
+ dc->popl_esp_hack = 0;
+ /* select memory access functions: mem_index stays 0 unless
+ HF_SOFTMMU_MASK is set; then it is 2 * 4 for CPL 3 and 1 * 4 for
+ any other CPL */
+ dc->mem_index = 0;
+ if (flags & HF_SOFTMMU_MASK) {
+ if (dc->cpl == 3)
+ dc->mem_index = 2 * 4;
+ else
+ dc->mem_index = 1 * 4;
+ }
+ dc->cpuid_features = env->cpuid_features;
+ dc->cpuid_ext_features = env->cpuid_ext_features;
+ dc->cpuid_ext2_features = env->cpuid_ext2_features;
+ dc->cpuid_ext3_features = env->cpuid_ext3_features;
+#ifdef TARGET_X86_64
+ dc->lma = (flags >> HF_LMA_SHIFT) & 1;
+ dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
+#endif
+ dc->flags = flags;
+ dc->jmp_opt = !(dc->tf || env->singlestep_enabled ||
+ (flags & HF_INHIBIT_IRQ_MASK)
+#ifndef CONFIG_SOFTMMU
+ || (flags & HF_SOFTMMU_MASK)
+#endif
+ ); /* jmp_opt is off when TF, env->singlestep_enabled or HF_INHIBIT_IRQ_MASK is set */
+#if 0
+ /* check addseg logic */
+ if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
+ printf("ERROR addseg\n");
+#endif
+
+ cpu_T[0] = tcg_temp_new();
+ cpu_T[1] = tcg_temp_new();
+ cpu_A0 = tcg_temp_new();
+ cpu_T3 = tcg_temp_new();
+
+ cpu_tmp0 = tcg_temp_new();
+ cpu_tmp1_i64 = tcg_temp_new_i64();
+ cpu_tmp2_i32 = tcg_temp_new_i32();
+ cpu_tmp3_i32 = tcg_temp_new_i32();
+ cpu_tmp4 = tcg_temp_new();
+ cpu_tmp5 = tcg_temp_new();
+ cpu_ptr0 = tcg_temp_new_ptr();
+ cpu_ptr1 = tcg_temp_new_ptr();
+
+ gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; /* op buffer threshold that ends the block (checked in the loop) */
+
+ dc->is_jmp = DISAS_NEXT;
+ pc_ptr = pc_start;
+ lj = -1;
+ num_insns = 0;
+ max_insns = tb->cflags & CF_COUNT_MASK;
+ if (max_insns == 0)
+ max_insns = CF_COUNT_MASK; /* a count of 0 in cflags falls back to the mask value as the limit */
+
+ gen_icount_start();
+ for(;;) {
+ if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+ if (bp->pc == pc_ptr &&
+ !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
+ gen_debug(dc, pc_ptr - dc->cs_base); /* breakpoint at this pc, not suppressed by BP_CPU together with HF_RF_MASK */
+ break;
+ }
+ }
+ }
+ if (search_pc) {
+ j = gen_opc_ptr - gen_opc_buf;
+ if (lj < j) {
+ lj++;
+ while (lj < j)
+ gen_opc_instr_start[lj++] = 0; /* op slots between the previous entry and j do not start an instruction */
+ }
+ gen_opc_pc[lj] = pc_ptr;
+ gen_opc_cc_op[lj] = dc->cc_op;
+ gen_opc_instr_start[lj] = 1;
+ gen_opc_icount[lj] = num_insns;
+ }
+ if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+ gen_io_start(); /* about to translate the last allowed instruction of a CF_LAST_IO block */
+
+ pc_ptr = disas_insn(dc, pc_ptr);
+ num_insns++;
+ /* stop translation if indicated */
+ if (dc->is_jmp)
+ break;
+#ifdef VBOX
+# ifdef DEBUG
+/*
+ if(cpu_check_code_raw(env, pc_ptr, env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK))) == ERROR_SUCCESS)
+ {
+ //should never happen as the jump to the patch code terminates the translation block
+ dprintf(("QEmu is about to execute instructions in our patch block at %08X!!\n", pc_ptr));
+ }
+*/
+# endif /* DEBUG */
+ if (env->state & CPU_EMULATE_SINGLE_INSTR)
+ {
+ env->state &= ~CPU_EMULATE_SINGLE_INSTR;
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+#endif /* VBOX */
+
+ /* if single step mode, we generate only one instruction and
+ generate an exception */
+ /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+ the flag and abort the translation to give the irqs a
+ chance to happen */
+ if (dc->tf || dc->singlestep_enabled ||
+ (flags & HF_INHIBIT_IRQ_MASK)) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ /* if too long translation, stop generation too: op buffer
+ threshold reached, guest code span within 32 bytes of
+ TARGET_PAGE_SIZE, or max_insns instructions translated */
+ if (gen_opc_ptr >= gen_opc_end ||
+ (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
+ num_insns >= max_insns) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ if (singlestep) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ }
+ if (tb->cflags & CF_LAST_IO)
+ gen_io_end();
+ gen_icount_end(tb, num_insns);
+ *gen_opc_ptr = INDEX_op_end;
+ /* we don't forget to fill the last values */
+ if (search_pc) {
+ j = gen_opc_ptr - gen_opc_buf;
+ lj++;
+ while (lj <= j)
+ gen_opc_instr_start[lj++] = 0;
+ }
+
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ int disas_flags;
+ qemu_log("----------------\n");
+ qemu_log("IN: %s\n", lookup_symbol(pc_start));
+#ifdef TARGET_X86_64
+ if (dc->code64)
+ disas_flags = 2;
+ else
+#endif
+ disas_flags = !dc->code32; /* disas_flags: 2 if dc->code64, else 1 when dc->code32 is clear, else 0 */
+ log_target_disas(pc_start, pc_ptr - pc_start, disas_flags);
+ qemu_log("\n");
+ }
+#endif
+
+ if (!search_pc) { /* tb->size and tb->icount are left untouched when search_pc is set */
+ tb->size = pc_ptr - pc_start;
+ tb->icount = num_insns;
+ }
+ }
+
+ /* Translate block 'tb' through gen_intermediate_code_internal() with
+    search_pc disabled, so tb->size and tb->icount are recorded and no
+    per-op PC information is generated. */
+ void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+ {
+     const int search_pc = 0;
+
+     gen_intermediate_code_internal(env, tb, search_pc);
+ }
+
+ /* Translate block 'tb' through gen_intermediate_code_internal() with
+    search_pc enabled, so the gen_opc_* per-op PC tables are filled;
+    tb->size and tb->icount are not modified in this mode. */
+ void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+ {
+     const int search_pc = 1;
+
+     gen_intermediate_code_internal(env, tb, search_pc);
+ }
+
+ /* Restore env->eip and env->cc_op from the gen_opc_pc[] and
+    gen_opc_cc_op[] entries at index pc_pos.
+
+    env:         CPU state; env->eip is always written, env->cc_op only
+                 when the recorded value is not CC_OP_DYNAMIC.
+    tb:          tb->cs_base is subtracted from the recorded pc to
+                 form eip.
+    searched_pc: only used in the DEBUG_DISAS log line.
+    pc_pos:      index into the gen_opc_* tables; not range checked here.
+    puc:         unused. */
+ void gen_pc_load(CPUState *env, TranslationBlock *tb,
+                  uintptr_t searched_pc, int pc_pos, void *puc)
+ {
+     int cc_op;
+#ifdef DEBUG_DISAS
+     if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
+         int i;
+         qemu_log("RESTORE:\n");
+         for (i = 0; i <= pc_pos; i++) {
+             if (gen_opc_instr_start[i]) {
+                 qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
+             }
+         }
+         /* searched_pc is a uintptr_t, which is wider than unsigned long
+            on LLP64 hosts (64-bit Windows), so "%lx" would mismatch the
+            argument; print it as a pointer instead. */
+         qemu_log("spc=%p pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+                  (void *)searched_pc, pc_pos,
+                  gen_opc_pc[pc_pos] - tb->cs_base,
+                  (uint32_t)tb->cs_base);
+     }
+#endif
+     env->eip = gen_opc_pc[pc_pos] - tb->cs_base;
+     cc_op = gen_opc_cc_op[pc_pos];
+     /* CC_OP_DYNAMIC means no static value was recorded: keep env->cc_op */
+     if (cc_op != CC_OP_DYNAMIC)
+         env->cc_op = cc_op;
+ }