Diffstat
-rw-r--r-- | src/VBox/VMM/VMMR0/HMR0A.asm | 2184
1 file changed, 2184 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMR0/HMR0A.asm b/src/VBox/VMM/VMMR0/HMR0A.asm new file mode 100644 index 00000000..3db49a1e --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMR0A.asm @@ -0,0 +1,2184 @@ +; $Id: HMR0A.asm $ +;; @file +; HM - Ring-0 VMX, SVM world-switch and helper routines +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +;********************************************************************************************************************************* +;* Header Files * +;********************************************************************************************************************************* +%include "VBox/asmdefs.mac" +%include "VBox/err.mac" +%include "VBox/vmm/hm_vmx.mac" +%include "VBox/vmm/cpum.mac" +%include "VBox/vmm/vm.mac" +%include "iprt/x86.mac" +%include "HMInternal.mac" + +%ifdef RT_OS_OS2 ;; @todo fix OMF support in yasm and kick nasm out completely. + %macro vmwrite 2, + int3 + %endmacro + %define vmlaunch int3 + %define vmresume int3 + %define vmsave int3 + %define vmload int3 + %define vmrun int3 + %define clgi int3 + %define stgi int3 + %macro invlpga 2, + int3 + %endmacro +%endif + +;********************************************************************************************************************************* +;* Defined Constants And Macros * +;********************************************************************************************************************************* +;; The offset of the XMM registers in X86FXSTATE. +; Use define because I'm too lazy to convert the struct. +%define XMM_OFF_IN_X86FXSTATE 160 + +;; Spectre filler for 32-bit mode. +; Some user space address that points to a 4MB page boundrary in hope that it +; will somehow make it less useful. +%define SPECTRE_FILLER32 0x227fffff +;; Spectre filler for 64-bit mode. +; Choosen to be an invalid address (also with 5 level paging). +%define SPECTRE_FILLER64 0x02204204207fffff +;; Spectre filler for the current CPU mode. +%ifdef RT_ARCH_AMD64 + %define SPECTRE_FILLER SPECTRE_FILLER64 +%else + %define SPECTRE_FILLER SPECTRE_FILLER32 +%endif + +;; +; Determine skipping restoring of GDTR, IDTR, TR across VMX non-root operation +; +%ifdef RT_ARCH_AMD64 + %define VMX_SKIP_GDTR + %define VMX_SKIP_TR + %define VBOX_SKIP_RESTORE_SEG + %ifdef RT_OS_DARWIN + ; Load the NULL selector into DS, ES, FS and GS on 64-bit darwin so we don't + ; risk loading a stale LDT value or something invalid. + %define HM_64_BIT_USE_NULL_SEL + ; Darwin (Mavericks) uses IDTR limit to store the CPU Id so we need to restore it always. + ; See @bugref{6875}. + %else + %define VMX_SKIP_IDTR + %endif +%endif + +;; @def MYPUSHAD +; Macro generating an equivalent to pushad + +;; @def MYPOPAD +; Macro generating an equivalent to popad + +;; @def MYPUSHSEGS +; Macro saving all segment registers on the stack. +; @param 1 full width register name +; @param 2 16-bit register name for \a 1. + +;; @def MYPOPSEGS +; Macro restoring all segment registers on the stack +; @param 1 full width register name +; @param 2 16-bit register name for \a 1. 
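;
; Illustrative pairing (a sketch mirroring the VMX start routines further
; down, not a definition): the push/pop macros are used as matching brackets
; around the world switch, e.g.
;       MYPUSHAD                ; save host GPRs
;       MYPUSHSEGS xAX, ax      ; save host segment registers (and FS/GS base MSRs)
;       ...                     ; VMLAUNCH / VMRESUME / VMRUN
;       MYPOPSEGS  xAX, ax      ; restore host segment state
;       MYPOPAD                 ; restore host GPRs
;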
+ +%ifdef ASM_CALL64_GCC + %macro MYPUSHAD64 0 + push r15 + push r14 + push r13 + push r12 + push rbx + %endmacro + %macro MYPOPAD64 0 + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + %endmacro + +%else ; ASM_CALL64_MSC + %macro MYPUSHAD64 0 + push r15 + push r14 + push r13 + push r12 + push rbx + push rsi + push rdi + %endmacro + %macro MYPOPAD64 0 + pop rdi + pop rsi + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + %endmacro +%endif + +%ifdef VBOX_SKIP_RESTORE_SEG + %macro MYPUSHSEGS64 2 + %endmacro + + %macro MYPOPSEGS64 2 + %endmacro +%else ; !VBOX_SKIP_RESTORE_SEG + ; trashes, rax, rdx & rcx + %macro MYPUSHSEGS64 2 + %ifndef HM_64_BIT_USE_NULL_SEL + mov %2, es + push %1 + mov %2, ds + push %1 + %endif + + ; Special case for FS; Windows and Linux either don't use it or restore it when leaving kernel mode, Solaris OTOH doesn't and we must save it. + mov ecx, MSR_K8_FS_BASE + rdmsr + push rdx + push rax + %ifndef HM_64_BIT_USE_NULL_SEL + push fs + %endif + + ; Special case for GS; OSes typically use swapgs to reset the hidden base register for GS on entry into the kernel. The same happens on exit + mov ecx, MSR_K8_GS_BASE + rdmsr + push rdx + push rax + %ifndef HM_64_BIT_USE_NULL_SEL + push gs + %endif + %endmacro + + ; trashes, rax, rdx & rcx + %macro MYPOPSEGS64 2 + ; Note: do not step through this code with a debugger! + %ifndef HM_64_BIT_USE_NULL_SEL + xor eax, eax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + %endif + + %ifndef HM_64_BIT_USE_NULL_SEL + pop gs + %endif + pop rax + pop rdx + mov ecx, MSR_K8_GS_BASE + wrmsr + + %ifndef HM_64_BIT_USE_NULL_SEL + pop fs + %endif + pop rax + pop rdx + mov ecx, MSR_K8_FS_BASE + wrmsr + ; Now it's safe to step again + + %ifndef HM_64_BIT_USE_NULL_SEL + pop %1 + mov ds, %2 + pop %1 + mov es, %2 + %endif + %endmacro +%endif ; VBOX_SKIP_RESTORE_SEG + +%macro MYPUSHAD32 0 + pushad +%endmacro +%macro MYPOPAD32 0 + popad +%endmacro + +%macro MYPUSHSEGS32 2 + push ds + push es + push fs + push gs +%endmacro +%macro MYPOPSEGS32 2 + pop gs + pop fs + pop es + pop ds +%endmacro + +%ifdef RT_ARCH_AMD64 + %define MYPUSHAD MYPUSHAD64 + %define MYPOPAD MYPOPAD64 + %define MYPUSHSEGS MYPUSHSEGS64 + %define MYPOPSEGS MYPOPSEGS64 +%else + %define MYPUSHAD MYPUSHAD32 + %define MYPOPAD MYPOPAD32 + %define MYPUSHSEGS MYPUSHSEGS32 + %define MYPOPSEGS MYPOPSEGS32 +%endif + +;; +; Creates an indirect branch prediction barrier on CPUs that need and supports that. +; @clobbers eax, edx, ecx +; @param 1 How to address CPUMCTX. +; @param 2 Which flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT) +%macro INDIRECT_BRANCH_PREDICTION_BARRIER 2 + test byte [%1 + CPUMCTX.fWorldSwitcher], %2 + jz %%no_indirect_branch_barrier + mov ecx, MSR_IA32_PRED_CMD + mov eax, MSR_IA32_PRED_CMD_F_IBPB + xor edx, edx + wrmsr +%%no_indirect_branch_barrier: +%endmacro + +;; +; Creates an indirect branch prediction and L1D barrier on CPUs that need and supports that. +; @clobbers eax, edx, ecx +; @param 1 How to address CPUMCTX. +; @param 2 Which IBPB flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT) +; @param 3 Which FLUSH flag to test for (CPUMCTX_WSF_L1D_ENTRY) +%macro INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER 3 + ; Only one test+jmp when disabled CPUs. + test byte [%1 + CPUMCTX.fWorldSwitcher], (%2 | %3) + jz %%no_barrier_needed + + ; The eax:edx value is the same for both. + AssertCompile(MSR_IA32_PRED_CMD_F_IBPB == MSR_IA32_FLUSH_CMD_F_L1D) + mov eax, MSR_IA32_PRED_CMD_F_IBPB + xor edx, edx + + ; Indirect branch barrier. 
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %2 + jz %%no_indirect_branch_barrier + mov ecx, MSR_IA32_PRED_CMD + wrmsr +%%no_indirect_branch_barrier: + + ; Level 1 data cache flush. + test byte [%1 + CPUMCTX.fWorldSwitcher], %3 + jz %%no_cache_flush_barrier + mov ecx, MSR_IA32_FLUSH_CMD + wrmsr +%%no_cache_flush_barrier: + +%%no_barrier_needed: +%endmacro + + +;********************************************************************************************************************************* +;* External Symbols * +;********************************************************************************************************************************* +%ifdef VBOX_WITH_KERNEL_USING_XMM +extern NAME(CPUMIsGuestFPUStateActive) +%endif + + +BEGINCODE + + +;/** +; * Restores host-state fields. +; * +; * @returns VBox status code +; * @param f32RestoreHost x86: [ebp + 08h] msc: ecx gcc: edi RestoreHost flags. +; * @param pRestoreHost x86: [ebp + 0ch] msc: rdx gcc: rsi Pointer to the RestoreHost struct. +; */ +ALIGNCODE(16) +BEGINPROC VMXRestoreHostState +%ifdef RT_ARCH_AMD64 + %ifndef ASM_CALL64_GCC + ; Use GCC's input registers since we'll be needing both rcx and rdx further + ; down with the wrmsr instruction. Use the R10 and R11 register for saving + ; RDI and RSI since MSC preserve the two latter registers. + mov r10, rdi + mov r11, rsi + mov rdi, rcx + mov rsi, rdx + %endif + + test edi, VMX_RESTORE_HOST_GDTR + jz .test_idtr + lgdt [rsi + VMXRESTOREHOST.HostGdtr] + +.test_idtr: + test edi, VMX_RESTORE_HOST_IDTR + jz .test_ds + lidt [rsi + VMXRESTOREHOST.HostIdtr] + +.test_ds: + test edi, VMX_RESTORE_HOST_SEL_DS + jz .test_es + mov ax, [rsi + VMXRESTOREHOST.uHostSelDS] + mov ds, eax + +.test_es: + test edi, VMX_RESTORE_HOST_SEL_ES + jz .test_tr + mov ax, [rsi + VMXRESTOREHOST.uHostSelES] + mov es, eax + +.test_tr: + test edi, VMX_RESTORE_HOST_SEL_TR + jz .test_fs + ; When restoring the TR, we must first clear the busy flag or we'll end up faulting. + mov dx, [rsi + VMXRESTOREHOST.uHostSelTR] + mov ax, dx + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + test edi, VMX_RESTORE_HOST_GDT_READ_ONLY | VMX_RESTORE_HOST_GDT_NEED_WRITABLE + jnz .gdt_readonly + add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt. + and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr dx + jmp short .test_fs +.gdt_readonly: + test edi, VMX_RESTORE_HOST_GDT_NEED_WRITABLE + jnz .gdt_readonly_need_writable + mov rcx, cr0 + mov r9, rcx + add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt. + and rcx, ~X86_CR0_WP + mov cr0, rcx + and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr dx + mov cr0, r9 + jmp short .test_fs +.gdt_readonly_need_writable: + add rax, qword [rsi + VMXRESTOREHOST.HostGdtrRw + 2] ; xAX <- descriptor offset + GDTR.pGdtRw. + and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + lgdt [rsi + VMXRESTOREHOST.HostGdtrRw] + ltr dx + lgdt [rsi + VMXRESTOREHOST.HostGdtr] ; Load the original GDT + +.test_fs: + ; + ; When restoring the selector values for FS and GS, we'll temporarily trash + ; the base address (at least the high 32-bit bits, but quite possibly the + ; whole base address), the wrmsr will restore it correctly. 
(VT-x actually + ; restores the base correctly when leaving guest mode, but not the selector + ; value, so there is little problem with interrupts being enabled prior to + ; this restore job.) + ; We'll disable ints once for both FS and GS as that's probably faster. + ; + test edi, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS + jz .restore_success + pushfq + cli ; (see above) + + test edi, VMX_RESTORE_HOST_SEL_FS + jz .test_gs + mov ax, word [rsi + VMXRESTOREHOST.uHostSelFS] + mov fs, eax + mov eax, dword [rsi + VMXRESTOREHOST.uHostFSBase] ; uHostFSBase - Lo + mov edx, dword [rsi + VMXRESTOREHOST.uHostFSBase + 4h] ; uHostFSBase - Hi + mov ecx, MSR_K8_FS_BASE + wrmsr + +.test_gs: + test edi, VMX_RESTORE_HOST_SEL_GS + jz .restore_flags + mov ax, word [rsi + VMXRESTOREHOST.uHostSelGS] + mov gs, eax + mov eax, dword [rsi + VMXRESTOREHOST.uHostGSBase] ; uHostGSBase - Lo + mov edx, dword [rsi + VMXRESTOREHOST.uHostGSBase + 4h] ; uHostGSBase - Hi + mov ecx, MSR_K8_GS_BASE + wrmsr + +.restore_flags: + popfq + +.restore_success: + mov eax, VINF_SUCCESS + %ifndef ASM_CALL64_GCC + ; Restore RDI and RSI on MSC. + mov rdi, r10 + mov rsi, r11 + %endif +%else ; RT_ARCH_X86 + mov eax, VERR_NOT_IMPLEMENTED +%endif + ret +ENDPROC VMXRestoreHostState + + +;/** +; * Dispatches an NMI to the host. +; */ +ALIGNCODE(16) +BEGINPROC VMXDispatchHostNmi + int 2 ; NMI is always vector 2. The IDT[2] IRQ handler cannot be anything else. See Intel spec. 6.3.1 "External Interrupts". + ret +ENDPROC VMXDispatchHostNmi + + +;/** +; * Executes VMWRITE, 64-bit value. +; * +; * @returns VBox status code. +; * @param idxField x86: [ebp + 08h] msc: rcx gcc: rdi VMCS index. +; * @param u64Data x86: [ebp + 0ch] msc: rdx gcc: rsi VM field value. +; */ +ALIGNCODE(16) +BEGINPROC VMXWriteVmcs64 +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + xor rax, rax + vmwrite rdi, rsi + %else + and ecx, 0ffffffffh + xor rax, rax + vmwrite rcx, rdx + %endif +%else ; RT_ARCH_X86 + mov ecx, [esp + 4] ; idxField + lea edx, [esp + 8] ; &u64Data + vmwrite ecx, [edx] ; low dword + jz .done + jc .done + inc ecx + xor eax, eax + vmwrite ecx, [edx + 4] ; high dword +.done: +%endif ; RT_ARCH_X86 + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_VMX_INVALID_VMCS_FIELD +.the_end: + ret +ENDPROC VMXWriteVmcs64 + + +;/** +; * Executes VMREAD, 64-bit value. +; * +; * @returns VBox status code. +; * @param idxField VMCS index. +; * @param pData Where to store VM field value. +; */ +;DECLASM(int) VMXReadVmcs64(uint32_t idxField, uint64_t *pData); +ALIGNCODE(16) +BEGINPROC VMXReadVmcs64 +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + xor rax, rax + vmread [rsi], rdi + %else + and ecx, 0ffffffffh + xor rax, rax + vmread [rdx], rcx + %endif +%else ; RT_ARCH_X86 + mov ecx, [esp + 4] ; idxField + mov edx, [esp + 8] ; pData + vmread [edx], ecx ; low dword + jz .done + jc .done + inc ecx + xor eax, eax + vmread [edx + 4], ecx ; high dword +.done: +%endif ; RT_ARCH_X86 + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_VMX_INVALID_VMCS_FIELD +.the_end: + ret +ENDPROC VMXReadVmcs64 + + +;/** +; * Executes VMREAD, 32-bit value. +; * +; * @returns VBox status code. +; * @param idxField VMCS index. +; * @param pu32Data Where to store VM field value. 
+; */ +;DECLASM(int) VMXReadVmcs32(uint32_t idxField, uint32_t *pu32Data); +ALIGNCODE(16) +BEGINPROC VMXReadVmcs32 +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + xor rax, rax + vmread r10, rdi + mov [rsi], r10d + %else + and ecx, 0ffffffffh + xor rax, rax + vmread r10, rcx + mov [rdx], r10d + %endif +%else ; RT_ARCH_X86 + mov ecx, [esp + 4] ; idxField + mov edx, [esp + 8] ; pu32Data + xor eax, eax + vmread [edx], ecx +%endif ; RT_ARCH_X86 + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_VMX_INVALID_VMCS_FIELD +.the_end: + ret +ENDPROC VMXReadVmcs32 + + +;/** +; * Executes VMWRITE, 32-bit value. +; * +; * @returns VBox status code. +; * @param idxField VMCS index. +; * @param u32Data Where to store VM field value. +; */ +;DECLASM(int) VMXWriteVmcs32(uint32_t idxField, uint32_t u32Data); +ALIGNCODE(16) +BEGINPROC VMXWriteVmcs32 +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + and esi, 0ffffffffh + xor rax, rax + vmwrite rdi, rsi + %else + and ecx, 0ffffffffh + and edx, 0ffffffffh + xor rax, rax + vmwrite rcx, rdx + %endif +%else ; RT_ARCH_X86 + mov ecx, [esp + 4] ; idxField + mov edx, [esp + 8] ; u32Data + xor eax, eax + vmwrite ecx, edx +%endif ; RT_ARCH_X86 + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_VMX_INVALID_VMCS_FIELD +.the_end: + ret +ENDPROC VMXWriteVmcs32 + + +;/** +; * Executes VMXON. +; * +; * @returns VBox status code. +; * @param HCPhysVMXOn Physical address of VMXON structure. +; */ +;DECLASM(int) VMXEnable(RTHCPHYS HCPhysVMXOn); +BEGINPROC VMXEnable +%ifdef RT_ARCH_AMD64 + xor rax, rax + %ifdef ASM_CALL64_GCC + push rdi + %else + push rcx + %endif + vmxon [rsp] +%else ; RT_ARCH_X86 + xor eax, eax + vmxon [esp + 4] +%endif ; RT_ARCH_X86 + jnc .good + mov eax, VERR_VMX_INVALID_VMXON_PTR + jmp .the_end + +.good: + jnz .the_end + mov eax, VERR_VMX_VMXON_FAILED + +.the_end: +%ifdef RT_ARCH_AMD64 + add rsp, 8 +%endif + ret +ENDPROC VMXEnable + + +;/** +; * Executes VMXOFF. +; */ +;DECLASM(void) VMXDisable(void); +BEGINPROC VMXDisable + vmxoff +.the_end: + ret +ENDPROC VMXDisable + + +;/** +; * Executes VMCLEAR. +; * +; * @returns VBox status code. +; * @param HCPhysVmcs Physical address of VM control structure. +; */ +;DECLASM(int) VMXClearVmcs(RTHCPHYS HCPhysVmcs); +ALIGNCODE(16) +BEGINPROC VMXClearVmcs +%ifdef RT_ARCH_AMD64 + xor rax, rax + %ifdef ASM_CALL64_GCC + push rdi + %else + push rcx + %endif + vmclear [rsp] +%else ; RT_ARCH_X86 + xor eax, eax + vmclear [esp + 4] +%endif ; RT_ARCH_X86 + jnc .the_end + mov eax, VERR_VMX_INVALID_VMCS_PTR +.the_end: +%ifdef RT_ARCH_AMD64 + add rsp, 8 +%endif + ret +ENDPROC VMXClearVmcs + + +;/** +; * Executes VMPTRLD. +; * +; * @returns VBox status code. +; * @param HCPhysVmcs Physical address of VMCS structure. +; */ +;DECLASM(int) VMXActivateVmcs(RTHCPHYS HCPhysVmcs); +ALIGNCODE(16) +BEGINPROC VMXActivateVmcs +%ifdef RT_ARCH_AMD64 + xor rax, rax + %ifdef ASM_CALL64_GCC + push rdi + %else + push rcx + %endif + vmptrld [rsp] +%else + xor eax, eax + vmptrld [esp + 4] +%endif + jnc .the_end + mov eax, VERR_VMX_INVALID_VMCS_PTR +.the_end: +%ifdef RT_ARCH_AMD64 + add rsp, 8 +%endif + ret +ENDPROC VMXActivateVmcs + + +;/** +; * Executes VMPTRST. +; * +; * @returns VBox status code. +; * @param [esp + 04h] gcc:rdi msc:rcx Param 1 - First parameter - Address that will receive the current pointer. 
+; */ +;DECLASM(int) VMXGetActivatedVmcs(RTHCPHYS *pVMCS); +BEGINPROC VMXGetActivatedVmcs +%ifdef RT_OS_OS2 + mov eax, VERR_NOT_SUPPORTED + ret +%else + %ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + vmptrst qword [rdi] + %else + vmptrst qword [rcx] + %endif + %else + vmptrst qword [esp+04h] + %endif + xor eax, eax +.the_end: + ret +%endif +ENDPROC VMXGetActivatedVmcs + +;/** +; * Invalidate a page using INVEPT. +; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush. +; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer. +; */ +;DECLASM(int) VMXR0InvEPT(VMXTLBFLUSHEPT enmTlbFlush, uint64_t *pDescriptor); +BEGINPROC VMXR0InvEPT +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + xor rax, rax +; invept rdi, qword [rsi] + DB 0x66, 0x0F, 0x38, 0x80, 0x3E + %else + and ecx, 0ffffffffh + xor rax, rax +; invept rcx, qword [rdx] + DB 0x66, 0x0F, 0x38, 0x80, 0xA + %endif +%else + mov ecx, [esp + 4] + mov edx, [esp + 8] + xor eax, eax +; invept ecx, qword [edx] + DB 0x66, 0x0F, 0x38, 0x80, 0xA +%endif + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_INVALID_PARAMETER +.the_end: + ret +ENDPROC VMXR0InvEPT + + +;/** +; * Invalidate a page using invvpid +; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush +; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer +; */ +;DECLASM(int) VMXR0InvVPID(VMXTLBFLUSHVPID enmTlbFlush, uint64_t *pDescriptor); +BEGINPROC VMXR0InvVPID +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + and edi, 0ffffffffh + xor rax, rax +; invvpid rdi, qword [rsi] + DB 0x66, 0x0F, 0x38, 0x81, 0x3E + %else + and ecx, 0ffffffffh + xor rax, rax +; invvpid rcx, qword [rdx] + DB 0x66, 0x0F, 0x38, 0x81, 0xA + %endif +%else + mov ecx, [esp + 4] + mov edx, [esp + 8] + xor eax, eax +; invvpid ecx, qword [edx] + DB 0x66, 0x0F, 0x38, 0x81, 0xA +%endif + jnc .valid_vmcs + mov eax, VERR_VMX_INVALID_VMCS_PTR + ret +.valid_vmcs: + jnz .the_end + mov eax, VERR_INVALID_PARAMETER +.the_end: + ret +ENDPROC VMXR0InvVPID + + +%if GC_ARCH_BITS == 64 +;; +; Executes INVLPGA +; +; @param pPageGC msc:rcx gcc:rdi x86:[esp+04] Virtual page to invalidate +; @param uASID msc:rdx gcc:rsi x86:[esp+0C] Tagged TLB id +; +;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID); +BEGINPROC SVMR0InvlpgA +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + mov rax, rdi + mov rcx, rsi + %else + mov rax, rcx + mov rcx, rdx + %endif +%else + mov eax, [esp + 4] + mov ecx, [esp + 0Ch] +%endif + invlpga [xAX], ecx + ret +ENDPROC SVMR0InvlpgA + +%else ; GC_ARCH_BITS != 64 +;; +; Executes INVLPGA +; +; @param pPageGC msc:ecx gcc:edi x86:[esp+04] Virtual page to invalidate +; @param uASID msc:edx gcc:esi x86:[esp+08] Tagged TLB id +; +;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID); +BEGINPROC SVMR0InvlpgA +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + movzx rax, edi + mov ecx, esi + %else + ; from http://www.cs.cmu.edu/~fp/courses/15213-s06/misc/asm64-handout.pdf: + ; ``Perhaps unexpectedly, instructions that move or generate 32-bit register + ; values also set the upper 32 bits of the register to zero. Consequently + ; there is no need for an instruction movzlq.'' + mov eax, ecx + mov ecx, edx + %endif +%else + mov eax, [esp + 4] + mov ecx, [esp + 8] +%endif + invlpga [xAX], ecx + ret +ENDPROC SVMR0InvlpgA + +%endif ; GC_ARCH_BITS != 64 + + +%ifdef VBOX_WITH_KERNEL_USING_XMM + +;; +; Wrapper around vmx.pfnStartVM that preserves host XMM registers and +; load the guest ones when necessary. 
+; +; @cproto DECLASM(int) HMR0VMXStartVMhmR0DumpDescriptorM(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, +; PVMCPU pVCpu, PFNHMVMXSTARTVM pfnStartVM); +; +; @returns eax +; +; @param fResumeVM msc:rcx +; @param pCtx msc:rdx +; @param pVMCSCache msc:r8 +; @param pVM msc:r9 +; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT. +; @param pfnStartVM msc:[rbp+38h] +; +; @remarks This is essentially the same code as hmR0SVMRunWrapXMM, only the parameters differ a little bit. +; +; @remarks Drivers shouldn't use AVX registers without saving+loading: +; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396 +; However the compiler docs have different idea: +; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx +; We'll go with the former for now. +; +; ASSUMING 64-bit and windows for now. +; +ALIGNCODE(16) +BEGINPROC hmR0VMXStartVMWrapXMM + push xBP + mov xBP, xSP + sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size. + + ; spill input parameters. + mov [xBP + 010h], rcx ; fResumeVM + mov [xBP + 018h], rdx ; pCtx + mov [xBP + 020h], r8 ; pVMCSCache + mov [xBP + 028h], r9 ; pVM + + ; Ask CPUM whether we've started using the FPU yet. + mov rcx, [xBP + 30h] ; pVCpu + call NAME(CPUMIsGuestFPUStateActive) + test al, al + jnz .guest_fpu_state_active + + ; No need to mess with XMM registers just call the start routine and return. + mov r11, [xBP + 38h] ; pfnStartVM + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; fResumeVM + mov rdx, [xBP + 018h] ; pCtx + mov r8, [xBP + 020h] ; pVMCSCache + mov r9, [xBP + 028h] ; pVM + call r11 + + leave + ret + +ALIGNCODE(8) +.guest_fpu_state_active: + ; Save the non-volatile host XMM registers. + movdqa [rsp + 040h + 000h], xmm6 + movdqa [rsp + 040h + 010h], xmm7 + movdqa [rsp + 040h + 020h], xmm8 + movdqa [rsp + 040h + 030h], xmm9 + movdqa [rsp + 040h + 040h], xmm10 + movdqa [rsp + 040h + 050h], xmm11 + movdqa [rsp + 040h + 060h], xmm12 + movdqa [rsp + 040h + 070h], xmm13 + movdqa [rsp + 040h + 080h], xmm14 + movdqa [rsp + 040h + 090h], xmm15 + stmxcsr [rsp + 040h + 0a0h] + + mov r10, [xBP + 018h] ; pCtx + mov eax, [r10 + CPUMCTX.fXStateMask] + test eax, eax + jz .guest_fpu_state_manually + + ; + ; Using XSAVE to load the guest XMM, YMM and ZMM registers. + ; + and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS + xor edx, edx + mov r10, [r10 + CPUMCTX.pXStateR0] + xrstor [r10] + + ; Make the call (same as in the other case ). + mov r11, [xBP + 38h] ; pfnStartVM + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; fResumeVM + mov rdx, [xBP + 018h] ; pCtx + mov r8, [xBP + 020h] ; pVMCSCache + mov r9, [xBP + 028h] ; pVM + call r11 + + mov r11d, eax ; save return value (xsave below uses eax) + + ; Save the guest XMM registers. + mov r10, [xBP + 018h] ; pCtx + mov eax, [r10 + CPUMCTX.fXStateMask] + and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS + xor edx, edx + mov r10, [r10 + CPUMCTX.pXStateR0] + xsave [r10] + + mov eax, r11d ; restore return value. + +.restore_non_volatile_host_xmm_regs: + ; Load the non-volatile host XMM registers. 
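    ; (xmm6-xmm15 and MXCSR are the non-volatile/callee-saved SSE state in the
    ;  Windows x64 calling convention, which is why only these were saved above
    ;  and are restored here.)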
+ movdqa xmm6, [rsp + 040h + 000h] + movdqa xmm7, [rsp + 040h + 010h] + movdqa xmm8, [rsp + 040h + 020h] + movdqa xmm9, [rsp + 040h + 030h] + movdqa xmm10, [rsp + 040h + 040h] + movdqa xmm11, [rsp + 040h + 050h] + movdqa xmm12, [rsp + 040h + 060h] + movdqa xmm13, [rsp + 040h + 070h] + movdqa xmm14, [rsp + 040h + 080h] + movdqa xmm15, [rsp + 040h + 090h] + ldmxcsr [rsp + 040h + 0a0h] + leave + ret + + ; + ; No XSAVE, load and save the guest XMM registers manually. + ; +.guest_fpu_state_manually: + ; Load the full guest XMM register state. + mov r10, [r10 + CPUMCTX.pXStateR0] + movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h] + movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h] + movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h] + movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h] + movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h] + movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h] + movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h] + movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h] + movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h] + movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h] + movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h] + movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h] + movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h] + movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h] + movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h] + movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h] + ldmxcsr [r10 + X86FXSTATE.MXCSR] + + ; Make the call (same as in the other case ). + mov r11, [xBP + 38h] ; pfnStartVM + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; fResumeVM + mov rdx, [xBP + 018h] ; pCtx + mov r8, [xBP + 020h] ; pVMCSCache + mov r9, [xBP + 028h] ; pVM + call r11 + + ; Save the guest XMM registers. + mov r10, [xBP + 018h] ; pCtx + mov r10, [r10 + CPUMCTX.pXStateR0] + stmxcsr [r10 + X86FXSTATE.MXCSR] + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15 + jmp .restore_non_volatile_host_xmm_regs +ENDPROC hmR0VMXStartVMWrapXMM + +;; +; Wrapper around svm.pfnVMRun that preserves host XMM registers and +; load the guest ones when necessary. +; +; @cproto DECLASM(int) hmR0SVMRunWrapXMM(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu, +; PFNHMSVMVMRUN pfnVMRun); +; +; @returns eax +; +; @param HCPhysVmcbHost msc:rcx +; @param HCPhysVmcb msc:rdx +; @param pCtx msc:r8 +; @param pVM msc:r9 +; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT. +; @param pfnVMRun msc:[rbp+38h] +; +; @remarks This is essentially the same code as hmR0VMXStartVMWrapXMM, only the parameters differ a little bit. 
+; +; @remarks Drivers shouldn't use AVX registers without saving+loading: +; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396 +; However the compiler docs have different idea: +; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx +; We'll go with the former for now. +; +; ASSUMING 64-bit and windows for now. +ALIGNCODE(16) +BEGINPROC hmR0SVMRunWrapXMM + push xBP + mov xBP, xSP + sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size. + + ; spill input parameters. + mov [xBP + 010h], rcx ; HCPhysVmcbHost + mov [xBP + 018h], rdx ; HCPhysVmcb + mov [xBP + 020h], r8 ; pCtx + mov [xBP + 028h], r9 ; pVM + + ; Ask CPUM whether we've started using the FPU yet. + mov rcx, [xBP + 30h] ; pVCpu + call NAME(CPUMIsGuestFPUStateActive) + test al, al + jnz .guest_fpu_state_active + + ; No need to mess with XMM registers just call the start routine and return. + mov r11, [xBP + 38h] ; pfnVMRun + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; HCPhysVmcbHost + mov rdx, [xBP + 018h] ; HCPhysVmcb + mov r8, [xBP + 020h] ; pCtx + mov r9, [xBP + 028h] ; pVM + call r11 + + leave + ret + +ALIGNCODE(8) +.guest_fpu_state_active: + ; Save the non-volatile host XMM registers. + movdqa [rsp + 040h + 000h], xmm6 + movdqa [rsp + 040h + 010h], xmm7 + movdqa [rsp + 040h + 020h], xmm8 + movdqa [rsp + 040h + 030h], xmm9 + movdqa [rsp + 040h + 040h], xmm10 + movdqa [rsp + 040h + 050h], xmm11 + movdqa [rsp + 040h + 060h], xmm12 + movdqa [rsp + 040h + 070h], xmm13 + movdqa [rsp + 040h + 080h], xmm14 + movdqa [rsp + 040h + 090h], xmm15 + stmxcsr [rsp + 040h + 0a0h] + + mov r10, [xBP + 020h] ; pCtx + mov eax, [r10 + CPUMCTX.fXStateMask] + test eax, eax + jz .guest_fpu_state_manually + + ; + ; Using XSAVE. + ; + and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS + xor edx, edx + mov r10, [r10 + CPUMCTX.pXStateR0] + xrstor [r10] + + ; Make the call (same as in the other case ). + mov r11, [xBP + 38h] ; pfnVMRun + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; HCPhysVmcbHost + mov rdx, [xBP + 018h] ; HCPhysVmcb + mov r8, [xBP + 020h] ; pCtx + mov r9, [xBP + 028h] ; pVM + call r11 + + mov r11d, eax ; save return value (xsave below uses eax) + + ; Save the guest XMM registers. + mov r10, [xBP + 020h] ; pCtx + mov eax, [r10 + CPUMCTX.fXStateMask] + and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS + xor edx, edx + mov r10, [r10 + CPUMCTX.pXStateR0] + xsave [r10] + + mov eax, r11d ; restore return value. + +.restore_non_volatile_host_xmm_regs: + ; Load the non-volatile host XMM registers. + movdqa xmm6, [rsp + 040h + 000h] + movdqa xmm7, [rsp + 040h + 010h] + movdqa xmm8, [rsp + 040h + 020h] + movdqa xmm9, [rsp + 040h + 030h] + movdqa xmm10, [rsp + 040h + 040h] + movdqa xmm11, [rsp + 040h + 050h] + movdqa xmm12, [rsp + 040h + 060h] + movdqa xmm13, [rsp + 040h + 070h] + movdqa xmm14, [rsp + 040h + 080h] + movdqa xmm15, [rsp + 040h + 090h] + ldmxcsr [rsp + 040h + 0a0h] + leave + ret + + ; + ; No XSAVE, load and save the guest XMM registers manually. + ; +.guest_fpu_state_manually: + ; Load the full guest XMM register state. 
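    ; (This path is taken when fXStateMask is zero, i.e. no XSAVE: xmm0-xmm15 and
    ;  MXCSR are copied by hand from the legacy fxsave image, where
    ;  XMM_OFF_IN_X86FXSTATE (160) is the offset of the XMM register array.)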
+ mov r10, [r10 + CPUMCTX.pXStateR0] + movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h] + movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h] + movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h] + movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h] + movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h] + movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h] + movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h] + movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h] + movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h] + movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h] + movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h] + movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h] + movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h] + movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h] + movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h] + movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h] + ldmxcsr [r10 + X86FXSTATE.MXCSR] + + ; Make the call (same as in the other case ). + mov r11, [xBP + 38h] ; pfnVMRun + mov r10, [xBP + 30h] ; pVCpu + mov [xSP + 020h], r10 + mov rcx, [xBP + 010h] ; HCPhysVmcbHost + mov rdx, [xBP + 018h] ; HCPhysVmcb + mov r8, [xBP + 020h] ; pCtx + mov r9, [xBP + 028h] ; pVM + call r11 + + ; Save the guest XMM registers. + mov r10, [xBP + 020h] ; pCtx + mov r10, [r10 + CPUMCTX.pXStateR0] + stmxcsr [r10 + X86FXSTATE.MXCSR] + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14 + movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15 + jmp .restore_non_volatile_host_xmm_regs +ENDPROC hmR0SVMRunWrapXMM + +%endif ; VBOX_WITH_KERNEL_USING_XMM + + +;; @def RESTORE_STATE_VM32 +; Macro restoring essential host state and updating guest state +; for common host, 32-bit guest for VT-x. +%macro RESTORE_STATE_VM32 0 + ; Restore base and limit of the IDTR & GDTR. + %ifndef VMX_SKIP_IDTR + lidt [xSP] + add xSP, xCB * 2 + %endif + %ifndef VMX_SKIP_GDTR + lgdt [xSP] + add xSP, xCB * 2 + %endif + + push xDI + %ifndef VMX_SKIP_TR + mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR). + %else + mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR). + %endif + + mov [ss:xDI + CPUMCTX.eax], eax + mov xAX, SPECTRE_FILLER + mov [ss:xDI + CPUMCTX.ebx], ebx + mov xBX, xAX + mov [ss:xDI + CPUMCTX.ecx], ecx + mov xCX, xAX + mov [ss:xDI + CPUMCTX.edx], edx + mov xDX, xAX + mov [ss:xDI + CPUMCTX.esi], esi + mov xSI, xAX + mov [ss:xDI + CPUMCTX.ebp], ebp + mov xBP, xAX + mov xAX, cr2 + mov [ss:xDI + CPUMCTX.cr2], xAX + + %ifdef RT_ARCH_AMD64 + pop xAX ; The guest edi we pushed above. + mov dword [ss:xDI + CPUMCTX.edi], eax + %else + pop dword [ss:xDI + CPUMCTX.edi] ; The guest edi we pushed above. + %endif + + ; Fight spectre. + INDIRECT_BRANCH_PREDICTION_BARRIER ss:xDI, CPUMCTX_WSF_IBPB_EXIT + + %ifndef VMX_SKIP_TR + ; Restore TSS selector; must mark it as not busy before using ltr (!) 
+ ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) + ; @todo get rid of sgdt + pop xBX ; Saved TR + sub xSP, xCB * 2 + sgdt [xSP] + mov xAX, xBX + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. + and dword [ss:xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr bx + add xSP, xCB * 2 + %endif + + pop xAX ; Saved LDTR + %ifdef RT_ARCH_AMD64 + cmp eax, 0 + je %%skip_ldt_write32 + %endif + lldt ax + +%%skip_ldt_write32: + add xSP, xCB ; pCtx + + %ifdef VMX_USE_CACHED_VMCS_ACCESSES + pop xDX ; Saved pCache + + ; Note! If we get here as a result of invalid VMCS pointer, all the following + ; vmread's will fail (only eflags.cf=1 will be set) but that shouldn't cause any + ; trouble only just less efficient. + mov ecx, [ss:xDX + VMCSCACHE.Read.cValidEntries] + cmp ecx, 0 ; Can't happen + je %%no_cached_read32 + jmp %%cached_read32 + +ALIGN(16) +%%cached_read32: + dec xCX + mov eax, [ss:xDX + VMCSCACHE.Read.aField + xCX * 4] + ; Note! This leaves the high 32 bits of the cache entry unmodified!! + vmread [ss:xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX + cmp xCX, 0 + jnz %%cached_read32 +%%no_cached_read32: + %endif + + ; Restore segment registers. + MYPOPSEGS xAX, ax + + ; Restore the host XCR0 if necessary. + pop xCX + test ecx, ecx + jnz %%xcr0_after_skip + pop xAX + pop xDX + xsetbv ; ecx is already zero. +%%xcr0_after_skip: + + ; Restore general purpose registers. + MYPOPAD +%endmacro + + +;; +; Prepares for and executes VMLAUNCH/VMRESUME (32 bits guest mode) +; +; @returns VBox status code +; @param fResume x86:[ebp+8], msc:rcx,gcc:rdi Whether to use vmlauch/vmresume. +; @param pCtx x86:[ebp+c], msc:rdx,gcc:rsi Pointer to the guest-CPU context. +; @param pCache x86:[ebp+10],msc:r8, gcc:rdx Pointer to the VMCS cache. +; @param pVM x86:[ebp+14],msc:r9, gcc:rcx The cross context VM structure. +; @param pVCpu x86:[ebp+18],msc:[ebp+30],gcc:r8 The cross context virtual CPU structure of the calling EMT. +; +ALIGNCODE(16) +BEGINPROC VMXR0StartVM32 + push xBP + mov xBP, xSP + + pushf + cli + + ; + ; Save all general purpose host registers. + ; + MYPUSHAD + + ; + ; First we have to write some final guest CPU context registers. + ; + mov eax, VMX_VMCS_HOST_RIP +%ifdef RT_ARCH_AMD64 + lea r10, [.vmlaunch_done wrt rip] + vmwrite rax, r10 +%else + mov ecx, .vmlaunch_done + vmwrite eax, ecx +%endif + ; Note: assumes success! + + ; + ; Unify input parameter registers. + ; +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + ; fResume already in rdi + ; pCtx already in rsi + mov rbx, rdx ; pCache + %else + mov rdi, rcx ; fResume + mov rsi, rdx ; pCtx + mov rbx, r8 ; pCache + %endif +%else + mov edi, [ebp + 8] ; fResume + mov esi, [ebp + 12] ; pCtx + mov ebx, [ebp + 16] ; pCache +%endif + + ; + ; Save the host XCR0 and load the guest one if necessary. + ; Note! Trashes rdx and rcx. + ; +%ifdef ASM_CALL64_MSC + mov rax, [xBP + 30h] ; pVCpu +%elifdef ASM_CALL64_GCC + mov rax, r8 ; pVCpu +%else + mov eax, [xBP + 18h] ; pVCpu +%endif + test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1 + jz .xcr0_before_skip + + xor ecx, ecx + xgetbv ; Save the host one on the stack. + push xDX + push xAX + + mov eax, [xSI + CPUMCTX.aXcr] ; Load the guest one. + mov edx, [xSI + CPUMCTX.aXcr + 4] + xor ecx, ecx ; paranoia + xsetbv + + push 0 ; Indicate that we must restore XCR0 (popped into ecx, thus 0). 
+ jmp .xcr0_before_done + +.xcr0_before_skip: + push 3fh ; indicate that we need not. +.xcr0_before_done: + + ; + ; Save segment registers. + ; Note! Trashes rdx & rcx, so we moved it here (amd64 case). + ; + MYPUSHSEGS xAX, ax + +%ifdef VMX_USE_CACHED_VMCS_ACCESSES + mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] + cmp ecx, 0 + je .no_cached_writes + mov edx, ecx + mov ecx, 0 + jmp .cached_write + +ALIGN(16) +.cached_write: + mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4] + vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8] + inc xCX + cmp xCX, xDX + jl .cached_write + + mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 +.no_cached_writes: + + ; Save the pCache pointer. + push xBX +%endif + + ; Save the pCtx pointer. + push xSI + + ; Save host LDTR. + xor eax, eax + sldt ax + push xAX + +%ifndef VMX_SKIP_TR + ; The host TR limit is reset to 0x67; save & restore it manually. + str eax + push xAX +%endif + +%ifndef VMX_SKIP_GDTR + ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! + sub xSP, xCB * 2 + sgdt [xSP] +%endif +%ifndef VMX_SKIP_IDTR + sub xSP, xCB * 2 + sidt [xSP] +%endif + + ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction). + mov xBX, [xSI + CPUMCTX.cr2] + mov xDX, cr2 + cmp xBX, xDX + je .skip_cr2_write32 + mov cr2, xBX + +.skip_cr2_write32: + mov eax, VMX_VMCS_HOST_RSP + vmwrite xAX, xSP + ; Note: assumes success! + ; Don't mess with ESP anymore!!! + + ; Fight spectre and similar. + INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY + + ; Load guest general purpose registers. + mov eax, [xSI + CPUMCTX.eax] + mov ebx, [xSI + CPUMCTX.ebx] + mov ecx, [xSI + CPUMCTX.ecx] + mov edx, [xSI + CPUMCTX.edx] + mov ebp, [xSI + CPUMCTX.ebp] + + ; Resume or start VM? + cmp xDI, 0 ; fResume + + ; Load guest edi & esi. + mov edi, [xSI + CPUMCTX.edi] + mov esi, [xSI + CPUMCTX.esi] + + je .vmlaunch_launch + + vmresume + jc near .vmxstart_invalid_vmcs_ptr + jz near .vmxstart_start_failed + jmp .vmlaunch_done; ; Here if vmresume detected a failure. + +.vmlaunch_launch: + vmlaunch + jc near .vmxstart_invalid_vmcs_ptr + jz near .vmxstart_start_failed + jmp .vmlaunch_done; ; Here if vmlaunch detected a failure. + +ALIGNCODE(16) ;; @todo YASM BUG - this alignment is wrong on darwin, it's 1 byte off. +.vmlaunch_done: + RESTORE_STATE_VM32 + mov eax, VINF_SUCCESS + +.vmstart_end: + popf + pop xBP + ret + +.vmxstart_invalid_vmcs_ptr: + RESTORE_STATE_VM32 + mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM + jmp .vmstart_end + +.vmxstart_start_failed: + RESTORE_STATE_VM32 + mov eax, VERR_VMX_UNABLE_TO_START_VM + jmp .vmstart_end + +ENDPROC VMXR0StartVM32 + + +%ifdef RT_ARCH_AMD64 +;; @def RESTORE_STATE_VM64 +; Macro restoring essential host state and updating guest state +; for 64-bit host, 64-bit guest for VT-x. 
+; +%macro RESTORE_STATE_VM64 0 + ; Restore base and limit of the IDTR & GDTR + %ifndef VMX_SKIP_IDTR + lidt [xSP] + add xSP, xCB * 2 + %endif + %ifndef VMX_SKIP_GDTR + lgdt [xSP] + add xSP, xCB * 2 + %endif + + push xDI + %ifndef VMX_SKIP_TR + mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR) + %else + mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR) + %endif + + mov qword [xDI + CPUMCTX.eax], rax + mov rax, SPECTRE_FILLER64 + mov qword [xDI + CPUMCTX.ebx], rbx + mov rbx, rax + mov qword [xDI + CPUMCTX.ecx], rcx + mov rcx, rax + mov qword [xDI + CPUMCTX.edx], rdx + mov rdx, rax + mov qword [xDI + CPUMCTX.esi], rsi + mov rsi, rax + mov qword [xDI + CPUMCTX.ebp], rbp + mov rbp, rax + mov qword [xDI + CPUMCTX.r8], r8 + mov r8, rax + mov qword [xDI + CPUMCTX.r9], r9 + mov r9, rax + mov qword [xDI + CPUMCTX.r10], r10 + mov r10, rax + mov qword [xDI + CPUMCTX.r11], r11 + mov r11, rax + mov qword [xDI + CPUMCTX.r12], r12 + mov r12, rax + mov qword [xDI + CPUMCTX.r13], r13 + mov r13, rax + mov qword [xDI + CPUMCTX.r14], r14 + mov r14, rax + mov qword [xDI + CPUMCTX.r15], r15 + mov r15, rax + mov rax, cr2 + mov qword [xDI + CPUMCTX.cr2], rax + + pop xAX ; The guest rdi we pushed above + mov qword [xDI + CPUMCTX.edi], rax + + ; Fight spectre. + INDIRECT_BRANCH_PREDICTION_BARRIER xDI, CPUMCTX_WSF_IBPB_EXIT + + %ifndef VMX_SKIP_TR + ; Restore TSS selector; must mark it as not busy before using ltr (!) + ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p). + ; @todo get rid of sgdt + pop xBX ; Saved TR + sub xSP, xCB * 2 + sgdt [xSP] + mov xAX, xBX + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. + and dword [xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr bx + add xSP, xCB * 2 + %endif + + pop xAX ; Saved LDTR + cmp eax, 0 + je %%skip_ldt_write64 + lldt ax + +%%skip_ldt_write64: + pop xSI ; pCtx (needed in rsi by the macros below) + + %ifdef VMX_USE_CACHED_VMCS_ACCESSES + pop xDX ; Saved pCache + + ; Note! If we get here as a result of invalid VMCS pointer, all the following + ; vmread's will fail (only eflags.cf=1 will be set) but that shouldn't cause any + ; trouble only just less efficient. + mov ecx, [xDX + VMCSCACHE.Read.cValidEntries] + cmp ecx, 0 ; Can't happen + je %%no_cached_read64 + jmp %%cached_read64 + +ALIGN(16) +%%cached_read64: + dec xCX + mov eax, [xDX + VMCSCACHE.Read.aField + xCX * 4] + vmread [xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX + cmp xCX, 0 + jnz %%cached_read64 +%%no_cached_read64: + %endif + + ; Restore segment registers. + MYPOPSEGS xAX, ax + + ; Restore the host XCR0 if necessary. + pop xCX + test ecx, ecx + jnz %%xcr0_after_skip + pop xAX + pop xDX + xsetbv ; ecx is already zero. +%%xcr0_after_skip: + + ; Restore general purpose registers. + MYPOPAD +%endmacro + + +;; +; Prepares for and executes VMLAUNCH/VMRESUME (64 bits guest mode) +; +; @returns VBox status code +; @param fResume msc:rcx, gcc:rdi Whether to use vmlauch/vmresume. +; @param pCtx msc:rdx, gcc:rsi Pointer to the guest-CPU context. +; @param pCache msc:r8, gcc:rdx Pointer to the VMCS cache. +; @param pVM msc:r9, gcc:rcx The cross context VM structure. +; @param pVCpu msc:[ebp+30], gcc:r8 The cross context virtual CPU structure of the calling EMT. +; +ALIGNCODE(16) +BEGINPROC VMXR0StartVM64 + push xBP + mov xBP, xSP + + pushf + cli + + ; Save all general purpose host registers. 
+ MYPUSHAD + + ; First we have to save some final CPU context registers. + lea r10, [.vmlaunch64_done wrt rip] + mov rax, VMX_VMCS_HOST_RIP ; Return address (too difficult to continue after VMLAUNCH?). + vmwrite rax, r10 + ; Note: assumes success! + + ; + ; Unify the input parameter registers. + ; +%ifdef ASM_CALL64_GCC + ; fResume already in rdi + ; pCtx already in rsi + mov rbx, rdx ; pCache +%else + mov rdi, rcx ; fResume + mov rsi, rdx ; pCtx + mov rbx, r8 ; pCache +%endif + + ; + ; Save the host XCR0 and load the guest one if necessary. + ; Note! Trashes rdx and rcx. + ; +%ifdef ASM_CALL64_MSC + mov rax, [xBP + 30h] ; pVCpu +%else + mov rax, r8 ; pVCpu +%endif + test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1 + jz .xcr0_before_skip + + xor ecx, ecx + xgetbv ; Save the host one on the stack. + push xDX + push xAX + + mov eax, [xSI + CPUMCTX.aXcr] ; Load the guest one. + mov edx, [xSI + CPUMCTX.aXcr + 4] + xor ecx, ecx ; paranoia + xsetbv + + push 0 ; Indicate that we must restore XCR0 (popped into ecx, thus 0). + jmp .xcr0_before_done + +.xcr0_before_skip: + push 3fh ; indicate that we need not. +.xcr0_before_done: + + ; + ; Save segment registers. + ; Note! Trashes rdx & rcx, so we moved it here (amd64 case). + ; + MYPUSHSEGS xAX, ax + +%ifdef VMX_USE_CACHED_VMCS_ACCESSES + mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] + cmp ecx, 0 + je .no_cached_writes + mov edx, ecx + mov ecx, 0 + jmp .cached_write + +ALIGN(16) +.cached_write: + mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4] + vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8] + inc xCX + cmp xCX, xDX + jl .cached_write + + mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 +.no_cached_writes: + + ; Save the pCache pointer. + push xBX +%endif + + ; Save the pCtx pointer. + push xSI + + ; Save host LDTR. + xor eax, eax + sldt ax + push xAX + +%ifndef VMX_SKIP_TR + ; The host TR limit is reset to 0x67; save & restore it manually. + str eax + push xAX +%endif + +%ifndef VMX_SKIP_GDTR + ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! + sub xSP, xCB * 2 + sgdt [xSP] +%endif +%ifndef VMX_SKIP_IDTR + sub xSP, xCB * 2 + sidt [xSP] +%endif + + ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction). + mov rbx, qword [xSI + CPUMCTX.cr2] + mov rdx, cr2 + cmp rbx, rdx + je .skip_cr2_write + mov cr2, rbx + +.skip_cr2_write: + mov eax, VMX_VMCS_HOST_RSP + vmwrite xAX, xSP + ; Note: assumes success! + ; Don't mess with ESP anymore!!! + + ; Fight spectre and similar. + INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY + + ; Load guest general purpose registers. + mov rax, qword [xSI + CPUMCTX.eax] + mov rbx, qword [xSI + CPUMCTX.ebx] + mov rcx, qword [xSI + CPUMCTX.ecx] + mov rdx, qword [xSI + CPUMCTX.edx] + mov rbp, qword [xSI + CPUMCTX.ebp] + mov r8, qword [xSI + CPUMCTX.r8] + mov r9, qword [xSI + CPUMCTX.r9] + mov r10, qword [xSI + CPUMCTX.r10] + mov r11, qword [xSI + CPUMCTX.r11] + mov r12, qword [xSI + CPUMCTX.r12] + mov r13, qword [xSI + CPUMCTX.r13] + mov r14, qword [xSI + CPUMCTX.r14] + mov r15, qword [xSI + CPUMCTX.r15] + + ; Resume or start VM? + cmp xDI, 0 ; fResume + + ; Load guest rdi & rsi. + mov rdi, qword [xSI + CPUMCTX.edi] + mov rsi, qword [xSI + CPUMCTX.esi] + + je .vmlaunch64_launch + + vmresume + jc near .vmxstart64_invalid_vmcs_ptr + jz near .vmxstart64_start_failed + jmp .vmlaunch64_done; ; Here if vmresume detected a failure. 
+ +.vmlaunch64_launch: + vmlaunch + jc near .vmxstart64_invalid_vmcs_ptr + jz near .vmxstart64_start_failed + jmp .vmlaunch64_done; ; Here if vmlaunch detected a failure. + +ALIGNCODE(16) +.vmlaunch64_done: + RESTORE_STATE_VM64 + mov eax, VINF_SUCCESS + +.vmstart64_end: + popf + pop xBP + ret + +.vmxstart64_invalid_vmcs_ptr: + RESTORE_STATE_VM64 + mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM + jmp .vmstart64_end + +.vmxstart64_start_failed: + RESTORE_STATE_VM64 + mov eax, VERR_VMX_UNABLE_TO_START_VM + jmp .vmstart64_end +ENDPROC VMXR0StartVM64 +%endif ; RT_ARCH_AMD64 + + +;; +; Prepares for and executes VMRUN (32 bits guests) +; +; @returns VBox status code +; @param HCPhysVmcbHost msc:rcx,gcc:rdi Physical address of host VMCB. +; @param HCPhysVmcb msc:rdx,gcc:rsi Physical address of guest VMCB. +; @param pCtx msc:r8,gcc:rdx Pointer to the guest CPU-context. +; @param pVM msc:r9,gcc:rcx The cross context VM structure. +; @param pVCpu msc:[rsp+28],gcc:r8 The cross context virtual CPU structure of the calling EMT. +; +ALIGNCODE(16) +BEGINPROC SVMR0VMRun +%ifdef RT_ARCH_AMD64 ; fake a cdecl stack frame + %ifdef ASM_CALL64_GCC + push r8 ; pVCpu + push rcx ; pVM + push rdx ; pCtx + push rsi ; HCPhysVmcb + push rdi ; HCPhysVmcbHost + %else + mov rax, [rsp + 28h] + push rax ; pVCpu + push r9 ; pVM + push r8 ; pCtx + push rdx ; HCPhysVmcb + push rcx ; HCPhysVmcbHost + %endif + push 0 +%endif + push xBP + mov xBP, xSP + pushf + + ; Save all general purpose host registers. + MYPUSHAD + + ; Load pCtx into xSI. + mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx + + ; Save the host XCR0 and load the guest one if necessary. + mov xAX, [xBP + xCB * 2 + RTHCPHYS_CB * 2 + xCB * 2] ; pVCpu + test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1 + jz .xcr0_before_skip + + xor ecx, ecx + xgetbv ; Save the host XCR0 on the stack + push xDX + push xAX + + mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx + mov eax, [xSI + CPUMCTX.aXcr] ; load the guest XCR0 + mov edx, [xSI + CPUMCTX.aXcr + 4] + xor ecx, ecx ; paranoia + xsetbv + + push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0) + jmp .xcr0_before_done + +.xcr0_before_skip: + push 3fh ; indicate that we need not restore XCR0 +.xcr0_before_done: + + ; Save guest CPU-context pointer for simplifying saving of the GPRs afterwards. + push xSI + + ; Save host fs, gs, sysenter msr etc. + mov xAX, [xBP + xCB * 2] ; HCPhysVmcbHost (64 bits physical address; x86: take low dword only) + push xAX ; save for the vmload after vmrun + vmsave + + ; Fight spectre. + INDIRECT_BRANCH_PREDICTION_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY + + ; Setup xAX for VMLOAD. + mov xAX, [xBP + xCB * 2 + RTHCPHYS_CB] ; HCPhysVmcb (64 bits physical address; x86: take low dword only) + + ; Load guest general purpose registers. + ; eax is loaded from the VMCB by VMRUN. + mov ebx, [xSI + CPUMCTX.ebx] + mov ecx, [xSI + CPUMCTX.ecx] + mov edx, [xSI + CPUMCTX.edx] + mov edi, [xSI + CPUMCTX.edi] + mov ebp, [xSI + CPUMCTX.ebp] + mov esi, [xSI + CPUMCTX.esi] + + ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch. + clgi + sti + + ; Load guest fs, gs, sysenter msr etc. + vmload + + ; Run the VM. + vmrun + + ; Save guest fs, gs, sysenter msr etc. + vmsave + + ; Load host fs, gs, sysenter msr etc. + pop xAX ; load HCPhysVmcbHost (pushed above) + vmload + + ; Set the global interrupt flag again, but execute cli to make sure IF=0. + cli + stgi + + ; Pop the context pointer (pushed above) and save the guest GPRs (sans RSP and RAX). 
+ pop xAX + + mov [ss:xAX + CPUMCTX.ebx], ebx + mov xBX, SPECTRE_FILLER + mov [ss:xAX + CPUMCTX.ecx], ecx + mov xCX, xBX + mov [ss:xAX + CPUMCTX.edx], edx + mov xDX, xBX + mov [ss:xAX + CPUMCTX.esi], esi + mov xSI, xBX + mov [ss:xAX + CPUMCTX.edi], edi + mov xDI, xBX + mov [ss:xAX + CPUMCTX.ebp], ebp + mov xBP, xBX + + ; Fight spectre. Note! Trashes xAX! + INDIRECT_BRANCH_PREDICTION_BARRIER ss:xAX, CPUMCTX_WSF_IBPB_EXIT + + ; Restore the host xcr0 if necessary. + pop xCX + test ecx, ecx + jnz .xcr0_after_skip + pop xAX + pop xDX + xsetbv ; ecx is already zero +.xcr0_after_skip: + + ; Restore host general purpose registers. + MYPOPAD + + mov eax, VINF_SUCCESS + + popf + pop xBP +%ifdef RT_ARCH_AMD64 + add xSP, 6*xCB +%endif + ret +ENDPROC SVMR0VMRun + + +%ifdef RT_ARCH_AMD64 +;; +; Prepares for and executes VMRUN (64 bits guests) +; +; @returns VBox status code +; @param HCPhysVmcbHost msc:rcx,gcc:rdi Physical address of host VMCB. +; @param HCPhysVmcb msc:rdx,gcc:rsi Physical address of guest VMCB. +; @param pCtx msc:r8,gcc:rdx Pointer to the guest-CPU context. +; @param pVM msc:r9,gcc:rcx The cross context VM structure. +; @param pVCpu msc:[rsp+28],gcc:r8 The cross context virtual CPU structure of the calling EMT. +; +ALIGNCODE(16) +BEGINPROC SVMR0VMRun64 + ; Fake a cdecl stack frame + %ifdef ASM_CALL64_GCC + push r8 ;pVCpu + push rcx ;pVM + push rdx ;pCtx + push rsi ;HCPhysVmcb + push rdi ;HCPhysVmcbHost + %else + mov rax, [rsp + 28h] + push rax ; rbp + 30h pVCpu + push r9 ; rbp + 28h pVM + push r8 ; rbp + 20h pCtx + push rdx ; rbp + 18h HCPhysVmcb + push rcx ; rbp + 10h HCPhysVmcbHost + %endif + push 0 ; rbp + 08h "fake ret addr" + push rbp ; rbp + 00h + mov rbp, rsp + pushf + + ; Manual save and restore: + ; - General purpose registers except RIP, RSP, RAX + ; + ; Trashed: + ; - CR2 (we don't care) + ; - LDTR (reset to 0) + ; - DRx (presumably not changed at all) + ; - DR7 (reset to 0x400) + + ; Save all general purpose host registers. + MYPUSHAD + + ; Load pCtx into xSI. + mov xSI, [rbp + xCB * 2 + RTHCPHYS_CB * 2] + + ; Save the host XCR0 and load the guest one if necessary. + mov rax, [xBP + 30h] ; pVCpu + test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1 + jz .xcr0_before_skip + + xor ecx, ecx + xgetbv ; save the host XCR0 on the stack. + push xDX + push xAX + + mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx + mov eax, [xSI + CPUMCTX.aXcr] ; load the guest XCR0 + mov edx, [xSI + CPUMCTX.aXcr + 4] + xor ecx, ecx ; paranoia + xsetbv + + push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0) + jmp .xcr0_before_done + +.xcr0_before_skip: + push 3fh ; indicate that we need not restore XCR0 +.xcr0_before_done: + + ; Save guest CPU-context pointer for simplifying saving of the GPRs afterwards. + push rsi + + ; Save host fs, gs, sysenter msr etc. + mov rax, [rbp + xCB * 2] ; HCPhysVmcbHost (64 bits physical address; x86: take low dword only) + push rax ; save for the vmload after vmrun + vmsave + + ; Fight spectre. + INDIRECT_BRANCH_PREDICTION_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY + + ; Setup rax for VMLOAD. + mov rax, [rbp + xCB * 2 + RTHCPHYS_CB] ; HCPhysVmcb (64 bits physical address; take low dword only) + + ; Load guest general purpose registers (rax is loaded from the VMCB by VMRUN). 
+ mov rbx, qword [xSI + CPUMCTX.ebx] + mov rcx, qword [xSI + CPUMCTX.ecx] + mov rdx, qword [xSI + CPUMCTX.edx] + mov rdi, qword [xSI + CPUMCTX.edi] + mov rbp, qword [xSI + CPUMCTX.ebp] + mov r8, qword [xSI + CPUMCTX.r8] + mov r9, qword [xSI + CPUMCTX.r9] + mov r10, qword [xSI + CPUMCTX.r10] + mov r11, qword [xSI + CPUMCTX.r11] + mov r12, qword [xSI + CPUMCTX.r12] + mov r13, qword [xSI + CPUMCTX.r13] + mov r14, qword [xSI + CPUMCTX.r14] + mov r15, qword [xSI + CPUMCTX.r15] + mov rsi, qword [xSI + CPUMCTX.esi] + + ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch. + clgi + sti + + ; Load guest FS, GS, Sysenter MSRs etc. + vmload + + ; Run the VM. + vmrun + + ; Save guest fs, gs, sysenter msr etc. + vmsave + + ; Load host fs, gs, sysenter msr etc. + pop rax ; load HCPhysVmcbHost (pushed above) + vmload + + ; Set the global interrupt flag again, but execute cli to make sure IF=0. + cli + stgi + + ; Pop the context pointer (pushed above) and save the guest GPRs (sans RSP and RAX). + pop rax + + mov qword [rax + CPUMCTX.ebx], rbx + mov rbx, SPECTRE_FILLER64 + mov qword [rax + CPUMCTX.ecx], rcx + mov rcx, rbx + mov qword [rax + CPUMCTX.edx], rdx + mov rdx, rbx + mov qword [rax + CPUMCTX.esi], rsi + mov rsi, rbx + mov qword [rax + CPUMCTX.edi], rdi + mov rdi, rbx + mov qword [rax + CPUMCTX.ebp], rbp + mov rbp, rbx + mov qword [rax + CPUMCTX.r8], r8 + mov r8, rbx + mov qword [rax + CPUMCTX.r9], r9 + mov r9, rbx + mov qword [rax + CPUMCTX.r10], r10 + mov r10, rbx + mov qword [rax + CPUMCTX.r11], r11 + mov r11, rbx + mov qword [rax + CPUMCTX.r12], r12 + mov r12, rbx + mov qword [rax + CPUMCTX.r13], r13 + mov r13, rbx + mov qword [rax + CPUMCTX.r14], r14 + mov r14, rbx + mov qword [rax + CPUMCTX.r15], r15 + mov r15, rbx + + ; Fight spectre. Note! Trashes rax! + INDIRECT_BRANCH_PREDICTION_BARRIER rax, CPUMCTX_WSF_IBPB_EXIT + + ; Restore the host xcr0 if necessary. + pop xCX + test ecx, ecx + jnz .xcr0_after_skip + pop xAX + pop xDX + xsetbv ; ecx is already zero +.xcr0_after_skip: + + ; Restore host general purpose registers. + MYPOPAD + + mov eax, VINF_SUCCESS + + popf + pop rbp + add rsp, 6 * xCB + ret +ENDPROC SVMR0VMRun64 +%endif ; RT_ARCH_AMD64 + |
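
All of the VMX helper routines above share one status convention: after a VMX
instruction, CF=1 means the instruction failed without a current VMCS
(VMfailInvalid) and ZF=1 means it failed with an error number stored in the
VMCS (VMfailValid); otherwise it succeeded. A minimal sketch of that pattern,
with placeholder operand registers but the status codes exactly as used in
this file:

    xor     eax, eax                        ; eax = VINF_SUCCESS (0) on the success path
    vmwrite xCX, xDX                        ; any VMX instruction (field index in xCX, value in xDX)
    jnc     .valid_vmcs                     ; CF=1 -> VMfailInvalid, no current VMCS
    mov     eax, VERR_VMX_INVALID_VMCS_PTR
    ret
.valid_vmcs:
    jnz     .the_end                        ; ZF=1 -> VMfailValid, error number in the VMCS
    mov     eax, VERR_VMX_INVALID_VMCS_FIELD
.the_end:
    ret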