diff options
Diffstat (limited to 'src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm')
-rw-r--r-- | src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm | 596 |
1 files changed, 596 insertions, 0 deletions
diff --git a/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm new file mode 100644 index 00000000..628203c6 --- /dev/null +++ b/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-RegCtxRestore.asm @@ -0,0 +1,596 @@ +; $Id: bs3-cmn-RegCtxRestore.asm $ +;; @file +; BS3Kit - Bs3RegCtxRestore. +; + +; +; Copyright (C) 2007-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; +; The contents of this file may alternatively be used under the terms +; of the Common Development and Distribution License Version 1.0 +; (CDDL) only, as it comes in the "COPYING.CDDL" file of the +; VirtualBox OSE distribution, in which case the provisions of the +; CDDL are applicable instead of those of the GPL. +; +; You may elect to license modified versions of this file under the +; terms and conditions of either the GPL or the CDDL or both. +; + +%include "bs3kit-template-header.mac" + + +BS3_EXTERN_SYSTEM16 Bs3Gdt +BS3_EXTERN_DATA16 g_bBs3CurrentMode +BS3_EXTERN_DATA16 g_fBs3TrapNoV86Assist +%if TMPL_BITS != 64 +BS3_EXTERN_DATA16 g_uBs3CpuDetected +%endif +TMPL_BEGIN_TEXT +BS3_EXTERN_CMN Bs3Syscall +BS3_EXTERN_CMN Bs3Panic +TMPL_BEGIN_TEXT + + +;; +; Restores the given register context. +; +; @param pRegCtx +; @param fFlags +; @uses All registers and may trash stack immediately before the resume point. +; +; @note Only respects the BS3_MODE_CODE_MASK part of pRegCtx->bMode. +; +%if TMPL_BITS == 16 +BS3_PROC_BEGIN_CMN Bs3RegCtxRestore_aborts, BS3_PBC_FAR ; special entry point for when watcom applies __aborts +BS3_PROC_BEGIN_CMN Bs3RegCtxRestore_aborts, BS3_PBC_NEAR ; special entry point for when watcom applies __aborts + CPU 8086 + xor xAX, xAX + push xAX ; fake return address. + push xAX + jmp _Bs3RegCtxRestore_f16 +%elif TMPL_BITS == 32 +BS3_PROC_BEGIN_CMN Bs3RegCtxRestore_aborts, BS3_PBC_NEAR ; special entry point for when watcom applies __aborts + push 0feedfaceh ; fake return address. +%endif +BS3_PROC_BEGIN_CMN Bs3RegCtxRestore, BS3_PBC_HYBRID + BS3_CALL_CONV_PROLOG 2 + push xBP + mov xBP, xSP + + ; + ; If we're not in ring-0, ask the kernel to restore it for us (quicker + ; and less problematic if we're in a funny context right now with weird + ; CS or SS values). + ; +%if TMPL_BITS == 16 + cmp byte [BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM + je .in_ring0 + test byte [BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_CODE_V86 + jnz .do_syscall_restore_ctx +%endif + mov ax, ss + test al, 3 + jz .in_ring0 + +.do_syscall_restore_ctx: +%if TMPL_BITS != 16 +.do_syscall_restore_ctx_restore_ds: + mov cx, ds + mov xSI, [xBP + xCB*2] + movzx edx, word [xBP + xCB*3] + mov eax, BS3_SYSCALL_RESTORE_CTX +%else + mov si, [bp + xCB + cbCurRetAddr] + mov cx, [bp + xCB + cbCurRetAddr + 2] + mov dx, [bp + xCB + cbCurRetAddr + sCB] + mov ax, BS3_SYSCALL_RESTORE_CTX +%endif + call Bs3Syscall + call Bs3Panic + +%if TMPL_BITS == 16 +.do_syscall_restore_ctx_restore_ds: + push es + pop ds + jmp .do_syscall_restore_ctx +%endif + + ; + ; Prologue. Loads ES with BS3KIT_GRPNM_DATA16/FLAT (for g_bBs3CurrentMode + ; and g_uBs3CpuDetected), DS:xBX with pRegCtx and fFlags into xCX. + ; +.in_ring0: +%if TMPL_BITS == 16 + mov ax, BS3_SEL_DATA16 + mov es, ax + lds bx, [bp + xCB + cbCurRetAddr] + mov cx, [bp + xCB + cbCurRetAddr + sCB] +%elif TMPL_BITS == 32 + mov ax, BS3_SEL_R0_DS32 + mov ds, ax + mov xBX, [xBP + xCB*2] + movzx xCX, word [xBP + xCB*3] +%else + mov ax, BS3_SEL_R0_DS64 + mov ds, ax + mov xBX, [xBP + xCB*2] + movzx xCX, word [xBP + xCB*3] +%endif + + +%if TMPL_BITS != 64 + ; Restoring a 64-bit context is best done from 64-bit code. + mov al, [xBX + BS3REGCTX.bMode] + test al, BS3_MODE_CODE_64 + jnz .do_syscall_restore_ctx_restore_ds +%endif + + ; The remainder must be done with interrupts disabled. + cli + + ; + ; Update g_bs3CurrentMode. + ; +%if TMPL_BITS == 64 + mov al, [xBX + BS3REGCTX.bMode] +%endif + and al, BS3_MODE_CODE_MASK + mov ah, [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_bBs3CurrentMode)] + and ah, ~BS3_MODE_CODE_MASK + or al, ah + mov [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_bBs3CurrentMode)], al + + ; + ; Set g_fBs3TrapNoV86Assist if BS3REGCTXRESTORE_F_NO_V86_ASSIST specified. + ; + test cl, BS3REGCTXRESTORE_F_NO_V86_ASSIST + jz .no_f_no_v86_assist + mov byte [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_fBs3TrapNoV86Assist)], 1 +.no_f_no_v86_assist: + +%if TMPL_BITS == 16 + ; + ; Check what the CPU can do. + ; + cmp byte [es:BS3_DATA16_WRT(g_uBs3CpuDetected)], BS3CPU_80386 + jae .restore_full + + ; Do the 80286 specifics first. + cmp byte [es:BS3_DATA16_WRT(g_uBs3CpuDetected)], BS3CPU_80286 + jb .restore_16_bit_ancient + CPU 286 + + lmsw [bx + BS3REGCTX.cr0] + cmp byte [es:BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM + je .restore_16_bit_ancient + lldt [bx + BS3REGCTX.ldtr] + + ; TR - complicated because we need to clear the busy bit. ASSUMES GDT. + str ax + cmp ax, [bx + BS3REGCTX.tr] + je .skip_tr_286 + + mov di, word [xBX + BS3REGCTX.tr] + or di, di ; check for null. + jz .load_tr_286 + + push ds + push BS3_SEL_SYSTEM16 + pop ds + add di, Bs3Gdt wrt BS3SYSTEM16 + add di, X86DESCGENERIC_BIT_OFF_TYPE / 8 + and byte [di], ~(X86_SEL_TYPE_SYS_TSS_BUSY_MASK << (X86DESCGENERIC_BIT_OFF_TYPE % 8)) + pop ds + +.load_tr_286: + ltr [bx + BS3REGCTX.tr] +.skip_tr_286: + +.restore_16_bit_ancient: + CPU 8086 + ; Some general registers. + mov cx, [bx + BS3REGCTX.rcx] + mov dx, [bx + BS3REGCTX.rdx] + + ; Do the return frame and final registers (keep short as we're not quite + ; NMI safe here if pRegCtx is on the stack). + cmp byte [es:BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM + mov di, [bx + BS3REGCTX.rsp] + je .restore_16_bit_same_privilege + cmp byte [bx + BS3REGCTX.bCpl], 0 + je .restore_16_bit_same_privilege + + mov ax, [bx + BS3REGCTX.ss] + push ax + mov ax, [bx + BS3REGCTX.rsp] + push ax + mov ax, [bx + BS3REGCTX.rflags] + push ax + mov ax, [bx + BS3REGCTX.cs] + push ax + mov ax, [bx + BS3REGCTX.rip] + push ax + mov ax, [bx + BS3REGCTX.ds] + push ax + + mov si, [bx + BS3REGCTX.rsi] + mov di, [bx + BS3REGCTX.rdi] + mov es, [bx + BS3REGCTX.es] + mov ax, [bx + BS3REGCTX.rax] + mov bp, [bx + BS3REGCTX.rbp] ; restore late for better stacks. + mov bx, [bx + BS3REGCTX.rbx] + + pop ds + iret + +.restore_16_bit_same_privilege: + sub di, 2*5 ; iret frame + pop ds + mov si, di + mov es, [bx + BS3REGCTX.ss] ; ES is target stack segment. + cld + + mov ax, [bx + BS3REGCTX.ds] + stosw + mov ax, [bx + BS3REGCTX.rbp] ; Restore esp as late as possible for better stacks. + stosw + mov ax, [bx + BS3REGCTX.rip] + stosw + mov ax, [bx + BS3REGCTX.cs] + stosw + mov ax, [bx + BS3REGCTX.rflags] + stosw + + mov di, [bx + BS3REGCTX.rdi] + mov es, [bx + BS3REGCTX.es] + mov ax, [bx + BS3REGCTX.rax] + mov ss, [bx + BS3REGCTX.ss] + mov sp, si + mov si, [bx + BS3REGCTX.rsi] + mov bx, [bx + BS3REGCTX.rbx] + + pop ds + pop bp + iret + + CPU 386 +%endif + +.restore_full: + ; + ; 80386 or later. + ; For 32-bit and 16-bit versions, we always use 32-bit iret. + ; + + ; Restore control registers if they've changed. + test cl, BS3REGCTXRESTORE_F_SKIP_CRX + jnz .skip_control_regs + test byte [xBX + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_CR0_IS_MSW | BS3REG_CTX_F_NO_CR2_CR3 + jnz .skip_control_regs + + test byte [xBX + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_CR4 ; (old 486s and 386s didn't have CR4) + jnz .skip_cr4 +%if TMPL_BITS != 64 + test word [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_uBs3CpuDetected)], BS3CPU_F_CPUID + jz .skip_cr4 +%endif + mov sAX, [xBX + BS3REGCTX.cr4] + mov sDX, cr4 + cmp sAX, sDX + je .skip_cr4 + mov cr4, sAX +.skip_cr4: + + mov sAX, [xBX + BS3REGCTX.cr0] + mov sDX, cr0 + cmp sAX, sDX + je .skip_cr0 + mov cr0, sAX +.skip_cr0: + + mov sAX, [xBX + BS3REGCTX.cr3] + mov sDX, cr3 + cmp sAX, sDX + je .skip_cr3 + mov cr3, sAX +.skip_cr3: + + mov sAX, [xBX + BS3REGCTX.cr2] + mov sDX, cr2 + cmp sAX, sDX + je .skip_cr2 + mov cr2, sAX +.skip_cr2: + + ; + ; Restore + ; +%if TMPL_BITS != 64 + ; We cannot restore ldtr and tr if we're in real-mode. + cmp byte [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_bBs3CurrentMode)], BS3_MODE_RM + je .skip_control_regs +%endif + test byte [xBX + BS3REGCTX.fbFlags], BS3REG_CTX_F_NO_TR_LDTR + jnz .skip_control_regs + + ; LDTR + sldt ax + cmp ax, [xBX + BS3REGCTX.ldtr] + je .skip_ldtr + lldt [xBX + BS3REGCTX.ldtr] +.skip_ldtr: + + ; TR - complicated because we need to clear the busy bit. ASSUMES GDT. + str ax + cmp ax, [xBX + BS3REGCTX.tr] + je .skip_tr + + movzx edi, word [xBX + BS3REGCTX.tr] + or edi, edi ; check for null. + jz .load_tr + +%if TMPL_BITS == 16 + push ds + push BS3_SEL_SYSTEM16 + pop ds + add xDI, Bs3Gdt wrt BS3SYSTEM16 +%else + add xDI, Bs3Gdt wrt FLAT +%endif + add xDI, X86DESCGENERIC_BIT_OFF_TYPE / 8 + and byte [xDI], ~(X86_SEL_TYPE_SYS_TSS_BUSY_MASK << (X86DESCGENERIC_BIT_OFF_TYPE % 8)) +%if TMPL_BITS == 16 + pop ds +%endif +.load_tr: + ltr [xBX + BS3REGCTX.tr] +.skip_tr: + +.skip_control_regs: + + +%if TMPL_BITS == 64 + ; + ; 64-bit returns are simple because ss:rsp are always restored. + ; + ; A small complication here when returning to a 16-bit stack (only + ; applicable to 16-bit and 32-bit code), iret doesn't touch the high + ; ESP bits and we can easily later end up with trap handlers + ; accessing memory never intended as stack. + ; + mov rcx, qword [xBX + BS3REGCTX.rsp] ; (also 1st param for conv call below) + cmp rcx, 0ffffh + ja .iretq_maybe_annoying_16bit_stack + cmp rsp, 0ffffh + ja .iretq_maybe_annoying_16bit_stack +.iretq_ok: + + movzx eax, word [xBX + BS3REGCTX.ss] + push rax + push qword [xBX + BS3REGCTX.rsp] + push qword [xBX + BS3REGCTX.rflags] + movzx eax, word [xBX + BS3REGCTX.cs] + push rax + push qword [xBX + BS3REGCTX.rip] + +.iretq_restore_regs_and_iret: + mov es, [xBX + BS3REGCTX.es] + mov fs, [xBX + BS3REGCTX.fs] + mov gs, [xBX + BS3REGCTX.gs] + mov rax, [xBX + BS3REGCTX.rax] + mov rdx, [xBX + BS3REGCTX.rdx] + mov rcx, [xBX + BS3REGCTX.rcx] + mov rsi, [xBX + BS3REGCTX.rsi] + mov rdi, [xBX + BS3REGCTX.rdi] + mov r8, [xBX + BS3REGCTX.r8] + mov r9, [xBX + BS3REGCTX.r9] + mov r10, [xBX + BS3REGCTX.r10] + mov r11, [xBX + BS3REGCTX.r11] + mov r12, [xBX + BS3REGCTX.r12] + mov r13, [xBX + BS3REGCTX.r13] + mov r14, [xBX + BS3REGCTX.r14] + mov r15, [xBX + BS3REGCTX.r15] + mov rbp, [xBX + BS3REGCTX.rbp] ; restore late for better stacks. + mov ds, [xBX + BS3REGCTX.ds] + mov rbx, [xBX + BS3REGCTX.rbx] + iretq + +.iretq_maybe_annoying_16bit_stack: + movzx edx, word [xBX + BS3REGCTX.ss] ; (also 2nd param for conv call below) + lar eax, dx + jnz .iretq_ok + test eax, X86LAR_F_D | X86LAR_F_L + jnz .iretq_ok ; Returning to a big of long SS needs not extra work. + + lar eax, word [xBX + BS3REGCTX.cs] + jnz .iretq_ok + test eax, X86LAR_F_L + jnz .iretq_ok ; It doesn't matter when returning to 64-bit code. + + ; Convert ss:sp to a flat address. + BS3_EXTERN_CMN Bs3SelFar32ToFlat32NoClobber + call Bs3SelFar32ToFlat32NoClobber + mov rdi, rax + + ; 2nd return frame (32-bit, same CPL). + mov eax, [xBX + BS3REGCTX.rflags] + mov [rdi - 4], eax + movzx eax, word [xBX + BS3REGCTX.cs] + mov [rdi - 8], eax + mov eax, [xBX + BS3REGCTX.rip] + mov [rdi - 12], eax + mov ecx, [xBX + BS3REGCTX.rsp] + sub cx, 12 + mov [rdi - 16], ecx + + ; 1st return frame. + movzx eax, word [xBX + BS3REGCTX.ss] + push rax ; new 16-bit SS + sub cx, 4 + push rcx ; new esp + mov rax, [xBX + BS3REGCTX.rflags] + and rax, ~(X86_EFL_NT | X86_EFL_TF) + push rax ; rflags + AssertCompile(BS3_SEL_RING_SHIFT == 8) + mov eax, BS3_SEL_R0_CS32 + add ah, [xBX + BS3REGCTX.bCpl] + or al, [xBX + BS3REGCTX.bCpl] + push rax ; 32-bit CS + push .iretq_pop_real_esp_and_iret_again wrt FLAT + jmp .iretq_restore_regs_and_iret + + BS3_SET_BITS 32 +.iretq_pop_real_esp_and_iret_again: + pop esp + iretd + BS3_SET_BITS 64 + +%else + ; + ; 32-bit/16-bit is more complicated as we have three different iret frames. + ; + mov al, [BS3_ONLY_16BIT(es:) BS3_DATA16_WRT(g_bBs3CurrentMode)] + cmp al, BS3_MODE_RM + je .iretd_same_cpl_rm + + test dword [xBX + BS3REGCTX.rflags], X86_EFL_VM + jnz .restore_v8086 + + cmp byte [xBX + BS3REGCTX.bCpl], 0 + je .iretd_same_cpl + + ; + ; IRETD to different CPL. Frame includes ss:esp. + ; +.iretd_different_cpl: + or eax, 0ffffffffh ; poison unused parts of segment pushes + mov ax, [xBX + BS3REGCTX.ss] + push eax + push dword [xBX + BS3REGCTX.rsp] + push dword [xBX + BS3REGCTX.rflags] + mov ax, [xBX + BS3REGCTX.cs] + push eax + push dword [xBX + BS3REGCTX.rip] + push dword [xBX + BS3REGCTX.rbp] ; Restore esp as late as possible for better stacks. + mov ax, [xBX + BS3REGCTX.ds] + push xAX + + mov es, [xBX + BS3REGCTX.es] + mov fs, [xBX + BS3REGCTX.fs] + mov gs, [xBX + BS3REGCTX.gs] + mov eax, [xBX + BS3REGCTX.rax] + mov edx, [xBX + BS3REGCTX.rdx] + mov ecx, [xBX + BS3REGCTX.rcx] + mov esi, [xBX + BS3REGCTX.rsi] + %if TMPL_BITS == 16 ; if SS is 16-bit, we will not be able to restore the high word. +;; @todo 16-bit stack will also mess us up in 32-bit code, so this needs fixing (see 64-bit above). + mov edi, [xBX + BS3REGCTX.rsp] + mov di, sp + mov esp, edi + %endif + mov edi, [xBX + BS3REGCTX.rdi] + mov ebx, [xBX + BS3REGCTX.rbx] + + pop ds + pop ebp + iretd + + ; + ; IRETD to same CPL (includes real mode). + ; +.iretd_same_cpl_rm: + ; Use STOSD/ES:EDI to create the frame. + mov es, [xBX + BS3REGCTX.ss] + mov esi, [xBX + BS3REGCTX.rsp] + sub esi, 5*4 + movzx edi, si + jmp .es_edi_is_pointing_to_return_frame_location + +.iretd_same_cpl: + ; Use STOSD/ES:EDI to create the frame. + mov es, [xBX + BS3REGCTX.ss] + mov edi, [xBX + BS3REGCTX.rsp] + sub edi, 5*4 + + ; Which part of the stack pointer is actually used depends on the SS.D/B bit. + lar eax, [xBX + BS3REGCTX.ss] + jnz .using_32_bit_stack_pointer + test eax, X86LAR_F_D + jnz .using_32_bit_stack_pointer +.using_16_bit_stack_pointer: + mov esi, edi ; save rsp for later. + movzx edi, di + jmp .es_edi_is_pointing_to_return_frame_location +.using_32_bit_stack_pointer: + mov esi, edi +.es_edi_is_pointing_to_return_frame_location: + cld + mov ax, [xBX + BS3REGCTX.ds] + o32 stosd + mov eax, [xBX + BS3REGCTX.rbp] ; Restore esp as late as possible for better stacks. + o32 stosd + mov eax, [xBX + BS3REGCTX.rip] + o32 stosd + mov ax, [xBX + BS3REGCTX.cs] + o32 stosd + mov eax, [xBX + BS3REGCTX.rflags] + o32 stosd + + mov es, [xBX + BS3REGCTX.es] + mov fs, [xBX + BS3REGCTX.fs] + mov gs, [xBX + BS3REGCTX.gs] + mov eax, [xBX + BS3REGCTX.rax] + mov edx, [xBX + BS3REGCTX.rdx] + mov ecx, [xBX + BS3REGCTX.rcx] + mov edi, [xBX + BS3REGCTX.rdi] + mov ebp, [xBX + BS3REGCTX.rbp] ; restore late for better stacks. + + mov ss, [xBX + BS3REGCTX.ss] + mov esp, esi + mov esi, [xBX + BS3REGCTX.rsi] + mov ebx, [xBX + BS3REGCTX.rbx] + + o32 pop ds + pop ebp + iretd + + ; + ; IRETD to v8086 mode. Frame includes ss:esp and the 4 data segment registers. + ; +.restore_v8086: + or eax, 0ffffffffh ; poison unused parts of segment pushes + mov eax, [xBX + BS3REGCTX.gs] + push eax + mov eax, [xBX + BS3REGCTX.fs] + push eax + mov eax, [xBX + BS3REGCTX.ds] + push eax + mov eax, [xBX + BS3REGCTX.es] + push eax + mov eax, [xBX + BS3REGCTX.ss] + push eax + push dword [xBX + BS3REGCTX.rsp] + push dword [xBX + BS3REGCTX.rflags] + mov ax, [xBX + BS3REGCTX.cs] + push eax + push dword [xBX + BS3REGCTX.rip] + + mov eax, [xBX + BS3REGCTX.rax] + mov edx, [xBX + BS3REGCTX.rdx] + mov ecx, [xBX + BS3REGCTX.rcx] + mov esi, [xBX + BS3REGCTX.rsi] + mov edi, [xBX + BS3REGCTX.rdi] + mov ebp, [xBX + BS3REGCTX.rbp] ; restore late for better stacks. + mov ebx, [xBX + BS3REGCTX.rbx] + + iretd +%endif +BS3_PROC_END_CMN Bs3RegCtxRestore + |