| author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2014-03-26 19:21:20 +0000 |
|---|---|---|
| committer | <> | 2014-05-08 15:03:54 +0000 |
| commit | fb123f93f9f5ce42c8e5785d2f8e0edaf951740e (patch) | |
| tree | c2103d76aec5f1f10892cd1d3a38e24f665ae5db /src/VBox/VMM/VMMRC/CPUMRCA.asm | |
| parent | 58ed4748338f9466599adfc8a9171280ed99e23f (diff) | |
| download | VirtualBox-master.tar.gz | |
Imported from /home/lorry/working-area/delta_VirtualBox/VirtualBox-4.3.10.tar.bz2 (HEAD, VirtualBox-4.3.10, master)
Diffstat (limited to 'src/VBox/VMM/VMMRC/CPUMRCA.asm')
| -rw-r--r-- | src/VBox/VMM/VMMRC/CPUMRCA.asm | 199 |
1 file changed, 199 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMRC/CPUMRCA.asm b/src/VBox/VMM/VMMRC/CPUMRCA.asm
index 43326128..13520783 100644
--- a/src/VBox/VMM/VMMRC/CPUMRCA.asm
+++ b/src/VBox/VMM/VMMRC/CPUMRCA.asm
@@ -43,6 +43,205 @@ extern NAME(CPUMRCAssertPreExecutionSanity)
 
 BEGINCODE
 
+;; Macro for FXSAVE/FXRSTOR leaky behaviour on AMD CPUs, see cpumR3CheckLeakyFpu().
+; Cleans the FPU state, if necessary, before restoring the FPU.
+;
+; This macro ASSUMES CR0.TS is not set!
+; @remarks Trashes xAX!!
+; Changes here should also be reflected in CPUMR0A.asm's copy!
+%macro CLEANFPU 0
+    test    dword [xDX + CPUMCPU.fUseFlags], CPUM_USE_FFXSR_LEAKY
+    jz      .nothing_to_clean
+
+    xor     eax, eax
+    fnstsw  ax                          ; Get FSW
+    test    eax, RT_BIT(7)              ; If FSW.ES (bit 7) is set, clear it to not cause FPU exceptions
+                                        ; while clearing & loading the FPU bits in '.clean_fpu'.
+    jz      .clean_fpu
+    fnclex
+
+.clean_fpu:
+    ffree   st7                         ; Clear FPU stack register(7)'s tag entry to prevent overflow if a wraparound occurs
+                                        ; for the upcoming push (load).
+    fild    dword [xDX + CPUMCPU.Guest.fpu] ; Explicit FPU load to overwrite FIP, FOP and FDP registers in the FPU.
+
+.nothing_to_clean:
+%endmacro
+
+
+;;
+; Handles lazy FPU saving and restoring.
+;
+; This handler will implement lazy fpu (sse/mmx/stuff) saving.
+; Two actions may be taken in this handler since the Guest OS may
+; be doing lazy fpu switching. So, we'll have to generate those
+; traps which the Guest CPU CTX shall have according to its
+; CR0 flags. If no traps are due for the Guest OS, we'll save the
+; host context and restore the guest context.
+;
+; @returns  0 if the caller should continue execution.
+; @returns  VINF_EM_RAW_GUEST_TRAP if a guest trap should be generated.
+; @param    pCPUMCPU    x86:[esp+4] GCC:rdi MSC:rcx     CPUMCPU pointer
+;
+align 16
+BEGINPROC cpumHandleLazyFPUAsm
+    ;
+    ; Figure out what to do.
+    ;
+    ; There are two basic actions:
+    ;   1. Save host fpu and restore guest fpu.
+    ;   2. Generate guest trap.
+    ;
+    ; When entering the hypervisor we'll always enable MP (for proper wait
+    ; trapping) and TS (for intercepting all fpu/mmx/sse stuff). The EM flag
+    ; is taken from the guest OS in order to get proper SSE handling.
+    ;
+    ;
+    ; Actions taken depending on the guest CR0 flags:
+    ;
+    ;   3    2    1
+    ;  TS | EM | MP | FPUInstr | WAIT :: VMM Action
+    ; ------------------------------------------------------------------------
+    ;   0 |  0 |  0 | Exec     | Exec :: Clear TS & MP, Save HC, Load GC.
+    ;   0 |  0 |  1 | Exec     | Exec :: Clear TS, Save HC, Load GC.
+    ;   0 |  1 |  0 | #NM      | Exec :: Clear TS & MP, Save HC, Load GC.
+    ;   0 |  1 |  1 | #NM      | Exec :: Clear TS, Save HC, Load GC.
+    ;   1 |  0 |  0 | #NM      | Exec :: Clear MP, Save HC, Load GC. (EM is already cleared.)
+    ;   1 |  0 |  1 | #NM      | #NM  :: Go to host, taking the trap there.
+    ;   1 |  1 |  0 | #NM      | Exec :: Clear MP, Save HC, Load GC. (EM is already set.)
+    ;   1 |  1 |  1 | #NM      | #NM  :: Go to host, taking the trap there.
+
+    ;
+    ; Before taking any of these actions we check whether the GC FPU has
+    ; already been loaded, because if it has, this is a trap for the guest - raw ring-3.
+    ;
+%ifdef RT_ARCH_AMD64
+ %ifdef RT_OS_WINDOWS
+    mov     xDX, rcx
+ %else
+    mov     xDX, rdi
+ %endif
+%else
+    mov     xDX, dword [esp + 4]
+%endif
+    test    dword [xDX + CPUMCPU.fUseFlags], CPUM_USED_FPU
+    jz      hlfpua_not_loaded
+    jmp     hlfpua_to_host
+
+    ;
+    ; Take action.
+    ;
+align 16
+hlfpua_not_loaded:
+    mov     eax, [xDX + CPUMCPU.Guest.cr0]
+    and     eax, X86_CR0_MP | X86_CR0_EM | X86_CR0_TS
+%ifdef RT_ARCH_AMD64
+    lea     r8, [hlfpuajmp1 wrt rip]
+    jmp     qword [rax*4 + r8]
+%else
+    jmp     dword [eax*2 + hlfpuajmp1]
+%endif
+align 16
+;; Jump table using fpu-related cr0 flags as index.
+hlfpuajmp1:
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_to_host
+    RTCCPTR_DEF hlfpua_switch_fpu_ctx
+    RTCCPTR_DEF hlfpua_to_host
+;; And-mask table for cr0.
+hlfpu_afFlags:
+    RTCCPTR_DEF ~(X86_CR0_TS | X86_CR0_MP)
+    RTCCPTR_DEF ~(X86_CR0_TS)
+    RTCCPTR_DEF ~(X86_CR0_TS | X86_CR0_MP)
+    RTCCPTR_DEF ~(X86_CR0_TS)
+    RTCCPTR_DEF ~(X86_CR0_MP)
+    RTCCPTR_DEF 0
+    RTCCPTR_DEF ~(X86_CR0_MP)
+    RTCCPTR_DEF 0
+
+    ;
+    ; Action - switch FPU context and change cr0 flags.
+    ;
+align 16
+hlfpua_switch_fpu_ctx:
+    ; Paranoia. This function was previously used in ring-0, not any longer.
+%ifdef IN_RING3
+ %error "This function is not written for ring-3"
+%endif
+%ifdef IN_RING0
+ %error "This function is not written for ring-0"
+%endif
+
+    mov     xCX, cr0
+%ifdef RT_ARCH_AMD64
+    lea     r8, [hlfpu_afFlags wrt rip]
+    and     rcx, [rax*4 + r8]               ; calc the new cr0 flags.
+%else
+    and     ecx, [eax*2 + hlfpu_afFlags]    ; calc the new cr0 flags.
+%endif
+    mov     xAX, cr0
+    and     xAX, ~(X86_CR0_TS | X86_CR0_EM)
+    mov     cr0, xAX                        ; clear flags so we don't trap here.
+%ifndef RT_ARCH_AMD64
+    mov     eax, edx                        ; Calculate the PCPUM pointer
+    sub     eax, [edx + CPUMCPU.offCPUM]
+    test    dword [eax + CPUM.CPUFeatures.edx], X86_CPUID_FEATURE_EDX_FXSR
+    jz      short hlfpua_no_fxsave
+%endif
+
+%ifdef RT_ARCH_AMD64
+    ; Use explicit REX prefix. See @bugref{6398}.
+    o64 fxsave  [xDX + CPUMCPU.Host.fpu]
+%else
+    fxsave  [xDX + CPUMCPU.Host.fpu]
+%endif
+    or      dword [xDX + CPUMCPU.fUseFlags], (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM)
+%ifdef RT_ARCH_AMD64
+    o64 fxrstor [xDX + CPUMCPU.Guest.fpu]
+%else
+    fxrstor [xDX + CPUMCPU.Guest.fpu]
+%endif
+hlfpua_finished_switch:
+
+    ; Load new CR0 value.
+    ;; @todo Optimize the many unconditional CR0 writes.
+    mov     cr0, xCX                        ; load the new cr0 flags.
+
+    ; Return, indicating that the caller should continue execution.
+    xor     eax, eax
+    ret
+
+%ifndef RT_ARCH_AMD64
+; Legacy support (no FXSR).
+hlfpua_no_fxsave:
+    fnsave  [xDX + CPUMCPU.Host.fpu]
+    or      dword [xDX + CPUMCPU.fUseFlags], dword (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM) ; yasm / nasm
+    mov     eax, [xDX + CPUMCPU.Guest.fpu]      ; control word
+    not     eax                                 ; 1 means exception ignored (6 LS bits)
+    and     eax, byte 03Fh                      ; 6 LS bits only
+    test    eax, [xDX + CPUMCPU.Guest.fpu + 4]  ; status word
+    jz      short hlfpua_no_exceptions_pending
+    ; Technically incorrect, but we certainly don't want any exceptions now!!
+    and     dword [xDX + CPUMCPU.Guest.fpu + 4], ~03Fh
+hlfpua_no_exceptions_pending:
+    frstor  [xDX + CPUMCPU.Guest.fpu]
+    jmp     near hlfpua_finished_switch
+%endif ; !RT_ARCH_AMD64
+
+
+    ;
+    ; Action - Generate guest trap.
+    ;
+hlfpua_action_4:
+hlfpua_to_host:
+    mov     eax, VINF_EM_RAW_GUEST_TRAP
+    ret
+ENDPROC cpumHandleLazyFPUAsm
+
 
 ;;
 ; Calls a guest trap/interrupt handler directly
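For readers who want the decision logic without the assembly, the following C sketch restates what the jump table (hlfpuajmp1) and the CR0 mask table (hlfpu_afFlags) encode. It is an illustrative sketch only, not VirtualBox code: the function, enum and parameter names (lazyFpuDecide, LAZYFPUACTION, uActiveCr0, and so on) are invented for the example, and only the architecturally defined CR0 bit positions are assumed.

```c
#include <stdbool.h>
#include <stdint.h>

/* Architecturally defined CR0 bits used by the lazy-FPU logic. */
#define X86_CR0_MP  UINT32_C(0x00000002)   /* Monitor coProcessor: WAIT/FWAIT honours TS. */
#define X86_CR0_EM  UINT32_C(0x00000004)   /* EMulation: every FPU instruction raises #NM. */
#define X86_CR0_TS  UINT32_C(0x00000008)   /* Task Switched: next FPU instruction raises #NM. */

typedef enum LAZYFPUACTION
{
    LAZYFPUACTION_SWITCH_CONTEXT,   /* Save the host FPU state, load the guest state, continue. */
    LAZYFPUACTION_GUEST_TRAP        /* Return VINF_EM_RAW_GUEST_TRAP; the #NM belongs to the guest. */
} LAZYFPUACTION;

/*
 * Decide how to handle an FPU trap, mirroring the TS/EM/MP table above
 * (hypothetical helper, not part of CPUM).
 *
 * fGuestFpuLoaded corresponds to CPUM_USED_FPU: if the guest FPU is already
 * loaded, any further trap is the guest's own.  Otherwise the trap is
 * forwarded only when the guest has both TS and MP set (a WAIT would fault
 * in the guest as well); in every other case the contexts are switched and
 * the active CR0 is adjusted so that TS and MP mirror the guest's values,
 * which is exactly what the hlfpu_afFlags mask table does.
 */
static LAZYFPUACTION lazyFpuDecide(uint32_t uGuestCr0, bool fGuestFpuLoaded,
                                   uint32_t uActiveCr0, uint32_t *puNewActiveCr0)
{
    *puNewActiveCr0 = uActiveCr0;
    if (fGuestFpuLoaded)
        return LAZYFPUACTION_GUEST_TRAP;

    if ((uGuestCr0 & (X86_CR0_TS | X86_CR0_MP)) == (X86_CR0_TS | X86_CR0_MP))
        return LAZYFPUACTION_GUEST_TRAP;

    if (!(uGuestCr0 & X86_CR0_TS))
        *puNewActiveCr0 &= ~X86_CR0_TS;
    if (!(uGuestCr0 & X86_CR0_MP))
        *puNewActiveCr0 &= ~X86_CR0_MP;
    return LAZYFPUACTION_SWITCH_CONTEXT;
}
```

The assembly avoids the branches by using the masked guest CR0 value, scaled to the pointer size, as an index into the two eight-entry tables.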

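A further illustrative note on the legacy (no-FXSR) path: before FRSTOR, the code checks whether the saved guest image contains pending x87 exceptions that are not masked, since restoring such state and then executing a waiting FP instruction inside the hypervisor would raise an FPU exception. Below is a minimal C sketch of that test, assuming only the standard protected-mode FNSAVE layout (FCW in the dword at offset 0, FSW in the dword at offset 4); the struct and function names are invented.

```c
#include <stdbool.h>
#include <stdint.h>

/* Leading fields of the 108-byte protected-mode FNSAVE image (hypothetical struct). */
typedef struct X87SAVEIMAGE
{
    uint32_t uControlWord;   /* FCW in the low 16 bits; bits 0-5 are exception mask bits (1 = masked). */
    uint32_t uStatusWord;    /* FSW in the low 16 bits; bits 0-5 are pending exception flags. */
    /* ... the remaining FNSAVE fields are not needed for this check ... */
} X87SAVEIMAGE;

/*
 * Equivalent of the "not eax; and eax, 03Fh; test eax, [status word]" sequence:
 * true if any pending exception (FSW bits 0-5) is unmasked (FCW mask bit clear).
 * The assembly then clears those FSW bits before FRSTOR - "technically
 * incorrect", as its comment says, but it keeps the hypervisor from taking
 * the exception itself.
 */
static bool x87HasUnmaskedPendingExceptions(const X87SAVEIMAGE *pImage)
{
    uint32_t fUnmasked = ~pImage->uControlWord & UINT32_C(0x3f);
    return (fUnmasked & pImage->uStatusWord) != 0;
}
```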