|  |  |  |
|---|---|---|
| author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2014-03-26 19:21:20 +0000 |
| committer | <> | 2014-05-08 15:03:54 +0000 |
| commit | fb123f93f9f5ce42c8e5785d2f8e0edaf951740e (patch) | |
| tree | c2103d76aec5f1f10892cd1d3a38e24f665ae5db /src/VBox/VMM/VMMR0 | |
| parent | 58ed4748338f9466599adfc8a9171280ed99e23f (diff) | |
| download | VirtualBox-master.tar.gz | |
Imported from /home/lorry/working-area/delta_VirtualBox/VirtualBox-4.3.10.tar.bz2. (HEAD, VirtualBox-4.3.10, master)
Diffstat (limited to 'src/VBox/VMM/VMMR0')
31 files changed, 18743 insertions, 11419 deletions
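
The CPUMR0.cpp hunk below adds a cpumR0CheckCpuid callback that runs on every host CPU via RTMpOnAll and atomically clears guest CPUID feature bits not reported by all cores (the MONITOR/MWAIT bit has been seen to differ between host CPUs, see @bugref{5436}). The following is a minimal, standalone C sketch of that unification pattern; the names and bit masks are illustrative and not taken from the VirtualBox sources.

```c
#include <stdint.h>
#include <stdatomic.h>

/* Illustrative leaf-1 ECX feature mask shared by all CPUs; it starts with all
   bits set and is narrowed as each CPU reports its own features. */
static _Atomic uint32_t g_fEcxUnified = UINT32_MAX;

/* Bits to unify across CPUs (e.g. MONITOR, bit 3, and CMPXCHG16B, bit 13,
   in the real code). */
#define UNIFY_ECX_MASK  ((UINT32_C(1) << 3) | (UINT32_C(1) << 13))

/* Invoked once on every CPU, e.g. from an on-all-CPUs IPI helper. */
static void unifyCpuidOnThisCpu(uint32_t fEcxThisCpu)
{
    /* Bits outside the unify mask are left untouched; inside the mask, only
       bits this CPU also reports survive.  This mirrors the
       ASMAtomicAndU32(&pLegacyLeaf->ecx, ecx | ~mask) calls in the diff. */
    atomic_fetch_and(&g_fEcxUnified, fEcxThisCpu | ~UNIFY_ECX_MASK);
}
```
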
diff --git a/src/VBox/VMM/VMMR0/CPUMR0.cpp b/src/VBox/VMM/VMMR0/CPUMR0.cpp index 4fbebab4..d4289388 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0.cpp +++ b/src/VBox/VMM/VMMR0/CPUMR0.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2006-2011 Oracle Corporation + * Copyright (C) 2006-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -25,7 +25,7 @@ #include <VBox/vmm/vm.h> #include <VBox/err.h> #include <VBox/log.h> -#include <VBox/vmm/hwaccm.h> +#include <VBox/vmm/hm.h> #include <iprt/assert.h> #include <iprt/asm-amd64-x86.h> #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI @@ -47,6 +47,10 @@ typedef struct CPUMHOSTLAPIC { /** Indicates that the entry is in use and have valid data. */ bool fEnabled; + /** Whether it's operating in X2APIC mode (EXTD). */ + bool fX2Apic; + /** The APIC version number. */ + uint32_t uVersion; /** Has APIC_REG_LVT_THMR. Not used. */ uint32_t fHasThermal; /** The physical address of the APIC registers. */ @@ -71,6 +75,25 @@ typedef struct CPUMHOSTLAPIC static CPUMHOSTLAPIC g_aLApics[RTCPUSET_MAX_CPUS]; #endif +/** + * CPUID bits to unify among all cores. + */ +static struct +{ + uint32_t uLeaf; /**< Leaf to check. */ + uint32_t ecx; /**< which bits in ecx to unify between CPUs. */ + uint32_t edx; /**< which bits in edx to unify between CPUs. */ +} +const g_aCpuidUnifyBits[] = +{ + { + 0x00000001, + X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR, + X86_CPUID_FEATURE_EDX_CX8 + } +}; + + /******************************************************************************* * Internal Functions * @@ -79,13 +102,14 @@ static CPUMHOSTLAPIC g_aLApics[RTCPUSET_MAX_CPUS]; static int cpumR0MapLocalApics(void); static void cpumR0UnmapLocalApics(void); #endif +static int cpumR0SaveHostDebugState(PVMCPU pVCpu); /** * Does the Ring-0 CPU initialization once during module load. * XXX Host-CPU hot-plugging? */ -VMMR0DECL(int) CPUMR0ModuleInit(void) +VMMR0_INT_DECL(int) CPUMR0ModuleInit(void) { int rc = VINF_SUCCESS; #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI @@ -98,7 +122,7 @@ VMMR0DECL(int) CPUMR0ModuleInit(void) /** * Terminate the module. */ -VMMR0DECL(int) CPUMR0ModuleTerm(void) +VMMR0_INT_DECL(int) CPUMR0ModuleTerm(void) { #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI cpumR0UnmapLocalApics(); @@ -108,6 +132,52 @@ VMMR0DECL(int) CPUMR0ModuleTerm(void) /** + * + * + * Check the CPUID features of this particular CPU and disable relevant features + * for the guest which do not exist on this CPU. We have seen systems where the + * X86_CPUID_FEATURE_ECX_MONITOR feature flag is only set on some host CPUs, see + * @bugref{5436}. + * + * @note This function might be called simultaneously on more than one CPU! + * + * @param idCpu The identifier for the CPU the function is called on. + * @param pvUser1 Pointer to the VM structure. + * @param pvUser2 Ignored. + */ +static DECLCALLBACK(void) cpumR0CheckCpuid(RTCPUID idCpu, void *pvUser1, void *pvUser2) +{ + PVM pVM = (PVM)pvUser1; + PCPUM pCPUM = &pVM->cpum.s; + + NOREF(idCpu); NOREF(pvUser2); + for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++) + { + /* Note! Cannot use cpumCpuIdGetLeaf from here because we're not + necessarily in the VM process context. So, we using the + legacy arrays as temporary storage. 
*/ + + uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf; + PCPUMCPUID pLegacyLeaf; + if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdStd)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdStd[uLeaf]; + else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdExt)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdExt[uLeaf - UINT32_C(0x80000000)]; + else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdCentaur)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdCentaur[uLeaf - UINT32_C(0xc0000000)]; + else + continue; + + uint32_t eax, ebx, ecx, edx; + ASMCpuIdExSlow(uLeaf, 0, 0, 0, &eax, &ebx, &ecx, &edx); + + ASMAtomicAndU32(&pLegacyLeaf->ecx, ecx | ~g_aCpuidUnifyBits[i].ecx); + ASMAtomicAndU32(&pLegacyLeaf->edx, edx | ~g_aCpuidUnifyBits[i].edx); + } +} + + +/** * Does Ring-0 CPUM initialization. * * This is mainly to check that the Host CPU mode is compatible @@ -116,7 +186,7 @@ VMMR0DECL(int) CPUMR0ModuleTerm(void) * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) CPUMR0Init(PVM pVM) +VMMR0_INT_DECL(int) CPUMR0InitVM(PVM pVM) { LogFlow(("CPUMR0Init: %p\n", pVM)); @@ -150,9 +220,9 @@ VMMR0DECL(int) CPUMR0Init(PVM pVM) uint32_t u32Dummy; uint32_t fFeatures; ASMCpuId(1, &u32CpuVersion, &u32Dummy, &u32Dummy, &fFeatures); - uint32_t u32Family = u32CpuVersion >> 8; - uint32_t u32Model = (u32CpuVersion >> 4) & 0xF; - uint32_t u32Stepping = u32CpuVersion & 0xF; + uint32_t const u32Family = u32CpuVersion >> 8; + uint32_t const u32Model = (u32CpuVersion >> 4) & 0xF; + uint32_t const u32Stepping = u32CpuVersion & 0xF; if ( (fFeatures & X86_CPUID_FEATURE_EDX_SEP) && ( u32Family != 6 /* (> pentium pro) */ || u32Model >= 3 @@ -181,8 +251,7 @@ VMMR0DECL(int) CPUMR0Init(PVM pVM) */ uint32_t cExt = 0; ASMCpuId(0x80000000, &cExt, &u32Dummy, &u32Dummy, &u32Dummy); - if ( cExt >= 0x80000001 - && cExt <= 0x8000ffff) + if (ASMIsValidExtRange(cExt)) { uint32_t fExtFeaturesEDX = ASMCpuId_EDX(0x80000001); if (fExtFeaturesEDX & X86_CPUID_EXT_FEATURE_EDX_SYSCALL) @@ -204,6 +273,39 @@ VMMR0DECL(int) CPUMR0Init(PVM pVM) } } } + + /* + * Unify/cross check some CPUID feature bits on all available CPU cores + * and threads. We've seen CPUs where the monitor support differed. + * + * Because the hyper heap isn't always mapped into ring-0, we cannot + * access it from a RTMpOnAll callback. We use the legacy CPUID arrays + * as temp ring-0 accessible memory instead, ASSUMING that they're all + * up to date when we get here. + */ + RTMpOnAll(cpumR0CheckCpuid, pVM, NULL); + + for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++) + { + uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf; + PCPUMCPUIDLEAF pLeaf = cpumCpuIdGetLeaf(pVM, uLeaf, 0); + if (pLeaf) + { + PCPUMCPUID pLegacyLeaf; + if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdStd)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdStd[uLeaf]; + else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdExt)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdExt[uLeaf - UINT32_C(0x80000000)]; + else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdCentaur)) + pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdCentaur[uLeaf - UINT32_C(0xc0000000)]; + else + continue; + + pLeaf->uEcx = pLegacyLeaf->ecx; + pLeaf->uEdx = pLegacyLeaf->edx; + } + } + } @@ -224,23 +326,27 @@ VMMR0DECL(int) CPUMR0Init(PVM pVM) /** - * Lazily sync in the FPU/XMM state + * Trap handler for device-not-available fault (#NM). + * Device not available, FP or (F)WAIT instruction. * * @returns VBox status code. 
+ * @retval VINF_SUCCESS if the guest FPU state is loaded. + * @retval VINF_EM_RAW_GUEST_TRAP if it is a guest trap. + * * @param pVM Pointer to the VM. * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. + * @param pCtx Pointer to the guest-CPU context. */ -VMMR0DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +VMMR0_INT_DECL(int) CPUMR0Trap07Handler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { Assert(pVM->cpum.s.CPUFeatures.edx.u1FXSR); Assert(ASMGetCR4() & X86_CR4_OSFSXR); /* If the FPU state has already been loaded, then it's a guest trap. */ - if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU) + if (CPUMIsGuestFPUStateActive(pVCpu)) { - Assert( ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) - || ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))); + Assert( ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS)) + || ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS | X86_CR0_EM))); return VINF_EM_RAW_GUEST_TRAP; } @@ -272,12 +378,28 @@ VMMR0DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) switch (pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) { case X86_CR0_MP | X86_CR0_TS: - case X86_CR0_MP | X86_CR0_EM | X86_CR0_TS: + case X86_CR0_MP | X86_CR0_TS | X86_CR0_EM: return VINF_EM_RAW_GUEST_TRAP; default: break; } + return CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx); +} + + +/** + * Saves the host-FPU/XMM state and loads the guest-FPU state into the CPU. + * + * @returns VBox status code. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + */ +VMMR0_INT_DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(pCtx)) { @@ -286,27 +408,27 @@ VMMR0DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) /* Save the host state and record the fact (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM). */ cpumR0SaveHostFPUState(&pVCpu->cpum.s); - /* Restore the state on entry as we need to be in 64 bits mode to access the full state. */ + /* Restore the state on entry as we need to be in 64-bit mode to access the full state. */ pVCpu->cpum.s.fUseFlags |= CPUM_SYNC_FPU_STATE; } else #endif { -#ifndef CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE -# if defined(VBOX_WITH_HYBRID_32BIT_KERNEL) || defined(VBOX_WITH_KERNEL_USING_XMM) /** @todo remove the #else here and move cpumHandleLazyFPUAsm back to VMMGC after branching out 3.0!!. */ - Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE)); + Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE)); /** @todo Move the FFXR handling down into - * cpumR0SaveHostRestoreguestFPUState to optimize the + * cpumR0SaveHostRestoreGuestFPUState to optimize the * VBOX_WITH_KERNEL_USING_XMM handling. */ /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. 
*/ - uint64_t SavedEFER = 0; + uint64_t uHostEfer = 0; + bool fRestoreEfer = false; if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) { - SavedEFER = ASMRdMsr(MSR_K6_EFER); - if (SavedEFER & MSR_K6_EFER_FFXSR) + uHostEfer = ASMRdMsr(MSR_K6_EFER); + if (uHostEfer & MSR_K6_EFER_FFXSR) { - ASMWrMsr(MSR_K6_EFER, SavedEFER & ~MSR_K6_EFER_FFXSR); - pVCpu->cpum.s.fUseFlags |= CPUM_MANUAL_XMM_RESTORE; + ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR); + pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE; + fRestoreEfer = true; } } @@ -314,71 +436,8 @@ VMMR0DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s); /* Restore EFER. */ - if (pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE) - ASMWrMsr(MSR_K6_EFER, SavedEFER); - -# else - uint64_t oldMsrEFERHost = 0; - uint32_t oldCR0 = ASMGetCR0(); - - /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. */ - if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) - { - /** @todo Do we really need to read this every time?? The host could change this on the fly though. - * bird: what about starting by skipping the ASMWrMsr below if we didn't - * change anything? Ditto for the stuff in CPUMR0SaveGuestFPU. */ - oldMsrEFERHost = ASMRdMsr(MSR_K6_EFER); - if (oldMsrEFERHost & MSR_K6_EFER_FFXSR) - { - ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost & ~MSR_K6_EFER_FFXSR); - pVCpu->cpum.s.fUseFlags |= CPUM_MANUAL_XMM_RESTORE; - } - } - - /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */ - int rc = CPUMHandleLazyFPU(pVCpu); - AssertRC(rc); - Assert(CPUMIsGuestFPUStateActive(pVCpu)); - - /* Restore EFER MSR */ - if (pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE) - ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost); - - /* CPUMHandleLazyFPU could have changed CR0; restore it. */ - ASMSetCR0(oldCR0); -# endif - -#else /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ - - /* - * Save the FPU control word and MXCSR, so we can restore the state properly afterwards. - * We don't want the guest to be able to trigger floating point/SSE exceptions on the host. - */ - pVCpu->cpum.s.Host.fpu.FCW = CPUMGetFCW(); - if (pVM->cpum.s.CPUFeatures.edx.u1SSE) - pVCpu->cpum.s.Host.fpu.MXCSR = CPUMGetMXCSR(); - - cpumR0LoadFPU(pCtx); - - /* - * The MSR_K6_EFER_FFXSR feature is AMD only so far, but check the cpuid just in case Intel adds it in the future. - * - * MSR_K6_EFER_FFXSR changes the behaviour of fxsave and fxrstore: the XMM state isn't saved/restored - */ - if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) - { - /** @todo Do we really need to read this every time?? The host could change this on the fly though. */ - uint64_t msrEFERHost = ASMRdMsr(MSR_K6_EFER); - - if (msrEFERHost & MSR_K6_EFER_FFXSR) - { - /* fxrstor doesn't restore the XMM state! */ - cpumR0LoadXMM(pCtx); - pVCpu->cpum.s.fUseFlags |= CPUM_MANUAL_XMM_RESTORE; - } - } - -#endif /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ + if (fRestoreEfer) + ASMWrMsr(MSR_K6_EFER, uHostEfer); } Assert((pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM)) == (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM)); @@ -394,7 +453,7 @@ VMMR0DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the guest CPU context. 
*/ -VMMR0DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +VMMR0_INT_DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { Assert(pVM->cpum.s.CPUFeatures.edx.u1FXSR); Assert(ASMGetCR4() & X86_CR4_OSFSXR); @@ -406,7 +465,7 @@ VMMR0DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { if (!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE)) { - HWACCMR0SaveFPUState(pVM, pVCpu, pCtx); + HMR0SaveFPUState(pVM, pVCpu, pCtx); cpumR0RestoreHostFPUState(&pVCpu->cpum.s); } /* else nothing to do; we didn't perform a world switch */ @@ -414,8 +473,7 @@ VMMR0DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) else #endif { -#ifndef CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE -# ifdef VBOX_WITH_KERNEL_USING_XMM +#ifdef VBOX_WITH_KERNEL_USING_XMM /* * We've already saved the XMM registers in the assembly wrapper, so * we have to save them before saving the entire FPU state and put them @@ -426,203 +484,244 @@ VMMR0DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) * We could just all this in assembly. */ uint128_t aGuestXmmRegs[16]; memcpy(&aGuestXmmRegs[0], &pVCpu->cpum.s.Guest.fpu.aXMM[0], sizeof(aGuestXmmRegs)); -# endif +#endif /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. */ - uint64_t oldMsrEFERHost = 0; - if (pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE) + uint64_t uHostEfer = 0; + bool fRestoreEfer = false; + if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) { - oldMsrEFERHost = ASMRdMsr(MSR_K6_EFER); - ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost & ~MSR_K6_EFER_FFXSR); + uHostEfer = ASMRdMsr(MSR_K6_EFER); + if (uHostEfer & MSR_K6_EFER_FFXSR) + { + ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR); + fRestoreEfer = true; + } } + cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s); /* Restore EFER MSR */ - if (pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE) - ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost | MSR_K6_EFER_FFXSR); + if (fRestoreEfer) + ASMWrMsr(MSR_K6_EFER, uHostEfer | MSR_K6_EFER_FFXSR); -# ifdef VBOX_WITH_KERNEL_USING_XMM +#ifdef VBOX_WITH_KERNEL_USING_XMM memcpy(&pVCpu->cpum.s.Guest.fpu.aXMM[0], &aGuestXmmRegs[0], sizeof(aGuestXmmRegs)); -# endif - -#else /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ -# ifdef VBOX_WITH_KERNEL_USING_XMM -# error "Fix all the NM_TRAPS_IN_KERNEL_MODE code path. I'm not going to fix unused code now." -# endif - cpumR0SaveFPU(pCtx); - if (pVCpu->cpum.s.fUseFlags & CPUM_MANUAL_XMM_RESTORE) - { - /* fxsave doesn't save the XMM state! */ - cpumR0SaveXMM(pCtx); - } - - /* - * Restore the original FPU control word and MXCSR. - * We don't want the guest to be able to trigger floating point/SSE exceptions on the host. - */ - cpumR0SetFCW(pVCpu->cpum.s.Host.fpu.FCW); - if (pVM->cpum.s.CPUFeatures.edx.u1SSE) - cpumR0SetMXCSR(pVCpu->cpum.s.Host.fpu.MXCSR); -#endif /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ +#endif } - pVCpu->cpum.s.fUseFlags &= ~(CPUM_USED_FPU | CPUM_SYNC_FPU_STATE | CPUM_MANUAL_XMM_RESTORE); + pVCpu->cpum.s.fUseFlags &= ~(CPUM_USED_FPU | CPUM_SYNC_FPU_STATE | CPUM_USED_MANUAL_XMM_RESTORE); return VINF_SUCCESS; } /** - * Save guest debug state + * Saves the host debug state, setting CPUM_USED_HOST_DEBUG_STATE and loading + * DR7 with safe values. * * @returns VBox status code. - * @param pVM Pointer to the VM. * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param fDR6 Whether to include DR6 or not. 
*/ -VMMR0DECL(int) CPUMR0SaveGuestDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, bool fDR6) +static int cpumR0SaveHostDebugState(PVMCPU pVCpu) { - Assert(pVCpu->cpum.s.fUseFlags & CPUM_USE_DEBUG_REGS); - - /* Save the guest's debug state. The caller is responsible for DR7. */ -#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (CPUMIsGuestInLongModeEx(pCtx)) - { - if (!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_DEBUG_STATE)) - { - uint64_t dr6 = pCtx->dr[6]; - - HWACCMR0SaveDebugState(pVM, pVCpu, pCtx); - if (!fDR6) /* dr6 was already up-to-date */ - pCtx->dr[6] = dr6; - } - } - else -#endif - { + /* + * Save the host state. + */ #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - cpumR0SaveDRx(&pCtx->dr[0]); + AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); + cpumR0SaveDRx(&pVCpu->cpum.s.Host.dr0); #else - pCtx->dr[0] = ASMGetDR0(); - pCtx->dr[1] = ASMGetDR1(); - pCtx->dr[2] = ASMGetDR2(); - pCtx->dr[3] = ASMGetDR3(); + pVCpu->cpum.s.Host.dr0 = ASMGetDR0(); + pVCpu->cpum.s.Host.dr1 = ASMGetDR1(); + pVCpu->cpum.s.Host.dr2 = ASMGetDR2(); + pVCpu->cpum.s.Host.dr3 = ASMGetDR3(); #endif - if (fDR6) - pCtx->dr[6] = ASMGetDR6(); - } + pVCpu->cpum.s.Host.dr6 = ASMGetDR6(); + /** @todo dr7 might already have been changed to 0x400; don't care right now as it's harmless. */ + pVCpu->cpum.s.Host.dr7 = ASMGetDR7(); + + /* Preemption paranoia. */ + ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HOST); /* - * Restore the host's debug state. DR0-3, DR6 and only then DR7! - * DR7 contains 0x400 right now. + * Make sure DR7 is harmless or else we could trigger breakpoints when + * load guest or hypervisor DRx values later. */ - CPUMR0LoadHostDebugState(pVM, pVCpu); - Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USE_DEBUG_REGS)); + if (pVCpu->cpum.s.Host.dr7 != X86_DR7_INIT_VAL) + ASMSetDR7(X86_DR7_INIT_VAL); + return VINF_SUCCESS; } /** - * Lazily sync in the debug state + * Saves the guest DRx state residing in host registers and restore the host + * register values. * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param fDR6 Whether to include DR6 or not. + * The guest DRx state is only saved if CPUMR0LoadGuestDebugState was called, + * since it's assumed that we're shadowing the guest DRx register values + * accurately when using the combined hypervisor debug register values + * (CPUMR0LoadHyperDebugState). + * + * @returns true if either guest or hypervisor debug registers were loaded. + * @param pVCpu The cross context CPU structure for the calling EMT. + * @param fDr6 Whether to include DR6 or not. + * @thread EMT(pVCpu) */ -VMMR0DECL(int) CPUMR0LoadGuestDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, bool fDR6) +VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(PVMCPU pVCpu, bool fDr6) { - /* Save the host state. */ - CPUMR0SaveHostDebugState(pVM, pVCpu); - Assert(ASMGetDR7() == X86_DR7_INIT_VAL); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + bool const fDrXLoaded = RT_BOOL(pVCpu->cpum.s.fUseFlags & (CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER)); - /* Activate the guest state DR0-3; DR7 is left to the caller. */ -#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (CPUMIsGuestInLongModeEx(pCtx)) + /* + * Do we need to save the guest DRx registered loaded into host registers? 
+ * (DR7 and DR6 (if fDr6 is true) are left to the caller.) + */ + if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST) { - /* Restore the state on entry as we need to be in 64 bits mode to access the full state. */ - pVCpu->cpum.s.fUseFlags |= CPUM_SYNC_DEBUG_STATE; - } - else +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) + { + uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6]; + HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest); + if (!fDr6) + pVCpu->cpum.s.Guest.dr[6] = uDr6; + } + else +#endif + { +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + cpumR0SaveDRx(&pVCpu->cpum.s.Guest.dr[0]); +#else + pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0(); + pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1(); + pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2(); + pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3(); #endif + if (fDr6) + pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6(); + } + } + ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~( CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER + | CPUM_SYNC_DEBUG_REGS_GUEST | CPUM_SYNC_DEBUG_REGS_HYPER)); + + /* + * Restore the host's debug state. DR0-3, DR6 and only then DR7! + */ + if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_HOST) { + /* A bit of paranoia first... */ + uint64_t uCurDR7 = ASMGetDR7(); + if (uCurDR7 != X86_DR7_INIT_VAL) + ASMSetDR7(X86_DR7_INIT_VAL); + #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - cpumR0LoadDRx(&pCtx->dr[0]); + AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); + cpumR0LoadDRx(&pVCpu->cpum.s.Host.dr0); #else - ASMSetDR0(pCtx->dr[0]); - ASMSetDR1(pCtx->dr[1]); - ASMSetDR2(pCtx->dr[2]); - ASMSetDR3(pCtx->dr[3]); + ASMSetDR0(pVCpu->cpum.s.Host.dr0); + ASMSetDR1(pVCpu->cpum.s.Host.dr1); + ASMSetDR2(pVCpu->cpum.s.Host.dr2); + ASMSetDR3(pVCpu->cpum.s.Host.dr3); #endif - if (fDR6) - ASMSetDR6(pCtx->dr[6]); + /** @todo consider only updating if they differ, esp. DR6. Need to figure how + * expensive DRx reads are over DRx writes. */ + ASMSetDR6(pVCpu->cpum.s.Host.dr6); + ASMSetDR7(pVCpu->cpum.s.Host.dr7); + + ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~CPUM_USED_DEBUG_REGS_HOST); } - pVCpu->cpum.s.fUseFlags |= CPUM_USE_DEBUG_REGS; - return VINF_SUCCESS; + return fDrXLoaded; } + /** - * Save the host debug state + * Saves the guest DRx state if it resides host registers. * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. + * This does NOT clear any use flags, so the host registers remains loaded with + * the guest DRx state upon return. The purpose is only to make sure the values + * in the CPU context structure is up to date. + * + * @returns true if the host registers contains guest values, false if not. + * @param pVCpu The cross context CPU structure for the calling EMT. + * @param fDr6 Whether to include DR6 or not. + * @thread EMT(pVCpu) */ -VMMR0DECL(int) CPUMR0SaveHostDebugState(PVM pVM, PVMCPU pVCpu) +VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuest(PVMCPU pVCpu, bool fDr6) { - NOREF(pVM); - - /* Save the host state. */ + /* + * Do we need to save the guest DRx registered loaded into host registers? + * (DR7 and DR6 (if fDr6 is true) are left to the caller.) 
+ */ + if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST) + { +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) + { + uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6]; + HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest); + if (!fDr6) + pVCpu->cpum.s.Guest.dr[6] = uDr6; + } + else +#endif + { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); - cpumR0SaveDRx(&pVCpu->cpum.s.Host.dr0); + cpumR0SaveDRx(&pVCpu->cpum.s.Guest.dr[0]); #else - pVCpu->cpum.s.Host.dr0 = ASMGetDR0(); - pVCpu->cpum.s.Host.dr1 = ASMGetDR1(); - pVCpu->cpum.s.Host.dr2 = ASMGetDR2(); - pVCpu->cpum.s.Host.dr3 = ASMGetDR3(); + pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0(); + pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1(); + pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2(); + pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3(); #endif - pVCpu->cpum.s.Host.dr6 = ASMGetDR6(); - /** @todo dr7 might already have been changed to 0x400; don't care right now as it's harmless. */ - pVCpu->cpum.s.Host.dr7 = ASMGetDR7(); - /* Make sure DR7 is harmless or else we could trigger breakpoints when restoring dr0-3 (!) */ - ASMSetDR7(X86_DR7_INIT_VAL); - - return VINF_SUCCESS; + if (fDr6) + pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6(); + } + return true; + } + return false; } + /** - * Load the host debug state + * Lazily sync in the debug state. * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. + * @param pVCpu The cross context CPU structure for the calling EMT. + * @param fDr6 Whether to include DR6 or not. + * @thread EMT(pVCpu) */ -VMMR0DECL(int) CPUMR0LoadHostDebugState(PVM pVM, PVMCPU pVCpu) +VMMR0_INT_DECL(void) CPUMR0LoadGuestDebugState(PVMCPU pVCpu, bool fDr6) { - Assert(pVCpu->cpum.s.fUseFlags & (CPUM_USE_DEBUG_REGS | CPUM_USE_DEBUG_REGS_HYPER)); - NOREF(pVM); + /* + * Save the host state and disarm all host BPs. + */ + cpumR0SaveHostDebugState(pVCpu); + Assert(ASMGetDR7() == X86_DR7_INIT_VAL); /* - * Restore the host's debug state. DR0-3, DR6 and only then DR7! - * DR7 contains 0x400 right now. + * Activate the guest state DR0-3. + * DR7 and DR6 (if fDr6 is true) are left to the caller. */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) + ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_GUEST); /* Postpone it to the world switch. 
*/ + else +#endif + { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); - cpumR0LoadDRx(&pVCpu->cpum.s.Host.dr0); + cpumR0LoadDRx(&pVCpu->cpum.s.Guest.dr[0]); #else - ASMSetDR0(pVCpu->cpum.s.Host.dr0); - ASMSetDR1(pVCpu->cpum.s.Host.dr1); - ASMSetDR2(pVCpu->cpum.s.Host.dr2); - ASMSetDR3(pVCpu->cpum.s.Host.dr3); + ASMSetDR0(pVCpu->cpum.s.Guest.dr[0]); + ASMSetDR1(pVCpu->cpum.s.Guest.dr[1]); + ASMSetDR2(pVCpu->cpum.s.Guest.dr[2]); + ASMSetDR3(pVCpu->cpum.s.Guest.dr[3]); #endif - ASMSetDR6(pVCpu->cpum.s.Host.dr6); - ASMSetDR7(pVCpu->cpum.s.Host.dr7); + if (fDr6) + ASMSetDR6(pVCpu->cpum.s.Guest.dr[6]); - pVCpu->cpum.s.fUseFlags &= ~(CPUM_USE_DEBUG_REGS | CPUM_USE_DEBUG_REGS_HYPER); - return VINF_SUCCESS; + ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_GUEST); + } } @@ -630,83 +729,89 @@ VMMR0DECL(int) CPUMR0LoadHostDebugState(PVM pVM, PVMCPU pVCpu) * Lazily sync in the hypervisor debug state * * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param fDR6 Whether to include DR6 or not. + * @param pVCpu The cross context CPU structure for the calling EMT. + * @param fDr6 Whether to include DR6 or not. + * @thread EMT(pVCpu) */ -VMMR0DECL(int) CPUMR0LoadHyperDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, bool fDR6) +VMMR0_INT_DECL(void) CPUMR0LoadHyperDebugState(PVMCPU pVCpu, bool fDr6) { - NOREF(pCtx); - - /* Save the host state. */ - CPUMR0SaveHostDebugState(pVM, pVCpu); + /* + * Save the host state and disarm all host BPs. + */ + cpumR0SaveHostDebugState(pVCpu); Assert(ASMGetDR7() == X86_DR7_INIT_VAL); - /* Activate the guest state DR0-3; DR7 is left to the caller. */ + /* + * Make sure the hypervisor values are up to date. + */ + CPUMRecalcHyperDRx(pVCpu, UINT8_MAX /* no loading, please */, true); + + /* + * Activate the guest state DR0-3. + * DR7 and DR6 (if fDr6 is true) are left to the caller. + */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (CPUMIsGuestInLongModeEx(pCtx)) - { - AssertFailed(); - return VERR_NOT_IMPLEMENTED; - } + if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) + ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_HYPER); /* Postpone it. */ else #endif { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - AssertFailed(); - return VERR_NOT_IMPLEMENTED; + cpumR0LoadDRx(&pVCpu->cpum.s.Hyper.dr[0]); #else - ASMSetDR0(CPUMGetHyperDR0(pVCpu)); - ASMSetDR1(CPUMGetHyperDR1(pVCpu)); - ASMSetDR2(CPUMGetHyperDR2(pVCpu)); - ASMSetDR3(CPUMGetHyperDR3(pVCpu)); + ASMSetDR0(pVCpu->cpum.s.Hyper.dr[0]); + ASMSetDR1(pVCpu->cpum.s.Hyper.dr[1]); + ASMSetDR2(pVCpu->cpum.s.Hyper.dr[2]); + ASMSetDR3(pVCpu->cpum.s.Hyper.dr[3]); #endif - if (fDR6) - ASMSetDR6(CPUMGetHyperDR6(pVCpu)); - } + if (fDr6) + ASMSetDR6(X86_DR6_INIT_VAL); - pVCpu->cpum.s.fUseFlags |= CPUM_USE_DEBUG_REGS_HYPER; - return VINF_SUCCESS; + ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HYPER); + } } #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI /** - * Worker for cpumR0MapLocalApics. Check each CPU for a present Local APIC. - * Play safe and treat each CPU separate. + * Per-CPU callback that probes the CPU for APIC support. + * + * @param idCpu The identifier for the CPU the function is called on. + * @param pvUser1 Ignored. + * @param pvUser2 Ignored. 
*/ -static DECLCALLBACK(void) cpumR0MapLocalApicWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2) +static DECLCALLBACK(void) cpumR0MapLocalApicCpuProber(RTCPUID idCpu, void *pvUser1, void *pvUser2) { NOREF(pvUser1); NOREF(pvUser2); int iCpu = RTMpCpuIdToSetIndex(idCpu); AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics)); - uint32_t u32MaxIdx, u32EBX, u32ECX, u32EDX; - ASMCpuId(0, &u32MaxIdx, &u32EBX, &u32ECX, &u32EDX); - if ( ( ( u32EBX == X86_CPUID_VENDOR_INTEL_EBX - && u32ECX == X86_CPUID_VENDOR_INTEL_ECX - && u32EDX == X86_CPUID_VENDOR_INTEL_EDX) - || ( u32EBX == X86_CPUID_VENDOR_AMD_EBX - && u32ECX == X86_CPUID_VENDOR_AMD_ECX - && u32EDX == X86_CPUID_VENDOR_AMD_EDX) - || ( u32EBX == X86_CPUID_VENDOR_VIA_EBX - && u32ECX == X86_CPUID_VENDOR_VIA_ECX - && u32EDX == X86_CPUID_VENDOR_VIA_EDX)) - && u32MaxIdx >= 1) + /* + * Check for APIC support. + */ + uint32_t uMaxLeaf, u32EBX, u32ECX, u32EDX; + ASMCpuId(0, &uMaxLeaf, &u32EBX, &u32ECX, &u32EDX); + if ( ( ASMIsIntelCpuEx(u32EBX, u32ECX, u32EDX) + || ASMIsAmdCpuEx(u32EBX, u32ECX, u32EDX) + || ASMIsViaCentaurCpuEx(u32EBX, u32ECX, u32EDX)) + && ASMIsValidStdRange(uMaxLeaf)) { - ASMCpuId(1, &u32MaxIdx, &u32EBX, &u32ECX, &u32EDX); + uint32_t uDummy; + ASMCpuId(1, &uDummy, &u32EBX, &u32ECX, &u32EDX); if ( (u32EDX & X86_CPUID_FEATURE_EDX_APIC) && (u32EDX & X86_CPUID_FEATURE_EDX_MSR)) { + /* + * Safe to access the MSR. Read it and calc the BASE (a little complicated). + */ uint64_t u64ApicBase = ASMRdMsr(MSR_IA32_APICBASE); - uint64_t u64Mask = UINT64_C(0x0000000ffffff000); + uint64_t u64Mask = MSR_IA32_APICBASE_BASE_MIN; /* see Intel Manual: Local APIC Status and Location: MAXPHYADDR default is bit 36 */ - uint32_t u32MaxExtIdx; - ASMCpuId(0x80000000, &u32MaxExtIdx, &u32EBX, &u32ECX, &u32EDX); - if ( u32MaxExtIdx >= UINT32_C(0x80000008) - && u32MaxExtIdx < UINT32_C(0x8000ffff)) + uint32_t uMaxExtLeaf; + ASMCpuId(0x80000000, &uMaxExtLeaf, &u32EBX, &u32ECX, &u32EDX); + if ( uMaxExtLeaf >= UINT32_C(0x80000008) + && ASMIsValidExtRange(uMaxExtLeaf)) { uint32_t u32PhysBits; ASMCpuId(0x80000008, &u32PhysBits, &u32EBX, &u32ECX, &u32EDX); @@ -714,14 +819,68 @@ static DECLCALLBACK(void) cpumR0MapLocalApicWorker(RTCPUID idCpu, void *pvUser1, u64Mask = ((UINT64_C(1) << u32PhysBits) - 1) & UINT64_C(0xfffffffffffff000); } - uint64_t const u64PhysBase = u64ApicBase & u64Mask; - g_aLApics[iCpu].PhysBase = (RTHCPHYS)u64PhysBase; - g_aLApics[iCpu].fEnabled = g_aLApics[iCpu].PhysBase == u64PhysBase; + AssertCompile(sizeof(g_aLApics[iCpu].PhysBase) == sizeof(u64ApicBase)); + g_aLApics[iCpu].PhysBase = u64ApicBase & u64Mask; + g_aLApics[iCpu].fEnabled = RT_BOOL(u64ApicBase & MSR_IA32_APICBASE_EN); + g_aLApics[iCpu].fX2Apic = (u64ApicBase & (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN)) + == (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN); } } } + +/** + * Per-CPU callback that verifies our APIC expectations. + * + * @param idCpu The identifier for the CPU the function is called on. + * @param pvUser1 Ignored. + * @param pvUser2 Ignored. 
+ */ +static DECLCALLBACK(void) cpumR0MapLocalApicCpuChecker(RTCPUID idCpu, void *pvUser1, void *pvUser2) +{ + int iCpu = RTMpCpuIdToSetIndex(idCpu); + AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics)); + if (!g_aLApics[iCpu].fEnabled) + return; + + /* + * 0x0X 82489 external APIC + * 0x1X Local APIC + * 0x2X..0xFF reserved + */ + uint32_t uApicVersion; + if (g_aLApics[iCpu].fX2Apic) + uApicVersion = ApicX2RegRead32(APIC_REG_VERSION); + else + uApicVersion = ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_VERSION); + if ((APIC_REG_VERSION_GET_VER(uApicVersion) & 0xF0) == 0x10) + { + g_aLApics[iCpu].uVersion = uApicVersion; + g_aLApics[iCpu].fHasThermal = APIC_REG_VERSION_GET_MAX_LVT(uApicVersion) >= 5; + +#if 0 /* enable if you need it. */ + if (g_aLApics[iCpu].fX2Apic) + SUPR0Printf("CPUM: X2APIC %02u - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x\n", + iCpu, uApicVersion, + ApicX2RegRead32(APIC_REG_LVT_LINT0), ApicX2RegRead32(APIC_REG_LVT_LINT1), + ApicX2RegRead32(APIC_REG_LVT_PC), ApicX2RegRead32(APIC_REG_LVT_THMR) ); + else + SUPR0Printf("CPUM: APIC %02u at %RGp (mapped at %p) - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x\n", + iCpu, g_aLApics[iCpu].PhysBase, g_aLApics[iCpu].pv, uApicVersion, + ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT0), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT1), + ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_PC), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_THMR) ); +#endif + } + else + { + g_aLApics[iCpu].fEnabled = false; + g_aLApics[iCpu].fX2Apic = false; + SUPR0Printf("VBox/CPUM: Unsupported APIC version %#x (iCpu=%d)\n", uApicVersion, iCpu); + } +} + + /** * Map the MMIO page of each local APIC in the system. */ @@ -737,15 +896,13 @@ static int cpumR0MapLocalApics(void) } /* - * Create mappings for all online CPUs we think have APICs. + * Create mappings for all online CPUs we think have legacy APICs. */ - /** @todo r=bird: This code is not adequately handling CPUs that are - * offline or unplugged at init time and later bought into action. */ - int rc = RTMpOnAll(cpumR0MapLocalApicWorker, NULL, NULL); + int rc = RTMpOnAll(cpumR0MapLocalApicCpuProber, NULL, NULL); for (unsigned iCpu = 0; RT_SUCCESS(rc) && iCpu < RT_ELEMENTS(g_aLApics); iCpu++) { - if (g_aLApics[iCpu].fEnabled) + if (g_aLApics[iCpu].fEnabled && !g_aLApics[iCpu].fX2Apic) { rc = RTR0MemObjEnterPhys(&g_aLApics[iCpu].hMemObj, g_aLApics[iCpu].PhysBase, PAGE_SIZE, RTMEM_CACHE_POLICY_MMIO); @@ -755,43 +912,47 @@ static int cpumR0MapLocalApics(void) PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE); if (RT_SUCCESS(rc)) { - void *pvApicBase = RTR0MemObjAddress(g_aLApics[iCpu].hMapObj); - - /* - * 0x0X 82489 external APIC - * 0x1X Local APIC - * 0x2X..0xFF reserved - */ - /** @todo r=bird: The local APIC is usually at the same address for all CPUs, - * and therefore inaccessible by the other CPUs. 
*/ - uint32_t ApicVersion = ApicRegRead(pvApicBase, APIC_REG_VERSION); - if ((APIC_REG_VERSION_GET_VER(ApicVersion) & 0xF0) == 0x10) - { - g_aLApics[iCpu].fHasThermal = APIC_REG_VERSION_GET_MAX_LVT(ApicVersion) >= 5; - g_aLApics[iCpu].pv = pvApicBase; - Log(("CPUM: APIC %02u at %RGp (mapped at %p) - ver %#x, lint0=%#x lint1=%#x pc=%#x thmr=%#x\n", - iCpu, g_aLApics[iCpu].PhysBase, g_aLApics[iCpu].pv, ApicVersion, - ApicRegRead(pvApicBase, APIC_REG_LVT_LINT0), - ApicRegRead(pvApicBase, APIC_REG_LVT_LINT1), - ApicRegRead(pvApicBase, APIC_REG_LVT_PC), - ApicRegRead(pvApicBase, APIC_REG_LVT_THMR) - )); - continue; - } - - RTR0MemObjFree(g_aLApics[iCpu].hMapObj, true /* fFreeMappings */); + g_aLApics[iCpu].pv = RTR0MemObjAddress(g_aLApics[iCpu].hMapObj); + continue; } RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */); } g_aLApics[iCpu].fEnabled = false; } + g_aLApics[iCpu].pv = NULL; } + + /* + * Check the APICs. + */ + if (RT_SUCCESS(rc)) + rc = RTMpOnAll(cpumR0MapLocalApicCpuChecker, NULL, NULL); + if (RT_FAILURE(rc)) { cpumR0UnmapLocalApics(); return rc; } +#ifdef LOG_ENABLED + /* + * Log the result (pretty useless, requires enabling CPUM in VBoxDrv + * and !VBOX_WITH_R0_LOGGING). + */ + if (LogIsEnabled()) + { + uint32_t cEnabled = 0; + uint32_t cX2Apics = 0; + for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_aLApics); iCpu++) + if (g_aLApics[iCpu].fEnabled) + { + cEnabled++; + cX2Apics += g_aLApics[iCpu].fX2Apic; + } + Log(("CPUM: %u APICs, %u X2APICs\n", cEnabled, cX2Apics)); + } +#endif + return VINF_SUCCESS; } @@ -810,6 +971,7 @@ static void cpumR0UnmapLocalApics(void) g_aLApics[iCpu].hMapObj = NIL_RTR0MEMOBJ; g_aLApics[iCpu].hMemObj = NIL_RTR0MEMOBJ; g_aLApics[iCpu].fEnabled = false; + g_aLApics[iCpu].fX2Apic = false; g_aLApics[iCpu].pv = NULL; } } @@ -817,17 +979,23 @@ static void cpumR0UnmapLocalApics(void) /** - * Write the Local APIC mapping address of the current host CPU to CPUM to be - * able to access the APIC registers in the raw mode switcher for disabling/ - * re-enabling the NMI. Must be called with disabled preemption or disabled - * interrupts! + * Updates CPUMCPU::pvApicBase and CPUMCPU::fX2Apic prior to world switch. * - * @param pVM Pointer to the VM. + * Writes the Local APIC mapping address of the current host CPU to CPUMCPU so + * the world switchers can access the APIC registers for the purpose of + * disabling and re-enabling the NMIs. Must be called with disabled preemption + * or disabled interrupts! + * + * @param pVCpu Pointer to the cross context CPU structure of the + * calling EMT. * @param idHostCpu The ID of the current host CPU. */ -VMMR0DECL(void) CPUMR0SetLApic(PVM pVM, RTCPUID idHostCpu) +VMMR0_INT_DECL(void) CPUMR0SetLApic(PVMCPU pVCpu, RTCPUID idHostCpu) { - pVM->cpum.s.pvApicBase = g_aLApics[RTMpCpuIdToSetIndex(idHostCpu)].pv; + int idxCpu = RTMpCpuIdToSetIndex(idHostCpu); + pVCpu->cpum.s.pvApicBase = g_aLApics[idxCpu].pv; + pVCpu->cpum.s.fX2Apic = g_aLApics[idxCpu].fX2Apic; +// Log6(("CPUMR0SetLApic: pvApicBase=%p fX2Apic=%d\n", g_aLApics[idxCpu].pv, g_aLApics[idxCpu].fX2Apic)); } #endif /* VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI */ diff --git a/src/VBox/VMM/VMMR0/CPUMR0A.asm b/src/VBox/VMM/VMMR0/CPUMR0A.asm index aa09beac..befc1817 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0A.asm +++ b/src/VBox/VMM/VMMR0/CPUMR0A.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2006-2007 Oracle Corporation +; Copyright (C) 2006-2013 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. 
This file is free software; @@ -66,6 +66,56 @@ GLOBALNAME g_fCPUMIs64bitHost BEGINCODE +;; Macro for FXSAVE/FXRSTOR leaky behaviour on AMD CPUs, see cpumR3CheckLeakyFpu(). +; Cleans the FPU state, if necessary, before restoring the FPU. +; +; This macro ASSUMES CR0.TS is not set! +; @remarks Trashes xAX!! +; Changes here should also be reflected in CPUMRCA.asm's copy! +%macro CLEANFPU 0 + test dword [xDX + CPUMCPU.fUseFlags], CPUM_USE_FFXSR_LEAKY + jz .nothing_to_clean + + xor eax, eax + fnstsw ax ; Get FSW + test eax, RT_BIT(7) ; If FSW.ES (bit 7) is set, clear it to not cause FPU exceptions + ; while clearing & loading the FPU bits in 'clean_fpu' + jz .clean_fpu + fnclex + +.clean_fpu: + ffree st7 ; Clear FPU stack register(7)'s tag entry to prevent overflow if a wraparound occurs + ; for the upcoming push (load) + fild dword [xDX + CPUMCPU.Guest.fpu] ; Explicit FPU load to overwrite FIP, FOP, FDP registers in the FPU. + +.nothing_to_clean: +%endmacro + +;; Macro to save and modify CR0 (if necessary) before touching the FPU state +; so as to not cause any FPU exceptions. +; +; @remarks Uses xCX for backing-up CR0 (if CR0 needs to be modified) otherwise clears xCX. +; @remarks Trashes xAX. +%macro SAVE_CR0_CLEAR_FPU_TRAPS 0 + xor ecx, ecx + mov xAX, cr0 + test eax, X86_CR0_TS | X86_CR0_EM ; Make sure its safe to access the FPU state. + jz %%skip_cr0_write + mov xCX, xAX ; Save old CR0 + and xAX, ~(X86_CR0_TS | X86_CR0_EM) + mov cr0, xAX +%%skip_cr0_write: +%endmacro + +;; Macro to restore CR0 from xCX if necessary. +; +; @remarks xCX should contain the CR0 value to restore or 0 if no restoration is needed. +%macro RESTORE_CR0 0 + cmp ecx, 0 + je %%skip_cr0_restore + mov cr0, xCX +%%skip_cr0_restore: +%endmacro ;; ; Saves the host FPU/XMM state and restores the guest state. @@ -90,10 +140,9 @@ BEGINPROC cpumR0SaveHostRestoreGuestFPUState ; Switch the state. or dword [xDX + CPUMCPU.fUseFlags], (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM) - mov xAX, cr0 ; Make sure its safe to access the FPU state. - mov xCX, xAX ; save old CR0 - and xAX, ~(X86_CR0_TS | X86_CR0_EM) - mov cr0, xAX ;; @todo optimize this. + ; Clear CR0 FPU bits to not cause exceptions, uses xCX + SAVE_CR0_CLEAR_FPU_TRAPS + ; Do NOT use xCX from this point! %ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cmp byte [NAME(g_fCPUMIs64bitHost)], 0 @@ -103,8 +152,14 @@ BEGINPROC cpumR0SaveHostRestoreGuestFPUState .legacy_mode: %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL - fxsave [xDX + CPUMCPU.Host.fpu] ; ASSUMES that all VT-x/AMD-V boxes sports fxsave/fxrstor (safe assumption) +%ifdef RT_ARCH_AMD64 + ; Use explicit REX prefix. See @bugref{6398}. + o64 fxsave [xDX + CPUMCPU.Host.fpu] ; ASSUMES that all VT-x/AMD-V boxes sports fxsave/fxrstor (safe assumption) + o64 fxrstor [xDX + CPUMCPU.Guest.fpu] +%else + fxsave [xDX + CPUMCPU.Host.fpu] ; ASSUMES that all VT-x/AMD-V boxes sports fxsave/fxrstor (safe assumption) fxrstor [xDX + CPUMCPU.Guest.fpu] +%endif %ifdef VBOX_WITH_KERNEL_USING_XMM ; Restore the non-volatile xmm registers. ASSUMING 64-bit windows @@ -122,7 +177,8 @@ BEGINPROC cpumR0SaveHostRestoreGuestFPUState %endif .done: - mov cr0, xCX ; and restore old CR0 again ;; @todo optimize this. + ; Restore CR0 from xCX if it was previously saved. 
+ RESTORE_CR0 popf xor eax, eax ret @@ -132,8 +188,8 @@ ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: and edx, 0ffffffffh - fxsave [rdx + CPUMCPU.Host.fpu] - fxrstor [rdx + CPUMCPU.Guest.fpu] + o64 fxsave [rdx + CPUMCPU.Host.fpu] + o64 fxrstor [rdx + CPUMCPU.Guest.fpu] jmp far [.fpret wrt rip] .fpret: ; 16:32 Pointer to .the_end. dd .done, NAME(SUPR0AbsKernelCS) @@ -160,14 +216,15 @@ BEGINPROC cpumR0SaveHostFPUState ; Switch the state. or dword [xDX + CPUMCPU.fUseFlags], (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM) - mov xAX, cr0 ; Make sure its safe to access the FPU state. - mov xCX, xAX ; save old CR0 - and xAX, ~(X86_CR0_TS | X86_CR0_EM) - mov cr0, xAX ;; @todo optimize this. + ; Clear CR0 FPU bits to not cause exceptions, uses xCX + SAVE_CR0_CLEAR_FPU_TRAPS + ; Do NOT use xCX from this point! fxsave [xDX + CPUMCPU.Host.fpu] ; ASSUMES that all VT-x/AMD-V boxes support fxsave/fxrstor (safe assumption) - mov cr0, xCX ; and restore old CR0 again ;; @todo optimize this. + ; Restore CR0 from xCX if it was saved previously. + RESTORE_CR0 + popf xor eax, eax ret @@ -203,10 +260,9 @@ BEGINPROC cpumR0SaveGuestRestoreHostFPUState pushf ; The darwin kernel can get upset or upset things if an cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0. - mov xAX, cr0 ; Make sure it's safe to access the FPU state. - mov xCX, xAX ; save old CR0 - and xAX, ~(X86_CR0_TS | X86_CR0_EM) - mov cr0, xAX ;; @todo optimize this. + ; Clear CR0 FPU bits to not cause exceptions, uses xCX + SAVE_CR0_CLEAR_FPU_TRAPS + ; Do NOT use xCX from this point! %ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cmp byte [NAME(g_fCPUMIs64bitHost)], 0 @@ -216,11 +272,18 @@ BEGINPROC cpumR0SaveGuestRestoreHostFPUState .legacy_mode: %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL - fxsave [xDX + CPUMCPU.Guest.fpu] ; ASSUMES that all VT-x/AMD-V boxes support fxsave/fxrstor (safe assumption) +%ifdef RT_ARCH_AMD64 + ; Use explicit REX prefix. See @bugref{6398}. + o64 fxsave [xDX + CPUMCPU.Guest.fpu] ; ASSUMES that all VT-x/AMD-V boxes support fxsave/fxrstor (safe assumption) + o64 fxrstor [xDX + CPUMCPU.Host.fpu] +%else + fxsave [xDX + CPUMCPU.Guest.fpu] ; ASSUMES that all VT-x/AMD-V boxes support fxsave/fxrstor (safe assumption) fxrstor [xDX + CPUMCPU.Host.fpu] +%endif .done: - mov cr0, xCX ; and restore old CR0 again ;; @todo optimize this. + ; Restore CR0 from xCX if it was previously saved. + RESTORE_CR0 and dword [xDX + CPUMCPU.fUseFlags], ~CPUM_USED_FPU popf .fpu_not_used: @@ -232,8 +295,8 @@ ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: and edx, 0ffffffffh - fxsave [rdx + CPUMCPU.Guest.fpu] - fxrstor [rdx + CPUMCPU.Host.fpu] + o64 fxsave [rdx + CPUMCPU.Guest.fpu] + o64 fxrstor [rdx + CPUMCPU.Host.fpu] jmp far [.fpret wrt rip] .fpret: ; 16:32 Pointer to .the_end. dd .done, NAME(SUPR0AbsKernelCS) @@ -268,10 +331,9 @@ BEGINPROC cpumR0RestoreHostFPUState pushf ; The darwin kernel can get upset or upset things if an cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0. - mov xAX, cr0 - mov xCX, xAX ; save old CR0 - and xAX, ~(X86_CR0_TS | X86_CR0_EM) - mov cr0, xAX + ; Clear CR0 FPU bits to not cause exceptions, uses xCX + SAVE_CR0_CLEAR_FPU_TRAPS + ; Do NOT use xCX from this point! 
%ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cmp byte [NAME(g_fCPUMIs64bitHost)], 0 @@ -281,10 +343,15 @@ BEGINPROC cpumR0RestoreHostFPUState .legacy_mode: %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL +%ifdef RT_ARCH_AMD64 + o64 fxrstor [xDX + CPUMCPU.Host.fpu] +%else fxrstor [xDX + CPUMCPU.Host.fpu] +%endif .done: - mov cr0, xCX ; and restore old CR0 again + ; Restore CR0 from xCX if it was previously saved. + RESTORE_CR0 and dword [xDX + CPUMCPU.fUseFlags], ~CPUM_USED_FPU popf .fpu_not_used: @@ -296,7 +363,7 @@ ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: and edx, 0ffffffffh - fxrstor [rdx + CPUMCPU.Host.fpu] + o64 fxrstor [rdx + CPUMCPU.Host.fpu] jmp far [.fpret wrt rip] .fpret: ; 16:32 Pointer to .the_end. dd .done, NAME(SUPR0AbsKernelCS) diff --git a/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm b/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm index 1a4b5868..d61db912 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm +++ b/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2006-2007 Oracle Corporation +; Copyright (C) 2006-2011 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/GMMR0.cpp b/src/VBox/VMM/VMMR0/GMMR0.cpp index 54b7a732..aa0cd610 100644 --- a/src/VBox/VMM/VMMR0/GMMR0.cpp +++ b/src/VBox/VMM/VMMR0/GMMR0.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2007-2012 Oracle Corporation + * Copyright (C) 2007-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -1959,7 +1959,7 @@ static uint32_t gmmR0AllocateChunkId(PGMM pGMM) if ( (uint32_t)idChunk < GMM_CHUNKID_LAST && idChunk > NIL_GMM_CHUNKID) { - idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk); + idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk - 1); if (idChunk > NIL_GMM_CHUNKID) { AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID); @@ -2380,12 +2380,13 @@ static uint32_t gmmR0AllocatePagesInBoundMode(PGVM pGVM, uint32_t iPage, uint32_ /** - * Checks if we should start picking pages from chunks of other VMs. + * Checks if we should start picking pages from chunks of other VMs because + * we're getting close to the system memory or reserved limit. * * @returns @c true if we should, @c false if we should first try allocate more * chunks. */ -static bool gmmR0ShouldAllocatePagesInOtherChunks(PGVM pGVM) +static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(PGVM pGVM) { /* * Don't allocate a new chunk if we're @@ -2413,6 +2414,24 @@ static bool gmmR0ShouldAllocatePagesInOtherChunks(PGVM pGVM) /** + * Checks if we should start picking pages from chunks of other VMs because + * there is a lot of free pages around. + * + * @returns @c true if we should, @c false if we should first try allocate more + * chunks. + */ +static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(PGMM pGMM) +{ + /* + * Setting the limit at 16 chunks (32 MB) at the moment. + */ + if (pGMM->PrivateX.cFreePages >= GMM_CHUNK_NUM_PAGES * 16) + return true; + return false; +} + + +/** * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages. * * @returns VBox status code: @@ -2536,8 +2555,12 @@ static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGE { /* Maybe we should try getting pages from chunks "belonging" to other VMs before allocating more chunks? 
*/ - if (gmmR0ShouldAllocatePagesInOtherChunks(pGVM)) + bool fTriedOnSameAlready = false; + if (gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(pGVM)) + { iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages); + fTriedOnSameAlready = true; + } /* Allocate memory from empty chunks. */ if (iPage < cPages) @@ -2547,6 +2570,16 @@ static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGE if (iPage < cPages) iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->Shared, pGVM, iPage, cPages, paPages); + /* If there is a lof of free pages spread around, try not waste + system memory on more chunks. (Should trigger defragmentation.) */ + if ( !fTriedOnSameAlready + && gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(pGMM)) + { + iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages); + if (iPage < cPages) + iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages); + } + /* * Ok, try allocate new chunks. */ @@ -5119,7 +5152,7 @@ GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu) Args.idCpu = pVCpu->idCpu; rc = RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Args); - Log(("GMMR0CheckSharedModules done!\n")); + Log(("GMMR0CheckSharedModules done (rc=%Rrc)!\n", rc)); GMM_CHECK_SANITY_UPON_LEAVING(pGMM); } else diff --git a/src/VBox/VMM/VMMR0/GMMR0Internal.h b/src/VBox/VMM/VMMR0/GMMR0Internal.h index ca6f5e16..0ce8986b 100644 --- a/src/VBox/VMM/VMMR0/GMMR0Internal.h +++ b/src/VBox/VMM/VMMR0/GMMR0Internal.h @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2007 Oracle Corporation + * Copyright (C) 2007-2012 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp index ed0d6035..41604258 100644 --- a/src/VBox/VMM/VMMR0/GVMMR0.cpp +++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2007-2010 Oracle Corporation + * Copyright (C) 2007-2012 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -544,7 +544,7 @@ GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, ui /* * String switch time! */ - if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1)) + if (strncmp(pszName, RT_STR_TUPLE("/GVMM/"))) return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */ int rc = VINF_SUCCESS; pszName += sizeof("/GVMM/") - 1; @@ -612,7 +612,7 @@ GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, /* * String switch time! */ - if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1)) + if (strncmp(pszName, RT_STR_TUPLE("/GVMM/"))) return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... 
*/ int rc = VINF_SUCCESS; pszName += sizeof("/GVMM/") - 1; @@ -867,8 +867,9 @@ GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppV pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD; } - rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0, - RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS); + rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, + 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE, + NIL_RTR0PROCESS); if (RT_SUCCESS(rc)) { pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj); @@ -887,14 +888,18 @@ GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppV pVM->aCpus[0].hNativeThreadR0 = hEMT0; pGVMM->cEMTs += cCpus; - VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus); + rc = VMMR0ThreadCtxHooksCreate(&pVM->aCpus[0]); + if (RT_SUCCESS(rc)) + { + VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus); - gvmmR0UsedUnlock(pGVMM); - gvmmR0CreateDestroyUnlock(pGVMM); + gvmmR0UsedUnlock(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); - *ppVM = pVM; - Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle)); - return VINF_SUCCESS; + *ppVM = pVM; + Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle)); + return VINF_SUCCESS; + } } RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); @@ -1068,13 +1073,13 @@ GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM) PGVMM pGVMM; GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); - /* * Validate the VM structure, state and caller. */ AssertPtrReturn(pVM, VERR_INVALID_POINTER); AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER); - AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER); + AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), + VERR_WRONG_ORDER); uint32_t hGVM = pVM->hSelf; AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE); @@ -1084,7 +1089,7 @@ GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM) AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER); RTPROCESS ProcId = RTProcSelf(); - RTNATIVETHREAD hSelf = RTThreadNativeSelf(); + RTNATIVETHREAD hSelf = RTThreadNativeSelf(); AssertReturn( ( pHandle->hEMT0 == hSelf && pHandle->ProcId == ProcId) || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER); @@ -1097,7 +1102,7 @@ GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM) int rc = gvmmR0CreateDestroyLock(pGVMM); AssertRC(rc); - /* be careful here because we might theoretically be racing someone else cleaning up. */ + /* Be careful here because we might theoretically be racing someone else cleaning up. */ if ( pHandle->pVM == pVM && ( ( pHandle->hEMT0 == hSelf && pHandle->ProcId == ProcId) @@ -1111,6 +1116,15 @@ GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM) pHandle->pvObj = NULL; gvmmR0CreateDestroyUnlock(pGVMM); + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + /** @todo Can we busy wait here for all thread-context hooks to be + * deregistered before releasing (destroying) it? Only until we find a + * solution for not deregistering hooks everytime we're leaving HMR0 + * context. 
*/ + VMMR0ThreadCtxHooksRelease(&pVM->aCpus[idCpu]); + } + SUPR0ObjRelease(pvObj, pHandle->pSession); } else @@ -1336,7 +1350,8 @@ GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu) pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf(); - return VINF_SUCCESS; + rc = VMMR0ThreadCtxHooksCreate(&pVM->aCpus[idCpu]); + return rc; } diff --git a/src/VBox/VMM/VMMR0/GVMMR0Internal.h b/src/VBox/VMM/VMMR0/GVMMR0Internal.h index 40b1b000..7e937f04 100644 --- a/src/VBox/VMM/VMMR0/GVMMR0Internal.h +++ b/src/VBox/VMM/VMMR0/GVMMR0Internal.h @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2007 Oracle Corporation + * Copyright (C) 2007-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/HWACCMR0.cpp b/src/VBox/VMM/VMMR0/HMR0.cpp index e4c1d79a..9793d7ac 100644 --- a/src/VBox/VMM/VMMR0/HWACCMR0.cpp +++ b/src/VBox/VMM/VMMR0/HMR0.cpp @@ -1,10 +1,10 @@ -/* $Id: HWACCMR0.cpp $ */ +/* $Id: HMR0.cpp $ */ /** @file * Hardware Assisted Virtualization Manager (HM) - Host Context Ring-0. */ /* - * Copyright (C) 2006-2011 Oracle Corporation + * Copyright (C) 2006-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -19,13 +19,13 @@ /******************************************************************************* * Header Files * *******************************************************************************/ -#define LOG_GROUP LOG_GROUP_HWACCM -#include <VBox/vmm/hwaccm.h> +#define LOG_GROUP LOG_GROUP_HM +#include <VBox/vmm/hm.h> #include <VBox/vmm/pgm.h> -#include "HWACCMInternal.h" +#include "HMInternal.h" #include <VBox/vmm/vm.h> -#include <VBox/vmm/hwacc_vmx.h> -#include <VBox/vmm/hwacc_svm.h> +#include <VBox/vmm/hm_vmx.h> +#include <VBox/vmm/hm_svm.h> #include <VBox/err.h> #include <VBox/log.h> #include <iprt/assert.h> @@ -40,8 +40,8 @@ #include <iprt/string.h> #include <iprt/thread.h> #include <iprt/x86.h> -#include "HWVMXR0.h" -#include "HWSVMR0.h" +#include "HMVMXR0.h" +#include "HMSVMR0.h" /******************************************************************************* @@ -81,25 +81,24 @@ typedef HMR0FIRSTRC *PHMR0FIRSTRC; static struct { /** Per CPU globals. */ - HMGLOBLCPUINFO aCpuInfo[RTCPUSET_MAX_CPUS]; + HMGLOBALCPUINFO aCpuInfo[RTCPUSET_MAX_CPUS]; /** @name Ring-0 method table for AMD-V and VT-x specific operations. 
* @{ */ - DECLR0CALLBACKMEMBER(int, pfnEnterSession,(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)); - DECLR0CALLBACKMEMBER(int, pfnLeaveSession,(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)); - DECLR0CALLBACKMEMBER(int, pfnSaveHostState,(PVM pVM, PVMCPU pVCpu)); - DECLR0CALLBACKMEMBER(int, pfnLoadGuestState,(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)); - DECLR0CALLBACKMEMBER(int, pfnRunGuestCode,(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)); - DECLR0CALLBACKMEMBER(int, pfnEnableCpu,(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, - bool fEnabledByHost)); - DECLR0CALLBACKMEMBER(int, pfnDisableCpu,(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)); - DECLR0CALLBACKMEMBER(int, pfnInitVM,(PVM pVM)); - DECLR0CALLBACKMEMBER(int, pfnTermVM,(PVM pVM)); - DECLR0CALLBACKMEMBER(int, pfnSetupVM,(PVM pVM)); + DECLR0CALLBACKMEMBER(int, pfnEnterSession,(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)); + DECLR0CALLBACKMEMBER(void, pfnThreadCtxCallback,(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)); + DECLR0CALLBACKMEMBER(int, pfnSaveHostState,(PVM pVM, PVMCPU pVCpu)); + DECLR0CALLBACKMEMBER(int, pfnRunGuestCode,(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)); + DECLR0CALLBACKMEMBER(int, pfnEnableCpu,(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, + bool fEnabledByHost, void *pvArg)); + DECLR0CALLBACKMEMBER(int, pfnDisableCpu,(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)); + DECLR0CALLBACKMEMBER(int, pfnInitVM,(PVM pVM)); + DECLR0CALLBACKMEMBER(int, pfnTermVM,(PVM pVM)); + DECLR0CALLBACKMEMBER(int, pfnSetupVM,(PVM pVM)); /** @} */ /** Maximum ASID allowed. */ - uint32_t uMaxASID; + uint32_t uMaxAsid; /** VT-x data. */ struct @@ -114,29 +113,15 @@ static struct uint8_t cPreemptTimerShift; /** Host CR4 value (set by ring-0 VMX init) */ - uint64_t hostCR4; + /** @todo This isn't used for anything relevant. Remove later? */ + uint64_t u64HostCr4; /** Host EFER value (set by ring-0 VMX init) */ - uint64_t hostEFER; + uint64_t u64HostEfer; /** VMX MSR values */ - struct - { - uint64_t feature_ctrl; - uint64_t vmx_basic_info; - VMX_CAPABILITY vmx_pin_ctls; - VMX_CAPABILITY vmx_proc_ctls; - VMX_CAPABILITY vmx_proc_ctls2; - VMX_CAPABILITY vmx_exit; - VMX_CAPABILITY vmx_entry; - uint64_t vmx_misc; - uint64_t vmx_cr0_fixed0; - uint64_t vmx_cr0_fixed1; - uint64_t vmx_cr4_fixed0; - uint64_t vmx_cr4_fixed1; - uint64_t vmx_vmcs_enum; - uint64_t vmx_eptcaps; - } msr; + VMXMSRS Msrs; + /* Last instruction error */ uint32_t ulLastInstrError; } vmx; @@ -144,8 +129,8 @@ static struct /** AMD-V information. */ struct { - /* HWCR msr (for diagnostics) */ - uint64_t msrHWCR; + /* HWCR MSR (for diagnostics) */ + uint64_t u64MsrHwcr; /** SVM revision. */ uint32_t u32Rev; @@ -157,7 +142,7 @@ static struct bool fSupported; } svm; /** Saved error from detection */ - int32_t lLastError; + int32_t lLastError; struct { @@ -171,13 +156,13 @@ static struct /** Indicates whether the host is suspending or not. We'll refuse a few * actions when the host is being suspended to speed up the suspending and * avoid trouble. */ - volatile bool fSuspended; + volatile bool fSuspended; /** Whether we've already initialized all CPUs. * @remarks We could check the EnableAllCpusOnce state, but this is * simpler and hopefully easier to understand. */ bool fEnabled; - /** Serialize initialization in HWACCMR0EnableAllCpus. */ + /** Serialize initialization in HMR0EnableAllCpus. 
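The method table above is what lets the generic HMR0 code stay agnostic of the backend: one set of pointers is filled in with either the VT-x, the AMD-V, or the dummy implementations. A minimal sketch of that selection, assuming only the VMXR0*/SVMR0* entry points quoted in this file; mySelectHmBackend is a hypothetical name and only two of the pointers are shown:

static void mySelectHmBackend(bool fVmx, bool fSvm)
{
    if (fVmx)          /* VT-x detected and usable */
    {
        g_HvmR0.pfnEnterSession = VMXR0Enter;
        g_HvmR0.pfnRunGuestCode = VMXR0RunGuestCode;
        /* ... remaining VT-x methods assigned the same way ... */
    }
    else if (fSvm)     /* AMD-V detected and usable */
    {
        g_HvmR0.pfnEnterSession = SVMR0Enter;
        g_HvmR0.pfnRunGuestCode = SVMR0RunGuestCode;
        /* ... remaining AMD-V methods assigned the same way ... */
    }
    /* else: the hmR0Dummy* placeholders installed by HMR0Init() stay in place. */
}

Keeping the choice in one table gives a single call site per operation, and the dummy placeholders guarantee every pointer is always callable even when neither backend is usable.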
*/ RTONCE EnableAllCpusOnce; } g_HvmR0; @@ -196,12 +181,12 @@ static void hmR0FirstRcInit(PHMR0FIRSTRC pFirstRc) /** - * Try se the status code (success ignored). + * Try set the status code (success ignored). * * @param pFirstRc The first return code structure. * @param rc The status code. */ -static void hmR0FirstRcSetStatus(PHMR0FIRSTRC pFirstRc, int rc) +static void hmR0FirstRcSetStatus(PHMR0FIRSTRC pFirstRc, int rc) { if ( RT_FAILURE(rc) && ASMAtomicCmpXchgS32(&pFirstRc->rc, rc, VINF_SUCCESS)) @@ -239,7 +224,7 @@ static RTCPUID hmR0FirstRcGetCpuId(PHMR0FIRSTRC pFirstRc) /** @name Dummy callback handlers. * @{ */ -static DECLCALLBACK(int) hmR0DummyEnter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu) +static DECLCALLBACK(int) hmR0DummyEnter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) { NOREF(pVM); NOREF(pVCpu); NOREF(pCpu); return VINF_SUCCESS; @@ -251,14 +236,19 @@ static DECLCALLBACK(int) hmR0DummyLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) return VINF_SUCCESS; } -static DECLCALLBACK(int) hmR0DummyEnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, - bool fEnabledBySystem) +static DECLCALLBACK(void) hmR0DummyThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit) +{ + NOREF(enmEvent); NOREF(pVCpu); NOREF(fGlobalInit); +} + +static DECLCALLBACK(int) hmR0DummyEnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, + bool fEnabledBySystem, void *pvArg) { - NOREF(pCpu); NOREF(pVM); NOREF(pvCpuPage); NOREF(HCPhysCpuPage); NOREF(fEnabledBySystem); + NOREF(pCpu); NOREF(pVM); NOREF(pvCpuPage); NOREF(HCPhysCpuPage); NOREF(fEnabledBySystem); NOREF(pvArg); return VINF_SUCCESS; } -static DECLCALLBACK(int) hmR0DummyDisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) +static DECLCALLBACK(int) hmR0DummyDisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) { NOREF(pCpu); NOREF(pvCpuPage); NOREF(HCPhysCpuPage); return VINF_SUCCESS; @@ -336,8 +326,8 @@ static bool hmR0InitIntelIsSubjectToVmxPreemptionTimerErratum(void) || u == UINT32_C(0x000106E5) /* 322373.pdf - AAO95 - B1 - Intel Xeon Processor 3400 Series */ || u == UINT32_C(0x000106E5) /* 322166.pdf - AAN92 - B1 - Intel CoreTM i7-800 and i5-700 Desktop Processor Series */ || u == UINT32_C(0x000106E5) /* 320767.pdf - AAP86 - B1 - Intel Core i7-900 Mobile Processor Extreme Edition Series, Intel Core i7-800 and i7-700 Mobile Processor Series */ - || u == UINT32_C(0x000106A0) /*?321333.pdf - AAM126 - C0 - Intel Xeon Processor 3500 Series Specification */ - || u == UINT32_C(0x000106A1) /*?321333.pdf - AAM126 - C1 - Intel Xeon Processor 3500 Series Specification */ + || u == UINT32_C(0x000106A0) /* 321333.pdf - AAM126 - C0 - Intel Xeon Processor 3500 Series Specification */ + || u == UINT32_C(0x000106A1) /* 321333.pdf - AAM126 - C1 - Intel Xeon Processor 3500 Series Specification */ || u == UINT32_C(0x000106A4) /* 320836.pdf - AAJ124 - C0 - Intel Core i7-900 Desktop Processor Extreme Edition Series and Intel Core i7-900 Desktop Processor Series */ || u == UINT32_C(0x000106A5) /* 321333.pdf - AAM126 - D0 - Intel Xeon Processor 3500 Series Specification */ || u == UINT32_C(0x000106A5) /* 321324.pdf - AAK139 - D0 - Intel Xeon Processor 5500 Series Specification */ @@ -365,7 +355,7 @@ static int hmR0InitIntel(uint32_t u32FeaturesECX, uint32_t u32FeaturesEDX) ) { /** @todo move this into a separate function. 
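The HMR0FIRSTRC helpers above implement a "first failure wins" collector for code that runs on every host CPU via RTMpOnAll(): each CPU reports its status, but only the first failure (and the CPU it came from) is kept. A minimal sketch of the idea, reusing only IPRT calls already quoted in this hunk; the my* names are hypothetical:

typedef struct MYFIRSTRC
{
    int32_t volatile    rc;     /* VINF_SUCCESS until the first failure lands */
    RTCPUID volatile    idCpu;  /* CPU that reported the failure */
} MYFIRSTRC;

static void myFirstRcInit(MYFIRSTRC *pFirstRc)
{
    pFirstRc->rc    = VINF_SUCCESS;
    pFirstRc->idCpu = NIL_RTCPUID;
}

static void myFirstRcSetStatus(MYFIRSTRC *pFirstRc, int rc)
{
    if (   RT_FAILURE(rc)
        && ASMAtomicCmpXchgS32(&pFirstRc->rc, rc, VINF_SUCCESS)) /* only the first failure gets in */
        pFirstRc->idCpu = RTMpCpuId();
}

The compare-exchange keeps the update race-free even though the callback fires concurrently on all CPUs, and success codes are ignored on purpose so a late VINF_SUCCESS can never mask an earlier error.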
*/ - g_HvmR0.vmx.msr.feature_ctrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + g_HvmR0.vmx.Msrs.u64FeatureCtrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); /* * First try use native kernel API for controlling VT-x. @@ -395,136 +385,136 @@ static int hmR0InitIntel(uint32_t u32FeaturesECX, uint32_t u32FeaturesEDX) } if (RT_SUCCESS(g_HvmR0.lLastError)) { - /* Reread in case we've changed it. */ - g_HvmR0.vmx.msr.feature_ctrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + /* Reread in case it was changed by hmR0InitIntelCpu(). */ + g_HvmR0.vmx.Msrs.u64FeatureCtrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + + /* + * Read all relevant registers and MSRs. + */ + g_HvmR0.vmx.u64HostCr4 = ASMGetCR4(); + g_HvmR0.vmx.u64HostEfer = ASMRdMsr(MSR_K6_EFER); + g_HvmR0.vmx.Msrs.u64BasicInfo = ASMRdMsr(MSR_IA32_VMX_BASIC_INFO); + g_HvmR0.vmx.Msrs.VmxPinCtls.u = ASMRdMsr(MSR_IA32_VMX_PINBASED_CTLS); + g_HvmR0.vmx.Msrs.VmxProcCtls.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS); + g_HvmR0.vmx.Msrs.VmxExit.u = ASMRdMsr(MSR_IA32_VMX_EXIT_CTLS); + g_HvmR0.vmx.Msrs.VmxEntry.u = ASMRdMsr(MSR_IA32_VMX_ENTRY_CTLS); + g_HvmR0.vmx.Msrs.u64Misc = ASMRdMsr(MSR_IA32_VMX_MISC); + g_HvmR0.vmx.Msrs.u64Cr0Fixed0 = ASMRdMsr(MSR_IA32_VMX_CR0_FIXED0); + g_HvmR0.vmx.Msrs.u64Cr0Fixed1 = ASMRdMsr(MSR_IA32_VMX_CR0_FIXED1); + g_HvmR0.vmx.Msrs.u64Cr4Fixed0 = ASMRdMsr(MSR_IA32_VMX_CR4_FIXED0); + g_HvmR0.vmx.Msrs.u64Cr4Fixed1 = ASMRdMsr(MSR_IA32_VMX_CR4_FIXED1); + g_HvmR0.vmx.Msrs.u64VmcsEnum = ASMRdMsr(MSR_IA32_VMX_VMCS_ENUM); + /* VPID 16 bits ASID. */ + g_HvmR0.uMaxAsid = 0x10000; /* exclusive */ + + if (g_HvmR0.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL) + { + g_HvmR0.vmx.Msrs.VmxProcCtls2.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2); + if (g_HvmR0.vmx.Msrs.VmxProcCtls2.n.allowed1 & (VMX_VMCS_CTRL_PROC_EXEC2_EPT | VMX_VMCS_CTRL_PROC_EXEC2_VPID)) + g_HvmR0.vmx.Msrs.u64EptVpidCaps = ASMRdMsr(MSR_IA32_VMX_EPT_VPID_CAP); - if ( (g_HvmR0.vmx.msr.feature_ctrl & (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) - == (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) + if (g_HvmR0.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VMFUNC) + g_HvmR0.vmx.Msrs.u64Vmfunc = ASMRdMsr(MSR_IA32_VMX_VMFUNC); + } + + if (!g_HvmR0.vmx.fUsingSUPR0EnableVTx) { /* - * Read all relevant MSR. + * Enter root mode */ - g_HvmR0.vmx.msr.vmx_basic_info = ASMRdMsr(MSR_IA32_VMX_BASIC_INFO); - g_HvmR0.vmx.msr.vmx_pin_ctls.u = ASMRdMsr(MSR_IA32_VMX_PINBASED_CTLS); - g_HvmR0.vmx.msr.vmx_proc_ctls.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS); - g_HvmR0.vmx.msr.vmx_exit.u = ASMRdMsr(MSR_IA32_VMX_EXIT_CTLS); - g_HvmR0.vmx.msr.vmx_entry.u = ASMRdMsr(MSR_IA32_VMX_ENTRY_CTLS); - g_HvmR0.vmx.msr.vmx_misc = ASMRdMsr(MSR_IA32_VMX_MISC); - g_HvmR0.vmx.msr.vmx_cr0_fixed0 = ASMRdMsr(MSR_IA32_VMX_CR0_FIXED0); - g_HvmR0.vmx.msr.vmx_cr0_fixed1 = ASMRdMsr(MSR_IA32_VMX_CR0_FIXED1); - g_HvmR0.vmx.msr.vmx_cr4_fixed0 = ASMRdMsr(MSR_IA32_VMX_CR4_FIXED0); - g_HvmR0.vmx.msr.vmx_cr4_fixed1 = ASMRdMsr(MSR_IA32_VMX_CR4_FIXED1); - g_HvmR0.vmx.msr.vmx_vmcs_enum = ASMRdMsr(MSR_IA32_VMX_VMCS_ENUM); - g_HvmR0.vmx.hostCR4 = ASMGetCR4(); - g_HvmR0.vmx.hostEFER = ASMRdMsr(MSR_K6_EFER); - /* VPID 16 bits ASID. 
*/ - g_HvmR0.uMaxASID = 0x10000; /* exclusive */ - - if (g_HvmR0.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL) + RTR0MEMOBJ hScatchMemObj; + rc = RTR0MemObjAllocCont(&hScatchMemObj, PAGE_SIZE, false /* fExecutable */); + if (RT_FAILURE(rc)) { - g_HvmR0.vmx.msr.vmx_proc_ctls2.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2); - if ( g_HvmR0.vmx.msr.vmx_proc_ctls2.n.allowed1 - & (VMX_VMCS_CTRL_PROC_EXEC2_EPT | VMX_VMCS_CTRL_PROC_EXEC2_VPID)) - g_HvmR0.vmx.msr.vmx_eptcaps = ASMRdMsr(MSR_IA32_VMX_EPT_CAPS); + LogRel(("hmR0InitIntel: RTR0MemObjAllocCont(,PAGE_SIZE,false) -> %Rrc\n", rc)); + return rc; } - if (!g_HvmR0.vmx.fUsingSUPR0EnableVTx) - { - /* - * Enter root mode - */ - RTR0MEMOBJ hScatchMemObj; - rc = RTR0MemObjAllocCont(&hScatchMemObj, PAGE_SIZE, true /*fExecutable*/); - if (RT_FAILURE(rc)) - { - LogRel(("hmR0InitIntel: RTR0MemObjAllocCont(,PAGE_SIZE,true) -> %Rrc\n", rc)); - return rc; - } + void *pvScatchPage = RTR0MemObjAddress(hScatchMemObj); + RTHCPHYS HCPhysScratchPage = RTR0MemObjGetPagePhysAddr(hScatchMemObj, 0); + ASMMemZeroPage(pvScatchPage); - void *pvScatchPage = RTR0MemObjAddress(hScatchMemObj); - RTHCPHYS HCPhysScratchPage = RTR0MemObjGetPagePhysAddr(hScatchMemObj, 0); - ASMMemZeroPage(pvScatchPage); + /* Set revision dword at the beginning of the structure. */ + *(uint32_t *)pvScatchPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(g_HvmR0.vmx.Msrs.u64BasicInfo); - /* Set revision dword at the beginning of the structure. */ - *(uint32_t *)pvScatchPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(g_HvmR0.vmx.msr.vmx_basic_info); + /* Make sure we don't get rescheduled to another cpu during this probe. */ + RTCCUINTREG fFlags = ASMIntDisableFlags(); - /* Make sure we don't get rescheduled to another cpu during this probe. */ - RTCCUINTREG fFlags = ASMIntDisableFlags(); + /* + * Check CR4.VMXE + */ + g_HvmR0.vmx.u64HostCr4 = ASMGetCR4(); + if (!(g_HvmR0.vmx.u64HostCr4 & X86_CR4_VMXE)) + { + /* In theory this bit could be cleared behind our back. Which would cause + #UD faults when we try to execute the VMX instructions... */ + ASMSetCR4(g_HvmR0.vmx.u64HostCr4 | X86_CR4_VMXE); + } + /* + * The only way of checking if we're in VMX root mode or not is to try and enter it. + * There is no instruction or control bit that tells us if we're in VMX root mode. + * Therefore, try and enter VMX root mode here. + */ + rc = VMXEnable(HCPhysScratchPage); + if (RT_SUCCESS(rc)) + { + g_HvmR0.vmx.fSupported = true; + VMXDisable(); + } + else + { /* - * Check CR4.VMXE + * KVM leaves the CPU in VMX root mode. Not only is this not allowed, + * it will crash the host when we enter raw mode, because: + * + * (a) clearing X86_CR4_VMXE in CR4 causes a #GP (we no longer modify + * this bit), and + * (b) turning off paging causes a #GP (unavoidable when switching + * from long to 32 bits mode or 32 bits to PAE). + * + * They should fix their code, but until they do we simply refuse to run. */ - g_HvmR0.vmx.hostCR4 = ASMGetCR4(); - if (!(g_HvmR0.vmx.hostCR4 & X86_CR4_VMXE)) - { - /* In theory this bit could be cleared behind our back. Which would cause - #UD faults when we try to execute the VMX instructions... */ - ASMSetCR4(g_HvmR0.vmx.hostCR4 | X86_CR4_VMXE); - } - - /* Enter VMX Root Mode */ - rc = VMXEnable(HCPhysScratchPage); - if (RT_SUCCESS(rc)) - { - g_HvmR0.vmx.fSupported = true; - VMXDisable(); - } - else - { - /* - * KVM leaves the CPU in VMX root mode. 
Not only is this not allowed, - * it will crash the host when we enter raw mode, because: - * - * (a) clearing X86_CR4_VMXE in CR4 causes a #GP (we no longer modify - * this bit), and - * (b) turning off paging causes a #GP (unavoidable when switching - * from long to 32 bits mode or 32 bits to PAE). - * - * They should fix their code, but until they do we simply refuse to run. - */ - g_HvmR0.lLastError = VERR_VMX_IN_VMX_ROOT_MODE; - } - - /* Restore CR4 again; don't leave the X86_CR4_VMXE flag set - if it wasn't so before (some software could incorrectly - think it's in VMX mode). */ - ASMSetCR4(g_HvmR0.vmx.hostCR4); - ASMSetFlags(fFlags); - - RTR0MemObjFree(hScatchMemObj, false); + g_HvmR0.lLastError = VERR_VMX_IN_VMX_ROOT_MODE; + Assert(g_HvmR0.vmx.fSupported == false); } - } - else - { - AssertFailed(); /* can't hit this case anymore */ - g_HvmR0.lLastError = VERR_VMX_ILLEGAL_FEATURE_CONTROL_MSR; + + /* Restore CR4 again; don't leave the X86_CR4_VMXE flag set + if it wasn't so before (some software could incorrectly + think it's in VMX mode). */ + ASMSetCR4(g_HvmR0.vmx.u64HostCr4); + ASMSetFlags(fFlags); + + RTR0MemObjFree(hScatchMemObj, false); } if (g_HvmR0.vmx.fSupported) { + rc = VMXR0GlobalInit(); + if (RT_FAILURE(rc)) + g_HvmR0.lLastError = rc; + /* * Install the VT-x methods. */ - g_HvmR0.pfnEnterSession = VMXR0Enter; - g_HvmR0.pfnLeaveSession = VMXR0Leave; - g_HvmR0.pfnSaveHostState = VMXR0SaveHostState; - g_HvmR0.pfnLoadGuestState = VMXR0LoadGuestState; - g_HvmR0.pfnRunGuestCode = VMXR0RunGuestCode; - g_HvmR0.pfnEnableCpu = VMXR0EnableCpu; - g_HvmR0.pfnDisableCpu = VMXR0DisableCpu; - g_HvmR0.pfnInitVM = VMXR0InitVM; - g_HvmR0.pfnTermVM = VMXR0TermVM; - g_HvmR0.pfnSetupVM = VMXR0SetupVM; + g_HvmR0.pfnEnterSession = VMXR0Enter; + g_HvmR0.pfnThreadCtxCallback = VMXR0ThreadCtxCallback; + g_HvmR0.pfnSaveHostState = VMXR0SaveHostState; + g_HvmR0.pfnRunGuestCode = VMXR0RunGuestCode; + g_HvmR0.pfnEnableCpu = VMXR0EnableCpu; + g_HvmR0.pfnDisableCpu = VMXR0DisableCpu; + g_HvmR0.pfnInitVM = VMXR0InitVM; + g_HvmR0.pfnTermVM = VMXR0TermVM; + g_HvmR0.pfnSetupVM = VMXR0SetupVM; /* - * Check for the VMX-Preemption Timer and adjust for the * "VMX-Preemption + * Check for the VMX-Preemption Timer and adjust for the "VMX-Preemption * Timer Does Not Count Down at the Rate Specified" erratum. */ - if ( g_HvmR0.vmx.msr.vmx_pin_ctls.n.allowed1 - & VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER) + if (g_HvmR0.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER) { g_HvmR0.vmx.fUsePreemptTimer = true; - g_HvmR0.vmx.cPreemptTimerShift = MSR_IA32_VMX_MISC_PREEMPT_TSC_BIT(g_HvmR0.vmx.msr.vmx_misc); + g_HvmR0.vmx.cPreemptTimerShift = MSR_IA32_VMX_MISC_PREEMPT_TSC_BIT(g_HvmR0.vmx.Msrs.u64Misc); if (hmR0InitIntelIsSubjectToVmxPreemptionTimerErratum()) g_HvmR0.vmx.cPreemptTimerShift = 0; /* This is about right most of the time here. */ } @@ -543,33 +533,47 @@ static int hmR0InitIntel(uint32_t u32FeaturesECX, uint32_t u32FeaturesEDX) /** * AMD-specific initialization code. + * + * @returns VBox status code. */ -static void hmR0InitAmd(uint32_t u32FeaturesEDX) +static int hmR0InitAmd(uint32_t u32FeaturesEDX, uint32_t uMaxExtLeaf) { /* * Read all SVM MSRs if SVM is available. (same goes for RDMSR/WRMSR) * We also assume all SVM-enabled CPUs support fxsave/fxrstor. 
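Boiled down, the VT-x probe walked through a little earlier in hmR0InitIntel is: make sure CR4.VMXE is set, try VMXON on the scratch page carrying the VMCS revision ID, and leave root mode again right away if it worked. A compressed sketch, reusing only the helpers quoted in that hunk; myProbeVmxRootMode is a hypothetical name and error handling is trimmed:

static int myProbeVmxRootMode(RTHCPHYS HCPhysScratchPage, uint64_t u64HostCr4)
{
    RTCCUINTREG fFlags = ASMIntDisableFlags();     /* don't migrate CPUs mid-probe */
    if (!(u64HostCr4 & X86_CR4_VMXE))
        ASMSetCR4(u64HostCr4 | X86_CR4_VMXE);      /* VMXON faults without CR4.VMXE */

    int rc = VMXEnable(HCPhysScratchPage);         /* the only reliable "may we enter?" test */
    if (RT_SUCCESS(rc))
        VMXDisable();                              /* we only wanted to know whether it works */
    else
        rc = VERR_VMX_IN_VMX_ROOT_MODE;            /* another hypervisor (e.g. KVM) already owns it */

    ASMSetCR4(u64HostCr4);                         /* restore CR4 exactly as we found it */
    ASMSetFlags(fFlags);
    return rc;
}

Restoring CR4 matters: leaving X86_CR4_VMXE set when it was not set before can make other software believe the CPU is in VMX operation.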
*/ + int rc; if ( (g_HvmR0.cpuid.u32AMDFeatureECX & X86_CPUID_AMD_FEATURE_ECX_SVM) && (u32FeaturesEDX & X86_CPUID_FEATURE_EDX_MSR) && (u32FeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR) + && ASMIsValidExtRange(uMaxExtLeaf) + && uMaxExtLeaf >= 0x8000000a ) { - g_HvmR0.pfnEnterSession = SVMR0Enter; - g_HvmR0.pfnLeaveSession = SVMR0Leave; - g_HvmR0.pfnSaveHostState = SVMR0SaveHostState; - g_HvmR0.pfnLoadGuestState = SVMR0LoadGuestState; - g_HvmR0.pfnRunGuestCode = SVMR0RunGuestCode; - g_HvmR0.pfnEnableCpu = SVMR0EnableCpu; - g_HvmR0.pfnDisableCpu = SVMR0DisableCpu; - g_HvmR0.pfnInitVM = SVMR0InitVM; - g_HvmR0.pfnTermVM = SVMR0TermVM; - g_HvmR0.pfnSetupVM = SVMR0SetupVM; + /* Call the global AMD-V initialization routine. */ + rc = SVMR0GlobalInit(); + if (RT_FAILURE(rc)) + { + g_HvmR0.lLastError = rc; + return rc; + } + + /* + * Install the AMD-V methods. + */ + g_HvmR0.pfnEnterSession = SVMR0Enter; + g_HvmR0.pfnThreadCtxCallback = SVMR0ThreadCtxCallback; + g_HvmR0.pfnSaveHostState = SVMR0SaveHostState; + g_HvmR0.pfnRunGuestCode = SVMR0RunGuestCode; + g_HvmR0.pfnEnableCpu = SVMR0EnableCpu; + g_HvmR0.pfnDisableCpu = SVMR0DisableCpu; + g_HvmR0.pfnInitVM = SVMR0InitVM; + g_HvmR0.pfnTermVM = SVMR0TermVM; + g_HvmR0.pfnSetupVM = SVMR0SetupVM; /* Query AMD features. */ uint32_t u32Dummy; - ASMCpuId(0x8000000A, &g_HvmR0.svm.u32Rev, &g_HvmR0.uMaxASID, - &u32Dummy, &g_HvmR0.svm.u32Features); + ASMCpuId(0x8000000a, &g_HvmR0.svm.u32Rev, &g_HvmR0.uMaxAsid, &u32Dummy, &g_HvmR0.svm.u32Features); /* * We need to check if AMD-V has been properly initialized on all CPUs. @@ -577,7 +581,7 @@ static void hmR0InitAmd(uint32_t u32FeaturesEDX) */ HMR0FIRSTRC FirstRc; hmR0FirstRcInit(&FirstRc); - int rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL); + rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL); AssertRC(rc); if (RT_SUCCESS(rc)) rc = hmR0FirstRcGetStatus(&FirstRc); @@ -587,15 +591,23 @@ static void hmR0InitAmd(uint32_t u32FeaturesEDX) #endif if (RT_SUCCESS(rc)) { - /* Read the HWCR msr for diagnostics. */ - g_HvmR0.svm.msrHWCR = ASMRdMsr(MSR_K8_HWCR); + /* Read the HWCR MSR for diagnostics. */ + g_HvmR0.svm.u64MsrHwcr = ASMRdMsr(MSR_K8_HWCR); g_HvmR0.svm.fSupported = true; } else + { g_HvmR0.lLastError = rc; + if (rc == VERR_SVM_DISABLED || rc == VERR_SVM_IN_USE) + rc = VINF_SUCCESS; /* Don't fail if AMD-V is disabled or in use. */ + } } else + { + rc = VINF_SUCCESS; /* Don't fail if AMD-V is not supported. See @bugref{6785}. */ g_HvmR0.lLastError = VERR_SVM_NO_SVM; + } + return rc; } @@ -604,7 +616,7 @@ static void hmR0InitAmd(uint32_t u32FeaturesEDX) * * @returns VBox status code. */ -VMMR0DECL(int) HWACCMR0Init(void) +VMMR0_INT_DECL(int) HMR0Init(void) { /* * Initialize the globals. @@ -616,16 +628,15 @@ VMMR0DECL(int) HWACCMR0Init(void) g_HvmR0.aCpuInfo[i].hMemObj = NIL_RTR0MEMOBJ; /* Fill in all callbacks with placeholders. 
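The AMD-V gate in hmR0InitAmd above comes down to four CPUID-derived conditions. A sketch of that predicate, assuming only the feature flags and the ASMIsValidExtRange() helper quoted in this hunk; myIsSvmUsable is a hypothetical name:

static bool myIsSvmUsable(uint32_t uAmdFeatEcx, uint32_t u32FeaturesEDX, uint32_t uMaxExtLeaf)
{
    return (uAmdFeatEcx    & X86_CPUID_AMD_FEATURE_ECX_SVM)   /* SVM advertised in leaf 0x80000001 */
        && (u32FeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)       /* RDMSR/WRMSR usable                 */
        && (u32FeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)      /* FXSAVE/FXRSTOR assumed by the code */
        && ASMIsValidExtRange(uMaxExtLeaf)                    /* extended CPUID range looks sane    */
        && uMaxExtLeaf >= UINT32_C(0x8000000a);               /* the SVM feature leaf exists        */
}

Only when all of these hold does the code go on to call SVMR0GlobalInit() and read leaf 0x8000000a for the SVM revision, the maximum ASID count and the feature bits.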
*/ - g_HvmR0.pfnEnterSession = hmR0DummyEnter; - g_HvmR0.pfnLeaveSession = hmR0DummyLeave; - g_HvmR0.pfnSaveHostState = hmR0DummySaveHostState; - g_HvmR0.pfnLoadGuestState = hmR0DummyLoadGuestState; - g_HvmR0.pfnRunGuestCode = hmR0DummyRunGuestCode; - g_HvmR0.pfnEnableCpu = hmR0DummyEnableCpu; - g_HvmR0.pfnDisableCpu = hmR0DummyDisableCpu; - g_HvmR0.pfnInitVM = hmR0DummyInitVM; - g_HvmR0.pfnTermVM = hmR0DummyTermVM; - g_HvmR0.pfnSetupVM = hmR0DummySetupVM; + g_HvmR0.pfnEnterSession = hmR0DummyEnter; + g_HvmR0.pfnThreadCtxCallback = hmR0DummyThreadCtxCallback; + g_HvmR0.pfnSaveHostState = hmR0DummySaveHostState; + g_HvmR0.pfnRunGuestCode = hmR0DummyRunGuestCode; + g_HvmR0.pfnEnableCpu = hmR0DummyEnableCpu; + g_HvmR0.pfnDisableCpu = hmR0DummyDisableCpu; + g_HvmR0.pfnInitVM = hmR0DummyInitVM; + g_HvmR0.pfnTermVM = hmR0DummyTermVM; + g_HvmR0.pfnSetupVM = hmR0DummySetupVM; /* Default is global VT-x/AMD-V init. */ g_HvmR0.fGlobalInit = true; @@ -645,40 +656,45 @@ VMMR0DECL(int) HWACCMR0Init(void) int rc; if (ASMHasCpuId()) { - uint32_t u32FeaturesECX, u32FeaturesEDX; - uint32_t u32VendorEBX, u32VendorECX, u32VendorEDX; - uint32_t u32Dummy; - /* Standard features. */ - ASMCpuId(0, &u32Dummy, &u32VendorEBX, &u32VendorECX, &u32VendorEDX); - ASMCpuId(1, &u32Dummy, &u32Dummy, &u32FeaturesECX, &u32FeaturesEDX); - - /* Query AMD features. */ - ASMCpuId(0x80000001, &u32Dummy, &u32Dummy, - &g_HvmR0.cpuid.u32AMDFeatureECX, - &g_HvmR0.cpuid.u32AMDFeatureEDX); - - /* Go to CPU specific initialization code. */ - if ( ( u32VendorEBX == X86_CPUID_VENDOR_INTEL_EBX - && u32VendorECX == X86_CPUID_VENDOR_INTEL_ECX - && u32VendorEDX == X86_CPUID_VENDOR_INTEL_EDX) - || ( u32VendorEBX == X86_CPUID_VENDOR_VIA_EBX - && u32VendorECX == X86_CPUID_VENDOR_VIA_ECX - && u32VendorEDX == X86_CPUID_VENDOR_VIA_EDX)) + uint32_t uMaxLeaf, u32VendorEBX, u32VendorECX, u32VendorEDX; + ASMCpuId(0, &uMaxLeaf, &u32VendorEBX, &u32VendorECX, &u32VendorEDX); + if (ASMIsValidStdRange(uMaxLeaf)) { - rc = hmR0InitIntel(u32FeaturesECX, u32FeaturesEDX); - if (RT_FAILURE(rc)) - return rc; + uint32_t u32FeaturesECX, u32FeaturesEDX, u32Dummy; + ASMCpuId(1, &u32Dummy, &u32Dummy, &u32FeaturesECX, &u32FeaturesEDX); + + /* Query AMD features. */ + uint32_t uMaxExtLeaf = ASMCpuId_EAX(0x80000000); + if (ASMIsValidExtRange(uMaxExtLeaf)) + ASMCpuId(0x80000001, &u32Dummy, &u32Dummy, + &g_HvmR0.cpuid.u32AMDFeatureECX, + &g_HvmR0.cpuid.u32AMDFeatureEDX); + else + g_HvmR0.cpuid.u32AMDFeatureECX = g_HvmR0.cpuid.u32AMDFeatureEDX = 0; + + /* Go to CPU specific initialization code. */ + if ( ASMIsIntelCpuEx(u32VendorEBX, u32VendorECX, u32VendorEDX) + || ASMIsViaCentaurCpuEx(u32VendorEBX, u32VendorECX, u32VendorEDX)) + { + rc = hmR0InitIntel(u32FeaturesECX, u32FeaturesEDX); + if (RT_FAILURE(rc)) + return rc; + } + else if (ASMIsAmdCpuEx(u32VendorEBX, u32VendorECX, u32VendorEDX)) + { + rc = hmR0InitAmd(u32FeaturesEDX, uMaxExtLeaf); + if (RT_FAILURE(rc)) + return rc; + } + else + g_HvmR0.lLastError = VERR_HM_UNKNOWN_CPU; } - else if ( u32VendorEBX == X86_CPUID_VENDOR_AMD_EBX - && u32VendorECX == X86_CPUID_VENDOR_AMD_ECX - && u32VendorEDX == X86_CPUID_VENDOR_AMD_EDX) - hmR0InitAmd(u32FeaturesEDX); else - g_HvmR0.lLastError = VERR_HWACCM_UNKNOWN_CPU; + g_HvmR0.lLastError = VERR_HM_UNKNOWN_CPU; } else - g_HvmR0.lLastError = VERR_HWACCM_NO_CPUID; + g_HvmR0.lLastError = VERR_HM_NO_CPUID; /* * Register notification callbacks that we can use to disable/enable CPUs @@ -704,7 +720,7 @@ VMMR0DECL(int) HWACCMR0Init(void) * * @returns VBox status code. 
*/ -VMMR0DECL(int) HWACCMR0Term(void) +VMMR0_INT_DECL(int) HMR0Term(void) { int rc; if ( g_HvmR0.vmx.fSupported @@ -741,7 +757,7 @@ VMMR0DECL(int) HWACCMR0Term(void) { HMR0FIRSTRC FirstRc; hmR0FirstRcInit(&FirstRc); - rc = RTMpOnAll(hmR0DisableCpuCallback, NULL, &FirstRc); + rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc); Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED); if (RT_SUCCESS(rc)) { @@ -762,13 +778,23 @@ VMMR0DECL(int) HWACCMR0Term(void) } } } + + /** @todo This needs cleaning up. There's no matching + * hmR0TermIntel()/hmR0TermAmd() and all the VT-x/AMD-V specific bits + * should move into their respective modules. */ + /* Finally, call global VT-x/AMD-V termination. */ + if (g_HvmR0.vmx.fSupported) + VMXR0GlobalTerm(); + else if (g_HvmR0.svm.fSupported) + SVMR0GlobalTerm(); + return rc; } /** - * Worker function used by hmR0PowerCallback and HWACCMR0Init to initalize - * VT-x on a CPU. + * Worker function used by hmR0PowerCallback() and HMR0Init() to initalize VT-x + * on a CPU. * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 Pointer to the first RC structure. @@ -776,40 +802,62 @@ VMMR0DECL(int) HWACCMR0Term(void) */ static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) { + /** @todo Unify code with SUPR0QueryVTCaps(). */ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1; - Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /// @todo fix idCpu == index assumption (rainy day) + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */ NOREF(pvUser2); - /* - * Both the LOCK and VMXON bit must be set; otherwise VMXON will generate a #GP. - * Once the lock bit is set, this MSR can no longer be modified. - */ - uint64_t fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); - if ( !(fFC & (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) - || ( (fFC & (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) - == MSR_IA32_FEATURE_CONTROL_VMXON ) /* Some BIOSes forget to set the locked bit. */ - ) + uint64_t fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + bool const fInSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE); + bool fMsrLocked = RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_LOCK); + bool fSmxVmxAllowed = RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_SMX_VMXON); + bool fVmxAllowed = RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_VMXON); + + /* Check if the LOCK bit is set but excludes the required VMXON bit. */ + int rc = VERR_HM_IPE_1; + if (fMsrLocked) { - /* MSR is not yet locked; we can change it ourselves here. */ - ASMWrMsr(MSR_IA32_FEATURE_CONTROL, - g_HvmR0.vmx.msr.feature_ctrl | MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK); - fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + if (fInSmxMode && !fSmxVmxAllowed) + rc = VERR_VMX_MSR_SMX_VMXON_DISABLED; + else if (!fInSmxMode && !fVmxAllowed) + rc = VERR_VMX_MSR_VMXON_DISABLED; + else + rc = VINF_SUCCESS; } - - int rc; - if ( (fFC & (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) - == (MSR_IA32_FEATURE_CONTROL_VMXON | MSR_IA32_FEATURE_CONTROL_LOCK)) - rc = VINF_SUCCESS; else - rc = VERR_VMX_MSR_LOCKED_OR_DISABLED; + { + /* + * MSR is not yet locked; we can change it ourselves here. + * Once the lock bit is set, this MSR can no longer be modified. 
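When the feature-control MSR turns out to be unlocked, the branch continued below sets the lock bit together with the VMXON enable matching the current SMX mode, writes the MSR, and reads it back to confirm the bits actually stuck. A minimal sketch of just that write, using only the MSR bit names and helpers quoted in this hunk; myLockFeatureCtrl is a hypothetical name:

static int myLockFeatureCtrl(bool fInSmxMode)
{
    uint64_t fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
    fFC |= MSR_IA32_FEATURE_CONTROL_LOCK;                /* once set, the MSR stays read-only until reset */
    fFC |= fInSmxMode
         ? MSR_IA32_FEATURE_CONTROL_SMX_VMXON            /* allow VMXON inside SMX operation  */
         : MSR_IA32_FEATURE_CONTROL_VMXON;               /* allow VMXON outside SMX operation */
    ASMWrMsr(MSR_IA32_FEATURE_CONTROL, fFC);

    /* Read back and verify; if the write did not take effect, VMXON would #GP later. */
    fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
    bool const fAllowed = (fFC & MSR_IA32_FEATURE_CONTROL_LOCK)
                       && (fFC & (fInSmxMode ? MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                                             : MSR_IA32_FEATURE_CONTROL_VMXON));
    return fAllowed ? VINF_SUCCESS : VERR_VMX_MSR_LOCKING_FAILED;
}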
+ */ + fFC |= MSR_IA32_FEATURE_CONTROL_LOCK; + if (fInSmxMode) + fFC |= MSR_IA32_FEATURE_CONTROL_SMX_VMXON; + else + fFC |= MSR_IA32_FEATURE_CONTROL_VMXON; + + ASMWrMsr(MSR_IA32_FEATURE_CONTROL, fFC); + + /* Verify. */ + fFC = ASMRdMsr(MSR_IA32_FEATURE_CONTROL); + fMsrLocked = RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_LOCK); + fSmxVmxAllowed = fMsrLocked && RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_SMX_VMXON); + fVmxAllowed = fMsrLocked && RT_BOOL(fFC & MSR_IA32_FEATURE_CONTROL_VMXON); + bool const fAllowed = fInSmxMode ? fSmxVmxAllowed : fVmxAllowed; + if (fAllowed) + rc = VINF_SUCCESS; + else + rc = VERR_VMX_MSR_LOCKING_FAILED; + } hmR0FirstRcSetStatus(pFirstRc, rc); } /** - * Worker function used by hmR0PowerCallback and HWACCMR0Init to initalize - * VT-x / AMD-V on a CPU. + * Worker function used by hmR0PowerCallback() and HMR0Init() to initalize AMD-V + * on a CPU. * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 Pointer to the first RC structure. @@ -818,7 +866,8 @@ static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *p static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) { PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1; - Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /// @todo fix idCpu == index assumption (rainy day) + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */ NOREF(pvUser2); /* Check if SVM is disabled. */ @@ -853,36 +902,39 @@ static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvU } - /** - * Disable VT-x or AMD-V on the current CPU + * Enable VT-x or AMD-V on the current CPU * * @returns VBox status code. - * @param pVM Pointer to the VM (can be 0). + * @param pVM Pointer to the VM (can be NULL). * @param idCpu The identifier for the CPU the function is called on. + * + * @remarks Maybe called with interrupts disabled! */ static int hmR0EnableCpu(PVM pVM, RTCPUID idCpu) { - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; - Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /// @todo fix idCpu == index assumption (rainy day) + Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */ Assert(idCpu < RT_ELEMENTS(g_HvmR0.aCpuInfo)); Assert(!pCpu->fConfigured); - Assert(!g_HvmR0.fGlobalInit || ASMAtomicReadBool(&pCpu->fInUse) == false); - pCpu->idCpu = idCpu; - pCpu->uCurrentASID = 0; /* we'll aways increment this the first time (host uses ASID 0) */ - /* Do NOT reset cTLBFlushes here, see @bugref{6255}. */ + pCpu->idCpu = idCpu; + /* Do NOT reset cTlbFlushes here, see @bugref{6255}. 
*/ int rc; if (g_HvmR0.vmx.fSupported && g_HvmR0.vmx.fUsingSUPR0EnableVTx) - rc = g_HvmR0.pfnEnableCpu(pCpu, pVM, NULL, NIL_RTHCPHYS, true); + rc = g_HvmR0.pfnEnableCpu(pCpu, pVM, NULL /* pvCpuPage */, NIL_RTHCPHYS, true, &g_HvmR0.vmx.Msrs); else { AssertLogRelMsgReturn(pCpu->hMemObj != NIL_RTR0MEMOBJ, ("hmR0EnableCpu failed idCpu=%u.\n", idCpu), VERR_HM_IPE_1); void *pvCpuPage = RTR0MemObjAddress(pCpu->hMemObj); RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); - rc = g_HvmR0.pfnEnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false); + + if (g_HvmR0.vmx.fSupported) + rc = g_HvmR0.pfnEnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false, &g_HvmR0.vmx.Msrs); + else + rc = g_HvmR0.pfnEnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false, NULL /* pvArg */); } AssertRC(rc); if (RT_SUCCESS(rc)) @@ -893,11 +945,10 @@ static int hmR0EnableCpu(PVM pVM, RTCPUID idCpu) /** - * Worker function passed to RTMpOnAll, RTMpOnOthers and RTMpOnSpecific that - * is to be called on the target cpus. + * Worker function passed to RTMpOnAll() that is to be called on all CPUs. * * @param idCpu The identifier for the CPU the function is called on. - * @param pvUser1 The 1st user argument. + * @param pvUser1 Opaque pointer to the VM (can be NULL!). * @param pvUser2 The 2nd user argument. */ static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2) @@ -905,21 +956,21 @@ static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, vo PVM pVM = (PVM)pvUser1; /* can be NULL! */ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser2; AssertReturnVoid(g_HvmR0.fGlobalInit); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); hmR0FirstRcSetStatus(pFirstRc, hmR0EnableCpu(pVM, idCpu)); } /** - * RTOnce callback employed by HWACCMR0EnableAllCpus. + * RTOnce callback employed by HMR0EnableAllCpus. * * @returns VBox status code. * @param pvUser Pointer to the VM. * @param pvUserIgnore NULL, ignored. */ -static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgnore) +static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser) { PVM pVM = (PVM)pvUser; - NOREF(pvUserIgnore); /* * Indicate that we've initialized. @@ -933,14 +984,17 @@ static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgno /* * The global init variable is set by the first VM. */ - g_HvmR0.fGlobalInit = pVM->hwaccm.s.fGlobalInit; + g_HvmR0.fGlobalInit = pVM->hm.s.fGlobalInit; +#ifdef VBOX_STRICT for (unsigned i = 0; i < RT_ELEMENTS(g_HvmR0.aCpuInfo); i++) { Assert(g_HvmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ); - g_HvmR0.aCpuInfo[i].fConfigured = false; - g_HvmR0.aCpuInfo[i].cTLBFlushes = 0; + Assert(!g_HvmR0.aCpuInfo[i].fConfigured); + Assert(!g_HvmR0.aCpuInfo[i].cTlbFlushes); + Assert(!g_HvmR0.aCpuInfo[i].uCurrentAsid); } +#endif int rc; if ( g_HvmR0.vmx.fSupported @@ -952,7 +1006,7 @@ static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgno rc = SUPR0EnableVTx(true /* fEnable */); if (RT_SUCCESS(rc)) /* If the host provides a VT-x init API, then we'll rely on that for global init. */ - g_HvmR0.fGlobalInit = pVM->hwaccm.s.fGlobalInit = true; + g_HvmR0.fGlobalInit = pVM->hm.s.fGlobalInit = true; else AssertMsgFailed(("hmR0EnableAllCpuOnce/SUPR0EnableVTx: rc=%Rrc\n", rc)); } @@ -961,14 +1015,14 @@ static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgno /* * We're doing the job ourselves. 
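hmR0EnableAllCpuOnce() above is deliberately wrapped in RTOnce so the global bring-up (per-CPU page allocation plus the RTMpOnAll enable pass) happens exactly once, no matter how many VMs race into HMR0EnableAllCpus(). A stripped-down sketch of that pattern, assuming the single-pvUser RTOnce signature used in this hunk and the standard IPRT RTONCE_INITIALIZER; the my* names are hypothetical and the callback body is elided:

static RTONCE g_myOnce = RTONCE_INITIALIZER;

static DECLCALLBACK(int32_t) myEnableAllCpusOnce(void *pvUser)
{
    PVM pVM = (PVM)pvUser;
    NOREF(pVM);
    /* Allocate one page per possible host CPU, then RTMpOnAll(...) to enable VT-x/AMD-V. */
    return VINF_SUCCESS;
}

VMMR0_INT_DECL(int) myEnableAllCpus(PVM pVM)
{
    return RTOnce(&g_myOnce, myEnableAllCpusOnce, pVM);
}

Later callers skip straight past the callback, which is why HMR0EnableAllCpus() can be invoked per VM without re-running the bring-up.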
*/ - /* Allocate one page per cpu for the global vt-x and amd-v pages */ + /* Allocate one page per cpu for the global VT-x and AMD-V pages */ for (unsigned i = 0; i < RT_ELEMENTS(g_HvmR0.aCpuInfo); i++) { Assert(g_HvmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ); if (RTMpIsCpuPossible(RTMpCpuIdFromSetIndex(i))) { - rc = RTR0MemObjAllocCont(&g_HvmR0.aCpuInfo[i].hMemObj, PAGE_SIZE, true /* executable R0 mapping */); + rc = RTR0MemObjAllocCont(&g_HvmR0.aCpuInfo[i].hMemObj, PAGE_SIZE, false /* executable R0 mapping */); AssertLogRelRCReturn(rc, rc); void *pvR0 = RTR0MemObjAddress(g_HvmR0.aCpuInfo[i].hMemObj); Assert(pvR0); @@ -979,7 +1033,8 @@ static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgno rc = VINF_SUCCESS; } - if (RT_SUCCESS(rc) && g_HvmR0.fGlobalInit) + if ( RT_SUCCESS(rc) + && g_HvmR0.fGlobalInit) { /* First time, so initialize each cpu/core. */ HMR0FIRSTRC FirstRc; @@ -995,19 +1050,19 @@ static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser, void *pvUserIgno /** - * Sets up HWACCM on all cpus. + * Sets up HM on all cpus. * * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) HWACCMR0EnableAllCpus(PVM pVM) +VMMR0_INT_DECL(int) HMR0EnableAllCpus(PVM pVM) { - /* Make sure we don't touch hwaccm after we've disabled hwaccm in + /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */ if (ASMAtomicReadBool(&g_HvmR0.fSuspended)) - return VERR_HWACCM_SUSPEND_PENDING; + return VERR_HM_SUSPEND_PENDING; - return RTOnce(&g_HvmR0.EnableAllCpusOnce, hmR0EnableAllCpuOnce, pVM, NULL); + return RTOnce(&g_HvmR0.EnableAllCpusOnce, hmR0EnableAllCpuOnce, pVM); } @@ -1016,15 +1071,17 @@ VMMR0DECL(int) HWACCMR0EnableAllCpus(PVM pVM) * * @returns VBox status code. * @param idCpu The identifier for the CPU the function is called on. + * + * @remarks Must be called with preemption disabled. */ static int hmR0DisableCpu(RTCPUID idCpu) { - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; Assert(!g_HvmR0.vmx.fSupported || !g_HvmR0.vmx.fUsingSUPR0EnableVTx); - Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /// @todo fix idCpu == index assumption (rainy day) + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */ Assert(idCpu < RT_ELEMENTS(g_HvmR0.aCpuInfo)); - Assert(!g_HvmR0.fGlobalInit || ASMAtomicReadBool(&pCpu->fInUse) == false); Assert(!pCpu->fConfigured || pCpu->hMemObj != NIL_RTR0MEMOBJ); if (pCpu->hMemObj == NIL_RTR0MEMOBJ) @@ -1035,34 +1092,26 @@ static int hmR0DisableCpu(RTCPUID idCpu) { void *pvCpuPage = RTR0MemObjAddress(pCpu->hMemObj); RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); - if (idCpu == RTMpCpuId()) - { - rc = g_HvmR0.pfnDisableCpu(pCpu, pvCpuPage, HCPhysCpuPage); - AssertRC(rc); - } - else - { - pCpu->fIgnoreAMDVInUseError = true; - rc = VINF_SUCCESS; - } + + rc = g_HvmR0.pfnDisableCpu(pCpu, pvCpuPage, HCPhysCpuPage); + AssertRCReturn(rc, rc); pCpu->fConfigured = false; } else rc = VINF_SUCCESS; /* nothing to do */ - pCpu->uCurrentASID = 0; return rc; } /** - * Worker function passed to RTMpOnAll, RTMpOnOthers and RTMpOnSpecific that - * is to be called on the target cpus. + * Worker function passed to RTMpOnAll() that is to be called on the target + * CPUs. * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 The 1st user argument. - * @param pvUser2 The 2nd user argument. 
+ * @param pvUser2 Opaque pointer to the FirstRc. */ static DECLCALLBACK(void) hmR0DisableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2) { @@ -1085,7 +1134,7 @@ static DECLCALLBACK(void) hmR0MpEventCallback(RTMPEVENT enmEvent, RTCPUID idCpu, /* * We only care about uninitializing a CPU that is going offline. When a - * CPU comes online, the initialization is done lazily in HWACCMR0Enter(). + * CPU comes online, the initialization is done lazily in HMR0Enter(). */ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); switch (enmEvent) @@ -1135,7 +1184,7 @@ static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser) if (g_HvmR0.fGlobalInit) { /* Turn off VT-x or AMD-V on all CPUs. */ - rc = RTMpOnAll(hmR0DisableCpuCallback, NULL, &FirstRc); + rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc); Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED); } /* else nothing to do here for the local init case */ @@ -1158,7 +1207,7 @@ static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser) if (g_HvmR0.fGlobalInit) { /* Turn VT-x or AMD-V back on on all CPUs. */ - rc = RTMpOnAll(hmR0EnableCpuCallback, NULL, &FirstRc /* output ignored */); + rc = RTMpOnAll(hmR0EnableCpuCallback, NULL /* pVM */, &FirstRc /* output ignored */); Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED); } /* else nothing to do here for the local init case */ @@ -1179,59 +1228,46 @@ static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser) * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) HWACCMR0InitVM(PVM pVM) +VMMR0_INT_DECL(int) HMR0InitVM(PVM pVM) { AssertReturn(pVM, VERR_INVALID_PARAMETER); #ifdef LOG_ENABLED - SUPR0Printf("HWACCMR0InitVM: %p\n", pVM); + SUPR0Printf("HMR0InitVM: %p\n", pVM); #endif - /* Make sure we don't touch hwaccm after we've disabled hwaccm in preparation of a suspend. */ + /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */ if (ASMAtomicReadBool(&g_HvmR0.fSuspended)) - return VERR_HWACCM_SUSPEND_PENDING; + return VERR_HM_SUSPEND_PENDING; /* * Copy globals to the VM structure. 
*/ - pVM->hwaccm.s.vmx.fSupported = g_HvmR0.vmx.fSupported; - pVM->hwaccm.s.svm.fSupported = g_HvmR0.svm.fSupported; - - pVM->hwaccm.s.vmx.fUsePreemptTimer = g_HvmR0.vmx.fUsePreemptTimer; - pVM->hwaccm.s.vmx.cPreemptTimerShift = g_HvmR0.vmx.cPreemptTimerShift; - pVM->hwaccm.s.vmx.msr.feature_ctrl = g_HvmR0.vmx.msr.feature_ctrl; - pVM->hwaccm.s.vmx.hostCR4 = g_HvmR0.vmx.hostCR4; - pVM->hwaccm.s.vmx.hostEFER = g_HvmR0.vmx.hostEFER; - pVM->hwaccm.s.vmx.msr.vmx_basic_info = g_HvmR0.vmx.msr.vmx_basic_info; - pVM->hwaccm.s.vmx.msr.vmx_pin_ctls = g_HvmR0.vmx.msr.vmx_pin_ctls; - pVM->hwaccm.s.vmx.msr.vmx_proc_ctls = g_HvmR0.vmx.msr.vmx_proc_ctls; - pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2 = g_HvmR0.vmx.msr.vmx_proc_ctls2; - pVM->hwaccm.s.vmx.msr.vmx_exit = g_HvmR0.vmx.msr.vmx_exit; - pVM->hwaccm.s.vmx.msr.vmx_entry = g_HvmR0.vmx.msr.vmx_entry; - pVM->hwaccm.s.vmx.msr.vmx_misc = g_HvmR0.vmx.msr.vmx_misc; - pVM->hwaccm.s.vmx.msr.vmx_cr0_fixed0 = g_HvmR0.vmx.msr.vmx_cr0_fixed0; - pVM->hwaccm.s.vmx.msr.vmx_cr0_fixed1 = g_HvmR0.vmx.msr.vmx_cr0_fixed1; - pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0 = g_HvmR0.vmx.msr.vmx_cr4_fixed0; - pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed1 = g_HvmR0.vmx.msr.vmx_cr4_fixed1; - pVM->hwaccm.s.vmx.msr.vmx_vmcs_enum = g_HvmR0.vmx.msr.vmx_vmcs_enum; - pVM->hwaccm.s.vmx.msr.vmx_eptcaps = g_HvmR0.vmx.msr.vmx_eptcaps; - pVM->hwaccm.s.svm.msrHWCR = g_HvmR0.svm.msrHWCR; - pVM->hwaccm.s.svm.u32Rev = g_HvmR0.svm.u32Rev; - pVM->hwaccm.s.svm.u32Features = g_HvmR0.svm.u32Features; - pVM->hwaccm.s.cpuid.u32AMDFeatureECX = g_HvmR0.cpuid.u32AMDFeatureECX; - pVM->hwaccm.s.cpuid.u32AMDFeatureEDX = g_HvmR0.cpuid.u32AMDFeatureEDX; - pVM->hwaccm.s.lLastError = g_HvmR0.lLastError; - - pVM->hwaccm.s.uMaxASID = g_HvmR0.uMaxASID; - - - if (!pVM->hwaccm.s.cMaxResumeLoops) /* allow ring-3 overrides */ + /** @todo r=ramshankar: Why do we do this for MSRs? We never change them in the + * per-VM structures anyway... */ + pVM->hm.s.vmx.fSupported = g_HvmR0.vmx.fSupported; + pVM->hm.s.svm.fSupported = g_HvmR0.svm.fSupported; + + pVM->hm.s.vmx.fUsePreemptTimer = g_HvmR0.vmx.fUsePreemptTimer; + pVM->hm.s.vmx.cPreemptTimerShift = g_HvmR0.vmx.cPreemptTimerShift; + pVM->hm.s.vmx.u64HostCr4 = g_HvmR0.vmx.u64HostCr4; + pVM->hm.s.vmx.u64HostEfer = g_HvmR0.vmx.u64HostEfer; + pVM->hm.s.vmx.Msrs = g_HvmR0.vmx.Msrs; + pVM->hm.s.svm.u64MsrHwcr = g_HvmR0.svm.u64MsrHwcr; + pVM->hm.s.svm.u32Rev = g_HvmR0.svm.u32Rev; + pVM->hm.s.svm.u32Features = g_HvmR0.svm.u32Features; + pVM->hm.s.cpuid.u32AMDFeatureECX = g_HvmR0.cpuid.u32AMDFeatureECX; + pVM->hm.s.cpuid.u32AMDFeatureEDX = g_HvmR0.cpuid.u32AMDFeatureEDX; + pVM->hm.s.lLastError = g_HvmR0.lLastError; + + pVM->hm.s.uMaxAsid = g_HvmR0.uMaxAsid; + + + if (!pVM->hm.s.cMaxResumeLoops) /* allow ring-3 overrides */ { - pVM->hwaccm.s.cMaxResumeLoops = 1024; -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION + pVM->hm.s.cMaxResumeLoops = 1024; if (RTThreadPreemptIsPendingTrusty()) - pVM->hwaccm.s.cMaxResumeLoops = 8192; -#endif + pVM->hm.s.cMaxResumeLoops = 8192; } /* @@ -1241,32 +1277,19 @@ VMMR0DECL(int) HWACCMR0InitVM(PVM pVM) { PVMCPU pVCpu = &pVM->aCpus[i]; - pVCpu->hwaccm.s.idEnteredCpu = NIL_RTCPUID; + pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID; /* Invalidate the last cpu we were running on. */ - pVCpu->hwaccm.s.idLastCpu = NIL_RTCPUID; + pVCpu->hm.s.idLastCpu = NIL_RTCPUID; /* We'll aways increment this the first time (host uses ASID 0) */ - pVCpu->hwaccm.s.uCurrentASID = 0; + pVCpu->hm.s.uCurrentAsid = 0; } /* * Call the hardware specific initialization method. - * - * Note! 
The fInUse handling here isn't correct as we can we can be - * rescheduled to a different cpu, but the fInUse case is mostly for - * debugging... Disabling preemption isn't an option when allocating - * memory, so we'll let it slip for now. */ - RTCCUINTREG fFlags = ASMIntDisableFlags(); - PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu(); - ASMAtomicWriteBool(&pCpu->fInUse, true); - ASMSetFlags(fFlags); - - int rc = g_HvmR0.pfnInitVM(pVM); - - ASMAtomicWriteBool(&pCpu->fInUse, false); - return rc; + return g_HvmR0.pfnInitVM(pVM); } @@ -1276,33 +1299,18 @@ VMMR0DECL(int) HWACCMR0InitVM(PVM pVM) * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) HWACCMR0TermVM(PVM pVM) +VMMR0_INT_DECL(int) HMR0TermVM(PVM pVM) { - Log(("HWACCMR0TermVM: %p\n", pVM)); + Log(("HMR0TermVM: %p\n", pVM)); AssertReturn(pVM, VERR_INVALID_PARAMETER); - /* Make sure we don't touch hm after we've disabled hwaccm in preparation - of a suspend. */ - /** @todo r=bird: This cannot be right, the termination functions are - * just freeing memory and resetting pVM/pVCpu members... - * ==> memory leak. */ - AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HWACCM_SUSPEND_PENDING); - /* * Call the hardware specific method. * - * Note! Not correct as we can be rescheduled to a different cpu, but the - * fInUse case is mostly for debugging. + * Note! We might be preparing for a suspend, so the pfnTermVM() functions should probably not + * mess with VT-x/AMD-V features on the CPU, currently all they do is free memory so this is safe. */ - RTCCUINTREG fFlags = ASMIntDisableFlags(); - PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu(); - ASMAtomicWriteBool(&pCpu->fInUse, true); - ASMSetFlags(fFlags); - - int rc = g_HvmR0.pfnTermVM(pVM); - - ASMAtomicWriteBool(&pCpu->fInUse, false); - return rc; + return g_HvmR0.pfnTermVM(pVM); } @@ -1314,35 +1322,34 @@ VMMR0DECL(int) HWACCMR0TermVM(PVM pVM) * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) HWACCMR0SetupVM(PVM pVM) +VMMR0_INT_DECL(int) HMR0SetupVM(PVM pVM) { - Log(("HWACCMR0SetupVM: %p\n", pVM)); + Log(("HMR0SetupVM: %p\n", pVM)); AssertReturn(pVM, VERR_INVALID_PARAMETER); - /* Make sure we don't touch hwaccm after we've disabled hwaccm in + /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */ - AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HWACCM_SUSPEND_PENDING); + AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HM_SUSPEND_PENDING); + /* On first entry we'll sync everything. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + HMCPU_CF_RESET_TO(&pVM->aCpus[i], HM_CHANGED_HOST_CONTEXT | HM_CHANGED_ALL_GUEST); /* - * Call the hardware specific setup VM method. This requires the CPU to be + * Call the hardware specific setup VM method. This requires the CPU to be * enabled for AMD-V/VT-x and preemption to be prevented. */ - RTCCUINTREG fFlags = ASMIntDisableFlags(); - RTCPUID idCpu = RTMpCpuId(); - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; - ASMAtomicWriteBool(&pCpu->fInUse, true); - - /* On first entry we'll sync everything. */ - for (VMCPUID i = 0; i < pVM->cCpus; i++) - pVM->aCpus[i].hwaccm.s.fContextUseFlags = HWACCM_CHANGED_ALL; + RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; + RTThreadPreemptDisable(&PreemptState); + RTCPUID idCpu = RTMpCpuId(); + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; /* Enable VT-x or AMD-V if local init is required. 
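HMR0SetupVM above switches from the old interrupt-disable/fInUse dance to plain preemption control: pin the thread, enable HM on that CPU when running with local init, do the backend setup, and undo everything in reverse order. A compact sketch of that bracket, using only the IPRT calls and helpers quoted in this hunk; mySetupVmOnCurrentCpu is a hypothetical wrapper, and the error handling is simplified:

static int mySetupVmOnCurrentCpu(PVM pVM)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);      /* we stay on this host CPU from here on */

    RTCPUID idCpu = RTMpCpuId();                /* safe to sample now that we cannot migrate */
    int rc = VINF_SUCCESS;
    if (!g_HvmR0.fGlobalInit)                   /* local init: VT-x/AMD-V not yet enabled here */
        rc = hmR0EnableCpu(pVM, idCpu);
    if (RT_SUCCESS(rc))
        rc = g_HvmR0.pfnSetupVM(pVM);           /* backend-specific per-VM setup */
    if (!g_HvmR0.fGlobalInit)
        hmR0DisableCpu(idCpu);                  /* tear the local enable down again */

    RTThreadPreemptRestore(&PreemptState);      /* re-enable preemption last */
    return rc;
}

Disabling preemption rather than interrupts is enough here, because the only requirement is staying on one CPU while that CPU has VT-x or AMD-V enabled.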
*/ int rc; if (!g_HvmR0.fGlobalInit) { rc = hmR0EnableCpu(pVM, idCpu); - AssertReturnStmt(RT_SUCCESS_NP(rc), ASMSetFlags(fFlags), rc); + AssertRCReturnStmt(rc, RTThreadPreemptRestore(&PreemptState), rc); } /* Setup VT-x or AMD-V. */ @@ -1355,9 +1362,36 @@ VMMR0DECL(int) HWACCMR0SetupVM(PVM pVM) AssertRC(rc2); } - ASMAtomicWriteBool(&pCpu->fInUse, false); - ASMSetFlags(fFlags); + RTThreadPreemptRestore(&PreemptState); + return rc; +} + +/** + * Turns on HM on the CPU if necessary and initializes the bare minimum state + * required for entering HM context. + * + * @returns VBox status code. + * @param pvCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +VMMR0_INT_DECL(int) HMR0EnterCpu(PVMCPU pVCpu) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + int rc = VINF_SUCCESS; + RTCPUID idCpu = RTMpCpuId(); + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; + AssertPtr(pCpu); + + /* Enable VT-x or AMD-V if local init is required, or enable if it's a freshly onlined CPU. */ + if (!pCpu->fConfigured) + rc = hmR0EnableCpu(pVCpu->CTX_SUFF(pVM), idCpu); + + /* Reload host-context (back from ring-3/migrated CPUs), reload host context & shared bits. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE); + pVCpu->hm.s.idEnteredCpu = idCpu; return rc; } @@ -1371,57 +1405,35 @@ VMMR0DECL(int) HWACCMR0SetupVM(PVM pVM) * * @remarks This is called with preemption disabled. */ -VMMR0DECL(int) HWACCMR0Enter(PVM pVM, PVMCPU pVCpu) +VMMR0_INT_DECL(int) HMR0Enter(PVM pVM, PVMCPU pVCpu) { - RTCPUID idCpu = RTMpCpuId(); - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; - /* Make sure we can't enter a session after we've disabled HM in preparation of a suspend. */ - AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HWACCM_SUSPEND_PENDING); - ASMAtomicWriteBool(&pCpu->fInUse, true); - - AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == NIL_RTCPUID, ("%d", (int)pVCpu->hwaccm.s.idEnteredCpu)); - pVCpu->hwaccm.s.idEnteredCpu = idCpu; - - PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); - - /* Always load the guest's FPU/XMM state on-demand. */ - CPUMDeactivateGuestFPUState(pVCpu); - - /* Always load the guest's debug state on-demand. */ - CPUMDeactivateGuestDebugState(pVCpu); - - /* Always reload the host context and the guest's CR0 register. (!!!!) */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_HOST_CONTEXT; + AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HM_SUSPEND_PENDING); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); - /* Setup the register and mask according to the current execution mode. */ - if (pCtx->msrEFER & MSR_K6_EFER_LMA) - pVM->hwaccm.s.u64RegisterMask = UINT64_C(0xFFFFFFFFFFFFFFFF); - else - pVM->hwaccm.s.u64RegisterMask = UINT64_C(0xFFFFFFFF); - - /* Enable VT-x or AMD-V if local init is required, or enable if it's a - freshly onlined CPU. */ - int rc; - if ( !pCpu->fConfigured - || !g_HvmR0.fGlobalInit) - { - rc = hmR0EnableCpu(pVM, idCpu); - AssertRCReturn(rc, rc); - } + /* Load the bare minimum state required for entering HM. */ + int rc = HMR0EnterCpu(pVCpu); + AssertRCReturn(rc, rc); #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + AssertReturn(!VMMR0ThreadCtxHooksAreRegistered(pVCpu), VERR_HM_IPE_5); bool fStartedSet = PGMR0DynMapStartOrMigrateAutoSet(pVCpu); #endif - rc = g_HvmR0.pfnEnterSession(pVM, pVCpu, pCpu); - AssertRC(rc); - /* We must save the host context here (VT-x) as we might be rescheduled on - a different cpu after a long jump back to ring 3. 
*/ - rc |= g_HvmR0.pfnSaveHostState(pVM, pVCpu); - AssertRC(rc); - rc |= g_HvmR0.pfnLoadGuestState(pVM, pVCpu, pCtx); - AssertRC(rc); + RTCPUID idCpu = RTMpCpuId(); + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + Assert(pCpu); + Assert(pCtx); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + + rc = g_HvmR0.pfnEnterSession(pVM, pVCpu, pCpu); + AssertMsgRCReturn(rc, ("pfnEnterSession failed. rc=%Rrc pVCpu=%p HostCpuId=%u\n", rc, pVCpu, idCpu), rc); + + /* Load the host as we may be resuming code after a longjmp and quite + possibly now be scheduled on a different CPU. */ + rc = g_HvmR0.pfnSaveHostState(pVM, pVCpu); + AssertMsgRCReturn(rc, ("pfnSaveHostState failed. rc=%Rrc pVCpu=%p HostCpuId=%u\n", rc, pVCpu, idCpu), rc); #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE if (fStartedSet) @@ -1431,81 +1443,60 @@ VMMR0DECL(int) HWACCMR0Enter(PVM pVM, PVMCPU pVCpu) /* Keep track of the CPU owning the VMCS for debugging scheduling weirdness and ring-3 calls. */ if (RT_FAILURE(rc)) - pVCpu->hwaccm.s.idEnteredCpu = NIL_RTCPUID; + pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID; return rc; } /** - * Leaves the VT-x or AMD-V session. + * Deinitializes the bare minimum state used for HM context and if necessary + * disable HM on the CPU. * * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. + * @param pVCpu Pointer to the VMCPU. * - * @remarks Called with preemption disabled just like HWACCMR0Enter, our - * counterpart. + * @remarks No-long-jump zone!!! */ -VMMR0DECL(int) HWACCMR0Leave(PVM pVM, PVMCPU pVCpu) +VMMR0_INT_DECL(int) HMR0LeaveCpu(PVMCPU pVCpu) { - int rc; - RTCPUID idCpu = RTMpCpuId(); - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; - PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); - /** @todo r=bird: This can't be entirely right? */ - AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HWACCM_SUSPEND_PENDING); + RTCPUID idCpu = RTMpCpuId(); + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[idCpu]; - /* - * Save the guest FPU and XMM state if necessary. - * - * Note! It's rather tricky with longjmps done by e.g. Log statements or - * the page fault handler. We must restore the host FPU here to make - * absolutely sure we don't leave the guest FPU state active or trash - * somebody else's FPU state. - */ - if (CPUMIsGuestFPUStateActive(pVCpu)) + if ( !g_HvmR0.fGlobalInit + && pCpu->fConfigured) { - Log2(("CPUMR0SaveGuestFPU\n")); - CPUMR0SaveGuestFPU(pVM, pVCpu, pCtx); - - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - Assert(!CPUMIsGuestFPUStateActive(pVCpu)); + int rc = hmR0DisableCpu(idCpu); + AssertRCReturn(rc, rc); + Assert(!pCpu->fConfigured); } - rc = g_HvmR0.pfnLeaveSession(pVM, pVCpu, pCtx); + /* Reset these to force a TLB flush for the next entry. */ + pVCpu->hm.s.idLastCpu = NIL_RTCPUID; + pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID; + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - /* We don't pass on invlpg information to the recompiler for nested paging - guests, so we must make sure the recompiler flushes its TLB the next - time it executes code. */ - if ( pVM->hwaccm.s.fNestedPaging - && CPUMIsGuestInPagedProtectedModeEx(pCtx)) - CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); + /* Clear the VCPU <-> host CPU mapping as we've left HM context. 
*/ + ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); - /* Keep track of the CPU owning the VMCS for debugging scheduling weirdness - and ring-3 calls. */ - AssertMsgStmt( pVCpu->hwaccm.s.idEnteredCpu == idCpu - || RT_FAILURE_NP(rc), - ("Owner is %u, I'm %u", pVCpu->hwaccm.s.idEnteredCpu, idCpu), - rc = VERR_HM_WRONG_CPU_1); - pVCpu->hwaccm.s.idEnteredCpu = NIL_RTCPUID; + return VINF_SUCCESS; +} - /* - * Disable VT-x or AMD-V if local init was done before. - */ - if (!g_HvmR0.fGlobalInit) - { - rc = hmR0DisableCpu(idCpu); - AssertRC(rc); - /* Reset these to force a TLB flush for the next entry. (-> EXPENSIVE) */ - pVCpu->hwaccm.s.idLastCpu = NIL_RTCPUID; - pVCpu->hwaccm.s.uCurrentASID = 0; - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - } +/** + * Thread-context hook for HM. + * + * @param enmEvent The thread-context event. + * @param pvUser Opaque pointer to the VMCPU. + */ +VMMR0_INT_DECL(void) HMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + Assert(pVCpu); + Assert(g_HvmR0.pfnThreadCtxCallback); - ASMAtomicWriteBool(&pCpu->fInUse, false); - return rc; + g_HvmR0.pfnThreadCtxCallback(enmEvent, pVCpu, g_HvmR0.fGlobalInit); } @@ -1516,20 +1507,25 @@ VMMR0DECL(int) HWACCMR0Leave(PVM pVM, PVMCPU pVCpu) * @param pVM Pointer to the VM. * @param pVCpu Pointer to the VMCPU. * - * @remarks Called with preemption disabled and after first having called - * HWACCMR0Enter. + * @remarks Can be called with preemption enabled if thread-context hooks are + * used!!! */ -VMMR0DECL(int) HWACCMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) +VMMR0_INT_DECL(int) HMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) { #ifdef VBOX_STRICT - PHMGLOBLCPUINFO pCpu = &g_HvmR0.aCpuInfo[RTMpCpuId()]; - Assert(!VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)); - Assert(pCpu->fConfigured); - AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HWACCM_SUSPEND_PENDING); - Assert(ASMAtomicReadBool(&pCpu->fInUse) == true); + /* With thread-context hooks we would be running this code with preemption enabled. */ + if (!RTThreadPreemptIsEnabled(NIL_RTTHREAD)) + { + PHMGLOBALCPUINFO pCpu = &g_HvmR0.aCpuInfo[RTMpCpuId()]; + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)); + Assert(pCpu->fConfigured); + AssertReturn(!ASMAtomicReadBool(&g_HvmR0.fSuspended), VERR_HM_SUSPEND_PENDING); + } #endif #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + AssertReturn(!VMMR0ThreadCtxHooksAreRegistered(pVCpu), VERR_HM_IPE_4); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); PGMRZDynMapStartAutoSet(pVCpu); #endif @@ -1551,12 +1547,12 @@ VMMR0DECL(int) HWACCMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the guest CPU context. 
*/ -VMMR0DECL(int) HWACCMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +VMMR0_INT_DECL(int) HMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFpu64SwitchBack); - if (pVM->hwaccm.s.vmx.fSupported) - return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestFPU64, 0, NULL); - return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestFPU64, 0, NULL); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFpu64SwitchBack); + if (pVM->hm.s.vmx.fSupported) + return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCSaveGuestFPU64, 0, NULL); + return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCSaveGuestFPU64, 0, NULL); } @@ -1568,12 +1564,12 @@ VMMR0DECL(int) HWACCMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the guest CPU context. */ -VMMR0DECL(int) HWACCMR0SaveDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +VMMR0_INT_DECL(int) HMR0SaveDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDebug64SwitchBack); - if (pVM->hwaccm.s.vmx.fSupported) - return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestDebug64, 0, NULL); - return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestDebug64, 0, NULL); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDebug64SwitchBack); + if (pVM->hm.s.vmx.fSupported) + return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCSaveGuestDebug64, 0, NULL); + return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCSaveGuestDebug64, 0, NULL); } @@ -1583,19 +1579,19 @@ VMMR0DECL(int) HWACCMR0SaveDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) * @returns VBox status code. * @param pVM Pointer to the VM. */ -VMMR0DECL(int) HWACCMR0TestSwitcher3264(PVM pVM) +VMMR0_INT_DECL(int) HMR0TestSwitcher3264(PVM pVM) { - PVMCPU pVCpu = &pVM->aCpus[0]; - PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + PVMCPU pVCpu = &pVM->aCpus[0]; + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); uint32_t aParam[5] = {0, 1, 2, 3, 4}; int rc; - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); - if (pVM->hwaccm.s.vmx.fSupported) - rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnTest64, 5, &aParam[0]); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z); + if (pVM->hm.s.vmx.fSupported) + rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCTestSwitcher64, 5, &aParam[0]); else - rc = SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnTest64, 5, &aParam[0]); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); + rc = SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_HMRCTestSwitcher64, 5, &aParam[0]); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z); return rc; } @@ -1607,7 +1603,7 @@ VMMR0DECL(int) HWACCMR0TestSwitcher3264(PVM pVM) * * @returns Suspend pending or not. */ -VMMR0DECL(bool) HWACCMR0SuspendPending(void) +VMMR0_INT_DECL(bool) HMR0SuspendPending(void) { return ASMAtomicReadBool(&g_HvmR0.fSuspended); } @@ -1619,8 +1615,9 @@ VMMR0DECL(bool) HWACCMR0SuspendPending(void) * * @returns The cpu structure pointer. 
*/ -VMMR0DECL(PHMGLOBLCPUINFO) HWACCMR0GetCurrentCpu(void) +VMMR0DECL(PHMGLOBALCPUINFO) HMR0GetCurrentCpu(void) { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); RTCPUID idCpu = RTMpCpuId(); Assert(idCpu < RT_ELEMENTS(g_HvmR0.aCpuInfo)); return &g_HvmR0.aCpuInfo[idCpu]; @@ -1634,7 +1631,7 @@ VMMR0DECL(PHMGLOBLCPUINFO) HWACCMR0GetCurrentCpu(void) * @returns The cpu structure pointer. * @param idCpu id of the VCPU. */ -VMMR0DECL(PHMGLOBLCPUINFO) HWACCMR0GetCurrentCpuEx(RTCPUID idCpu) +VMMR0DECL(PHMGLOBALCPUINFO) HMR0GetCurrentCpuEx(RTCPUID idCpu) { Assert(idCpu < RT_ELEMENTS(g_HvmR0.aCpuInfo)); return &g_HvmR0.aCpuInfo[idCpu]; @@ -1651,14 +1648,15 @@ VMMR0DECL(PHMGLOBLCPUINFO) HWACCMR0GetCurrentCpuEx(RTCPUID idCpu) * @param uAndVal AND mask for saving the result in eax. * @param cbSize Read size. */ -VMMR0DECL(void) HWACCMR0SavePendingIOPortRead(PVMCPU pVCpu, RTGCPTR GCPtrRip, RTGCPTR GCPtrRipNext, unsigned uPort, unsigned uAndVal, unsigned cbSize) +VMMR0_INT_DECL(void) HMR0SavePendingIOPortRead(PVMCPU pVCpu, RTGCPTR GCPtrRip, RTGCPTR GCPtrRipNext, + unsigned uPort, unsigned uAndVal, unsigned cbSize) { - pVCpu->hwaccm.s.PendingIO.enmType = HWACCMPENDINGIO_PORT_READ; - pVCpu->hwaccm.s.PendingIO.GCPtrRip = GCPtrRip; - pVCpu->hwaccm.s.PendingIO.GCPtrRipNext = GCPtrRipNext; - pVCpu->hwaccm.s.PendingIO.s.Port.uPort = uPort; - pVCpu->hwaccm.s.PendingIO.s.Port.uAndVal = uAndVal; - pVCpu->hwaccm.s.PendingIO.s.Port.cbSize = cbSize; + pVCpu->hm.s.PendingIO.enmType = HMPENDINGIO_PORT_READ; + pVCpu->hm.s.PendingIO.GCPtrRip = GCPtrRip; + pVCpu->hm.s.PendingIO.GCPtrRipNext = GCPtrRipNext; + pVCpu->hm.s.PendingIO.s.Port.uPort = uPort; + pVCpu->hm.s.PendingIO.s.Port.uAndVal = uAndVal; + pVCpu->hm.s.PendingIO.s.Port.cbSize = cbSize; return; } @@ -1672,14 +1670,15 @@ VMMR0DECL(void) HWACCMR0SavePendingIOPortRead(PVMCPU pVCpu, RTGCPTR GCPtrRip, RT * @param uAndVal AND mask for fetching the result from eax. * @param cbSize Read size. */ -VMMR0DECL(void) HWACCMR0SavePendingIOPortWrite(PVMCPU pVCpu, RTGCPTR GCPtrRip, RTGCPTR GCPtrRipNext, unsigned uPort, unsigned uAndVal, unsigned cbSize) +VMMR0_INT_DECL(void) HMR0SavePendingIOPortWrite(PVMCPU pVCpu, RTGCPTR GCPtrRip, RTGCPTR GCPtrRipNext, + unsigned uPort, unsigned uAndVal, unsigned cbSize) { - pVCpu->hwaccm.s.PendingIO.enmType = HWACCMPENDINGIO_PORT_WRITE; - pVCpu->hwaccm.s.PendingIO.GCPtrRip = GCPtrRip; - pVCpu->hwaccm.s.PendingIO.GCPtrRipNext = GCPtrRipNext; - pVCpu->hwaccm.s.PendingIO.s.Port.uPort = uPort; - pVCpu->hwaccm.s.PendingIO.s.Port.uAndVal = uAndVal; - pVCpu->hwaccm.s.PendingIO.s.Port.cbSize = cbSize; + pVCpu->hm.s.PendingIO.enmType = HMPENDINGIO_PORT_WRITE; + pVCpu->hm.s.PendingIO.GCPtrRip = GCPtrRip; + pVCpu->hm.s.PendingIO.GCPtrRipNext = GCPtrRipNext; + pVCpu->hm.s.PendingIO.s.Port.uPort = uPort; + pVCpu->hm.s.PendingIO.s.Port.uAndVal = uAndVal; + pVCpu->hm.s.PendingIO.s.Port.cbSize = cbSize; return; } @@ -1690,20 +1689,21 @@ VMMR0DECL(void) HWACCMR0SavePendingIOPortWrite(PVMCPU pVCpu, RTGCPTR GCPtrRip, R * * @returns VBox status code. * @param pVM Pointer to the VM. + * @param enmSwitcher The switcher we're about to use. * @param pfVTxDisabled Where to store whether VT-x was disabled or not. 
*/ -VMMR0DECL(int) HWACCMR0EnterSwitcher(PVM pVM, bool *pfVTxDisabled) +VMMR0_INT_DECL(int) HMR0EnterSwitcher(PVM pVM, VMMSWITCHER enmSwitcher, bool *pfVTxDisabled) { Assert(!(ASMGetFlags() & X86_EFL_IF) || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)); *pfVTxDisabled = false; - if ( !g_HvmR0.fEnabled - || !g_HvmR0.vmx.fSupported /* no such issues with AMD-V */ - || !g_HvmR0.fGlobalInit /* Local init implies the CPU is currently not in VMX root mode. */) - return VINF_SUCCESS; /* nothing to do */ + /* No such issues with AMD-V */ + if (!g_HvmR0.vmx.fSupported) + return VINF_SUCCESS; - switch (VMMGetSwitcher(pVM)) + /* Check if the swithcing we're up to is safe. */ + switch (enmSwitcher) { case VMMSWITCHER_32_TO_32: case VMMSWITCHER_PAE_TO_PAE: @@ -1719,7 +1719,27 @@ VMMR0DECL(int) HWACCMR0EnterSwitcher(PVM pVM, bool *pfVTxDisabled) AssertFailedReturn(VERR_HM_WRONG_SWITCHER); } - PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu(); + /* When using SUPR0EnableVTx we must let the host suspend and resume VT-x, + regardless of whether we're currently using VT-x or not. */ + if (g_HvmR0.vmx.fUsingSUPR0EnableVTx) + { + *pfVTxDisabled = SUPR0SuspendVTxOnCpu(); + return VINF_SUCCESS; + } + + /** @todo Check if this code is presumtive wrt other VT-x users on the + * system... */ + + /* Nothing to do if we haven't enabled VT-x. */ + if (!g_HvmR0.fEnabled) + return VINF_SUCCESS; + + /* Local init implies the CPU is currently not in VMX root mode. */ + if (!g_HvmR0.fGlobalInit) + return VINF_SUCCESS; + + /* Ok, disable VT-x. */ + PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu(); AssertReturn(pCpu && pCpu->hMemObj != NIL_RTR0MEMOBJ, VERR_HM_IPE_2); *pfVTxDisabled = true; @@ -1733,27 +1753,31 @@ VMMR0DECL(int) HWACCMR0EnterSwitcher(PVM pVM, bool *pfVTxDisabled) * Raw-mode switcher hook - re-enable VT-x if was active *and* the current * switcher turned off paging. * - * @returns VBox status code. * @param pVM Pointer to the VM. * @param fVTxDisabled Whether VT-x was disabled or not. */ -VMMR0DECL(int) HWACCMR0LeaveSwitcher(PVM pVM, bool fVTxDisabled) +VMMR0_INT_DECL(void) HMR0LeaveSwitcher(PVM pVM, bool fVTxDisabled) { Assert(!(ASMGetFlags() & X86_EFL_IF)); if (!fVTxDisabled) - return VINF_SUCCESS; /* nothing to do */ + return; /* nothing to do */ - Assert(g_HvmR0.fEnabled); Assert(g_HvmR0.vmx.fSupported); - Assert(g_HvmR0.fGlobalInit); + if (g_HvmR0.vmx.fUsingSUPR0EnableVTx) + SUPR0ResumeVTxOnCpu(fVTxDisabled); + else + { + Assert(g_HvmR0.fEnabled); + Assert(g_HvmR0.fGlobalInit); - PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu(); - AssertReturn(pCpu && pCpu->hMemObj != NIL_RTR0MEMOBJ, VERR_HM_IPE_2); + PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu(); + AssertReturnVoid(pCpu && pCpu->hMemObj != NIL_RTR0MEMOBJ); - void *pvCpuPage = RTR0MemObjAddress(pCpu->hMemObj); - RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); - return VMXR0EnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false); + void *pvCpuPage = RTR0MemObjAddress(pCpu->hMemObj); + RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); + VMXR0EnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false, &g_HvmR0.vmx.Msrs); + } } #ifdef VBOX_STRICT @@ -1765,7 +1789,7 @@ VMMR0DECL(int) HWACCMR0LeaveSwitcher(PVM pVM, bool fVTxDisabled) * @param Sel Selector number. * @param pszMsg Message to prepend the log entry with. */ -VMMR0DECL(void) HWACCMR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char *pszMsg) +VMMR0DECL(void) HMR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char *pszMsg) { /* * Make variable description string. 
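/*
 * A minimal caller sketch for the switcher hooks above (illustration only,
 * assuming a pVM in scope and the VMMSWITCHER_32_TO_32 case shown in the
 * switch): the two hooks are meant to bracket a raw-mode world-switcher
 * call, so VT-x is suspended before the switcher turns off paging and is
 * resumed again afterwards.
 */
bool fVTxDisabled = false;
int  rc = HMR0EnterSwitcher(pVM, VMMSWITCHER_32_TO_32, &fVTxDisabled);
if (RT_SUCCESS(rc))
{
    /* ... invoke the world-switcher code here ... */
    HMR0LeaveSwitcher(pVM, fVTxDisabled);
}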
@@ -1886,7 +1910,7 @@ VMMR0DECL(void) HWACCMR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the CPU context. */ -VMMR0DECL(void) HWACCMDumpRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +VMMR0DECL(void) HMDumpRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { NOREF(pVM); diff --git a/src/VBox/VMM/VMMR0/HWACCMR0A.asm b/src/VBox/VMM/VMMR0/HMR0A.asm index f6098619..151a0784 100644 --- a/src/VBox/VMM/VMMR0/HWACCMR0A.asm +++ b/src/VBox/VMM/VMMR0/HMR0A.asm @@ -1,10 +1,10 @@ -; $Id: HWACCMR0A.asm $ +; $Id: HMR0A.asm $ ;; @file -; VMXM - R0 vmx helpers +; HM - Ring-0 VMX, SVM world-switch and helper routines ; ; -; Copyright (C) 2006-2007 Oracle Corporation +; Copyright (C) 2006-2013 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; @@ -20,10 +20,10 @@ ;******************************************************************************* %include "VBox/asmdefs.mac" %include "VBox/err.mac" -%include "VBox/vmm/hwacc_vmx.mac" +%include "VBox/vmm/hm_vmx.mac" %include "VBox/vmm/cpum.mac" %include "iprt/x86.mac" -%include "HWACCMInternal.mac" +%include "HMInternal.mac" %ifdef RT_OS_OS2 ;; @todo fix OMF support in yasm and kick nasm out completely. %macro vmwrite 2, @@ -55,19 +55,19 @@ ;; ; Load the NULL selector into DS, ES, FS and GS on 64-bit darwin so we don't ; risk loading a stale LDT value or something invalid. - %define HWACCM_64_BIT_USE_NULL_SEL + %define HM_64_BIT_USE_NULL_SEL %endif %endif %endif +%ifdef RT_ARCH_AMD64 + %define VBOX_SKIP_RESTORE_SEG +%endif + ;; The offset of the XMM registers in X86FXSTATE. ; Use define because I'm too lazy to convert the struct. %define XMM_OFF_IN_X86FXSTATE 160 - -;; This is too risky wrt. stability, performance and correctness. -;%define VBOX_WITH_DR6_EXPERIMENT 1 - ;; @def MYPUSHAD ; Macro generating an equivalent to pushad @@ -154,9 +154,16 @@ %endmacro %endif +%ifdef VBOX_SKIP_RESTORE_SEG +%macro MYPUSHSEGS64 2 +%endmacro + +%macro MYPOPSEGS64 2 +%endmacro +%else ; !VBOX_SKIP_RESTORE_SEG ; trashes, rax, rdx & rcx %macro MYPUSHSEGS64 2 - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL mov %2, es push %1 mov %2, ds @@ -168,7 +175,7 @@ rdmsr push rdx push rax - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL push fs %endif @@ -177,7 +184,7 @@ rdmsr push rdx push rax - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL push gs %endif %endmacro @@ -185,7 +192,7 @@ ; trashes, rax, rdx & rcx %macro MYPOPSEGS64 2 ; Note: do not step through this code with a debugger! - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL xor eax, eax mov ds, ax mov es, ax @@ -193,7 +200,7 @@ mov gs, ax %endif - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL pop gs %endif pop rax @@ -201,7 +208,7 @@ mov ecx, MSR_K8_GS_BASE wrmsr - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL pop fs %endif pop rax @@ -210,13 +217,14 @@ wrmsr ; Now it's safe to step again - %ifndef HWACCM_64_BIT_USE_NULL_SEL + %ifndef HM_64_BIT_USE_NULL_SEL pop %1 mov ds, %2 pop %1 mov es, %2 %endif %endmacro +%endif ; VBOX_SKIP_RESTORE_SEG %macro MYPUSHAD32 0 pushad @@ -272,6 +280,119 @@ BEGINCODE ;/** +; * Restores host-state fields. +; * +; * @returns VBox status code +; * @param f32RestoreHost x86: [ebp + 08h] msc: ecx gcc: edi RestoreHost flags. 
+; * @param pRestoreHost x86: [ebp + 0ch] msc: rdx gcc: rsi Pointer to the RestoreHost struct. +; */ +ALIGNCODE(16) +BEGINPROC VMXRestoreHostState +%ifdef RT_ARCH_AMD64 + %ifndef ASM_CALL64_GCC + ; Use GCC's input registers since we'll be needing both rcx and rdx further + ; down with the wrmsr instruction. Use the R10 and R11 register for saving + ; RDI and RSI since MSC preserve the two latter registers. + mov r10, rdi + mov r11, rsi + mov rdi, rcx + mov rsi, rdx + %endif + + test edi, VMX_RESTORE_HOST_GDTR + jz .test_idtr + lgdt [rsi + VMXRESTOREHOST.HostGdtr] + +.test_idtr: + test edi, VMX_RESTORE_HOST_IDTR + jz .test_ds + lidt [rsi + VMXRESTOREHOST.HostIdtr] + +.test_ds: + test edi, VMX_RESTORE_HOST_SEL_DS + jz .test_es + mov ax, [rsi + VMXRESTOREHOST.uHostSelDS] + mov ds, eax + +.test_es: + test edi, VMX_RESTORE_HOST_SEL_ES + jz .test_tr + mov ax, [rsi + VMXRESTOREHOST.uHostSelES] + mov es, eax + +.test_tr: + test edi, VMX_RESTORE_HOST_SEL_TR + jz .test_fs + ; When restoring the TR, we must first clear the busy flag or we'll end up faulting. + mov dx, [rsi + VMXRESTOREHOST.uHostSelTR] + mov ax, dx + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt. + and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr dx + +.test_fs: + ; + ; When restoring the selector values for FS and GS, we'll temporarily trash + ; the base address (at least the high 32-bit bits, but quite possibly the + ; whole base address), the wrmsr will restore it correctly. (VT-x actually + ; restores the base correctly when leaving guest mode, but not the selector + ; value, so there is little problem with interrupts being enabled prior to + ; this restore job.) + ; We'll disable ints once for both FS and GS as that's probably faster. + ; + test edi, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS + jz .restore_success + pushfq + cli ; (see above) + + test edi, VMX_RESTORE_HOST_SEL_FS + jz .test_gs + mov ax, word [rsi + VMXRESTOREHOST.uHostSelFS] + mov fs, eax + mov eax, dword [rsi + VMXRESTOREHOST.uHostFSBase] ; uHostFSBase - Lo + mov edx, dword [rsi + VMXRESTOREHOST.uHostFSBase + 4h] ; uHostFSBase - Hi + mov ecx, MSR_K8_FS_BASE + wrmsr + +.test_gs: + test edi, VMX_RESTORE_HOST_SEL_GS + jz .restore_flags + mov ax, word [rsi + VMXRESTOREHOST.uHostSelGS] + mov gs, eax + mov eax, dword [rsi + VMXRESTOREHOST.uHostGSBase] ; uHostGSBase - Lo + mov edx, dword [rsi + VMXRESTOREHOST.uHostGSBase + 4h] ; uHostGSBase - Hi + mov ecx, MSR_K8_GS_BASE + wrmsr + +.restore_flags: + popfq + +.restore_success: + mov eax, VINF_SUCCESS + %ifndef ASM_CALL64_GCC + ; Restore RDI and RSI on MSC. + mov rdi, r10 + mov rsi, r11 + %endif +%else ; RT_ARCH_X86 + mov eax, VERR_NOT_IMPLEMENTED +%endif + ret +ENDPROC VMXRestoreHostState + + +;/** +; * Dispatches an NMI to the host. +; */ +ALIGNCODE(16) +BEGINPROC VMXDispatchHostNmi + int 2 ; NMI is always vector 2. The IDT[2] IRQ handler cannot be anything else. See Intel spec. 6.3.1 "External Interrupts". + ret +ENDPROC VMXDispatchHostNmi + + +;/** ; * Executes VMWRITE, 64-bit value. 
; * ; * @returns VBox status code @@ -279,7 +400,7 @@ BEGINCODE ; * @param u64Data x86: [ebp + 0ch] msc: rdx gcc: rsi VM field value ; */ ALIGNCODE(16) -BEGINPROC VMXWriteVMCS64 +BEGINPROC VMXWriteVmcs64 %ifdef RT_ARCH_AMD64 %ifdef ASM_CALL64_GCC and edi, 0ffffffffh @@ -334,7 +455,7 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL -ENDPROC VMXWriteVMCS64 +ENDPROC VMXWriteVmcs64 ;/** @@ -344,9 +465,9 @@ ENDPROC VMXWriteVMCS64 ; * @param idxField VMCS index ; * @param pData Ptr to store VM field value ; */ -;DECLASM(int) VMXReadVMCS64(uint32_t idxField, uint64_t *pData); +;DECLASM(int) VMXReadVmcs64(uint32_t idxField, uint64_t *pData); ALIGNCODE(16) -BEGINPROC VMXReadVMCS64 +BEGINPROC VMXReadVmcs64 %ifdef RT_ARCH_AMD64 %ifdef ASM_CALL64_GCC and edi, 0ffffffffh @@ -401,7 +522,7 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL -ENDPROC VMXReadVMCS64 +ENDPROC VMXReadVmcs64 ;/** @@ -411,9 +532,9 @@ ENDPROC VMXReadVMCS64 ; * @param idxField VMCS index ; * @param pu32Data Ptr to store VM field value ; */ -;DECLASM(int) VMXReadVMCS32(uint32_t idxField, uint32_t *pu32Data); +;DECLASM(int) VMXReadVmcs32(uint32_t idxField, uint32_t *pu32Data); ALIGNCODE(16) -BEGINPROC VMXReadVMCS32 +BEGINPROC VMXReadVmcs32 %ifdef RT_ARCH_AMD64 %ifdef ASM_CALL64_GCC and edi, 0ffffffffh @@ -466,7 +587,7 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL -ENDPROC VMXReadVMCS32 +ENDPROC VMXReadVmcs32 ;/** @@ -476,9 +597,9 @@ ENDPROC VMXReadVMCS32 ; * @param idxField VMCS index ; * @param u32Data Ptr to store VM field value ; */ -;DECLASM(int) VMXWriteVMCS32(uint32_t idxField, uint32_t u32Data); +;DECLASM(int) VMXWriteVmcs32(uint32_t idxField, uint32_t u32Data); ALIGNCODE(16) -BEGINPROC VMXWriteVMCS32 +BEGINPROC VMXWriteVmcs32 %ifdef RT_ARCH_AMD64 %ifdef ASM_CALL64_GCC and edi, 0ffffffffh @@ -530,7 +651,7 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL -ENDPROC VMXWriteVMCS32 +ENDPROC VMXWriteVmcs32 ;/** @@ -566,7 +687,7 @@ BEGINPROC VMXEnable .good: jnz .the_end - mov eax, VERR_VMX_GENERIC + mov eax, VERR_VMX_VMXON_FAILED .the_end: %ifdef RT_ARCH_AMD64 @@ -582,9 +703,9 @@ BITS 64 and edx, 0ffffffffh xor eax, eax vmxon [rdx] - mov r8d, VERR_INVALID_PARAMETER + mov r8d, VERR_VMX_VMXON_FAILED cmovz eax, r8d - mov r9d, VERR_VMX_INVALID_VMCS_PTR + mov r9d, VERR_VMX_INVALID_VMXON_PTR cmovc eax, r9d jmp far [.fpret wrt rip] .fpret: ; 16:32 Pointer to .the_end. 
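/*
 * A minimal usage sketch for the renamed VMCS accessors above (illustration
 * only; idxField is a hypothetical stand-in for a real VMX_VMCS_xxx field
 * index). Both functions return a VBox status code, VINF_SUCCESS on success.
 */
uint32_t idxField  = 0;              /* hypothetical field index */
uint32_t u32Value  = 0;
int rc = VMXWriteVmcs32(idxField, UINT32_C(0x1234));
if (RT_SUCCESS(rc))
    rc = VMXReadVmcs32(idxField, &u32Value);
AssertRC(rc);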
@@ -627,11 +748,11 @@ ENDPROC VMXDisable ; * Executes VMCLEAR ; * ; * @returns VBox status code -; * @param HCPhysVMCS Physical address of VM control structure +; * @param HCPhysVmcs Physical address of VM control structure ; */ -;DECLASM(int) VMXClearVMCS(RTHCPHYS HCPhysVMCS); +;DECLASM(int) VMXClearVmcs(RTHCPHYS HCPhysVmcs); ALIGNCODE(16) -BEGINPROC VMXClearVMCS +BEGINPROC VMXClearVmcs %ifdef RT_ARCH_AMD64 xor rax, rax %ifdef ASM_CALL64_GCC @@ -663,7 +784,7 @@ BEGINPROC VMXClearVMCS ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: - lea rdx, [rsp + 4] ; &HCPhysVMCS + lea rdx, [rsp + 4] ; &HCPhysVmcs and edx, 0ffffffffh xor eax, eax vmclear [rdx] @@ -674,18 +795,18 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif -ENDPROC VMXClearVMCS +ENDPROC VMXClearVmcs ;/** ; * Executes VMPTRLD ; * ; * @returns VBox status code -; * @param HCPhysVMCS Physical address of VMCS structure +; * @param HCPhysVmcs Physical address of VMCS structure ; */ -;DECLASM(int) VMXActivateVMCS(RTHCPHYS HCPhysVMCS); +;DECLASM(int) VMXActivateVmcs(RTHCPHYS HCPhysVmcs); ALIGNCODE(16) -BEGINPROC VMXActivateVMCS +BEGINPROC VMXActivateVmcs %ifdef RT_ARCH_AMD64 xor rax, rax %ifdef ASM_CALL64_GCC @@ -717,7 +838,7 @@ BEGINPROC VMXActivateVMCS ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: - lea rdx, [rsp + 4] ; &HCPhysVMCS + lea rdx, [rsp + 4] ; &HCPhysVmcs and edx, 0ffffffffh xor eax, eax vmptrld [rdx] @@ -728,7 +849,7 @@ BITS 64 dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL -ENDPROC VMXActivateVMCS +ENDPROC VMXActivateVmcs ;/** @@ -737,8 +858,8 @@ ENDPROC VMXActivateVMCS ; * @returns VBox status code ; * @param [esp + 04h] gcc:rdi msc:rcx Param 1 - First parameter - Address that will receive the current pointer ; */ -;DECLASM(int) VMXGetActivateVMCS(RTHCPHYS *pVMCS); -BEGINPROC VMXGetActivateVMCS +;DECLASM(int) VMXGetActivatedVmcs(RTHCPHYS *pVMCS); +BEGINPROC VMXGetActivatedVmcs %ifdef RT_OS_OS2 mov eax, VERR_NOT_SUPPORTED ret @@ -767,7 +888,7 @@ BEGINPROC VMXGetActivateVMCS ALIGNCODE(16) BITS 64 .sixtyfourbit_mode: - lea rdx, [rsp + 4] ; &HCPhysVMCS + lea rdx, [rsp + 4] ; &HCPhysVmcs and edx, 0ffffffffh vmptrst qword [rdx] xor eax, eax @@ -777,7 +898,7 @@ BITS 64 BITS 32 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL %endif -ENDPROC VMXGetActivateVMCS +ENDPROC VMXGetActivatedVmcs ;/** ; * Invalidate a page using invept @@ -970,9 +1091,9 @@ ENDPROC SVMR0InvlpgA ; * @param pGdtr Where to store the 64-bit GDTR. ; * @param pIdtr Where to store the 64-bit IDTR. ; */ -;DECLASM(void) hwaccmR0Get64bitGDTRandIDTR(PX86XDTR64 pGdtr, PX86XDTR64 pIdtr); +;DECLASM(void) HMR0Get64bitGdtrAndIdtr(PX86XDTR64 pGdtr, PX86XDTR64 pIdtr); ALIGNCODE(16) -BEGINPROC hwaccmR0Get64bitGDTRandIDTR +BEGINPROC HMR0Get64bitGdtrAndIdtr db 0xea ; jmp far .sixtyfourbit_mode dd .sixtyfourbit_mode, NAME(SUPR0Abs64bitKernelCS) .the_end: @@ -990,16 +1111,16 @@ BITS 64 .fpret: ; 16:32 Pointer to .the_end. dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 -ENDPROC hwaccmR0Get64bitGDTRandIDTR +ENDPROC HMR0Get64bitGdtrAndIdtr ;/** ; * Gets 64-bit CR3 on darwin. ; * @returns CR3 ; */ -;DECLASM(uint64_t) hwaccmR0Get64bitCR3(void); +;DECLASM(uint64_t) HMR0Get64bitCR3(void); ALIGNCODE(16) -BEGINPROC hwaccmR0Get64bitCR3 +BEGINPROC HMR0Get64bitCR3 db 0xea ; jmp far .sixtyfourbit_mode dd .sixtyfourbit_mode, NAME(SUPR0Abs64bitKernelCS) .the_end: @@ -1015,7 +1136,7 @@ BITS 64 .fpret: ; 16:32 Pointer to .the_end. 
dd .the_end, NAME(SUPR0AbsKernelCS) BITS 32 -ENDPROC hwaccmR0Get64bitCR3 +ENDPROC HMR0Get64bitCR3 %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL @@ -1025,7 +1146,7 @@ ENDPROC hwaccmR0Get64bitCR3 ; Wrapper around vmx.pfnStartVM that preserves host XMM registers and ; load the guest ones when necessary. ; -; @cproto DECLASM(int) hwaccmR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu, PFNHWACCMVMXSTARTVM pfnStartVM); +; @cproto DECLASM(int) HMR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu, PFNHMVMXSTARTVM pfnStartVM); ; ; @returns eax ; @@ -1036,11 +1157,11 @@ ENDPROC hwaccmR0Get64bitCR3 ; @param pVCpu msc:[rbp+30h] ; @param pfnStartVM msc:[rbp+38h] ; -; @remarks This is essentially the same code as hwaccmR0SVMRunWrapXMM, only the parameters differ a little bit. +; @remarks This is essentially the same code as HMR0SVMRunWrapXMM, only the parameters differ a little bit. ; ; ASSUMING 64-bit and windows for now. ALIGNCODE(16) -BEGINPROC hwaccmR0VMXStartVMWrapXMM +BEGINPROC HMR0VMXStartVMWrapXMM push xBP mov xBP, xSP sub xSP, 0a0h + 040h ; Don't bother optimizing the frame size. @@ -1147,13 +1268,13 @@ ALIGNCODE(8) movdqa xmm15, [rsp + 040h + 090h] leave ret -ENDPROC hwaccmR0VMXStartVMWrapXMM +ENDPROC HMR0VMXStartVMWrapXMM ;; ; Wrapper around svm.pfnVMRun that preserves host XMM registers and ; load the guest ones when necessary. ; -; @cproto DECLASM(int) hwaccmR0SVMRunWrapXMM(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu, PFNHWACCMSVMVMRUN pfnVMRun); +; @cproto DECLASM(int) HMR0SVMRunWrapXMM(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu, PFNHMSVMVMRUN pfnVMRun); ; ; @returns eax ; @@ -1164,11 +1285,11 @@ ENDPROC hwaccmR0VMXStartVMWrapXMM ; @param pVCpu msc:[rbp+30h] ; @param pfnVMRun msc:[rbp+38h] ; -; @remarks This is essentially the same code as hwaccmR0VMXStartVMWrapXMM, only the parameters differ a little bit. +; @remarks This is essentially the same code as HMR0VMXStartVMWrapXMM, only the parameters differ a little bit. ; ; ASSUMING 64-bit and windows for now. ALIGNCODE(16) -BEGINPROC hwaccmR0SVMRunWrapXMM +BEGINPROC HMR0SVMRunWrapXMM push xBP mov xBP, xSP sub xSP, 0a0h + 040h ; Don't bother optimizing the frame size. @@ -1275,7 +1396,7 @@ ALIGNCODE(8) movdqa xmm15, [rsp + 040h + 090h] leave ret -ENDPROC hwaccmR0SVMRunWrapXMM +ENDPROC HMR0SVMRunWrapXMM %endif ; VBOX_WITH_KERNEL_USING_XMM @@ -1299,7 +1420,7 @@ ENDPROC hwaccmR0SVMRunWrapXMM %define MYPOPSEGS MYPOPSEGS32 %endif -%include "HWACCMR0Mixed.mac" +%include "HMR0Mixed.mac" %ifdef VBOX_WITH_HYBRID_32BIT_KERNEL @@ -1487,7 +1608,7 @@ ENDPROC SVMR0VMRun64 %define RT_ARCH_AMD64 %undef ASM_CALL64_MSC %define ASM_CALL64_GCC - %define xS 8 + %define xCB 8 %define xSP rsp %define xBP rbp %define xAX rax @@ -1502,5 +1623,6 @@ ENDPROC SVMR0VMRun64 %define MYPUSHSEGS MYPUSHSEGS64 %define MYPOPSEGS MYPOPSEGS64 - %include "HWACCMR0Mixed.mac" + %include "HMR0Mixed.mac" %endif ; VBOX_WITH_HYBRID_32BIT_KERNEL + diff --git a/src/VBox/VMM/VMMR0/HMR0Mixed.mac b/src/VBox/VMM/VMMR0/HMR0Mixed.mac new file mode 100644 index 00000000..c46e7b5b --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMR0Mixed.mac @@ -0,0 +1,775 @@ +; $Id: HMR0Mixed.mac $ +;; @file +; HM - Ring-0 Host 32/64, Guest 32/64 world-switch routines +; +; Darwin uses this to build two versions in the hybrid case. +; Included by HMR0A.asm with RT_ARCH_AMD64 defined or undefined. 
+; + +; +; Copyright (C) 2006-2013 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +%ifdef RT_ARCH_AMD64 + ;; + ; Keep these macro definitions in this file as it gets included and compiled + ; with RT_ARCH_AMD64 once and RT_ARCH_X86 once. + %define VMX_SKIP_GDTR + %ifdef RT_OS_DARWIN + ; Darwin (Mavericks) uses IDTR limit to store the CPUID so we need to restore it always. + ; See @bugref{6875}. + %elifdef RT_OS_WINDOWS + ; Windows 8.1 RTM also seems to be using the IDTR limit for something. See @bugref{6956}. + ;; @todo figure out what exactly it does and try and restrict it more. + %else + %define VMX_SKIP_IDTR + %endif + %define VMX_SKIP_TR +%endif + +;; @def RESTORE_STATE_VM32 +; Macro restoring essential host state and updating guest state +; for common host, 32-bit guest for VT-x. +%macro RESTORE_STATE_VM32 0 + ; Restore base and limit of the IDTR & GDTR. + %ifndef VMX_SKIP_IDTR + lidt [xSP] + add xSP, xCB * 2 + %endif + %ifndef VMX_SKIP_GDTR + lgdt [xSP] + add xSP, xCB * 2 + %endif + + push xDI + %ifndef VMX_SKIP_TR + mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR). + %else + mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR). + %endif + + mov [ss:xDI + CPUMCTX.eax], eax + mov [ss:xDI + CPUMCTX.ebx], ebx + mov [ss:xDI + CPUMCTX.ecx], ecx + mov [ss:xDI + CPUMCTX.edx], edx + mov [ss:xDI + CPUMCTX.esi], esi + mov [ss:xDI + CPUMCTX.ebp], ebp + mov xAX, cr2 + mov [ss:xDI + CPUMCTX.cr2], xAX + + %ifdef RT_ARCH_AMD64 + pop xAX ; The guest edi we pushed above. + mov dword [ss:xDI + CPUMCTX.edi], eax + %else + pop dword [ss:xDI + CPUMCTX.edi] ; The guest edi we pushed above. + %endif + + %ifndef VMX_SKIP_TR + ; Restore TSS selector; must mark it as not busy before using ltr (!) + ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) + ; @todo get rid of sgdt + pop xBX ; Saved TR + sub xSP, xCB * 2 + sgdt [xSP] + mov xAX, xBX + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. + and dword [ss:xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr bx + add xSP, xCB * 2 + %endif + + pop xAX ; Saved LDTR + %ifdef RT_ARCH_AMD64 + cmp eax, 0 + je %%skip_ldt_write32 + %endif + lldt ax + +%%skip_ldt_write32: + add xSP, xCB ; pCtx + + %ifdef VMX_USE_CACHED_VMCS_ACCESSES + pop xDX ; Saved pCache + + ; Note! If we get here as a result of invalid VMCS pointer, all the following + ; vmread's will fail (only eflags.cf=1 will be set) but that shouldn't cause any + ; trouble only just less efficient. + mov ecx, [ss:xDX + VMCSCACHE.Read.cValidEntries] + cmp ecx, 0 ; Can't happen + je %%no_cached_read32 + jmp %%cached_read32 + +ALIGN(16) +%%cached_read32: + dec xCX + mov eax, [ss:xDX + VMCSCACHE.Read.aField + xCX * 4] + vmread [ss:xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX + cmp xCX, 0 + jnz %%cached_read32 +%%no_cached_read32: + %endif + + ; Restore segment registers. + MYPOPSEGS xAX, ax + + ; Restore general purpose registers. 
+ MYPOPAD +%endmacro + + +;/** +; * Prepares for and executes VMLAUNCH/VMRESUME (32 bits guest mode) +; * +; * @returns VBox status code +; * @param fResume x86:[ebp+8], msc:rcx,gcc:rdi Whether to use vmlauch/vmresume. +; * @param pCtx x86:[ebp+c], msc:rdx,gcc:rsi Pointer to the guest-CPU context. +; * @param pCache x86:[esp+10],msc:r8, gcc:rdx Pointer to the VMCS cache. +; */ +ALIGNCODE(16) +BEGINPROC MY_NAME(VMXR0StartVM32) + push xBP + mov xBP, xSP + + pushf + cli + + ; Save all general purpose host registers. + MYPUSHAD + + ; First we have to save some final CPU context registers. + mov eax, VMX_VMCS_HOST_RIP +%ifdef RT_ARCH_AMD64 + lea r10, [.vmlaunch_done wrt rip] + vmwrite rax, r10 +%else + mov ecx, .vmlaunch_done + vmwrite eax, ecx +%endif + ; Note: assumes success! + + ; Save guest-CPU context pointer. +%ifdef RT_ARCH_AMD64 + %ifdef ASM_CALL64_GCC + ; fResume already in rdi + ; pCtx already in rsi + mov rbx, rdx ; pCache + %else + mov rdi, rcx ; fResume + mov rsi, rdx ; pCtx + mov rbx, r8 ; pCache + %endif +%else + mov edi, [ebp + 8] ; fResume + mov esi, [ebp + 12] ; pCtx + mov ebx, [ebp + 16] ; pCache +%endif + + ; Save segment registers. + ; Note: MYPUSHSEGS trashes rdx & rcx, so we moved it here (msvc amd64 case). + MYPUSHSEGS xAX, ax + +%ifdef VMX_USE_CACHED_VMCS_ACCESSES + mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] + cmp ecx, 0 + je .no_cached_writes + mov edx, ecx + mov ecx, 0 + jmp .cached_write + +ALIGN(16) +.cached_write: + mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4] + vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8] + inc xCX + cmp xCX, xDX + jl .cached_write + + mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 +.no_cached_writes: + + ; Save the pCache pointer. + push xBX +%endif + + ; Save the pCtx pointer. + push xSI + + ; Save host LDTR. + xor eax, eax + sldt ax + push xAX + +%ifndef VMX_SKIP_TR + ; The host TR limit is reset to 0x67; save & restore it manually. + str eax + push xAX +%endif + +%ifndef VMX_SKIP_GDTR + ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! + sub xSP, xCB * 2 + sgdt [xSP] +%endif +%ifndef VMX_SKIP_IDTR + sub xSP, xCB * 2 + sidt [xSP] +%endif + + ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction). + mov xBX, [xSI + CPUMCTX.cr2] + mov xDX, cr2 + cmp xBX, xDX + je .skip_cr2_write32 + mov cr2, xBX + +.skip_cr2_write32: + mov eax, VMX_VMCS_HOST_RSP + vmwrite xAX, xSP + ; Note: assumes success! + ; Don't mess with ESP anymore!!! + + ; Load guest general purpose registers. + mov eax, [xSI + CPUMCTX.eax] + mov ebx, [xSI + CPUMCTX.ebx] + mov ecx, [xSI + CPUMCTX.ecx] + mov edx, [xSI + CPUMCTX.edx] + mov ebp, [xSI + CPUMCTX.ebp] + + ; Resume or start VM? + cmp xDI, 0 ; fResume + je .vmlaunch_launch + + ; Load guest edi & esi. + mov edi, [xSI + CPUMCTX.edi] + mov esi, [xSI + CPUMCTX.esi] + + vmresume + jmp .vmlaunch_done; ; Here if vmresume detected a failure. + +.vmlaunch_launch: + ; Save guest edi & esi. + mov edi, [xSI + CPUMCTX.edi] + mov esi, [xSI + CPUMCTX.esi] + + vmlaunch + jmp .vmlaunch_done; ; Here if vmlaunch detected a failure. + +ALIGNCODE(16) ;; @todo YASM BUG - this alignment is wrong on darwin, it's 1 byte off. 
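; .vmlaunch_done (below) is also the address written to VMX_VMCS_HOST_RIP
; above, so it is reached both on a normal VM-exit and when VMLAUNCH/VMRESUME
; fail immediately; the jc/jz checks that follow distinguish VMfailInvalid
; (CF=1, invalid VMCS pointer) from VMfailValid (ZF=1, VM-entry failed with a
; current VMCS).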
+.vmlaunch_done: + jc near .vmxstart_invalid_vmcs_ptr + jz near .vmxstart_start_failed + + RESTORE_STATE_VM32 + mov eax, VINF_SUCCESS + +.vmstart_end: + popf + pop xBP + ret + +.vmxstart_invalid_vmcs_ptr: + RESTORE_STATE_VM32 + mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM + jmp .vmstart_end + +.vmxstart_start_failed: + RESTORE_STATE_VM32 + mov eax, VERR_VMX_UNABLE_TO_START_VM + jmp .vmstart_end + +ENDPROC MY_NAME(VMXR0StartVM32) + + +%ifdef RT_ARCH_AMD64 +;; @def RESTORE_STATE_VM64 +; Macro restoring essential host state and updating guest state +; for 64-bit host, 64-bit guest for VT-x. +; +%macro RESTORE_STATE_VM64 0 + ; Restore base and limit of the IDTR & GDTR + %ifndef VMX_SKIP_IDTR + lidt [xSP] + add xSP, xCB * 2 + %endif + %ifndef VMX_SKIP_GDTR + lgdt [xSP] + add xSP, xCB * 2 + %endif + + push xDI + %ifndef VMX_SKIP_TR + mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR) + %else + mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR) + %endif + + mov qword [xDI + CPUMCTX.eax], rax + mov qword [xDI + CPUMCTX.ebx], rbx + mov qword [xDI + CPUMCTX.ecx], rcx + mov qword [xDI + CPUMCTX.edx], rdx + mov qword [xDI + CPUMCTX.esi], rsi + mov qword [xDI + CPUMCTX.ebp], rbp + mov qword [xDI + CPUMCTX.r8], r8 + mov qword [xDI + CPUMCTX.r9], r9 + mov qword [xDI + CPUMCTX.r10], r10 + mov qword [xDI + CPUMCTX.r11], r11 + mov qword [xDI + CPUMCTX.r12], r12 + mov qword [xDI + CPUMCTX.r13], r13 + mov qword [xDI + CPUMCTX.r14], r14 + mov qword [xDI + CPUMCTX.r15], r15 + mov rax, cr2 + mov qword [xDI + CPUMCTX.cr2], rax + + pop xAX ; The guest rdi we pushed above + mov qword [xDI + CPUMCTX.edi], rax + + %ifndef VMX_SKIP_TR + ; Restore TSS selector; must mark it as not busy before using ltr (!) + ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p). + ; @todo get rid of sgdt + pop xBX ; Saved TR + sub xSP, xCB * 2 + sgdt [xSP] + mov xAX, xBX + and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset. + add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. + and dword [xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit). + ltr bx + add xSP, xCB * 2 + %endif + + pop xAX ; Saved LDTR + cmp eax, 0 + je %%skip_ldt_write64 + lldt ax + +%%skip_ldt_write64: + pop xSI ; pCtx (needed in rsi by the macros below) + + %ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + ; Save the guest MSRs and load the host MSRs. + LOADHOSTMSREX MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE + LOADHOSTMSREX MSR_K8_SF_MASK, CPUMCTX.msrSFMASK + LOADHOSTMSREX MSR_K6_STAR, CPUMCTX.msrSTAR + LOADHOSTMSREX MSR_K8_LSTAR, CPUMCTX.msrLSTAR + %endif + + %ifdef VMX_USE_CACHED_VMCS_ACCESSES + pop xDX ; Saved pCache + + ; Note! If we get here as a result of invalid VMCS pointer, all the following + ; vmread's will fail (only eflags.cf=1 will be set) but that shouldn't cause any + ; trouble only just less efficient. + mov ecx, [xDX + VMCSCACHE.Read.cValidEntries] + cmp ecx, 0 ; Can't happen + je %%no_cached_read64 + jmp %%cached_read64 + +ALIGN(16) +%%cached_read64: + dec xCX + mov eax, [xDX + VMCSCACHE.Read.aField + xCX * 4] + vmread [xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX + cmp xCX, 0 + jnz %%cached_read64 +%%no_cached_read64: + %endif + + ; Restore segment registers. + MYPOPSEGS xAX, ax + + ; Restore general purpose registers. 
+ MYPOPAD +%endmacro + + +;/** +; * Prepares for and executes VMLAUNCH/VMRESUME (64 bits guest mode) +; * +; * @returns VBox status code +; * @param fResume msc:rcx, gcc:rdi Whether to use vmlauch/vmresume. +; * @param pCtx msc:rdx, gcc:rsi Pointer to the guest-CPU context. +; * @param pCache msc:r8, gcc:rdx Pointer to the VMCS cache. +; */ +ALIGNCODE(16) +BEGINPROC MY_NAME(VMXR0StartVM64) + push xBP + mov xBP, xSP + + pushf + cli + + ; Save all general purpose host registers. + MYPUSHAD + + ; First we have to save some final CPU context registers. + lea r10, [.vmlaunch64_done wrt rip] + mov rax, VMX_VMCS_HOST_RIP ; Return address (too difficult to continue after VMLAUNCH?). + vmwrite rax, r10 + ; Note: assumes success! + + ; Save guest-CPU context pointer. +%ifdef ASM_CALL64_GCC + ; fResume already in rdi + ; pCtx already in rsi + mov rbx, rdx ; pCache +%else + mov rdi, rcx ; fResume + mov rsi, rdx ; pCtx + mov rbx, r8 ; pCache +%endif + + ; Save segment registers. + ; Note: MYPUSHSEGS trashes rdx & rcx, so we moved it here (msvc amd64 case). + MYPUSHSEGS xAX, ax + +%ifdef VMX_USE_CACHED_VMCS_ACCESSES + mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] + cmp ecx, 0 + je .no_cached_writes + mov edx, ecx + mov ecx, 0 + jmp .cached_write + +ALIGN(16) +.cached_write: + mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4] + vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8] + inc xCX + cmp xCX, xDX + jl .cached_write + + mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 +.no_cached_writes: + + ; Save the pCache pointer. + push xBX +%endif + +%ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + ; Save the host MSRs and load the guest MSRs. + LOADGUESTMSR MSR_K8_LSTAR, CPUMCTX.msrLSTAR + LOADGUESTMSR MSR_K6_STAR, CPUMCTX.msrSTAR + LOADGUESTMSR MSR_K8_SF_MASK, CPUMCTX.msrSFMASK + LOADGUESTMSR MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE +%endif + + ; Save the pCtx pointer. + push xSI + + ; Save host LDTR. + xor eax, eax + sldt ax + push xAX + +%ifndef VMX_SKIP_TR + ; The host TR limit is reset to 0x67; save & restore it manually. + str eax + push xAX +%endif + +%ifndef VMX_SKIP_GDTR + ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! + sub xSP, xCB * 2 + sgdt [xSP] +%endif +%ifndef VMX_SKIP_IDTR + sub xSP, xCB * 2 + sidt [xSP] +%endif + + ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction). + mov rbx, qword [xSI + CPUMCTX.cr2] + mov rdx, cr2 + cmp rbx, rdx + je .skip_cr2_write + mov cr2, rbx + +.skip_cr2_write: + mov eax, VMX_VMCS_HOST_RSP + vmwrite xAX, xSP + ; Note: assumes success! + ; Don't mess with ESP anymore!!! + + ; Load guest general purpose registers. + mov rax, qword [xSI + CPUMCTX.eax] + mov rbx, qword [xSI + CPUMCTX.ebx] + mov rcx, qword [xSI + CPUMCTX.ecx] + mov rdx, qword [xSI + CPUMCTX.edx] + mov rbp, qword [xSI + CPUMCTX.ebp] + mov r8, qword [xSI + CPUMCTX.r8] + mov r9, qword [xSI + CPUMCTX.r9] + mov r10, qword [xSI + CPUMCTX.r10] + mov r11, qword [xSI + CPUMCTX.r11] + mov r12, qword [xSI + CPUMCTX.r12] + mov r13, qword [xSI + CPUMCTX.r13] + mov r14, qword [xSI + CPUMCTX.r14] + mov r15, qword [xSI + CPUMCTX.r15] + + ; Resume or start VM? + cmp xDI, 0 ; fResume + je .vmlaunch64_launch + + ; Load guest rdi & rsi. + mov rdi, qword [xSI + CPUMCTX.edi] + mov rsi, qword [xSI + CPUMCTX.esi] + + vmresume + jmp .vmlaunch64_done; ; Here if vmresume detected a failure. + +.vmlaunch64_launch: + ; Save guest rdi & rsi. 
+ mov rdi, qword [xSI + CPUMCTX.edi] + mov rsi, qword [xSI + CPUMCTX.esi] + + vmlaunch + jmp .vmlaunch64_done; ; Here if vmlaunch detected a failure. + +ALIGNCODE(16) +.vmlaunch64_done: + jc near .vmxstart64_invalid_vmcs_ptr + jz near .vmxstart64_start_failed + + RESTORE_STATE_VM64 + mov eax, VINF_SUCCESS + +.vmstart64_end: + popf + pop xBP + ret + +.vmxstart64_invalid_vmcs_ptr: + RESTORE_STATE_VM64 + mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM + jmp .vmstart64_end + +.vmxstart64_start_failed: + RESTORE_STATE_VM64 + mov eax, VERR_VMX_UNABLE_TO_START_VM + jmp .vmstart64_end +ENDPROC MY_NAME(VMXR0StartVM64) +%endif ; RT_ARCH_AMD64 + + +;/** +; * Prepares for and executes VMRUN (32 bits guests) +; * +; * @returns VBox status code +; * @param HCPhysVMCB Physical address of host VMCB. +; * @param HCPhysVMCB Physical address of guest VMCB. +; * @param pCtx Pointer to the guest CPU-context. +; */ +ALIGNCODE(16) +BEGINPROC MY_NAME(SVMR0VMRun) +%ifdef RT_ARCH_AMD64 ; fake a cdecl stack frame + %ifdef ASM_CALL64_GCC + push rdx + push rsi + push rdi + %else + push r8 + push rdx + push rcx + %endif + push 0 +%endif + push xBP + mov xBP, xSP + pushf + + ; Save all general purpose host registers. + MYPUSHAD + + ; Save guest CPU-context pointer. + mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx + push xSI ; push for saving the state at the end + + ; Save host fs, gs, sysenter msr etc. + mov xAX, [xBP + xCB * 2] ; pVMCBHostPhys (64 bits physical address; x86: take low dword only) + push xAX ; save for the vmload after vmrun + vmsave + + ; Setup eax for VMLOAD. + mov xAX, [xBP + xCB * 2 + RTHCPHYS_CB] ; pVMCBPhys (64 bits physical address; take low dword only) + + ; Load guest general purpose registers. + ; eax is loaded from the VMCB by VMRUN. + mov ebx, [xSI + CPUMCTX.ebx] + mov ecx, [xSI + CPUMCTX.ecx] + mov edx, [xSI + CPUMCTX.edx] + mov edi, [xSI + CPUMCTX.edi] + mov ebp, [xSI + CPUMCTX.ebp] + mov esi, [xSI + CPUMCTX.esi] + + ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch. + clgi + sti + + ; Load guest fs, gs, sysenter msr etc. + vmload + ; Run the VM. + vmrun + + ; eax is in the VMCB already; we can use it here. + + ; Save guest fs, gs, sysenter msr etc. + vmsave + + ; Load host fs, gs, sysenter msr etc. + pop xAX ; Pushed above + vmload + + ; Set the global interrupt flag again, but execute cli to make sure IF=0. + cli + stgi + + pop xAX ; pCtx + + mov [ss:xAX + CPUMCTX.ebx], ebx + mov [ss:xAX + CPUMCTX.ecx], ecx + mov [ss:xAX + CPUMCTX.edx], edx + mov [ss:xAX + CPUMCTX.esi], esi + mov [ss:xAX + CPUMCTX.edi], edi + mov [ss:xAX + CPUMCTX.ebp], ebp + + ; Restore host general purpose registers. + MYPOPAD + + mov eax, VINF_SUCCESS + + popf + pop xBP +%ifdef RT_ARCH_AMD64 + add xSP, 4*xCB +%endif + ret +ENDPROC MY_NAME(SVMR0VMRun) + +%ifdef RT_ARCH_AMD64 +;/** +; * Prepares for and executes VMRUN (64 bits guests) +; * +; * @returns VBox status code +; * @param HCPhysVMCB Physical address of host VMCB. +; * @param HCPhysVMCB Physical address of guest VMCB. +; * @param pCtx Pointer to the guest-CPU context. 
+; */ +ALIGNCODE(16) +BEGINPROC MY_NAME(SVMR0VMRun64) + ; Fake a cdecl stack frame + %ifdef ASM_CALL64_GCC + push rdx + push rsi + push rdi + %else + push r8 + push rdx + push rcx + %endif + push 0 + push rbp + mov rbp, rsp + pushf + + ; Manual save and restore: + ; - General purpose registers except RIP, RSP, RAX + ; + ; Trashed: + ; - CR2 (we don't care) + ; - LDTR (reset to 0) + ; - DRx (presumably not changed at all) + ; - DR7 (reset to 0x400) + ; + + ; Save all general purpose host registers. + MYPUSHAD + + ; Save guest CPU-context pointer. + mov rsi, [rbp + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx + push rsi ; push for saving the state at the end + + ; Save host fs, gs, sysenter msr etc. + mov rax, [rbp + xCB * 2] ; pVMCBHostPhys (64 bits physical address; x86: take low dword only) + push rax ; Save for the vmload after vmrun + vmsave + + ; Setup eax for VMLOAD. + mov rax, [rbp + xCB * 2 + RTHCPHYS_CB] ; pVMCBPhys (64 bits physical address; take low dword only) + + ; Load guest general purpose registers. + ; rax is loaded from the VMCB by VMRUN. + mov rbx, qword [xSI + CPUMCTX.ebx] + mov rcx, qword [xSI + CPUMCTX.ecx] + mov rdx, qword [xSI + CPUMCTX.edx] + mov rdi, qword [xSI + CPUMCTX.edi] + mov rbp, qword [xSI + CPUMCTX.ebp] + mov r8, qword [xSI + CPUMCTX.r8] + mov r9, qword [xSI + CPUMCTX.r9] + mov r10, qword [xSI + CPUMCTX.r10] + mov r11, qword [xSI + CPUMCTX.r11] + mov r12, qword [xSI + CPUMCTX.r12] + mov r13, qword [xSI + CPUMCTX.r13] + mov r14, qword [xSI + CPUMCTX.r14] + mov r15, qword [xSI + CPUMCTX.r15] + mov rsi, qword [xSI + CPUMCTX.esi] + + ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch. + clgi + sti + + ; Load guest fs, gs, sysenter msr etc. + vmload + ; Run the VM. + vmrun + + ; rax is in the VMCB already; we can use it here. + + ; Save guest fs, gs, sysenter msr etc. + vmsave + + ; Load host fs, gs, sysenter msr etc. + pop rax ; pushed above + vmload + + ; Set the global interrupt flag again, but execute cli to make sure IF=0. + cli + stgi + + pop rax ; pCtx + + mov qword [rax + CPUMCTX.ebx], rbx + mov qword [rax + CPUMCTX.ecx], rcx + mov qword [rax + CPUMCTX.edx], rdx + mov qword [rax + CPUMCTX.esi], rsi + mov qword [rax + CPUMCTX.edi], rdi + mov qword [rax + CPUMCTX.ebp], rbp + mov qword [rax + CPUMCTX.r8], r8 + mov qword [rax + CPUMCTX.r9], r9 + mov qword [rax + CPUMCTX.r10], r10 + mov qword [rax + CPUMCTX.r11], r11 + mov qword [rax + CPUMCTX.r12], r12 + mov qword [rax + CPUMCTX.r13], r13 + mov qword [rax + CPUMCTX.r14], r14 + mov qword [rax + CPUMCTX.r15], r15 + + ; Restore host general purpose registers. + MYPOPAD + + mov eax, VINF_SUCCESS + + popf + pop rbp + add rsp, 4 * xCB + ret +ENDPROC MY_NAME(SVMR0VMRun64) +%endif ; RT_ARCH_AMD64 + diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.cpp b/src/VBox/VMM/VMMR0/HMSVMR0.cpp new file mode 100644 index 00000000..c65e143f --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMSVMR0.cpp @@ -0,0 +1,4989 @@ +/* $Id: HMSVMR0.cpp $ */ +/** @file + * HM SVM (AMD-V) - Host Context Ring-0. + */ + +/* + * Copyright (C) 2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#define LOG_GROUP LOG_GROUP_HM +#include <iprt/asm-amd64-x86.h> +#include <iprt/thread.h> + +#include "HMInternal.h" +#include <VBox/vmm/vm.h> +#include "HMSVMR0.h" +#include <VBox/vmm/pdmapi.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/tm.h> + +#ifdef DEBUG_ramshankar +# define HMSVM_SYNC_FULL_GUEST_STATE +# define HMSVM_ALWAYS_TRAP_ALL_XCPTS +# define HMSVM_ALWAYS_TRAP_PF +# define HMSVM_ALWAYS_TRAP_TASK_SWITCH +#endif + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +#ifdef VBOX_WITH_STATISTICS +# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); \ + if ((u64ExitCode) == SVM_EXIT_NPF) \ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); \ + else \ + STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \ + } while (0) +#else +# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0) +#endif + +/** If we decide to use a function table approach this can be useful to + * switch to a "static DECLCALLBACK(int)". */ +#define HMSVM_EXIT_DECL static int + +/** @name Segment attribute conversion between CPU and AMD-V VMCB format. + * + * The CPU format of the segment attribute is described in X86DESCATTRBITS + * which is 16-bits (i.e. includes 4 bits of the segment limit). + * + * The AMD-V VMCB format the segment attribute is compact 12-bits (strictly + * only the attribute bits and nothing else). Upper 4-bits are unused. + * + * @{ */ +#define HMSVM_CPU_2_VMCB_SEG_ATTR(a) ( ((a) & 0xff) | (((a) & 0xf000) >> 4) ) +#define HMSVM_VMCB_2_CPU_SEG_ATTR(a) ( ((a) & 0xff) | (((a) & 0x0f00) << 4) ) +/** @} */ + +/** @name Macros for loading, storing segment registers to/from the VMCB. + * @{ */ +#define HMSVM_LOAD_SEG_REG(REG, reg) \ + do \ + { \ + Assert(pCtx->reg.fFlags & CPUMSELREG_FLAGS_VALID); \ + Assert(pCtx->reg.ValidSel == pCtx->reg.Sel); \ + pVmcb->guest.REG.u16Sel = pCtx->reg.Sel; \ + pVmcb->guest.REG.u32Limit = pCtx->reg.u32Limit; \ + pVmcb->guest.REG.u64Base = pCtx->reg.u64Base; \ + pVmcb->guest.REG.u16Attr = HMSVM_CPU_2_VMCB_SEG_ATTR(pCtx->reg.Attr.u); \ + } while (0) + +#define HMSVM_SAVE_SEG_REG(REG, reg) \ + do \ + { \ + pMixedCtx->reg.Sel = pVmcb->guest.REG.u16Sel; \ + pMixedCtx->reg.ValidSel = pVmcb->guest.REG.u16Sel; \ + pMixedCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \ + pMixedCtx->reg.u32Limit = pVmcb->guest.REG.u32Limit; \ + pMixedCtx->reg.u64Base = pVmcb->guest.REG.u64Base; \ + pMixedCtx->reg.Attr.u = HMSVM_VMCB_2_CPU_SEG_ATTR(pVmcb->guest.REG.u16Attr); \ + } while (0) +/** @} */ + +/** Macro for checking and returning from the using function for + * \#VMEXIT intercepts that maybe caused during delivering of another + * event in the guest. 
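 * (The macro expands to a call to hmR0SvmCheckExitDueToEventDelivery() and
 * makes the using handler return early: VINF_SUCCESS when that function
 * reports VINF_HM_DOUBLE_FAULT, or it propagates VINF_EM_RESET unchanged.)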
*/ +#define HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY() \ + do \ + { \ + int rc = hmR0SvmCheckExitDueToEventDelivery(pVCpu, pCtx, pSvmTransient); \ + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) \ + return VINF_SUCCESS; \ + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) \ + return rc; \ + } while (0) + +/** Macro for upgrading a @a a_rc to VINF_EM_DBG_STEPPED after emulating an + * instruction that exited. */ +#define HMSVM_CHECK_SINGLE_STEP(a_pVCpu, a_rc) \ + do { \ + if ((a_pVCpu)->hm.s.fSingleInstruction && (a_rc) == VINF_SUCCESS) \ + (a_rc) = VINF_EM_DBG_STEPPED; \ + } while (0) + +/** Assert that preemption is disabled or covered by thread-context hooks. */ +#define HMSVM_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + +/** Assert that we haven't migrated CPUs when thread-context hooks are not + * used. */ +#define HMSVM_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \ + ("Illegal migration! Entered on CPU %u Current %u\n", \ + pVCpu->hm.s.idEnteredCpu, RTMpCpuId())); + +/** Exception bitmap mask for all contributory exceptions. + * + * Page fault is deliberately excluded here as it's conditional as to whether + * it's contributory or benign. Page faults are handled separately. + */ +#define HMSVM_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \ + | RT_BIT(X86_XCPT_DE)) + +/** @name VMCB Clean Bits. + * + * These flags are used for VMCB-state caching. A set VMCB Clean bit indicates + * AMD-V doesn't need to reload the corresponding value(s) from the VMCB in + * memory. + * + * @{ */ +/** All intercepts vectors, TSC offset, PAUSE filter counter. */ +#define HMSVM_VMCB_CLEAN_INTERCEPTS RT_BIT(0) +/** I/O permission bitmap, MSR permission bitmap. */ +#define HMSVM_VMCB_CLEAN_IOPM_MSRPM RT_BIT(1) +/** ASID. */ +#define HMSVM_VMCB_CLEAN_ASID RT_BIT(2) +/** TRP: V_TPR, V_IRQ, V_INTR_PRIO, V_IGN_TPR, V_INTR_MASKING, +V_INTR_VECTOR. */ +#define HMSVM_VMCB_CLEAN_TPR RT_BIT(3) +/** Nested Paging: Nested CR3 (nCR3), PAT. */ +#define HMSVM_VMCB_CLEAN_NP RT_BIT(4) +/** Control registers (CR0, CR3, CR4, EFER). */ +#define HMSVM_VMCB_CLEAN_CRX_EFER RT_BIT(5) +/** Debug registers (DR6, DR7). */ +#define HMSVM_VMCB_CLEAN_DRX RT_BIT(6) +/** GDT, IDT limit and base. */ +#define HMSVM_VMCB_CLEAN_DT RT_BIT(7) +/** Segment register: CS, SS, DS, ES limit and base. */ +#define HMSVM_VMCB_CLEAN_SEG RT_BIT(8) +/** CR2.*/ +#define HMSVM_VMCB_CLEAN_CR2 RT_BIT(9) +/** Last-branch record (DbgCtlMsr, br_from, br_to, lastint_from, lastint_to) */ +#define HMSVM_VMCB_CLEAN_LBR RT_BIT(10) +/** AVIC (AVIC APIC_BAR; AVIC APIC_BACKING_PAGE, AVIC +PHYSICAL_TABLE and AVIC LOGICAL_TABLE Pointers). */ +#define HMSVM_VMCB_CLEAN_AVIC RT_BIT(11) +/** Mask of all valid VMCB Clean bits. */ +#define HMSVM_VMCB_CLEAN_ALL ( HMSVM_VMCB_CLEAN_INTERCEPTS \ + | HMSVM_VMCB_CLEAN_IOPM_MSRPM \ + | HMSVM_VMCB_CLEAN_ASID \ + | HMSVM_VMCB_CLEAN_TPR \ + | HMSVM_VMCB_CLEAN_NP \ + | HMSVM_VMCB_CLEAN_CRX_EFER \ + | HMSVM_VMCB_CLEAN_DRX \ + | HMSVM_VMCB_CLEAN_DT \ + | HMSVM_VMCB_CLEAN_SEG \ + | HMSVM_VMCB_CLEAN_CR2 \ + | HMSVM_VMCB_CLEAN_LBR \ + | HMSVM_VMCB_CLEAN_AVIC) +/** @} */ + +/** @name SVM transient. + * + * A state structure for holding miscellaneous information across AMD-V + * VMRUN/#VMEXIT operation, restored after the transition. + * + * @{ */ +typedef struct SVMTRANSIENT +{ + /** The host's rflags/eflags. 
*/ + RTCCUINTREG uEflags; +#if HC_ARCH_BITS == 32 + uint32_t u32Alignment0; +#endif + + /** The #VMEXIT exit code (the EXITCODE field in the VMCB). */ + uint64_t u64ExitCode; + /** The guest's TPR value used for TPR shadowing. */ + uint8_t u8GuestTpr; + /** Alignment. */ + uint8_t abAlignment0[7]; + + /** Whether the guest FPU state was active at the time of #VMEXIT. */ + bool fWasGuestFPUStateActive; + /** Whether the guest debug state was active at the time of #VMEXIT. */ + bool fWasGuestDebugStateActive; + /** Whether the hyper debug state was active at the time of #VMEXIT. */ + bool fWasHyperDebugStateActive; + /** Whether the TSC offset mode needs to be updated. */ + bool fUpdateTscOffsetting; + /** Whether the TSC_AUX MSR needs restoring on #VMEXIT. */ + bool fRestoreTscAuxMsr; + /** Whether the #VMEXIT was caused by a page-fault during delivery of a + * contributary exception or a page-fault. */ + bool fVectoringPF; +} SVMTRANSIENT, *PSVMTRANSIENT; +AssertCompileMemberAlignment(SVMTRANSIENT, u64ExitCode, sizeof(uint64_t)); +AssertCompileMemberAlignment(SVMTRANSIENT, fWasGuestFPUStateActive, sizeof(uint64_t)); +/** @} */ + +/** + * MSRPM (MSR permission bitmap) read permissions (for guest RDMSR). + */ +typedef enum SVMMSREXITREAD +{ + /** Reading this MSR causes a VM-exit. */ + SVMMSREXIT_INTERCEPT_READ = 0xb, + /** Reading this MSR does not cause a VM-exit. */ + SVMMSREXIT_PASSTHRU_READ +} SVMMSREXITREAD; + +/** + * MSRPM (MSR permission bitmap) write permissions (for guest WRMSR). + */ +typedef enum SVMMSREXITWRITE +{ + /** Writing to this MSR causes a VM-exit. */ + SVMMSREXIT_INTERCEPT_WRITE = 0xd, + /** Writing to this MSR does not cause a VM-exit. */ + SVMMSREXIT_PASSTHRU_WRITE +} SVMMSREXITWRITE; + +/** + * SVM VM-exit handler. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. + * @param pSvmTransient Pointer to the SVM-transient structure. + */ +typedef int FNSVMEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient); + +/******************************************************************************* +* Internal Functions * +*******************************************************************************/ +static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, unsigned uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite); +static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu); +static void hmR0SvmLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); + +/** @name VM-exit handlers. 
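 * (These are the static FNSVMEXITHANDLER implementations dispatched from
 * hmR0SvmHandleExit() based on the VMCB exit code.)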
+ * @{ + */ +static FNSVMEXITHANDLER hmR0SvmExitIntr; +static FNSVMEXITHANDLER hmR0SvmExitWbinvd; +static FNSVMEXITHANDLER hmR0SvmExitInvd; +static FNSVMEXITHANDLER hmR0SvmExitCpuid; +static FNSVMEXITHANDLER hmR0SvmExitRdtsc; +static FNSVMEXITHANDLER hmR0SvmExitRdtscp; +static FNSVMEXITHANDLER hmR0SvmExitRdpmc; +static FNSVMEXITHANDLER hmR0SvmExitInvlpg; +static FNSVMEXITHANDLER hmR0SvmExitHlt; +static FNSVMEXITHANDLER hmR0SvmExitMonitor; +static FNSVMEXITHANDLER hmR0SvmExitMwait; +static FNSVMEXITHANDLER hmR0SvmExitShutdown; +static FNSVMEXITHANDLER hmR0SvmExitReadCRx; +static FNSVMEXITHANDLER hmR0SvmExitWriteCRx; +static FNSVMEXITHANDLER hmR0SvmExitSetPendingXcptUD; +static FNSVMEXITHANDLER hmR0SvmExitMsr; +static FNSVMEXITHANDLER hmR0SvmExitReadDRx; +static FNSVMEXITHANDLER hmR0SvmExitWriteDRx; +static FNSVMEXITHANDLER hmR0SvmExitIOInstr; +static FNSVMEXITHANDLER hmR0SvmExitNestedPF; +static FNSVMEXITHANDLER hmR0SvmExitVIntr; +static FNSVMEXITHANDLER hmR0SvmExitTaskSwitch; +static FNSVMEXITHANDLER hmR0SvmExitVmmCall; +static FNSVMEXITHANDLER hmR0SvmExitXcptPF; +static FNSVMEXITHANDLER hmR0SvmExitXcptNM; +static FNSVMEXITHANDLER hmR0SvmExitXcptMF; +static FNSVMEXITHANDLER hmR0SvmExitXcptDB; +/** @} */ + +DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient); + +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +/** Ring-0 memory object for the IO bitmap. */ +RTR0MEMOBJ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ; +/** Physical address of the IO bitmap. */ +RTHCPHYS g_HCPhysIOBitmap = 0; +/** Virtual address of the IO bitmap. */ +R0PTRTYPE(void *) g_pvIOBitmap = NULL; + + +/** + * Sets up and activates AMD-V on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the CPU info struct. + * @param pVM Pointer to the VM (can be NULL after a resume!). + * @param pvCpuPage Pointer to the global CPU page. + * @param HCPhysCpuPage Physical address of the global CPU page. + * @param fEnabledByHost Whether the host OS has already initialized AMD-V. + * @param pvArg Unused on AMD-V. + */ +VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost, + void *pvArg) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + AssertReturn(!fEnabledByHost, VERR_INVALID_PARAMETER); + AssertReturn( HCPhysCpuPage + && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); + NOREF(pvArg); + NOREF(fEnabledByHost); + + /* Paranoid: Disable interrupt as, in theory, interrupt handlers might mess with EFER. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* + * We must turn on AMD-V and setup the host state physical address, as those MSRs are per CPU. + */ + uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER); + if (u64HostEfer & MSR_K6_EFER_SVME) + { + /* If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is active, then we blindly use AMD-V. */ + if ( pVM + && pVM->hm.s.svm.fIgnoreInUseError) + { + pCpu->fIgnoreAMDVInUseError = true; + } + + if (!pCpu->fIgnoreAMDVInUseError) + { + ASMSetFlags(uEflags); + return VERR_SVM_IN_USE; + } + } + + /* Turn on AMD-V in the EFER MSR. */ + ASMWrMsr(MSR_K6_EFER, u64HostEfer | MSR_K6_EFER_SVME); + + /* Write the physical page address where the CPU will store the host state while executing the VM. */ + ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage); + + /* Restore interrupts. 
*/ + ASMSetFlags(uEflags); + + /* + * Theoretically, other hypervisors may have used ASIDs, ideally we should flush all non-zero ASIDs + * when enabling SVM. AMD doesn't have an SVM instruction to flush all ASIDs (flushing is done + * upon VMRUN). Therefore, just set the fFlushAsidBeforeUse flag which instructs hmR0SvmSetupTLB() + * to flush the TLB with before using a new ASID. + */ + pCpu->fFlushAsidBeforeUse = true; + + /* + * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. + */ + ++pCpu->cTlbFlushes; + + return VINF_SUCCESS; +} + + +/** + * Deactivates AMD-V on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the CPU info struct. + * @param pvCpuPage Pointer to the global CPU page. + * @param HCPhysCpuPage Physical address of the global CPU page. + */ +VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + AssertReturn( HCPhysCpuPage + && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); + NOREF(pCpu); + + /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with EFER. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* Turn off AMD-V in the EFER MSR. */ + uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER); + ASMWrMsr(MSR_K6_EFER, u64HostEfer & ~MSR_K6_EFER_SVME); + + /* Invalidate host state physical address. */ + ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0); + + /* Restore interrupts. */ + ASMSetFlags(uEflags); + + return VINF_SUCCESS; +} + + +/** + * Does global AMD-V initialization (called during module initialization). + * + * @returns VBox status code. + */ +VMMR0DECL(int) SVMR0GlobalInit(void) +{ + /* + * Allocate 12 KB for the IO bitmap. Since this is non-optional and we always intercept all IO accesses, it's done + * once globally here instead of per-VM. + */ + Assert(g_hMemObjIOBitmap == NIL_RTR0MEMOBJ); + int rc = RTR0MemObjAllocCont(&g_hMemObjIOBitmap, 3 << PAGE_SHIFT, false /* fExecutable */); + if (RT_FAILURE(rc)) + return rc; + + g_pvIOBitmap = RTR0MemObjAddress(g_hMemObjIOBitmap); + g_HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(g_hMemObjIOBitmap, 0 /* iPage */); + + /* Set all bits to intercept all IO accesses. */ + ASMMemFill32(g_pvIOBitmap, 3 << PAGE_SHIFT, UINT32_C(0xffffffff)); + return VINF_SUCCESS; +} + + +/** + * Does global AMD-V termination (called during module termination). + */ +VMMR0DECL(void) SVMR0GlobalTerm(void) +{ + if (g_hMemObjIOBitmap != NIL_RTR0MEMOBJ) + { + RTR0MemObjFree(g_hMemObjIOBitmap, true /* fFreeMappings */); + g_pvIOBitmap = NULL; + g_HCPhysIOBitmap = 0; + g_hMemObjIOBitmap = NIL_RTR0MEMOBJ; + } +} + + +/** + * Frees any allocated per-VCPU structures for a VM. + * + * @param pVM Pointer to the VM. 
+ */ +DECLINLINE(void) hmR0SvmFreeStructs(PVM pVM) +{ + for (uint32_t i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + + if (pVCpu->hm.s.svm.hMemObjVmcbHost != NIL_RTR0MEMOBJ) + { + RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcbHost, false); + pVCpu->hm.s.svm.pvVmcbHost = 0; + pVCpu->hm.s.svm.HCPhysVmcbHost = 0; + pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ; + } + + if (pVCpu->hm.s.svm.hMemObjVmcb != NIL_RTR0MEMOBJ) + { + RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcb, false); + pVCpu->hm.s.svm.pvVmcb = 0; + pVCpu->hm.s.svm.HCPhysVmcb = 0; + pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ; + } + + if (pVCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ) + { + RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false); + pVCpu->hm.s.svm.pvMsrBitmap = 0; + pVCpu->hm.s.svm.HCPhysMsrBitmap = 0; + pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ; + } + } +} + + +/** + * Does per-VM AMD-V initialization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) SVMR0InitVM(PVM pVM) +{ + int rc = VERR_INTERNAL_ERROR_5; + + /* + * Check for an AMD CPU erratum which requires us to flush the TLB before every world-switch. + */ + uint32_t u32Family; + uint32_t u32Model; + uint32_t u32Stepping; + if (HMAmdIsSubjectToErratum170(&u32Family, &u32Model, &u32Stepping)) + { + Log4(("SVMR0InitVM: AMD cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping)); + pVM->hm.s.svm.fAlwaysFlushTLB = true; + } + + /* + * Initialize the R0 memory objects up-front so we can properly cleanup on allocation failures. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ; + pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ; + pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ; + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + /* + * Allocate one page for the host-context VM control block (VMCB). This is used for additional host-state (such as + * FS, GS, Kernel GS Base, etc.) apart from the host-state save area specified in MSR_K8_VM_HSAVE_PA. + */ + rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcbHost, 1 << PAGE_SHIFT, false /* fExecutable */); + if (RT_FAILURE(rc)) + goto failure_cleanup; + + pVCpu->hm.s.svm.pvVmcbHost = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost); + pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */); + Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G); + ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcbHost); + + /* + * Allocate one page for the guest-state VMCB. + */ + rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcb, 1 << PAGE_SHIFT, false /* fExecutable */); + if (RT_FAILURE(rc)) + goto failure_cleanup; + + pVCpu->hm.s.svm.pvVmcb = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb); + pVCpu->hm.s.svm.HCPhysVmcb = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */); + Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G); + ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcb); + + /* + * Allocate two pages (8 KB) for the MSR permission bitmap. There doesn't seem to be a way to convince + * SVM to not require one. 
+ */ + rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, 2 << PAGE_SHIFT, false /* fExecutable */); + if (RT_FAILURE(rc)) + goto failure_cleanup; + + pVCpu->hm.s.svm.pvMsrBitmap = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap); + pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0 /* iPage */); + /* Set all bits to intercept all MSR accesses (changed later on). */ + ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, 2 << PAGE_SHIFT, 0xffffffff); + } + + return VINF_SUCCESS; + +failure_cleanup: + hmR0SvmFreeStructs(pVM); + return rc; +} + + +/** + * Does per-VM AMD-V termination. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) SVMR0TermVM(PVM pVM) +{ + hmR0SvmFreeStructs(pVM); + return VINF_SUCCESS; +} + + +/** + * Sets the permission bits for the specified MSR in the MSRPM. + * + * @param pVCpu Pointer to the VMCPU. + * @param uMsr The MSR for which the access permissions are being set. + * @param enmRead MSR read permissions. + * @param enmWrite MSR write permissions. + */ +static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, unsigned uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite) +{ + unsigned ulBit; + uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap; + + /* + * Layout: + * Byte offset MSR range + * 0x000 - 0x7ff 0x00000000 - 0x00001fff + * 0x800 - 0xfff 0xc0000000 - 0xc0001fff + * 0x1000 - 0x17ff 0xc0010000 - 0xc0011fff + * 0x1800 - 0x1fff Reserved + */ + if (uMsr <= 0x00001FFF) + { + /* Pentium-compatible MSRs. */ + ulBit = uMsr * 2; + } + else if ( uMsr >= 0xC0000000 + && uMsr <= 0xC0001FFF) + { + /* AMD Sixth Generation x86 Processor MSRs. */ + ulBit = (uMsr - 0xC0000000) * 2; + pbMsrBitmap += 0x800; + } + else if ( uMsr >= 0xC0010000 + && uMsr <= 0xC0011FFF) + { + /* AMD Seventh and Eighth Generation Processor MSRs. */ + ulBit = (uMsr - 0xC0001000) * 2; + pbMsrBitmap += 0x1000; + } + else + { + AssertFailed(); + return; + } + + Assert(ulBit < 0x3fff /* 16 * 1024 - 1 */); + if (enmRead == SVMMSREXIT_INTERCEPT_READ) + ASMBitSet(pbMsrBitmap, ulBit); + else + ASMBitClear(pbMsrBitmap, ulBit); + + if (enmWrite == SVMMSREXIT_INTERCEPT_WRITE) + ASMBitSet(pbMsrBitmap, ulBit + 1); + else + ASMBitClear(pbMsrBitmap, ulBit + 1); + + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM; +} + + +/** + * Sets up AMD-V for the specified VM. + * This function is only called once per-VM during initalization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) SVMR0SetupVM(PVM pVM) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + AssertReturn(pVM, VERR_INVALID_PARAMETER); + Assert(pVM->hm.s.svm.fSupported); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + PSVMVMCB pVmcb = (PSVMVMCB)pVM->aCpus[i].hm.s.svm.pvVmcb; + + AssertMsgReturn(pVmcb, ("Invalid pVmcb for vcpu[%u]\n", i), VERR_SVM_INVALID_PVMCB); + + /* Trap exceptions unconditionally (debug purposes). */ +#ifdef HMSVM_ALWAYS_TRAP_PF + pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF); +#endif +#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS + /* If you add any exceptions here, make sure to update hmR0SvmHandleExit(). 
*/
+        pVmcb->ctrl.u32InterceptException |=   0
+                                             | RT_BIT(X86_XCPT_BP)
+                                             | RT_BIT(X86_XCPT_DB)
+                                             | RT_BIT(X86_XCPT_DE)
+                                             | RT_BIT(X86_XCPT_NM)
+                                             | RT_BIT(X86_XCPT_UD)
+                                             | RT_BIT(X86_XCPT_NP)
+                                             | RT_BIT(X86_XCPT_SS)
+                                             | RT_BIT(X86_XCPT_GP)
+                                             | RT_BIT(X86_XCPT_PF)
+                                             | RT_BIT(X86_XCPT_MF)
+                                             ;
+#endif
+
+        /* Set up unconditional intercepts and conditions. */
+        pVmcb->ctrl.u32InterceptCtrl1 =   SVM_CTRL1_INTERCEPT_INTR          /* External interrupt causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_NMI           /* Non-maskable interrupts cause a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_INIT          /* INIT signal causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_RDPMC         /* RDPMC causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_CPUID         /* CPUID causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_RSM           /* RSM causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_HLT           /* HLT causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_INOUT_BITMAP  /* Use the IOPM to cause IOIO VM-exits. */
+                                        | SVM_CTRL1_INTERCEPT_MSR_SHADOW    /* MSR access not covered by MSRPM causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_INVLPGA       /* INVLPGA causes a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_SHUTDOWN      /* Shutdown events cause a VM-exit. */
+                                        | SVM_CTRL1_INTERCEPT_FERR_FREEZE;  /* Intercept "freezing" during legacy FPU handling. */
+
+        pVmcb->ctrl.u32InterceptCtrl2 =   SVM_CTRL2_INTERCEPT_VMRUN         /* VMRUN causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_VMMCALL       /* VMMCALL causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_VMLOAD        /* VMLOAD causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_VMSAVE        /* VMSAVE causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_STGI          /* STGI causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_CLGI          /* CLGI causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_SKINIT        /* SKINIT causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_WBINVD        /* WBINVD causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_MONITOR       /* MONITOR causes a VM-exit. */
+                                        | SVM_CTRL2_INTERCEPT_MWAIT;        /* MWAIT causes a VM-exit. */
+
+        /* CR0, CR4 reads must be intercepted; our shadow values are not necessarily the same as the guest's. */
+        pVmcb->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);
+
+        /* CR0, CR4 writes must be intercepted for the same reasons as above. */
+        pVmcb->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);
+
+        /* Intercept all DRx reads and writes by default. Changed later on. */
+        pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
+        pVmcb->ctrl.u16InterceptWrDRx = 0xffff;
+
+        /* Virtualize masking of INTR interrupts (reads/writes from/to CR8 go to the V_TPR register). */
+        pVmcb->ctrl.IntCtrl.n.u1VIrqMasking = 1;
+
+        /* Ignore the priority in the TPR. This is necessary for delivering PIC-style (ExtInt) interrupts and we currently
+           deliver both PIC and APIC interrupts alike. See hmR0SvmInjectPendingEvent(). */
+        pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR = 1;
+
+        /* Set the IO and MSR permission bitmap physical addresses. */
+        pVmcb->ctrl.u64IOPMPhysAddr  = g_HCPhysIOBitmap;
+        pVmcb->ctrl.u64MSRPMPhysAddr = pVCpu->hm.s.svm.HCPhysMsrBitmap;
+
+        /* No LBR virtualization. */
+        pVmcb->ctrl.u64LBRVirt = 0;
+
+        /* Initially set all VMCB clean bits to 0, indicating that everything should be loaded from the VMCB in memory. */
+        pVmcb->ctrl.u64VmcbCleanBits = 0;
+
+        /* The host ASID is MBZ (must be zero); for the guest, start with ASID 1. */
+        pVmcb->ctrl.TLBCtrl.n.u32ASID = 1;
+
+        /*
+         * Set up the PAT MSR (applicable for Nested Paging only).
+         * The default value should be 0x0007040600070406ULL, but we want to treat all guest memory as WB,
+         * so choose type 6 for all PAT slots.
+         */
+        pVmcb->guest.u64GPAT = UINT64_C(0x0006060606060606);
+
+        /* Set up Nested Paging. This doesn't change throughout the execution time of the VM. */
+        pVmcb->ctrl.NestedPaging.n.u1NestedPaging = pVM->hm.s.fNestedPaging;
+
+        /* Without Nested Paging, we need additional intercepts. */
+        if (!pVM->hm.s.fNestedPaging)
+        {
+            /* CR3 reads/writes must be intercepted; our shadow values differ from the guest values. */
+            pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(3);
+            pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(3);
+
+            /* Intercept INVLPG and task switches (may change CR3, EFLAGS, LDT). */
+            pVmcb->ctrl.u32InterceptCtrl1 |=   SVM_CTRL1_INTERCEPT_INVLPG
+                                             | SVM_CTRL1_INTERCEPT_TASK_SWITCH;
+
+            /* Page faults must be intercepted to implement shadow paging. */
+            pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
+        }
+
+#ifdef HMSVM_ALWAYS_TRAP_TASK_SWITCH
+        pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_TASK_SWITCH;
+#endif
+
+        /*
+         * The following MSRs are saved/restored automatically during the world-switch.
+         * Don't intercept guest read/write accesses to these MSRs.
+         */
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_CSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K6_STAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_SF_MASK, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_FS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address.
+ *
+ * @returns VBox status code.
+ * @param   pVM         Pointer to the VM.
+ * @param   pVCpu       Pointer to the VMCPU.
+ * @param   GCVirt      Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
+{
+    AssertReturn(pVM, VERR_INVALID_PARAMETER);
+    Assert(pVM->hm.s.svm.fSupported);
+
+    bool fFlushPending = pVM->hm.s.svm.fAlwaysFlushTLB || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+    /* Skip it if a TLB flush is already pending. */
+    if (!fFlushPending)
+    {
+        Log4(("SVMR0InvalidatePage %RGv\n", GCVirt));
+
+        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        AssertMsgReturn(pVmcb, ("Invalid pVmcb!\n"), VERR_SVM_INVALID_PVMCB);
+
+#if HC_ARCH_BITS == 32
+        /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVLPGA takes only 32-bit addresses. */
+        if (CPUMIsGuestInLongMode(pVCpu))
+            VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+        else
+#endif
+        {
+            SVMR0InvlpgA(GCVirt, pVmcb->ctrl.TLBCtrl.n.u32ASID);
+            STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Flushes the appropriate tagged-TLB entries.
+ *
+ * @param   pVCpu       Pointer to the VMCPU.
+ */ +static void hmR0SvmFlushTaggedTlb(PVMCPU pVCpu) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu(); + + /* + * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last. + * This can happen both for start & resume due to long jumps back to ring-3. + * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB, + * so we cannot reuse the ASIDs without flushing. + */ + bool fNewAsid = false; + Assert(pCpu->idCpu != NIL_RTCPUID); + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + pVCpu->hm.s.fForceTLBFlush = true; + fNewAsid = true; + } + + /* Set TLB flush state as checked until we return from the world switch. */ + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); + + /* Check for explicit TLB shootdowns. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + } + + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING; + + if (pVM->hm.s.svm.fAlwaysFlushTLB) + { + /* + * This is the AMD erratum 170. We need to flush the entire TLB for each world switch. Sad. + */ + pCpu->uCurrentAsid = 1; + pVCpu->hm.s.uCurrentAsid = 1; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; + + /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */ + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP; + } + else if (pVCpu->hm.s.fForceTLBFlush) + { + /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */ + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP; + + if (fNewAsid) + { + ++pCpu->uCurrentAsid; + bool fHitASIDLimit = false; + if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) + { + pCpu->uCurrentAsid = 1; /* Wraparound at 1; host uses 0 */ + pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ + fHitASIDLimit = true; + + if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) + { + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; + pCpu->fFlushAsidBeforeUse = true; + } + else + { + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; + pCpu->fFlushAsidBeforeUse = false; + } + } + + if ( !fHitASIDLimit + && pCpu->fFlushAsidBeforeUse) + { + if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; + else + { + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; + pCpu->fFlushAsidBeforeUse = false; + } + } + + pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid; + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + } + else + { + if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; + else + pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; + } + + pVCpu->hm.s.fForceTLBFlush = false; + } + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should + * not be executed. See hmQueueInvlPage() where it is commented + * out. Support individual entry flushing someday. 
*/ +#if 0 + else + { + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown); + for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++) + SVMR0InvlpgA(pVCpu->hm.s.TlbShootdown.aPages[i], pVmcb->ctrl.TLBCtrl.n.u32ASID); + + pVCpu->hm.s.TlbShootdown.cPages = 0; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + } + } +#endif + + + /* Update VMCB with the ASID. */ + if (pVmcb->ctrl.TLBCtrl.n.u32ASID != pVCpu->hm.s.uCurrentAsid) + { + pVmcb->ctrl.TLBCtrl.n.u32ASID = pVCpu->hm.s.uCurrentAsid; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_ASID; + } + + AssertMsg(pVCpu->hm.s.idLastCpu == pCpu->idCpu, + ("vcpu idLastCpu=%x pcpu idCpu=%x\n", pVCpu->hm.s.idLastCpu, pCpu->idCpu)); + AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes, + ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes)); + AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); + +#ifdef VBOX_WITH_STATISTICS + if (pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING) + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); + else if ( pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT + || pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid); + } + else + { + Assert(pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_ENTIRE); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushEntire); + } +#endif +} + + +/** @name 64-bit guest on 32-bit host OS helper functions. + * + * The host CPU is still 64-bit capable but the host OS is running in 32-bit + * mode (code segment, paging). These wrappers/helpers perform the necessary + * bits for the 32->64 switcher. + * + * @{ */ +#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +/** + * Prepares for and executes VMRUN (64-bit guests on a 32-bit host). + * + * @returns VBox status code. + * @param HCPhysVmcbHost Physical address of host VMCB. + * @param HCPhysVmcb Physical address of the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu) +{ + uint32_t aParam[4]; + aParam[0] = (uint32_t)(HCPhysVmcbHost); /* Param 1: HCPhysVmcbHost - Lo. */ + aParam[1] = (uint32_t)(HCPhysVmcbHost >> 32); /* Param 1: HCPhysVmcbHost - Hi. */ + aParam[2] = (uint32_t)(HCPhysVmcb); /* Param 2: HCPhysVmcb - Lo. */ + aParam[3] = (uint32_t)(HCPhysVmcb >> 32); /* Param 2: HCPhysVmcb - Hi. */ + + return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_SVMRCVMRun64, 4, &aParam[0]); +} + + +/** + * Executes the specified VMRUN handler in 64-bit mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param enmOp The operation to perform. + * @param cbParam Number of parameters. + * @param paParam Array of 32-bit parameters. 
+ */ +VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam, + uint32_t *paParam) +{ + AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER); + Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END); + + /* Disable interrupts. */ + RTHCUINTREG uOldEFlags = ASMIntDisableFlags(); + +#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI + RTCPUID idHostCpu = RTMpCpuId(); + CPUMR0SetLApic(pVCpu, idHostCpu); +#endif + + CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); + CPUMSetHyperEIP(pVCpu, enmOp); + for (int i = (int)cbParam - 1; i >= 0; i--) + CPUMPushHyper(pVCpu, paParam[i]); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z); + /* Call the switcher. */ + int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum)); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z); + + /* Restore interrupts. */ + ASMSetFlags(uOldEFlags); + return rc; +} + +#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */ +/** @} */ + + +/** + * Adds an exception to the intercept exception bitmap in the VMCB and updates + * the corresponding VMCB Clean bit. + * + * @param pVmcb Pointer to the VMCB. + * @param u32Xcpt The value of the exception (X86_XCPT_*). + */ +DECLINLINE(void) hmR0SvmAddXcptIntercept(PSVMVMCB pVmcb, uint32_t u32Xcpt) +{ + if (!(pVmcb->ctrl.u32InterceptException & RT_BIT(u32Xcpt))) + { + pVmcb->ctrl.u32InterceptException |= RT_BIT(u32Xcpt); + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; + } +} + + +/** + * Removes an exception from the intercept-exception bitmap in the VMCB and + * updates the corresponding VMCB Clean bit. + * + * @param pVmcb Pointer to the VMCB. + * @param u32Xcpt The value of the exception (X86_XCPT_*). + */ +DECLINLINE(void) hmR0SvmRemoveXcptIntercept(PSVMVMCB pVmcb, uint32_t u32Xcpt) +{ +#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS + if (pVmcb->ctrl.u32InterceptException & RT_BIT(u32Xcpt)) + { + pVmcb->ctrl.u32InterceptException &= ~RT_BIT(u32Xcpt); + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; + } +#endif +} + + +/** + * Loads the guest CR0 control register into the guest-state area in the VMCB. + * Although the guest CR0 is a separate field in the VMCB we have to consider + * the FPU state itself which is shared between the host and the guest. + * + * @returns VBox status code. + * @param pVM Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0SvmLoadSharedCR0(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + /* + * Guest CR0. + */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + { + uint64_t u64GuestCR0 = pCtx->cr0; + + /* Always enable caching. */ + u64GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); + + /* + * When Nested Paging is not available use shadow page tables and intercept #PFs (the latter done in SVMR0SetupVM()). + */ + if (!pVM->hm.s.fNestedPaging) + { + u64GuestCR0 |= X86_CR0_PG; /* When Nested Paging is not available, use shadow page tables. */ + u64GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */ + } + + /* + * Guest FPU bits. + */ + bool fInterceptNM = false; + bool fInterceptMF = false; + u64GuestCR0 |= X86_CR0_NE; /* Use internal x87 FPU exceptions handling rather than external interrupts. 
*/ + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + /* Catch floating point exceptions if we need to report them to the guest in a different way. */ + if (!(u64GuestCR0 & X86_CR0_NE)) + { + Log4(("hmR0SvmLoadGuestControlRegs: Intercepting Guest CR0.MP Old-style FPU handling!!!\n")); + fInterceptMF = true; + } + } + else + { + fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */ + u64GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */ + | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */ + } + + /* + * Update the exception intercept bitmap. + */ + if (fInterceptNM) + hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_NM); + else + hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_NM); + + if (fInterceptMF) + hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_MF); + else + hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_MF); + + pVmcb->guest.u64CR0 = u64GuestCR0; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0); + } +} + + +/** + * Loads the guest control registers (CR2, CR3, CR4) into the VMCB. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0SvmLoadGuestControlRegs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* + * Guest CR2. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR2)) + { + pVmcb->guest.u64CR2 = pCtx->cr2; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CR2; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2); + } + + /* + * Guest CR3. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3)) + { + if (pVM->hm.s.fNestedPaging) + { + PGMMODE enmShwPagingMode; +#if HC_ARCH_BITS == 32 + if (CPUMIsGuestInLongModeEx(pCtx)) + enmShwPagingMode = PGMMODE_AMD64_NX; + else +#endif + enmShwPagingMode = PGMGetHostMode(pVM); + + pVmcb->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode); + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP; + Assert(pVmcb->ctrl.u64NestedPagingCR3); + pVmcb->guest.u64CR3 = pCtx->cr3; + } + else + pVmcb->guest.u64CR3 = PGMGetHyperCR3(pVCpu); + + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3); + } + + /* + * Guest CR4. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4)) + { + uint64_t u64GuestCR4 = pCtx->cr4; + if (!pVM->hm.s.fNestedPaging) + { + switch (pVCpu->hm.s.enmShadowMode) + { + case PGMMODE_REAL: + case PGMMODE_PROTECTED: /* Protected mode, no paging. */ + AssertFailed(); + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; + + case PGMMODE_32_BIT: /* 32-bit paging. */ + u64GuestCR4 &= ~X86_CR4_PAE; + break; + + case PGMMODE_PAE: /* PAE paging. */ + case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */ + /** Must use PAE paging as we could use physical memory > 4 GB */ + u64GuestCR4 |= X86_CR4_PAE; + break; + + case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ + case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. 
*/ +#ifdef VBOX_ENABLE_64_BITS_GUESTS + break; +#else + AssertFailed(); + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; +#endif + + default: /* shut up gcc */ + AssertFailed(); + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; + } + } + + pVmcb->guest.u64CR4 = u64GuestCR4; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4); + } + + return VINF_SUCCESS; +} + + +/** + * Loads the guest segment registers into the VMCB. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0SvmLoadGuestSegmentRegs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + /* Guest Segment registers: CS, SS, DS, ES, FS, GS. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS)) + { + HMSVM_LOAD_SEG_REG(CS, cs); + HMSVM_LOAD_SEG_REG(SS, ss); + HMSVM_LOAD_SEG_REG(DS, ds); + HMSVM_LOAD_SEG_REG(ES, es); + HMSVM_LOAD_SEG_REG(FS, fs); + HMSVM_LOAD_SEG_REG(GS, gs); + + pVmcb->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_SEG; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); + } + + /* Guest TR. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_TR)) + { + HMSVM_LOAD_SEG_REG(TR, tr); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_TR); + } + + /* Guest LDTR. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LDTR)) + { + HMSVM_LOAD_SEG_REG(LDTR, ldtr); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_LDTR); + } + + /* Guest GDTR. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_GDTR)) + { + pVmcb->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt; + pVmcb->guest.GDTR.u64Base = pCtx->gdtr.pGdt; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_GDTR); + } + + /* Guest IDTR. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_IDTR)) + { + pVmcb->guest.IDTR.u32Limit = pCtx->idtr.cbIdt; + pVmcb->guest.IDTR.u64Base = pCtx->idtr.pIdt; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_IDTR); + } +} + + +/** + * Loads the guest MSRs into the VMCB. + * + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0SvmLoadGuestMsrs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + /* Guest Sysenter MSRs. */ + pVmcb->guest.u64SysEnterCS = pCtx->SysEnter.cs; + pVmcb->guest.u64SysEnterEIP = pCtx->SysEnter.eip; + pVmcb->guest.u64SysEnterESP = pCtx->SysEnter.esp; + + /* + * Guest EFER MSR. + * AMD-V requires guest EFER.SVME to be set. Weird. . + * See AMD spec. 15.5.1 "Basic Operation" | "Canonicalization and Consistency Checks". + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_SVM_GUEST_EFER_MSR)) + { + pVmcb->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_SVM_GUEST_EFER_MSR); + } + + /* 64-bit MSRs. */ + if (CPUMIsGuestInLongModeEx(pCtx)) + { + pVmcb->guest.FS.u64Base = pCtx->fs.u64Base; + pVmcb->guest.GS.u64Base = pCtx->gs.u64Base; + } + else + { + /* If the guest isn't in 64-bit mode, clear MSR_K6_LME bit from guest EFER otherwise AMD-V expects amd64 shadow paging. 
*/ + if (pCtx->msrEFER & MSR_K6_EFER_LME) + { + pVmcb->guest.u64EFER &= ~MSR_K6_EFER_LME; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER; + } + } + + + /** @todo The following are used in 64-bit only (SYSCALL/SYSRET) but they might + * be writable in 32-bit mode. Clarify with AMD spec. */ + pVmcb->guest.u64STAR = pCtx->msrSTAR; + pVmcb->guest.u64LSTAR = pCtx->msrLSTAR; + pVmcb->guest.u64CSTAR = pCtx->msrCSTAR; + pVmcb->guest.u64SFMASK = pCtx->msrSFMASK; + pVmcb->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE; +} + + +/** + * Loads the guest state into the VMCB and programs the necessary intercepts + * accordingly. + * + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + * @remarks Requires EFLAGS to be up-to-date in the VMCB! + */ +static void hmR0SvmLoadSharedDebugState(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + return; + Assert((pCtx->dr[6] & X86_DR6_RA1_MASK) == X86_DR6_RA1_MASK); Assert((pCtx->dr[6] & X86_DR6_RAZ_MASK) == 0); + Assert((pCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); Assert((pCtx->dr[7] & X86_DR7_RAZ_MASK) == 0); + + bool fInterceptDB = false; + bool fInterceptMovDRx = false; + + /* + * Anyone single stepping on the host side? If so, we'll have to use the + * trap flag in the guest EFLAGS since AMD-V doesn't have a trap flag on + * the VMM level like VT-x implementations does. + */ + bool const fStepping = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu); + if (fStepping) + { + pVCpu->hm.s.fClearTrapFlag = true; + pVmcb->guest.u64RFlags |= X86_EFL_TF; + fInterceptDB = true; + fInterceptMovDRx = true; /* Need clean DR6, no guest mess. */ + } + + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (fStepping || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK)) + { + /* + * Use the combined guest and host DRx values found in the hypervisor + * register set because the debugger has breakpoints active or someone + * is single stepping on the host side. + * + * Note! DBGF expects a clean DR6 state before executing guest code. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pCtx) + && !CPUMIsHyperDebugStateActivePending(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, false /* include DR6 */); + Assert(!CPUMIsGuestDebugStateActivePending(pVCpu)); + Assert(CPUMIsHyperDebugStateActivePending(pVCpu)); + } + else +#endif + if (!CPUMIsHyperDebugStateActive(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, false /* include DR6 */); + Assert(!CPUMIsGuestDebugStateActive(pVCpu)); + Assert(CPUMIsHyperDebugStateActive(pVCpu)); + } + + /* Update DR6 & DR7. (The other DRx values are handled by CPUM one way or the other.) */ + if ( pVmcb->guest.u64DR6 != X86_DR6_INIT_VAL + || pVmcb->guest.u64DR7 != CPUMGetHyperDR7(pVCpu)) + { + pVmcb->guest.u64DR7 = CPUMGetHyperDR7(pVCpu); + pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX; + pVCpu->hm.s.fUsingHyperDR7 = true; + } + + /** @todo If we cared, we could optimize to allow the guest to read registers + * with the same values. */ + fInterceptDB = true; + fInterceptMovDRx = true; + Log5(("hmR0SvmLoadSharedDebugState: Loaded hyper DRx\n")); + } + else + { + /* + * Update DR6, DR7 with the guest values if necessary. 
+ */ + if ( pVmcb->guest.u64DR7 != pCtx->dr[7] + || pVmcb->guest.u64DR6 != pCtx->dr[6]) + { + pVmcb->guest.u64DR7 = pCtx->dr[7]; + pVmcb->guest.u64DR6 = pCtx->dr[6]; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX; + pVCpu->hm.s.fUsingHyperDR7 = false; + } + + /* + * If the guest has enabled debug registers, we need to load them prior to + * executing guest code so they'll trigger at the right time. + */ + if (pCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */ + { +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pCtx) + && !CPUMIsGuestDebugStateActivePending(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + Assert(!CPUMIsHyperDebugStateActivePending(pVCpu)); + Assert(CPUMIsGuestDebugStateActivePending(pVCpu)); + } + else +#endif + if (!CPUMIsGuestDebugStateActive(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + Assert(!CPUMIsHyperDebugStateActive(pVCpu)); + Assert(CPUMIsGuestDebugStateActive(pVCpu)); + } + Log5(("hmR0SvmLoadSharedDebugState: Loaded guest DRx\n")); + } + /* + * If no debugging enabled, we'll lazy load DR0-3. We don't need to + * intercept #DB as DR6 is updated in the VMCB. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + else if ( !CPUMIsGuestDebugStateActivePending(pVCpu) + && !CPUMIsGuestDebugStateActive(pVCpu)) +#else + else if (!CPUMIsGuestDebugStateActive(pVCpu)) +#endif + { + fInterceptMovDRx = true; + } + } + + /* + * Set up the intercepts. + */ + if (fInterceptDB) + hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_DB); + else + hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_DB); + + if (fInterceptMovDRx) + { + if ( pVmcb->ctrl.u16InterceptRdDRx != 0xffff + || pVmcb->ctrl.u16InterceptWrDRx != 0xffff) + { + pVmcb->ctrl.u16InterceptRdDRx = 0xffff; + pVmcb->ctrl.u16InterceptWrDRx = 0xffff; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; + } + } + else + { + if ( pVmcb->ctrl.u16InterceptRdDRx + || pVmcb->ctrl.u16InterceptWrDRx) + { + pVmcb->ctrl.u16InterceptRdDRx = 0; + pVmcb->ctrl.u16InterceptWrDRx = 0; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; + } + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG); +} + + +/** + * Loads the guest APIC state (currently just the TPR). + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + */ +static int hmR0SvmLoadGuestApicState(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE)) + return VINF_SUCCESS; + + bool fPendingIntr; + uint8_t u8Tpr; + int rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, NULL /* pu8PendingIrq */); + AssertRCReturn(rc, rc); + + /* Assume that we need to trap all TPR accesses and thus need not check on + every #VMEXIT if we should update the TPR. */ + Assert(pVmcb->ctrl.IntCtrl.n.u1VIrqMasking); + pVCpu->hm.s.svm.fSyncVTpr = false; + + /* 32-bit guests uses LSTAR MSR for patching guest code which touches the TPR. */ + if (pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive) + { + pCtx->msrLSTAR = u8Tpr; + + /* If there are interrupts pending, intercept LSTAR writes, otherwise don't intercept reads or writes. 
*/ + if (fPendingIntr) + hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE); + else + { + hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE); + pVCpu->hm.s.svm.fSyncVTpr = true; + } + } + else + { + /* Bits 3-0 of the VTPR field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */ + pVmcb->ctrl.IntCtrl.n.u8VTPR = (u8Tpr >> 4); + + /* If there are interrupts pending, intercept CR8 writes to evaluate ASAP if we can deliver the interrupt to the guest. */ + if (fPendingIntr) + pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(8); + else + { + pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(8); + pVCpu->hm.s.svm.fSyncVTpr = true; + } + + pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR); + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + return rc; +} + + +/** + * Sets up the appropriate function to run guest code. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0SvmSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + if (CPUMIsGuestInLongModeEx(pCtx)) + { +#ifndef VBOX_ENABLE_64_BITS_GUESTS + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; +#endif + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */ +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */ + pVCpu->hm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64; +#else + /* 64-bit host or hybrid host. */ + pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun64; +#endif + } + else + { + /* Guest is not in long mode, use the 32-bit handler. */ + pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun; + } + return VINF_SUCCESS; +} + + +/** + * Enters the AMD-V session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the CPU info struct. + */ +VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + Assert(pVM->hm.s.svm.fSupported); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + NOREF(pCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + + pVCpu->hm.s.fLeaveDone = false; + return VINF_SUCCESS; +} + + +/** + * Thread-context callback for AMD-V. + * + * @param enmEvent The thread-context event. + * @param pVCpu Pointer to the VMCPU. + * @param fGlobalInit Whether global VT-x/AMD-V init. is used. + * @thread EMT(pVCpu) + */ +VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit) +{ + switch (enmEvent) + { + case RTTHREADCTXEVENT_PREEMPTING: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + + /* No longjmps (log-flush, locks) in this fragile context. */ + VMMRZCallRing3Disable(pVCpu); + + if (!pVCpu->hm.s.fLeaveDone) + { + hmR0SvmLeave(pVM, pVCpu, pCtx); + pVCpu->hm.s.fLeaveDone = true; + } + + /* Leave HM context, takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + AssertRC(rc); NOREF(rc); + + /* Restore longjmp state. 
*/ + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptPreempting); + break; + } + + case RTTHREADCTXEVENT_RESUMED: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + /* No longjmps (log-flush, locks) in this fragile context. */ + VMMRZCallRing3Disable(pVCpu); + + /* + * Initialize the bare minimum state required for HM. This takes care of + * initializing AMD-V if necessary (onlined CPUs, local init etc.) + */ + int rc = HMR0EnterCpu(pVCpu); + AssertRC(rc); NOREF(rc); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + + pVCpu->hm.s.fLeaveDone = false; + + /* Restore longjmp state. */ + VMMRZCallRing3Enable(pVCpu); + break; + } + + default: + break; + } +} + + +/** + * Saves the host state. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu) +{ + NOREF(pVM); + NOREF(pVCpu); + /* Nothing to do here. AMD-V does this for us automatically during the world-switch. */ + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT); + return VINF_SUCCESS; +} + + +/** + * Loads the guest state into the VMCB. The CPU state will be loaded from these + * fields on every successful VM-entry. + * + * Also sets up the appropriate VMRUN function to execute guest code based on + * the guest CPU mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0SvmLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + AssertMsgReturn(pVmcb, ("Invalid pVmcb\n"), VERR_SVM_INVALID_PVMCB); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x); + + int rc = hmR0SvmLoadGuestControlRegs(pVCpu, pVmcb, pCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestControlRegs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + hmR0SvmLoadGuestSegmentRegs(pVCpu, pVmcb, pCtx); + hmR0SvmLoadGuestMsrs(pVCpu, pVmcb, pCtx); + + pVmcb->guest.u64RIP = pCtx->rip; + pVmcb->guest.u64RSP = pCtx->rsp; + pVmcb->guest.u64RFlags = pCtx->eflags.u32; + pVmcb->guest.u64RAX = pCtx->rax; + + rc = hmR0SvmLoadGuestApicState(pVCpu, pVmcb, pCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0SvmSetupVMRunHandler(pVCpu, pCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0SvmSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* Clear any unused and reserved bits. */ + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RIP /* Unused (loaded unconditionally). */ + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_GUEST_SYSENTER_CS_MSR + | HM_CHANGED_GUEST_SYSENTER_EIP_MSR + | HM_CHANGED_GUEST_SYSENTER_ESP_MSR + | HM_CHANGED_SVM_RESERVED1 /* Reserved. */ + | HM_CHANGED_SVM_RESERVED2 + | HM_CHANGED_SVM_RESERVED3); + + /* All the guest state bits should be loaded except maybe the host context and/or shared host/guest bits. 
*/ + AssertMsg( !HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST) + || HMCPU_CF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + + Log4(("Load: CS:RIP=%04x:%RX64 EFL=%#x SS:RSP=%04x:%RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->eflags.u, pCtx->ss, pCtx->rsp)); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x); + return rc; +} + + +/** + * Loads the state shared between the host and guest into the + * VMCB. + * + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the VMCB. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0SvmLoadSharedState(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + hmR0SvmLoadSharedCR0(pVCpu, pVmcb, pCtx); + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + hmR0SvmLoadSharedDebugState(pVCpu, pVmcb, pCtx); + + AssertMsg(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); +} + + +/** + * Saves the entire guest state from the VMCB into the + * guest-CPU context. Currently there is no residual state left in the CPU that + * is not updated in the VMCB. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static void hmR0SvmSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + + pMixedCtx->rip = pVmcb->guest.u64RIP; + pMixedCtx->rsp = pVmcb->guest.u64RSP; + pMixedCtx->eflags.u32 = pVmcb->guest.u64RFlags; + pMixedCtx->rax = pVmcb->guest.u64RAX; + + /* + * Guest interrupt shadow. + */ + if (pVmcb->ctrl.u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE) + EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip); + else + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + + /* + * Guest Control registers: CR2, CR3 (handled at the end) - accesses to other control registers are always intercepted. + */ + pMixedCtx->cr2 = pVmcb->guest.u64CR2; + + /* + * Guest MSRs. + */ + pMixedCtx->msrSTAR = pVmcb->guest.u64STAR; /* legacy syscall eip, cs & ss */ + pMixedCtx->msrLSTAR = pVmcb->guest.u64LSTAR; /* 64-bit mode syscall rip */ + pMixedCtx->msrCSTAR = pVmcb->guest.u64CSTAR; /* compatibility mode syscall rip */ + pMixedCtx->msrSFMASK = pVmcb->guest.u64SFMASK; /* syscall flag mask */ + pMixedCtx->msrKERNELGSBASE = pVmcb->guest.u64KernelGSBase; /* swapgs exchange value */ + pMixedCtx->SysEnter.cs = pVmcb->guest.u64SysEnterCS; + pMixedCtx->SysEnter.eip = pVmcb->guest.u64SysEnterEIP; + pMixedCtx->SysEnter.esp = pVmcb->guest.u64SysEnterESP; + + /* + * Guest segment registers (includes FS, GS base MSRs for 64-bit guests). + */ + HMSVM_SAVE_SEG_REG(CS, cs); + HMSVM_SAVE_SEG_REG(SS, ss); + HMSVM_SAVE_SEG_REG(DS, ds); + HMSVM_SAVE_SEG_REG(ES, es); + HMSVM_SAVE_SEG_REG(FS, fs); + HMSVM_SAVE_SEG_REG(GS, gs); + + /* + * Correct the hidden CS granularity bit. Haven't seen it being wrong in any other + * register (yet). + */ + /** @todo SELM might need to be fixed as it too should not care about the + * granularity bit. See @bugref{6785}. 
*/ + if ( !pMixedCtx->cs.Attr.n.u1Granularity + && pMixedCtx->cs.Attr.n.u1Present + && pMixedCtx->cs.u32Limit > UINT32_C(0xfffff)) + { + Assert((pMixedCtx->cs.u32Limit & 0xfff) == 0xfff); + pMixedCtx->cs.Attr.n.u1Granularity = 1; + } + +#ifdef VBOX_STRICT +# define HMSVM_ASSERT_SEG_GRANULARITY(reg) \ + AssertMsg( !pMixedCtx->reg.Attr.n.u1Present \ + || ( pMixedCtx->reg.Attr.n.u1Granularity \ + ? (pMixedCtx->reg.u32Limit & 0xfff) == 0xfff \ + : pMixedCtx->reg.u32Limit <= UINT32_C(0xfffff)), \ + ("Invalid Segment Attributes Limit=%#RX32 Attr=%#RX32 Base=%#RX64\n", pMixedCtx->reg.u32Limit, \ + pMixedCtx->reg.Attr.u, pMixedCtx->reg.u64Base)) + + HMSVM_ASSERT_SEG_GRANULARITY(cs); + HMSVM_ASSERT_SEG_GRANULARITY(ss); + HMSVM_ASSERT_SEG_GRANULARITY(ds); + HMSVM_ASSERT_SEG_GRANULARITY(es); + HMSVM_ASSERT_SEG_GRANULARITY(fs); + HMSVM_ASSERT_SEG_GRANULARITY(gs); + +# undef HMSVM_ASSERT_SEL_GRANULARITY +#endif + + /* + * Sync the hidden SS DPL field. AMD CPUs have a separate CPL field in the VMCB and uses that + * and thus it's possible that when the CPL changes during guest execution that the SS DPL + * isn't updated by AMD-V. Observed on some AMD Fusion CPUs with 64-bit guests. + * See AMD spec. 15.5.1 "Basic operation". + */ + Assert(!(pVmcb->guest.u8CPL & ~0x3)); + pMixedCtx->ss.Attr.n.u2Dpl = pVmcb->guest.u8CPL & 0x3; + + /* + * Guest Descriptor-Table registers. + */ + HMSVM_SAVE_SEG_REG(TR, tr); + HMSVM_SAVE_SEG_REG(LDTR, ldtr); + pMixedCtx->gdtr.cbGdt = pVmcb->guest.GDTR.u32Limit; + pMixedCtx->gdtr.pGdt = pVmcb->guest.GDTR.u64Base; + + pMixedCtx->idtr.cbIdt = pVmcb->guest.IDTR.u32Limit; + pMixedCtx->idtr.pIdt = pVmcb->guest.IDTR.u64Base; + + /* + * Guest Debug registers. + */ + if (!pVCpu->hm.s.fUsingHyperDR7) + { + pMixedCtx->dr[6] = pVmcb->guest.u64DR6; + pMixedCtx->dr[7] = pVmcb->guest.u64DR7; + } + else + { + Assert(pVmcb->guest.u64DR7 == CPUMGetHyperDR7(pVCpu)); + CPUMSetHyperDR6(pVCpu, pVmcb->guest.u64DR6); + } + + /* + * With Nested Paging, CR3 changes are not intercepted. Therefore, sync. it now. + * This is done as the very last step of syncing the guest state, as PGMUpdateCR3() may cause longjmp's to ring-3. + */ + if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging + && pMixedCtx->cr3 != pVmcb->guest.u64CR3) + { + CPUMSetGuestCR3(pVCpu, pVmcb->guest.u64CR3); + PGMUpdateCR3(pVCpu, pVmcb->guest.u64CR3); + } +} + + +/** + * Does the necessary state syncing before returning to ring-3 for any reason + * (longjmp, preemption, voluntary exits to ring-3) from AMD-V. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jmp zone!!! + */ +static void hmR0SvmLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + + /* + * !!! IMPORTANT !!! + * If you modify code here, make sure to check whether hmR0SvmCallRing3Callback() needs to be updated too. + */ + + /* Restore host FPU state if necessary and resync on next R0 reentry .*/ + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + CPUMR0SaveGuestFPU(pVM, pVCpu, pCtx); + Assert(!CPUMIsGuestFPUStateActive(pVCpu)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + } + + /* + * Restore host debug registers if necessary and resync on next R0 reentry. 
+ */ +#ifdef VBOX_STRICT + if (CPUMIsHyperDebugStateActive(pVCpu)) + { + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + Assert(pVmcb->ctrl.u16InterceptRdDRx == 0xffff); + Assert(pVmcb->ctrl.u16InterceptWrDRx == 0xffff); + } +#endif + if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + + Assert(!CPUMIsHyperDebugStateActive(pVCpu)); + Assert(!CPUMIsGuestDebugStateActive(pVCpu)); + + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3); + + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); +} + + +/** + * Leaves the AMD-V session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + */ +static int hmR0SvmLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + HM_DISABLE_PREEMPT_IF_NEEDED(); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before + and done this from the SVMR0ThreadCtxCallback(). */ + if (!pVCpu->hm.s.fLeaveDone) + { + hmR0SvmLeave(pVM, pVCpu, pCtx); + pVCpu->hm.s.fLeaveDone = true; + } + + /* + * !!! IMPORTANT !!! + * If you modify code here, make sure to check whether hmR0SvmCallRing3Callback() needs to be updated too. + */ + /* Deregister hook now that we've left HM context before re-enabling preemption. */ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + VMMR0ThreadCtxHooksDeregister(pVCpu); + + /* Leave HM context. This takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + return rc; +} + + +/** + * Does the necessary state syncing before doing a longjmp to ring-3. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jmp zone!!! + */ +static int hmR0SvmLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + return hmR0SvmLeaveSession(pVM, pVCpu, pCtx); +} + + +/** + * VMMRZCallRing3() callback wrapper which saves the guest state (or restores + * any remaining host state) before we longjump to ring-3 and possibly get + * preempted. + * + * @param pVCpu Pointer to the VMCPU. + * @param enmOperation The operation causing the ring-3 longjump. + * @param pvUser The user argument (pointer to the possibly + * out-of-date guest-CPU context). + */ +DECLCALLBACK(int) hmR0SvmCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser) +{ + if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION) + { + /* + * !!! IMPORTANT !!! + * If you modify code here, make sure to check whether hmR0SvmLeave() and hmR0SvmLeaveSession() needs + * to be updated too. This is a stripped down version which gets out ASAP trying to not trigger any assertion. + */ + VMMRZCallRing3RemoveNotification(pVCpu); + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* Restore host FPU state if necessary and resync on next R0 reentry .*/ + if (CPUMIsGuestFPUStateActive(pVCpu)) + CPUMR0SaveGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser); + + /* Restore host debug registers if necessary and resync on next R0 reentry. 
*/
+        CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */);
+
+        /* Deregister hook now that we've left HM context before re-enabling preemption. */
+        if (VMMR0ThreadCtxHooksAreRegistered(pVCpu))
+            VMMR0ThreadCtxHooksDeregister(pVCpu);
+
+        /* Leave HM context. This takes care of local init (term). */
+        HMR0LeaveCpu(pVCpu);
+
+        HM_RESTORE_PREEMPT_IF_NEEDED();
+        return VINF_SUCCESS;
+    }
+
+    Assert(pVCpu);
+    Assert(pvUser);
+    Assert(VMMRZCallRing3IsEnabled(pVCpu));
+    HMSVM_ASSERT_PREEMPT_SAFE();
+
+    VMMRZCallRing3Disable(pVCpu);
+    Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+    Log4(("hmR0SvmCallRing3Callback->hmR0SvmLongJmpToRing3\n"));
+    int rc = hmR0SvmLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
+    AssertRCReturn(rc, rc);
+
+    VMMRZCallRing3Enable(pVCpu);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Takes the necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is not the same as a
+ * longjmp to ring-3; this is voluntary.
+ *
+ * @param   pVM         Pointer to the VM.
+ * @param   pVCpu       Pointer to the VMCPU.
+ * @param   pCtx        Pointer to the guest-CPU context.
+ * @param   rcExit      The reason for exiting to ring-3. Can be
+ *                      VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static void hmR0SvmExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, int rcExit)
+{
+    Assert(pVM);
+    Assert(pVCpu);
+    Assert(pCtx);
+    HMSVM_ASSERT_PREEMPT_SAFE();
+
+    /* Please, no longjmps here (any logging could trigger a log-flush that jumps back to ring-3). NO LOGGING BEFORE THIS POINT! */
+    VMMRZCallRing3Disable(pVCpu);
+    Log4(("hmR0SvmExitToRing3: rcExit=%d\n", rcExit));
+
+    /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring-3. */
+    if (pVCpu->hm.s.Event.fPending)
+    {
+        hmR0SvmPendingEventToTrpmTrap(pVCpu);
+        Assert(!pVCpu->hm.s.Event.fPending);
+    }
+
+    /* Sync the necessary state for going back to ring-3. */
+    hmR0SvmLeaveSession(pVM, pVCpu, pCtx);
+    STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+    VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+    CPUMSetChangedFlags(pVCpu,  CPUM_CHANGED_SYSENTER_MSR
+                              | CPUM_CHANGED_LDTR
+                              | CPUM_CHANGED_GDTR
+                              | CPUM_CHANGED_IDTR
+                              | CPUM_CHANGED_TR
+                              | CPUM_CHANGED_HIDDEN_SEL_REGS);
+    if (   pVM->hm.s.fNestedPaging
+        && CPUMIsGuestPagingEnabledEx(pCtx))
+    {
+        CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+    }
+
+    /* Make sure we've undone the trap flag if we tried to single-step something. */
+    if (pVCpu->hm.s.fClearTrapFlag)
+    {
+        pCtx->eflags.Bits.u1TF = 0;
+        pVCpu->hm.s.fClearTrapFlag = false;
+    }
+
+    /* On our way back from ring-3, reload the guest state if there is a possibility of it being changed. */
+    if (rcExit != VINF_EM_RAW_INTERRUPT)
+        HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
+
+    STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+
+    /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
+    VMMRZCallRing3RemoveNotification(pVCpu);
+    VMMRZCallRing3Enable(pVCpu);
+}
+
+
+/**
+ * Updates the use of TSC offsetting mode for the CPU and adjusts the necessary
+ * intercepts.
+ *
+ * @param   pVCpu       Pointer to the VMCPU.
+ *
+ * @remarks No-long-jump zone!!!
+ */ +static void hmR0SvmUpdateTscOffsetting(PVMCPU pVCpu) +{ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + if (TMCpuTickCanUseRealTSC(pVCpu, &pVmcb->ctrl.u64TSCOffset)) + { + uint64_t u64CurTSC = ASMReadTSC(); + if (u64CurTSC + pVmcb->ctrl.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu)) + { + pVmcb->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_RDTSC; + pVmcb->ctrl.u32InterceptCtrl2 &= ~SVM_CTRL2_INTERCEPT_RDTSCP; + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset); + } + else + { + pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC; + pVmcb->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP; + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow); + } + } + else + { + pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC; + pVmcb->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP; + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept); + } + + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; +} + + +/** + * Sets an event as a pending event to be injected into the guest. + * + * @param pVCpu Pointer to the VMCPU. + * @param pEvent Pointer to the SVM event. + * @param GCPtrFaultAddress The fault-address (CR2) in case it's a + * page-fault. + * + * @remarks Statistics counter assumes this is a guest event being reflected to + * the guest i.e. 'StatInjectPendingReflect' is incremented always. + */ +DECLINLINE(void) hmR0SvmSetPendingEvent(PVMCPU pVCpu, PSVMEVENT pEvent, RTGCUINTPTR GCPtrFaultAddress) +{ + Assert(!pVCpu->hm.s.Event.fPending); + Assert(pEvent->n.u1Valid); + + pVCpu->hm.s.Event.u64IntInfo = pEvent->u; + pVCpu->hm.s.Event.fPending = true; + pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress; + + Log4(("hmR0SvmSetPendingEvent: u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, + pEvent->n.u8Vector, (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode)); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Injects an event into the guest upon VMRUN by updating the relevant field + * in the VMCB. + * + * @param pVCpu Pointer to the VMCPU. + * @param pVmcb Pointer to the guest VMCB. + * @param pCtx Pointer to the guest-CPU context. + * @param pEvent Pointer to the event. + * + * @remarks No-long-jump zone!!! + * @remarks Requires CR0! + */ +DECLINLINE(void) hmR0SvmInjectEventVmcb(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx, PSVMEVENT pEvent) +{ + pVmcb->ctrl.EventInject.u = pEvent->u; + STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]); + + Log4(("hmR0SvmInjectEventVmcb: u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, + pEvent->n.u8Vector, (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode)); +} + + + +/** + * Converts any TRPM trap into a pending HM event. This is typically used when + * entering from ring-3 (not longjmp returns). + * + * @param pVCpu Pointer to the VMCPU. + */ +static void hmR0SvmTrpmTrapToPendingEvent(PVMCPU pVCpu) +{ + Assert(TRPMHasTrap(pVCpu)); + Assert(!pVCpu->hm.s.Event.fPending); + + uint8_t uVector; + TRPMEVENT enmTrpmEvent; + RTGCUINT uErrCode; + RTGCUINTPTR GCPtrFaultAddress; + uint8_t cbInstr; + + int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr); + AssertRC(rc); + + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u8Vector = uVector; + + /* Refer AMD spec. 15.20 "Event Injection" for the format. 
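For reference, the EVENTINJ/EXITINTINFO layout the comment above points to (AMD APM vol. 2, "Event Injection") packs the vector in bits 7:0, the event type in bits 10:8, an error-code-valid bit at 11, a valid bit at 31 and the error code in bits 63:32. A small illustrative encoder for that layout, not the VirtualBox SVMEVENT type:

/* Illustrative encoder for the SVM EVENTINJ / EXITINTINFO format. */
#include <stdint.h>
#include <stdio.h>

#define EVT_TYPE_EXT_IRQ   0u   /* external or virtual interrupt */
#define EVT_TYPE_NMI       2u
#define EVT_TYPE_EXCEPTION 3u
#define EVT_TYPE_SW_INT    4u   /* INTn */

static uint64_t svmEncodeEvent(uint8_t uVector, uint32_t uType, int fErrCodeValid, uint32_t uErrCode)
{
    uint64_t u = 0;
    u |= (uint64_t)uVector;                        /* bits  7:0  - vector            */
    u |= (uint64_t)(uType & 0x7) << 8;             /* bits 10:8  - event type        */
    u |= (uint64_t)(fErrCodeValid ? 1 : 0) << 11;  /* bit  11    - error code valid  */
    u |= (uint64_t)1 << 31;                        /* bit  31    - valid             */
    u |= (uint64_t)uErrCode << 32;                 /* bits 63:32 - error code        */
    return u;
}

int main(void)
{
    /* #GP(0) as an exception with an error code, like the TRPM_TRAP path below. */
    printf("EVENTINJ=%#llx\n",
           (unsigned long long)svmEncodeEvent(13 /* #GP */, EVT_TYPE_EXCEPTION, 1, 0));
    return 0;
}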
*/ + if (enmTrpmEvent == TRPM_TRAP) + { + Event.n.u3Type = SVM_EVENT_EXCEPTION; + switch (uVector) + { + case X86_XCPT_PF: + case X86_XCPT_DF: + case X86_XCPT_TS: + case X86_XCPT_NP: + case X86_XCPT_SS: + case X86_XCPT_GP: + case X86_XCPT_AC: + { + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = uErrCode; + break; + } + } + } + else if (enmTrpmEvent == TRPM_HARDWARE_INT) + { + if (uVector == X86_XCPT_NMI) + Event.n.u3Type = SVM_EVENT_NMI; + else + Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ; + } + else if (enmTrpmEvent == TRPM_SOFTWARE_INT) + Event.n.u3Type = SVM_EVENT_SOFTWARE_INT; + else + AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent)); + + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + + Log4(("TRPM->HM event: u=%#RX64 u8Vector=%#x uErrorCodeValid=%RTbool uErrorCode=%#RX32\n", Event.u, Event.n.u8Vector, + !!Event.n.u1ErrorCodeValid, Event.n.u32ErrorCode)); + + hmR0SvmSetPendingEvent(pVCpu, &Event, GCPtrFaultAddress); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Converts any pending SVM event into a TRPM trap. Typically used when leaving + * AMD-V to execute any instruction. + * + * @param pvCpu Pointer to the VMCPU. + */ +static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu) +{ + Assert(pVCpu->hm.s.Event.fPending); + Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP); + + SVMEVENT Event; + Event.u = pVCpu->hm.s.Event.u64IntInfo; + + uint8_t uVector = Event.n.u8Vector; + uint8_t uVectorType = Event.n.u3Type; + + TRPMEVENT enmTrapType; + switch (uVectorType) + { + case SVM_EVENT_EXTERNAL_IRQ: + case SVM_EVENT_NMI: + enmTrapType = TRPM_HARDWARE_INT; + break; + case SVM_EVENT_SOFTWARE_INT: + enmTrapType = TRPM_SOFTWARE_INT; + break; + case SVM_EVENT_EXCEPTION: + enmTrapType = TRPM_TRAP; + break; + default: + AssertMsgFailed(("Invalid pending-event type %#x\n", uVectorType)); + enmTrapType = TRPM_32BIT_HACK; + break; + } + + Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, uVectorType)); + + int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType); + AssertRC(rc); + + if (Event.n.u1ErrorCodeValid) + TRPMSetErrorCode(pVCpu, Event.n.u32ErrorCode); + + if ( uVectorType == SVM_EVENT_EXCEPTION + && uVector == X86_XCPT_PF) + { + TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress); + Assert(pVCpu->hm.s.Event.GCPtrFaultAddress == CPUMGetGuestCR2(pVCpu)); + } + else if (uVectorType == SVM_EVENT_SOFTWARE_INT) + { + AssertMsg( uVectorType == SVM_EVENT_SOFTWARE_INT + || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF), + ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType)); + TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr); + } + pVCpu->hm.s.Event.fPending = false; +} + + +/** + * Gets the guest's interrupt-shadow. + * + * @returns The guest's interrupt-shadow. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag. + */ +DECLINLINE(uint32_t) hmR0SvmGetGuestIntrShadow(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* + * Instructions like STI and MOV SS inhibit interrupts till the next instruction completes. Check if we should + * inhibit interrupts or clear any existing interrupt-inhibition. 
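The interrupt-shadow helper below boils down to remembering the RIP of the inhibiting instruction and treating the shadow as active only while the guest is still at that RIP. A standalone model of that bookkeeping; the struct and helper are illustrative, not the VMCPU force-flag machinery:

/* Minimal model of STI / MOV SS interrupt-shadow tracking. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct SHADOWSTATE
{
    bool     fInhibit;      /* analogue of VMCPU_FF_INHIBIT_INTERRUPTS       */
    uint64_t uInhibitRip;   /* RIP recorded when STI / MOV SS was executed   */
} SHADOWSTATE;

static uint32_t getIntrShadow(SHADOWSTATE *pState, uint64_t uCurRip)
{
    if (!pState->fInhibit)
        return 0;
    if (uCurRip != pState->uInhibitRip)
    {
        pState->fInhibit = false;   /* the next instruction retired, shadow is gone */
        return 0;
    }
    return 1;                       /* analogue of SVM_INTERRUPT_SHADOW_ACTIVE      */
}

int main(void)
{
    SHADOWSTATE State = { true, 0x1000 };
    printf("still at STI rip: shadow=%u\n", getIntrShadow(&State, 0x1000));
    printf("one insn later:   shadow=%u\n", getIntrShadow(&State, 0x1002));
    return 0;
}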
+ */ + uint32_t uIntrState = 0; + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) + { + if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu)) + { + /* + * We can clear the inhibit force flag as even if we go back to the recompiler without executing guest code in + * AMD-V, the flag's condition to be cleared is met and thus the cleared state is correct. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + else + uIntrState = SVM_INTERRUPT_SHADOW_ACTIVE; + } + return uIntrState; +} + + +/** + * Sets the virtual interrupt intercept control in the VMCB which + * instructs AMD-V to cause a #VMEXIT as soon as the guest is in a state to + * receive interrupts. + * + * @param pVmcb Pointer to the VMCB. + */ +DECLINLINE(void) hmR0SvmSetVirtIntrIntercept(PSVMVMCB pVmcb) +{ + if (!(pVmcb->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_VINTR)) + { + pVmcb->ctrl.IntCtrl.n.u1VIrqValid = 1; /* A virtual interrupt is pending. */ + pVmcb->ctrl.IntCtrl.n.u8VIrqVector = 0; /* Not necessary as we #VMEXIT for delivering the interrupt. */ + pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_VINTR; + pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR); + + Log4(("Setting VINTR intercept\n")); + } +} + + +/** + * Evaluates the event to be delivered to the guest and sets it as the pending + * event. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + */ +static void hmR0SvmEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!pVCpu->hm.s.Event.fPending); + Log4Func(("\n")); + + const bool fIntShadow = !!hmR0SvmGetGuestIntrShadow(pVCpu, pCtx); + const bool fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF); + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + + SVMEVENT Event; + Event.u = 0; + /** @todo SMI. SMIs take priority over NMIs. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts . */ + { + if (!fIntShadow) + { + Log4(("Pending NMI\n")); + + Event.n.u1Valid = 1; + Event.n.u8Vector = X86_XCPT_NMI; + Event.n.u3Type = SVM_EVENT_NMI; + + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + } + else + hmR0SvmSetVirtIntrIntercept(pVmcb); + } + else if (VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))) + { + /* + * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver + * the interrupt ASAP. We must not execute any guest code until we inject the interrupt which is why it is + * evaluated here and not set as pending, solely based on the force-flags. + */ + if ( !fBlockInt + && !fIntShadow) + { + uint8_t u8Interrupt; + int rc = PDMGetInterrupt(pVCpu, &u8Interrupt); + if (RT_SUCCESS(rc)) + { + Log4(("Injecting external interrupt u8Interrupt=%#x\n", u8Interrupt)); + + Event.n.u1Valid = 1; + Event.n.u8Vector = u8Interrupt; + Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ; + + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); + } + else + { + /** @todo Does this actually happen? If not turn it into an assertion. */ + Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq); + } + } + else + hmR0SvmSetVirtIntrIntercept(pVmcb); + } +} + + +/** + * Injects any pending events into the guest if the guest is in a state to + * receive them. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. 
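hmR0SvmEvaluatePendingEvent above encodes a simple priority: NMIs are considered before PIC/APIC interrupts, external interrupts are additionally gated on EFLAGS.IF, and when delivery is blocked the VINTR intercept is armed instead. A compressed standalone sketch of that decision; the flag parameters and return values are illustrative:

/* Sketch of the pending-event priority and gating logic above. */
#include <stdbool.h>
#include <stdio.h>

typedef enum { DELIVER_NONE, DELIVER_NMI, DELIVER_EXT_IRQ, ARM_VINTR } EVTACTION;

static EVTACTION evaluatePendingEvent(bool fNmiPending, bool fIrqPending,
                                      bool fIntShadow, bool fIfSet)
{
    if (fNmiPending)
        return fIntShadow ? ARM_VINTR : DELIVER_NMI;             /* NMI wins, blocked only by the shadow */
    if (fIrqPending)
        return (fIfSet && !fIntShadow) ? DELIVER_EXT_IRQ : ARM_VINTR;
    return DELIVER_NONE;
}

int main(void)
{
    /* IRQ pending but IF clear: arm the VINTR intercept instead of injecting. */
    printf("%d\n", evaluatePendingEvent(false, true, false, false)); /* ARM_VINTR   */
    printf("%d\n", evaluatePendingEvent(true,  true, false, true));  /* DELIVER_NMI */
    return 0;
}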
+ */ +static void hmR0SvmInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!TRPMHasTrap(pVCpu)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Log4Func(("\n")); + + const bool fIntShadow = !!hmR0SvmGetGuestIntrShadow(pVCpu, pCtx); + const bool fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF); + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + + if (pVCpu->hm.s.Event.fPending) /* First, inject any pending HM events. */ + { + SVMEVENT Event; + Event.u = pVCpu->hm.s.Event.u64IntInfo; + Assert(Event.n.u1Valid); +#ifdef VBOX_STRICT + if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ) + { + Assert(!fBlockInt); + Assert(!fIntShadow); + } + else if (Event.n.u3Type == SVM_EVENT_NMI) + Assert(!fIntShadow); +#endif + + Log4(("Injecting pending HM event.\n")); + hmR0SvmInjectEventVmcb(pVCpu, pVmcb, pCtx, &Event); + pVCpu->hm.s.Event.fPending = false; + +#ifdef VBOX_WITH_STATISTICS + if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ) + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt); + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt); +#endif + } + + /* Update the guest interrupt shadow in the VMCB. */ + pVmcb->ctrl.u64IntShadow = !!fIntShadow; +} + + +/** + * Reports world-switch error and dumps some useful debug info. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param rcVMRun The return code from VMRUN (or + * VERR_SVM_INVALID_GUEST_STATE for invalid + * guest-state). + * @param pCtx Pointer to the guest-CPU context. + */ +static void hmR0SvmReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx) +{ + HMSVM_ASSERT_PREEMPT_SAFE(); + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + + if (rcVMRun == VERR_SVM_INVALID_GUEST_STATE) + { + HMDumpRegs(pVM, pVCpu, pCtx); +#ifdef VBOX_STRICT + Log4(("ctrl.u64VmcbCleanBits %#RX64\n", pVmcb->ctrl.u64VmcbCleanBits)); + Log4(("ctrl.u16InterceptRdCRx %#x\n", pVmcb->ctrl.u16InterceptRdCRx)); + Log4(("ctrl.u16InterceptWrCRx %#x\n", pVmcb->ctrl.u16InterceptWrCRx)); + Log4(("ctrl.u16InterceptRdDRx %#x\n", pVmcb->ctrl.u16InterceptRdDRx)); + Log4(("ctrl.u16InterceptWrDRx %#x\n", pVmcb->ctrl.u16InterceptWrDRx)); + Log4(("ctrl.u32InterceptException %#x\n", pVmcb->ctrl.u32InterceptException)); + Log4(("ctrl.u32InterceptCtrl1 %#x\n", pVmcb->ctrl.u32InterceptCtrl1)); + Log4(("ctrl.u32InterceptCtrl2 %#x\n", pVmcb->ctrl.u32InterceptCtrl2)); + Log4(("ctrl.u64IOPMPhysAddr %#RX64\n", pVmcb->ctrl.u64IOPMPhysAddr)); + Log4(("ctrl.u64MSRPMPhysAddr %#RX64\n", pVmcb->ctrl.u64MSRPMPhysAddr)); + Log4(("ctrl.u64TSCOffset %#RX64\n", pVmcb->ctrl.u64TSCOffset)); + + Log4(("ctrl.TLBCtrl.u32ASID %#x\n", pVmcb->ctrl.TLBCtrl.n.u32ASID)); + Log4(("ctrl.TLBCtrl.u8TLBFlush %#x\n", pVmcb->ctrl.TLBCtrl.n.u8TLBFlush)); + Log4(("ctrl.TLBCtrl.u24Reserved %#x\n", pVmcb->ctrl.TLBCtrl.n.u24Reserved)); + + Log4(("ctrl.IntCtrl.u8VTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u8VTPR)); + Log4(("ctrl.IntCtrl.u1VIrqValid %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqValid)); + Log4(("ctrl.IntCtrl.u7Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u7Reserved)); + Log4(("ctrl.IntCtrl.u4VIrqPriority %#x\n", pVmcb->ctrl.IntCtrl.n.u4VIrqPriority)); + Log4(("ctrl.IntCtrl.u1IgnoreTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR)); + Log4(("ctrl.IntCtrl.u3Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u3Reserved)); + Log4(("ctrl.IntCtrl.u1VIrqMasking %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqMasking)); + Log4(("ctrl.IntCtrl.u6Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u6Reserved)); + Log4(("ctrl.IntCtrl.u8VIrqVector %#x\n", pVmcb->ctrl.IntCtrl.n.u8VIrqVector)); + Log4(("ctrl.IntCtrl.u24Reserved %#x\n", 
pVmcb->ctrl.IntCtrl.n.u24Reserved)); + + Log4(("ctrl.u64IntShadow %#RX64\n", pVmcb->ctrl.u64IntShadow)); + Log4(("ctrl.u64ExitCode %#RX64\n", pVmcb->ctrl.u64ExitCode)); + Log4(("ctrl.u64ExitInfo1 %#RX64\n", pVmcb->ctrl.u64ExitInfo1)); + Log4(("ctrl.u64ExitInfo2 %#RX64\n", pVmcb->ctrl.u64ExitInfo2)); + Log4(("ctrl.ExitIntInfo.u8Vector %#x\n", pVmcb->ctrl.ExitIntInfo.n.u8Vector)); + Log4(("ctrl.ExitIntInfo.u3Type %#x\n", pVmcb->ctrl.ExitIntInfo.n.u3Type)); + Log4(("ctrl.ExitIntInfo.u1ErrorCodeValid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid)); + Log4(("ctrl.ExitIntInfo.u19Reserved %#x\n", pVmcb->ctrl.ExitIntInfo.n.u19Reserved)); + Log4(("ctrl.ExitIntInfo.u1Valid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1Valid)); + Log4(("ctrl.ExitIntInfo.u32ErrorCode %#x\n", pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode)); + Log4(("ctrl.NestedPaging %#RX64\n", pVmcb->ctrl.NestedPaging.u)); + Log4(("ctrl.EventInject.u8Vector %#x\n", pVmcb->ctrl.EventInject.n.u8Vector)); + Log4(("ctrl.EventInject.u3Type %#x\n", pVmcb->ctrl.EventInject.n.u3Type)); + Log4(("ctrl.EventInject.u1ErrorCodeValid %#x\n", pVmcb->ctrl.EventInject.n.u1ErrorCodeValid)); + Log4(("ctrl.EventInject.u19Reserved %#x\n", pVmcb->ctrl.EventInject.n.u19Reserved)); + Log4(("ctrl.EventInject.u1Valid %#x\n", pVmcb->ctrl.EventInject.n.u1Valid)); + Log4(("ctrl.EventInject.u32ErrorCode %#x\n", pVmcb->ctrl.EventInject.n.u32ErrorCode)); + + Log4(("ctrl.u64NestedPagingCR3 %#RX64\n", pVmcb->ctrl.u64NestedPagingCR3)); + Log4(("ctrl.u64LBRVirt %#RX64\n", pVmcb->ctrl.u64LBRVirt)); + + Log4(("guest.CS.u16Sel %RTsel\n", pVmcb->guest.CS.u16Sel)); + Log4(("guest.CS.u16Attr %#x\n", pVmcb->guest.CS.u16Attr)); + Log4(("guest.CS.u32Limit %#RX32\n", pVmcb->guest.CS.u32Limit)); + Log4(("guest.CS.u64Base %#RX64\n", pVmcb->guest.CS.u64Base)); + Log4(("guest.DS.u16Sel %#RTsel\n", pVmcb->guest.DS.u16Sel)); + Log4(("guest.DS.u16Attr %#x\n", pVmcb->guest.DS.u16Attr)); + Log4(("guest.DS.u32Limit %#RX32\n", pVmcb->guest.DS.u32Limit)); + Log4(("guest.DS.u64Base %#RX64\n", pVmcb->guest.DS.u64Base)); + Log4(("guest.ES.u16Sel %RTsel\n", pVmcb->guest.ES.u16Sel)); + Log4(("guest.ES.u16Attr %#x\n", pVmcb->guest.ES.u16Attr)); + Log4(("guest.ES.u32Limit %#RX32\n", pVmcb->guest.ES.u32Limit)); + Log4(("guest.ES.u64Base %#RX64\n", pVmcb->guest.ES.u64Base)); + Log4(("guest.FS.u16Sel %RTsel\n", pVmcb->guest.FS.u16Sel)); + Log4(("guest.FS.u16Attr %#x\n", pVmcb->guest.FS.u16Attr)); + Log4(("guest.FS.u32Limit %#RX32\n", pVmcb->guest.FS.u32Limit)); + Log4(("guest.FS.u64Base %#RX64\n", pVmcb->guest.FS.u64Base)); + Log4(("guest.GS.u16Sel %RTsel\n", pVmcb->guest.GS.u16Sel)); + Log4(("guest.GS.u16Attr %#x\n", pVmcb->guest.GS.u16Attr)); + Log4(("guest.GS.u32Limit %#RX32\n", pVmcb->guest.GS.u32Limit)); + Log4(("guest.GS.u64Base %#RX64\n", pVmcb->guest.GS.u64Base)); + + Log4(("guest.GDTR.u32Limit %#RX32\n", pVmcb->guest.GDTR.u32Limit)); + Log4(("guest.GDTR.u64Base %#RX64\n", pVmcb->guest.GDTR.u64Base)); + + Log4(("guest.LDTR.u16Sel %RTsel\n", pVmcb->guest.LDTR.u16Sel)); + Log4(("guest.LDTR.u16Attr %#x\n", pVmcb->guest.LDTR.u16Attr)); + Log4(("guest.LDTR.u32Limit %#RX32\n", pVmcb->guest.LDTR.u32Limit)); + Log4(("guest.LDTR.u64Base %#RX64\n", pVmcb->guest.LDTR.u64Base)); + + Log4(("guest.IDTR.u32Limit %#RX32\n", pVmcb->guest.IDTR.u32Limit)); + Log4(("guest.IDTR.u64Base %#RX64\n", pVmcb->guest.IDTR.u64Base)); + + Log4(("guest.TR.u16Sel %RTsel\n", pVmcb->guest.TR.u16Sel)); + Log4(("guest.TR.u16Attr %#x\n", pVmcb->guest.TR.u16Attr)); + Log4(("guest.TR.u32Limit %#RX32\n", pVmcb->guest.TR.u32Limit)); + 
Log4(("guest.TR.u64Base %#RX64\n", pVmcb->guest.TR.u64Base)); + + Log4(("guest.u8CPL %#x\n", pVmcb->guest.u8CPL)); + Log4(("guest.u64CR0 %#RX64\n", pVmcb->guest.u64CR0)); + Log4(("guest.u64CR2 %#RX64\n", pVmcb->guest.u64CR2)); + Log4(("guest.u64CR3 %#RX64\n", pVmcb->guest.u64CR3)); + Log4(("guest.u64CR4 %#RX64\n", pVmcb->guest.u64CR4)); + Log4(("guest.u64DR6 %#RX64\n", pVmcb->guest.u64DR6)); + Log4(("guest.u64DR7 %#RX64\n", pVmcb->guest.u64DR7)); + + Log4(("guest.u64RIP %#RX64\n", pVmcb->guest.u64RIP)); + Log4(("guest.u64RSP %#RX64\n", pVmcb->guest.u64RSP)); + Log4(("guest.u64RAX %#RX64\n", pVmcb->guest.u64RAX)); + Log4(("guest.u64RFlags %#RX64\n", pVmcb->guest.u64RFlags)); + + Log4(("guest.u64SysEnterCS %#RX64\n", pVmcb->guest.u64SysEnterCS)); + Log4(("guest.u64SysEnterEIP %#RX64\n", pVmcb->guest.u64SysEnterEIP)); + Log4(("guest.u64SysEnterESP %#RX64\n", pVmcb->guest.u64SysEnterESP)); + + Log4(("guest.u64EFER %#RX64\n", pVmcb->guest.u64EFER)); + Log4(("guest.u64STAR %#RX64\n", pVmcb->guest.u64STAR)); + Log4(("guest.u64LSTAR %#RX64\n", pVmcb->guest.u64LSTAR)); + Log4(("guest.u64CSTAR %#RX64\n", pVmcb->guest.u64CSTAR)); + Log4(("guest.u64SFMASK %#RX64\n", pVmcb->guest.u64SFMASK)); + Log4(("guest.u64KernelGSBase %#RX64\n", pVmcb->guest.u64KernelGSBase)); + Log4(("guest.u64GPAT %#RX64\n", pVmcb->guest.u64GPAT)); + Log4(("guest.u64DBGCTL %#RX64\n", pVmcb->guest.u64DBGCTL)); + Log4(("guest.u64BR_FROM %#RX64\n", pVmcb->guest.u64BR_FROM)); + Log4(("guest.u64BR_TO %#RX64\n", pVmcb->guest.u64BR_TO)); + Log4(("guest.u64LASTEXCPFROM %#RX64\n", pVmcb->guest.u64LASTEXCPFROM)); + Log4(("guest.u64LASTEXCPTO %#RX64\n", pVmcb->guest.u64LASTEXCPTO)); +#endif + } + else + Log4(("hmR0SvmReportWorldSwitchError: rcVMRun=%d\n", rcVMRun)); +} + + +/** + * Check per-VM and per-VCPU force flag actions that require us to go back to + * ring-3 for one reason or another. + * + * @returns VBox status code (information status code included). + * @retval VINF_SUCCESS if we don't have any actions that require going back to + * ring-3. + * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync. + * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware + * interrupts) + * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires + * all EMTs to be in ring-3. + * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests. + * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return + * to the EM loop. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + */ +static int hmR0SvmCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + /* On AMD-V we don't need to update CR3, PAE PDPES lazily. See hmR0SvmSaveGuestState(). */ + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + + if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction + ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction + ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) ) + { + /* Pending PGM C3 sync. */ + if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + int rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (rc != VINF_SUCCESS) + { + Log4(("hmR0SvmCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. 
rc=%d\n", rc)); + return rc; + } + } + + /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */ + /* -XXX- what was that about single stepping? */ + if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + int rc = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; + Log4(("hmR0SvmCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc)); + return rc; + } + + /* Pending VM request packets, such as hardware interrupts. */ + if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST)) + { + Log4(("hmR0SvmCheckForceFlags: Pending VM request forcing us back to ring-3\n")); + return VINF_EM_PENDING_REQUEST; + } + + /* Pending PGM pool flushes. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) + { + Log4(("hmR0SvmCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n")); + return VINF_PGM_POOL_FLUSH_PENDING; + } + + /* Pending DMA requests. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA)) + { + Log4(("hmR0SvmCheckForceFlags: Pending DMA request forcing us back to ring-3\n")); + return VINF_EM_RAW_TO_R3; + } + } + + return VINF_SUCCESS; +} + + +/** + * Does the preparations before executing guest code in AMD-V. + * + * This may cause longjmps to ring-3 and may even result in rescheduling to the + * recompiler. We must be cautious what we do here regarding committing + * guest-state information into the the VMCB assuming we assuredly execute the + * guest in AMD-V. If we fall back to the recompiler after updating the VMCB and + * clearing the common-state (TRPM/forceflags), we must undo those changes so + * that the recompiler can (and should) use them when it resumes guest + * execution. Otherwise such operations must be done when we can no longer + * exit to ring-3. + * + * @returns VBox status code (informational status codes included). + * @retval VINF_SUCCESS if we can proceed with running the guest. + * @retval VINF_* scheduling changes, we have to go back to ring-3. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param pSvmTransient Pointer to the SVM transient structure. + */ +static int hmR0SvmPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_ASSERT_PREEMPT_SAFE(); + + /* Check force flag actions that might require us to go back to ring-3. */ + int rc = hmR0SvmCheckForceFlags(pVM, pVCpu, pCtx); + if (rc != VINF_SUCCESS) + return rc; + + if (TRPMHasTrap(pVCpu)) + hmR0SvmTrpmTrapToPendingEvent(pVCpu); + else if (!pVCpu->hm.s.Event.fPending) + hmR0SvmEvaluatePendingEvent(pVCpu, pCtx); + +#ifdef HMSVM_SYNC_FULL_GUEST_STATE + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); +#endif + + /* Load the guest bits that are not shared with the host in any way since we can longjmp or get preempted. */ + rc = hmR0SvmLoadGuestState(pVM, pVCpu, pCtx); + AssertRCReturn(rc, rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull); + + /* + * If we're not intercepting TPR changes in the guest, save the guest TPR before the world-switch + * so we can update it on the way back if the guest changed the TPR. 
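A standalone sketch of the VTPR round trip described above: cache the value the guest saw before VMRUN, compare after the world switch, and push any change back to the virtual APIC (the shift by 4 mirrors how CR8 maps onto TPR bits 7:4). The helper here is a stand-in, not the PDM APIC API:

/* Model of the TPR save-before / compare-after pattern around VMRUN. */
#include <stdint.h>
#include <stdio.h>

static uint8_t g_u8ApicTpr;                    /* stand-in for the virtual APIC TPR  */

static void apicSetTpr(uint8_t u8Tpr) { g_u8ApicTpr = u8Tpr; }

int main(void)
{
    uint8_t u8VTpr        = 0x2;               /* VMCB IntCtrl.u8VTPR before VMRUN   */
    uint8_t u8GuestTprPre = u8VTpr;            /* analogue of pSvmTransient->u8GuestTpr */

    u8VTpr = 0x5;                              /* pretend the guest wrote CR8 = 5    */

    if (u8VTpr != u8GuestTprPre)               /* post-run comparison                */
        apicSetTpr((uint8_t)(u8VTpr << 4));    /* CR8 value -> 8-bit TPR             */

    printf("APIC TPR now %#x\n", g_u8ApicTpr);
    return 0;
}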
+ */ + if (pVCpu->hm.s.svm.fSyncVTpr) + { + if (pVM->hm.s.fTPRPatchingActive) + pSvmTransient->u8GuestTpr = pCtx->msrLSTAR; + else + { + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR; + } + } + + /* + * No longjmps to ring-3 from this point on!!! + * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic. + * This also disables flushing of the R0-logger instance (if any). + */ + VMMRZCallRing3Disable(pVCpu); + + /* + * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.) + * when thread-context hooks aren't used and we've been running with preemption disabled for a while. + * + * We need to check for force-flags that could've possible been altered since we last checked them (e.g. + * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}). + * + * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before + * executing guest code. + */ + pSvmTransient->uEflags = ASMIntDisableFlags(); + if ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + ASMSetFlags(pSvmTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + return VINF_EM_RAW_TO_R3; + } + if (RTThreadPreemptIsPending(NIL_RTTHREAD)) + { + ASMSetFlags(pSvmTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq); + return VINF_EM_RAW_INTERRUPT; + } + + return VINF_SUCCESS; +} + + +/** + * Prepares to run guest code in AMD-V and we've committed to doing so. This + * means there is no backing out to ring-3 or anywhere else at this + * point. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param pSvmTransient Pointer to the SVM transient structure. + * + * @remarks Called with preemption disabled. + * @remarks No-long-jump zone!!! + */ +static void hmR0SvmPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */ + + hmR0SvmInjectPendingEvent(pVCpu, pCtx); + + /* Load the state shared between host and guest (FPU, debug). */ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE)) + hmR0SvmLoadSharedState(pVCpu, pVmcb, pCtx); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT); /* Preemption might set this, nothing to do on AMD-V. */ + AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + + /* Setup TSC offsetting. */ + if ( pSvmTransient->fUpdateTscOffsetting + || HMR0GetCurrentCpu()->idCpu != pVCpu->hm.s.idLastCpu) + { + hmR0SvmUpdateTscOffsetting(pVCpu); + pSvmTransient->fUpdateTscOffsetting = false; + } + + /* If we've migrating CPUs, mark the VMCB Clean bits as dirty. */ + if (HMR0GetCurrentCpu()->idCpu != pVCpu->hm.s.idLastCpu) + pVmcb->ctrl.u64VmcbCleanBits = 0; + + /* Store status of the shared guest-host state at the time of VMRUN. 
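Stepping back to the shape of hmR0SvmPreRunGuest above: interrupts are disabled, the force flags are rechecked one final time, and only if nothing is pending does execution commit to the world switch. A minimal standalone sketch of that commit window; all helpers and return codes are stand-ins:

/* Sketch of the "disable interrupts, recheck, commit or back out" window. */
#include <stdbool.h>
#include <stdio.h>

#define RC_OK            0
#define RC_RAW_TO_R3     1
#define RC_RAW_INTERRUPT 2

static bool highPriorityWorkPending(void) { return false; }
static bool hostPreemptionPending(void)   { return false; }
static void disableInterrupts(void)       { /* ASMIntDisableFlags() analogue */ }
static void restoreInterrupts(void)       { /* ASMSetFlags() analogue        */ }

static int preRunCommitCheck(void)
{
    disableInterrupts();
    if (highPriorityWorkPending())        /* e.g. EMT rendezvous, virtual sync timers */
    {
        restoreInterrupts();
        return RC_RAW_TO_R3;
    }
    if (hostPreemptionPending())          /* a host interrupt wants this CPU */
    {
        restoreInterrupts();
        return RC_RAW_INTERRUPT;
    }
    return RC_OK;                         /* committed: interrupts stay off until after VMRUN */
}

int main(void)
{
    printf("rc=%d\n", preRunCommitCheck());
    return 0;
}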
*/ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(pCtx)) + { + pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu); + pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu); + } + else +#endif + { + pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu); + pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu); + } + pSvmTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu); + + /* Flush the appropriate tagged-TLB entries. */ + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */ + hmR0SvmFlushTaggedTlb(pVCpu); + Assert(HMR0GetCurrentCpu()->idCpu == pVCpu->hm.s.idLastCpu); + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x); + + TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about + to start executing. */ + + /* + * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that + * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}. + * + * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc). + */ + if ( (pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) + && !(pVmcb->ctrl.u32InterceptCtrl2 & SVM_CTRL2_INTERCEPT_RDTSCP)) + { + /** @todo r=bird: I cannot find any place where the guest TSC_AUX value is + * saved. */ + hmR0SvmSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE); + pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX); + uint64_t u64GuestTscAux = CPUMR0GetGuestTscAux(pVCpu); + if (u64GuestTscAux != pVCpu->hm.s.u64HostTscAux) + ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux); + pSvmTransient->fRestoreTscAuxMsr = true; + } + else + { + hmR0SvmSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE); + pSvmTransient->fRestoreTscAuxMsr = false; + } + + /* If VMCB Clean bits isn't supported by the CPU, simply mark all state-bits as dirty, indicating (re)load-from-VMCB. */ + if (!(pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN)) + pVmcb->ctrl.u64VmcbCleanBits = 0; +} + + +/** + * Wrapper for running the guest code in AMD-V. + * + * @returns VBox strict status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0SvmRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* + * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations + * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper. + * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details. + */ +#ifdef VBOX_WITH_KERNEL_USING_XMM + return HMR0SVMRunWrapXMM(pVCpu->hm.s.svm.HCPhysVmcbHost, pVCpu->hm.s.svm.HCPhysVmcb, pCtx, pVM, pVCpu, + pVCpu->hm.s.svm.pfnVMRun); +#else + return pVCpu->hm.s.svm.pfnVMRun(pVCpu->hm.s.svm.HCPhysVmcbHost, pVCpu->hm.s.svm.HCPhysVmcb, pCtx, pVM, pVCpu); +#endif +} + + +/** + * Performs some essential restoration of state after running guest code in + * AMD-V. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. 
+ * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pSvmTransient Pointer to the SVM transient structure. + * @param rcVMRun Return code of VMRUN. + * + * @remarks Called with interrupts disabled. + * @remarks No-long-jump zone!!! This function will however re-enable longjmps + * unconditionally when it is safe to do so. + */ +static void hmR0SvmPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient, int rcVMRun) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */ + ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */ + + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pVmcb->ctrl.u64VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL; /* Mark the VMCB-state cache as unmodified by VMM. */ + + if (pSvmTransient->fRestoreTscAuxMsr) + { + uint64_t u64GuestTscAux = ASMRdMsr(MSR_K8_TSC_AUX); + CPUMR0SetGuestTscAux(pVCpu, u64GuestTscAux); + if (u64GuestTscAux != pVCpu->hm.s.u64HostTscAux) + ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux); + } + + if (!(pVmcb->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_RDTSC)) + { + /** @todo Find a way to fix hardcoding a guestimate. */ + TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVmcb->ctrl.u64TSCOffset - 0x400); + } + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x); + TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */ + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + + Assert(!(ASMGetFlags() & X86_EFL_IF)); + ASMSetFlags(pSvmTransient->uEflags); /* Enable interrupts. */ + VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */ + + /* If VMRUN failed, we can bail out early. This does -not- cover SVM_EXIT_INVALID. */ + if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS)) + { + Log4(("VMRUN failure: rcVMRun=%Rrc\n", rcVMRun)); + return; + } + + pSvmTransient->u64ExitCode = pVmcb->ctrl.u64ExitCode; /* Save the #VMEXIT reason. */ + pSvmTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */ + hmR0SvmSaveGuestState(pVCpu, pMixedCtx); /* Save the guest state from the VMCB to the guest-CPU context. */ + + if (RT_LIKELY(pSvmTransient->u64ExitCode != (uint64_t)SVM_EXIT_INVALID)) + { + if (pVCpu->hm.s.svm.fSyncVTpr) + { + /* TPR patching (for 32-bit guests) uses LSTAR MSR for holding the TPR value, otherwise uses the VTPR. */ + if ( pVM->hm.s.fTPRPatchingActive + && (pMixedCtx->msrLSTAR & 0xff) != pSvmTransient->u8GuestTpr) + { + int rc = PDMApicSetTPR(pVCpu, pMixedCtx->msrLSTAR & 0xff); + AssertRC(rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + } + else if (pSvmTransient->u8GuestTpr != pVmcb->ctrl.IntCtrl.n.u8VTPR) + { + int rc = PDMApicSetTPR(pVCpu, pVmcb->ctrl.IntCtrl.n.u8VTPR << 4); + AssertRC(rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + } + } + } +} + + +/** + * Runs the guest code using AMD-V. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. 
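A standalone model of the TSC_AUX handling around VMRUN above: when RDTSCP is left unintercepted the host value is saved and the guest value installed before execution, and hmR0SvmPostRunGuest captures whatever the guest wrote before restoring the host value. rdmsr/wrmsr are stubbed here; this is not the VirtualBox code path:

/* Host/guest MSR swap around guest execution, modelled on the TSC_AUX case. */
#include <stdint.h>
#include <stdio.h>

static uint64_t g_u64TscAuxMsr = 0x11;                 /* pretend MSR_K8_TSC_AUX */

static uint64_t msrRead(void)             { return g_u64TscAuxMsr; }
static void     msrWrite(uint64_t u64Val) { g_u64TscAuxMsr = u64Val; }

int main(void)
{
    uint64_t u64GuestTscAux = 0x22;
    uint64_t u64HostTscAux  = msrRead();               /* save the host value      */

    if (u64GuestTscAux != u64HostTscAux)
        msrWrite(u64GuestTscAux);                      /* guest RDTSCP sees its own value */

    printf("during guest run: %#llx\n", (unsigned long long)msrRead());

    /* Post-run: capture what the guest left behind, then restore the host value. */
    uint64_t u64GuestNow = msrRead();
    if (u64GuestNow != u64HostTscAux)
        msrWrite(u64HostTscAux);

    printf("after restore:    %#llx\n", (unsigned long long)msrRead());
    return 0;
}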
+ */ +VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + HMSVM_ASSERT_PREEMPT_SAFE(); + VMMRZCallRing3SetNotification(pVCpu, hmR0SvmCallRing3Callback, pCtx); + + SVMTRANSIENT SvmTransient; + SvmTransient.fUpdateTscOffsetting = true; + uint32_t cLoops = 0; + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + int rc = VERR_INTERNAL_ERROR_5; + + for (;; cLoops++) + { + Assert(!HMR0SuspendPending()); + HMSVM_ASSERT_CPU_SAFE(); + + /* Preparatory work for running guest code, this may force us to return + to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); + rc = hmR0SvmPreRunGuest(pVM, pVCpu, pCtx, &SvmTransient); + if (rc != VINF_SUCCESS) + break; + + hmR0SvmPreRunGuestCommitted(pVM, pVCpu, pCtx, &SvmTransient); + rc = hmR0SvmRunGuest(pVM, pVCpu, pCtx); + + /* Restore any residual host-state and save any bits shared between host + and guest into the guest-CPU state. Re-enables interrupts! */ + hmR0SvmPostRunGuest(pVM, pVCpu, pCtx, &SvmTransient, rc); + + if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for VMRUN errors. */ + || SvmTransient.u64ExitCode == (uint64_t)SVM_EXIT_INVALID)) /* Check for invalid guest-state errors. */ + { + if (rc == VINF_SUCCESS) + rc = VERR_SVM_INVALID_GUEST_STATE; + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x); + hmR0SvmReportWorldSwitchError(pVM, pVCpu, rc, pCtx); + break; + } + + /* Handle the #VMEXIT. */ + HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x); + rc = hmR0SvmHandleExit(pVCpu, pCtx, &SvmTransient); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x); + if (rc != VINF_SUCCESS) + break; + else if (cLoops > pVM->hm.s.cMaxResumeLoops) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume); + rc = VINF_EM_RAW_INTERRUPT; + break; + } + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); + if (rc == VERR_EM_INTERPRETER) + rc = VINF_EM_RAW_EMULATE_INSTR; + else if (rc == VINF_EM_RESET) + rc = VINF_EM_TRIPLE_FAULT; + + /* Prepare to return to ring-3. This will remove longjmp notifications. */ + hmR0SvmExitToRing3(pVM, pVCpu, pCtx, rc); + Assert(!VMMRZCallRing3IsNotificationSet(pVCpu)); + return rc; +} + + +/** + * Handles a #VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID). + * + * @returns VBox status code (informational status codes included). + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param pSvmTransient Pointer to the SVM transient structure. + */ +DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + Assert(pSvmTransient->u64ExitCode != (uint64_t)SVM_EXIT_INVALID); + Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX); + + /* + * The ordering of the case labels is based on most-frequently-occurring VM-exits for most guests under + * normal workloads (for some definition of "normal"). 
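Before the dispatch that follows, a compressed skeleton of the loop in SVMR0RunGuestCode above: prepare (which may bounce back to ring-3), run the guest, handle the exit, and cap the number of resume iterations so the EMT returns to ring-3 periodically. All helpers and return codes are stand-ins:

/* Skeleton of the prepare / run / handle-exit loop with a resume cap. */
#include <stdio.h>

#define RC_OK            0
#define RC_TO_RING3      1
#define RC_RAW_INTERRUPT 2

static int prepareRun(void)  { return RC_OK; }                      /* force flags, pending events, ... */
static int runGuest(void)    { return RC_OK; }                      /* VMRUN analogue                   */
static int handleExit(int i) { return (i == 5) ? RC_TO_RING3 : RC_OK; } /* #VMEXIT dispatch analogue    */

static int runLoop(unsigned cMaxResumeLoops)
{
    for (unsigned cLoops = 0; ; cLoops++)
    {
        int rc = prepareRun();
        if (rc != RC_OK)
            return rc;
        runGuest();
        rc = handleExit((int)cLoops);
        if (rc != RC_OK)
            return rc;
        if (cLoops > cMaxResumeLoops)
            return RC_RAW_INTERRUPT;      /* cap time spent in ring-0 */
    }
}

int main(void)
{
    printf("rc=%d\n", runLoop(1024));
    return 0;
}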
+ */ + uint32_t u32ExitCode = pSvmTransient->u64ExitCode; + switch (pSvmTransient->u64ExitCode) + { + case SVM_EXIT_NPF: + return hmR0SvmExitNestedPF(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_IOIO: + return hmR0SvmExitIOInstr(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_RDTSC: + return hmR0SvmExitRdtsc(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_RDTSCP: + return hmR0SvmExitRdtscp(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_CPUID: + return hmR0SvmExitCpuid(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_EXCEPTION_E: /* X86_XCPT_PF */ + return hmR0SvmExitXcptPF(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_EXCEPTION_7: /* X86_XCPT_NM */ + return hmR0SvmExitXcptNM(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_EXCEPTION_10: /* X86_XCPT_MF */ + return hmR0SvmExitXcptMF(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_EXCEPTION_1: /* X86_XCPT_DB */ + return hmR0SvmExitXcptDB(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_MONITOR: + return hmR0SvmExitMonitor(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_MWAIT: + return hmR0SvmExitMwait(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_HLT: + return hmR0SvmExitHlt(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_READ_CR0: + case SVM_EXIT_READ_CR3: + case SVM_EXIT_READ_CR4: + return hmR0SvmExitReadCRx(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_WRITE_CR0: + case SVM_EXIT_WRITE_CR3: + case SVM_EXIT_WRITE_CR4: + case SVM_EXIT_WRITE_CR8: + return hmR0SvmExitWriteCRx(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_VINTR: + return hmR0SvmExitVIntr(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_INTR: + case SVM_EXIT_FERR_FREEZE: + case SVM_EXIT_NMI: + return hmR0SvmExitIntr(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_MSR: + return hmR0SvmExitMsr(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_INVLPG: + return hmR0SvmExitInvlpg(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_WBINVD: + return hmR0SvmExitWbinvd(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_INVD: + return hmR0SvmExitInvd(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_RDPMC: + return hmR0SvmExitRdpmc(pVCpu, pCtx, pSvmTransient); + + default: + { + switch (pSvmTransient->u64ExitCode) + { + case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3: + case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9: + case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13: + case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15: + return hmR0SvmExitReadDRx(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3: + case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9: + case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13: + case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15: + return hmR0SvmExitWriteDRx(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_TASK_SWITCH: + return hmR0SvmExitTaskSwitch(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_VMMCALL: + return hmR0SvmExitVmmCall(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_SHUTDOWN: + return hmR0SvmExitShutdown(pVCpu, pCtx, pSvmTransient); + + case SVM_EXIT_SMI: + case SVM_EXIT_INIT: + { + /* + * We don't intercept NMIs. As for INIT signals, it really shouldn't ever happen here. If it ever does, + * we want to know about it so log the exit code and bail. 
+ */ + AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit %#RX32\n", (uint32_t)pSvmTransient->u64ExitCode)); + pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode; + return VERR_SVM_UNEXPECTED_EXIT; + } + + case SVM_EXIT_INVLPGA: + case SVM_EXIT_RSM: + case SVM_EXIT_VMRUN: + case SVM_EXIT_VMLOAD: + case SVM_EXIT_VMSAVE: + case SVM_EXIT_STGI: + case SVM_EXIT_CLGI: + case SVM_EXIT_SKINIT: + return hmR0SvmExitSetPendingXcptUD(pVCpu, pCtx, pSvmTransient); + +#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS + case SVM_EXIT_EXCEPTION_0: /* X86_XCPT_DE */ + /* SVM_EXIT_EXCEPTION_1: */ /* X86_XCPT_DB - Handled above. */ + case SVM_EXIT_EXCEPTION_2: /* X86_XCPT_NMI */ + case SVM_EXIT_EXCEPTION_3: /* X86_XCPT_BP */ + case SVM_EXIT_EXCEPTION_4: /* X86_XCPT_OF */ + case SVM_EXIT_EXCEPTION_5: /* X86_XCPT_BR */ + case SVM_EXIT_EXCEPTION_6: /* X86_XCPT_UD */ + /* SVM_EXIT_EXCEPTION_7: */ /* X86_XCPT_NM - Handled above. */ + case SVM_EXIT_EXCEPTION_8: /* X86_XCPT_DF */ + case SVM_EXIT_EXCEPTION_9: /* X86_XCPT_CO_SEG_OVERRUN */ + case SVM_EXIT_EXCEPTION_A: /* X86_XCPT_TS */ + case SVM_EXIT_EXCEPTION_B: /* X86_XCPT_NP */ + case SVM_EXIT_EXCEPTION_C: /* X86_XCPT_SS */ + case SVM_EXIT_EXCEPTION_D: /* X86_XCPT_GP */ + /* SVM_EXIT_EXCEPTION_E: */ /* X86_XCPT_PF - Handled above. */ + /* SVM_EXIT_EXCEPTION_10: */ /* X86_XCPT_MF - Handled above. */ + case SVM_EXIT_EXCEPTION_11: /* X86_XCPT_AC */ + case SVM_EXIT_EXCEPTION_12: /* X86_XCPT_MC */ + case SVM_EXIT_EXCEPTION_13: /* X86_XCPT_XF */ + case SVM_EXIT_EXCEPTION_F: /* Reserved */ + case SVM_EXIT_EXCEPTION_14: case SVM_EXIT_EXCEPTION_15: case SVM_EXIT_EXCEPTION_16: + case SVM_EXIT_EXCEPTION_17: case SVM_EXIT_EXCEPTION_18: case SVM_EXIT_EXCEPTION_19: + case SVM_EXIT_EXCEPTION_1A: case SVM_EXIT_EXCEPTION_1B: case SVM_EXIT_EXCEPTION_1C: + case SVM_EXIT_EXCEPTION_1D: case SVM_EXIT_EXCEPTION_1E: case SVM_EXIT_EXCEPTION_1F: + { + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = pSvmTransient->u64ExitCode - SVM_EXIT_EXCEPTION_0; + + switch (Event.n.u8Vector) + { + case X86_XCPT_DE: + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); + break; + + case X86_XCPT_BP: + /** Saves the wrong EIP on the stack (pointing to the int3) instead of the + * next instruction. */ + /** @todo Investigate this later. 
*/ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); + break; + + case X86_XCPT_UD: + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); + break; + + case X86_XCPT_NP: + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = pVmcb->ctrl.u64ExitInfo1; + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); + break; + + case X86_XCPT_SS: + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = pVmcb->ctrl.u64ExitInfo1; + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); + break; + + case X86_XCPT_GP: + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = pVmcb->ctrl.u64ExitInfo1; + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); + break; + + default: + AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit caused by exception %#x\n", Event.n.u8Vector)); + pVCpu->hm.s.u32HMError = Event.n.u8Vector; + return VERR_SVM_UNEXPECTED_XCPT_EXIT; + } + + Log4(("#Xcpt: Vector=%#x at CS:RIP=%04x:%RGv\n", Event.n.u8Vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); + return VINF_SUCCESS; + } +#endif /* HMSVM_ALWAYS_TRAP_ALL_XCPTS */ + + default: + { + AssertMsgFailed(("hmR0SvmHandleExit: Unknown exit code %#x\n", u32ExitCode)); + pVCpu->hm.s.u32HMError = u32ExitCode; + return VERR_SVM_UNKNOWN_EXIT; + } + } + } + } + return VERR_INTERNAL_ERROR_5; /* Should never happen. */ +} + + +#ifdef DEBUG +/* Is there some generic IPRT define for this that are not in Runtime/internal/\* ?? */ +# define HMSVM_ASSERT_PREEMPT_CPUID_VAR() \ + RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId() + +# define HMSVM_ASSERT_PREEMPT_CPUID() \ + do \ + { \ + RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \ + AssertMsg(idAssertCpu == idAssertCpuNow, ("SVM %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \ + } while (0) + +# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS() \ + do { \ + AssertPtr(pVCpu); \ + AssertPtr(pCtx); \ + AssertPtr(pSvmTransient); \ + Assert(ASMIntAreEnabled()); \ + HMSVM_ASSERT_PREEMPT_SAFE(); \ + HMSVM_ASSERT_PREEMPT_CPUID_VAR(); \ + Log4Func(("vcpu[%u] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (uint32_t)pVCpu->idCpu)); \ + HMSVM_ASSERT_PREEMPT_SAFE(); \ + if (VMMR0IsLogFlushDisabled(pVCpu)) \ + HMSVM_ASSERT_PREEMPT_CPUID(); \ + } while (0) +#else /* Release builds */ +# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS() do { } while (0) +#endif + + +/** + * Worker for hmR0SvmInterpretInvlpg(). + * + * @return VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the disassembler state. + * @param pRegFrame Pointer to the register frame. + */ +static int hmR0SvmInterpretInvlPgEx(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame) +{ + DISQPVPARAMVAL Param1; + RTGCPTR GCPtrPage; + + int rc = DISQueryParamVal(pRegFrame, pCpu, &pCpu->Param1, &Param1, DISQPVWHICH_SRC); + if (RT_FAILURE(rc)) + return VERR_EM_INTERPRETER; + + if ( Param1.type == DISQPV_TYPE_IMMEDIATE + || Param1.type == DISQPV_TYPE_ADDRESS) + { + if (!(Param1.flags & (DISQPV_FLAG_32 | DISQPV_FLAG_64))) + return VERR_EM_INTERPRETER; + + GCPtrPage = Param1.val.val64; + VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVCpu->CTX_SUFF(pVM), pVCpu, pRegFrame, GCPtrPage); + rc = VBOXSTRICTRC_VAL(rc2); + } + else + { + Log4(("hmR0SvmInterpretInvlPgEx invalid parameter type %#x\n", Param1.type)); + rc = VERR_EM_INTERPRETER; + } + + return rc; +} + + +/** + * Interprets INVLPG. + * + * @returns VBox status code. + * @retval VINF_* Scheduling instructions. 
+ * @retval VERR_EM_INTERPRETER Something we can't cope with. + * @retval VERR_* Fatal errors. + * + * @param pVM Pointer to the VM. + * @param pRegFrame Pointer to the register frame. + * + * @remarks Updates the RIP if the instruction was executed successfully. + */ +static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame) +{ + /* Only allow 32 & 64 bit code. */ + if (CPUMGetGuestCodeBits(pVCpu) != 16) + { + PDISSTATE pDis = &pVCpu->hm.s.DisState; + int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL /* pcbInstr */); + if ( RT_SUCCESS(rc) + && pDis->pCurInstr->uOpcode == OP_INVLPG) + { + rc = hmR0SvmInterpretInvlPgEx(pVCpu, pDis, pRegFrame); + if (RT_SUCCESS(rc)) + pRegFrame->rip += pDis->cbInstr; + return rc; + } + else + Log4(("hmR0SvmInterpretInvlpg: EMInterpretDisasCurrent returned %Rrc uOpCode=%#x\n", rc, pDis->pCurInstr->uOpcode)); + } + return VERR_EM_INTERPRETER; +} + + +/** + * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(void) hmR0SvmSetPendingXcptUD(PVMCPU pVCpu) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_UD; + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets a debug (#DB) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(void) hmR0SvmSetPendingXcptDB(PVMCPU pVCpu) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_DB; + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets a page fault (#PF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param u32ErrCode The error-code for the page-fault. + * @param uFaultAddress The page fault address (CR2). + * + * @remarks This updates the guest CR2 with @a uFaultAddress! + */ +DECLINLINE(void) hmR0SvmSetPendingXcptPF(PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t u32ErrCode, RTGCUINTPTR uFaultAddress) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_PF; + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = u32ErrCode; + + /* Update CR2 of the guest. */ + if (pCtx->cr2 != uFaultAddress) + { + pCtx->cr2 = uFaultAddress; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR2); + } + + hmR0SvmSetPendingEvent(pVCpu, &Event, uFaultAddress); +} + + +/** + * Sets a device-not-available (#NM) exception as pending-for-injection into the + * VM. + * + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(void) hmR0SvmSetPendingXcptNM(PVMCPU pVCpu) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_NM; + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets a math-fault (#MF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(void) hmR0SvmSetPendingXcptMF(PVMCPU pVCpu) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_MF; + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets a double fault (#DF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. 
+ */ +DECLINLINE(void) hmR0SvmSetPendingXcptDF(PVMCPU pVCpu) +{ + SVMEVENT Event; + Event.u = 0; + Event.n.u1Valid = 1; + Event.n.u3Type = SVM_EVENT_EXCEPTION; + Event.n.u8Vector = X86_XCPT_DF; + Event.n.u1ErrorCodeValid = 1; + Event.n.u32ErrorCode = 0; + hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */); +} + + +/** + * Emulates a simple MOV TPR (CR8) instruction, used for TPR patching on 32-bit + * guests. This simply looks up the patch record at EIP and does the required. + * + * This VMMCALL is used a fallback mechanism when mov to/from cr8 isn't exactly + * like how we want it to be (e.g. not followed by shr 4 as is usually done for + * TPR). See hmR3ReplaceTprInstr() for the details. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + */ +static int hmR0SvmEmulateMovTpr(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Log4(("Emulated VMMCall TPR access replacement at RIP=%RGv\n", pCtx->rip)); + for (;;) + { + bool fPending; + uint8_t u8Tpr; + + PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip); + if (!pPatch) + break; + + switch (pPatch->enmType) + { + case HMTPRINSTR_READ: + { + int rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPending, NULL /* pu8PendingIrq */); + AssertRC(rc); + + rc = DISWriteReg32(CPUMCTX2CORE(pCtx), pPatch->uDstOperand, u8Tpr); + AssertRC(rc); + pCtx->rip += pPatch->cbOp; + break; + } + + case HMTPRINSTR_WRITE_REG: + case HMTPRINSTR_WRITE_IMM: + { + if (pPatch->enmType == HMTPRINSTR_WRITE_REG) + { + uint32_t u32Val; + int rc = DISFetchReg32(CPUMCTX2CORE(pCtx), pPatch->uSrcOperand, &u32Val); + AssertRC(rc); + u8Tpr = u32Val; + } + else + u8Tpr = (uint8_t)pPatch->uSrcOperand; + + int rc2 = PDMApicSetTPR(pVCpu, u8Tpr); + AssertRC(rc2); + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + + pCtx->rip += pPatch->cbOp; + break; + } + + default: + AssertMsgFailed(("Unexpected patch type %d\n", pPatch->enmType)); + pVCpu->hm.s.u32HMError = pPatch->enmType; + return VERR_SVM_UNEXPECTED_PATCH_TYPE; + } + } + + return VINF_SUCCESS; +} + + +/** + * Determines if an exception is a contributory exception. Contributory + * exceptions are ones which can cause double-faults. Page-fault is + * intentionally not included here as it's a conditional contributory exception. + * + * @returns true if the exception is contributory, false otherwise. + * @param uVector The exception vector. + */ +DECLINLINE(bool) hmR0SvmIsContributoryXcpt(const uint32_t uVector) +{ + switch (uVector) + { + case X86_XCPT_GP: + case X86_XCPT_SS: + case X86_XCPT_NP: + case X86_XCPT_TS: + case X86_XCPT_DE: + return true; + default: + break; + } + return false; +} + + +/** + * Handle a condition that occurred while delivering an event through the guest + * IDT. + * + * @returns VBox status code (informational error codes included). + * @retval VINF_SUCCESS if we should continue handling the VM-exit. + * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to + * continue execution of the guest which will delivery the #DF. + * @retval VINF_EM_RESET if we detected a triple-fault condition. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param pSvmTransient Pointer to the SVM transient structure. + * + * @remarks No-long-jump zone!!! 
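The function below classifies what happens when delivering one event triggers another exception: re-queue the original event, escalate to #DF, or report a triple fault. A simplified standalone sketch of that classification; it omits the vectoring-#PF special case and the intercept-mask check, and the vector numbers are the architectural ones:

/* Simplified double-fault / triple-fault classification. */
#include <stdbool.h>
#include <stdio.h>

typedef enum { REFLECT_XCPT, REFLECT_DF, REFLECT_TRIPLE_FAULT } REFLECT;

static bool isContributory(unsigned uVector)
{
    return uVector == 0 /*#DE*/ || uVector == 10 /*#TS*/ || uVector == 11 /*#NP*/
        || uVector == 12 /*#SS*/ || uVector == 13 /*#GP*/;
}

static REFLECT classify(unsigned uIdtVector, unsigned uExitVector)
{
    if (uIdtVector == 8 /*#DF*/)
        return REFLECT_TRIPLE_FAULT;
    if (   isContributory(uExitVector)
        && (isContributory(uIdtVector) || uIdtVector == 14 /*#PF*/))
        return REFLECT_DF;
    return REFLECT_XCPT;   /* re-queue the original event, handle the new exit normally */
}

int main(void)
{
    printf("%d\n", classify(13, 13)); /* #GP while delivering #GP -> #DF                     */
    printf("%d\n", classify(8, 13));  /* fault while delivering #DF -> triple fault / reset  */
    printf("%d\n", classify(3, 14));  /* #PF while delivering #BP -> re-queue #BP            */
    return 0;
}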
+ */ +static int hmR0SvmCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + int rc = VINF_SUCCESS; + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + + /* See AMD spec. 15.7.3 "EXITINFO Pseudo-Code". The EXITINTINFO (if valid) contains the prior exception (IDT vector) + * that was trying to be delivered to the guest which caused a #VMEXIT which was intercepted (Exit vector). */ + if (pVmcb->ctrl.ExitIntInfo.n.u1Valid) + { + uint8_t uIdtVector = pVmcb->ctrl.ExitIntInfo.n.u8Vector; + + typedef enum + { + SVMREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */ + SVMREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */ + SVMREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */ + SVMREFLECTXCPT_NONE /* Nothing to reflect. */ + } SVMREFLECTXCPT; + + SVMREFLECTXCPT enmReflect = SVMREFLECTXCPT_NONE; + if (pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_EXCEPTION) + { + if (pSvmTransient->u64ExitCode - SVM_EXIT_EXCEPTION_0 <= SVM_EXIT_EXCEPTION_1F) + { + uint8_t uExitVector = (uint8_t)(pSvmTransient->u64ExitCode - SVM_EXIT_EXCEPTION_0); + +#ifdef VBOX_STRICT + if ( hmR0SvmIsContributoryXcpt(uIdtVector) + && uExitVector == X86_XCPT_PF) + { + Log4(("IDT: Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pCtx->cr2)); + } +#endif + if ( uExitVector == X86_XCPT_PF + && uIdtVector == X86_XCPT_PF) + { + pSvmTransient->fVectoringPF = true; + Log4(("IDT: Vectoring #PF uCR2=%#RX64\n", pCtx->cr2)); + } + else if ( (pVmcb->ctrl.u32InterceptException & HMSVM_CONTRIBUTORY_XCPT_MASK) + && hmR0SvmIsContributoryXcpt(uExitVector) + && ( hmR0SvmIsContributoryXcpt(uIdtVector) + || uIdtVector == X86_XCPT_PF)) + { + enmReflect = SVMREFLECTXCPT_DF; + Log4(("IDT: Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->hm.s.Event.u64IntInfo, + uIdtVector, uExitVector)); + } + else if (uIdtVector == X86_XCPT_DF) + { + enmReflect = SVMREFLECTXCPT_TF; + Log4(("IDT: Pending vectoring triple-fault %#RX64 uIdtVector=%#x uExitVector=%#x\n", + pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector)); + } + else + enmReflect = SVMREFLECTXCPT_XCPT; + } + else + { + /* + * If event delivery caused an #VMEXIT that is not an exception (e.g. #NPF) then reflect the original + * exception to the guest after handling the VM-exit. + */ + enmReflect = SVMREFLECTXCPT_XCPT; + } + } + else if (pVmcb->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT) + { + /* Ignore software interrupts (INT n) as they reoccur when restarting the instruction. */ + enmReflect = SVMREFLECTXCPT_XCPT; + } + + switch (enmReflect) + { + case SVMREFLECTXCPT_XCPT: + { + Assert(pVmcb->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT); + hmR0SvmSetPendingEvent(pVCpu, &pVmcb->ctrl.ExitIntInfo, 0 /* GCPtrFaultAddress */); + + /* If uExitVector is #PF, CR2 value will be updated from the VMCB if it's a guest #PF. See hmR0SvmExitXcptPF(). 
*/ + Log4(("IDT: Pending vectoring event %#RX64 ErrValid=%RTbool Err=%#RX32\n", pVmcb->ctrl.ExitIntInfo.u, + !!pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid, pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode)); + break; + } + + case SVMREFLECTXCPT_DF: + { + hmR0SvmSetPendingXcptDF(pVCpu); + rc = VINF_HM_DOUBLE_FAULT; + break; + } + + case SVMREFLECTXCPT_TF: + { + rc = VINF_EM_RESET; + break; + } + + default: + Assert(rc == VINF_SUCCESS); + break; + } + } + Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET); + return rc; +} + + +/** + * Advances the guest RIP in the if the NRIP_SAVE feature is supported by the + * CPU, otherwise advances the RIP by @a cb bytes. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * @param cb RIP increment value in bytes. + * + * @remarks Use this function only from #VMEXIT's where the NRIP value is valid + * when NRIP_SAVE is supported by the CPU! + */ +DECLINLINE(void) hmR0SvmUpdateRip(PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t cb) +{ + if (pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE) + { + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pCtx->rip = pVmcb->ctrl.u64NextRIP; + } + else + pCtx->rip += cb; +} + + +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ +/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #VMEXIT handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */ +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ + +/** @name VM-exit handlers. + * @{ + */ + +/** + * #VMEXIT handler for external interrupts, NMIs, FPU assertion freeze and INIT + * signals (SVM_EXIT_INTR, SVM_EXIT_NMI, SVM_EXIT_FERR_FREEZE, SVM_EXIT_INIT). + */ +HMSVM_EXIT_DECL hmR0SvmExitIntr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + if (pSvmTransient->u64ExitCode == SVM_EXIT_NMI) + STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC); + else if (pSvmTransient->u64ExitCode == SVM_EXIT_INTR) + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt); + + /* + * AMD-V has no preemption timer and the generic periodic preemption timer has no way to signal -before- the timer + * fires if the current interrupt is our own timer or a some other host interrupt. We also cannot examine what + * interrupt it is until the host actually take the interrupt. + * + * Going back to executing guest code here unconditionally causes random scheduling problems (observed on an + * AMD Phenom 9850 Quad-Core on Windows 64-bit host). + */ + return VINF_EM_RAW_INTERRUPT; +} + + +/** + * #VMEXIT handler for WBINVD (SVM_EXIT_WBINVD). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitWbinvd(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd); + int rc = VINF_SUCCESS; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + return rc; +} + + +/** + * #VMEXIT handler for INVD (SVM_EXIT_INVD). Unconditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitInvd(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd); + int rc = VINF_SUCCESS; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + return rc; +} + + +/** + * #VMEXIT handler for INVD (SVM_EXIT_CPUID). Conditional #VMEXIT. 
+ */ +HMSVM_EXIT_DECL hmR0SvmExitCpuid(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsgFailed(("hmR0SvmExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid); + return rc; +} + + +/** + * #VMEXIT handler for RDTSC (SVM_EXIT_RDTSC). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitRdtsc(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + pSvmTransient->fUpdateTscOffsetting = true; + + /* Single step check. */ + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsgFailed(("hmR0SvmExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc); + return rc; +} + + +/** + * #VMEXIT handler for RDTSCP (SVM_EXIT_RDTSCP). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitRdtscp(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = EMInterpretRdtscp(pVCpu->CTX_SUFF(pVM), pVCpu, pCtx); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 3); + pSvmTransient->fUpdateTscOffsetting = true; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsgFailed(("hmR0SvmExitRdtsc: EMInterpretRdtscp failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp); + return rc; +} + + +/** + * #VMEXIT handler for RDPMC (SVM_EXIT_RDPMC). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitRdpmc(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = EMInterpretRdpmc(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsgFailed(("hmR0SvmExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc); + return rc; +} + + +/** + * #VMEXIT handler for INVLPG (SVM_EXIT_INVLPG). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitInvlpg(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Assert(!pVM->hm.s.fNestedPaging); + + /** @todo Decode Assist. */ + int rc = hmR0SvmInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx)); /* Updates RIP if successful. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg); + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + return rc; +} + + +/** + * #VMEXIT handler for HLT (SVM_EXIT_HLT). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitHlt(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + hmR0SvmUpdateRip(pVCpu, pCtx, 1); + int rc = EMShouldContinueAfterHalt(pVCpu, pCtx) ? 
VINF_SUCCESS : VINF_EM_HALT; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); + return rc; +} + + +/** + * #VMEXIT handler for MONITOR (SVM_EXIT_MONITOR). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitMonitor(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = EMInterpretMonitor(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 3); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor); + return rc; +} + + +/** + * #VMEXIT handler for MWAIT (SVM_EXIT_MWAIT). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitMwait(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + VBOXSTRICTRC rc2 = EMInterpretMWait(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx)); + int rc = VBOXSTRICTRC_VAL(rc2); + if ( rc == VINF_EM_HALT + || rc == VINF_SUCCESS) + { + hmR0SvmUpdateRip(pVCpu, pCtx, 3); + + if ( rc == VINF_EM_HALT + && EMMonitorWaitShouldContinue(pVCpu, pCtx)) + { + rc = VINF_SUCCESS; + } + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMwait: EMInterpretMWait failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER, + ("hmR0SvmExitMwait: EMInterpretMWait failed rc=%Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait); + return rc; +} + + +/** + * #VMEXIT handler for shutdown (triple-fault) (SVM_EXIT_SHUTDOWN). + * Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitShutdown(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + return VINF_EM_RESET; +} + + +/** + * #VMEXIT handler for CRx reads (SVM_EXIT_READ_CR*). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitReadCRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + Log4(("hmR0SvmExitReadCRx: CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip)); + + /** @todo Decode Assist. */ + VBOXSTRICTRC rc2 = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */); + int rc = VBOXSTRICTRC_VAL(rc2); + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3, + ("hmR0SvmExitReadCRx: EMInterpretInstruction failed rc=%Rrc\n", rc)); + Assert((pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0) <= 15); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0]); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + return rc; +} + + +/** + * #VMEXIT handler for CRx writes (SVM_EXIT_WRITE_CR*). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitWriteCRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + /** @todo Decode Assist. */ + VBOXSTRICTRC rc2 = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */); + int rc = VBOXSTRICTRC_VAL(rc2); + if (rc == VINF_SUCCESS) + { + /* RIP has been updated by EMInterpretInstruction(). */ + Assert((pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0) <= 15); + switch (pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0) + { + case 0: /* CR0. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + break; + + case 3: /* CR3. 
*/ + Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR3); + break; + + case 4: /* CR4. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR4); + break; + + case 8: /* CR8 (TPR). */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + break; + + default: + AssertMsgFailed(("hmR0SvmExitWriteCRx: Invalid/Unexpected Write-CRx exit. u64ExitCode=%#RX64 %#x CRx=%#RX64\n", + pSvmTransient->u64ExitCode, pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0)); + break; + } + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); + return rc; +} + + +/** + * #VMEXIT handler for instructions that result in a #UD exception delivered to + * the guest. + */ +HMSVM_EXIT_DECL hmR0SvmExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + hmR0SvmSetPendingXcptUD(pVCpu); + return VINF_SUCCESS; +} + + +/** + * #VMEXIT handler for MSR read and writes (SVM_EXIT_MSR). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + int rc; + if (pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_WRITE) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr); + + /* Handle TPR patching; intercepted LSTAR write. */ + if ( pVM->hm.s.fTPRPatchingActive + && pCtx->ecx == MSR_K8_LSTAR) + { + if ((pCtx->eax & 0xff) != pSvmTransient->u8GuestTpr) + { + /* Our patch code uses LSTAR for TPR caching for 32-bit guests. */ + int rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); + AssertRC(rc2); + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + } + hmR0SvmUpdateRip(pVCpu, pCtx, 2); + rc = VINF_SUCCESS; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + return rc; + } + + if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE) + { + rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + pCtx->rip = pVmcb->ctrl.u64NextRIP; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: EMInterpretWrmsr failed rc=%Rrc\n", rc)); + } + else + { + rc = VBOXSTRICTRC_TODO(EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */)); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: WrMsr. EMInterpretInstruction failed rc=%Rrc\n", rc)); + /* RIP updated by EMInterpretInstruction(). */ + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + + /* If this is an X2APIC WRMSR access, update the APIC state as well. */ + if ( pCtx->ecx >= MSR_IA32_X2APIC_START + && pCtx->ecx <= MSR_IA32_X2APIC_END) + { + /* + * We've already saved the APIC related guest-state (TPR) in hmR0SvmPostRunGuest(). When full APIC register + * virtualization is implemented we'll have to make sure APIC state is saved from the VMCB before + * EMInterpretWrmsr() changes it. + */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + } + else if (pCtx->ecx == MSR_K6_EFER) + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_EFER_MSR); + else if (pCtx->ecx == MSR_IA32_TSC) + pSvmTransient->fUpdateTscOffsetting = true; + } + else + { + /* MSR Read access. 
*/ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr); + Assert(pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_READ); + + if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_NRIP_SAVE) + { + rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + pCtx->rip = pVmcb->ctrl.u64NextRIP; + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: EMInterpretRdmsr failed rc=%Rrc\n", rc)); + } + else + { + rc = VBOXSTRICTRC_TODO(EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0)); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: RdMsr. EMInterpretInstruction failed rc=%Rrc\n", rc)); + /* RIP updated by EMInterpretInstruction(). */ + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + } + + /* RIP has been updated by EMInterpret[Rd|Wr]msr(). */ + return rc; +} + + +/** + * #VMEXIT handler for DRx read (SVM_EXIT_READ_DRx). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitReadDRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); + + /* We should -not- get this VM-exit if the guest's debug registers were active. */ + if (pSvmTransient->fWasGuestDebugStateActive) + { + AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit %#RX32\n", (uint32_t)pSvmTransient->u64ExitCode)); + pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode; + return VERR_SVM_UNEXPECTED_EXIT; + } + + /* + * Lazy DR0-3 loading. + */ + if (!pSvmTransient->fWasHyperDebugStateActive) + { + Assert(!DBGFIsStepping(pVCpu)); Assert(!pVCpu->hm.s.fSingleInstruction); + Log5(("hmR0SvmExitReadDRx: Lazy loading guest debug registers\n")); + + /* Don't intercept DRx read and writes. */ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pVmcb->ctrl.u16InterceptRdDRx = 0; + pVmcb->ctrl.u16InterceptWrDRx = 0; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS; + + /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */ + CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */); + Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch); + return VINF_SUCCESS; + } + + /* + * Interpret the read/writing of DRx. + */ + /** @todo Decode assist. */ + VBOXSTRICTRC rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */); + Log5(("hmR0SvmExitReadDRx: Emulated DRx access: rc=%Rrc\n", VBOXSTRICTRC_VAL(rc))); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + /* Not necessary for read accesses but whatever doesn't hurt for now, will be fixed with decode assist. */ + /** @todo CPUM should set this flag! */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + } + else + Assert(rc == VERR_EM_INTERPRETER); + return VBOXSTRICTRC_TODO(rc); +} + + +/** + * #VMEXIT handler for DRx write (SVM_EXIT_WRITE_DRx). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitWriteDRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + /* For now it's the same since we interpret the instruction anyway. Will change when using of Decode Assist is implemented. 
*/ + int rc = hmR0SvmExitReadDRx(pVCpu, pCtx, pSvmTransient); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatExitDRxRead); + return rc; +} + + +/** + * #VMEXIT handler for I/O instructions (SVM_EXIT_IOIO). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitIOInstr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* I/O operation lookup arrays. */ + static uint32_t const s_aIOSize[8] = { 0, 1, 2, 0, 4, 0, 0, 0 }; /* Size of the I/O accesses in bytes. */ + static uint32_t const s_aIOOpAnd[8] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0 }; /* AND masks for saving + the result (in AL/AX/EAX). */ + Log4(("hmR0SvmExitIOInstr: CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip)); + + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* Refer AMD spec. 15.10.2 "IN and OUT Behaviour" and Figure 15-2. "EXITINFO1 for IOIO Intercept" for the format. */ + SVMIOIOEXIT IoExitInfo; + IoExitInfo.u = (uint32_t)pVmcb->ctrl.u64ExitInfo1; + uint32_t uIOWidth = (IoExitInfo.u >> 4) & 0x7; + uint32_t cbValue = s_aIOSize[uIOWidth]; + uint32_t uAndVal = s_aIOOpAnd[uIOWidth]; + + if (RT_UNLIKELY(!cbValue)) + { + AssertMsgFailed(("hmR0SvmExitIOInstr: Invalid IO operation. uIOWidth=%u\n", uIOWidth)); + return VERR_EM_INTERPRETER; + } + + VBOXSTRICTRC rcStrict; + if (IoExitInfo.n.u1STR) + { + /* INS/OUTS - I/O String instruction. */ + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + + /** @todo Huh? why can't we use the segment prefix information given by AMD-V + * in EXITINFO1? Investigate once this thing is up and running. */ + + rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); + if (rcStrict == VINF_SUCCESS) + { + if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE) + { + rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite); + } + else + { + rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead); + } + } + else + rcStrict = VINF_EM_RAW_EMULATE_INSTR; + } + else + { + /* IN/OUT - I/O instruction. */ + Assert(!IoExitInfo.n.u1REP); + + if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE) + { + rcStrict = IOMIOPortWrite(pVM, pVCpu, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, cbValue); + if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pVmcb->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, cbValue); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite); + } + else + { + uint32_t u32Val = 0; + + rcStrict = IOMIOPortRead(pVM, pVCpu, IoExitInfo.n.u16Port, &u32Val, cbValue); + if (IOM_SUCCESS(rcStrict)) + { + /* Save result of I/O IN instr. in AL/AX/EAX. */ + pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal); + } + else if (rcStrict == VINF_IOM_R3_IOPORT_READ) + HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pVmcb->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, cbValue); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead); + } + } + + if (IOM_SUCCESS(rcStrict)) + { + /* AMD-V saves the RIP of the instruction following the IO instruction in EXITINFO2. */ + pCtx->rip = pVmcb->ctrl.u64ExitInfo2; + + /* + * If any I/O breakpoints are armed, we need to check if one triggered + * and take appropriate action. + * Note that the I/O breakpoint type is undefined if CR4.DE is 0. 
+ */ + /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the + * execution engines about whether hyper BPs and such are pending. */ + uint32_t const uDr7 = pCtx->dr[7]; + if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK) + && X86_DR7_ANY_RW_IO(uDr7) + && (pCtx->cr4 & X86_CR4_DE)) + || DBGFBpIsHwIoArmed(pVM))) + { + /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck); + CPUMR0DebugStateMaybeSaveGuest(pVCpu, false /*fDr6*/); + + VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pCtx, IoExitInfo.n.u16Port, cbValue); + if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP) + { + /* Raise #DB. */ + pVmcb->guest.u64DR6 = pCtx->dr[6]; + pVmcb->guest.u64DR7 = pCtx->dr[7]; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX; + hmR0SvmSetPendingXcptDB(pVCpu); + } + /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */ + else if ( rcStrict2 != VINF_SUCCESS + && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict)) + rcStrict = rcStrict2; + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + } + + HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict); + } + +#ifdef VBOX_STRICT + if (rcStrict == VINF_IOM_R3_IOPORT_READ) + Assert(IoExitInfo.n.u1Type == SVM_IOIO_READ); + else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + Assert(IoExitInfo.n.u1Type == SVM_IOIO_WRITE); + else + { + /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST + * statuses, that the VMM device and some others may return. See + * IOM_SUCCESS() for guidance. */ + AssertMsg( RT_FAILURE(rcStrict) + || rcStrict == VINF_SUCCESS + || rcStrict == VINF_EM_RAW_EMULATE_INSTR + || rcStrict == VINF_EM_DBG_BREAKPOINT + || rcStrict == VINF_EM_RAW_GUEST_TRAP + || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } +#endif + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * #VMEXIT handler for Nested Page-faults (SVM_EXIT_NPF). Conditional + * #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitNestedPF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Assert(pVM->hm.s.fNestedPaging); + + HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(); + + /* See AMD spec. 15.25.6 "Nested versus Guest Page Faults, Fault Ordering" for VMCB details for #NPF. */ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1; + RTGCPHYS GCPhysFaultAddr = pVmcb->ctrl.u64ExitInfo2; + + Log4(("#NPF at CS:RIP=%04x:%#RX64 faultaddr=%RGp errcode=%#x \n", pCtx->cs.Sel, pCtx->rip, GCPhysFaultAddr, u32ErrCode)); + +#ifdef VBOX_HM_WITH_GUEST_PATCHING + /* TPR patching for 32-bit guests, using the reserved bit in the page tables for MMIO regions. */ + if ( pVM->hm.s.fTRPPatchingAllowed + && (GCPhysFaultAddr & PAGE_OFFSET_MASK) == 0x80 /* TPR offset. */ + && ( !(u32ErrCode & X86_TRAP_PF_P) /* Not present */ + || (u32ErrCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) /* MMIO page. */ + && !CPUMIsGuestInLongModeEx(pCtx) + && !CPUMGetGuestCPL(pVCpu) + && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches)) + { + RTGCPHYS GCPhysApicBase = pCtx->msrApicBase; + GCPhysApicBase &= PAGE_BASE_GC_MASK; + + if (GCPhysFaultAddr == GCPhysApicBase + 0x80) + { + /* Only attempt to patch the instruction once. 
*/ + PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip); + if (!pPatch) + return VINF_EM_HM_PATCH_TPR_INSTR; + } + } +#endif + + /* + * Determine the nested paging mode. + */ + PGMMODE enmNestedPagingMode; +#if HC_ARCH_BITS == 32 + if (CPUMIsGuestInLongModeEx(pCtx)) + enmNestedPagingMode = PGMMODE_AMD64_NX; + else +#endif + enmNestedPagingMode = PGMGetHostMode(pVM); + + /* + * MMIO optimization using the reserved (RSVD) bit in the guest page tables for MMIO pages. + */ + int rc; + Assert((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD); + if ((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) + { + VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmNestedPagingMode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr, + u32ErrCode); + rc = VBOXSTRICTRC_VAL(rc2); + + /* + * If we succeed, resume guest execution. + * If we fail in interpreting the instruction because we couldn't get the guest physical address + * of the page containing the instruction via the guest's page tables (we would invalidate the guest page + * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this + * weird case. See @bugref{6043}. + */ + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* Successfully handled MMIO operation. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + rc = VINF_SUCCESS; + } + return rc; + } + + TRPMAssertXcptPF(pVCpu, GCPhysFaultAddr, u32ErrCode); + rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmNestedPagingMode, u32ErrCode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr); + TRPMResetTrap(pVCpu); + + Log4(("#NPF: PGMR0Trap0eHandlerNestedPaging returned %Rrc CS:RIP=%04x:%#RX64\n", rc, pCtx->cs.Sel, pCtx->rip)); + + /* + * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. + */ + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* We've successfully synced our shadow page tables. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF); + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * #VMEXIT handler for virtual interrupt (SVM_EXIT_VINTR). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitVIntr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + pVmcb->ctrl.IntCtrl.n.u1VIrqValid = 0; /* No virtual interrupts pending, we'll inject the current one before reentry. */ + pVmcb->ctrl.IntCtrl.n.u8VIrqVector = 0; + + /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */ + pVmcb->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_VINTR; + pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR); + + /* Deliver the pending interrupt via hmR0SvmPreRunGuest()->hmR0SvmInjectEventVmcb() and resume guest execution. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow); + return VINF_SUCCESS; +} + + +/** + * #VMEXIT handler for task switches (SVM_EXIT_TASK_SWITCH). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + +#ifndef HMSVM_ALWAYS_TRAP_TASK_SWITCH + Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); +#endif + + /* Check if this task-switch occurred while delivery an event through the guest IDT. 
*/ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + if ( !(pVmcb->ctrl.u64ExitInfo2 & (SVM_EXIT2_TASK_SWITCH_IRET | SVM_EXIT2_TASK_SWITCH_JMP)) + && pVCpu->hm.s.Event.fPending) + { + /* + * AMD-V does not provide us with the original exception but we have it in u64IntInfo since we + * injected the event during VM-entry. Software interrupts and exceptions will be regenerated + * when the recompiler restarts the instruction. + */ + SVMEVENT Event; + Event.u = pVCpu->hm.s.Event.u64IntInfo; + if ( Event.n.u3Type == SVM_EVENT_EXCEPTION + || Event.n.u3Type == SVM_EVENT_SOFTWARE_INT) + { + pVCpu->hm.s.Event.fPending = false; + } + else + Log4(("hmR0SvmExitTaskSwitch: TS occurred during event delivery. Kept pending u8Vector=%#x\n", Event.n.u8Vector)); + } + + /** @todo Emulate task switch someday, currently just going back to ring-3 for + * emulation. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch); + return VERR_EM_INTERPRETER; +} + + +/** + * #VMEXIT handler for VMMCALL (SVM_EXIT_VMMCALL). Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitVmmCall(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + int rc = hmR0SvmEmulateMovTpr(pVCpu->CTX_SUFF(pVM), pVCpu, pCtx); + if (RT_LIKELY(rc == VINF_SUCCESS)) + HMSVM_CHECK_SINGLE_STEP(pVCpu, rc); + else + hmR0SvmSetPendingXcptUD(pVCpu); + return VINF_SUCCESS; +} + + +/** + * #VMEXIT handler for page-fault exceptions (SVM_EXIT_EXCEPTION_E). Conditional + * #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitXcptPF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(); + + /* See AMD spec. 15.12.15 "#PF (Page Fault)". */ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1; + RTGCUINTPTR uFaultAddress = pVmcb->ctrl.u64ExitInfo2; + PVM pVM = pVCpu->CTX_SUFF(pVM); + +#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(HMSVM_ALWAYS_TRAP_PF) + if (pVM->hm.s.fNestedPaging) + { + pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */ + if (!pSvmTransient->fVectoringPF) + { + /* A genuine guest #PF, reflect it to the guest. */ + hmR0SvmSetPendingXcptPF(pVCpu, pCtx, u32ErrCode, uFaultAddress); + Log4(("#PF: Guest page fault at %04X:%RGv FaultAddr=%RGv ErrCode=%#x\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip, + uFaultAddress, u32ErrCode)); + } + else + { + /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ + hmR0SvmSetPendingXcptDF(pVCpu); + Log4(("Pending #DF due to vectoring #PF. NP\n")); + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); + return VINF_SUCCESS; + } +#endif + + Assert(!pVM->hm.s.fNestedPaging); + +#ifdef VBOX_HM_WITH_GUEST_PATCHING + /* Shortcut for APIC TPR reads and writes; only applicable to 32-bit guests. */ + if ( pVM->hm.s.fTRPPatchingAllowed + && (uFaultAddress & 0xfff) == 0x80 /* TPR offset. */ + && !(u32ErrCode & X86_TRAP_PF_P) /* Not present. */ + && !CPUMIsGuestInLongModeEx(pCtx) + && !CPUMGetGuestCPL(pVCpu) + && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches)) + { + RTGCPHYS GCPhysApicBase; + GCPhysApicBase = pCtx->msrApicBase; + GCPhysApicBase &= PAGE_BASE_GC_MASK; + + /* Check if the page at the fault-address is the APIC base. */ + RTGCPHYS GCPhysPage; + int rc2 = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL /* pfFlags */, &GCPhysPage); + if ( rc2 == VINF_SUCCESS + && GCPhysPage == GCPhysApicBase) + { + /* Only attempt to patch the instruction once. 
*/ + PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip); + if (!pPatch) + return VINF_EM_HM_PATCH_TPR_INSTR; + } + } +#endif + + Log4(("#PF: uFaultAddress=%#RX64 CS:RIP=%#04x:%#RX64 u32ErrCode %#RX32 cr3=%#RX64\n", uFaultAddress, pCtx->cs.Sel, + pCtx->rip, u32ErrCode, pCtx->cr3)); + + TRPMAssertXcptPF(pVCpu, uFaultAddress, u32ErrCode); + int rc = PGMTrap0eHandler(pVCpu, u32ErrCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress); + + Log4(("#PF rc=%Rrc\n", rc)); + + if (rc == VINF_SUCCESS) + { + /* Successfully synced shadow pages tables or emulated an MMIO instruction. */ + TRPMResetTrap(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF); + HMCPU_CF_SET(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE); + return rc; + } + else if (rc == VINF_EM_RAW_GUEST_TRAP) + { + pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */ + + if (!pSvmTransient->fVectoringPF) + { + /* It's a guest page fault and needs to be reflected to the guest. */ + u32ErrCode = TRPMGetErrorCode(pVCpu); /* The error code might have been changed. */ + TRPMResetTrap(pVCpu); + hmR0SvmSetPendingXcptPF(pVCpu, pCtx, u32ErrCode, uFaultAddress); + } + else + { + /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ + TRPMResetTrap(pVCpu); + hmR0SvmSetPendingXcptDF(pVCpu); + Log4(("#PF: Pending #DF due to vectoring #PF\n")); + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); + return VINF_SUCCESS; + } + + TRPMResetTrap(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM); + return rc; +} + + +/** + * #VMEXIT handler for device-not-available exceptions (SVM_EXIT_EXCEPTION_7). + * Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitXcptNM(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(); + + /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + int rc; + /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */ + if (pSvmTransient->fWasGuestFPUStateActive) + { + rc = VINF_EM_RAW_GUEST_TRAP; + Assert(CPUMIsGuestFPUStateActive(pVCpu) || HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)); + } + else + { +#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS + Assert(!pSvmTransient->fWasGuestFPUStateActive); +#endif + rc = CPUMR0Trap07Handler(pVCpu->CTX_SUFF(pVM), pVCpu, pCtx); + Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu))); + } + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + if (rc == VINF_SUCCESS) + { + /* Guest FPU state was activated, we'll want to change CR0 FPU intercepts before the next VM-reentry. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM); + } + else + { + /* Forward #NM to the guest. */ + Assert(rc == VINF_EM_RAW_GUEST_TRAP); + hmR0SvmSetPendingXcptNM(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM); + } + return VINF_SUCCESS; +} + + +/** + * #VMEXIT handler for math-fault exceptions (SVM_EXIT_EXCEPTION_10). + * Conditional #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitXcptMF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); + + if (!(pCtx->cr0 & X86_CR0_NE)) + { + /* Old-style FPU error reporting needs some extra work. 
*/ + /** @todo don't fall back to the recompiler, but do it manually. */ + return VERR_EM_INTERPRETER; + } + + hmR0SvmSetPendingXcptMF(pVCpu); + return VINF_SUCCESS; +} + + +/** + * #VMEXIT handler for debug exceptions (SVM_EXIT_EXCEPTION_1). Conditional + * #VMEXIT. + */ +HMSVM_EXIT_DECL hmR0SvmExitXcptDB(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient) +{ + HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(); + + HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); + + /* If we set the trap flag above, we have to clear it. */ + if (pVCpu->hm.s.fClearTrapFlag) + { + pVCpu->hm.s.fClearTrapFlag = false; + pCtx->eflags.Bits.u1TF = 0; + } + + /* This can be a fault-type #DB (instruction breakpoint) or a trap-type #DB (data breakpoint). However, for both cases + DR6 and DR7 are updated to what the exception handler expects. See AMD spec. 15.12.2 "#DB (Debug)". */ + PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pVmcb->guest.u64DR6, pVCpu->hm.s.fSingleInstruction); + if (rc == VINF_EM_RAW_GUEST_TRAP) + { + Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> guest trap\n", pVmcb->guest.u64DR6)); + if (CPUMIsHyperDebugStateActive(pVCpu)) + CPUMSetGuestDR6(pVCpu, CPUMGetGuestDR6(pVCpu) | pVmcb->guest.u64DR6); + + /* Reflect the exception back to the guest. */ + hmR0SvmSetPendingXcptDB(pVCpu); + rc = VINF_SUCCESS; + } + + /* + * Update DR6. + */ + if (CPUMIsHyperDebugStateActive(pVCpu)) + { + Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> %Rrc\n", pVmcb->guest.u64DR6, rc)); + pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL; + pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX; + } + else + { + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); + Assert(!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu)); + } + + return rc; +} + +/** @} */ + diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.h b/src/VBox/VMM/VMMR0/HMSVMR0.h new file mode 100644 index 00000000..aed968fd --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMSVMR0.h @@ -0,0 +1,99 @@ +/* $Id: HMSVMR0.h $ */ +/** @file + * HM SVM (AMD-V) - Internal header file. + */ + +/* + * Copyright (C) 2006-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef ___HMSVMR0_h +#define ___HMSVMR0_h + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/em.h> +#include <VBox/vmm/stam.h> +#include <VBox/dis.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/hm_svm.h> + +RT_C_DECLS_BEGIN + +/** @defgroup grp_svm_int Internal + * @ingroup grp_svm + * @internal + * @{ + */ + +#ifdef IN_RING0 + +VMMR0DECL(int) SVMR0GlobalInit(void); +VMMR0DECL(void) SVMR0GlobalTerm(void); +VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu); +VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit); +VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS HCPhysCpuPage, bool fEnabledBySystem, + void *pvArg); +VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys); +VMMR0DECL(int) SVMR0InitVM(PVM pVM); +VMMR0DECL(int) SVMR0TermVM(PVM pVM); +VMMR0DECL(int) SVMR0SetupVM(PVM pVM); +VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); +VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu); + +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); +VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam, + uint32_t *paParam); +#endif /* HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */ + +/** + * Prepares for and executes VMRUN (32-bit guests). + * + * @returns VBox status code. + * @param pVMCBHostPhys Physical address of host VMCB. + * @param pVMCBPhys Physical address of the VMCB. + * @param pCtx Pointer to the guest CPU context. + * @param pVM Pointer to the VM. (not used) + * @param pVCpu Pointer to the VMCPU. (not used) + */ +DECLASM(int) SVMR0VMRun(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); + + +/** + * Prepares for and executes VMRUN (64-bit guests). + * + * @returns VBox status code. + * @param pVMCBHostPhys Physical address of host VMCB. + * @param pVMCBPhys Physical address of the VMCB. + * @param pCtx Pointer to the guest CPU context. + * @param pVM Pointer to the VM. (not used) + * @param pVCpu Pointer to the VMCPU. (not used) + */ +DECLASM(int) SVMR0VMRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); + +/** + * Executes INVLPGA. + * + * @param pPageGC Virtual page to invalidate. + * @param u32ASID Tagged TLB id. + */ +DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t u32ASID); + +#endif /* IN_RING0 */ + +/** @} */ + +RT_C_DECLS_END + +#endif /* ___HMSVMR0_h */ + diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp new file mode 100644 index 00000000..afd83258 --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp @@ -0,0 +1,10993 @@ +/* $Id: HMVMXR0.cpp $ */ +/** @file + * HM VMX (Intel VT-x) - Host Context Ring-0. + */ + +/* + * Copyright (C) 2012-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#define LOG_GROUP LOG_GROUP_HM +#include <iprt/asm-amd64-x86.h> +#include <iprt/thread.h> +#include <iprt/string.h> + +#include "HMInternal.h" +#include <VBox/vmm/vm.h> +#include "HMVMXR0.h" +#include <VBox/vmm/pdmapi.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/vmm/iem.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/selm.h> +#include <VBox/vmm/tm.h> +#ifdef VBOX_WITH_REM +# include <VBox/vmm/rem.h> +#endif +#ifdef DEBUG_ramshankar +#define HMVMX_SAVE_FULL_GUEST_STATE +#define HMVMX_SYNC_FULL_GUEST_STATE +#define HMVMX_ALWAYS_CHECK_GUEST_STATE +#define HMVMX_ALWAYS_TRAP_ALL_XCPTS +#define HMVMX_ALWAYS_TRAP_PF +#define HMVMX_ALWAYS_SWAP_FPU_STATE +#endif + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +#if defined(RT_ARCH_AMD64) +# define HMVMX_IS_64BIT_HOST_MODE() (true) +typedef RTHCUINTREG HMVMXHCUINTREG; +#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +extern "C" uint32_t g_fVMXIs64bitHost; +# define HMVMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0) +typedef uint64_t HMVMXHCUINTREG; +#else +# define HMVMX_IS_64BIT_HOST_MODE() (false) +typedef RTHCUINTREG HMVMXHCUINTREG; +#endif + +/** Use the function table. */ +#define HMVMX_USE_FUNCTION_TABLE + +/** Determine which tagged-TLB flush handler to use. */ +#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0 +#define HMVMX_FLUSH_TAGGED_TLB_EPT 1 +#define HMVMX_FLUSH_TAGGED_TLB_VPID 2 +#define HMVMX_FLUSH_TAGGED_TLB_NONE 3 + +/** @name Updated-guest-state flags. 
+ * @{ */ +#define HMVMX_UPDATED_GUEST_RIP RT_BIT(0) +#define HMVMX_UPDATED_GUEST_RSP RT_BIT(1) +#define HMVMX_UPDATED_GUEST_RFLAGS RT_BIT(2) +#define HMVMX_UPDATED_GUEST_CR0 RT_BIT(3) +#define HMVMX_UPDATED_GUEST_CR3 RT_BIT(4) +#define HMVMX_UPDATED_GUEST_CR4 RT_BIT(5) +#define HMVMX_UPDATED_GUEST_GDTR RT_BIT(6) +#define HMVMX_UPDATED_GUEST_IDTR RT_BIT(7) +#define HMVMX_UPDATED_GUEST_LDTR RT_BIT(8) +#define HMVMX_UPDATED_GUEST_TR RT_BIT(9) +#define HMVMX_UPDATED_GUEST_SEGMENT_REGS RT_BIT(10) +#define HMVMX_UPDATED_GUEST_DEBUG RT_BIT(11) +#define HMVMX_UPDATED_GUEST_FS_BASE_MSR RT_BIT(12) +#define HMVMX_UPDATED_GUEST_GS_BASE_MSR RT_BIT(13) +#define HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR RT_BIT(14) +#define HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR RT_BIT(15) +#define HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR RT_BIT(16) +#define HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS RT_BIT(17) +#define HMVMX_UPDATED_GUEST_ACTIVITY_STATE RT_BIT(18) +#define HMVMX_UPDATED_GUEST_APIC_STATE RT_BIT(19) +#define HMVMX_UPDATED_GUEST_ALL ( HMVMX_UPDATED_GUEST_RIP \ + | HMVMX_UPDATED_GUEST_RSP \ + | HMVMX_UPDATED_GUEST_RFLAGS \ + | HMVMX_UPDATED_GUEST_CR0 \ + | HMVMX_UPDATED_GUEST_CR3 \ + | HMVMX_UPDATED_GUEST_CR4 \ + | HMVMX_UPDATED_GUEST_GDTR \ + | HMVMX_UPDATED_GUEST_IDTR \ + | HMVMX_UPDATED_GUEST_LDTR \ + | HMVMX_UPDATED_GUEST_TR \ + | HMVMX_UPDATED_GUEST_SEGMENT_REGS \ + | HMVMX_UPDATED_GUEST_DEBUG \ + | HMVMX_UPDATED_GUEST_FS_BASE_MSR \ + | HMVMX_UPDATED_GUEST_GS_BASE_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR \ + | HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS \ + | HMVMX_UPDATED_GUEST_ACTIVITY_STATE \ + | HMVMX_UPDATED_GUEST_APIC_STATE) +/** @} */ + +/** @name + * Flags to skip redundant reads of some common VMCS fields that are not part of + * the guest-CPU state but are in the transient structure. + */ +#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO RT_BIT(0) +#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE RT_BIT(1) +#define HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION RT_BIT(2) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN RT_BIT(3) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO RT_BIT(4) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE RT_BIT(5) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO RT_BIT(6) +/** @} */ + +/** @name + * States of the VMCS. + * + * This does not reflect all possible VMCS states but currently only those + * needed for maintaining the VMCS consistently even when thread-context hooks + * are used. Maybe later this can be extended (i.e. Nested Virtualization). + */ +#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0) +#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1) +#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2) +/** @} */ + +/** + * Exception bitmap mask for real-mode guests (real-on-v86). + * + * We need to intercept all exceptions manually (except #PF). #NM is also + * handled separately, see hmR0VmxLoadSharedCR0(). #PF need not be intercepted + * even in real-mode if we have Nested Paging support. 
+ */ +#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) | RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NMI) \ + | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \ + | RT_BIT(X86_XCPT_UD) /* RT_BIT(X86_XCPT_NM) */ | RT_BIT(X86_XCPT_DF) \ + | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \ + | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \ + | RT_BIT(X86_XCPT_MF) | RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_MC) \ + | RT_BIT(X86_XCPT_XF)) + +/** + * Exception bitmap mask for all contributory exceptions. + * + * Page fault is deliberately excluded here as it's conditional as to whether + * it's contributory or benign. Page faults are handled separately. + */ +#define HMVMX_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \ + | RT_BIT(X86_XCPT_DE)) + +/** Maximum VM-instruction error number. */ +#define HMVMX_INSTR_ERROR_MAX 28 + +/** Profiling macro. */ +#ifdef HM_PROFILE_EXIT_DISPATCH +# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed) +# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed) +#else +# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0) +# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0) +#endif + +/** Assert that preemption is disabled or covered by thread-context hooks. */ +#define HMVMX_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + +/** Assert that we haven't migrated CPUs when thread-context hooks are not + * used. */ +#define HMVMX_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \ + ("Illegal migration! Entered on CPU %u Current %u\n", \ + pVCpu->hm.s.idEnteredCpu, RTMpCpuId())); \ + +/** Helper macro for VM-exit handlers called unexpectedly. */ +#define HMVMX_RETURN_UNEXPECTED_EXIT() \ + do { \ + pVCpu->hm.s.u32HMError = pVmxTransient->uExitReason; \ + return VERR_VMX_UNEXPECTED_EXIT; \ + } while (0) + + +/******************************************************************************* +* Structures and Typedefs * +*******************************************************************************/ +/** + * VMX transient state. + * + * A state structure for holding miscellaneous information across + * VMX non-root operation and restored after the transition. + */ +typedef struct VMXTRANSIENT +{ + /** The host's rflags/eflags. */ + RTCCUINTREG uEflags; +#if HC_ARCH_BITS == 32 + uint32_t u32Alignment0; +#endif + /** The guest's LSTAR MSR value used for TPR patching for 32-bit guests. */ + uint64_t u64LStarMsr; + /** The guest's TPR value used for TPR shadowing. */ + uint8_t u8GuestTpr; + /** Alignment. */ + uint8_t abAlignment0[7]; + + /** The basic VM-exit reason. */ + uint16_t uExitReason; + /** Alignment. */ + uint16_t u16Alignment0; + /** The VM-exit interruption error code. */ + uint32_t uExitIntErrorCode; + /** The VM-exit exit qualification. */ + uint64_t uExitQualification; + + /** The VM-exit interruption-information field. */ + uint32_t uExitIntInfo; + /** The VM-exit instruction-length field. */ + uint32_t cbInstr; + /** The VM-exit instruction-information field. */ + union + { + /** Plain unsigned int representation. */ + uint32_t u; + /** INS and OUTS information. */ + struct + { + uint32_t u6Reserved0 : 7; + /** The address size; 0=16-bit, 1=32-bit, 2=64-bit, rest undefined. 
*/ + uint32_t u3AddrSize : 3; + uint32_t u5Reserved1 : 5; + /** The segment register (X86_SREG_XXX). */ + uint32_t iSegReg : 3; + uint32_t uReserved2 : 14; + } StrIo; + } ExitInstrInfo; + /** Whether the VM-entry failed or not. */ + bool fVMEntryFailed; + /** Alignment. */ + uint8_t abAlignment1[3]; + + /** The VM-entry interruption-information field. */ + uint32_t uEntryIntInfo; + /** The VM-entry exception error code field. */ + uint32_t uEntryXcptErrorCode; + /** The VM-entry instruction length field. */ + uint32_t cbEntryInstr; + + /** IDT-vectoring information field. */ + uint32_t uIdtVectoringInfo; + /** IDT-vectoring error code. */ + uint32_t uIdtVectoringErrorCode; + + /** Mask of currently read VMCS fields; HMVMX_UPDATED_TRANSIENT_*. */ + uint32_t fVmcsFieldsRead; + + /** Whether the guest FPU was active at the time of VM-exit. */ + bool fWasGuestFPUStateActive; + /** Whether the guest debug state was active at the time of VM-exit. */ + bool fWasGuestDebugStateActive; + /** Whether the hyper debug state was active at the time of VM-exit. */ + bool fWasHyperDebugStateActive; + /** Whether TSC-offsetting should be setup before VM-entry. */ + bool fUpdateTscOffsettingAndPreemptTimer; + /** Whether the VM-exit was caused by a page-fault during delivery of a + * contributory exception or a page-fault. */ + bool fVectoringPF; +} VMXTRANSIENT; +AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestFPUStateActive, sizeof(uint64_t)); +AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t)); +/** Pointer to VMX transient state. */ +typedef VMXTRANSIENT *PVMXTRANSIENT; + + +/** + * MSR-bitmap read permissions. + */ +typedef enum VMXMSREXITREAD +{ + /** Reading this MSR causes a VM-exit. */ + VMXMSREXIT_INTERCEPT_READ = 0xb, + /** Reading this MSR does not cause a VM-exit. */ + VMXMSREXIT_PASSTHRU_READ +} VMXMSREXITREAD; + +/** + * MSR-bitmap write permissions. + */ +typedef enum VMXMSREXITWRITE +{ + /** Writing to this MSR causes a VM-exit. */ + VMXMSREXIT_INTERCEPT_WRITE = 0xd, + /** Writing to this MSR does not cause a VM-exit. */ + VMXMSREXIT_PASSTHRU_WRITE +} VMXMSREXITWRITE; + +/** + * VMX VM-exit handler. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required + * fields before using them. + * @param pVmxTransient Pointer to the VMX-transient structure. + */ +#ifndef HMVMX_USE_FUNCTION_TABLE +typedef int FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +#else +typedef DECLCALLBACK(int) FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +/** Pointer to VM-exit handler. 
*/ +typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER; +#endif + + +/******************************************************************************* +* Internal Functions * +*******************************************************************************/ +static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush); +static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr); +static void hmR0VmxClearEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx); +static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr, + uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntState); +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu); +#endif +#ifndef HMVMX_USE_FUNCTION_TABLE +DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason); +# define HMVMX_EXIT_DECL static int +#else +# define HMVMX_EXIT_DECL static DECLCALLBACK(int) +#endif + +/** @name VM-exit handlers. + * @{ + */ +static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi; +static FNVMXEXITHANDLER hmR0VmxExitExtInt; +static FNVMXEXITHANDLER hmR0VmxExitTripleFault; +static FNVMXEXITHANDLER hmR0VmxExitInitSignal; +static FNVMXEXITHANDLER hmR0VmxExitSipi; +static FNVMXEXITHANDLER hmR0VmxExitIoSmi; +static FNVMXEXITHANDLER hmR0VmxExitSmi; +static FNVMXEXITHANDLER hmR0VmxExitIntWindow; +static FNVMXEXITHANDLER hmR0VmxExitNmiWindow; +static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch; +static FNVMXEXITHANDLER hmR0VmxExitCpuid; +static FNVMXEXITHANDLER hmR0VmxExitGetsec; +static FNVMXEXITHANDLER hmR0VmxExitHlt; +static FNVMXEXITHANDLER hmR0VmxExitInvd; +static FNVMXEXITHANDLER hmR0VmxExitInvlpg; +static FNVMXEXITHANDLER hmR0VmxExitRdpmc; +static FNVMXEXITHANDLER hmR0VmxExitRdtsc; +static FNVMXEXITHANDLER hmR0VmxExitRsm; +static FNVMXEXITHANDLER hmR0VmxExitSetPendingXcptUD; +static FNVMXEXITHANDLER hmR0VmxExitMovCRx; +static FNVMXEXITHANDLER hmR0VmxExitMovDRx; +static FNVMXEXITHANDLER hmR0VmxExitIoInstr; +static FNVMXEXITHANDLER hmR0VmxExitRdmsr; +static FNVMXEXITHANDLER hmR0VmxExitWrmsr; +static FNVMXEXITHANDLER hmR0VmxExitErrInvalidGuestState; +static FNVMXEXITHANDLER hmR0VmxExitErrMsrLoad; +static FNVMXEXITHANDLER hmR0VmxExitErrUndefined; +static FNVMXEXITHANDLER hmR0VmxExitMwait; +static FNVMXEXITHANDLER hmR0VmxExitMtf; +static FNVMXEXITHANDLER hmR0VmxExitMonitor; +static FNVMXEXITHANDLER hmR0VmxExitPause; +static FNVMXEXITHANDLER hmR0VmxExitErrMachineCheck; +static FNVMXEXITHANDLER hmR0VmxExitTprBelowThreshold; +static FNVMXEXITHANDLER hmR0VmxExitApicAccess; +static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess; +static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess; +static FNVMXEXITHANDLER hmR0VmxExitEptViolation; +static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig; +static FNVMXEXITHANDLER hmR0VmxExitRdtscp; +static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer; +static FNVMXEXITHANDLER hmR0VmxExitWbinvd; +static FNVMXEXITHANDLER hmR0VmxExitXsetbv; +static FNVMXEXITHANDLER hmR0VmxExitRdrand; +static FNVMXEXITHANDLER hmR0VmxExitInvpcid; +/** @} */ + +static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptBP(PVMCPU pVCpu, 
PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); + +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +#ifdef HMVMX_USE_FUNCTION_TABLE + +/** + * VMX_EXIT dispatch table. + */ +static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] = +{ + /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi, + /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt, + /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault, + /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal, + /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi, + /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi, + /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi, + /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow, + /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow, + /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch, + /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid, + /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec, + /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt, + /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd, + /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg, + /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc, + /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc, + /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm, + /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitSetPendingXcptUD, + /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD, + /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD, + /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD, + /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD, + /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD, + /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD, + /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD, + /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD, + /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD, + /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx, + /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx, + /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr, + /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr, + /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr, + /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState, + /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad, + /* 35 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait, + /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf, + /* 38 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor, + /* 40 UNDEFINED */ hmR0VmxExitPause, + /* 41 VMX_EXIT_PAUSE */ hmR0VmxExitErrMachineCheck, + /* 42 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrUndefined, + /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold, + /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess, + /* 45 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 46 VMX_EXIT_XDTR_ACCESS */ hmR0VmxExitXdtrAccess, + /* 47 VMX_EXIT_TR_ACCESS */ hmR0VmxExitXdtrAccess, + /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation, + /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig, + /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD, + /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp, + /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer, + /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD, + /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd, + /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv, + /* 56 UNDEFINED */ 
hmR0VmxExitErrUndefined, + /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand, + /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid, + /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD +}; +#endif /* HMVMX_USE_FUNCTION_TABLE */ + +#ifdef VBOX_STRICT +static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] = +{ + /* 0 */ "(Not Used)", + /* 1 */ "VMCALL executed in VMX root operation.", + /* 2 */ "VMCLEAR with invalid physical address.", + /* 3 */ "VMCLEAR with VMXON pointer.", + /* 4 */ "VMLAUNCH with non-clear VMCS.", + /* 5 */ "VMRESUME with non-launched VMCS.", + /* 6 */ "VMRESUME after VMXOFF", + /* 7 */ "VM entry with invalid control fields.", + /* 8 */ "VM entry with invalid host state fields.", + /* 9 */ "VMPTRLD with invalid physical address.", + /* 10 */ "VMPTRLD with VMXON pointer.", + /* 11 */ "VMPTRLD with incorrect revision identifier.", + /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.", + /* 13 */ "VMWRITE to read-only VMCS component.", + /* 14 */ "(Not Used)", + /* 15 */ "VMXON executed in VMX root operation.", + /* 16 */ "VM entry with invalid executive-VMCS pointer.", + /* 17 */ "VM entry with non-launched executing VMCS.", + /* 18 */ "VM entry with executive-VMCS pointer not VMXON pointer.", + /* 19 */ "VMCALL with non-clear VMCS.", + /* 20 */ "VMCALL with invalid VM-exit control fields.", + /* 21 */ "(Not Used)", + /* 22 */ "VMCALL with incorrect MSEG revision identifier.", + /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.", + /* 24 */ "VMCALL with invalid SMM-monitor features.", + /* 25 */ "VM entry with invalid VM-execution control fields in executive VMCS.", + /* 26 */ "VM entry with events blocked by MOV SS.", + /* 27 */ "(Not Used)", + /* 28 */ "Invalid operand to INVEPT/INVVPID." +}; +#endif /* VBOX_STRICT */ + + + +/** + * Updates the VM's last error record. If there was a VMX instruction error, + * reads the error data from the VMCS and updates VCPU's last error record as + * well. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU (can be NULL if @a rc is not + * VERR_VMX_UNABLE_TO_START_VM or + * VERR_VMX_INVALID_VMCS_FIELD). + * @param rc The error code. + */ +static void hmR0VmxUpdateErrorRecord(PVM pVM, PVMCPU pVCpu, int rc) +{ + AssertPtr(pVM); + if ( rc == VERR_VMX_INVALID_VMCS_FIELD + || rc == VERR_VMX_UNABLE_TO_START_VM) + { + AssertPtrReturnVoid(pVCpu); + VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); + } + pVM->hm.s.lLastError = rc; +} + + +/** + * Reads the VM-entry interruption-information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-entry exception error code field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! 
+ */ +DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-entry exception error code field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit interruption-information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit interruption error code from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit instruction length field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit instruction-information field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVCpu The cross context per CPU structure. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the exit qualification from the VMCS into the VMX transient structure. 
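
/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * hmR0VmxReadExit*Vmcs helpers above read a given VMCS field at most once per
 * VM-exit by recording what has already been fetched in the transient
 * structure's fVmcsFieldsRead bit mask, because each VMREAD has a cost.
 * The tiny standalone model below shows that caching pattern; the field ID,
 * flag name and the fake VMREAD are assumptions for illustration only.
 */
#include <stdio.h>
#include <stdint.h>

#define MY_UPDATED_EXIT_INSTR_LEN  UINT32_C(0x01)   /* assumed flag bit */

typedef struct MYTRANSIENT
{
    uint32_t fFieldsRead;   /* which fields were already read this exit */
    uint32_t cbInstr;       /* cached VM-exit instruction length */
} MYTRANSIENT;

static unsigned g_cVmreads = 0;             /* counts simulated VMREADs */

static uint32_t myFakeVmread(void)          /* stands in for VMXReadVmcs32 */
{
    g_cVmreads++;
    return 3;                               /* pretend the instruction was 3 bytes */
}

static int myReadExitInstrLen(MYTRANSIENT *pTransient)
{
    if (!(pTransient->fFieldsRead & MY_UPDATED_EXIT_INSTR_LEN))
    {
        pTransient->cbInstr = myFakeVmread();
        pTransient->fFieldsRead |= MY_UPDATED_EXIT_INSTR_LEN;
    }
    return 0;
}

int main(void)
{
    MYTRANSIENT Transient = { 0, 0 };
    myReadExitInstrLen(&Transient);
    myReadExitInstrLen(&Transient);         /* second call hits the cache */
    printf("cbInstr=%u, VMREADs issued=%u\n", (unsigned)Transient.cbInstr, g_cVmreads);
    return 0;
}
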
+ * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitQualificationVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION)) + { + int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQualification); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION; + } + return VINF_SUCCESS; +} + + +/** + * Reads the IDT-vectoring information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_INFO, &pVmxTransient->uIdtVectoringInfo); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the IDT-vectoring error code from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE; + } + return VINF_SUCCESS; +} + + +/** + * Enters VMX root mode operation on the current CPU. + * + * @returns VBox status code. + * @param pVM Pointer to the VM (optional, can be NULL, after + * a resume). + * @param HCPhysCpuPage Physical address of the VMXON region. + * @param pvCpuPage Pointer to the VMXON region. + */ +static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage) +{ + AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + if (pVM) + { + /* Write the VMCS revision dword to the VMXON region. */ + *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo); + } + + /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* Enable the VMX bit in CR4 if necessary. */ + RTCCUINTREG uCr4 = ASMGetCR4(); + if (!(uCr4 & X86_CR4_VMXE)) + ASMSetCR4(uCr4 | X86_CR4_VMXE); + + /* Enter VMX root mode. */ + int rc = VMXEnable(HCPhysCpuPage); + if (RT_FAILURE(rc)) + ASMSetCR4(uCr4); + + /* Restore interrupts. */ + ASMSetFlags(uEflags); + return rc; +} + + +/** + * Exits VMX root mode operation on the current CPU. + * + * @returns VBox status code. + */ +static int hmR0VmxLeaveRootMode(void) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* Paranoid: Disable interrupts as, in theory, interrupts handlers might mess with CR4. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* If we're for some reason not in VMX root mode, then don't leave it. 
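
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxEnterRootMode above follows a strict ordering: disable interrupts,
 * set CR4.VMXE if it is clear, attempt VMXON, and undo the CR4 change if
 * VMXON fails, so a failed attempt leaves the CPU as it was. The stand-ins
 * below (myGetCr4/mySetCr4/myVmxon) are assumptions that model only that
 * control flow; they execute no privileged instructions.
 */
#include <stdio.h>
#include <stdint.h>

#define MY_CR4_VMXE UINT64_C(0x2000)        /* bit 13, X86_CR4_VMXE */

static uint64_t g_uFakeCr4 = 0;             /* simulated CR4 */

static uint64_t myGetCr4(void)            { return g_uFakeCr4; }
static void     mySetCr4(uint64_t uCr4)   { g_uFakeCr4 = uCr4; }
static int      myVmxon(uint64_t HCPhys)  { (void)HCPhys; return -1; /* simulate failure */ }

static int myEnterRootMode(uint64_t HCPhysVmxonRegion)
{
    /* Interrupts would be disabled here in real ring-0 code. */
    uint64_t const uOldCr4 = myGetCr4();
    if (!(uOldCr4 & MY_CR4_VMXE))
        mySetCr4(uOldCr4 | MY_CR4_VMXE);

    int rc = myVmxon(HCPhysVmxonRegion);
    if (rc != 0)
        mySetCr4(uOldCr4);                  /* roll CR4 back on failure */

    /* ... and interrupts would be restored here. */
    return rc;
}

int main(void)
{
    int rc = myEnterRootMode(0x1000);
    printf("rc=%d CR4.VMXE=%u\n", rc, (unsigned)((g_uFakeCr4 & MY_CR4_VMXE) != 0));
    return 0;
}
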
*/ + RTCCUINTREG uHostCR4 = ASMGetCR4(); + + int rc; + if (uHostCR4 & X86_CR4_VMXE) + { + /* Exit VMX root mode and clear the VMX bit in CR4. */ + VMXDisable(); + ASMSetCR4(uHostCR4 & ~X86_CR4_VMXE); + rc = VINF_SUCCESS; + } + else + rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE; + + /* Restore interrupts. */ + ASMSetFlags(uEflags); + return rc; +} + + +/** + * Allocates and maps one physically contiguous page. The allocated page is + * zero'd out. (Used by various VT-x structures). + * + * @returns IPRT status code. + * @param pMemObj Pointer to the ring-0 memory object. + * @param ppVirt Where to store the virtual address of the + * allocation. + * @param pPhys Where to store the physical address of the + * allocation. + */ +DECLINLINE(int) hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) +{ + AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER); + AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER); + AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER); + + int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */); + if (RT_FAILURE(rc)) + return rc; + *ppVirt = RTR0MemObjAddress(*pMemObj); + *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */); + ASMMemZero32(*ppVirt, PAGE_SIZE); + return VINF_SUCCESS; +} + + +/** + * Frees and unmaps an allocated physical page. + * + * @param pMemObj Pointer to the ring-0 memory object. + * @param ppVirt Where to re-initialize the virtual address of + * allocation as 0. + * @param pHCPhys Where to re-initialize the physical address of the + * allocation as 0. + */ +DECLINLINE(void) hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) +{ + AssertPtr(pMemObj); + AssertPtr(ppVirt); + AssertPtr(pHCPhys); + if (*pMemObj != NIL_RTR0MEMOBJ) + { + int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */); + AssertRC(rc); + *pMemObj = NIL_RTR0MEMOBJ; + *ppVirt = 0; + *pHCPhys = 0; + } +} + + +/** + * Worker function to free VT-x related structures. + * + * @returns IPRT status code. + * @param pVM Pointer to the VM. + */ +static void hmR0VmxStructsFree(PVM pVM) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); +#endif + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap); + + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, &pVCpu->hm.s.vmx.HCPhysVirtApic); + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); + } + + hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess); +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); +#endif +} + + +/** + * Worker function to allocate VT-x related VM structures. + * + * @returns IPRT status code. + * @param pVM Pointer to the VM. + */ +static int hmR0VmxStructsAlloc(PVM pVM) +{ + /* + * Initialize members up-front so we can cleanup properly on allocation failure. 
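
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxPageAllocZ/hmR0VmxPageFree above keep the memory-object handle, the
 * virtual address and the physical address in sync, and the free routine
 * resets the handle to NIL so calling it again is harmless. The standalone
 * model below shows that discipline with plain malloc/free; the MYPAGE type
 * and helper names are assumptions.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MY_PAGE_SIZE 4096

typedef struct MYPAGE
{
    void *pvAlloc;          /* stands in for the ring-0 memory object + mapping */
} MYPAGE;

static int myPageAllocZ(MYPAGE *pPage)
{
    pPage->pvAlloc = malloc(MY_PAGE_SIZE);
    if (!pPage->pvAlloc)
        return -1;
    memset(pPage->pvAlloc, 0, MY_PAGE_SIZE);    /* zero it, as the real helper does */
    return 0;
}

static void myPageFree(MYPAGE *pPage)
{
    if (pPage->pvAlloc)                         /* idempotent: NULL means nothing to do */
    {
        free(pPage->pvAlloc);
        pPage->pvAlloc = NULL;
    }
}

int main(void)
{
    MYPAGE Page = { NULL };
    if (myPageAllocZ(&Page) == 0)
        printf("allocated and zeroed %u bytes\n", (unsigned)MY_PAGE_SIZE);
    myPageFree(&Page);
    myPageFree(&Page);                          /* second free is a no-op */
    return 0;
}
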
+ */ +#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \ + pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ + pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ + pVM->hm.s.vmx.HCPhys##a_Name = 0; + +#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \ + pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ + pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ + pVCpu->hm.s.vmx.HCPhys##a_Name = 0; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv); +#endif + VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb); + + AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus)); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv); + VMXLOCAL_INIT_VMCPU_MEMOBJ(VirtApic, pb); + VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv); +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv); + VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv); +#endif + } +#undef VMXLOCAL_INIT_VMCPU_MEMOBJ +#undef VMXLOCAL_INIT_VM_MEMOBJ + + /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */ + AssertReturnStmt(MSR_IA32_VMX_BASIC_INFO_VMCS_SIZE(pVM->hm.s.vmx.Msrs.u64BasicInfo) <= PAGE_SIZE, + (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE, + VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO); + + /* + * Allocate all the VT-x structures. + */ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); + if (RT_FAILURE(rc)) + goto cleanup; + strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic"); + *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef); +#endif + + /* Allocate the APIC-access page for trapping APIC accesses from the guest. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) + { + rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, + &pVM->hm.s.vmx.HCPhysApicAccess); + if (RT_FAILURE(rc)) + goto cleanup; + } + + /* + * Initialize per-VCPU VT-x structures. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + + /* Allocate the VM control structure (VMCS). */ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); + if (RT_FAILURE(rc)) + goto cleanup; + + /* Allocate the Virtual-APIC page for transparent TPR accesses. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, + &pVCpu->hm.s.vmx.HCPhysVirtApic); + if (RT_FAILURE(rc)) + goto cleanup; + } + + /* Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for transparent accesses of specific MSRs. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + { + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, + &pVCpu->hm.s.vmx.HCPhysMsrBitmap); + if (RT_FAILURE(rc)) + goto cleanup; + memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE); + } + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); + if (RT_FAILURE(rc)) + goto cleanup; + + /* Allocate the VM-exit MSR-load page for the host MSRs. 
*/ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); + if (RT_FAILURE(rc)) + goto cleanup; +#endif + } + + return VINF_SUCCESS; + +cleanup: + hmR0VmxStructsFree(pVM); + return rc; +} + + +/** + * Does global VT-x initialization (called during module initialization). + * + * @returns VBox status code. + */ +VMMR0DECL(int) VMXR0GlobalInit(void) +{ +#ifdef HMVMX_USE_FUNCTION_TABLE + AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers)); +# ifdef VBOX_STRICT + for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++) + Assert(g_apfnVMExitHandlers[i]); +# endif +#endif + return VINF_SUCCESS; +} + + +/** + * Does global VT-x termination (called during module termination). + */ +VMMR0DECL(void) VMXR0GlobalTerm() +{ + /* Nothing to do currently. */ +} + + +/** + * Sets up and activates VT-x on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the global CPU info struct. + * @param pVM Pointer to the VM (can be NULL after a host resume + * operation). + * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a + * fEnabledByHost is true). + * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if + * @a fEnabledByHost is true). + * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to + * enable VT-x on the host. + * @param pvMsrs Opaque pointer to VMXMSRS struct. + */ +VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost, + void *pvMsrs) +{ + AssertReturn(pCpu, VERR_INVALID_PARAMETER); + AssertReturn(pvMsrs, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* Enable VT-x if it's not already enabled by the host. */ + if (!fEnabledByHost) + { + int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been using EPTPs) so + * we don't retain any stale guest-physical mappings which won't get invalidated when flushing by VPID. + */ + PVMXMSRS pMsrs = (PVMXMSRS)pvMsrs; + if (pMsrs->u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) + { + hmR0VmxFlushEpt(NULL /* pVCpu */, VMX_FLUSH_EPT_ALL_CONTEXTS); + pCpu->fFlushAsidBeforeUse = false; + } + else + pCpu->fFlushAsidBeforeUse = true; + + /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */ + ++pCpu->cTlbFlushes; + + return VINF_SUCCESS; +} + + +/** + * Deactivates VT-x on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the global CPU info struct. + * @param pvCpuPage Pointer to the VMXON region. + * @param HCPhysCpuPage Physical address of the VMXON region. + * + * @remarks This function should never be called when SUPR0EnableVTx() or + * similar was used to enable VT-x on the host. + */ +VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) +{ + NOREF(pCpu); + NOREF(pvCpuPage); + NOREF(HCPhysCpuPage); + + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + return hmR0VmxLeaveRootMode(); +} + + +/** + * Sets the permission bits for the specified MSR in the MSR bitmap. + * + * @param pVCpu Pointer to the VMCPU. + * @param uMSR The MSR value. + * @param enmRead Whether reading this MSR causes a VM-exit. + * @param enmWrite Whether writing this MSR causes a VM-exit. 
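
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxStructsAlloc above first sets every handle to NIL and only then
 * starts allocating, so a failure at any step can simply "goto cleanup" and
 * free whatever happened to be allocated so far. A minimal standalone version
 * of that pattern follows; all names here are assumptions.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct MYSTRUCTS
{
    void *pvVmcs;
    void *pvMsrBitmap;
} MYSTRUCTS;

static void myStructsFree(MYSTRUCTS *p)
{
    free(p->pvVmcs);       p->pvVmcs = NULL;
    free(p->pvMsrBitmap);  p->pvMsrBitmap = NULL;
}

static int myStructsAlloc(MYSTRUCTS *p)
{
    /* Initialize members up-front so cleanup after a partial failure is trivial. */
    p->pvVmcs      = NULL;
    p->pvMsrBitmap = NULL;

    p->pvVmcs = malloc(4096);
    if (!p->pvVmcs)
        goto cleanup;

    p->pvMsrBitmap = malloc(4096);
    if (!p->pvMsrBitmap)
        goto cleanup;

    return 0;

cleanup:
    myStructsFree(p);       /* frees only what was actually allocated */
    return -1;
}

int main(void)
{
    MYSTRUCTS s;
    printf("alloc rc=%d\n", myStructsAlloc(&s));
    myStructsFree(&s);
    return 0;
}
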
+ */ +static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite) +{ + int32_t iBit; + uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap; + + /* + * Layout: + * 0x000 - 0x3ff - Low MSR read bits + * 0x400 - 0x7ff - High MSR read bits + * 0x800 - 0xbff - Low MSR write bits + * 0xc00 - 0xfff - High MSR write bits + */ + if (uMsr <= 0x00001FFF) + iBit = uMsr; + else if ( uMsr >= 0xC0000000 + && uMsr <= 0xC0001FFF) + { + iBit = (uMsr - 0xC0000000); + pbMsrBitmap += 0x400; + } + else + { + AssertMsgFailed(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr)); + return; + } + + Assert(iBit <= 0x1fff); + if (enmRead == VMXMSREXIT_INTERCEPT_READ) + ASMBitSet(pbMsrBitmap, iBit); + else + ASMBitClear(pbMsrBitmap, iBit); + + if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE) + ASMBitSet(pbMsrBitmap + 0x800, iBit); + else + ASMBitClear(pbMsrBitmap + 0x800, iBit); +} + + +/** + * Flushes the TLB using EPT. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a + * enmFlush). + * @param enmFlush Type of flush. + * + * @remarks Caller is responsible for making sure this function is called only + * when NestedPaging is supported and providing @a enmFlush that is + * supported by the CPU. + * @remarks Can be called with interrupts disabled. + */ +static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush) +{ + uint64_t au64Descriptor[2]; + if (enmFlush == VMX_FLUSH_EPT_ALL_CONTEXTS) + au64Descriptor[0] = 0; + else + { + Assert(pVCpu); + au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP; + } + au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */ + + int rc = VMXR0InvEPT(enmFlush, &au64Descriptor[0]); + AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0, + rc)); + if ( RT_SUCCESS(rc) + && pVCpu) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging); + } +} + + +/** + * Flushes the TLB using VPID. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a + * enmFlush). + * @param enmFlush Type of flush. + * @param GCPtr Virtual address of the page to flush (can be 0 depending + * on @a enmFlush). + * + * @remarks Can be called with interrupts disabled. + */ +static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr) +{ + AssertPtr(pVM); + Assert(pVM->hm.s.vmx.fVpid); + + uint64_t au64Descriptor[2]; + if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS) + { + au64Descriptor[0] = 0; + au64Descriptor[1] = 0; + } + else + { + AssertPtr(pVCpu); + AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); + au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid; + au64Descriptor[1] = GCPtr; + } + + int rc = VMXR0InvVPID(enmFlush, &au64Descriptor[0]); NOREF(rc); + AssertMsg(rc == VINF_SUCCESS, + ("VMXR0InvVPID %#x %u %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc)); + if ( RT_SUCCESS(rc) + && pVCpu) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid); + } +} + + +/** + * Invalidates a guest page by guest virtual address. Only relevant for + * EPT/VPID, otherwise there is nothing really to invalidate. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. 
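
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxSetMsrPermission above works on the 4 KB MSR bitmap, which is split
 * into four 1 KB quarters: low-MSR reads at 0x000, high-MSR reads at 0x400,
 * low-MSR writes at 0x800 and high-MSR writes at 0xC00; within a quarter the
 * bit index is simply MSR & 0x1FFF. The helper below only computes the byte
 * offset and bit number for a given MSR; its name is an assumption.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Returns true and fills the offsets if the MSR is covered by the bitmap. */
static bool myMsrBitmapLocate(uint32_t uMsr, bool fWrite,
                              uint32_t *poffByte, uint32_t *piBit)
{
    uint32_t offBase;
    if (uMsr <= UINT32_C(0x00001FFF))
        offBase = 0x000;                                    /* low MSR range */
    else if (uMsr >= UINT32_C(0xC0000000) && uMsr <= UINT32_C(0xC0001FFF))
        offBase = 0x400;                                    /* high MSR range */
    else
        return false;                                       /* not covered: always intercepted */

    if (fWrite)
        offBase += 0x800;                                   /* write bitmaps live in the upper half */

    uint32_t const iBit = uMsr & UINT32_C(0x1FFF);
    *poffByte = offBase + iBit / 8;
    *piBit    = iBit % 8;
    return true;
}

int main(void)
{
    uint32_t offByte, iBit;
    /* 0xC0000102 is the architectural KERNEL_GS_BASE MSR number. */
    if (myMsrBitmapLocate(UINT32_C(0xC0000102), true /* write */, &offByte, &iBit))
        printf("write-intercept bit at byte 0x%x, bit %u\n", (unsigned)offByte, (unsigned)iBit);
    return 0;
}
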
+ * @param GCVirt Guest virtual address of the page to invalidate. + */ +VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + LogFlowFunc(("pVM=%p pVCpu=%p GCVirt=%RGv\n", pVM, pVCpu, GCVirt)); + + bool fFlushPending = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_FLUSH); + if (!fFlushPending) + { + /* + * We must invalidate the guest TLB entry in either case, we cannot ignore it even for the EPT case + * See @bugref{6043} and @bugref{6177}. + * + * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() as this + * function maybe called in a loop with individual addresses. + */ + if (pVM->hm.s.vmx.fVpid) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt); + } + else + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + } + else if (pVM->hm.s.fNestedPaging) + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + } + + return VINF_SUCCESS; +} + + +/** + * Invalidates a guest page by physical address. Only relevant for EPT/VPID, + * otherwise there is nothing really to invalidate. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param GCPhys Guest physical address of the page to invalidate. + */ +VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) +{ + LogFlowFunc(("%RGp\n", GCPhys)); + + /* + * We cannot flush a page by guest-physical address. invvpid takes only a linear address while invept only flushes + * by EPT not individual addresses. We update the force flag here and flush before the next VM-entry in hmR0VmxFlushTLB*(). + * This function might be called in a loop. This should cause a flush-by-EPT if EPT is in use. See @bugref{6568}. + */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgPhys); + return VINF_SUCCESS; +} + + +/** + * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the + * case where neither EPT nor VPID is supported by the CPU. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbNone(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVCpu); + AssertPtr(pCpu); + NOREF(pVM); + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); + + /** @todo TLB shootdown is currently not used. See hmQueueInvlPage(). */ +#if 0 + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + pVCpu->hm.s.TlbShootdown.cPages = 0; +#endif + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + pVCpu->hm.s.fForceTLBFlush = false; + return; +} + + +/** + * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * @remarks All references to "ASID" in this function pertains to "VPID" in + * Intel's nomenclature. The reason is, to avoid confusion in compare + * statements since the host-CPU copies are named "ASID". + * + * @remarks Called with interrupts disabled. 
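
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * VMXR0InvalidatePage above picks one of three actions when no flush is
 * already pending: invalidate the single linear address with INVVPID if the
 * CPU supports individual-address invalidation, otherwise (VPID without that
 * capability, or nested paging only) set the TLB-flush force-flag and let the
 * next VM-entry do a full flush. The function below models only that decision;
 * all names are assumptions.
 */
#include <stdio.h>
#include <stdbool.h>

typedef enum MYINVLPGACTION
{
    MY_INVLPG_VPID_INDIV_ADDR,      /* invvpid on the one address right away */
    MY_INVLPG_DEFER_FULL_FLUSH,     /* set the force-flag, flush at next VM-entry */
    MY_INVLPG_NOTHING               /* no EPT, no VPID: nothing to invalidate */
} MYINVLPGACTION;

static MYINVLPGACTION myDecideInvlpg(bool fVpid, bool fVpidIndivAddr, bool fNestedPaging)
{
    if (fVpid)
        return fVpidIndivAddr ? MY_INVLPG_VPID_INDIV_ADDR : MY_INVLPG_DEFER_FULL_FLUSH;
    if (fNestedPaging)
        return MY_INVLPG_DEFER_FULL_FLUSH;
    return MY_INVLPG_NOTHING;
}

int main(void)
{
    printf("VPID+indiv: %d\n", myDecideInvlpg(true,  true,  true));
    printf("VPID only : %d\n", myDecideInvlpg(true,  false, false));
    printf("EPT only  : %d\n", myDecideInvlpg(false, false, true));
    printf("neither   : %d\n", myDecideInvlpg(false, false, false));
    return 0;
}
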
+ */ +static void hmR0VmxFlushTaggedTlbBoth(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ +#ifdef VBOX_WITH_STATISTICS + bool fTlbFlushed = false; +# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0) +# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \ + if (!fTlbFlushed) \ + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \ + } while (0) +#else +# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0) +# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0) +#endif + + AssertPtr(pVM); + AssertPtr(pCpu); + AssertPtr(pVCpu); + AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid, + ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled." + "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid)); + + + /* + * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. + * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB + * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + ++pCpu->uCurrentAsid; + if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) + { + pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */ + pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ + pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ + } + + pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid; + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + + /* + * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also + * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}. + */ + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + HMVMX_SET_TAGGED_TLB_FLUSHED(); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); /* Already flushed-by-EPT, skip doing it again below. */ + } + + /* Check for explicit TLB shootdowns. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + /* + * Changes to the EPT paging structure by VMM requires flushing by EPT as the CPU creates + * guest-physical (only EPT-tagged) mappings while traversing the EPT tables when EPT is in use. + * Flushing by VPID will only flush linear (only VPID-tagged) and combined (EPT+VPID tagged) mappings + * but not guest-physical mappings. + * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". See @bugref{6568}. + */ + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + HMVMX_SET_TAGGED_TLB_FLUSHED(); + } + + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown); + + /* + * Flush individual guest entries using VPID from the TLB or as little as possible with EPT + * as supported by the CPU. 
+ */ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++) + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]); + } + else + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + + HMVMX_SET_TAGGED_TLB_FLUSHED(); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + pVCpu->hm.s.TlbShootdown.cPages = 0; + } +#endif + + pVCpu->hm.s.fForceTLBFlush = false; + + HMVMX_UPDATE_FLUSH_SKIPPED_STAT(); + + Assert(pVCpu->hm.s.idLastCpu == pCpu->idCpu); + Assert(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes); + AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes, + ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes)); + AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); + + /* Update VMCS with the VPID. */ + int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid); + AssertRC(rc); + +#undef HMVMX_SET_TAGGED_TLB_FLUSHED +} + + +/** + * Flushes the tagged-TLB entries for EPT CPUs as necessary. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbEpt(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pCpu); + AssertMsg(pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with NestedPaging disabled.")); + AssertMsg(!pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID enabled.")); + + /* + * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. + * A change in the TLB flush count implies the host CPU is online after a suspend/resume. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + } + + /* Check for explicit TLB shootdown flushes. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + } + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + + if (pVCpu->hm.s.fForceTLBFlush) + { + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + pVCpu->hm.s.fForceTLBFlush = false; + } + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + else + { + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + /* We cannot flush individual entries without VPID support. Flush using EPT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown); + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + } + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); + + pVCpu->hm.s.TlbShootdown.cPages = 0; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + } +#endif +} + + +/** + * Flushes the tagged-TLB entries for VPID CPUs as necessary. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. 
+ * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbVpid(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pCpu); + AssertMsg(pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked with VPID disabled.")); + AssertMsg(!pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging enabled")); + + /* + * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last. + * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB + * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + } + + /* Check for explicit TLB shootdown flushes. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + /* + * If we ever support VPID flush combinations other than ALL or SINGLE-context (see hmR0VmxSetupTaggedTlb()) + * we would need to explicitly flush in this case (add an fExplicitFlush = true here and change the + * pCpu->fFlushAsidBeforeUse check below to include fExplicitFlush's too) - an obscure corner case. + */ + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + } + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + if (pVCpu->hm.s.fForceTLBFlush) + { + ++pCpu->uCurrentAsid; + if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) + { + pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */ + pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ + pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ + } + + pVCpu->hm.s.fForceTLBFlush = false; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid; + if (pCpu->fFlushAsidBeforeUse) + hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */); + } + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + else + { + AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid, + ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n", + pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes, + pCpu->uCurrentAsid, pCpu->cTlbFlushes)); + + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + /* Flush individual guest entries using VPID or as little as possible with EPT as supported by the CPU. 
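
/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * tagged-TLB code above hands out a fresh VPID/ASID per host CPU and, when
 * the counter reaches uMaxAsid, wraps back to 1 (0 is reserved for the host)
 * while bumping cTlbFlushes so every VCPU scheduled on that CPU knows its old
 * VPID is stale. A standalone model of that allocator follows; the structure
 * and names are assumptions, not the VirtualBox types.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef struct MYHOSTCPU
{
    uint32_t uCurrentAsid;      /* last ASID handed out on this host CPU */
    uint32_t cTlbFlushes;       /* generation counter; bumping it invalidates old ASIDs */
    bool     fFlushBeforeUse;   /* new ASIDs must be flushed before first use */
} MYHOSTCPU;

static uint32_t myAllocAsid(MYHOSTCPU *pCpu, uint32_t uMaxAsid)
{
    ++pCpu->uCurrentAsid;
    if (pCpu->uCurrentAsid >= uMaxAsid)
    {
        pCpu->uCurrentAsid    = 1;      /* wrap to 1; the host uses ASID 0 */
        pCpu->cTlbFlushes++;            /* force every VCPU on this CPU to resync */
        pCpu->fFlushBeforeUse = true;
    }
    return pCpu->uCurrentAsid;
}

int main(void)
{
    MYHOSTCPU Cpu = { 0, 0, false };
    uint32_t const uMaxAsid = 4;        /* tiny limit so the wraparound is visible */
    for (unsigned i = 0; i < 8; i++)
    {
        unsigned const uAsid = myAllocAsid(&Cpu, uMaxAsid);
        printf("asid=%u generation=%u\n", uAsid, (unsigned)Cpu.cTlbFlushes);
    }
    return 0;
}
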
*/ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++) + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]); + } + else + hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */); + + pVCpu->hm.s.TlbShootdown.cPages = 0; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + } + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); + } +#endif + + AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes, + ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes)); + AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); + + int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid); + AssertRC(rc); +} + + +/** + * Flushes the guest TLB entry based on CPU capabilities. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + */ +DECLINLINE(void) hmR0VmxFlushTaggedTlb(PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + switch (pVM->hm.s.vmx.uFlushTaggedTlb) + { + case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_EPT: hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_VPID: hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_NONE: hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break; + default: + AssertMsgFailed(("Invalid flush-tag function identifier\n")); + break; + } +} + + +/** + * Sets up the appropriate tagged TLB-flush level and handler for flushing guest + * TLB entries from the host TLB before VM-entry. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +static int hmR0VmxSetupTaggedTlb(PVM pVM) +{ + /* + * Determine optimal flush type for Nested Paging. + * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup unrestricted + * guest execution (see hmR3InitFinalizeR0()). + */ + if (pVM->hm.s.fNestedPaging) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT) + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT; + else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS; + else + { + /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Make sure the write-back cacheable memory type for EPT is supported. */ + if (!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB)) + { + LogRel(("hmR0VmxSetupTaggedTlb: Unsupported EPTP memory type %#x.\n", pVM->hm.s.vmx.Msrs.u64EptVpidCaps)); + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + } + else + { + /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. 
*/ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + } + + /* + * Determine optimal flush type for VPID. + */ + if (pVM->hm.s.vmx.fVpid) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT) + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT; + else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS) + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS; + else + { + /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + LogRel(("hmR0VmxSetupTaggedTlb: Only INDIV_ADDR supported. Ignoring VPID.\n")); + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS) + LogRel(("hmR0VmxSetupTaggedTlb: Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n")); + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED; + pVM->hm.s.vmx.fVpid = false; + } + } + else + { + /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */ + Log4(("hmR0VmxSetupTaggedTlb: VPID supported without INVEPT support. Ignoring VPID.\n")); + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED; + pVM->hm.s.vmx.fVpid = false; + } + } + + /* + * Setup the handler for flushing tagged-TLBs. + */ + if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID; + else if (pVM->hm.s.fNestedPaging) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT; + else if (pVM->hm.s.vmx.fVpid) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID; + else + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE; + return VINF_SUCCESS; +} + + +/** + * Sets up pin-based VM-execution controls in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxSetupPinCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + uint32_t val = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0; /* Bits set here must always be set. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1; /* Bits cleared here must always be cleared. */ + + val |= VMX_VMCS_CTRL_PIN_EXEC_EXT_INT_EXIT /* External interrupts causes a VM-exits. */ + | VMX_VMCS_CTRL_PIN_EXEC_NMI_EXIT; /* Non-maskable interrupts causes a VM-exit. */ + Assert(!(val & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)); + + /* Enable the VMX preemption timer. */ + if (pVM->hm.s.vmx.fUsePreemptTimer) + { + Assert(pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER); + val |= VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER; + } + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupPinCtls: invalid pin-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n", + pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set pin-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32PinCtls = val; + return rc; +} + + +/** + * Sets up processor-based VM-execution controls in the VMCS. + * + * @returns VBox status code. 
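
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxSetupPinCtls above builds a control word from two capability masks.
 * The "disallowed0" bits must always be set, anything outside "allowed1" must
 * stay clear, and the final (val & zap) != val test catches a wanted bit the
 * CPU does not allow. The standalone check below uses made-up bit values, not
 * the real VMX control bits.
 */
#include <stdio.h>
#include <stdint.h>

static int myBuildCtls(uint32_t fDisallowed0, uint32_t fAllowed1,
                       uint32_t fWanted, uint32_t *pfResult)
{
    uint32_t const val = fDisallowed0 | fWanted;    /* must-be-one bits plus what we want */
    uint32_t const zap = fAllowed1;                 /* everything else must be zero */

    if ((val & zap) != val)
        return -1;                                  /* a wanted or required bit is not supported */
    *pfResult = val;
    return 0;
}

int main(void)
{
    uint32_t fCtls = 0;
    /* Hypothetical capability: bit 0 forced on, bits 0-3 allowed. */
    int rc = myBuildCtls(0x1 /* disallowed0 */, 0xF /* allowed1 */, 0x6 /* wanted */, &fCtls);
    printf("rc=%d ctls=%#x\n", rc, (unsigned)fCtls);

    /* Asking for bit 4, which this "CPU" does not allow, fails the check. */
    rc = myBuildCtls(0x1, 0xF, 0x10, &fCtls);
    printf("rc=%d\n", rc);
    return 0;
}
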
+ * @param pVM Pointer to the VM. + * @param pVMCPU Pointer to the VMCPU. + */ +static int hmR0VmxSetupProcCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + int rc = VERR_INTERNAL_ERROR_5; + uint32_t val = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + val |= VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT /* HLT causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING /* Use TSC-offsetting. */ + | VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_RDPMC_EXIT /* RDPMC causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_MONITOR_EXIT /* MONITOR causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_MWAIT_EXIT; /* MWAIT causes a VM-exit. */ + + /* We toggle VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */ + if ( !(pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT) + || (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT)) + { + LogRel(("hmR0VmxSetupProcCtls: unsupported VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT combo!")); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */ + if (!pVM->hm.s.fNestedPaging) + { + Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */ + val |= VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + } + + /* Use TPR shadowing if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); + Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0); + rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic); + AssertRCReturn(rc, rc); + + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */ + /* CR8 writes causes a VM-exit based on TPR threshold. */ + Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT)); + Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT)); + } + else + { + /* + * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs. + * Set this control only for 64-bit guests. + */ + if (pVM->hm.s.fAllow64BitGuests) + { + val |= VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT /* CR8 reads causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT; /* CR8 writes causes a VM-exit. */ + } + } + + /* Use MSR-bitmaps if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + { + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS; + + Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap); + Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap); + AssertRCReturn(rc, rc); + + /* + * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored + * automatically (either as part of the MSR-load/store areas or dedicated fields in the VMCS). 
+ */ + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + } + + /* Use the secondary processor-based VM-execution controls if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL) + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL; + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid processor-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n", + pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set processor-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32ProcCtls = val; + + /* + * Secondary processor-based VM-execution controls. + */ + if (RT_LIKELY(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)) + { + val = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0; /* Bits set here must be set in the VMCS. */ + zap = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT) + val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; /* WBINVD causes a VM-exit. */ + + if (pVM->hm.s.fNestedPaging) + val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; /* Enable EPT. */ + else + { + /* + * Without Nested Paging, INVPCID should cause a VM-exit. Enabling this bit causes the CPU to refer to + * VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT when INVPCID is executed by the guest. + * See Intel spec. 25.4 "Changes to instruction behaviour in VMX non-root operation". + */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_INVPCID) + val |= VMX_VMCS_CTRL_PROC_EXEC2_INVPCID; + } + + if (pVM->hm.s.vmx.fVpid) + val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; /* Enable VPID. */ + + if (pVM->hm.s.vmx.fUnrestrictedGuest) + val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST; /* Enable Unrestricted Execution. */ + + /* Enable Virtual-APIC page accesses if supported by the CPU. This is essentially where the TPR shadow resides. */ + /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be + * done dynamically. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) + { + Assert(pVM->hm.s.vmx.HCPhysApicAccess); + Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */ + val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; /* Virtualize APIC accesses. 
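
/*
 * Illustrative sketch (editor's addition, not part of the patch): the block
 * above grants the guest direct access to a fixed set of MSRs (SYSENTER_*,
 * STAR/LSTAR/SF_MASK, FS/GS base, KERNEL_GS_BASE) by calling the permission
 * helper once per MSR. Driving that from a small table keeps the list in one
 * place; the architectural MSR numbers below are given for illustration and
 * the stub setter is an assumption, not the VirtualBox helper.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static void myStubSetMsrPassthru(uint32_t uMsr)     /* stands in for a permission setter */
{
    printf("pass-through MSR %#010x\n", (unsigned)uMsr);
}

int main(void)
{
    static const uint32_t s_auPassthruMsrs[] =
    {
        UINT32_C(0x00000174),   /* IA32_SYSENTER_CS  */
        UINT32_C(0x00000175),   /* IA32_SYSENTER_ESP */
        UINT32_C(0x00000176),   /* IA32_SYSENTER_EIP */
        UINT32_C(0xC0000081),   /* STAR              */
        UINT32_C(0xC0000082),   /* LSTAR             */
        UINT32_C(0xC0000084),   /* SF_MASK           */
        UINT32_C(0xC0000100),   /* FS base           */
        UINT32_C(0xC0000101),   /* GS base           */
        UINT32_C(0xC0000102),   /* KERNEL_GS_BASE    */
    };

    for (size_t i = 0; i < sizeof(s_auPassthruMsrs) / sizeof(s_auPassthruMsrs[0]); i++)
        myStubSetMsrPassthru(s_auPassthruMsrs[i]);
    return 0;
}
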
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess); + AssertRCReturn(rc, rc); + } + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; /* Enable RDTSCP support. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + } + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid secondary processor-based VM-execution controls combo! " + "cpu=%#RX64 val=%#RX64 zap=%#RX64\n", pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0, val, zap)); + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set secondary processor-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32ProcCtls2 = val; + } + else if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest)) + { + LogRel(("hmR0VmxSetupProcCtls: Unrestricted Guest set as true when secondary processor-based VM-execution controls not " + "available\n")); + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + return VINF_SUCCESS; +} + + +/** + * Sets up miscellaneous (everything other than Pin & Processor-based + * VM-execution) control fields in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxSetupMiscCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + int rc = VERR_GENERAL_FAILURE; + + /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ +#if 0 + /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxLoadGuestCR3AndCR4())*/ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); AssertRCReturn(rc, rc); + + /* + * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is) + * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit. + * We thus use the exception bitmap to control it rather than use both. + */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); AssertRCReturn(rc, rc); + + /** @todo Explore possibility of using IO-bitmaps. */ + /* All IO & IOIO instructions cause VM-exits. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); AssertRCReturn(rc, rc); + + /* Initialize the MSR-bitmap area. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc); +#endif + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Setup MSR autoloading/storing. */ + Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr); + Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. 
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); + AssertRCReturn(rc, rc); + + Assert(pVCpu->hm.s.vmx.HCPhysHostMsr); + Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr); + AssertRCReturn(rc, rc); +#endif + + /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */ + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff)); + AssertRCReturn(rc, rc); + + /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ +#if 0 + /* Setup debug controls */ + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); /** @todo We don't support IA32_DEBUGCTL MSR. Should we? */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0); + AssertRCReturn(rc, rc); +#endif + + return rc; +} + + +/** + * Sets up the initial exception bitmap in the VMCS based on static conditions + * (i.e. conditions that cannot ever change after starting the VM). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitXcptBitmap(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + uint32_t u32XcptBitmap = 0; + + /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */ + if (!pVM->hm.s.fNestedPaging) + u32XcptBitmap |= RT_BIT(X86_XCPT_PF); + + pVCpu->hm.s.vmx.u32XcptBitmap = u32XcptBitmap; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Sets up the initial guest-state mask. The guest-state mask is consulted + * before reading guest-state fields from the VMCS as VMREADs can be expensive + * for the nested virtualization case (as it would cause a VM-exit). + * + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitUpdatedGuestStateMask(PVMCPU pVCpu) +{ + /* Initially the guest-state is up-to-date as there is nothing in the VMCS. */ + HMVMXCPU_GST_RESET_TO(pVCpu, HMVMX_UPDATED_GUEST_ALL); + return VINF_SUCCESS; +} + + +/** + * Does per-VM VT-x initialization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) VMXR0InitVM(PVM pVM) +{ + LogFlowFunc(("pVM=%p\n", pVM)); + + int rc = hmR0VmxStructsAlloc(pVM); + if (RT_FAILURE(rc)) + { + LogRel(("VMXR0InitVM: hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc)); + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Does per-VM VT-x termination. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) VMXR0TermVM(PVM pVM) +{ + LogFlowFunc(("pVM=%p\n", pVM)); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ) + ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE); +#endif + hmR0VmxStructsFree(pVM); + return VINF_SUCCESS; +} + + +/** + * Sets up the VM for execution under VT-x. + * This function is only called once per-VM during initialization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. 
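
/*
 * Illustrative sketch (editor's addition, not part of the patch):
 * hmR0VmxInitXcptBitmap above sets one bit per exception vector that should
 * cause a VM-exit; with nested paging disabled, #PF (vector 14) must be
 * intercepted so the shadow page tables can be kept in sync. A minimal model
 * of building such a bitmap follows; the helper name and the extra #DB case
 * are assumptions used only to show multiple bits.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MY_XCPT_DB  1u      /* #DB vector */
#define MY_XCPT_PF 14u      /* #PF vector */

static uint32_t myBuildXcptBitmap(bool fNestedPaging, bool fInterceptDB)
{
    uint32_t fXcptBitmap = 0;
    if (!fNestedPaging)
        fXcptBitmap |= UINT32_C(1) << MY_XCPT_PF;   /* need #PF exits for shadow paging */
    if (fInterceptDB)
        fXcptBitmap |= UINT32_C(1) << MY_XCPT_DB;
    return fXcptBitmap;
}

int main(void)
{
    printf("no nested paging: %#06x\n", (unsigned)myBuildXcptBitmap(false, false));
    printf("nested paging   : %#06x\n", (unsigned)myBuildXcptBitmap(true,  false));
    return 0;
}
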
+ */ +VMMR0DECL(int) VMXR0SetupVM(PVM pVM) +{ + AssertPtrReturn(pVM, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + LogFlowFunc(("pVM=%p\n", pVM)); + + /* + * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be allocated. + * We no longer support the highly unlikely case of UnrestrictedGuest without pRealModeTSS. See hmR3InitFinalizeR0(). + */ + /* -XXX- change hmR3InitFinalizeR0Intel() to fail if pRealModeTSS alloc fails. */ + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable + || !pVM->hm.s.vmx.pRealModeTSS)) + { + LogRel(("VMXR0SetupVM: invalid real-on-v86 state.\n")); + return VERR_INTERNAL_ERROR; + } + +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + /* + * This is for the darwin 32-bit/PAE kernels trying to execute 64-bit guests. We don't bother with + * the 32<->64 switcher in this case. This is a rare, legacy use-case with barely any test coverage. + */ + if ( pVM->hm.s.fAllow64BitGuests + && !HMVMX_IS_64BIT_HOST_MODE()) + { + LogRel(("VMXR0SetupVM: Unsupported guest and host paging mode combination.\n")); + return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE; + } +#endif + + /* Initialize these always, see hmR3InitFinalizeR0().*/ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE; + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE; + + /* Setup the tagged-TLB flush handlers. */ + int rc = hmR0VmxSetupTaggedTlb(pVM); + if (RT_FAILURE(rc)) + { + LogRel(("VMXR0SetupVM: hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc)); + return rc; + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvVmcs); + + /* Log the VCPU pointers, useful for debugging SMP VMs. */ + Log4(("VMXR0SetupVM: pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu)); + + /* Set revision dword at the beginning of the VMCS structure. */ + *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo); + + /* Initialize our VMCS region in memory, set the VMCS launch state to "clear". */ + rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + /* Load this VMCS as the current VMCS. */ + rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupPinCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupProcCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupMiscCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxInitXcptBitmap(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxInitUpdatedGuestStateMask(pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitUpdatedGuestStateMask failed! 
rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + rc = hmR0VmxInitVmcsReadCache(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); +#endif + + /* Re-sync the CPU's internal data into our VMCS memory region & reset the launch state to "clear". */ + rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc); + } + + return VINF_SUCCESS; +} + + +/** + * Saves the host control registers (CR0, CR3, CR4) into the host-state area in + * the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(int) hmR0VmxSaveHostControlRegs(PVM pVM, PVMCPU pVCpu) +{ + RTCCUINTREG uReg = ASMGetCR0(); + int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg); + AssertRCReturn(rc, rc); + +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + /* For the darwin 32-bit hybrid kernel, we need the 64-bit CR3 as it uses 64-bit paging. */ + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uint64_t uRegCR3 = HMR0Get64bitCR3(); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_CR3, uRegCR3); + } + else +#endif + { + uReg = ASMGetCR3(); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg); + } + AssertRCReturn(rc, rc); + + uReg = ASMGetCR4(); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg); + AssertRCReturn(rc, rc); + return rc; +} + + +#if HC_ARCH_BITS == 64 +/** + * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry + * requirements. See hmR0VmxSaveHostSegmentRegs(). + */ +# define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \ + if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \ + { \ + bool fValidSelector = true; \ + if ((selValue) & X86_SEL_LDT) \ + { \ + uint32_t uAttr = ASMGetSegAttr((selValue)); \ + fValidSelector = RT_BOOL(uAttr != ~0U && (uAttr & X86_DESC_P)); \ + } \ + if (fValidSelector) \ + { \ + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \ + pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \ + } \ + (selValue) = 0; \ + } +#endif + + +/** + * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into + * the host-state area in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(int) hmR0VmxSaveHostSegmentRegs(PVM pVM, PVMCPU pVCpu) +{ + int rc = VERR_INTERNAL_ERROR_5; + + /* + * Host DS, ES, FS and GS segment registers. + */ +#if HC_ARCH_BITS == 64 + RTSEL uSelDS = ASMGetDS(); + RTSEL uSelES = ASMGetES(); + RTSEL uSelFS = ASMGetFS(); + RTSEL uSelGS = ASMGetGS(); +#else + RTSEL uSelDS = 0; + RTSEL uSelES = 0; + RTSEL uSelFS = 0; + RTSEL uSelGS = 0; +#endif + + /* Recalculate which host-state bits need to be manually restored. */ + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + + /* + * Host CS and SS segment registers. + */ +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + RTSEL uSelCS; + RTSEL uSelSS; + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uSelCS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS; + uSelSS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS; + } + else + { + /* Seems darwin uses the LDT (TI flag is set) in the CS & SS selectors which VT-x doesn't like. 
*/ + uSelCS = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS; + uSelSS = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS; + } +#else + RTSEL uSelCS = ASMGetCS(); + RTSEL uSelSS = ASMGetSS(); +#endif + + /* + * Host TR segment register. + */ + RTSEL uSelTR = ASMGetTR(); + +#if HC_ARCH_BITS == 64 + /* + * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to gain VM-entry and restore them + * before we get preempted. See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers". + */ + VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS); + VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES); + VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS); + VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS); +# undef VMXLOCAL_ADJUST_HOST_SEG +#endif + + /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */ + Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT)); + Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT)); + Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT)); + Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT)); + Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT)); + Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT)); + Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT)); + Assert(uSelCS); + Assert(uSelTR); + + /* Assertion is right but we would not have updated u32ExitCtls yet. */ +#if 0 + if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)) + Assert(uSelSS != 0); +#endif + + /* Write these host selector fields into the host-state area in the VMCS. */ + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_CS, uSelCS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_SS, uSelSS); AssertRCReturn(rc, rc); +#if HC_ARCH_BITS == 64 + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_DS, uSelDS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_ES, uSelES); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_FS, uSelFS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_GS, uSelGS); AssertRCReturn(rc, rc); +#endif + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_TR, uSelTR); AssertRCReturn(rc, rc); + + /* + * Host GDTR and IDTR. + */ + RTGDTR Gdtr; + RT_ZERO(Gdtr); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + X86XDTR64 Gdtr64; + X86XDTR64 Idtr64; + HMR0Get64bitGdtrAndIdtr(&Gdtr64, &Idtr64); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, Gdtr64.uAddr); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, Idtr64.uAddr); AssertRCReturn(rc, rc); + + Gdtr.cbGdt = Gdtr64.cb; + Gdtr.pGdt = (uintptr_t)Gdtr64.uAddr; + } + else +#endif + { + RTIDTR Idtr; + ASMGetGDTR(&Gdtr); + ASMGetIDTR(&Idtr); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc); + +#if HC_ARCH_BITS == 64 + /* + * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps them to the + * maximum limit (0xffff) on every VM-exit. + */ + if (Gdtr.cbGdt != 0xffff) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR; + AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64)); + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64)); + } + + /* + * IDT limit is practically 0xfff. Therefore if the host has the limit as 0xfff, VT-x bloating the limit to 0xffff + * is not a problem as it's not possible to get at them anyway. See Intel spec. 
6.14.1 "64-Bit Mode IDT" and + * Intel spec. 6.2 "Exception and Interrupt Vectors". + */ + if (Idtr.cbIdt < 0x0fff) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR; + AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64)); + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64)); + } +#endif + } + + /* + * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI and RPL bits + * is effectively what the CPU does for "scaling by 8". TI is always 0 and RPL should be too in most cases. + */ + if ((uSelTR & X86_SEL_MASK) > Gdtr.cbGdt) + { + AssertMsgFailed(("hmR0VmxSaveHostSegmentRegs: TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt)); + return VERR_VMX_INVALID_HOST_STATE; + } + + PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK)); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + /* We need the 64-bit TR base for hybrid darwin. */ + uint64_t u64TRBase = X86DESC64_BASE((PX86DESC64)pDesc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, u64TRBase); + } + else +#endif + { + uintptr_t uTRBase; +#if HC_ARCH_BITS == 64 + uTRBase = X86DESC64_BASE(pDesc); + + /* + * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on all VM-exits. + * The type is the same for 64-bit busy TSS[1]. The limit needs manual restoration if the host has something else. + * Task switching is not supported in 64-bit mode[2], but the limit still matters as IOPM is supported in 64-bit mode. + * Restoring the limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0. + * + * [1] See Intel spec. 3.5 "System Descriptor Types". + * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode". + */ + Assert(pDesc->System.u4Type == 11); + if ( pDesc->System.u16LimitLow != 0x67 + || pDesc->System.u4LimitHigh) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR; + pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR; + + /* Store the GDTR here as we need it while restoring TR. */ + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64)); + } +#else + uTRBase = X86DESC_BASE(pDesc); +#endif + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase); + } + AssertRCReturn(rc, rc); + + /* + * Host FS base and GS base. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE); + uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc); + +# if HC_ARCH_BITS == 64 + /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */ + if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS) + pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase; + if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS) + pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase; +# endif + } +#endif + return rc; +} + + +/** + * Saves certain host MSRs in the VM-Exit MSR-load area and some in the + * host-state area of the VMCS. Theses MSRs will be automatically restored on + * the host after every successful VM exit. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. 
+ */ +DECLINLINE(int) hmR0VmxSaveHostMsrs(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvHostMsr); + + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; + uint32_t cHostMsrs = 0; + uint32_t u32HostExtFeatures = pVM->hm.s.cpuid.u32AMDFeatureEDX; + + if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)) + { + uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER); + +# if HC_ARCH_BITS == 64 + /* Paranoia. 64-bit code requires these bits to be set always. */ + Assert((u64HostEfer & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)); + + /* + * We currently do not save/restore host EFER, we just make sure it doesn't get modified by VT-x operation. + * All guest accesses (read, write) on EFER cause VM-exits. If we are to conditionally load the guest EFER for + * some reason (e.g. allow transparent reads) we would activate the code below. + */ +# if 0 + /* All our supported 64-bit host platforms must have NXE bit set. Otherwise we can change the below code to save EFER. */ + Assert(u64HostEfer & (MSR_K6_EFER_NXE)); + /* The SCE bit is only applicable in 64-bit mode. Save EFER if it doesn't match what the guest has. + See Intel spec. 30.10.4.3 "Handling the SYSCALL and SYSRET Instructions". */ + if (CPUMIsGuestInLongMode(pVCpu)) + { + uint64_t u64GuestEfer; + rc = CPUMQueryGuestMsr(pVCpu, MSR_K6_EFER, &u64GuestEfer); + AssertRC(rc); + + if ((u64HostEfer & MSR_K6_EFER_SCE) != (u64GuestEfer & MSR_K6_EFER_SCE)) + { + pHostMsr->u32Msr = MSR_K6_EFER; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = u64HostEfer; + pHostMsr++; cHostMsrs++; + } + } +# endif +# else /* HC_ARCH_BITS != 64 */ + pHostMsr->u32Msr = MSR_K6_EFER; + pHostMsr->u32Reserved = 0; +# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongMode(pVCpu)) + { + /* Must match the EFER value in our 64 bits switcher. */ + pHostMsr->u64Value = u64HostEfer | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE; + } + else +# endif + pHostMsr->u64Value = u64HostEfer; + pHostMsr++; cHostMsrs++; +# endif /* HC_ARCH_BITS == 64 */ + } + +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + pHostMsr->u32Msr = MSR_K6_STAR; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_LSTAR; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64-bit mode syscall rip */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_SF_MASK; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_KERNEL_GS_BASE; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */ + pHostMsr++; cHostMsrs++; + } +# endif + + /* Host TSC AUX MSR must be restored since we always load/store guest TSC AUX MSR. */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pHostMsr->u32Msr = MSR_K8_TSC_AUX; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX); + pHostMsr++; cHostMsrs++; + } + + /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. 
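+
+        [Editor's note: hedged illustration added for this write-up, not part of the
+        upstream change. The "recommended 512" and the limit checked below come from
+        the IA32_VMX_MISC MSR: bits 27:25 hold a value N, and the recommended maximum
+        number of entries in each auto-load/store MSR list is 512 * (N + 1). That is
+        roughly what a MSR_IA32_VMX_MISC_MAX_MSR-style macro is expected to compute:
+
+            cMaxMsrs = 512 * (((u64Misc >> 25) & 0x7) + 1);
+        ]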
*/ + if (RT_UNLIKELY(cHostMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))) + { + LogRel(("cHostMsrs=%u Cpu=%u\n", cHostMsrs, (unsigned)MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))); + pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_HOST_MSR_STORAGE; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cHostMsrs); +#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ + + /* + * Host Sysenter MSRs. + */ + rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); + AssertRCReturn(rc, rc); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); + } + else + { + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); + } +#elif HC_ARCH_BITS == 32 + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); +#else + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); +#endif + AssertRCReturn(rc, rc); + + /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT, IA32_EFER, also see + * hmR0VmxSetupExitCtls() !! */ + return rc; +} + + +/** + * Sets up VM-entry controls in the VMCS. These controls can affect things done + * on VM-exit; e.g. "load debug controls", see Intel spec. 24.8.1 "VM-entry + * controls". + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxLoadGuestEntryCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + uint32_t val = pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supports the 1-setting of this bit. */ + val |= VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG; + + /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */ + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + val |= VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST; + else + Assert(!(val & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST)); + + /* + * The following should -not- be set (since we're not in SMM mode): + * - VMX_VMCS_CTRL_ENTRY_ENTRY_SMM + * - VMX_VMCS_CTRL_ENTRY_DEACTIVATE_DUALMON + */ + + /** @todo VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR, + * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR, + * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR */ + + if ((val & zap) != val) + { + LogRel(("hmR0VmxLoadGuestEntryCtls: invalid VM-entry controls combo! 
cpu=%RX64 val=%RX64 zap=%RX64\n", + pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set VM-exit controls. */ + pVCpu->hm.s.vmx.u32EntryCtls = val; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS); + } + return rc; +} + + +/** + * Sets up the VM-exit controls in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks requires EFER. + */ +DECLINLINE(int) hmR0VmxLoadGuestExitCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_EXIT_CTLS)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + uint32_t val = pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */ + val |= VMX_VMCS_CTRL_EXIT_SAVE_DEBUG; + + /* + * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary. + * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in hmR0VmxSaveHostMsrs(). + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; + else + Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)); +#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; /* The switcher goes to long mode. */ + else + Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)); +#endif + + /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */ + Assert(!(val & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT)); + + /** @todo VMX_VMCS_CTRL_EXIT_LOAD_PERF_MSR, + * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_PAT_MSR, + * VMX_VMCS_CTRL_EXIT_LOAD_HOST_PAT_MSR, + * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR, + * VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR. */ + + if (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER) + val |= VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER; + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid VM-exit controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n", + pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set VM-exit controls. */ + pVCpu->hm.s.vmx.u32ExitCtls = val; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_EXIT_CTLS); + } + return rc; +} + + +/** + * Loads the guest APIC and related state. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. 
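+ *
+ * [Editor's note: worked example with hypothetical values, added for this write-up
+ *  and not part of the upstream change. The TPR threshold written below uses only
+ *  bits 3:0, which correspond to bits 7:4 of the TPR (the task-priority class).
+ *  Say a pending interrupt vector 0x51 (priority class 5) is masked by a guest TPR
+ *  of 0x80 (class 8): the threshold becomes min(5, 8) = 5, so once the guest lowers
+ *  its TPR below 0x50, VT-x raises a TPR-below-threshold VM-exit and the pending
+ *  interrupt can be delivered.]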
+ */ +DECLINLINE(int) hmR0VmxLoadGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE)) + { + /* Setup TPR shadowing. Also setup TPR patching for 32-bit guests. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); + + bool fPendingIntr = false; + uint8_t u8Tpr = 0; + uint8_t u8PendingIntr = 0; + rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr); + AssertRCReturn(rc, rc); + + /* + * If there are external interrupts pending but masked by the TPR value, instruct VT-x to cause a VM-exit when + * the guest lowers its TPR below the highest-priority pending interrupt and we can deliver the interrupt. + * If there are no external interrupts pending, set threshold to 0 to not cause a VM-exit. We will eventually deliver + * the interrupt when we VM-exit for other reasons. + */ + pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8Tpr; /* Offset 0x80 is TPR in the APIC MMIO range. */ + uint32_t u32TprThreshold = 0; + if (fPendingIntr) + { + /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR (which is the Task-Priority Class). */ + const uint8_t u8PendingPriority = (u8PendingIntr >> 4) & 0xf; + const uint8_t u8TprPriority = (u8Tpr >> 4) & 0xf; + if (u8PendingPriority <= u8TprPriority) + u32TprThreshold = u8PendingPriority; + else + u32TprThreshold = u8TprPriority; /* Required for Vista 64-bit guest, see @bugref{6398}. */ + } + Assert(!(u32TprThreshold & 0xfffffff0)); /* Bits 31:4 MBZ. */ + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold); + AssertRCReturn(rc, rc); + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + return rc; +} + + +/** + * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it). + * + * @returns Guest's interruptibility-state. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag. + */ +DECLINLINE(uint32_t) hmR0VmxGetGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* + * Instructions like STI and MOV SS inhibit interrupts till the next instruction completes. Check if we should + * inhibit interrupts or clear any existing interrupt-inhibition. + */ + uint32_t uIntrState = 0; + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) + { + /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */ + AssertMsg(HMVMXCPU_GST_IS_SET(pVCpu, HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS), + ("%#x\n", HMVMXCPU_GST_VALUE(pVCpu))); + if (pMixedCtx->rip != EMGetInhibitInterruptsPC(pVCpu)) + { + /* + * We can clear the inhibit force flag as even if we go back to the recompiler without executing guest code in + * VT-x, the flag's condition to be cleared is met and thus the cleared state is correct. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + else if (pMixedCtx->eflags.Bits.u1IF) + uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI; + else + uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS; + } + return uIntrState; +} + + +/** + * Loads the guest's interruptibility-state into the guest-state area in the + * VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
+ * @param uIntrState The interruptibility-state to set. + */ +static int hmR0VmxLoadGuestIntrState(PVMCPU pVCpu, uint32_t uIntrState) +{ + AssertMsg(!(uIntrState & 0xfffffff0), ("%#x\n", uIntrState)); /* Bits 31:4 MBZ. */ + Assert((uIntrState & 0x3) != 0x3); /* Block-by-STI and MOV SS cannot be simultaneously set. */ + int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, uIntrState); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest's RIP into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RIP)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pMixedCtx->rip); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RIP); + Log4(("Load: VMX_VMCS_GUEST_RIP=%#RX64 fContextUseFlags=%#RX32\n", pMixedCtx->rip, HMCPU_CF_VALUE(pVCpu))); + } + return rc; +} + + +/** + * Loads the guest's RSP into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RSP)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pMixedCtx->rsp); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RSP); + Log4(("Load: VMX_VMCS_GUEST_RSP=%#RX64\n", pMixedCtx->rsp)); + } + return rc; +} + + +/** + * Loads the guest's RFLAGS into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS)) + { + /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ). + Let us assert it as such and use 32-bit VMWRITE. */ + Assert(!(pMixedCtx->rflags.u64 >> 32)); + X86EFLAGS Eflags = pMixedCtx->eflags; + Eflags.u32 &= VMX_EFLAGS_RESERVED_0; /* Bits 22-31, 15, 5 & 3 MBZ. */ + Eflags.u32 |= VMX_EFLAGS_RESERVED_1; /* Bit 1 MB1. */ + + /* + * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so we can restore them on VM exit. + * Modify the real-mode guest's eflags so that VT-x can run the real-mode guest code under Virtual 8086 mode. + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + pVCpu->hm.s.vmx.RealMode.Eflags.u32 = Eflags.u32; /* Save the original eflags of the real-mode guest. */ + Eflags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */ + Eflags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. 
*/ + } + + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, Eflags.u32); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RFLAGS); + Log4(("Load: VMX_VMCS_GUEST_RFLAGS=%#RX32\n", Eflags.u32)); + } + return rc; +} + + +/** + * Loads the guest RIP, RSP and RFLAGS into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxLoadGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxLoadGuestRsp(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxLoadGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest CR0 control register into the guest-state area in the VMCS. + * CR0 is partially shared with the host and we have to consider the FPU bits. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadSharedCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* + * Guest CR0. + * Guest FPU. + */ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + { + Assert(!(pMixedCtx->cr0 >> 32)); + uint32_t u32GuestCR0 = pMixedCtx->cr0; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* The guest's view (read access) of its CR0 is unblemished. */ + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32GuestCR0); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR0_READ_SHADOW=%#RX32\n", u32GuestCR0)); + + /* Setup VT-x's view of the guest CR0. */ + /* Minimize VM-exits due to CR3 changes when we have NestedPaging. */ + if (pVM->hm.s.fNestedPaging) + { + if (CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + /* The guest has paging enabled, let it access CR3 without causing a VM exit if supported. */ + pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT); + } + else + { + /* The guest doesn't have paging enabled, make CR3 access to cause VM exits to update our shadow. */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + } + + /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */ + if (pVM->hm.s.vmx.fUnrestrictedGuest) + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + } + else + u32GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */ + + /* + * Guest FPU bits. + * Intel spec. 23.8 "Restrictions on VMX operation" mentions that CR0.NE bit must always be set on the first + * CPUs to support VT-x and no mention of with regards to UX in VM-entry checks. + */ + u32GuestCR0 |= X86_CR0_NE; + bool fInterceptNM = false; + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + fInterceptNM = false; /* Guest FPU active, no need to VM-exit on #NM. */ + /* The guest should still get #NM exceptions when it expects it to, so we should not clear TS & MP bits here. 
+ We're only concerned about -us- not intercepting #NMs when the guest-FPU is active. Not the guest itself! */ + } + else + { + fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */ + u32GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */ + | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */ + } + + /* Catch floating point exceptions if we need to report them to the guest in a different way. */ + bool fInterceptMF = false; + if (!(pMixedCtx->cr0 & X86_CR0_NE)) + fInterceptMF = true; + + /* Finally, intercept all exceptions as we cannot directly inject them in real-mode, see hmR0VmxInjectEventVmcs(). */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(PDMVmmDevHeapIsEnabled(pVM)); + Assert(pVM->hm.s.vmx.pRealModeTSS); + pVCpu->hm.s.vmx.u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK; + fInterceptNM = true; + fInterceptMF = true; + } + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK; + + if (fInterceptNM) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_NM); + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_NM); + + if (fInterceptMF) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_MF); + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_MF); + + /* Additional intercepts for debugging, define these yourself explicitly. */ +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap |= 0 + | RT_BIT(X86_XCPT_BP) + | RT_BIT(X86_XCPT_DB) + | RT_BIT(X86_XCPT_DE) + | RT_BIT(X86_XCPT_NM) + | RT_BIT(X86_XCPT_UD) + | RT_BIT(X86_XCPT_NP) + | RT_BIT(X86_XCPT_SS) + | RT_BIT(X86_XCPT_GP) + | RT_BIT(X86_XCPT_PF) + | RT_BIT(X86_XCPT_MF) + ; +#elif defined(HMVMX_ALWAYS_TRAP_PF) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_PF); +#endif + + Assert(pVM->hm.s.fNestedPaging || (pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT(X86_XCPT_PF))); + + /* Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */ + uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */ + uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG); + else + Assert((uSetCR0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG)); + + u32GuestCR0 |= uSetCR0; + u32GuestCR0 &= uZapCR0; + u32GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */ + + /* Write VT-x's view of the guest CR0 into the VMCS and update the exception bitmap. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCR0); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_GUEST_CR0=%#RX32 (uSetCR0=%#RX32 uZapCR0=%#RX32)\n", u32GuestCR0, uSetCR0, uZapCR0)); + + /* + * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed + * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits + * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables). + */ + uint32_t u32CR0Mask = 0; + u32CR0Mask = X86_CR0_PE + | X86_CR0_NE + | X86_CR0_WP + | X86_CR0_PG + | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */ + | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. 
Don't let the guest modify the host CR0.CD */ + | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */ + + /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM + * enmGuestMode to be in-sync with the current mode. See @bugref{6398} + * and @bugref{6944}. */ +#if 0 + if (pVM->hm.s.vmx.fUnrestrictedGuest) + u32CR0Mask &= ~X86_CR0_PE; +#endif + if (pVM->hm.s.fNestedPaging) + u32CR0Mask &= ~X86_CR0_WP; + + /* If the guest FPU state is active, don't need to VM-exit on writes to FPU related bits in CR0. */ + if (fInterceptNM) + { + u32CR0Mask |= X86_CR0_TS + | X86_CR0_MP; + } + + /* Write the CR0 mask into the VMCS and update the VCPU's copy of the current CR0 mask. */ + pVCpu->hm.s.vmx.u32CR0Mask = u32CR0Mask; + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32CR0Mask); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR0_MASK=%#RX32\n", u32CR0Mask)); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0); + } + return rc; +} + + +/** + * Loads the guest control registers (CR3, CR4) into the guest-state area + * in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestCR3AndCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* + * Guest CR2. + * It's always loaded in the assembler code. Nothing to do here. + */ + + /* + * Guest CR3. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3)) + { + RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS; + if (pVM->hm.s.fNestedPaging) + { + pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu); + + /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */ + Assert(pVCpu->hm.s.vmx.HCPhysEPTP); + Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000))); + Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff)); + + /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */ + pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB + | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT); + + /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */ + AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */ + && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x3f) == 0, /* Bits 6:11 MBZ. */ + ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); + + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS64_CTRL_EPTP_FULL=%#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); + + if ( pVM->hm.s.vmx.fUnrestrictedGuest + || CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. 
*/ + if (CPUMIsGuestInPAEModeEx(pMixedCtx)) + { + rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc); + } + + /* The guest's view of its CR3 is unblemished with Nested Paging when the guest is using paging or we + have Unrestricted Execution to handle the guest when it's not using paging. */ + GCPhysGuestCR3 = pMixedCtx->cr3; + } + else + { + /* + * The guest is not using paging, but the CPU (VT-x) has to. While the guest thinks it accesses physical memory + * directly, we use our identity-mapped page table to map guest-linear to guest-physical addresses. + * EPT takes care of translating it to host-physical addresses. + */ + RTGCPHYS GCPhys; + Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable); + Assert(PDMVmmDevHeapIsEnabled(pVM)); + + /* We obtain it here every time as the guest could have relocated this PCI region. */ + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys); + AssertRCReturn(rc, rc); + + GCPhysGuestCR3 = GCPhys; + } + + Log4(("Load: VMX_VMCS_GUEST_CR3=%#RGv (GstN)\n", GCPhysGuestCR3)); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3); + } + else + { + /* Non-nested paging case, just use the hypervisor's CR3. */ + RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu); + + Log4(("Load: VMX_VMCS_GUEST_CR3=%#RHv (HstN)\n", HCPhysGuestCR3)); + rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3); + } + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3); + } + + /* + * Guest CR4. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4)) + { + Assert(!(pMixedCtx->cr4 >> 32)); + uint32_t u32GuestCR4 = pMixedCtx->cr4; + + /* The guest's view of its CR4 is unblemished. */ + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32GuestCR4); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR4_READ_SHADOW=%#RX32\n", u32GuestCR4)); + + /* Setup VT-x's view of the guest CR4. */ + /* + * If we're emulating real-mode using virtual-8086 mode, we want to redirect software interrupts to the 8086 program + * interrupt handler. Clear the VME bit (the interrupt redirection bitmap is already all 0, see hmR3InitFinalizeR0()) + * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode". + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVM)); + u32GuestCR4 &= ~X86_CR4_VME; + } + + if (pVM->hm.s.fNestedPaging) + { + if ( !CPUMIsGuestPagingEnabledEx(pMixedCtx) + && !pVM->hm.s.vmx.fUnrestrictedGuest) + { + /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */ + u32GuestCR4 |= X86_CR4_PSE; + /* Our identity mapping is a 32 bits page directory. */ + u32GuestCR4 &= ~X86_CR4_PAE; + } + /* else use guest CR4.*/ + } + else + { + /* + * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host + * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables. + */ + switch (pVCpu->hm.s.enmShadowMode) + { + case PGMMODE_REAL: /* Real-mode. 
*/ + case PGMMODE_PROTECTED: /* Protected mode without paging. */ + case PGMMODE_32_BIT: /* 32-bit paging. */ + { + u32GuestCR4 &= ~X86_CR4_PAE; + break; + } + + case PGMMODE_PAE: /* PAE paging. */ + case PGMMODE_PAE_NX: /* PAE paging with NX. */ + { + u32GuestCR4 |= X86_CR4_PAE; + break; + } + + case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ + case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */ +#ifdef VBOX_ENABLE_64_BITS_GUESTS + break; +#endif + default: + AssertFailed(); + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; + } + } + + /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */ + uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + u32GuestCR4 |= uSetCR4; + u32GuestCR4 &= uZapCR4; + + /* Write VT-x's view of the guest CR4 into the VMCS. */ + Log4(("Load: VMX_VMCS_GUEST_CR4=%#RX32 (Set=%#RX32 Zap=%#RX32)\n", u32GuestCR4, uSetCR4, uZapCR4)); + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCR4); + AssertRCReturn(rc, rc); + + /* Setup CR4 mask. CR4 flags owned by the host, if the guest attempts to change them, that would cause a VM exit. */ + uint32_t u32CR4Mask = 0; + u32CR4Mask = X86_CR4_VME + | X86_CR4_PAE + | X86_CR4_PGE + | X86_CR4_PSE + | X86_CR4_VMXE; + pVCpu->hm.s.vmx.u32CR4Mask = u32CR4Mask; + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32CR4Mask); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4); + } + return rc; +} + + +/** + * Loads the guest debug registers into the guest-state area in the VMCS. + * This also sets up whether #DB and MOV DRx accesses cause VM exits. + * + * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3). + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadSharedDebugState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + return VINF_SUCCESS; + +#ifdef VBOX_STRICT + /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */ + if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + { + /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */ + Assert((pMixedCtx->dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); /* Bits 63:32, 15, 14, 12, 11 are reserved. */ + Assert((pMixedCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); /* Bit 10 is reserved (RA1). */ + } +#endif + + int rc; + PVM pVM = pVCpu->CTX_SUFF(pVM); + bool fInterceptDB = false; + bool fInterceptMovDRx = false; + if ( pVCpu->hm.s.fSingleInstruction + || DBGFIsStepping(pVCpu)) + { + /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. 
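+
+        [Editor's note: clarification added for this write-up, not part of the upstream
+        change. The two single-stepping strategies below differ in guest visibility:
+        the monitor-trap-flag exec control makes the CPU VM-exit after every guest
+        instruction without touching guest-visible state, whereas the fallback sets the
+        guest's EFLAGS.TF, which the guest could observe and which therefore has to be
+        remembered (fClearTrapFlag) and undone later. Roughly:
+
+            if (MTF available)  u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
+            else              { eflags.u32 |= X86_EFL_TF; fClearTrapFlag = true; }
+        ]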
*/ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG) + { + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + Assert(fInterceptDB == false); + } + else + { + pMixedCtx->eflags.u32 |= X86_EFL_TF; + pVCpu->hm.s.fClearTrapFlag = true; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS); + fInterceptDB = true; + } + } + + if ( fInterceptDB + || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK)) + { + /* + * Use the combined guest and host DRx values found in the hypervisor + * register set because the debugger has breakpoints active or someone + * is single stepping on the host side without a monitor trap flag. + * + * Note! DBGF expects a clean DR6 state before executing guest code. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pMixedCtx) + && !CPUMIsHyperDebugStateActivePending(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsHyperDebugStateActivePending(pVCpu)); + Assert(!CPUMIsGuestDebugStateActivePending(pVCpu)); + } + else +#endif + if (!CPUMIsHyperDebugStateActive(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsHyperDebugStateActive(pVCpu)); + Assert(!CPUMIsGuestDebugStateActive(pVCpu)); + } + + /* Update DR7. (The other DRx values are handled by CPUM one way or the other.) */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)CPUMGetHyperDR7(pVCpu)); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.fUsingHyperDR7 = true; + fInterceptDB = true; + fInterceptMovDRx = true; + } + else + { + /* + * If the guest has enabled debug registers, we need to load them prior to + * executing guest code so they'll trigger at the right time. + */ + if (pMixedCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */ + { +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pMixedCtx) + && !CPUMIsGuestDebugStateActivePending(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActivePending(pVCpu)); + Assert(!CPUMIsHyperDebugStateActivePending(pVCpu)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + } + else +#endif + if (!CPUMIsGuestDebugStateActive(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActive(pVCpu)); + Assert(!CPUMIsHyperDebugStateActive(pVCpu)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + } + Assert(!fInterceptDB); + Assert(!fInterceptMovDRx); + } + /* + * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we + * must intercept #DB in order to maintain a correct DR6 guest value. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + else if ( !CPUMIsGuestDebugStateActivePending(pVCpu) + && !CPUMIsGuestDebugStateActive(pVCpu)) +#else + else if (!CPUMIsGuestDebugStateActive(pVCpu)) +#endif + { + fInterceptMovDRx = true; + fInterceptDB = true; + } + + /* Update guest DR7. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, pMixedCtx->dr[7]); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.fUsingHyperDR7 = false; + } + + /* + * Update the exception bitmap regarding intercepting #DB generated by the guest. 
+ */ + if (fInterceptDB) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_DB); + else if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB); +#endif + } + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); + + /* + * Update the processor-based VM-execution controls regarding intercepting MOV DRx instructions. + */ + if (fInterceptMovDRx) + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + else + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG); + return VINF_SUCCESS; +} + + +#ifdef VBOX_STRICT +/** + * Strict function to validate segment registers. + * + * @remarks ASSUMES CR0 is up to date. + */ +static void hmR0VmxValidateSegmentRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". */ + /* NOTE: The reason we check for attribute value 0 and not just the unusable bit here is because hmR0VmxWriteSegmentReg() + * only updates the VMCS' copy of the value with the unusable bit and doesn't change the guest-context value. */ + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && ( !CPUMIsGuestInRealModeEx(pCtx) + && !CPUMIsGuestInV86ModeEx(pCtx))) + { + /* Protected mode checks */ + /* CS */ + Assert(pCtx->cs.Attr.n.u1Present); + Assert(!(pCtx->cs.Attr.u & 0xf00)); + Assert(!(pCtx->cs.Attr.u & 0xfffe0000)); + Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff + || !(pCtx->cs.Attr.n.u1Granularity)); + Assert( !(pCtx->cs.u32Limit & 0xfff00000) + || (pCtx->cs.Attr.n.u1Granularity)); + /* CS cannot be loaded with NULL in protected mode. */ + Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS?!? */ + if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) + Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl); + else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) + Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl); + else + AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u2Dpl)); + /* SS */ + Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); + Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL)); + if ( !(pCtx->cr0 & X86_CR0_PE) + || pCtx->cs.Attr.n.u4Type == 3) + { + Assert(!pCtx->ss.Attr.n.u2Dpl); + } + if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); + Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7); + Assert(pCtx->ss.Attr.n.u1Present); + Assert(!(pCtx->ss.Attr.u & 0xf00)); + Assert(!(pCtx->ss.Attr.u & 0xfffe0000)); + Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff + || !(pCtx->ss.Attr.n.u1Granularity)); + Assert( !(pCtx->ss.u32Limit & 0xfff00000) + || (pCtx->ss.Attr.n.u1Granularity)); + } + /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). 
*/ + if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->ds.Attr.n.u1Present); + Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL)); + Assert(!(pCtx->ds.Attr.u & 0xf00)); + Assert(!(pCtx->ds.Attr.u & 0xfffe0000)); + Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff + || !(pCtx->ds.Attr.n.u1Granularity)); + Assert( !(pCtx->ds.u32Limit & 0xfff00000) + || (pCtx->ds.Attr.n.u1Granularity)); + Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->es.Attr.n.u1Present); + Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL)); + Assert(!(pCtx->es.Attr.u & 0xf00)); + Assert(!(pCtx->es.Attr.u & 0xfffe0000)); + Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff + || !(pCtx->es.Attr.n.u1Granularity)); + Assert( !(pCtx->es.u32Limit & 0xfff00000) + || (pCtx->es.Attr.n.u1Granularity)); + Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->fs.Attr.n.u1Present); + Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL)); + Assert(!(pCtx->fs.Attr.u & 0xf00)); + Assert(!(pCtx->fs.Attr.u & 0xfffe0000)); + Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff + || !(pCtx->fs.Attr.n.u1Granularity)); + Assert( !(pCtx->fs.u32Limit & 0xfff00000) + || (pCtx->fs.Attr.n.u1Granularity)); + Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->gs.Attr.n.u1Present); + Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL)); + Assert(!(pCtx->gs.Attr.u & 0xf00)); + Assert(!(pCtx->gs.Attr.u & 0xfffe0000)); + Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff + || !(pCtx->gs.Attr.n.u1Granularity)); + Assert( !(pCtx->gs.u32Limit & 0xfff00000) + || (pCtx->gs.Attr.n.u1Granularity)); + Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + /* 64-bit capable CPUs. */ +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Assert(!(pCtx->cs.u64Base >> 32)); + Assert(!pCtx->ss.Attr.u || !(pCtx->ss.u64Base >> 32)); + Assert(!pCtx->ds.Attr.u || !(pCtx->ds.u64Base >> 32)); + Assert(!pCtx->es.Attr.u || !(pCtx->es.u64Base >> 32)); + } +# endif + } + else if ( CPUMIsGuestInV86ModeEx(pCtx) + || ( CPUMIsGuestInRealModeEx(pCtx) + && !pVM->hm.s.vmx.fUnrestrictedGuest)) + { + /* Real and v86 mode checks. */ + /* hmR0VmxWriteSegmentReg() writes the modified in VMCS. We want what we're feeding to VT-x. 
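+
+        [Editor's note: decode added for this write-up, not part of the upstream change.
+        The 0xf3 access-rights value asserted for real and v86 mode segments breaks
+        down as:
+
+            0xf3 = P=1 (bit 7) | DPL=3 (bits 6:5) | S=1 (bit 4, code/data segment)
+                 | Type=3 (bits 3:0: read/write data, accessed)
+
+        i.e. an expand-up, ring-3, accessed read/write data segment, which is the
+        attribute value the virtual-8086 guest-state checks require.]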
*/ + uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3; + } + else + { + u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u; + u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; + } + + /* CS */ + AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel)); + Assert(pCtx->cs.u32Limit == 0xffff); + Assert(u32CSAttr == 0xf3); + /* SS */ + Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4); + Assert(pCtx->ss.u32Limit == 0xffff); + Assert(u32SSAttr == 0xf3); + /* DS */ + Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4); + Assert(pCtx->ds.u32Limit == 0xffff); + Assert(u32DSAttr == 0xf3); + /* ES */ + Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4); + Assert(pCtx->es.u32Limit == 0xffff); + Assert(u32ESAttr == 0xf3); + /* FS */ + Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4); + Assert(pCtx->fs.u32Limit == 0xffff); + Assert(u32FSAttr == 0xf3); + /* GS */ + Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4); + Assert(pCtx->gs.u32Limit == 0xffff); + Assert(u32GSAttr == 0xf3); + /* 64-bit capable CPUs. */ +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Assert(!(pCtx->cs.u64Base >> 32)); + Assert(!u32SSAttr || !(pCtx->ss.u64Base >> 32)); + Assert(!u32DSAttr || !(pCtx->ds.u64Base >> 32)); + Assert(!u32ESAttr || !(pCtx->es.u64Base >> 32)); + } +# endif + } +} +#endif /* VBOX_STRICT */ + + +/** + * Writes a guest segment register into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxSel Index of the selector in the VMCS. + * @param idxLimit Index of the segment limit in the VMCS. + * @param idxBase Index of the segment base in the VMCS. + * @param idxAccess Index of the access rights of the segment in the VMCS. + * @param pSelReg Pointer to the segment selector. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxWriteSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, + uint32_t idxAccess, PCPUMSELREG pSelReg, PCPUMCTX pCtx) +{ + int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/ + AssertRCReturn(rc, rc); + + uint32_t u32Access = pSelReg->Attr.u; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */ + u32Access = 0xf3; + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + } + else + { + /* + * The way to differentiate between whether this is really a null selector or was just a selector loaded with 0 in + * real-mode is using the segment attributes. A selector loaded in real-mode with the value 0 is valid and usable in + * protected-mode and we should -not- mark it as an unusable segment. Both the recompiler & VT-x ensures NULL selectors + * loaded in protected-mode have their attribute as 0. 
+ */ + if (!u32Access) + u32Access = X86DESCATTR_UNUSABLE; + } + + /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */ + AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED), + ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg, pSelReg->Attr.u)); + + rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */ + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases) + * into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCPU Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks ASSUMES pMixedCtx->cr0 is up to date (strict builds validation). + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VERR_INTERNAL_ERROR_5; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* + * Guest Segment registers: CS, SS, DS, ES, FS, GS. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS)) + { + /* Save the segment attributes for real-on-v86 mode hack, so we can restore them on VM-exit. */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + pVCpu->hm.s.vmx.RealMode.AttrCS.u = pMixedCtx->cs.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrSS.u = pMixedCtx->ss.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrDS.u = pMixedCtx->ds.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrES.u = pMixedCtx->es.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrFS.u = pMixedCtx->fs.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrGS.u = pMixedCtx->gs.Attr.u; + } + +#ifdef VBOX_WITH_REM + if (!pVM->hm.s.vmx.fUnrestrictedGuest) + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED); + if ( pVCpu->hm.s.vmx.fWasInRealMode + && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED) + { + /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute + in real-mode (e.g. 
OpenBSD 4.0) */ + REMFlushTBs(pVM); + Log4(("Load: Switch to protected mode detected!\n")); + pVCpu->hm.s.vmx.fWasInRealMode = false; + } + } +#endif + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE, + VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_SS, VMX_VMCS32_GUEST_SS_LIMIT, VMX_VMCS_GUEST_SS_BASE, + VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS, &pMixedCtx->ss, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_DS, VMX_VMCS32_GUEST_DS_LIMIT, VMX_VMCS_GUEST_DS_BASE, + VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS, &pMixedCtx->ds, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_ES, VMX_VMCS32_GUEST_ES_LIMIT, VMX_VMCS_GUEST_ES_BASE, + VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS, &pMixedCtx->es, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_FS, VMX_VMCS32_GUEST_FS_LIMIT, VMX_VMCS_GUEST_FS_BASE, + VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS, &pMixedCtx->fs, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_GS, VMX_VMCS32_GUEST_GS_LIMIT, VMX_VMCS_GUEST_GS_BASE, + VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS, &pMixedCtx->gs, pMixedCtx); + AssertRCReturn(rc, rc); + +#ifdef VBOX_STRICT + /* Validate. */ + hmR0VmxValidateSegmentRegs(pVM, pVCpu, pMixedCtx); +#endif + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); + Log4(("Load: CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pMixedCtx->cs.Sel, pMixedCtx->cs.u64Base, + pMixedCtx->cs.u32Limit, pMixedCtx->cs.Attr.u)); + } + + /* + * Guest TR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_TR)) + { + /* + * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is achieved + * using the interrupt redirection bitmap (all bits cleared to let the guest handle INT-n's) in the TSS. + * See hmR3InitFinalizeR0() to see how pRealModeTSS is setup. + */ + uint16_t u16Sel = 0; + uint32_t u32Limit = 0; + uint64_t u64Base = 0; + uint32_t u32AccessRights = 0; + + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u16Sel = pMixedCtx->tr.Sel; + u32Limit = pMixedCtx->tr.u32Limit; + u64Base = pMixedCtx->tr.u64Base; + u32AccessRights = pMixedCtx->tr.Attr.u; + } + else + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMR3CanExecuteGuest() -XXX- what about inner loop changes? */ + + /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */ + RTGCPHYS GCPhys; + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys); + AssertRCReturn(rc, rc); + + X86DESCATTR DescAttr; + DescAttr.u = 0; + DescAttr.n.u1Present = 1; + DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY; + + u16Sel = 0; + u32Limit = HM_VTX_TSS_SIZE; + u64Base = GCPhys; /* in real-mode phys = virt. */ + u32AccessRights = DescAttr.u; + } + + /* Validate. */ + Assert(!(u16Sel & RT_BIT(2))); + AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY + || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights)); + AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights)); + Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/ + Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/ + Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. 
*/ + Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */ + Assert( (u32Limit & 0xfff) == 0xfff + || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */ + Assert( !(pMixedCtx->tr.u32Limit & 0xfff00000) + || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */ + + rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_TR, u16Sel); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_TR); + Log4(("Load: VMX_VMCS_GUEST_TR_BASE=%#RX64\n", u64Base)); + } + + /* + * Guest GDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_GDTR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pMixedCtx->gdtr.cbGdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pMixedCtx->gdtr.pGdt); AssertRCReturn(rc, rc); + + /* Validate. */ + Assert(!(pMixedCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_GDTR); + Log4(("Load: VMX_VMCS_GUEST_GDTR_BASE=%#RX64\n", pMixedCtx->gdtr.pGdt)); + } + + /* + * Guest LDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LDTR)) + { + /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */ + uint32_t u32Access = 0; + if (!pMixedCtx->ldtr.Attr.u) + u32Access = X86DESCATTR_UNUSABLE; + else + u32Access = pMixedCtx->ldtr.Attr.u; + + rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_LDTR, pMixedCtx->ldtr.Sel); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pMixedCtx->ldtr.u32Limit); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pMixedCtx->ldtr.u64Base); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRCReturn(rc, rc); + + /* Validate. */ + if (!(u32Access & X86DESCATTR_UNUSABLE)) + { + Assert(!(pMixedCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */ + Assert(pMixedCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */ + Assert(!pMixedCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */ + Assert(pMixedCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */ + Assert(!pMixedCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */ + Assert(!(pMixedCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */ + Assert( (pMixedCtx->ldtr.u32Limit & 0xfff) == 0xfff + || !pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */ + Assert( !(pMixedCtx->ldtr.u32Limit & 0xfff00000) + || pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */ + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_LDTR); + Log4(("Load: VMX_VMCS_GUEST_LDTR_BASE=%#RX64\n", pMixedCtx->ldtr.u64Base)); + } + + /* + * Guest IDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_IDTR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pMixedCtx->idtr.cbIdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pMixedCtx->idtr.pIdt); AssertRCReturn(rc, rc); + + /* Validate. */ + Assert(!(pMixedCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_IDTR); + Log4(("Load: VMX_VMCS_GUEST_IDTR_BASE=%#RX64\n", pMixedCtx->idtr.pIdt)); + } + + return VINF_SUCCESS; +} + + +/** + * Loads certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store + * areas. 
These MSRs will automatically be loaded to the host CPU on every + * successful VM entry and stored from the host CPU on every successful VM exit. + * Also loads the sysenter MSRs into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr); + + /* + * MSRs covered by Auto-load/store: EFER, LSTAR, STAR, SF_MASK, TSC_AUX (RDTSCP). + */ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS)) + { +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + PVM pVM = pVCpu->CTX_SUFF(pVM); + PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; + uint32_t cGuestMsrs = 0; + + /* See Intel spec. 4.1.4 "Enumeration of Paging Features by CPUID". */ + /** @todo r=ramshankar: Optimize this further to do lazy restoration and only + * when the guest really is in 64-bit mode. */ + bool fSupportsLongMode = CPUMGetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE); + if (fSupportsLongMode) + { + pGuestMsr->u32Msr = MSR_K8_LSTAR; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrLSTAR; /* 64 bits mode syscall rip */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K6_STAR; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrSTAR; /* legacy syscall eip, cs & ss */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K8_SF_MASK; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrSFMASK; /* syscall flag mask */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K8_KERNEL_GS_BASE; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrKERNELGSBASE; /* swapgs exchange value */ + pGuestMsr++; cGuestMsrs++; + } + + /* + * RDTSCP requires the TSC_AUX MSR. Host and guest share the physical MSR. So we have to + * load the guest's copy always (since the MSR bitmap allows passthru unconditionally). + */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pGuestMsr->u32Msr = MSR_K8_TSC_AUX; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = CPUMR0GetGuestTscAux(pVCpu); + pGuestMsr++; cGuestMsrs++; + } + + /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */ + if (cGuestMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)) + { + LogRel(("CPU autoload/store MSR count in VMCS exceeded cGuestMsrs=%u.\n", cGuestMsrs)); + pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Update the VCPU's copy of the guest MSR count. */ + pVCpu->hm.s.vmx.cGuestMsrs = cGuestMsrs; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cGuestMsrs); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cGuestMsrs); AssertRCReturn(rc, rc); +#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); + } + + /* + * Guest Sysenter MSRs. + * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause + * VM-exits on WRMSRs for these MSRs. 
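To make the auto-load/store area layout used by hmR0VmxLoadGuestMsrs above a little more concrete, here is a minimal sketch (not taken from the sources; the helper name and the caller-supplied capacity are invented for the example) of appending one VMXAUTOMSR entry, which is what the inline pGuestMsr++/cGuestMsrs++ sequence does:

    /* Illustrative only: append one guest MSR to an auto-load/store area. */
    static int hmR0VmxAddGuestMsrExample(PVMXAUTOMSR paMsrs, uint32_t *pcMsrs, uint32_t cMaxMsrs,
                                         uint32_t u32Msr, uint64_t u64Value)
    {
        if (*pcMsrs >= cMaxMsrs)                 /* Mirrors the MSR_IA32_VMX_MISC_MAX_MSR() check above. */
            return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
        paMsrs[*pcMsrs].u32Msr      = u32Msr;    /* e.g. MSR_K8_LSTAR. */
        paMsrs[*pcMsrs].u32Reserved = 0;
        paMsrs[*pcMsrs].u64Value    = u64Value;  /* Loaded on VM-entry, stored back on VM-exit. */
        ++*pcMsrs;                               /* Caller writes the final count into the VMCS. */
        return VINF_SUCCESS;
    }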
+ */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pMixedCtx->SysEnter.cs); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pMixedCtx->SysEnter.eip); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pMixedCtx->SysEnter.esp); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); + } + + return rc; +} + + +/** + * Loads the guest activity state into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestActivityState(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /** @todo See if we can make use of other states, e.g. + * VMX_VMCS_GUEST_ACTIVITY_SHUTDOWN or HLT. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE)) + { + int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE); + } + return VINF_SUCCESS; +} + + +/** + * Sets up the appropriate function to run guest code. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + { +#ifndef VBOX_ENABLE_64_BITS_GUESTS + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; +#endif + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */ +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */ + if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64) + { + pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64; + HMCPU_CF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS); + } +#else + /* 64-bit host or hybrid host. */ + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64; +#endif + } + else + { + /* Guest is not in long mode, use the 32-bit handler. */ +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32) + { + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; + HMCPU_CF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS); + } +#else + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; +#endif + } + Assert(pVCpu->hm.s.vmx.pfnStartVM); + return VINF_SUCCESS; +} + + +/** + * Wrapper for running the guest code in VT-x. + * + * @returns VBox strict status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! 
+ */ +DECLINLINE(int) hmR0VmxRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* + * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations + * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper. + * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details. + */ + const bool fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED); + /** @todo Add stats for resume vs launch. */ +#ifdef VBOX_WITH_KERNEL_USING_XMM + return HMR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM); +#else + return pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu); +#endif +} + + +/** + * Reports world-switch error and dumps some useful debug info. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param rcVMRun The return code from VMLAUNCH/VMRESUME. + * @param pCtx Pointer to the guest-CPU context. + * @param pVmxTransient Pointer to the VMX transient structure (only + * exitReason updated). + */ +static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(pVM); + Assert(pVCpu); + Assert(pCtx); + Assert(pVmxTransient); + HMVMX_ASSERT_PREEMPT_SAFE(); + + Log4(("VM-entry failure: %Rrc\n", rcVMRun)); + switch (rcVMRun) + { + case VERR_VMX_INVALID_VMXON_PTR: + AssertFailed(); + break; + case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */ + case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */ + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason); + rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRC(rc); + + pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; + /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted(). + Cannot do it here as we may have been long preempted. */ + +#ifdef VBOX_STRICT + Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason, + pVmxTransient->uExitReason)); + Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQualification)); + Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError)); + if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX) + Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError])); + else + Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX)); + Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu)); + Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu)); + + /* VMX control bits. 
*/ + uint32_t u32Val; + uint64_t u64Val; + HMVMXHCUINTREG uHCReg; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); + Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); + + /* Guest bits. 
*/ + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc); + Log4(("Old Guest Rip %#RX64 New %#RX64\n", pCtx->rip, u64Val)); + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc); + Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pCtx->rsp, u64Val)); + rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc); + Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pCtx->eflags.u32, u32Val)); + rc = VMXReadVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS16_GUEST_FIELD_VPID %u\n", u32Val)); + + /* Host bits. */ + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc); + Log4(("Host CR0 %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc); + Log4(("Host CR3 %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc); + Log4(("Host CR4 %#RHr\n", uHCReg)); + + RTGDTR HostGdtr; + PCX86DESCHC pDesc; + ASMGetGDTR(&HostGdtr); + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_CS, &u32Val); AssertRC(rc); + Log4(("Host CS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "CS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_DS, &u32Val); AssertRC(rc); + Log4(("Host DS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "DS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_ES, &u32Val); AssertRC(rc); + Log4(("Host ES %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "ES: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_FS, &u32Val); AssertRC(rc); + Log4(("Host FS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "FS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_GS, &u32Val); AssertRC(rc); + Log4(("Host GS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "GS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_SS, &u32Val); AssertRC(rc); + Log4(("Host SS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "SS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_TR, &u32Val); AssertRC(rc); + Log4(("Host TR %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "TR: "); + } + + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host TR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host GDTR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host IDTR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc); + Log4(("Host SYSENTER CS %#08x\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc); + Log4(("Host SYSENTER EIP %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc); + Log4(("Host SYSENTER ESP %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc); + Log4(("Host RSP 
%#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc); + Log4(("Host RIP %#RHv\n", uHCReg)); +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER))); + Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR))); + Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR))); + Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR))); + Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK))); + Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE))); + } +# endif +#endif /* VBOX_STRICT */ + break; + } + + default: + /* Impossible */ + AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun)); + break; + } + NOREF(pVM); +} + + +#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +#ifndef VMX_USE_CACHED_VMCS_ACCESSES +# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!" +#endif +#ifdef VBOX_STRICT +static bool hmR0VmxIsValidWriteField(uint32_t idxField) +{ + switch (idxField) + { + case VMX_VMCS_GUEST_RIP: + case VMX_VMCS_GUEST_RSP: + case VMX_VMCS_GUEST_SYSENTER_EIP: + case VMX_VMCS_GUEST_SYSENTER_ESP: + case VMX_VMCS_GUEST_GDTR_BASE: + case VMX_VMCS_GUEST_IDTR_BASE: + case VMX_VMCS_GUEST_CS_BASE: + case VMX_VMCS_GUEST_DS_BASE: + case VMX_VMCS_GUEST_ES_BASE: + case VMX_VMCS_GUEST_FS_BASE: + case VMX_VMCS_GUEST_GS_BASE: + case VMX_VMCS_GUEST_SS_BASE: + case VMX_VMCS_GUEST_LDTR_BASE: + case VMX_VMCS_GUEST_TR_BASE: + case VMX_VMCS_GUEST_CR3: + return true; + } + return false; +} + +static bool hmR0VmxIsValidReadField(uint32_t idxField) +{ + switch (idxField) + { + /* Read-only fields. */ + case VMX_VMCS_RO_EXIT_QUALIFICATION: + return true; + } + /* Remaining readable fields should also be writable. */ + return hmR0VmxIsValidWriteField(idxField); +} +#endif /* VBOX_STRICT */ + + +/** + * Executes the specified handler in 64-bit mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest CPU context. + * @param enmOp The operation to perform. + * @param cbParam Number of parameters. + * @param paParam Array of 32-bit parameters. + */ +VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam, + uint32_t *paParam) +{ + int rc, rc2; + PHMGLOBALCPUINFO pCpu; + RTHCPHYS HCPhysCpuPage; + RTCCUINTREG uOldEflags; + + AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER); + Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END); + Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField)); + Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField)); + +#ifdef VBOX_STRICT + for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++) + Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i])); + + for (uint32_t i = 0; i <pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++) + Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i])); +#endif + + /* Disable interrupts. */ + uOldEflags = ASMIntDisableFlags(); + +#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI + RTCPUID idHostCpu = RTMpCpuId(); + CPUMR0SetLApic(pVCpu, idHostCpu); +#endif + + pCpu = HMR0GetCurrentCpu(); + HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); + + /* Clear VMCS. 
Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */ + VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + + /* Leave VMX Root Mode. */ + VMXDisable(); + + ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); + + CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); + CPUMSetHyperEIP(pVCpu, enmOp); + for (int i = (int)cbParam - 1; i >= 0; i--) + CPUMPushHyper(pVCpu, paParam[i]); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z); + + /* Call the switcher. */ + rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum)); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z); + + /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */ + /* Make sure the VMX instructions don't cause #UD faults. */ + ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); + + /* Re-enter VMX Root Mode */ + rc2 = VMXEnable(HCPhysCpuPage); + if (RT_FAILURE(rc2)) + { + ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); + ASMSetFlags(uOldEflags); + return rc2; + } + + rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRC(rc2); + Assert(!(ASMGetFlags() & X86_EFL_IF)); + ASMSetFlags(uOldEflags); + return rc; +} + + +/** + * Prepares for and executes VMLAUNCH (64 bits guests) for 32-bit hosts + * supporting 64-bit guests. + * + * @returns VBox status code. + * @param fResume Whether to VMLAUNCH or VMRESUME. + * @param pCtx Pointer to the guest-CPU context. + * @param pCache Pointer to the VMCS cache. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu) +{ + uint32_t aParam[6]; + PHMGLOBALCPUINFO pCpu = NULL; + RTHCPHYS HCPhysCpuPage = 0; + int rc = VERR_INTERNAL_ERROR_5; + + pCpu = HMR0GetCurrentCpu(); + HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + pCache->uPos = 1; + pCache->interPD = PGMGetInterPaeCR3(pVM); + pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0; +#endif + +#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) + pCache->TestIn.HCPhysCpuPage = 0; + pCache->TestIn.HCPhysVmcs = 0; + pCache->TestIn.pCache = 0; + pCache->TestOut.HCPhysVmcs = 0; + pCache->TestOut.pCache = 0; + pCache->TestOut.pCtx = 0; + pCache->TestOut.eflags = 0; +#endif + + aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */ + aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */ + aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */ + aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. 
*/ + aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache); + aParam[5] = 0; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8; + *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1; +#endif + rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_VMXRCStartVM64, 6, &aParam[0]); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5); + Assert(pCtx->dr[4] == 10); + *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff; +#endif + +#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) + AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage)); + AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, + pVCpu->hm.s.vmx.HCPhysVmcs)); + AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, + pCache->TestOut.HCPhysVmcs)); + AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, + pCache->TestOut.pCache)); + AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache), + ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache))); + AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, + pCache->TestOut.pCtx)); + Assert(!(pCache->TestOut.eflags & X86_EFL_IF)); +#endif + return rc; +} + + +/** + * Initialize the VMCS-Read cache. The VMCS cache is used for 32-bit hosts + * running 64-bit guests (except 32-bit Darwin which runs with 64-bit paging in + * 32-bit mode) for 64-bit fields that cannot be accessed in 32-bit mode. Some + * 64-bit fields -can- be accessed (those that have a 32-bit FULL & HIGH part). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu) +{ +#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \ +{ \ + Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \ + pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \ + pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \ + ++cReadFields; \ +} + + AssertPtr(pVM); + AssertPtr(pVCpu); + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + uint32_t cReadFields = 0; + + /* + * Don't remove the #if 0'd fields in this code. They're listed here for consistency + * and serve to indicate exceptions to the rules. + */ + + /* Guest-natural selector base fields. */ +#if 0 + /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". 
*/ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4); +#endif + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP); +#if 0 + /* Unused natural width guest-state fields. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */ +#endif + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP); + + /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */ +#if 0 + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL); +#endif + + /* Natural width guest-state fields. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION); +#if 0 + /* Currently unused field. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR); +#endif + + if (pVM->hm.s.fNestedPaging) + { + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); + AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, + VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX)); + pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX; + } + else + { + AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX)); + pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX; + } + +#undef VMXLOCAL_INIT_READ_CACHE_FIELD + return VINF_SUCCESS; +} + + +/** + * Writes a field into the VMCS. This can either directly invoke a VMWRITE or + * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except + * darwin, running 64-bit guests). + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxField The VMCS field encoding. + * @param u64Val 16, 32 or 64 bits value. + */ +VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) +{ + int rc; + switch (idxField) + { + /* + * These fields consists of a "FULL" and a "HIGH" part which can be written to individually. 
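A short aside on the FULL/HIGH case handled below: the HIGH half of a 64-bit VMCS field uses the next field encoding because bit 0 of a VMCS encoding is the access type and all FULL encodings are even, which is why writing idxField and idxField + 1 covers the whole value. A minimal sketch (not taken from the sources; the helper name is invented) of that split for the TSC offset:

    /* Illustrative only: a 64-bit VMCS field written as its FULL and HIGH halves. */
    static int hmR0VmxWriteTscOffsetExample(uint64_t u64TscOffset)
    {
        int rc = VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL,     (uint32_t)u64TscOffset);
        rc    |= VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL + 1, (uint32_t)(u64TscOffset >> 32));
        return rc;
    }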
+ */ + /* 64-bit Control fields. */ + case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL: + case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL: + case VMX_VMCS64_CTRL_MSR_BITMAP_FULL: + case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL: + case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL: + case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL: + case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL: + case VMX_VMCS64_CTRL_TSC_OFFSET_FULL: + case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL: + case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL: + case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL: + case VMX_VMCS64_CTRL_EPTP_FULL: + case VMX_VMCS64_CTRL_EPTP_LIST_FULL: + /* 64-bit Guest-state fields. */ + case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL: + case VMX_VMCS64_GUEST_DEBUGCTL_FULL: + case VMX_VMCS64_GUEST_PAT_FULL: + case VMX_VMCS64_GUEST_EFER_FULL: + case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL: + case VMX_VMCS64_GUEST_PDPTE0_FULL: + case VMX_VMCS64_GUEST_PDPTE1_FULL: + case VMX_VMCS64_GUEST_PDPTE2_FULL: + case VMX_VMCS64_GUEST_PDPTE3_FULL: + /* 64-bit Host-state fields. */ + case VMX_VMCS64_HOST_FIELD_PAT_FULL: + case VMX_VMCS64_HOST_FIELD_EFER_FULL: + case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL: + { + rc = VMXWriteVmcs32(idxField, u64Val); + rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32)); + break; + } + + /* + * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit + * values). When we switch the host to 64-bit mode for running 64-bit guests, these VMWRITEs get executed then. + */ + /* Natural-width Guest-state fields. */ + case VMX_VMCS_GUEST_CR3: + case VMX_VMCS_GUEST_ES_BASE: + case VMX_VMCS_GUEST_CS_BASE: + case VMX_VMCS_GUEST_SS_BASE: + case VMX_VMCS_GUEST_DS_BASE: + case VMX_VMCS_GUEST_FS_BASE: + case VMX_VMCS_GUEST_GS_BASE: + case VMX_VMCS_GUEST_LDTR_BASE: + case VMX_VMCS_GUEST_TR_BASE: + case VMX_VMCS_GUEST_GDTR_BASE: + case VMX_VMCS_GUEST_IDTR_BASE: + case VMX_VMCS_GUEST_RSP: + case VMX_VMCS_GUEST_RIP: + case VMX_VMCS_GUEST_SYSENTER_ESP: + case VMX_VMCS_GUEST_SYSENTER_EIP: + { + if (!(u64Val >> 32)) + { + /* If this field is 64-bit, VT-x will zero out the top bits. */ + rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val); + } + else + { + /* Assert that only the 32->64 switcher case should ever come here. */ + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); + rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val); + } + break; + } + + default: + { + AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val)); + rc = VERR_INVALID_PARAMETER; + break; + } + } + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Queue up a VMWRITE by using the VMCS write cache. This is only used on 32-bit + * hosts (except darwin) for 64-bit guests. + * + * @param pVCpu Pointer to the VMCPU. + * @param idxField The VMCS field encoding. + * @param u64Val 16, 32 or 64 bits value. + */ +VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) +{ + AssertPtr(pVCpu); + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + + AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, + ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED); + + /* Make sure there are no duplicates. 
*/ + for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++) + { + if (pCache->Write.aField[i] == idxField) + { + pCache->Write.aFieldVal[i] = u64Val; + return VINF_SUCCESS; + } + } + + pCache->Write.aField[pCache->Write.cValidEntries] = idxField; + pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val; + pCache->Write.cValidEntries++; + return VINF_SUCCESS; +} + +/* Enable later when the assembly code uses these as callbacks. */ +#if 0 +/* + * Loads the VMCS write-cache into the CPU (by executing VMWRITEs). + * + * @param pVCpu Pointer to the VMCPU. + * @param pCache Pointer to the VMCS cache. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(void) VMXWriteCachedVmcsLoad(PVMCPU pVCpu, PVMCSCACHE pCache) +{ + AssertPtr(pCache); + for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++) + { + int rc = VMXWriteVmcs64(pCache->Write.aField[i], pCache->Write.aFieldVal[i]); + AssertRC(rc); + } + pCache->Write.cValidEntries = 0; +} + + +/** + * Stores the VMCS read-cache from the CPU (by executing VMREADs). + * + * @param pVCpu Pointer to the VMCPU. + * @param pCache Pointer to the VMCS cache. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(void) VMXReadCachedVmcsStore(PVMCPU pVCpu, PVMCSCACHE pCache) +{ + AssertPtr(pCache); + for (uint32_t i = 0; i < pCache->Read.cValidEntries; i++) + { + int rc = VMXReadVmcs64(pCache->Read.aField[i], &pCache->Read.aFieldVal[i]); + AssertRC(rc); + } +} +#endif +#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */ + + +/** + * Sets up the usage of TSC-offsetting and updates the VMCS. If offsetting is + * not possible, cause VM-exits on RDTSC(P)s. Also sets up the VMX preemption + * timer. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VERR_INTERNAL_ERROR_5; + bool fOffsettedTsc = false; + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (pVM->hm.s.vmx.fUsePreemptTimer) + { + uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset); + + /* Make sure the returned values have sane upper and lower boundaries. */ + uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage); + cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */ + cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */ + cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift; + + uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc); + } + else + fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset); + + if (fOffsettedTsc) + { + uint64_t u64CurTSC = ASMReadTSC(); + if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) + { + /* Note: VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. 
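For the preemption-timer path in hmR0VmxUpdateTscOffsettingAndPreemptTimer above, a small worked example shows how the clamping plays out (the 2 GHz TSC frequency, the 250 ms deadline and the shift of 5 are made-up numbers; in the real code they come from SUPGetCpuHzFromGIP, TMCpuTickGetDeadlineAndTscOffset and cPreemptTimerShift):

    /* Illustrative only: clamp the deadline to [1/2048 s, 1/64 s] worth of TSC ticks, then apply the rate shift. */
    uint64_t const u64CpuHz         = UINT64_C(2000000000);        /* Assume a 2 GHz TSC. */
    uint64_t       cTicksToDeadline = UINT64_C(500000000);         /* Deadline 250 ms away: far too long. */
    cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64);    /* -> 31250000 ticks (~15.6 ms). */
    cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048);  /* Lower bound 976562 ticks; no change here. */
    cTicksToDeadline >>= 5;                                        /* Assume cPreemptTimerShift == 5. */
    uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
    /* cPreemptionTickCount == 976562 for these numbers. */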
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset); AssertRC(rc); + + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset); + } + else + { + /* VM-exit on RDTSC(P) as we would otherwise pass decreasing TSC values to the guest. */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow); + } + } + else + { + /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept); + } +} + + +/** + * Determines if an exception is a contributory exception. Contributory + * exceptions are ones which can cause double-faults. Page-fault is + * intentionally not included here as it's a conditional contributory exception. + * + * @returns true if the exception is contributory, false otherwise. + * @param uVector The exception vector. + */ +DECLINLINE(bool) hmR0VmxIsContributoryXcpt(const uint32_t uVector) +{ + switch (uVector) + { + case X86_XCPT_GP: + case X86_XCPT_SS: + case X86_XCPT_NP: + case X86_XCPT_TS: + case X86_XCPT_DE: + return true; + default: + break; + } + return false; +} + + +/** + * Sets an event as a pending event to be injected into the guest. + * + * @param pVCpu Pointer to the VMCPU. + * @param u32IntInfo The VM-entry interruption-information field. + * @param cbInstr The VM-entry instruction length in bytes (for software + * interrupts, exceptions and privileged software + * exceptions). + * @param u32ErrCode The VM-entry exception error code. + * @param GCPtrFaultAddress The fault-address (CR2) in case it's a + * page-fault. + * + * @remarks Statistics counter assumes this is a guest event being injected or + * re-injected into the guest, i.e. 'StatInjectPendingReflect' is + * always incremented. + */ +DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode, + RTGCUINTPTR GCPtrFaultAddress) +{ + Assert(!pVCpu->hm.s.Event.fPending); + pVCpu->hm.s.Event.fPending = true; + pVCpu->hm.s.Event.u64IntInfo = u32IntInfo; + pVCpu->hm.s.Event.u32ErrCode = u32ErrCode; + pVCpu->hm.s.Event.cbInstr = cbInstr; + pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress; + + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Sets a double-fault (#DF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Handle a condition that occurred while delivering an event through the guest + * IDT. 
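For comparison with hmR0VmxSetPendingXcptDF above, a minimal sketch (not taken from the sources; the function name is invented) of the same interruption-information packing for a general-protection fault that carries an error code:

    /* Illustrative only: queue a #GP(err) as a pending hardware exception. */
    DECLINLINE(void) hmR0VmxSetPendingXcptGPExample(PVMCPU pVCpu, uint32_t u32ErrCode)
    {
        uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;
        u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
        u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;   /* #GP pushes an error code. */
        hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
    }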
+ * + * @returns VBox status code (informational error codes included). + * @retval VINF_SUCCESS if we should continue handling the VM-exit. + * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to + * continue execution of the guest which will delivery the #DF. + * @retval VINF_EM_RESET if we detected a triple-fault condition. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo)) + { + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); + uint32_t uExitVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntInfo); + uint32_t uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); + + typedef enum + { + VMXREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */ + VMXREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */ + VMXREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */ + VMXREFLECTXCPT_NONE /* Nothing to reflect. */ + } VMXREFLECTXCPT; + + /* See Intel spec. 30.7.1.1 "Reflecting Exceptions to Guest Software". */ + VMXREFLECTXCPT enmReflect = VMXREFLECTXCPT_NONE; + if (VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo)) + { + if (uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT) + { + enmReflect = VMXREFLECTXCPT_XCPT; +#ifdef VBOX_STRICT + if ( hmR0VmxIsContributoryXcpt(uIdtVector) + && uExitVector == X86_XCPT_PF) + { + Log4(("IDT: vcpu[%RU32] Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2)); + } +#endif + if ( uExitVector == X86_XCPT_PF + && uIdtVector == X86_XCPT_PF) + { + pVmxTransient->fVectoringPF = true; + Log4(("IDT: vcpu[%RU32] Vectoring #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2)); + } + else if ( (pVCpu->hm.s.vmx.u32XcptBitmap & HMVMX_CONTRIBUTORY_XCPT_MASK) + && hmR0VmxIsContributoryXcpt(uExitVector) + && ( hmR0VmxIsContributoryXcpt(uIdtVector) + || uIdtVector == X86_XCPT_PF)) + { + enmReflect = VMXREFLECTXCPT_DF; + } + else if (uIdtVector == X86_XCPT_DF) + enmReflect = VMXREFLECTXCPT_TF; + } + else if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI) + { + /* + * Ignore software interrupts (INT n), software exceptions (#BP, #OF) and privileged software exception + * (whatever they are) as they reoccur when restarting the instruction. + */ + enmReflect = VMXREFLECTXCPT_XCPT; + } + } + else + { + /* + * If event delivery caused an EPT violation/misconfig or APIC access VM-exit, then the VM-exit + * interruption-information will not be valid and we end up here. In such cases, it is sufficient to reflect the + * original exception to the guest after handling the VM-exit. 
+ */ + if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI) + { + enmReflect = VMXREFLECTXCPT_XCPT; + } + } + + switch (enmReflect) + { + case VMXREFLECTXCPT_XCPT: + { + Assert( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT); + + uint32_t u32ErrCode = 0; + if (VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo)) + { + rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + u32ErrCode = pVmxTransient->uIdtVectoringErrorCode; + } + + /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF. See hmR0VmxExitXcptPF(). */ + hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo), + 0 /* cbInstr */, u32ErrCode, pMixedCtx->cr2); + rc = VINF_SUCCESS; + Log4(("IDT: vcpu[%RU32] Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->idCpu, + pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.u32ErrCode)); + + break; + } + + case VMXREFLECTXCPT_DF: + { + hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx); + rc = VINF_HM_DOUBLE_FAULT; + Log4(("IDT: vcpu[%RU32] Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->idCpu, + pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector)); + + break; + } + + case VMXREFLECTXCPT_TF: + { + rc = VINF_EM_RESET; + Log4(("IDT: vcpu[%RU32] Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", pVCpu->idCpu, uIdtVector, + uExitVector)); + break; + } + + default: + Assert(rc == VINF_SUCCESS); + break; + } + } + Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET); + return rc; +} + + +/** + * Saves the guest's CR0 register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + NOREF(pMixedCtx); + + /* + * While in the middle of saving guest-CR0, we could get preempted and re-invoked from the preemption hook, + * see hmR0VmxLeave(). Safer to just make this code non-preemptible. + */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)) + { + uint32_t uVal = 0; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &uVal); + AssertRCReturn(rc, rc); + + uint32_t uShadow = 0; + rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uShadow); + AssertRCReturn(rc, rc); + + uVal = (uShadow & pVCpu->hm.s.vmx.u32CR0Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR0Mask); + CPUMSetGuestCR0(pVCpu, uVal); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0); + } + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Saves the guest's CR4 register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! 
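The (uShadow & mask) | (uVal & ~mask) combining used by hmR0VmxSaveGuestCR0 above, and by hmR0VmxSaveGuestCR4 below, reads more easily with concrete bits. A minimal worked example (not taken from the sources; the mask and register values are made up, and in practice the mask is pVCpu->hm.s.vmx.u32CR0Mask):

    /* Illustrative only: host-owned CR0 bits are reported from the read shadow, the rest from the real CR0. */
    uint32_t const fCr0Mask   = X86_CR0_PG | X86_CR0_PE;               /* Bits the host intercepts. */
    uint32_t const fCr0Shadow = X86_CR0_PE;                            /* What the guest last wrote. */
    uint32_t const fCr0Real   = X86_CR0_PG | X86_CR0_PE | X86_CR0_NE;  /* What the CPU really runs with. */
    uint32_t const fCr0Guest  = (fCr0Shadow & fCr0Mask) | (fCr0Real & ~fCr0Mask);
    /* fCr0Guest == X86_CR0_PE | X86_CR0_NE: PG comes from the shadow (clear), NE from the real CR0. */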
+ */ +static int hmR0VmxSaveGuestCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4)) + { + uint32_t uVal = 0; + uint32_t uShadow = 0; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &uVal); + AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uShadow); + AssertRCReturn(rc, rc); + + uVal = (uShadow & pVCpu->hm.s.vmx.u32CR4Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR4Mask); + CPUMSetGuestCR4(pVCpu, uVal); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4); + } + return rc; +} + + +/** + * Saves the guest's RIP register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); + AssertRCReturn(rc, rc); + + pMixedCtx->rip = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP); + } + return rc; +} + + +/** + * Saves the guest's RSP register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); + AssertRCReturn(rc, rc); + + pMixedCtx->rsp = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP); + } + return rc; +} + + +/** + * Saves the guest's RFLAGS from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)) + { + uint32_t uVal = 0; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &uVal); + AssertRCReturn(rc, rc); + + pMixedCtx->eflags.u32 = uVal; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) /* Undo our real-on-v86-mode changes to eflags if necessary. */ + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Log4(("Saving real-mode EFLAGS VT-x view=%#RX32\n", pMixedCtx->eflags.u32)); + + pMixedCtx->eflags.Bits.u1VM = 0; + pMixedCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL; + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS); + } + return VINF_SUCCESS; +} + + +/** + * Wrapper for saving the guest's RIP, RSP and RFLAGS from the VMCS into the + * guest-CPU context. 
+ */ +DECLINLINE(int) hmR0VmxSaveGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + return rc; +} + + +/** + * Saves the guest's interruptibility-state ("interrupt shadow" as AMD calls it) + * from the guest-state area in the VMCS. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxSaveGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t uIntrState = 0; + int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState); + AssertRC(rc); + + if (!uIntrState) + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + else + { + Assert( uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + || uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + AssertRC(rc); + rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* for hmR0VmxGetGuestIntrState(). */ + AssertRC(rc); + + EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip); + Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); + } +} + + +/** + * Saves the guest's activity state. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestActivityState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Nothing to do for now until we make use of different guest-CPU activity state. Just update the flag. */ + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_ACTIVITY_STATE); + return VINF_SUCCESS; +} + + +/** + * Saves the guest SYSENTER MSRs (SYSENTER_CS, SYSENTER_EIP, SYSENTER_ESP) from + * the current VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestSysenterMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR)) + { + uint32_t u32Val = 0; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.cs = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR); + } + + uint64_t u64Val = 0; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.eip = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR); + } + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.esp = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR); + } + return rc; +} + + +/** + * Saves the guest FS_BASE MSRs from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
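/*
 * Illustrative sketch of how hmR0VmxSaveGuestIntrState() above interprets the
 * guest interruptibility-state field: bit 0 = blocking by STI, bit 1 = blocking
 * by MOV SS (Intel SDM, guest non-register state).  When either bit is set the
 * guest RIP is recorded so the inhibition expires once execution moves past that
 * instruction.  The Ex* names are hypothetical, not VirtualBox API.
 */
#include <stdint.h>
#include <stdbool.h>

#define EX_INTR_STATE_BLOCK_STI    UINT32_C(0x00000001)
#define EX_INTR_STATE_BLOCK_MOVSS  UINT32_C(0x00000002)

typedef struct EXINTRSHADOW
{
    bool     fInhibit;       /* Interrupt delivery inhibited for one instruction. */
    uint64_t u64InhibitRip;  /* RIP at which the shadow was established.          */
} EXINTRSHADOW;

static void exUpdateIntrShadow(EXINTRSHADOW *pShadow, uint32_t uIntrState, uint64_t u64GuestRip)
{
    if (!(uIntrState & (EX_INTR_STATE_BLOCK_STI | EX_INTR_STATE_BLOCK_MOVSS)))
        pShadow->fInhibit = false;           /* No shadow: events may be delivered. */
    else
    {
        pShadow->fInhibit      = true;
        pShadow->u64InhibitRip = u64GuestRip;
    }
}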
+ * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestFSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_FS_BASE_MSR)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_FS_BASE, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->fs.u64Base = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_FS_BASE_MSR); + } + return rc; +} + + +/** + * Saves the guest GS_BASE MSRs from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestGSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GS_BASE_MSR)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GS_BASE, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->gs.u64Base = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GS_BASE_MSR); + } + return rc; +} + + +/** + * Saves the auto load/store'd guest MSRs from the current VMCS into the + * guest-CPU context. Currently these are LSTAR, STAR, SFMASK, KERNEL-GS BASE + * and TSC_AUX. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestAutoLoadStoreMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS)) + return VINF_SUCCESS; + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++) + { + PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; + pMsr += i; + switch (pMsr->u32Msr) + { + case MSR_K8_LSTAR: pMixedCtx->msrLSTAR = pMsr->u64Value; break; + case MSR_K6_STAR: pMixedCtx->msrSTAR = pMsr->u64Value; break; + case MSR_K8_SF_MASK: pMixedCtx->msrSFMASK = pMsr->u64Value; break; + case MSR_K8_TSC_AUX: CPUMR0SetGuestTscAux(pVCpu, pMsr->u64Value); break; + case MSR_K8_KERNEL_GS_BASE: pMixedCtx->msrKERNELGSBASE = pMsr->u64Value; break; + case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit. */ break; + default: + { + AssertFailed(); + return VERR_HM_UNEXPECTED_LD_ST_MSR; + } + } + } +#endif + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS); + return VINF_SUCCESS; +} + + +/** + * Saves the guest control registers from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestControlRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Guest CR0. Guest FPU. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* Guest CR4. */ + rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* Guest CR2 - updated always during the world-switch or in #PF. 
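/*
 * Illustrative sketch of the layout walked by hmR0VmxSaveGuestAutoLoadStoreMsrs()
 * above: the VMX MSR load/store area is an array of 16-byte entries holding an
 * MSR index, a reserved dword and the 64-bit value captured on VM-exit.  The Ex*
 * names are hypothetical stand-ins for VMXAUTOMSR / pvGuestMsr.
 */
#include <stdint.h>
#include <stdbool.h>

typedef struct EXAUTOMSR
{
    uint32_t u32Msr;       /* MSR index, e.g. 0xC0000082 (LSTAR). */
    uint32_t u32Reserved;
    uint64_t u64Value;     /* Value stored on VM-exit / loaded on VM-entry. */
} EXAUTOMSR;

static bool exFindAutoMsr(const EXAUTOMSR *paMsrs, uint32_t cMsrs, uint32_t idMsr, uint64_t *pu64Value)
{
    for (uint32_t i = 0; i < cMsrs; i++)
        if (paMsrs[i].u32Msr == idMsr)
        {
            *pu64Value = paMsrs[i].u64Value;   /* Found: hand back the guest value. */
            return true;
        }
    return false;                              /* Not part of the auto load/store set. */
}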
*/ + /* Guest CR3. Only changes with Nested Paging. This must be done -after- saving CR0 and CR4 from the guest! */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3)) + { + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + if ( pVM->hm.s.vmx.fUnrestrictedGuest + || ( pVM->hm.s.fNestedPaging + && CPUMIsGuestPagingEnabledEx(pMixedCtx))) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val); + if (pMixedCtx->cr3 != u64Val) + { + CPUMSetGuestCR3(pVCpu, u64Val); + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + PGMUpdateCR3(pVCpu, u64Val); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + } + else + { + /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMUpdateCR3().*/ + VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3); + } + } + + /* If the guest is in PAE mode, sync back the PDPE's into the guest state. */ + if (CPUMIsGuestInPAEModeEx(pMixedCtx)) /* Reads CR0, CR4 and EFER MSR (EFER is always up-to-date). */ + { + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc); + + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + else + { + /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMGstUpdatePaePdpes(). */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES); + } + } + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3); + } + + /* + * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback() + * -> VMMRZCallRing3Disable() -> hmR0VmxSaveGuestState() -> Set VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp + * -> continue with VM-exit handling -> hmR0VmxSaveGuestControlRegs() and here we are. + * + * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus + * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that + * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should + * -NOT- check if HMVMX_UPDATED_GUEST_CR3 is already set or not! + * + * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here. + */ + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) + PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); + + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + + return rc; +} + + +/** + * Reads a guest segment register from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxSel Index of the selector in the VMCS. 
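/*
 * Illustrative sketch of the defer-or-apply pattern used above for CR3 and the
 * PAE PDPEs: when it is unsafe to call into PGM (ring-3 calls disabled, e.g. on
 * the longjmp path), the update is only recorded as a pending flag and replayed
 * once calls are re-enabled.  All Ex* names are hypothetical stand-ins for
 * VMMRZCallRing3IsEnabled() / VMCPU_FF_HM_UPDATE_CR3 / PGMUpdateCR3().
 */
#include <stdint.h>
#include <stdbool.h>

typedef struct EXCR3STATE
{
    bool     fCanCallOut;   /* True when it is safe to call the paging code.   */
    bool     fCr3Pending;   /* A CR3 update is queued for later.               */
    uint64_t u64GuestCr3;   /* Last CR3 value read from the VMCS.              */
} EXCR3STATE;

static void exApplyCr3(uint64_t u64Cr3) { (void)u64Cr3; /* Would call into the paging code here. */ }

static void exNoteGuestCr3(EXCR3STATE *pState, uint64_t u64Cr3)
{
    pState->u64GuestCr3 = u64Cr3;
    if (pState->fCanCallOut)
        exApplyCr3(u64Cr3);            /* Safe: apply immediately. */
    else
        pState->fCr3Pending = true;    /* Unsafe: remember it and let exFlushPendingCr3() do it. */
}

static void exFlushPendingCr3(EXCR3STATE *pState)
{
    if (pState->fCr3Pending)
    {
        exApplyCr3(pState->u64GuestCr3);
        pState->fCr3Pending = false;
    }
}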
+ * @param   idxLimit    Index of the segment limit in the VMCS.
+ * @param   idxBase     Index of the segment base in the VMCS.
+ * @param   idxAccess   Index of the access rights of the segment in the VMCS.
+ * @param   pSelReg     Pointer to the segment selector.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Never call this function directly!!! Use the VMXLOCAL_READ_SEG()
+ *          macro as that takes care of whether to read from the VMCS cache or
+ *          not.
+ */
+DECLINLINE(int) hmR0VmxReadSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
+                                      PCPUMSELREG pSelReg)
+{
+    uint32_t u32Val = 0;
+    int rc = VMXReadVmcs32(idxSel, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->Sel      = (uint16_t)u32Val;
+    pSelReg->ValidSel = (uint16_t)u32Val;
+    pSelReg->fFlags   = CPUMSELREG_FLAGS_VALID;
+
+    rc = VMXReadVmcs32(idxLimit, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->u32Limit = u32Val;
+
+    uint64_t u64Val = 0;
+    rc = VMXReadVmcsGstNByIdxVal(idxBase, &u64Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->u64Base = u64Val;
+
+    rc = VMXReadVmcs32(idxAccess, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->Attr.u = u32Val;
+
+    /*
+     * If VT-x marks the segment as unusable, most other bits remain undefined:
+     *   - For CS the L, D and G bits have meaning.
+     *   - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
+     *   - For the remaining data segments no bits are defined.
+     *
+     * The present bit and the unusable bit have been observed to be set at the
+     * same time (the selector was supposed to be invalid as we started executing
+     * a V8086 interrupt in ring-0).
+     *
+     * What matters for the rest of the VBox code is that the P bit is cleared.
+     * Some of the other VBox code recognizes the unusable bit, but AMD-V
+     * certainly doesn't, and REM doesn't really either.  So, to be on the
+     * safe side here, we'll strip off P and other bits we don't care about.  If
+     * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
+     *
+     * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
+     */
+    if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
+    {
+        Assert(idxSel != VMX_VMCS16_GUEST_FIELD_TR);          /* TR is the only selector that can never be unusable. */
+
+        /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
+        pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
+                         | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
+
+        Log4(("hmR0VmxReadSegmentReg: Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Val, pSelReg->Attr.u));
+#ifdef DEBUG_bird
+        AssertMsg((u32Val & ~X86DESCATTR_P) == pSelReg->Attr.u,
+                  ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
+                   idxSel, u32Val, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
+#endif
+    }
+    return VINF_SUCCESS;
+}
+
+
+#ifdef VMX_USE_CACHED_VMCS_ACCESSES
+# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
+    hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+                          VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
+#else
+# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
+    hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+                          VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
+#endif
+
+
+/**
+ * Saves the guest segment registers from the current VMCS into the guest-CPU
+ * context.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       Pointer to the VMCPU.
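/*
 * Illustrative sketch of the attribute scrub performed above for segments that
 * VT-x reports as unusable: only the bits the rest of the code can rely on are
 * kept and, in particular, the present (P) bit is stripped.  The EX_ATTR_* values
 * follow the VMX access-rights layout (type 3:0, S bit 4, DPL 6:5, P bit 7,
 * AVL bit 12, L bit 13, D/B bit 14, G bit 15, unusable bit 16); the function
 * name is hypothetical.
 */
#include <stdint.h>

#define EX_ATTR_TYPE      UINT32_C(0x0000000f)
#define EX_ATTR_S         UINT32_C(0x00000010)
#define EX_ATTR_DPL       UINT32_C(0x00000060)
#define EX_ATTR_P         UINT32_C(0x00000080)
#define EX_ATTR_AVL       UINT32_C(0x00001000)
#define EX_ATTR_L         UINT32_C(0x00002000)
#define EX_ATTR_D         UINT32_C(0x00004000)
#define EX_ATTR_G         UINT32_C(0x00008000)
#define EX_ATTR_UNUSABLE  UINT32_C(0x00010000)

static uint32_t exScrubUnusableSegAttr(uint32_t uAttr)
{
    if (uAttr & EX_ATTR_UNUSABLE)
        uAttr &= EX_ATTR_UNUSABLE | EX_ATTR_L | EX_ATTR_D | EX_ATTR_G
               | EX_ATTR_DPL | EX_ATTR_TYPE | EX_ATTR_S;   /* Drops P, the high limit bits and AVL. */
    return uAttr;
}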
+ * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Guest segment registers. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS)) + { + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(CS, cs); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(SS, ss); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(DS, ds); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(ES, es); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(FS, fs); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(GS, gs); AssertRCReturn(rc, rc); + + /* Restore segment attributes for real-on-v86 mode hack. */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + pMixedCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u; + pMixedCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u; + pMixedCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u; + pMixedCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u; + pMixedCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u; + pMixedCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u; + } + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS); + } + + return VINF_SUCCESS; +} + + +/** + * Saves the guest descriptor table registers and task register from the current + * VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestTableRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + + /* Guest LDTR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR)) + { + rc = VMXLOCAL_READ_SEG(LDTR, ldtr); + AssertRCReturn(rc, rc); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR); + } + + /* Guest GDTR. */ + uint64_t u64Val = 0; + uint32_t u32Val = 0; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->gdtr.pGdt = u64Val; + pMixedCtx->gdtr.cbGdt = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR); + } + + /* Guest IDTR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->idtr.pIdt = u64Val; + pMixedCtx->idtr.cbIdt = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR); + } + + /* Guest TR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR)) + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* For real-mode emulation using virtual-8086 mode we have the fake TSS (pRealModeTSS) in TR, don't save the fake one. */ + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + rc = VMXLOCAL_READ_SEG(TR, tr); + AssertRCReturn(rc, rc); + } + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR); + } + return rc; +} + +#undef VMXLOCAL_READ_SEG + + +/** + * Saves the guest debug-register DR7 from the current VMCS into the guest-CPU + * context. 
+ * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestDR7(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG)) + { + if (!pVCpu->hm.s.fUsingHyperDR7) + { + /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */ + uint32_t u32Val; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->dr[7] = u32Val; + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG); + } + return VINF_SUCCESS; +} + + +/** + * Saves the guest APIC state from the current VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Updating TPR is already done in hmR0VmxPostRunGuest(). Just update the flag. */ + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_APIC_STATE); + return VINF_SUCCESS; +} + + +/** + * Saves the entire guest state from the currently active VMCS into the + * guest-CPU context. This essentially VMREADs all guest-data. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(pVCpu); + Assert(pMixedCtx); + + if (HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL) + return VINF_SUCCESS; + + /* Though we can longjmp to ring-3 due to log-flushes here and get recalled + again on the ring-3 callback path, there is no real need to. */ + if (VMMRZCallRing3IsEnabled(pVCpu)) + VMMR0LogFlushDisable(pVCpu); + else + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + Log4Func(("vcpu[%RU32]\n", pVCpu->idCpu)); + + int rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestRipRspRflags failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestControlRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSegmentRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestTableRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestDR7 failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestSysenterMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSysenterMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestFSBaseMsr(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestFSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestGSBaseMsr(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestGSBaseMsr failed! 
rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestAutoLoadStoreMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestActivityState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestActivityState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestApicState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestApicState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + AssertMsg(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL, + ("Missed guest state bits while saving state; residue %RX32\n", HMVMXCPU_GST_VALUE(pVCpu))); + + if (VMMRZCallRing3IsEnabled(pVCpu)) + VMMR0LogFlushEnable(pVCpu); + + return rc; +} + + +/** + * Check per-VM and per-VCPU force flag actions that require us to go back to + * ring-3 for one reason or another. + * + * @returns VBox status code (information status code included). + * @retval VINF_SUCCESS if we don't have any actions that require going back to + * ring-3. + * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync. + * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware + * interrupts) + * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires + * all EMTs to be in ring-3. + * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests. + * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return + * to the EM loop. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction + ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction + ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) ) + { + /* We need the control registers now, make sure the guest-CPU context is updated. */ + int rc3 = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc3, rc3); + + /* Pending HM CR3 sync. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) + { + int rc2 = PGMUpdateCR3(pVCpu, pMixedCtx->cr3); + AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3, + ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + } + + /* Pending HM PAE PDPEs. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) + { + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + + /* Pending PGM C3 sync. */ + if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + int rc2 = PGMSyncCR3(pVCpu, pMixedCtx->cr0, pMixedCtx->cr3, pMixedCtx->cr4, + VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (rc2 != VINF_SUCCESS) + { + AssertRC(rc2); + Log4(("hmR0VmxCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", rc2)); + return rc2; + } + } + + /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) 
*/ + if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + int rc2 = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; + Log4(("hmR0VmxCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2)); + return rc2; + } + + /* Pending VM request packets, such as hardware interrupts. */ + if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST)) + { + Log4(("hmR0VmxCheckForceFlags: Pending VM request forcing us back to ring-3\n")); + return VINF_EM_PENDING_REQUEST; + } + + /* Pending PGM pool flushes. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) + { + Log4(("hmR0VmxCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n")); + return VINF_PGM_POOL_FLUSH_PENDING; + } + + /* Pending DMA requests. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA)) + { + Log4(("hmR0VmxCheckForceFlags: Pending DMA request forcing us back to ring-3\n")); + return VINF_EM_RAW_TO_R3; + } + } + + return VINF_SUCCESS; +} + + +/** + * Converts any TRPM trap into a pending HM event. This is typically used when + * entering from ring-3 (not longjmp returns). + * + * @param pVCpu Pointer to the VMCPU. + */ +static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu) +{ + Assert(TRPMHasTrap(pVCpu)); + Assert(!pVCpu->hm.s.Event.fPending); + + uint8_t uVector; + TRPMEVENT enmTrpmEvent; + RTGCUINT uErrCode; + RTGCUINTPTR GCPtrFaultAddress; + uint8_t cbInstr; + + int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr); + AssertRC(rc); + + /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */ + uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID; + if (enmTrpmEvent == TRPM_TRAP) + { + switch (uVector) + { + case X86_XCPT_BP: + case X86_XCPT_OF: + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + break; + + case X86_XCPT_PF: + case X86_XCPT_DF: + case X86_XCPT_TS: + case X86_XCPT_NP: + case X86_XCPT_SS: + case X86_XCPT_GP: + case X86_XCPT_AC: + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + /* no break! */ + default: + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + break; + } + } + else if (enmTrpmEvent == TRPM_HARDWARE_INT) + { + if (uVector == X86_XCPT_NMI) + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + else + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + } + else if (enmTrpmEvent == TRPM_SOFTWARE_INT) + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + else + AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent)); + + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n", + u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress)); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Converts any pending HM event into a TRPM trap. Typically used when leaving + * VT-x to execute any instruction. + * + * @param pvCpu Pointer to the VMCPU. 
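/*
 * Illustrative sketch of the 32-bit interruption-information word assembled by
 * hmR0VmxTrpmTrapToPendingEvent() above (Intel SDM, "VM-Entry Controls for Event
 * Injection"): bits 7:0 = vector, bits 10:8 = type, bit 11 = deliver error code,
 * bit 31 = valid.  The Ex* names are hypothetical stand-ins for the
 * VMX_EXIT_INTERRUPTION_INFO_* macros.
 */
#include <stdint.h>
#include <stdbool.h>

#define EX_INTINFO_TYPE_SHIFT     8
#define EX_INTINFO_ERRCODE_VALID  UINT32_C(0x00000800)
#define EX_INTINFO_VALID          UINT32_C(0x80000000)

static uint32_t exMakeIntInfo(uint8_t uVector, uint8_t uType, bool fErrCode)
{
    uint32_t u32Info = (uint32_t)uVector
                     | ((uint32_t)uType << EX_INTINFO_TYPE_SHIFT)
                     | EX_INTINFO_VALID;
    if (fErrCode)
        u32Info |= EX_INTINFO_ERRCODE_VALID;
    return u32Info;
}

/* Example: a hardware #GP (vector 13, type 3 = hardware exception) with an error code
   gives exMakeIntInfo(13, 3, true) == 0x80000b0d. */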
+ */ +static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu) +{ + Assert(pVCpu->hm.s.Event.fPending); + + uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); + uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo); + bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntInfo); + uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode; + + /* If a trap was already pending, we did something wrong! */ + Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP); + + TRPMEVENT enmTrapType; + switch (uVectorType) + { + case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT: + case VMX_IDT_VECTORING_INFO_TYPE_NMI: + enmTrapType = TRPM_HARDWARE_INT; + break; + + case VMX_IDT_VECTORING_INFO_TYPE_SW_INT: + enmTrapType = TRPM_SOFTWARE_INT; + break; + + case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT: + case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */ + case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT: + enmTrapType = TRPM_TRAP; + break; + + default: + AssertMsgFailed(("Invalid trap type %#x\n", uVectorType)); + enmTrapType = TRPM_32BIT_HACK; + break; + } + + Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType)); + + int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType); + AssertRC(rc); + + if (fErrorCodeValid) + TRPMSetErrorCode(pVCpu, uErrorCode); + + if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress); + } + else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT + || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) + { + AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT + || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF), + ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType)); + TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr); + } + pVCpu->hm.s.Event.fPending = false; +} + + +/** + * Does the necessary state syncing before returning to ring-3 for any reason + * (longjmp, preemption, voluntary exits to ring-3) from VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may + * be out-of-sync. Make sure to update the required + * fields before using them. + * @param fSaveGuestState Whether to save the guest state or not. + * + * @remarks If you modify code here, make sure to check whether + * hmR0VmxCallRing3Callback() needs to be updated too. + * @remarks No-long-jmp zone!!! + */ +static int hmR0VmxLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fSaveGuestState) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + RTCPUID idCpu = RTMpCpuId(); + Log4Func(("HostCpuId=%u\n", idCpu)); + + /* Save the guest state if necessary. */ + if ( fSaveGuestState + && HMVMXCPU_GST_VALUE(pVCpu) != HMVMX_UPDATED_GUEST_ALL) + { + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL); + } + + /* Restore host FPU state if necessary and resync on next R0 reentry .*/ + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + /* We shouldn't reload CR0 without saving it first. 
*/ + if (!fSaveGuestState) + { + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + } + CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx); + Assert(!CPUMIsGuestFPUStateActive(pVCpu)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + } + + /* Restore host debug registers if necessary and resync on next R0 reentry. */ +#ifdef VBOX_STRICT + if (CPUMIsHyperDebugStateActive(pVCpu)) + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT); +#endif + if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu)); + Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu)); + +#if HC_ARCH_BITS == 64 + /* Restore host-state bits that VT-x only restores partially. */ + if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) + && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) + { + Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu)); + VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + } +#endif + + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3); + + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); + + /** @todo This kinda defeats the purpose of having preemption hooks. + * The problem is, deregistering the hooks should be moved to a place that + * lasts until the EMT is about to be destroyed not everytime while leaving HM + * context. + */ + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE) + { + int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu)); + } + Assert(!(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED)); + NOREF(idCpu); + + return VINF_SUCCESS; +} + + +/** + * Leaves the VT-x session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jmp zone!!! + */ +DECLINLINE(int) hmR0VmxLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HM_DISABLE_PREEMPT_IF_NEEDED(); + HMVMX_ASSERT_CPU_SAFE(); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before + and done this from the VMXR0ThreadCtxCallback(). */ + if (!pVCpu->hm.s.fLeaveDone) + { + int rc2 = hmR0VmxLeave(pVM, pVCpu, pMixedCtx, true /* fSaveGuestState */); + AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT_IF_NEEDED(), rc2); + pVCpu->hm.s.fLeaveDone = true; + } + + /* Deregister hook now that we've left HM context before re-enabling preemption. */ + /** @todo This is bad. 
Deregistering here means we need to VMCLEAR always + * (longjmp/exit-to-r3) in VT-x which is not efficient. */ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + VMMR0ThreadCtxHooksDeregister(pVCpu); + + /* Leave HM context. This takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + + return rc; +} + + +/** + * Does the necessary state syncing before doing a longjmp to ring-3. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jmp zone!!! + */ +DECLINLINE(int) hmR0VmxLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + return hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx); +} + + +/** + * Take necessary actions before going back to ring-3. + * + * An action requires us to go back to ring-3. This function does the necessary + * steps before we can safely return to ring-3. This is not the same as longjmps + * to ring-3, this is voluntary and prepares the guest so it may continue + * executing outside HM (recompiler/IEM). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param rcExit The reason for exiting to ring-3. Can be + * VINF_VMM_UNKNOWN_RING3_CALL. + */ +static int hmR0VmxExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, int rcExit) +{ + Assert(pVM); + Assert(pVCpu); + Assert(pMixedCtx); + HMVMX_ASSERT_PREEMPT_SAFE(); + + if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR)) + { + VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VMCSPhys); + pVCpu->hm.s.vmx.LastError.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs; + pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; + /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */ + } + + /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */ + VMMRZCallRing3Disable(pVCpu); + Log4(("hmR0VmxExitToRing3: pVCpu=%p idCpu=%RU32 rcExit=%d\n", pVCpu, pVCpu->idCpu, rcExit)); + + /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */ + if (pVCpu->hm.s.Event.fPending) + { + hmR0VmxPendingEventToTrpmTrap(pVCpu); + Assert(!pVCpu->hm.s.Event.fPending); + } + + /* Save guest state and restore host state bits. */ + int rc = hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3); + + /* Sync recompiler state. */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR + | CPUM_CHANGED_LDTR + | CPUM_CHANGED_GDTR + | CPUM_CHANGED_IDTR + | CPUM_CHANGED_TR + | CPUM_CHANGED_HIDDEN_SEL_REGS); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)); + if ( pVM->hm.s.fNestedPaging + && CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); + } + + /* + * Clear the X86_EFL_TF if necessary. 
+ */ + if (pVCpu->hm.s.fClearTrapFlag) + { + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + pMixedCtx->eflags.Bits.u1TF = 0; + pVCpu->hm.s.fClearTrapFlag = false; + } + /** @todo there seems to be issues with the resume flag when the monitor trap + * flag is pending without being used. Seen early in bios init when + * accessing APIC page in prot mode. */ + + /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */ + if (rcExit != VINF_EM_RAW_INTERRUPT) + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3); + + /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */ + VMMRZCallRing3RemoveNotification(pVCpu); + VMMRZCallRing3Enable(pVCpu); + + return rc; +} + + +/** + * VMMRZCallRing3() callback wrapper which saves the guest state before we + * longjump to ring-3 and possibly get preempted. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param enmOperation The operation causing the ring-3 longjump. + * @param pvUser Opaque pointer to the guest-CPU context. The data + * may be out-of-sync. Make sure to update the required + * fields before using them. + * @remarks If you modify code here, make sure to check whether + * hmR0VmxLeave() needs to be updated too. + */ +DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser) +{ + if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION) + { + VMMRZCallRing3RemoveNotification(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* If anything here asserts or fails, good luck. */ + if (CPUMIsGuestFPUStateActive(pVCpu)) + CPUMR0SaveGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser); + + CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); + +#if HC_ARCH_BITS == 64 + /* Restore host-state bits that VT-x only restores partially. */ + if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) + && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) + { + VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + } +#endif + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE) + { + VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + } + + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + VMMR0ThreadCtxHooksDeregister(pVCpu); + + HMR0LeaveCpu(pVCpu); + HM_RESTORE_PREEMPT_IF_NEEDED(); + return VINF_SUCCESS; + } + + Assert(pVCpu); + Assert(pvUser); + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + HMVMX_ASSERT_PREEMPT_SAFE(); + + VMMRZCallRing3Disable(pVCpu); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + + Log4(("hmR0VmxCallRing3Callback->hmR0VmxLongJmpToRing3 pVCpu=%p idCpu=%RU32\n enmOperation=%d", pVCpu, pVCpu->idCpu, + enmOperation)); + + int rc = hmR0VmxLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser); + AssertRCReturn(rc, rc); + + VMMRZCallRing3Enable(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to + * cause a VM-exit as soon as the guest is in a state to receive interrupts. + * + * @param pVCpu Pointer to the VMCPU. 
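/*
 * Illustrative sketch of the capability gate used by hmR0VmxSetIntWindowExitVmcs()
 * below: a processor-based VM-execution control may only be turned on if the
 * corresponding "allowed-1" bit reported by the VMX capability MSRs is set;
 * otherwise the request is skipped and the event is delivered on a later natural
 * VM-exit.  Ex* names are hypothetical; interrupt-window exiting is bit 2 of the
 * primary controls.
 */
#include <stdint.h>
#include <stdbool.h>

#define EX_PROC_CTLS_INT_WINDOW_EXIT  UINT32_C(0x00000004)

/* Returns the (possibly updated) control word; *pfDirty tells the caller to VMWRITE it. */
static uint32_t exRequestIntWindowExit(uint32_t u32ProcCtls, uint32_t u32Allowed1, bool *pfDirty)
{
    *pfDirty = false;
    if (   (u32Allowed1 & EX_PROC_CTLS_INT_WINDOW_EXIT)      /* Supported by this CPU...      */
        && !(u32ProcCtls & EX_PROC_CTLS_INT_WINDOW_EXIT))    /* ...and not already requested. */
    {
        u32ProcCtls |= EX_PROC_CTLS_INT_WINDOW_EXIT;
        *pfDirty     = true;
    }
    return u32ProcCtls;
}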
+ */ +DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu) +{ + if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)) + { + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)) + { + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRC(rc); + } + } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */ +} + + +/** + * Evaluates the event to be delivered to the guest and sets it as the pending + * event. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static void hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(!pVCpu->hm.s.Event.fPending); + + /* Get the current interruptibility-state of the guest and then figure out what can be injected. */ + uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx); + bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + + Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/ + && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); + Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */ + Assert(!TRPMHasTrap(pVCpu)); + + /** @todo SMI. SMIs take priority over NMIs. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts . */ + { + /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */ + if ( !fBlockMovSS + && !fBlockSti) + { + /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */ + Log4(("Pending NMI vcpu[%RU32]\n", pVCpu->idCpu)); + uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddres */); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + } + else + hmR0VmxSetIntWindowExitVmcs(pVCpu); + } + else if ( VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)) + && !pVCpu->hm.s.fSingleInstruction) + { + /* + * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver + * the interrupt ASAP. We must not execute any guest code until we inject the interrupt which is why it is + * evaluated here and not set as pending, solely based on the force-flags. 
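/*
 * Illustrative sketch of the priority order implemented by
 * hmR0VmxEvaluatePendingEvent() above: NMIs are considered before external
 * interrupts, both respect the STI/MOV SS shadow, and external interrupts
 * additionally require EFLAGS.IF.  When delivery is blocked, an interrupt-window
 * VM-exit is requested instead.  The Ex* names are hypothetical.
 */
#include <stdbool.h>

typedef enum EXEVTDECISION
{
    EXEVT_NONE = 0,         /* Nothing deliverable right now.                   */
    EXEVT_INJECT_NMI,       /* Queue an NMI for injection.                      */
    EXEVT_INJECT_EXTINT,    /* Query the PIC/APIC for a vector and queue it.    */
    EXEVT_WANT_INT_WINDOW   /* Blocked: ask for an interrupt-window VM-exit.    */
} EXEVTDECISION;

static EXEVTDECISION exEvaluateEvent(bool fNmiPending, bool fIntPending, bool fIntrShadow, bool fEflagsIf)
{
    if (fNmiPending)
        return !fIntrShadow ? EXEVT_INJECT_NMI : EXEVT_WANT_INT_WINDOW;
    if (fIntPending)
        return (!fIntrShadow && fEflagsIf) ? EXEVT_INJECT_EXTINT : EXEVT_WANT_INT_WINDOW;
    return EXEVT_NONE;
}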
+ */ + int rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRC(rc); + const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF); + if ( !fBlockInt + && !fBlockSti + && !fBlockMovSS) + { + uint8_t u8Interrupt; + rc = PDMGetInterrupt(pVCpu, &u8Interrupt); + if (RT_SUCCESS(rc)) + { + Log4(("Pending interrupt vcpu[%RU32] u8Interrupt=%#x \n", pVCpu->idCpu, u8Interrupt)); + uint32_t u32IntInfo = u8Interrupt | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrfaultAddress */); + } + else + { + /** @todo Does this actually happen? If not turn it into an assertion. */ + Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq); + } + } + else + hmR0VmxSetIntWindowExitVmcs(pVCpu); + } +} + + +/** + * Sets a pending-debug exception to be delivered to the guest if the guest is + * single-stepping. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingDebugXcpt(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + if (pMixedCtx->eflags.Bits.u1TF) /* We don't have any IA32_DEBUGCTL MSR for guests. Treat as all bits 0. */ + { + int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, VMX_VMCS_GUEST_DEBUG_EXCEPTIONS_BS); + AssertRC(rc); + } +} + + +/** + * Injects any pending events into the guest if the guest is in a state to + * receive them. + * + * @returns VBox status code (informational status codes included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HMVMX_ASSERT_PREEMPT_SAFE(); + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + /* Get the current interruptibility-state of the guest and then figure out what can be injected. */ + uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx); + bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + + Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/ + && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); + Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. 
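/*
 * Illustrative sketch of the single-step bookkeeping done by
 * hmR0VmxSetPendingDebugXcpt() above and its caller below: if the guest runs with
 * EFLAGS.TF while an interrupt shadow is in effect, the single-step #DB that the
 * shadowed instruction would raise is recorded in the VMCS pending-debug-exceptions
 * field (BS, bit 14) so it is not lost across VM-entry.  The Ex* names are
 * hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

#define EX_PENDING_DEBUG_XCPT_BS  UINT32_C(0x00004000)   /* Single-step (BS) bit. */

static uint32_t exPendingDebugXcpts(bool fGuestTf, bool fIntrShadow)
{
    if (fGuestTf && fIntrShadow)     /* TF set and delivery inhibited -> queue the #DB. */
        return EX_PENDING_DEBUG_XCPT_BS;
    return 0;                        /* Otherwise nothing needs to be queued here.      */
}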
*/ + Assert(!TRPMHasTrap(pVCpu)); + + int rc = VINF_SUCCESS; + if (pVCpu->hm.s.Event.fPending) + { +#if defined(VBOX_STRICT) || defined(VBOX_WITH_STATISTICS) + uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF); + Assert(!fBlockInt); + Assert(!fBlockSti); + Assert(!fBlockMovSS); + } + else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + Assert(!fBlockSti); + Assert(!fBlockMovSS); + } +#endif + Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo)); + rc = hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr, + pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, &uIntrState); + AssertRCReturn(rc, rc); + + /* Update the interruptibility-state as it could have been changed by + hmR0VmxInjectEventVmcs() (e.g. real-on-v86 guest injecting software interrupts) */ + fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + +#ifdef VBOX_WITH_STATISTICS + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt); + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt); +#endif + } + + /* Deliver pending debug exception if the guest is single-stepping. Evaluate and set the BS bit. */ + if ( fBlockSti + || fBlockMovSS) + { + if ( !pVCpu->hm.s.fSingleInstruction + && !DBGFIsStepping(pVCpu)) + { + /* + * The pending-debug exceptions field is cleared on all VM-exits except VMX_EXIT_TPR_BELOW_THRESHOLD, + * VMX_EXIT_MTF, VMX_EXIT_APIC_WRITE and VMX_EXIT_VIRTUALIZED_EOI. + * See Intel spec. 27.3.4 "Saving Non-Register State". + */ + int rc2 = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + } + else if (pMixedCtx->eflags.Bits.u1TF) + { + /* + * We are single-stepping in the hypervisor debugger using EFLAGS.TF. Clear interrupt inhibition as setting the + * BS bit would mean delivering a #DB to the guest upon VM-entry when it shouldn't be. + */ + Assert(!(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG)); + uIntrState = 0; + } + } + + /* + * There's no need to clear the VM entry-interruption information field here if we're not injecting anything. + * VT-x clears the valid bit on every VM-exit. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection". + */ + int rc2 = hmR0VmxLoadGuestIntrState(pVCpu, uIntrState); + AssertRC(rc2); + + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET); + return rc; +} + + +/** + * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_UD | VMX_EXIT_INTERRUPTION_INFO_VALID; + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Injects a double-fault (#DF) exception into the VM. 
+ * + * @returns VBox status code (informational status code included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(int) hmR0VmxInjectXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t *puIntrState) +{ + uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */, + puIntrState); +} + + +/** + * Sets a debug (#DB) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_DB | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets an overflow (#OF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param cbInstr The value of RIP that is to be pushed on the guest + * stack. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptOF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t cbInstr) +{ + uint32_t u32IntInfo = X86_XCPT_OF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Injects a general-protection (#GP) fault into the VM. + * + * @returns VBox status code (informational status code included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param u32ErrorCode The error code associated with the #GP. + */ +DECLINLINE(int) hmR0VmxInjectXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fErrorCodeValid, uint32_t u32ErrorCode, + uint32_t *puIntrState) +{ + uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + if (fErrorCodeValid) + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */, + puIntrState); +} + + +/** + * Sets a software interrupt (INTn) as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param uVector The software interrupt vector number. + * @param cbInstr The value of RIP that is to be pushed on the guest + * stack. 
+ */ +DECLINLINE(void) hmR0VmxSetPendingIntN(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint16_t uVector, uint32_t cbInstr) +{ + uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID; + if ( uVector == X86_XCPT_BP + || uVector == X86_XCPT_OF) + { + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + } + else + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's + * stack. + * + * @returns VBox status code (information status code included). + * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault. + * @param pVM Pointer to the VM. + * @param pMixedCtx Pointer to the guest-CPU context. + * @param uValue The value to push to the guest stack. + */ +DECLINLINE(int) hmR0VmxRealModeGuestStackPush(PVM pVM, PCPUMCTX pMixedCtx, uint16_t uValue) +{ + /* + * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in + * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". + * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound". + */ + if (pMixedCtx->sp == 1) + return VINF_EM_RESET; + pMixedCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */ + int rc = PGMPhysSimpleWriteGCPhys(pVM, pMixedCtx->ss.u64Base + pMixedCtx->sp, &uValue, sizeof(uint16_t)); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Injects an event into the guest upon VM-entry by updating the relevant fields + * in the VM-entry area in the VMCS. + * + * @returns VBox status code (informational error codes included). + * @retval VINF_SUCCESS if the event is successfully injected into the VMCS. + * @retval VINF_EM_RESET if event injection resulted in a triple-fault. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may + * be out-of-sync. Make sure to update the required + * fields before using them. + * @param u64IntInfo The VM-entry interruption-information field. + * @param cbInstr The VM-entry instruction length in bytes (for + * software interrupts, exceptions and privileged + * software exceptions). + * @param u32ErrCode The VM-entry exception error code. + * @param GCPtrFaultAddress The page-fault address for #PF exceptions. + * @param puIntrState Pointer to the current guest interruptibility-state. + * This interruptibility-state will be updated if + * necessary. This cannot not be NULL. + * + * @remarks Requires CR0! + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr, + uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntrState) +{ + /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */ + AssertMsg(u64IntInfo >> 32 == 0, ("%#RX64\n", u64IntInfo)); + Assert(puIntrState); + uint32_t u32IntInfo = (uint32_t)u64IntInfo; + + const uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(u32IntInfo); + const uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo); + +#ifdef VBOX_STRICT + /* Validate the error-code-valid bit for hardware exceptions. 
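/*
 * Illustrative sketch of the 16-bit real-mode push helper above, modelled on a
 * plain 64 KiB buffer instead of guest physical memory: SP wraps modulo 64K
 * (segment wraparound) and pushing with SP == 1 is treated as unrecoverable,
 * mirroring the VINF_EM_RESET return.  The Ex* names are hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Returns false when the push cannot be performed (SP == 1). */
static bool exRealModePush16(uint8_t *pbStackSeg /* 64 KiB */, uint16_t *puSp, uint16_t uValue)
{
    if (*puSp == 1)
        return false;
    *puSp = (uint16_t)(*puSp - sizeof(uint16_t));        /* May wrap around at 0, as on real hardware. */
    memcpy(&pbStackSeg[*puSp], &uValue, sizeof(uValue)); /* Write at SS:SP (host is little-endian x86). */
    return true;
}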
*/ + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT) + { + switch (uVector) + { + case X86_XCPT_PF: + case X86_XCPT_DF: + case X86_XCPT_TS: + case X86_XCPT_NP: + case X86_XCPT_SS: + case X86_XCPT_GP: + case X86_XCPT_AC: + AssertMsg(VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo), + ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector)); + /* fallthru */ + default: + break; + } + } +#endif + + /* Cannot inject an NMI when block-by-MOV SS is in effect. */ + Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI + || !(*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)); + + STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]); + + /* We require CR0 to check if the guest is in real-mode. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* + * Hardware interrupts & exceptions cannot be delivered through the software interrupt redirection bitmap to the real + * mode task in virtual-8086 mode. We must jump to the interrupt handler in the (real-mode) guest. + * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode" for interrupt & exception classes. + * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling. + */ + if (CPUMIsGuestInRealModeEx(pMixedCtx)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (!pVM->hm.s.vmx.fUnrestrictedGuest) + { + Assert(PDMVmmDevHeapIsEnabled(pVM)); + Assert(pVM->hm.s.vmx.pRealModeTSS); + + /* We require RIP, RSP, RFLAGS, CS, IDTR. Save the required ones from the VMCS. */ + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP)); + + /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */ + const size_t cbIdtEntry = sizeof(X86IDTR16); + if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pMixedCtx->idtr.cbIdt) + { + /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */ + if (uVector == X86_XCPT_DF) + return VINF_EM_RESET; + else if (uVector == X86_XCPT_GP) + { + /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */ + return hmR0VmxInjectXcptDF(pVCpu, pMixedCtx, puIntrState); + } + + /* If we're injecting an interrupt/exception with no valid IDT entry, inject a general-protection fault. */ + /* No error codes for exceptions in real-mode. See Intel spec. 20.1.4 "Interrupt and Exception Handling" */ + return hmR0VmxInjectXcptGP(pVCpu, pMixedCtx, false /* fErrCodeValid */, 0 /* u32ErrCode */, puIntrState); + } + + /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */ + uint16_t uGuestIp = pMixedCtx->ip; + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT) + { + Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF); + /* #BP and #OF are both benign traps, we need to resume the next instruction. */ + uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr; + } + else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT) + uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr; + + /* Get the code segment selector and offset from the IDT entry for the interrupt handler. 
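+       Each real-mode IVT entry is 4 bytes: a 16-bit handler offset (IdtEntry.offSel)
+       followed by a 16-bit code segment selector (IdtEntry.uSel); e.g. the entry
+       for vector 0x10 is read from pIdt + 0x10 * cbIdtEntry.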
*/ + X86IDTR16 IdtEntry; + RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pMixedCtx->idtr.pIdt + uVector * cbIdtEntry; + rc = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry); + AssertRCReturn(rc, rc); + + /* Construct the stack frame for the interrupt/exception handler. */ + rc = hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->eflags.u32); + rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->cs.Sel); + rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, uGuestIp); + AssertRCReturn(rc, rc); + + /* Clear the required eflag bits and jump to the interrupt/exception handler. */ + if (rc == VINF_SUCCESS) + { + pMixedCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC); + pMixedCtx->rip = IdtEntry.offSel; + pMixedCtx->cs.Sel = IdtEntry.uSel; + pMixedCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry; + if ( uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pMixedCtx->cr2 = GCPtrFaultAddress; + } + + /* If any other guest-state bits are changed here, make sure to update + hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS + | HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_GUEST_RSP); + + /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */ + if (*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + { + Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI + && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT); + Log4(("Clearing inhibition due to STI.\n")); + *puIntrState &= ~VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI; + } + Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x instrlen=%#x\n", u32IntInfo, u32ErrCode, cbInstr)); + + /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo' + it, if we are returning to ring-3 before executing guest code. */ + pVCpu->hm.s.Event.fPending = false; + } + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET); + return rc; + } + else + { + /* + * For unrestricted execution enabled CPUs running real-mode guests, we must not set the deliver-error-code bit. + * See Intel spec. 26.2.1.3 "VM-Entry Control Fields". + */ + u32IntInfo &= ~VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + } + } + + /* Validate. */ + Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */ + Assert(!VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK(u32IntInfo)); /* Bit 12 MBZ. */ + Assert(!(u32IntInfo & 0x7ffff000)); /* Bits 30:12 MBZ. */ + + /* Inject. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo); + if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo)) + rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode); + rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr); + + if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pMixedCtx->cr2 = GCPtrFaultAddress; + } + + Log4(("Injecting vcpu[%RU32] u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x pMixedCtx->uCR2=%#RX64\n", pVCpu->idCpu, + u32IntInfo, u32ErrCode, cbInstr, pMixedCtx->cr2)); + + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Clears the interrupt-window exiting control in the VMCS and if necessary + * clears the current event in the VMCS as well. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
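+ *
+ * @remarks Used e.g. by hmR0VmxPreRunGuest() when it has to bail out to
+ *          ring-3 (pending force-flags or host preemption) after an event
+ *          has already been written to the VMCS.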
+ * + * @remarks Use this function only to clear events that have not yet been + * delivered to the guest but are injected in the VMCS! + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxClearEventVmcs(PVMCPU pVCpu) +{ + int rc; + Log4Func(("vcpu[%d]\n", pVCpu->idCpu)); + + /* Clear interrupt-window exiting control. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT) + { + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRC(rc); + } + + if (!pVCpu->hm.s.Event.fPending) + return; + +#ifdef VBOX_STRICT + uint32_t u32EntryInfo; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo); + AssertRC(rc); + Assert(VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)); +#endif + + /* Clear the entry-interruption field (including the valid bit). */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); + AssertRC(rc); + + /* Clear the pending debug exception field. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0); + AssertRC(rc); +} + + +/** + * Enters the VT-x session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the CPU info struct. + */ +VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + Assert(pVM->hm.s.vmx.fSupported); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + NOREF(pCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + +#ifdef VBOX_STRICT + /* Make sure we're in VMX root mode. */ + RTCCUINTREG u32HostCR4 = ASMGetCR4(); + if (!(u32HostCR4 & X86_CR4_VMXE)) + { + LogRel(("VMXR0Enter: X86_CR4_VMXE bit in CR4 is not set!\n")); + return VERR_VMX_X86_CR4_VMXE_CLEARED; + } +#endif + + /* + * Load the VCPU's VMCS as the current (and active) one. + */ + Assert(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR); + int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + if (RT_FAILURE(rc)) + return rc; + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE; + pVCpu->hm.s.fLeaveDone = false; + Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); + + return VINF_SUCCESS; +} + + +/** + * The thread-context callback (only on platforms which support it). + * + * @param enmEvent The thread-context event. + * @param pVCpu Pointer to the VMCPU. + * @param fGlobalInit Whether global VT-x/AMD-V init. was used. + * @thread EMT(pVCpu) + */ +VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit) +{ + switch (enmEvent) + { + case RTTHREADCTXEVENT_PREEMPTING: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + PCPUMCTX pMixedCtx = CPUMQueryGuestCtxPtr(pVCpu); + + /* No longjmps (logger flushes, locks) in this fragile context. */ + VMMRZCallRing3Disable(pVCpu); + Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId())); + + /* + * Restore host-state (FPU, debug etc.) + */ + if (!pVCpu->hm.s.fLeaveDone) + { + /* Do -not- save guest-state here as we might already be in the middle of saving it (esp. bad if we are + holding the PGM lock while saving the guest state (see hmR0VmxSaveGuestControlRegs()). 
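+               Hence fSaveGuestState is false in the hmR0VmxLeave() call below.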
*/ + hmR0VmxLeave(pVM, pVCpu, pMixedCtx, false /* fSaveGuestState */); + pVCpu->hm.s.fLeaveDone = true; + } + + /* Leave HM context, takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + AssertRC(rc); NOREF(rc); + + /* Restore longjmp state. */ + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptPreempting); + break; + } + + case RTTHREADCTXEVENT_RESUMED: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */ + VMMRZCallRing3Disable(pVCpu); + Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId())); + + /* Initialize the bare minimum state required for HM. This takes care of + initializing VT-x if necessary (onlined CPUs, local init etc.) */ + int rc = HMR0EnterCpu(pVCpu); + AssertRC(rc); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + + /* Load the active VMCS as the current one. */ + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR) + { + rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRC(rc); NOREF(rc); + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE; + Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); + } + pVCpu->hm.s.fLeaveDone = false; + + /* Restore longjmp state. */ + VMMRZCallRing3Enable(pVCpu); + break; + } + + default: + break; + } +} + + +/** + * Saves the host state in the VMCS host-state. + * Sets up the VM-exit MSR-load area. + * + * The CPU state will be loaded from these fields on every successful VM-exit. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveHostState(PVM pVM, PVMCPU pVCpu) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)) + return VINF_SUCCESS; + + int rc = hmR0VmxSaveHostControlRegs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostControlRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxSaveHostSegmentRegs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostSegmentRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxSaveHostMsrs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostMsrs failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT); + return rc; +} + + +/** + * Saves the host state in the VMCS host-state. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + /* Save the host state here while entering HM context. When thread-context hooks are used, we might get preempted + and have to resave the host state but most of the time we won't be, so do it here before we disable interrupts. */ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + return hmR0VmxSaveHostState(pVM, pVCpu); +} + + +/** + * Loads the guest state into the VMCS guest-state area. The CPU state will be + * loaded from these fields on every successful VM-entry. + * + * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. + * Sets up the VM-entry controls. 
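+ * Sets up the VM-exit controls.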
+ * Sets up the appropriate VMX non-root function to execute guest code based on + * the guest CPU mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pMixedCtx); + HMVMX_ASSERT_PREEMPT_SAFE(); + +#ifdef LOG_ENABLED + /** @todo r=ramshankar: I'm not able to use VMMRZCallRing3Disable() here, + * probably not initialized yet? Anyway this will do for now. + * + * Update: Should be possible once VMXR0LoadGuestState() is removed as an + * interface and disable ring-3 calls when thread-context hooks are not + * available. */ + bool fCallerDisabledLogFlush = VMMR0IsLogFlushDisabled(pVCpu); + VMMR0LogFlushDisable(pVCpu); +#endif + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x); + + /* Determine real-on-v86 mode. */ + pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false; + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && CPUMIsGuestInRealModeEx(pMixedCtx)) + { + pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true; + } + + /* + * Load the guest-state into the VMCS. + * Any ordering dependency among the sub-functions below must be explicitly stated using comments. + * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it. + */ + int rc = hmR0VmxSetupVMRunHandler(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */ + rc = hmR0VmxLoadGuestEntryCtls(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestEntryCtls! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */ + rc = hmR0VmxLoadGuestExitCtls(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupExitCtls failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestActivityState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestActivityState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestCR3AndCR4(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestCR3AndCR4: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* Assumes pMixedCtx->cr0 is up-to-date (strict builds require CR0 for segment register validation checks). */ + rc = hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestSegmentRegs: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestMsrs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestApicState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* + * Loading Rflags here is fine, even though Rflags.TF might depend on guest debug state (which is not loaded here). + * It is re-evaluated and updated if necessary in hmR0VmxLoadSharedState(). 
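+     * (hmR0VmxLoadSharedState() reloads RFLAGS after loading the shared debug
+     * state when the RFLAGS-changed flag is still pending.)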
+ */ + rc = hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestRipRspRflags! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* Clear any unused and reserved bits. */ + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2); + +#ifdef LOG_ENABLED + /* Only reenable log-flushing if the caller has it enabled. */ + if (!fCallerDisabledLogFlush) + VMMR0LogFlushEnable(pVCpu); +#endif + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x); + return rc; +} + + +/** + * Loads the state shared between the host and guest into the VMCS. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxLoadSharedState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + { + int rc = hmR0VmxLoadSharedCR0(pVCpu, pCtx); + AssertRC(rc); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + { + int rc = hmR0VmxLoadSharedDebugState(pVCpu, pCtx); + AssertRC(rc); + + /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS)) + { + rc = hmR0VmxLoadGuestRflags(pVCpu, pCtx); + AssertRC(rc); + } + } + + AssertMsg(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); +} + + +/** + * Worker for loading the guest-state bits in the inner VT-x execution loop. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxLoadGuestStateOptimal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HMVMX_ASSERT_PREEMPT_SAFE(); + + Log5(("LoadFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); +#ifdef HMVMX_SYNC_FULL_GUEST_STATE + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); +#endif + + if (HMCPU_CF_IS_SET_ONLY(pVCpu, HM_CHANGED_GUEST_RIP)) + { + int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal); + } + else if (HMCPU_CF_VALUE(pVCpu)) + { + int rc = hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull); + } + + /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */ + AssertMsg( !HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST) + || HMCPU_CF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + +#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE + uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVM, pVCpu, pMixedCtx); + if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND) + Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason)); +#endif +} + + +/** + * Does the preparations before executing guest code in VT-x. + * + * This may cause longjmps to ring-3 and may even result in rescheduling to the + * recompiler. We must be cautious what we do here regarding committing + * guest-state information into the VMCS assuming we assuredly execute the + * guest in VT-x mode. 
If we fall back to the recompiler after updating the VMCS + * and clearing the common-state (TRPM/forceflags), we must undo those changes + * so that the recompiler can (and should) use them when it resumes guest + * execution. Otherwise such operations must be done when we can no longer + * exit to ring-3. + * + * @returns Strict VBox status code. + * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts + * have been disabled. + * @retval VINF_EM_RESET if a triple-fault occurs while injecting a + * double-fault into the guest. + * @retval VINF_* scheduling changes, we have to go back to ring-3. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks Called with preemption disabled. In the VINF_SUCCESS return case + * interrupts will be disabled. + */ +static int hmR0VmxPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PGMRZDynMapFlushAutoSet(pVCpu); +#endif + + /* Check force flag actions that might require us to go back to ring-3. */ + int rc = hmR0VmxCheckForceFlags(pVM, pVCpu, pMixedCtx); + if (rc != VINF_SUCCESS) + return rc; + +#ifndef IEM_VERIFICATION_MODE_FULL + /* Setup the Virtualized APIC accesses. pMixedCtx->msrApicBase is always up-to-date. It's not part of the VMCS. */ + if ( pVCpu->hm.s.vmx.u64MsrApicBase != pMixedCtx->msrApicBase + && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) + { + Assert(pVM->hm.s.vmx.HCPhysApicAccess); + RTGCPHYS GCPhysApicBase; + GCPhysApicBase = pMixedCtx->msrApicBase; + GCPhysApicBase &= PAGE_BASE_GC_MASK; + + /* Unalias any existing mapping. */ + rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase); + AssertRCReturn(rc, rc); + + /* Map the HC APIC-access page into the GC space, this also updates the shadow page tables if necessary. */ + Log4(("Mapped HC APIC-access page into GC: GCPhysApicBase=%#RGv\n", GCPhysApicBase)); + rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.vmx.u64MsrApicBase = pMixedCtx->msrApicBase; + } +#endif /* !IEM_VERIFICATION_MODE_FULL */ + + /* Load the guest state bits, we can handle longjmps/getting preempted here. */ + hmR0VmxLoadGuestStateOptimal(pVM, pVCpu, pMixedCtx); + + /* + * Evaluate events as pending-for-injection into the guest. Toggling of force-flags here is safe as long as + * we update TRPM on premature exits to ring-3 before executing guest code. We must NOT restore the force-flags. + */ + if (TRPMHasTrap(pVCpu)) + hmR0VmxTrpmTrapToPendingEvent(pVCpu); + else if (!pVCpu->hm.s.Event.fPending) + hmR0VmxEvaluatePendingEvent(pVCpu, pMixedCtx); + + /* + * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus needs to be done with + * longjmps or interrupts + preemption enabled. Event injection might also result in triple-faulting the VM. + */ + rc = hmR0VmxInjectPendingEvent(pVCpu, pMixedCtx); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + Assert(rc == VINF_EM_RESET); + return rc; + } + + /* + * No longjmps to ring-3 from this point on!!! + * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic. 
+ * This also disables flushing of the R0-logger instance (if any). + */ + VMMRZCallRing3Disable(pVCpu); + + /* + * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.) + * when thread-context hooks aren't used and we've been running with preemption disabled for a while. + * + * We need to check for force-flags that could've possible been altered since we last checked them (e.g. + * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}). + * + * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before + * executing guest code. + */ + pVmxTransient->uEflags = ASMIntDisableFlags(); + if ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + hmR0VmxClearEventVmcs(pVCpu); + ASMSetFlags(pVmxTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + return VINF_EM_RAW_TO_R3; + } + if (RTThreadPreemptIsPending(NIL_RTTHREAD)) + { + hmR0VmxClearEventVmcs(pVCpu); + ASMSetFlags(pVmxTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq); + return VINF_EM_RAW_INTERRUPT; + } + + /* We've injected any pending events. This is really the point of no return (to ring-3). */ + pVCpu->hm.s.Event.fPending = false; + + return VINF_SUCCESS; +} + + +/** + * Prepares to run guest code in VT-x and we've committed to doing so. This + * means there is no backing out to ring-3 or anywhere else at this + * point. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks Called with preemption disabled. + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */ + + /* + * If we are injecting events to a real-on-v86 mode guest, we may have to update + * RIP and some other registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs(). + * Reload only the necessary state, the assertion will catch if other parts of the code + * change. + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx); + hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx); + } + +#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE + if (!CPUMIsGuestFPUStateActive(pVCpu)) + CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); +#endif + + /* + * Load the host state bits as we may've been preempted (only happens when + * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM). + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)) + { + /* This ASSUMES that pfnStartVM has been set up already. 
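+           (pfnStartVM is selected by hmR0VmxSetupVMRunHandler() as part of
+           hmR0VmxLoadGuestState().)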
*/ + int rc = hmR0VmxSaveHostState(pVM, pVCpu); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptSaveHostState); + } + Assert(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)); + + /* + * Load the state shared between host and guest (FPU, debug). + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE)) + hmR0VmxLoadSharedState(pVM, pVCpu, pMixedCtx); + AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + + /* Store status of the shared guest-host state at the time of VM-entry. */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + { + pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu); + pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu); + } + else +#endif + { + pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu); + pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu); + } + pVmxTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu); + + /* + * Cache the TPR-shadow for checking on every VM-exit if it might have changed. + */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[0x80]; + + PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu(); + RTCPUID idCurrentCpu = pCpu->idCpu; + if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer + || idCurrentCpu != pVCpu->hm.s.idLastCpu) + { + hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pMixedCtx); + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false; + } + + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */ + hmR0VmxFlushTaggedTlb(pVCpu, pCpu); /* Invalidate the appropriate guest entries from the TLB. */ + Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu); + pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */ + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x); + + TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about + to start executing. */ + +#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* + * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that + * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}. + */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX); + uint64_t u64HostTscAux = CPUMR0GetGuestTscAux(pVCpu); + ASMWrMsr(MSR_K8_TSC_AUX, u64HostTscAux); + } +#endif +} + + +/** + * Performs some essential restoration of state after running guest code in + * VT-x. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * @param rcVMRun Return code of VMLAUNCH/VMRESUME. + * + * @remarks Called with interrupts disabled, and returns with interrups enabled! + * + * @remarks No-long-jump zone!!! This function will however re-enable longjmps + * unconditionally when it is safe to do so. 
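+ *
+ * @remarks Marks the VMCS as launched (HMVMX_VMCS_STATE_LAUNCHED) so that the
+ *          next VM-entry uses VMRESUME rather than VMLAUNCH.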
+ */ +static void hmR0VmxPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, int rcVMRun) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */ + ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */ + HMVMXCPU_GST_RESET_TO(pVCpu, 0); /* Exits/longjmps to ring-3 requires saving the guest state. */ + pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */ + pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */ + + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT)) + { +#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Restore host's TSC_AUX. */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux); +#endif + /** @todo Find a way to fix hardcoding a guestimate. */ + TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */); + } + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x); + TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */ + Assert(!(ASMGetFlags() & X86_EFL_IF)); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + +#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + } +#endif + + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */ + pVCpu->hm.s.vmx.uVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */ + ASMSetFlags(pVmxTransient->uEflags); /* Enable interrupts. */ + VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */ + + /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */ + uint32_t uExitReason; + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason); + rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); + AssertRC(rc); + pVmxTransient->uExitReason = (uint16_t)VMX_EXIT_REASON_BASIC(uExitReason); + pVmxTransient->fVMEntryFailed = VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uEntryIntInfo); + + /* If the VMLAUNCH/VMRESUME failed, we can bail out early. This does -not- cover VMX_EXIT_ERR_*. */ + if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS)) + { + Log4(("VM-entry failure: pVCpu=%p idCpu=%RU32 rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", pVCpu, pVCpu->idCpu, rcVMRun, + pVmxTransient->fVMEntryFailed)); + return; + } + + if (RT_LIKELY(!pVmxTransient->fVMEntryFailed)) + { + /* Update the guest interruptibility-state from the VMCS. */ + hmR0VmxSaveGuestIntrState(pVCpu, pMixedCtx); +#if defined(HMVMX_SYNC_FULL_GUEST_STATE) || defined(HMVMX_SAVE_FULL_GUEST_STATE) + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRC(rc); +#endif + /* + * If the TPR was raised by the guest, it wouldn't cause a VM-exit immediately. Instead we sync the TPR lazily whenever + * we eventually get a VM-exit for any reason. 
This maybe expensive as PDMApicSetTPR() can longjmp to ring-3 and which is + * why it's done here as it's easier and no less efficient to deal with it here than making hmR0VmxSaveGuestState() + * cope with longjmps safely (see VMCPU_FF_HM_UPDATE_CR3 handling). + */ + if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[0x80]) + { + rc = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]); + AssertRC(rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + } +} + + + +/** + * Runs the guest code using VT-x the normal way. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @note Mostly the same as hmR0VmxRunGuestCodeStep. + * @remarks Called with preemption disabled. + */ +static int hmR0VmxRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + VMXTRANSIENT VmxTransient; + VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; + int rc = VERR_INTERNAL_ERROR_5; + uint32_t cLoops = 0; + + for (;; cLoops++) + { + Assert(!HMR0SuspendPending()); + HMVMX_ASSERT_CPU_SAFE(); + + /* Preparatory work for running guest code, this may force us to return + to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); + rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient); + if (rc != VINF_SUCCESS) + break; + + hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient); + rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx); + /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */ + + /* Restore any residual host-state and save any bits shared between host + and guest into the guest-CPU state. Re-enables interrupts! */ + hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc); + + /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x); + hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient); + return rc; + } + + /* Handle the VM-exit. */ + AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); + STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x); + HMVMX_START_EXIT_DISPATCH_PROF(); +#ifdef HMVMX_USE_FUNCTION_TABLE + rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient); +#else + rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason); +#endif + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x); + if (rc != VINF_SUCCESS) + break; + else if (cLoops > pVM->hm.s.cMaxResumeLoops) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume); + rc = VINF_EM_RAW_INTERRUPT; + break; + } + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); + return rc; +} + + +/** + * Single steps guest code using VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @note Mostly the same as hmR0VmxRunGuestCodeNormal. + * @remarks Called with preemption disabled. 
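+ * @remarks Returns VINF_EM_DBG_STEPPED as soon as RIP or CS differs from the
+ *          values captured at entry (uRipStart/uCsStart).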
+ */ +static int hmR0VmxRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + VMXTRANSIENT VmxTransient; + VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; + int rc = VERR_INTERNAL_ERROR_5; + uint32_t cLoops = 0; + uint16_t uCsStart = pCtx->cs.Sel; + uint64_t uRipStart = pCtx->rip; + + for (;; cLoops++) + { + Assert(!HMR0SuspendPending()); + HMVMX_ASSERT_CPU_SAFE(); + + /* Preparatory work for running guest code, this may force us to return + to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); + rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient); + if (rc != VINF_SUCCESS) + break; + + hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient); + rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx); + /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */ + + /* Restore any residual host-state and save any bits shared between host + and guest into the guest-CPU state. Re-enables interrupts! */ + hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc); + + /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x); + hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient); + return rc; + } + + /* Handle the VM-exit. */ + AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); + STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x); + HMVMX_START_EXIT_DISPATCH_PROF(); +#ifdef HMVMX_USE_FUNCTION_TABLE + rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient); +#else + rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason); +#endif + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x); + if (rc != VINF_SUCCESS) + break; + else if (cLoops > pVM->hm.s.cMaxResumeLoops) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume); + rc = VINF_EM_RAW_INTERRUPT; + break; + } + + /* + * Did the RIP change, if so, consider it a single step. + * Otherwise, make sure one of the TFs gets set. + */ + int rc2 = hmR0VmxSaveGuestRip(pVCpu, pCtx); + rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pCtx); + AssertRCReturn(rc2, rc2); + if ( pCtx->rip != uRipStart + || pCtx->cs.Sel != uCsStart) + { + rc = VINF_EM_DBG_STEPPED; + break; + } + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); + return rc; +} + + +/** + * Runs the guest code using VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks Called with preemption disabled. 
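+ * @remarks Converts VERR_EM_INTERPRETER to VINF_EM_RAW_EMULATE_INSTR and
+ *          VINF_EM_RESET to VINF_EM_TRIPLE_FAULT before passing the status to
+ *          hmR0VmxExitToRing3().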
+ */ +VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL); + HMVMX_ASSERT_PREEMPT_SAFE(); + + VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx); + + int rc; + if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu)) + rc = hmR0VmxRunGuestCodeNormal(pVM, pVCpu, pCtx); + else + rc = hmR0VmxRunGuestCodeStep(pVM, pVCpu, pCtx); + + if (rc == VERR_EM_INTERPRETER) + rc = VINF_EM_RAW_EMULATE_INSTR; + else if (rc == VINF_EM_RESET) + rc = VINF_EM_TRIPLE_FAULT; + + int rc2 = hmR0VmxExitToRing3(pVM, pVCpu, pCtx, rc); + if (RT_FAILURE(rc2)) + { + pVCpu->hm.s.u32HMError = rc; + rc = rc2; + } + Assert(!VMMRZCallRing3IsNotificationSet(pVCpu)); + return rc; +} + + +#ifndef HMVMX_USE_FUNCTION_TABLE +DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason) +{ + int rc; + switch (rcReason) + { + case VMX_EXIT_EPT_MISCONFIG: rc = hmR0VmxExitEptMisconfig(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_EPT_VIOLATION: rc = hmR0VmxExitEptViolation(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_IO_INSTR: rc = hmR0VmxExitIoInstr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_CPUID: rc = hmR0VmxExitCpuid(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDTSC: rc = hmR0VmxExitRdtsc(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDTSCP: rc = hmR0VmxExitRdtscp(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_APIC_ACCESS: rc = hmR0VmxExitApicAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_XCPT_OR_NMI: rc = hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MOV_CRX: rc = hmR0VmxExitMovCRx(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_EXT_INT: rc = hmR0VmxExitExtInt(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INT_WINDOW: rc = hmR0VmxExitIntWindow(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MWAIT: rc = hmR0VmxExitMwait(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MONITOR: rc = hmR0VmxExitMonitor(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TASK_SWITCH: rc = hmR0VmxExitTaskSwitch(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_PREEMPT_TIMER: rc = hmR0VmxExitPreemptTimer(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDMSR: rc = hmR0VmxExitRdmsr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_WRMSR: rc = hmR0VmxExitWrmsr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MOV_DRX: rc = hmR0VmxExitMovDRx(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TPR_BELOW_THRESHOLD: rc = hmR0VmxExitTprBelowThreshold(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_HLT: rc = hmR0VmxExitHlt(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVD: rc = hmR0VmxExitInvd(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVLPG: rc = hmR0VmxExitInvlpg(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RSM: rc = hmR0VmxExitRsm(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MTF: rc = hmR0VmxExitMtf(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_PAUSE: rc = hmR0VmxExitPause(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_XDTR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_WBINVD: rc = hmR0VmxExitWbinvd(pVCpu, pMixedCtx, pVmxTransient); break; + case 
VMX_EXIT_XSETBV: rc = hmR0VmxExitXsetbv(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDRAND: rc = hmR0VmxExitRdrand(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVPCID: rc = hmR0VmxExitInvpcid(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_GETSEC: rc = hmR0VmxExitGetsec(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDPMC: rc = hmR0VmxExitRdpmc(pVCpu, pMixedCtx, pVmxTransient); break; + + case VMX_EXIT_TRIPLE_FAULT: rc = hmR0VmxExitTripleFault(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_NMI_WINDOW: rc = hmR0VmxExitNmiWindow(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INIT_SIGNAL: rc = hmR0VmxExitInitSignal(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_SIPI: rc = hmR0VmxExitSipi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_IO_SMI: rc = hmR0VmxExitIoSmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_SMI: rc = hmR0VmxExitSmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_MSR_LOAD: rc = hmR0VmxExitErrMsrLoad(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_INVALID_GUEST_STATE: rc = hmR0VmxExitErrInvalidGuestState(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_MACHINE_CHECK: rc = hmR0VmxExitErrMachineCheck(pVCpu, pMixedCtx, pVmxTransient); break; + + case VMX_EXIT_VMCALL: + case VMX_EXIT_VMCLEAR: + case VMX_EXIT_VMLAUNCH: + case VMX_EXIT_VMPTRLD: + case VMX_EXIT_VMPTRST: + case VMX_EXIT_VMREAD: + case VMX_EXIT_VMRESUME: + case VMX_EXIT_VMWRITE: + case VMX_EXIT_VMXOFF: + case VMX_EXIT_VMXON: + case VMX_EXIT_INVEPT: + case VMX_EXIT_INVVPID: + case VMX_EXIT_VMFUNC: + rc = hmR0VmxExitSetPendingXcptUD(pVCpu, pMixedCtx, pVmxTransient); + break; + default: + rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient); + break; + } + return rc; +} +#endif + +#ifdef DEBUG +/* Is there some generic IPRT define for this that are not in Runtime/internal/\* ?? */ +# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \ + RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId() + +# define HMVMX_ASSERT_PREEMPT_CPUID() \ + do \ + { \ + RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \ + AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \ + } while (0) + +# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \ + do { \ + AssertPtr(pVCpu); \ + AssertPtr(pMixedCtx); \ + AssertPtr(pVmxTransient); \ + Assert(pVmxTransient->fVMEntryFailed == false); \ + Assert(ASMIntAreEnabled()); \ + HMVMX_ASSERT_PREEMPT_SAFE(); \ + HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \ + Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", pVCpu->idCpu)); \ + HMVMX_ASSERT_PREEMPT_SAFE(); \ + if (VMMR0IsLogFlushDisabled(pVCpu)) \ + HMVMX_ASSERT_PREEMPT_CPUID(); \ + HMVMX_STOP_EXIT_DISPATCH_PROF(); \ + } while (0) + +# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() \ + do { \ + Log4Func(("\n")); \ + } while(0) +#else /* Release builds */ +# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() do { HMVMX_STOP_EXIT_DISPATCH_PROF(); } while(0) +# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() do { } while(0) +#endif + + +/** + * Advances the guest RIP after reading it from the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. 
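+ *
+ * @remarks Also flags a #DB for single-stepping guests via the pending debug
+ *          exception field, see hmR0VmxSetPendingDebugXcpt() in the body.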
+ * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + pMixedCtx->rip += pVmxTransient->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + + /* + * Deliver a debug exception to the guest if it is single-stepping. Don't directly inject a #DB but use the + * pending debug exception field as it takes care of priority of events. + * + * See Intel spec. 32.2.1 "Debug Exceptions". + */ + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + return rc; +} + + +/** + * Tries to determine what part of the guest-state VT-x has deemed as invalid + * and update error record fields accordingly. + * + * @return VMX_IGS_* return codes. + * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything + * wrong with the guest state. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU state. + */ +static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ +#define HMVMX_ERROR_BREAK(err) { uError = (err); break; } +#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \ + uError = (err); \ + break; \ + } else do {} while (0) +/* Duplicate of IEM_IS_CANONICAL(). */ +#define HMVMX_IS_CANONICAL(a_u64Addr) ((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000) < UINT64_C(0x1000000000000)) + + int rc; + uint32_t uError = VMX_IGS_ERROR; + uint32_t u32Val; + bool fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest; + + do + { + /* + * CR0. + */ + uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). + See Intel spec. 26.3.1 "Checks on guest Guest Control Registers, Debug Registers and MSRs." */ + if (fUnrestrictedGuest) + uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG); + + uint32_t u32GuestCR0; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCR0); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK((u32GuestCR0 & uSetCR0) == uSetCR0, VMX_IGS_CR0_FIXED1); + HMVMX_CHECK_BREAK(!(u32GuestCR0 & ~uZapCR0), VMX_IGS_CR0_FIXED0); + if ( !fUnrestrictedGuest + && (u32GuestCR0 & X86_CR0_PG) + && !(u32GuestCR0 & X86_CR0_PE)) + { + HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO); + } + + /* + * CR4. + */ + uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + + uint32_t u32GuestCR4; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCR4); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK((u32GuestCR4 & uSetCR4) == uSetCR4, VMX_IGS_CR4_FIXED1); + HMVMX_CHECK_BREAK(!(u32GuestCR4 & ~uZapCR4), VMX_IGS_CR4_FIXED0); + + /* + * IA32_DEBUGCTL MSR. + */ + uint64_t u64Val; + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val); + AssertRCBreak(rc); + if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. 
*/ + { + HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED); + } + uint64_t u64DebugCtlMsr = u64Val; + +#ifdef VBOX_STRICT + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); + AssertRCBreak(rc); + Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls); +#endif + bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST); + + /* + * RIP and RFLAGS. + */ + uint32_t u32Eflags; +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val); + AssertRCBreak(rc); + /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */ + if ( !fLongModeGuest + || !pCtx->cs.Attr.n.u1Long) + { + HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID); + } + /** @todo If the processor supports N < 64 linear-address bits, bits 63:N + * must be identical if the "IA32e mode guest" VM-entry control is 1 + * and CS.L is 1. No check applies if the CPU supports 64 + * linear-address bits. */ + + /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */ + rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */ + VMX_IGS_RFLAGS_RESERVED); + HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ + u32Eflags = u64Val; + } + else +#endif + { + rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */ + HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ + } + + if ( fLongModeGuest + || ( fUnrestrictedGuest + && !(u32GuestCR0 & X86_CR0_PE))) + { + HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID); + } + + uint32_t u32EntryInfo; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo); + AssertRCBreak(rc); + if ( VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo) + && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID); + } + + /* + * 64-bit checks. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + if ( fLongModeGuest + && !fUnrestrictedGuest) + { + HMVMX_CHECK_BREAK(u32GuestCR0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE); + HMVMX_CHECK_BREAK(u32GuestCR4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE); + } + + if ( !fLongModeGuest + && (u32GuestCR4 & X86_CR4_PCIDE)) + { + HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE); + } + + /** @todo CR3 field must be such that bits 63:52 and bits in the range + * 51:32 beyond the processor's physical-address width are 0. */ + + if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + && (pCtx->dr[7] & X86_DR7_MBZ_MASK)) + { + HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED); + } + + rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL); + + rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL); + } +#endif + + /* + * PERF_GLOBAL MSR. 
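+     * (This and the PAT/EFER checks below are only performed when the
+     * corresponding VM-entry "load MSR" control is active.)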
+     */
+    if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR)
+    {
+        rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
+        AssertRCBreak(rc);
+        HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
+                          VMX_IGS_PERF_GLOBAL_MSR_RESERVED);        /* Bits 63:35, bits 31:2 MBZ. */
+    }
+
+    /*
+     * PAT MSR.
+     */
+    if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR)
+    {
+        rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
+        AssertRCBreak(rc);
+        HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xf8f8f8f8f8f8f8f8)), VMX_IGS_PAT_MSR_RESERVED); /* Bits 7:3 of each entry MBZ. */
+        for (unsigned i = 0; i < 8; i++)
+        {
+            uint8_t u8Val = (u64Val & 0xff);
+            if (   u8Val != 0 /* UC */
+                && u8Val != 1 /* WC */
+                && u8Val != 4 /* WT */
+                && u8Val != 5 /* WP */
+                && u8Val != 6 /* WB */
+                && u8Val != 7 /* UC- */)
+            {
+                HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
+            }
+            u64Val >>= 8;
+        }
+    }
+
+    /*
+     * EFER MSR.
+     */
+    if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
+    {
+        rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
+        AssertRCBreak(rc);
+        HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
+                          VMX_IGS_EFER_MSR_RESERVED);               /* Bits 63:12, bit 9, bits 7:1 MBZ. */
+        HMVMX_CHECK_BREAK(   RT_BOOL(u64Val & MSR_K6_EFER_LMA)
+                          == RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST),
+                          VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
+        HMVMX_CHECK_BREAK(   fUnrestrictedGuest
+                          || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u32GuestCR0 & X86_CR0_PG),
+                          VMX_IGS_EFER_LMA_PG_MISMATCH);
+    }
+
+    /*
+     * Segment registers.
+     */
+    HMVMX_CHECK_BREAK(   (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+                      || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
+    if (!(u32Eflags & X86_EFL_VM))
+    {
+        /* CS */
+        HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
+        HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
+        HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
+        HMVMX_CHECK_BREAK(   (pCtx->cs.u32Limit & 0xfff) == 0xfff
+                          || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+        HMVMX_CHECK_BREAK(   !(pCtx->cs.u32Limit & 0xfff00000)
+                          || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+        /* CS cannot be loaded with NULL in protected mode.
*/ + HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE); + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID); + if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL); + else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH); + else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID); + else + HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID); + + /* SS */ + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL); + if ( !(pCtx->cr0 & X86_CR0_PE) + || pCtx->cs.Attr.n.u4Type == 3) + { + HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID); + } + if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff + || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000) + || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); + } + + /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). 
*/ + if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->ds.Attr.n.u4Type > 11 + || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff + || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000) + || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID); + } + if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->es.Attr.n.u4Type > 11 + || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff + || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000) + || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID); + } + if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->fs.Attr.n.u4Type > 11 + || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff + || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000) + || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID); + } + if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->gs.Attr.n.u4Type > 11 + || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff + || !(pCtx->gs.Attr.n.u1Granularity), 
VMX_IGS_GS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000) + || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID); + } + /* 64-bit capable CPUs. */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) + || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32), + VMX_IGS_LONGMODE_SS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32), + VMX_IGS_LONGMODE_DS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32), + VMX_IGS_LONGMODE_ES_BASE_INVALID); + } +#endif + } + else + { + /* V86 mode checks. */ + uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u32CSAttr = 0xf3; u32SSAttr = 0xf3; + u32DSAttr = 0xf3; u32ESAttr = 0xf3; + u32FSAttr = 0xf3; u32GSAttr = 0xf3; + } + else + { + u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; + u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u; + u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; + } + + /* CS */ + HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID); + /* SS */ + HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID); + /* DS */ + HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID); + /* ES */ + HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID); + /* FS */ + HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID); + /* GS */ + HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID); + /* 64-bit capable CPUs. 
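On such hosts the FS/GS/LDTR bases must be canonical and the upper 32 bits of the CS/SS/DS/ES bases must be zero, as checked below.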
*/ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) + || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32), + VMX_IGS_LONGMODE_SS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32), + VMX_IGS_LONGMODE_DS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32), + VMX_IGS_LONGMODE_ES_BASE_INVALID); + } +#endif + } + + /* + * TR. + */ + HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID); + /* 64-bit capable CPUs. */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL); + } +#endif + if (fLongModeGuest) + { + HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */ + VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID); + } + else + { + HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */ + || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/ + VMX_IGS_TR_ATTR_TYPE_INVALID); + } + HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID); + HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */ + HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff + || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000) + || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE); + + /* + * GDTR and IDTR. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL); + + rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL); + } +#endif + + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ + + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ + + /* + * Guest Non-Register State. + */ + /* Activity State. 
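Must be 'active' or one of the optional states (HLT, shutdown, wait-for-SIPI) that the CPU advertises as supported in the IA32_VMX_MISC MSR.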
*/ + uint32_t u32ActivityState; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK( !u32ActivityState + || (u32ActivityState & MSR_IA32_VMX_MISC_ACTIVITY_STATES(pVM->hm.s.vmx.Msrs.u64Misc)), + VMX_IGS_ACTIVITY_STATE_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl) + || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID); + uint32_t u32IntrState; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &u32IntrState); + AssertRCBreak(rc); + if ( u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS + || u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + { + HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID); + } + + /** @todo Activity state and injecting interrupts. Left as a todo since we + * currently don't use activity states but ACTIVE. */ + + HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM) + || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID); + + /* Guest interruptibility-state. */ + HMVMX_CHECK_BREAK(!(u32IntrState & 0xfffffff0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED); + HMVMX_CHECK_BREAK((u32IntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)) + != ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID); + HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF) + || !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID); + if (VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)) + { + if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + && !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID); + } + else if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID); + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID); + } + } + /** @todo Assumes the processor is not in SMM. */ + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI), + VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID); + HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM) + || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI), + VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID); + if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI) + && VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo) + && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI), + VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID); + } + + /* Pending debug exceptions. */ + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u64Val); + AssertRCBreak(rc); + /* Bits 63:15, Bit 13, Bits 11:4 MBZ. 
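The mask 0xffffffffffffaff0 below encodes exactly these reserved bits; bit 14 (BS) is checked separately further down.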
*/
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
+ u32Val = u64Val; /* For pending debug exceptions checks below. */
+ }
+ else
+ {
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u32Val);
+ AssertRCBreak(rc);
+ /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED);
+ }
+
+ if ( (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
+ || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)
+ || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
+ {
+ if ( (u32Eflags & X86_EFL_TF)
+ && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
+ }
+ if ( !(u32Eflags & X86_EFL_TF)
+ || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
+ }
+ }
+
+ /* VMCS link pointer. */
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
+ AssertRCBreak(rc);
+ if (u64Val != UINT64_C(0xffffffffffffffff))
+ {
+ HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
+ /** @todo Bits beyond the processor's physical-address width MBZ. */
+ /** @todo The 32 bits located in memory referenced by the value of this field (as a
+ * physical address) must contain the processor's VMCS revision ID. */
+ /** @todo SMM checks. */
+ }
+
+ /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is
+ * not using Nested Paging? */
+ if ( pVM->hm.s.fNestedPaging
+ && !fLongModeGuest
+ && CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+ }
+
+ /* Shouldn't happen, but distinguish it from AssertRCBreak() errors. */
+ if (uError == VMX_IGS_ERROR)
+ uError = VMX_IGS_REASON_NOT_FOUND;
+ } while (0);
+
+ pVCpu->hm.s.u32HMError = uError;
+ return uError;
+
+#undef HMVMX_ERROR_BREAK
+#undef HMVMX_CHECK_BREAK
+#undef HMVMX_IS_CANONICAL
+}
+
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/** @name VM-exit handlers.
+ * @{
+ */
+
+/**
+ * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
+ /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}.
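When thread-context hooks are registered we return VINF_SUCCESS and stay in ring-0, avoiding a round trip to ring-3 for every host interrupt; otherwise we return VINF_EM_RAW_INTERRUPT as before.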
*/ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + return VINF_SUCCESS; + return VINF_EM_RAW_INTERRUPT; +} + + +/** + * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI). + */ +HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3); + + int rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntInfo); + Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT) + && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT); + Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo)); + + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + /* + * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we injected it ourselves and + * anything we inject is not going to cause a VM-exit directly for the event being injected. + * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery". + * + * Dispatch the NMI to the host. See Intel spec. 27.5.5 "Updating Non-Register State". + */ + VMXDispatchHostNmi(); + STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return VINF_SUCCESS; + } + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return VINF_SUCCESS; + } + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return rc; + } + + uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo; + uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(uExitIntInfo); + switch (uIntType) + { + case VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT: /* Privileged software exception. (#DB from ICEBP) */ + Assert(uVector == X86_XCPT_DB); + /* no break */ + case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. 
(#BP or #OF) */ + Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT); + /* no break */ + case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT: + { + switch (uVector) + { + case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_NM: rc = hmR0VmxExitXcptNM(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pMixedCtx, pVmxTransient); break; +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; +#endif + default: + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk); + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + Assert(CPUMIsGuestInRealModeEx(pMixedCtx)); + + rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, + 0 /* GCPtrFaultAddress */); + AssertRCReturn(rc, rc); + } + else + { + AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector)); + pVCpu->hm.s.u32HMError = uVector; + rc = VERR_VMX_UNEXPECTED_EXCEPTION; + } + break; + } + } + break; + } + + default: + { + pVCpu->hm.s.u32HMError = uExitIntInfo; + rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE; + AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INTERRUPTION_INFO_TYPE(uExitIntInfo))); + break; + } + } + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return rc; +} + + +/** + * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW). + */ +HMVMX_EXIT_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */ + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT); + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + /* Deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and resume guest execution. 
*/ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW). + */ +HMVMX_EXIT_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + AssertMsgFailed(("Unexpected NMI-window exit.\n")); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd); + return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); +} + + +/** + * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd); + return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); +} + + +/** + * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + else + { + AssertMsgFailed(("hmR0VmxExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid); + return rc; +} + + +/** + * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + if (pMixedCtx->cr4 & X86_CR4_SMXE) + return VINF_EM_RAW_EMULATE_INSTR; + + AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n")); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING) + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + } + else + { + AssertMsgFailed(("hmR0VmxExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc); + return rc; +} + + +/** + * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit. 
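+ * Handled like RDTSC, except the auto-load/store MSRs are synced first so that EM sees the current MSR_K8_TSC_AUX value.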
+ */ +HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); /* For MSR_K8_TSC_AUX */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdtscp(pVM, pVCpu, pMixedCtx); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 3); + /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING) + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + } + else + { + AssertMsgFailed(("hmR0VmxExitRdtscp: EMInterpretRdtscp failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc); + return rc; +} + + +/** + * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); /** @todo review if CR0 is really required by EM. */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + else + { + AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc); + return rc; +} + + +/** + * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Assert(!pVM->hm.s.fNestedPaging); + + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), pVmxTransient->uExitQualification); + rc = VBOXSTRICTRC_VAL(rc2); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitInvlpg: EMInterpretInvlpg %#RX64 failed with %Rrc\n", + pVmxTransient->uExitQualification, rc)); + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg); + return rc; +} + + +/** + * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor); + return rc; +} + + +/** + * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + rc = VBOXSTRICTRC_VAL(rc2); + if (RT_LIKELY( rc == VINF_SUCCESS + || rc == VINF_EM_HALT)) + { + int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc3, rc3); + + if ( rc == VINF_EM_HALT + && EMMonitorWaitShouldContinue(pVCpu, pMixedCtx)) + { + rc = VINF_SUCCESS; + } + } + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER, + ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait); + return rc; +} + + +/** + * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root mode. In theory, we should never + * get this VM-exit. This can happen only if dual-monitor treatment of SMI and VMX is enabled, which can (only?) be done by + * executing VMCALL in VMX root operation. If we get here, something funny is going on. + * See Intel spec. "33.15.5 Enabling the Dual-Monitor Treatment". + */ + AssertMsgFailed(("Unexpected RSM VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * This can only happen if we support dual-monitor treatment of SMI, which can be activated by executing VMCALL in VMX + * root operation. Only an STM (SMM transfer monitor) would get this exit when we (the executive monitor) execute a VMCALL + * in VMX root mode or receive an SMI. If we get here, something funny is going on. + * See Intel spec. "33.15.6 Activating the Dual-Monitor Treatment" and Intel spec. 25.3 "Other Causes of VM-Exits" + */ + AssertMsgFailed(("Unexpected SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */ + AssertMsgFailed(("Unexpected IO SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. We currently + * don't make use of it (see hmR0VmxLoadGuestActivityState()) as our guests don't have direct access to the host LAPIC. + * See Intel spec. 25.3 "Other Causes of VM-exits". + */ + AssertMsgFailed(("Unexpected SIPI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM. + * See Intel spec. 33.14.1 Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON". + * + * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits. + * See Intel spec. "23.8 Restrictions on VMX operation". + */ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + return VINF_EM_RESET; +} + + +/** + * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT); + int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + pMixedCtx->rip++; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + if (EMShouldContinueAfterHalt(pVCpu, pMixedCtx)) /* Requires eflags. */ + rc = VINF_SUCCESS; + else + rc = VINF_EM_HALT; + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); + return rc; +} + + +/** + * VM-exit handler for instructions that result in a #UD exception delivered to + * the guest. + */ +HMVMX_EXIT_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for expiry of the VMX preemption timer. + */ +HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */ + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + + /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + bool fTimersPending = TMTimerPollBool(pVM, pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer); + return fTimersPending ? 
VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS; +} + + +/** + * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* We expose XSETBV to the guest, fallback to the recompiler for emulation. */ + /** @todo check if XSETBV is supported by the recompiler. */ + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* The guest should not invalidate the host CPU's TLBs, fallback to recompiler. */ + /** @todo implement EMInterpretInvpcid() */ + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE). + * Error VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx); + NOREF(uInvalidReason); + +#ifdef VBOX_STRICT + uint32_t uIntrState; + HMVMXHCUINTREG uHCReg; + uint64_t u64Val; + uint32_t u32Val; + + rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); + rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient); + rc |= hmR0VmxReadEntryInstrLenVmcs(pVCpu, pVmxTransient); + rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState); + AssertRCReturn(rc, rc); + + Log4(("uInvalidReason %u\n", uInvalidReason)); + Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo)); + Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode)); + Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr)); + Log4(("VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE %#RX32\n", uIntrState)); + + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); + Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); +#endif + + PVM pVM = pVCpu->CTX_SUFF(pVM); + HMDumpRegs(pVM, pVCpu, pMixedCtx); + + return VERR_VMX_INVALID_GUEST_STATE; +} + + +/** + * VM-exit handler for VM-entry failure due to an MSR-load + * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Unexpected MSR-load exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for VM-entry failure due to a machine-check event + * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Unexpected machine-check event exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for all undefined reasons. Should never ever happen.. in + * theory. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Huh!? Undefined VM-exit reason %d. pVCpu=%p pMixedCtx=%p\n", pVmxTransient->uExitReason, pVCpu, pMixedCtx)); + return VERR_VMX_UNDEFINED_EXIT_CODE; +} + + +/** + * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses + * (VMX_EXIT_XDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR). + * Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess); + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected XDTR access. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdrand); + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected RDRAND exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDMSR (VMX_EXIT_RDMSR). + */ +HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* EMInterpretRdmsr() requires CR0, Eflags and SS segment register. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("CS:RIP=%04x:%#RX64 ECX=%X\n", pMixedCtx->cs.Sel, pMixedCtx->rip, pMixedCtx->ecx)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, + ("hmR0VmxExitRdmsr: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr); + + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + return rc; +} + + +/** + * VM-exit handler for WRMSR (VMX_EXIT_WRMSR). + */ +HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = VINF_SUCCESS; + + /* EMInterpretWrmsr() requires CR0, EFLAGS and SS segment register. 
*/ + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("ecx=%#RX32\n", pMixedCtx->ecx)); + + rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitWrmsr: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr); + + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + + /* If this is an X2APIC WRMSR access, update the APIC state as well. */ + if ( pMixedCtx->ecx >= MSR_IA32_X2APIC_START + && pMixedCtx->ecx <= MSR_IA32_X2APIC_END) + { + /* We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register + * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before + EMInterpretWrmsr() changes it. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + else if (pMixedCtx->ecx == MSR_K6_EFER) /* EFER is the only MSR we auto-load but don't allow write-passthrough. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); + else if (pMixedCtx->ecx == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */ + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + + /* Update MSRs that are part of the VMCS when MSR-bitmaps are not supported. */ + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)) + { + switch (pMixedCtx->ecx) + { + case MSR_IA32_SYSENTER_CS: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break; + case MSR_IA32_SYSENTER_EIP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break; + case MSR_IA32_SYSENTER_ESP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break; + case MSR_K8_FS_BASE: /* no break */ + case MSR_K8_GS_BASE: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); break; + case MSR_K8_KERNEL_GS_BASE: HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); break; + } + } +#ifdef VBOX_STRICT + else + { + /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */ + switch (pMixedCtx->ecx) + { + case MSR_IA32_SYSENTER_CS: + case MSR_IA32_SYSENTER_EIP: + case MSR_IA32_SYSENTER_ESP: + case MSR_K8_FS_BASE: + case MSR_K8_GS_BASE: + { + AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", pMixedCtx->ecx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + + case MSR_K8_LSTAR: + case MSR_K6_STAR: + case MSR_K8_SF_MASK: + case MSR_K8_TSC_AUX: + case MSR_K8_KERNEL_GS_BASE: + { + AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", + pMixedCtx->ecx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + } + } +#endif /* VBOX_STRICT */ + } + return rc; +} + + +/** + * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPause); + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected PAUSE exit. 
pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for when the TPR value is lowered below the specified + * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW); + + /* + * The TPR has already been updated, see hmR0VMXPostRunGuest(). RIP is also updated as part of the VM-exit by VT-x. Update + * the threshold in the VMCS, deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and + * resume guest execution. + */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional + * VM-exit. + * + * @retval VINF_SUCCESS when guest execution can continue. + * @retval VINF_PGM_CHANGE_MODE when shadow paging mode changed, back to ring-3. + * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3. + * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to + * recompiler. + */ +HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2); + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + const RTGCUINTPTR uExitQualification = pVmxTransient->uExitQualification; + const uint32_t uAccessType = VMX_EXIT_QUALIFICATION_CRX_ACCESS(uExitQualification); + PVM pVM = pVCpu->CTX_SUFF(pVM); + switch (uAccessType) + { + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: /* MOV to CRx */ + { +#if 0 + /* EMInterpretCRxWrite() references a lot of guest state (EFER, RFLAGS, Segment Registers, etc.) Sync entire state */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); +#else + rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), + VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification)); + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); + + switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)) + { + case 0: /* CR0 */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + Log4(("CRX CR0 write rc=%d CR0=%#RX64\n", rc, pMixedCtx->cr0)); + break; + case 2: /* CR2 */ + /* Nothing to do here, CR2 it's not part of the VMCS. */ + break; + case 3: /* CR3 */ + Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(pMixedCtx)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR3); + Log4(("CRX CR3 write rc=%d CR3=%#RX64\n", rc, pMixedCtx->cr3)); + break; + case 4: /* CR4 */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR4); + Log4(("CRX CR4 write rc=%d CR4=%#RX64\n", rc, pMixedCtx->cr4)); + break; + case 8: /* CR8 */ + Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)); + /* CR8 contains the APIC TPR. Was updated by EMInterpretCRxWrite(). 
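Mark the APIC state as changed so the new TPR is written back to the virtual APIC page before the next VM-entry.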
*/ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + break; + default: + AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification))); + break; + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: /* MOV from CRx */ + { + /* EMInterpretCRxRead() requires EFER MSR, CS. */ + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert( !pVM->hm.s.fNestedPaging + || !CPUMIsGuestPagingEnabledEx(pMixedCtx) + || VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 3); + + /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */ + Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 8 + || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)); + + rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification), + VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)); + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]); + Log4(("CRX CR%d Read access rc=%d\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), rc)); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */ + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = EMInterpretCLTS(pVM, pVCpu); + AssertRCReturn(rc, rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts); + Log4(("CRX CLTS write rc=%d\n", rc)); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */ + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(uExitQualification)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw); + Log4(("CRX LMSW write rc=%d\n", rc)); + break; + } + + default: + { + AssertMsgFailed(("Invalid access-type in Mov CRx exit qualification %#x\n", uAccessType)); + rc = VERR_VMX_UNEXPECTED_EXCEPTION; + } + } + + /* Validate possible error codes. */ + Assert(rc == VINF_SUCCESS || rc == VINF_PGM_CHANGE_MODE || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_SYNC_CR3 + || rc == VERR_VMX_UNEXPECTED_EXCEPTION); + if (RT_SUCCESS(rc)) + { + int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc2, rc2); + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2); + return rc; +} + + +/** + * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1); + + int rc2 = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc2 |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc2 |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc2 |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* Eflag checks in EMInterpretDisasCurrent(). */ + rc2 |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); /* CR0 checks & PGM* in EMInterpretDisasCurrent(). 
*/ + rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* SELM checks in EMInterpretDisasCurrent(). */ + /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */ + AssertRCReturn(rc2, rc2); + + /* Refer Intel spec. 27-5. "Exit Qualifications for I/O Instructions" for the format. */ + uint32_t uIOPort = VMX_EXIT_QUALIFICATION_IO_PORT(pVmxTransient->uExitQualification); + uint8_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(pVmxTransient->uExitQualification); + bool fIOWrite = ( VMX_EXIT_QUALIFICATION_IO_DIRECTION(pVmxTransient->uExitQualification) + == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT); + bool fIOString = VMX_EXIT_QUALIFICATION_IO_IS_STRING(pVmxTransient->uExitQualification); + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_HMVMX_IPE_1); + + /* I/O operation lookup arrays. */ + static const uint32_t s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */ + static const uint32_t s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving the result (in AL/AX/EAX). */ + + VBOXSTRICTRC rcStrict; + const uint32_t cbValue = s_aIOSizes[uIOWidth]; + const uint32_t cbInstr = pVmxTransient->cbInstr; + bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (fIOString) + { +#if 0 /* Not yet ready. IEM gurus with debian 32-bit guest without NP (on ATA reads). See @bugref{5752#c158}*/ + /* + * INS/OUTS - I/O String instruction. + * + * Use instruction-information if available, otherwise fall back on + * interpreting the instruction. + */ + Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c str\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); + AssertReturn(pMixedCtx->dx == uIOPort, VERR_HMVMX_IPE_2); + if (MSR_IA32_VMX_BASIC_INFO_VMCS_INS_OUTS(pVM->hm.s.vmx.Msrs.u64BasicInfo)) + { + rc2 = hmR0VmxReadExitInstrInfoVmcs(pVCpu, pVmxTransient); + /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */ + rc2 |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_HMVMX_IPE_3); + AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2); + IEMMODE enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize; + bool fRep = VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification); + if (fIOWrite) + { + rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, + pVmxTransient->ExitInstrInfo.StrIo.iSegReg); + } + else + { + /* + * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES. + * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS. + * See Intel Instruction spec. for "INS". + * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS". + */ + rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr); + } + } + else + { + /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */ + rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + rcStrict = IEMExecOne(pVCpu); + } + /** @todo IEM needs to be setting these flags somehow. 
*/ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + fUpdateRipAlready = true; +#else + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); + if (RT_SUCCESS(rcStrict)) + { + if (fIOWrite) + { + rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite); + } + else + { + rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead); + } + } + else + { + AssertMsg(rcStrict == VERR_EM_INTERPRETER, ("rcStrict=%Rrc RIP %#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->rip)); + rcStrict = VINF_EM_RAW_EMULATE_INSTR; + } +#endif + } + else + { + /* + * IN/OUT - I/O instruction. + */ + Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); + const uint32_t uAndVal = s_aIOOpAnd[uIOWidth]; + Assert(!VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification)); + if (fIOWrite) + { + rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pMixedCtx->eax & uAndVal, cbValue); + if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + HMR0SavePendingIOPortWrite(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite); + } + else + { + uint32_t u32Result = 0; + rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue); + if (IOM_SUCCESS(rcStrict)) + { + /* Save result of I/O IN instr. in AL/AX/EAX. */ + pMixedCtx->eax = (pMixedCtx->eax & ~uAndVal) | (u32Result & uAndVal); + } + else if (rcStrict == VINF_IOM_R3_IOPORT_READ) + HMR0SavePendingIOPortRead(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead); + } + } + + if (IOM_SUCCESS(rcStrict)) + { + if (!fUpdateRipAlready) + { + pMixedCtx->rip += cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + } + + /* INS & OUTS with REP prefix modify RFLAGS. */ + if (fIOString) + { + /** @todo Single-step for INS/OUTS with REP prefix? */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS); + } + else if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + /* + * If any I/O breakpoints are armed, we need to check if one triggered + * and take appropriate action. + * Note that the I/O breakpoint type is undefined if CR4.DE is 0. + */ + rc2 = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + + /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the + * execution engines about whether hyper BPs and such are pending. */ + uint32_t const uDr7 = pMixedCtx->dr[7]; + if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK) + && X86_DR7_ANY_RW_IO(uDr7) + && (pMixedCtx->cr4 & X86_CR4_DE)) + || DBGFBpIsHwIoArmed(pVM))) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck); + + /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /*fDr6*/); + + VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pMixedCtx, uIOPort, cbValue); + if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP) + { + /* Raise #DB. 
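An armed I/O breakpoint (guest DR7 or a DBGF hardware I/O breakpoint) matched this port access, so reflect it to the guest as a debug exception.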
*/ + if (fIsGuestDbgActive) + ASMSetDR6(pMixedCtx->dr[6]); + if (pMixedCtx->dr[7] != uDr7) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + + hmR0VmxSetPendingXcptDB(pVCpu, pMixedCtx); + } + /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */ + else if ( rcStrict2 != VINF_SUCCESS + && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict)) + rcStrict = rcStrict2; + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + } + } + +#ifdef DEBUG + if (rcStrict == VINF_IOM_R3_IOPORT_READ) + Assert(!fIOWrite); + else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + Assert(fIOWrite); + else + { + /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST + * statuses, that the VMM device and some others may return. See + * IOM_SUCCESS() for guidance. */ + AssertMsg( RT_FAILURE(rcStrict) + || rcStrict == VINF_SUCCESS + || rcStrict == VINF_EM_RAW_EMULATE_INSTR + || rcStrict == VINF_EM_DBG_BREAKPOINT + || rcStrict == VINF_EM_RAW_GUEST_TRAP + || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } +#endif + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* Check if this task-switch occurred while delivery an event through the guest IDT. */ + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT) + { + rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo)) + { + uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); + + /* Software interrupts and exceptions will be regenerated when the recompiler restarts the instruction. */ + if ( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) + { + uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); + bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo); + + /* Save it as a pending event and it'll be converted to a TRPM event on the way out to ring-3. */ + Assert(!pVCpu->hm.s.Event.fPending); + pVCpu->hm.s.Event.fPending = true; + pVCpu->hm.s.Event.u64IntInfo = pVmxTransient->uIdtVectoringInfo; + rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (fErrorCodeValid) + pVCpu->hm.s.Event.u32ErrCode = pVmxTransient->uIdtVectoringErrorCode; + else + pVCpu->hm.s.Event.u32ErrCode = 0; + if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pVCpu->hm.s.Event.GCPtrFaultAddress = pMixedCtx->cr2; + } + + Log4(("Pending event on TaskSwitch uIntType=%#x uVector=%#x\n", uIntType, uVector)); + } + } + } + + /** @todo Emulate task switch someday, currently just going back to ring-3 for + * emulation. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch); + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG); + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf); + return VINF_EM_DBG_STEPPED; +} + + +/** + * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + +#if 0 + /** @todo Investigate if IOMMMIOPhysHandler() requires a lot of state, for now + * just sync the whole thing. */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); +#else + /* Aggressive state sync. for now. */ + rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Phyiscal Addresses" */ + uint32_t uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(pVmxTransient->uExitQualification); + switch (uAccessType) + { + case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE: + case VMX_APIC_ACCESS_TYPE_LINEAR_READ: + { + if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + && VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification) == 0x80) + { + AssertMsgFailed(("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n")); + } + + RTGCPHYS GCPhys = pMixedCtx->msrApicBase; /* Always up-to-date, msrApicBase is not part of the VMCS. */ + GCPhys &= PAGE_BASE_GC_MASK; + GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Log4(("ApicAccess uAccessType=%#x GCPhys=%#RGv Off=%#x\n", uAccessType, GCPhys, + VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification))); + + VBOXSTRICTRC rc2 = IOMMMIOPhysHandler(pVM, pVCpu, + (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, + CPUMCTX2CORE(pMixedCtx), GCPhys); + rc = VBOXSTRICTRC_VAL(rc2); + Log4(("ApicAccess rc=%d\n", rc)); + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_VMX_GUEST_APIC_STATE); + rc = VINF_SUCCESS; + } + break; + } + + default: + Log4(("ApicAccess uAccessType=%#x\n", uAccessType)); + rc = VINF_EM_RAW_EMULATE_INSTR; + break; + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess); + return rc; +} + + +/** + * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional + * VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* We should -not- get this VM-exit if the guest's debug registers were active. */ + if (pVmxTransient->fWasGuestDebugStateActive) + { + AssertMsgFailed(("Unexpected MOV DRx exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + + int rc = VERR_INTERNAL_ERROR_5; + if ( !DBGFIsStepping(pVCpu) + && !pVCpu->hm.s.fSingleInstruction + && !pVmxTransient->fWasHyperDebugStateActive) + { + /* Don't intercept MOV DRx and #DB any more. */ + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); +#endif + } + + /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + +#ifdef VBOX_WITH_STATISTICS + rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); +#endif + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch); + return VINF_SUCCESS; + } + + /* + * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date, see + * hmR0VmxSaveGuestAutoLoadStoreMsrs(). Update only the segment registers from the CPU. + */ + rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("CS:RIP=%04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) + { + rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification), + VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification)); + if (RT_SUCCESS(rc)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); + } + else + { + rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification), + VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); + } + + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); + if (RT_SUCCESS(rc)) + { + int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc2, rc2); + } + return rc; +} + + +/** + * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG). 
+ * Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + + RTGCPHYS GCPhys = 0; + rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys); + +#if 0 + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */ +#else + /* Aggressive state sync. for now. */ + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + /* + * If we succeed, resume guest execution. + * If we fail in interpreting the instruction because we couldn't get the guest physical address + * of the page containing the instruction via the guest's page tables (we would invalidate the guest page + * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this + * weird case. See @bugref{6043}. + */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pMixedCtx), GCPhys, UINT32_MAX); + rc = VBOXSTRICTRC_VAL(rc2); + Log4(("EPT misconfig at %#RGv RIP=%#RX64 rc=%d\n", GCPhys, pMixedCtx->rip, rc)); + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* Successfully handled MMIO operation. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_VMX_GUEST_APIC_STATE); + rc = VINF_SUCCESS; + } + return rc; +} + + +/** + * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + + RTGCPHYS GCPhys = 0; + rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys); + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); +#if 0 + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */ +#else + /* Aggressive state sync. for now. */ + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". 
*/ + AssertMsg(((pVmxTransient->uExitQualification >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQualification)); + + RTGCUINT uErrorCode = 0; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH) + uErrorCode |= X86_TRAP_PF_ID; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE) + uErrorCode |= X86_TRAP_PF_RW; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT) + uErrorCode |= X86_TRAP_PF_P; + + TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode); + + Log4(("EPT violation %#x at %#RX64 ErrorCode %#x CS:EIP=%04x:%#RX64\n", pVmxTransient->uExitQualification, GCPhys, + uErrorCode, pMixedCtx->cs.Sel, pMixedCtx->rip)); + + /* Handle the pagefault trap for the nested shadow table. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pMixedCtx), GCPhys); + TRPMResetTrap(pVCpu); + + /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */ + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* Successfully synced our nested page tables. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + return VINF_SUCCESS; + } + + Log4(("EPT return to ring-3 rc=%d\n")); + return rc; +} + +/** @} */ + +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */ +/* -=-=-=-=-=-=-=-=-=- VM-exit Exception Handlers -=-=-=-=-=-=-=-=-=-=- */ +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */ + +/** @name VM-exit exception handlers. + * @{ + */ + +/** + * VM-exit exception handler for #MF (Math Fault: floating point exception). + */ +static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); + + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + if (!(pMixedCtx->cr0 & X86_CR0_NE)) + { + /* Old-style FPU error reporting needs some extra work. */ + /** @todo don't fall back to the recompiler, but do it manually. */ + return VERR_EM_INTERPRETER; + } + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return rc; +} + + +/** + * VM-exit exception handler for #BP (Breakpoint exception). + */ +static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); + + /** @todo Try optimize this by not saving the entire guest state unless + * really needed. 
*/ + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (rc == VINF_EM_RAW_GUEST_TRAP) + { + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + } + + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT); + return rc; +} + + +/** + * VM-exit exception handler for #DB (Debug exception). + */ +static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); + Log6(("XcptDB\n")); + + /* + * Get the DR6-like values from the exit qualification and pass it to DBGF + * for processing. + */ + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */ + uint64_t uDR6 = X86_DR6_INIT_VAL; + uDR6 |= ( pVmxTransient->uExitQualification + & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS)); + + rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pMixedCtx), uDR6, pVCpu->hm.s.fSingleInstruction); + if (rc == VINF_EM_RAW_GUEST_TRAP) + { + /* + * The exception was for the guest. Update DR6, DR7.GD and + * IA32_DEBUGCTL.LBR before forwarding it. + * (See Intel spec. 27.1 "Architectural State before a VM-Exit".) + */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + pMixedCtx->dr[6] &= ~X86_DR6_B_MASK; + pMixedCtx->dr[6] |= uDR6; + if (CPUMIsGuestDebugStateActive(pVCpu)) + ASMSetDR6(pMixedCtx->dr[6]); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */ + pMixedCtx->dr[7] &= ~X86_DR7_GD; + + /* Paranoia. */ + pMixedCtx->dr[7] &= ~X86_DR7_RAZ_MASK; + pMixedCtx->dr[7] |= X86_DR7_RA1_MASK; + + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pMixedCtx->dr[7]); + AssertRCReturn(rc, rc); + + /* + * Raise #DB in the guest. + * + * It is important to reflect what the VM-exit gave us (preserving the interruption-type) rather than use + * hmR0VmxSetPendingXcptDB() as the #DB could've been raised while executing ICEBP and not the 'normal' #DB. + * Thus it -may- trigger different handling in the CPU (like skipped DPL checks). See @bugref{6398}. + * + * Since ICEBP isn't documented on Intel, see AMD spec. 15.20 "Event Injection". + */ + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return VINF_SUCCESS; + } + + /* + * Not a guest trap, must be a hypervisor related debug event then. + * Update DR6 in case someone is interested in it. 
+ */ + AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc)); + AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5); + CPUMSetHyperDR6(pVCpu, uDR6); + + return rc; +} + + +/** + * VM-exit exception handler for #NM (Device-not-available exception: floating + * point exception). + */ +static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + + /* We require CR0 and EFER. EFER is always up-to-date. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* We're playing with the host CPU state here, have to disable preemption or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */ + if (pVmxTransient->fWasGuestFPUStateActive) + { + rc = VINF_EM_RAW_GUEST_TRAP; + Assert(CPUMIsGuestFPUStateActive(pVCpu) || HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)); + } + else + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + Assert(!pVmxTransient->fWasGuestFPUStateActive); +#endif + rc = CPUMR0Trap07Handler(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx); + Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu))); + } + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + if (rc == VINF_SUCCESS) + { + /* Guest FPU state was activated, we'll want to change CR0 FPU intercepts before the next VM-reentry. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM); + } + else + { + /* Forward #NM to the guest. */ + Assert(rc == VINF_EM_RAW_GUEST_TRAP); + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, 0 /* error code */, 0 /* GCPtrFaultAddress */); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM); + } + + return VINF_SUCCESS; +} + + +/** + * VM-exit exception handler for #GP (General-protection exception). + * + * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date. + */ +static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); + + int rc = VERR_INTERNAL_ERROR_5; + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */ + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("#GP Gst: RIP %#RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u\n", pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, + pMixedCtx->cr0, CPUMGetGuestCPL(pVCpu))); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return rc; +#else + /* We don't intercept #GP. 
*/ + AssertMsgFailed(("Unexpected VM-exit caused by #GP exception\n")); + return VERR_VMX_UNEXPECTED_EXCEPTION; +#endif + } + + Assert(CPUMIsGuestInRealModeEx(pMixedCtx)); + Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest); + + /* EMInterpretDisasCurrent() requires a lot of the state, save the entire state. */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + uint32_t cbOp = 0; + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); + if (RT_SUCCESS(rc)) + { + rc = VINF_SUCCESS; + Assert(cbOp == pDis->cbInstr); + Log4(("#GP Disas OpCode=%u CS:EIP %04x:%#RX64\n", pDis->pCurInstr->uOpcode, pMixedCtx->cs.Sel, pMixedCtx->rip)); + switch (pDis->pCurInstr->uOpcode) + { + case OP_CLI: + { + pMixedCtx->eflags.Bits.u1IF = 0; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli); + break; + } + + case OP_STI: + { + pMixedCtx->eflags.Bits.u1IF = 1; + pMixedCtx->rip += pDis->cbInstr; + EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip); + Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti); + break; + } + + case OP_HLT: + { + rc = VINF_EM_HALT; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); + break; + } + + case OP_POPF: + { + Log4(("POPF CS:RIP %04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip)); + uint32_t cbParm; + uint32_t uMask; + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + if (pDis->fPrefix & DISPREFIX_OPSIZE) + { + cbParm = 4; + uMask = 0xffffffff; + } + else + { + cbParm = 2; + uMask = 0xffff; + } + + /* Get the stack pointer & pop the contents of the stack onto Eflags. */ + RTGCPTR GCPtrStack = 0; + X86EFLAGS Eflags; + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0, + &GCPtrStack); + if (RT_SUCCESS(rc)) + { + Assert(sizeof(Eflags.u32) >= cbParm); + Eflags.u32 = 0; + rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u32, cbParm); + } + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + Log4(("POPF %#x -> %#RX64 mask=%#x RIP=%#RX64\n", Eflags.u, pMixedCtx->rsp, uMask, pMixedCtx->rip)); + pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask)) + | (Eflags.u32 & X86_EFL_POPF_BITS & uMask); + pMixedCtx->eflags.Bits.u1RF = 0; /* The RF bit is always cleared by POPF; see Intel Instruction reference. */ + pMixedCtx->esp += cbParm; + pMixedCtx->esp &= uMask; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + /* Generate a pending-debug exception when stepping over POPF regardless of how POPF modifies EFLAGS.TF. */ + if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf); + break; + } + + case OP_PUSHF: + { + uint32_t cbParm; + uint32_t uMask; + if (pDis->fPrefix & DISPREFIX_OPSIZE) + { + cbParm = 4; + uMask = 0xffffffff; + } + else + { + cbParm = 2; + uMask = 0xffff; + } + + /* Get the stack pointer & push the contents of eflags onto the stack. 
*/ + RTGCPTR GCPtrStack = 0; + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), (pMixedCtx->esp - cbParm) & uMask, + SELMTOFLAT_FLAGS_CPL0, &GCPtrStack); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + X86EFLAGS Eflags = pMixedCtx->eflags; + /* The RF & VM bits are cleared on image stored on stack; see Intel Instruction reference for PUSHF. */ + Eflags.Bits.u1RF = 0; + Eflags.Bits.u1VM = 0; + + rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u, cbParm); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + Log4(("PUSHF %#x -> %#RGv\n", Eflags.u, GCPtrStack)); + pMixedCtx->esp -= cbParm; + pMixedCtx->esp &= uMask; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf); + break; + } + + case OP_IRET: + { + /** @todo Handle 32-bit operand sizes and check stack limits. See Intel + * instruction reference. */ + RTGCPTR GCPtrStack = 0; + uint32_t uMask = 0xffff; + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + uint16_t aIretFrame[3]; + if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE)) + { + rc = VERR_EM_INTERPRETER; + break; + } + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0, + &GCPtrStack); + if (RT_SUCCESS(rc)) + rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame)); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + pMixedCtx->eip = 0; + pMixedCtx->ip = aIretFrame[0]; + pMixedCtx->cs.Sel = aIretFrame[1]; + pMixedCtx->cs.ValidSel = aIretFrame[1]; + pMixedCtx->cs.u64Base = (uint64_t)pMixedCtx->cs.Sel << 4; + pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask)) + | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask); + pMixedCtx->sp += sizeof(aIretFrame); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_SEGMENT_REGS + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + /* Generate a pending-debug exception when stepping over IRET regardless of how IRET modifies EFLAGS.TF. */ + if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + Log4(("IRET %#RX32 to %04x:%x\n", GCPtrStack, pMixedCtx->cs.Sel, pMixedCtx->ip)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret); + break; + } + + case OP_INT: + { + uint16_t uVector = pDis->Param1.uValue & 0xff; + hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, uVector, pDis->cbInstr); + /* INT clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt); + break; + } + + case OP_INTO: + { + if (pMixedCtx->eflags.Bits.u1OF) + { + hmR0VmxSetPendingXcptOF(pVCpu, pMixedCtx, pDis->cbInstr); + /* INTO clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt); + } + break; + } + + default: + { + VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pMixedCtx), 0 /* pvFault */, + EMCODETYPE_SUPERVISOR); + rc = VBOXSTRICTRC_VAL(rc2); + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); + /** @todo We have to set pending-debug exceptions here when the guest is + * single-stepping depending on the instruction that was interpreted. 
*/ + Log4(("#GP rc=%Rrc\n", rc)); + break; + } + } + } + else + rc = VERR_EM_INTERPRETER; + + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, + ("#GP Unexpected rc=%Rrc\n", rc)); + return rc; +} + + +/** + * VM-exit exception handler wrapper for generic exceptions. Simply re-injects + * the exception reported in the VMX transient structure back into the VM. + * + * @remarks Requires uExitIntInfo in the VMX transient structure to be + * up-to-date. + */ +static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + + /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in + hmR0VmxCheckExitDueToEventDelivery(). */ + int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + Assert(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO); + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return VINF_SUCCESS; +} + + +/** + * VM-exit exception handler for #PF (Page-fault exception). + */ +static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + +#if defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) || defined(HMVMX_ALWAYS_TRAP_PF) + if (pVM->hm.s.fNestedPaging) + { + pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */ + if (RT_LIKELY(!pVmxTransient->fVectoringPF)) + { + pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */ + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + 0 /* cbInstr */, pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQualification); + } + else + { + /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ + hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx); + Log4(("Pending #DF due to vectoring #PF. NP\n")); + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); + return rc; + } +#else + Assert(!pVM->hm.s.fNestedPaging); +#endif + + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + Log4(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQualification, + pMixedCtx->cs.Sel, pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, pMixedCtx->cr3)); + + TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQualification, (RTGCUINT)pVmxTransient->uExitIntErrorCode); + rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pMixedCtx), + (RTGCPTR)pVmxTransient->uExitQualification); + + Log4(("#PF: rc=%Rrc\n", rc)); + if (rc == VINF_SUCCESS) + { + /* Successfully synced shadow pages tables or emulated an MMIO instruction. */ + /** @todo this isn't quite right, what if guest does lgdt with some MMIO + * memory? We don't update the whole state here... 
*/ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_VMX_GUEST_APIC_STATE); + TRPMResetTrap(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF); + return rc; + } + else if (rc == VINF_EM_RAW_GUEST_TRAP) + { + if (!pVmxTransient->fVectoringPF) + { + /* It's a guest page fault and needs to be reflected to the guest. */ + uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu); + TRPMResetTrap(pVCpu); + pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */ + pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */ + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQualification); + } + else + { + /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ + TRPMResetTrap(pVCpu); + pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */ + hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx); + Log4(("#PF: Pending #DF due to vectoring #PF\n")); + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); + return VINF_SUCCESS; + } + + TRPMResetTrap(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM); + return rc; +} + +/** @} */ + diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.h b/src/VBox/VMM/VMMR0/HMVMXR0.h new file mode 100644 index 00000000..46248ccd --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMVMXR0.h @@ -0,0 +1,91 @@ +/* $Id: HMVMXR0.h $ */ +/** @file + * HM VMX (VT-x) - Internal header file. + */ + +/* + * Copyright (C) 2006-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef ___HMVMXR0_h +#define ___HMVMXR0_h + +RT_C_DECLS_BEGIN + +/** @defgroup grp_vmx_int Internal + * @ingroup grp_vmx + * @internal + * @{ + */ + +#ifdef IN_RING0 + +VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu); +VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit); +VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys, bool fEnabledBySystem, + void *pvMsrs); +VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys); +VMMR0DECL(int) VMXR0GlobalInit(void); +VMMR0DECL(void) VMXR0GlobalTerm(void); +VMMR0DECL(int) VMXR0InitVM(PVM pVM); +VMMR0DECL(int) VMXR0TermVM(PVM pVM); +VMMR0DECL(int) VMXR0SetupVM(PVM pVM); +VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu); +VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); +DECLASM(int) VMXR0StartVM32(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); +DECLASM(int) VMXR0StartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); + + +# if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); +VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam, + uint32_t *paParam); +# endif + +/* Cached VMCS accesses -- defined only for 32-bit hosts (with 64-bit guest support). */ +# ifdef VMX_USE_CACHED_VMCS_ACCESSES +VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val); + +DECLINLINE(int) VMXReadCachedVmcsEx(PVMCPU pVCpu, uint32_t idxCache, RTGCUINTREG *pVal) +{ + Assert(idxCache <= VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX); + *pVal = pVCpu->hm.s.vmx.VMCSCache.Read.aFieldVal[idxCache]; + return VINF_SUCCESS; +} +# endif + +# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL +/* Don't use fAllow64BitGuests for VMXReadVmcsGstN() even though it looks right, as it can be forced to 'true'. + HMVMX_IS_64BIT_HOST_MODE() is what we need. */ +# define VMXReadVmcsHstN(idxField, p64Val) HMVMX_IS_64BIT_HOST_MODE() ? \ + VMXReadVmcs64(idxField, p64Val) \ + : (*(p64Val) &= UINT64_C(0xffffffff), \ + VMXReadVmcs32(idxField, (uint32_t *)(p64Val))) +# define VMXReadVmcsGstN VMXReadVmcsHstN +# define VMXReadVmcsGstNByIdxVal VMXReadVmcsGstN +# elif HC_ARCH_BITS == 32 +# define VMXReadVmcsHstN VMXReadVmcs32 +# define VMXReadVmcsGstN(idxField, pVal) VMXReadCachedVmcsEx(pVCpu, idxField##_CACHE_IDX, pVal) +# define VMXReadVmcsGstNByIdxVal(idxField, pVal) VMXReadCachedVmcsEx(pVCpu, idxField, pVal) +# else /* HC_ARCH_BITS == 64 */ +# define VMXReadVmcsHstN VMXReadVmcs64 +# define VMXReadVmcsGstN VMXReadVmcs64 +# define VMXReadVmcsGstNByIdxVal VMXReadVmcs64 +# endif + +#endif /* IN_RING0 */ + +/** @} */ + +RT_C_DECLS_END + +#endif /* ___HMVMXR0_h */ + diff --git a/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac b/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac deleted file mode 100644 index 59350142..00000000 --- a/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac +++ /dev/null @@ -1,901 +0,0 @@ -; $Id: HWACCMR0Mixed.mac $ -;; @file -; HWACCMR0Mixed.mac - Stuff that darwin needs to build two versions of. -; -; Included by HWACCMR0A.asm with RT_ARCH_AMD64 defined or or undefined. -; - -; -; Copyright (C) 2006-2012 Oracle Corporation -; -; This file is part of VirtualBox Open Source Edition (OSE), as -; available from http://www.virtualbox.org. 
This file is free software; -; you can redistribute it and/or modify it under the terms of the GNU -; General Public License (GPL) as published by the Free Software -; Foundation, in version 2 as it comes in the "COPYING" file of the -; VirtualBox OSE distribution. VirtualBox OSE is distributed in the -; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. -; - - -;/** -; * Prepares for and executes VMLAUNCH/VMRESUME (32 bits guest mode) -; * -; * @returns VBox status code -; * @param fResume x86:[ebp+8], msc:rcx,gcc:rdi vmlauch/vmresume -; * @param pCtx x86:[ebp+c], msc:rdx,gcc:rsi Guest context -; * @param pCache x86:[esp+10],msc:r8, gcc:rdx VMCS cache -; */ -ALIGNCODE(16) -BEGINPROC MY_NAME(VMXR0StartVM32) - push xBP - mov xBP, xSP - - pushf - cli - - ;/* Save all general purpose host registers. */ - MYPUSHAD - - ;/* First we have to save some final CPU context registers. */ - mov eax, VMX_VMCS_HOST_RIP -%ifdef RT_ARCH_AMD64 - lea r10, [.vmlaunch_done wrt rip] - vmwrite rax, r10 -%else - mov ecx, .vmlaunch_done - vmwrite eax, ecx -%endif - ;/* Note: assumes success... */ - - ;/* Manual save and restore: - ; * - General purpose registers except RIP, RSP - ; * - ; * Trashed: - ; * - CR2 (we don't care) - ; * - LDTR (reset to 0) - ; * - DRx (presumably not changed at all) - ; * - DR7 (reset to 0x400) - ; * - EFLAGS (reset to RT_BIT(1); not relevant) - ; * - ; */ - - ;/* Save the Guest CPU context pointer. */ -%ifdef RT_ARCH_AMD64 - %ifdef ASM_CALL64_GCC - ; fResume already in rdi - ; pCtx already in rsi - mov rbx, rdx ; pCache - %else - mov rdi, rcx ; fResume - mov rsi, rdx ; pCtx - mov rbx, r8 ; pCache - %endif -%else - mov edi, [ebp + 8] ; fResume - mov esi, [ebp + 12] ; pCtx - mov ebx, [ebp + 16] ; pCache -%endif - - ;/* Save segment registers */ - ; Note: MYPUSHSEGS trashes rdx & rcx, so we moved it here (msvc amd64 case) - MYPUSHSEGS xAX, ax - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] - cmp ecx, 0 - je .no_cached_writes - mov edx, ecx - mov ecx, 0 - jmp .cached_write - -ALIGN(16) -.cached_write: - mov eax, [xBX + VMCSCACHE.Write.aField + xCX*4] - vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX*8] - inc xCX - cmp xCX, xDX - jl .cached_write - - mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 -.no_cached_writes: - - ; Save the pCache pointer - push xBX -%endif - - ; Save the pCtx pointer - push xSI - - ; Save LDTR - xor eax, eax - sldt ax - push xAX - - ; The TR limit is reset to 0x67; restore it manually - str eax - push xAX - - ; VMX only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! - sub xSP, xS*2 - sgdt [xSP] - - sub xSP, xS*2 - sidt [xSP] - -%ifdef VBOX_WITH_DR6_EXPERIMENT - ; Restore DR6 - experiment, not safe! - mov xBX, [xSI + CPUMCTX.dr6] - mov dr6, xBX -%endif - - ; Restore CR2 - mov ebx, [xSI + CPUMCTX.cr2] - mov cr2, xBX - - mov eax, VMX_VMCS_HOST_RSP - vmwrite xAX, xSP - ;/* Note: assumes success... */ - ;/* Don't mess with ESP anymore!! */ - - ;/* Restore Guest's general purpose registers. */ - mov eax, [xSI + CPUMCTX.eax] - mov ebx, [xSI + CPUMCTX.ebx] - mov ecx, [xSI + CPUMCTX.ecx] - mov edx, [xSI + CPUMCTX.edx] - mov ebp, [xSI + CPUMCTX.ebp] - - ; resume or start? - cmp xDI, 0 ; fResume - je .vmlauch_lauch - - ;/* Restore edi & esi. */ - mov edi, [xSI + CPUMCTX.edi] - mov esi, [xSI + CPUMCTX.esi] - - vmresume - jmp .vmlaunch_done; ;/* here if vmresume detected a failure. */ - -.vmlauch_lauch: - ;/* Restore edi & esi. 
*/ - mov edi, [xSI + CPUMCTX.edi] - mov esi, [xSI + CPUMCTX.esi] - - vmlaunch - jmp .vmlaunch_done; ;/* here if vmlaunch detected a failure. */ - -ALIGNCODE(16) ;; @todo YASM BUG - this alignment is wrong on darwin, it's 1 byte off. -.vmlaunch_done: - jc near .vmxstart_invalid_vmxon_ptr - jz near .vmxstart_start_failed - - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - push xDI - mov xDI, [xSP + xS * 3] ; pCtx (*3 to skip the saved LDTR + TR) - - mov [ss:xDI + CPUMCTX.eax], eax - mov [ss:xDI + CPUMCTX.ebx], ebx - mov [ss:xDI + CPUMCTX.ecx], ecx - mov [ss:xDI + CPUMCTX.edx], edx - mov [ss:xDI + CPUMCTX.esi], esi - mov [ss:xDI + CPUMCTX.ebp], ebp -%ifdef RT_ARCH_AMD64 - pop xAX ; the guest edi we pushed above - mov dword [ss:xDI + CPUMCTX.edi], eax -%else - pop dword [ss:xDI + CPUMCTX.edi] ; the guest edi we pushed above -%endif - -%ifdef VBOX_WITH_DR6_EXPERIMENT - ; Save DR6 - experiment, not safe! - mov xAX, dr6 - mov [ss:xDI + CPUMCTX.dr6], xAX -%endif - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. - add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [ss:xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - - add xSP, xS ; pCtx - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - pop xDX ; saved pCache - - mov ecx, [ss:xDX + VMCSCACHE.Read.cValidEntries] - cmp ecx, 0 ; can't happen - je .no_cached_reads - jmp .cached_read - -ALIGN(16) -.cached_read: - dec xCX - mov eax, [ss:xDX + VMCSCACHE.Read.aField + xCX*4] - vmread [ss:xDX + VMCSCACHE.Read.aFieldVal + xCX*8], xAX - cmp xCX, 0 - jnz .cached_read -.no_cached_reads: - - ; Save CR2 for EPT - mov xAX, cr2 - mov [ss:xDX + VMCSCACHE.cr2], xAX -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore general purpose registers - MYPOPAD - - mov eax, VINF_SUCCESS - -.vmstart_end: - popf - pop xBP - ret - - -.vmxstart_invalid_vmxon_ptr: - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. - add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [ss:xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - add xSP, xS*2 ; pCtx + pCache -%else - add xSP, xS ; pCtx -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore all general purpose host registers. - MYPOPAD - mov eax, VERR_VMX_INVALID_VMXON_PTR - jmp .vmstart_end - -.vmxstart_start_failed: - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. 
- add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [ss:xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - add xSP, xS*2 ; pCtx + pCache -%else - add xSP, xS ; pCtx -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore all general purpose host registers. - MYPOPAD - mov eax, VERR_VMX_UNABLE_TO_START_VM - jmp .vmstart_end - -ENDPROC MY_NAME(VMXR0StartVM32) - -%ifdef RT_ARCH_AMD64 -;/** -; * Prepares for and executes VMLAUNCH/VMRESUME (64 bits guest mode) -; * -; * @returns VBox status code -; * @param fResume msc:rcx, gcc:rdi vmlauch/vmresume -; * @param pCtx msc:rdx, gcc:rsi Guest context -; * @param pCache msc:r8, gcc:rdx VMCS cache -; */ -ALIGNCODE(16) -BEGINPROC MY_NAME(VMXR0StartVM64) - push xBP - mov xBP, xSP - - pushf - cli - - ;/* Save all general purpose host registers. */ - MYPUSHAD - - ;/* First we have to save some final CPU context registers. */ - lea r10, [.vmlaunch64_done wrt rip] - mov rax, VMX_VMCS_HOST_RIP ;/* return address (too difficult to continue after VMLAUNCH?) */ - vmwrite rax, r10 - ;/* Note: assumes success... */ - - ;/* Manual save and restore: - ; * - General purpose registers except RIP, RSP - ; * - ; * Trashed: - ; * - CR2 (we don't care) - ; * - LDTR (reset to 0) - ; * - DRx (presumably not changed at all) - ; * - DR7 (reset to 0x400) - ; * - EFLAGS (reset to RT_BIT(1); not relevant) - ; * - ; */ - - ;/* Save the Guest CPU context pointer. */ -%ifdef ASM_CALL64_GCC - ; fResume already in rdi - ; pCtx already in rsi - mov rbx, rdx ; pCache -%else - mov rdi, rcx ; fResume - mov rsi, rdx ; pCtx - mov rbx, r8 ; pCache -%endif - - ;/* Save segment registers */ - ; Note: MYPUSHSEGS trashes rdx & rcx, so we moved it here (msvc amd64 case) - MYPUSHSEGS xAX, ax - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - mov ecx, [xBX + VMCSCACHE.Write.cValidEntries] - cmp ecx, 0 - je .no_cached_writes - mov edx, ecx - mov ecx, 0 - jmp .cached_write - -ALIGN(16) -.cached_write: - mov eax, [xBX + VMCSCACHE.Write.aField + xCX*4] - vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX*8] - inc xCX - cmp xCX, xDX - jl .cached_write - - mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0 -.no_cached_writes: - - ; Save the pCache pointer - push xBX -%endif - -%ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - ; Save the host MSRs and load the guest MSRs - LOADGUESTMSR MSR_K8_LSTAR, CPUMCTX.msrLSTAR - LOADGUESTMSR MSR_K6_STAR, CPUMCTX.msrSTAR - LOADGUESTMSR MSR_K8_SF_MASK, CPUMCTX.msrSFMASK -%endif - ; Kernel GS Base is special, we need to manually load/store it, see @bugref{6208} - LOADGUESTMSR MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE - - ; Save the pCtx pointer - push xSI - - ; Save LDTR - xor eax, eax - sldt ax - push xAX - - ; The TR limit is reset to 0x67; restore it manually - str eax - push xAX - - ; VMX only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly! - sub xSP, xS*2 - sgdt [xSP] - - sub xSP, xS*2 - sidt [xSP] - -%ifdef VBOX_WITH_DR6_EXPERIMENT - ; Restore DR6 - experiment, not safe! - mov xBX, [xSI + CPUMCTX.dr6] - mov dr6, xBX -%endif - - ; Restore CR2 - mov rbx, qword [xSI + CPUMCTX.cr2] - mov cr2, rbx - - mov eax, VMX_VMCS_HOST_RSP - vmwrite xAX, xSP - ;/* Note: assumes success... */ - ;/* Don't mess with ESP anymore!! */ - - ;/* Restore Guest's general purpose registers. 
*/ - mov rax, qword [xSI + CPUMCTX.eax] - mov rbx, qword [xSI + CPUMCTX.ebx] - mov rcx, qword [xSI + CPUMCTX.ecx] - mov rdx, qword [xSI + CPUMCTX.edx] - mov rbp, qword [xSI + CPUMCTX.ebp] - mov r8, qword [xSI + CPUMCTX.r8] - mov r9, qword [xSI + CPUMCTX.r9] - mov r10, qword [xSI + CPUMCTX.r10] - mov r11, qword [xSI + CPUMCTX.r11] - mov r12, qword [xSI + CPUMCTX.r12] - mov r13, qword [xSI + CPUMCTX.r13] - mov r14, qword [xSI + CPUMCTX.r14] - mov r15, qword [xSI + CPUMCTX.r15] - - ; resume or start? - cmp xDI, 0 ; fResume - je .vmlauch64_lauch - - ;/* Restore edi & esi. */ - mov rdi, qword [xSI + CPUMCTX.edi] - mov rsi, qword [xSI + CPUMCTX.esi] - - vmresume - jmp .vmlaunch64_done; ;/* here if vmresume detected a failure. */ - -.vmlauch64_lauch: - ;/* Restore rdi & rsi. */ - mov rdi, qword [xSI + CPUMCTX.edi] - mov rsi, qword [xSI + CPUMCTX.esi] - - vmlaunch - jmp .vmlaunch64_done; ;/* here if vmlaunch detected a failure. */ - -ALIGNCODE(16) -.vmlaunch64_done: - jc near .vmxstart64_invalid_vmxon_ptr - jz near .vmxstart64_start_failed - - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - push xDI - mov xDI, [xSP + xS * 3] ; pCtx (*3 to skip the saved LDTR + TR) - - mov qword [xDI + CPUMCTX.eax], rax - mov qword [xDI + CPUMCTX.ebx], rbx - mov qword [xDI + CPUMCTX.ecx], rcx - mov qword [xDI + CPUMCTX.edx], rdx - mov qword [xDI + CPUMCTX.esi], rsi - mov qword [xDI + CPUMCTX.ebp], rbp - mov qword [xDI + CPUMCTX.r8], r8 - mov qword [xDI + CPUMCTX.r9], r9 - mov qword [xDI + CPUMCTX.r10], r10 - mov qword [xDI + CPUMCTX.r11], r11 - mov qword [xDI + CPUMCTX.r12], r12 - mov qword [xDI + CPUMCTX.r13], r13 - mov qword [xDI + CPUMCTX.r14], r14 - mov qword [xDI + CPUMCTX.r15], r15 - - pop xAX ; the guest edi we pushed above - mov qword [xDI + CPUMCTX.edi], rax - -%ifdef VBOX_WITH_DR6_EXPERIMENT - ; Save DR6 - experiment, not safe! - mov xAX, dr6 - mov [xDI + CPUMCTX.dr6], xAX -%endif - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. - add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - - pop xSI ; pCtx (needed in rsi by the macros below) - - ; Kernel GS Base is special, we need to manually load/store it, see @bugref{6208}. 
- LOADHOSTMSREX MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE -%ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - ; Save the guest MSRs and load the host MSRs - LOADHOSTMSREX MSR_K8_SF_MASK, CPUMCTX.msrSFMASK - LOADHOSTMSREX MSR_K6_STAR, CPUMCTX.msrSTAR - LOADHOSTMSREX MSR_K8_LSTAR, CPUMCTX.msrLSTAR -%endif - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - pop xDX ; saved pCache - - mov ecx, [xDX + VMCSCACHE.Read.cValidEntries] - cmp ecx, 0 ; can't happen - je .no_cached_reads - jmp .cached_read - -ALIGN(16) -.cached_read: - dec xCX - mov eax, [xDX + VMCSCACHE.Read.aField + xCX*4] - vmread [xDX + VMCSCACHE.Read.aFieldVal + xCX*8], xAX - cmp xCX, 0 - jnz .cached_read -.no_cached_reads: - - ; Save CR2 for EPT - mov xAX, cr2 - mov [xDX + VMCSCACHE.cr2], xAX -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore general purpose registers - MYPOPAD - - mov eax, VINF_SUCCESS - -.vmstart64_end: - popf - pop xBP - ret - - -.vmxstart64_invalid_vmxon_ptr: - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. - add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - - pop xSI ; pCtx (needed in rsi by the macros below) - - ; Kernel GS base is special, we need to manually load/store it See @bugref{6208}. - LOADHOSTMSREX MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE -%ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - ; Load the host MSRs - LOADHOSTMSR MSR_K8_SF_MASK - LOADHOSTMSR MSR_K6_STAR - LOADHOSTMSR MSR_K8_LSTAR -%endif - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - add xSP, xS ; pCache -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore all general purpose host registers. - MYPOPAD - mov eax, VERR_VMX_INVALID_VMXON_PTR - jmp .vmstart64_end - -.vmxstart64_start_failed: - ; Restore base and limit of the IDTR & GDTR - lidt [xSP] - add xSP, xS*2 - lgdt [xSP] - add xSP, xS*2 - - ; Restore TSS selector; must mark it as not busy before using ltr (!) - ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p) - ; @todo get rid of sgdt - pop xBX ; saved TR - sub xSP, xS*2 - sgdt [xSP] - mov xAX, xBX - and al, 0F8h ; mask away TI and RPL bits, get descriptor offset. - add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset. - and dword [xAX + 4], ~0200h ; clear busy flag (2nd type2 bit) - ltr bx - add xSP, xS*2 - - pop xAX ; saved LDTR - lldt ax - - pop xSI ; pCtx (needed in rsi by the macros below) - - ; Kernel GS base is special, load it manually. See @bugref{6208}. - LOADHOSTMSREX MSR_K8_KERNEL_GS_BASE, CPUMCTX.msrKERNELGSBASE -%ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - ; Load the host MSRs - LOADHOSTMSR MSR_K8_SF_MASK - LOADHOSTMSR MSR_K6_STAR - LOADHOSTMSR MSR_K8_LSTAR -%endif - -%ifdef VMX_USE_CACHED_VMCS_ACCESSES - add xSP, xS ; pCache -%endif - - ; Restore segment registers - MYPOPSEGS xAX, ax - - ; Restore all general purpose host registers. 
- MYPOPAD - mov eax, VERR_VMX_UNABLE_TO_START_VM - jmp .vmstart64_end -ENDPROC MY_NAME(VMXR0StartVM64) -%endif ; RT_ARCH_AMD64 - - -;/** -; * Prepares for and executes VMRUN (32 bits guests) -; * -; * @returns VBox status code -; * @param HCPhysVMCB Physical address of host VMCB -; * @param HCPhysVMCB Physical address of guest VMCB -; * @param pCtx Guest context -; */ -ALIGNCODE(16) -BEGINPROC MY_NAME(SVMR0VMRun) -%ifdef RT_ARCH_AMD64 ; fake a cdecl stack frame - %ifdef ASM_CALL64_GCC - push rdx - push rsi - push rdi - %else - push r8 - push rdx - push rcx - %endif - push 0 -%endif - push xBP - mov xBP, xSP - pushf - - ;/* Manual save and restore: - ; * - General purpose registers except RIP, RSP, RAX - ; * - ; * Trashed: - ; * - CR2 (we don't care) - ; * - LDTR (reset to 0) - ; * - DRx (presumably not changed at all) - ; * - DR7 (reset to 0x400) - ; */ - - ;/* Save all general purpose host registers. */ - MYPUSHAD - - ;/* Save the Guest CPU context pointer. */ - mov xSI, [xBP + xS*2 + RTHCPHYS_CB*2] ; pCtx - push xSI ; push for saving the state at the end - - ; save host fs, gs, sysenter msr etc - mov xAX, [xBP + xS*2] ; pVMCBHostPhys (64 bits physical address; x86: take low dword only) - push xAX ; save for the vmload after vmrun - vmsave - - ; setup eax for VMLOAD - mov xAX, [xBP + xS*2 + RTHCPHYS_CB] ; pVMCBPhys (64 bits physical address; take low dword only) - - ;/* Restore Guest's general purpose registers. */ - ;/* EAX is loaded from the VMCB by VMRUN */ - mov ebx, [xSI + CPUMCTX.ebx] - mov ecx, [xSI + CPUMCTX.ecx] - mov edx, [xSI + CPUMCTX.edx] - mov edi, [xSI + CPUMCTX.edi] - mov ebp, [xSI + CPUMCTX.ebp] - mov esi, [xSI + CPUMCTX.esi] - - ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch - clgi - sti - - ; load guest fs, gs, sysenter msr etc - vmload - ; run the VM - vmrun - - ;/* EAX is in the VMCB already; we can use it here. */ - - ; save guest fs, gs, sysenter msr etc - vmsave - - ; load host fs, gs, sysenter msr etc - pop xAX ; pushed above - vmload - - ; Set the global interrupt flag again, but execute cli to make sure IF=0. - cli - stgi - - pop xAX ; pCtx - - mov [ss:xAX + CPUMCTX.ebx], ebx - mov [ss:xAX + CPUMCTX.ecx], ecx - mov [ss:xAX + CPUMCTX.edx], edx - mov [ss:xAX + CPUMCTX.esi], esi - mov [ss:xAX + CPUMCTX.edi], edi - mov [ss:xAX + CPUMCTX.ebp], ebp - - ; Restore general purpose registers - MYPOPAD - - mov eax, VINF_SUCCESS - - popf - pop xBP -%ifdef RT_ARCH_AMD64 - add xSP, 4*xS -%endif - ret -ENDPROC MY_NAME(SVMR0VMRun) - -%ifdef RT_ARCH_AMD64 -;/** -; * Prepares for and executes VMRUN (64 bits guests) -; * -; * @returns VBox status code -; * @param HCPhysVMCB Physical address of host VMCB -; * @param HCPhysVMCB Physical address of guest VMCB -; * @param pCtx Guest context -; */ -ALIGNCODE(16) -BEGINPROC MY_NAME(SVMR0VMRun64) - ; fake a cdecl stack frame - %ifdef ASM_CALL64_GCC - push rdx - push rsi - push rdi - %else - push r8 - push rdx - push rcx - %endif - push 0 - push rbp - mov rbp, rsp - pushf - - ;/* Manual save and restore: - ; * - General purpose registers except RIP, RSP, RAX - ; * - ; * Trashed: - ; * - CR2 (we don't care) - ; * - LDTR (reset to 0) - ; * - DRx (presumably not changed at all) - ; * - DR7 (reset to 0x400) - ; */ - - ;/* Save all general purpose host registers. */ - MYPUSHAD - - ;/* Save the Guest CPU context pointer. 
*/ - mov rsi, [rbp + xS*2 + RTHCPHYS_CB*2] ; pCtx - push rsi ; push for saving the state at the end - - ; save host fs, gs, sysenter msr etc - mov rax, [rbp + xS*2] ; pVMCBHostPhys (64 bits physical address; x86: take low dword only) - push rax ; save for the vmload after vmrun - vmsave - - ; setup eax for VMLOAD - mov rax, [rbp + xS*2 + RTHCPHYS_CB] ; pVMCBPhys (64 bits physical address; take low dword only) - - ;/* Restore Guest's general purpose registers. */ - ;/* RAX is loaded from the VMCB by VMRUN */ - mov rbx, qword [xSI + CPUMCTX.ebx] - mov rcx, qword [xSI + CPUMCTX.ecx] - mov rdx, qword [xSI + CPUMCTX.edx] - mov rdi, qword [xSI + CPUMCTX.edi] - mov rbp, qword [xSI + CPUMCTX.ebp] - mov r8, qword [xSI + CPUMCTX.r8] - mov r9, qword [xSI + CPUMCTX.r9] - mov r10, qword [xSI + CPUMCTX.r10] - mov r11, qword [xSI + CPUMCTX.r11] - mov r12, qword [xSI + CPUMCTX.r12] - mov r13, qword [xSI + CPUMCTX.r13] - mov r14, qword [xSI + CPUMCTX.r14] - mov r15, qword [xSI + CPUMCTX.r15] - mov rsi, qword [xSI + CPUMCTX.esi] - - ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch - clgi - sti - - ; load guest fs, gs, sysenter msr etc - vmload - ; run the VM - vmrun - - ;/* RAX is in the VMCB already; we can use it here. */ - - ; save guest fs, gs, sysenter msr etc - vmsave - - ; load host fs, gs, sysenter msr etc - pop rax ; pushed above - vmload - - ; Set the global interrupt flag again, but execute cli to make sure IF=0. - cli - stgi - - pop rax ; pCtx - - mov qword [rax + CPUMCTX.ebx], rbx - mov qword [rax + CPUMCTX.ecx], rcx - mov qword [rax + CPUMCTX.edx], rdx - mov qword [rax + CPUMCTX.esi], rsi - mov qword [rax + CPUMCTX.edi], rdi - mov qword [rax + CPUMCTX.ebp], rbp - mov qword [rax + CPUMCTX.r8], r8 - mov qword [rax + CPUMCTX.r9], r9 - mov qword [rax + CPUMCTX.r10], r10 - mov qword [rax + CPUMCTX.r11], r11 - mov qword [rax + CPUMCTX.r12], r12 - mov qword [rax + CPUMCTX.r13], r13 - mov qword [rax + CPUMCTX.r14], r14 - mov qword [rax + CPUMCTX.r15], r15 - - ; Restore general purpose registers - MYPOPAD - - mov eax, VINF_SUCCESS - - popf - pop rbp - add rsp, 4*xS - ret -ENDPROC MY_NAME(SVMR0VMRun64) -%endif ; RT_ARCH_AMD64 - diff --git a/src/VBox/VMM/VMMR0/HWSVMR0.cpp b/src/VBox/VMM/VMMR0/HWSVMR0.cpp deleted file mode 100644 index 40d9c4af..00000000 --- a/src/VBox/VMM/VMMR0/HWSVMR0.cpp +++ /dev/null @@ -1,3117 +0,0 @@ -/* $Id: HWSVMR0.cpp $ */ -/** @file - * HM SVM (AMD-V) - Host Context Ring-0. - */ - -/* - * Copyright (C) 2006-2012 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
- */ - -/******************************************************************************* -* Header Files * -*******************************************************************************/ -#define LOG_GROUP LOG_GROUP_HWACCM -#include <VBox/vmm/hwaccm.h> -#include <VBox/vmm/pgm.h> -#include <VBox/vmm/selm.h> -#include <VBox/vmm/iom.h> -#include <VBox/vmm/dbgf.h> -#include <VBox/vmm/dbgftrace.h> -#include <VBox/vmm/tm.h> -#include <VBox/vmm/pdmapi.h> -#include "HWACCMInternal.h" -#include <VBox/vmm/vm.h> -#include <VBox/vmm/hwacc_svm.h> -#include <VBox/err.h> -#include <VBox/log.h> -#include <VBox/dis.h> -#include <VBox/disopcode.h> -#include <iprt/param.h> -#include <iprt/assert.h> -#include <iprt/asm.h> -#include <iprt/asm-amd64-x86.h> -#include <iprt/cpuset.h> -#include <iprt/mp.h> -#include <iprt/time.h> -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION -# include <iprt/thread.h> -#endif -#include <iprt/x86.h> -#include "HWSVMR0.h" - -#include "dtrace/VBoxVMM.h" - - -/******************************************************************************* -* Internal Functions * -*******************************************************************************/ -static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame); -static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); -static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite); - - -/******************************************************************************* -* Global Variables * -*******************************************************************************/ -/* IO operation lookup arrays. */ -static uint32_t const g_aIOSize[8] = {0, 1, 2, 0, 4, 0, 0, 0}; -static uint32_t const g_aIOOpAnd[8] = {0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0}; - - -/** - * Sets up and activates AMD-V on the current CPU. - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pVM Pointer to the VM (can be NULL after a resume!). - * @param pvCpuPage Pointer to the global CPU page. - * @param HCPhysCpuPage Physical address of the global CPU page. - */ -VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost) -{ - AssertReturn(!fEnabledByHost, VERR_INVALID_PARAMETER); - AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); - AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); - - /* - * We must turn on AMD-V and setup the host state physical address, as those MSRs are per cpu/core. - */ - uint64_t fEfer = ASMRdMsr(MSR_K6_EFER); - if (fEfer & MSR_K6_EFER_SVME) - { - /* - * If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is active, then we blindly use AMD-V. - */ - if ( pVM - && pVM->hwaccm.s.svm.fIgnoreInUseError) - { - pCpu->fIgnoreAMDVInUseError = true; - } - - if (!pCpu->fIgnoreAMDVInUseError) - return VERR_SVM_IN_USE; - } - - /* Turn on AMD-V in the EFER MSR. */ - ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME); - - /* Write the physical page address where the CPU will store the host state while executing the VM. */ - ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage); - - /* - * Theoretically, other hypervisors may have used ASIDs, ideally we should flush all non-zero ASIDs - * when enabling SVM. AMD doesn't have an SVM instruction to flush all ASIDs (flushing is done - * upon VMRUN). Therefore, just set the fFlushASIDBeforeUse flag which instructs hmR0SvmSetupTLB() - * to flush the TLB with before using a new ASID. 
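As a side note on SVMR0EnableCpu above, here is a minimal, self-contained C sketch of its EFER.SVME decision; the MSR side effects are reduced to plain parameters and the helper name is illustrative, not part of the deleted code.

#include <stdbool.h>

/* Models the check in SVMR0EnableCpu: if EFER.SVME is already set and the
   VBOX_HWVIRTEX_IGNORE_SVM_IN_USE override is not active, AMD-V is treated as
   in use by someone else; otherwise the real code sets MSR_K6_EFER_SVME and
   points MSR_K8_VM_HSAVE_PA at the per-CPU host save area (omitted here). */
static bool svmModelCanEnableCpu(bool fEferSvmeAlreadySet, bool fIgnoreInUseError)
{
    if (fEferSvmeAlreadySet && !fIgnoreInUseError)
        return false;   /* corresponds to returning VERR_SVM_IN_USE */
    return true;        /* corresponds to the VINF_SUCCESS path */
}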
- */ - pCpu->fFlushASIDBeforeUse = true; - - /* - * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. - */ - ++pCpu->cTLBFlushes; - - return VINF_SUCCESS; -} - - -/** - * Deactivates AMD-V on the current CPU. - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pvCpuPage Pointer to the global CPU page. - * @param HCPhysCpuPage Physical address of the global CPU page. - */ -VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) -{ - AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); - AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); - NOREF(pCpu); - - /* Turn off AMD-V in the EFER MSR. */ - uint64_t fEfer = ASMRdMsr(MSR_K6_EFER); - ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME); - - /* Invalidate host state physical address. */ - ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0); - - return VINF_SUCCESS; -} - - -/** - * Does Ring-0 per VM AMD-V init. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) SVMR0InitVM(PVM pVM) -{ - int rc; - - pVM->hwaccm.s.svm.pMemObjIOBitmap = NIL_RTR0MEMOBJ; - - /* Allocate 12 KB for the IO bitmap (doesn't seem to be a way to convince SVM not to use it) */ - rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.svm.pMemObjIOBitmap, 3 << PAGE_SHIFT, true /* executable R0 mapping */); - if (RT_FAILURE(rc)) - return rc; - - pVM->hwaccm.s.svm.pIOBitmap = RTR0MemObjAddress(pVM->hwaccm.s.svm.pMemObjIOBitmap); - pVM->hwaccm.s.svm.pIOBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.svm.pMemObjIOBitmap, 0); - /* Set all bits to intercept all IO accesses. */ - ASMMemFill32(pVM->hwaccm.s.svm.pIOBitmap, 3 << PAGE_SHIFT, 0xffffffff); - - /* - * Erratum 170 which requires a forced TLB flush for each world switch: - * See http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/33610.pdf - * - * All BH-G1/2 and DH-G1/2 models include a fix: - * Athlon X2: 0x6b 1/2 - * 0x68 1/2 - * Athlon 64: 0x7f 1 - * 0x6f 2 - * Sempron: 0x7f 1/2 - * 0x6f 2 - * 0x6c 2 - * 0x7c 2 - * Turion 64: 0x68 2 - */ - uint32_t u32Dummy; - uint32_t u32Version, u32Family, u32Model, u32Stepping, u32BaseFamily; - ASMCpuId(1, &u32Version, &u32Dummy, &u32Dummy, &u32Dummy); - u32BaseFamily = (u32Version >> 8) & 0xf; - u32Family = u32BaseFamily + (u32BaseFamily == 0xf ? ((u32Version >> 20) & 0x7f) : 0); - u32Model = ((u32Version >> 4) & 0xf); - u32Model = u32Model | ((u32BaseFamily == 0xf ? (u32Version >> 16) & 0x0f : 0) << 4); - u32Stepping = u32Version & 0xf; - if ( u32Family == 0xf - && !((u32Model == 0x68 || u32Model == 0x6b || u32Model == 0x7f) && u32Stepping >= 1) - && !((u32Model == 0x6f || u32Model == 0x6c || u32Model == 0x7c) && u32Stepping >= 2)) - { - Log(("SVMR0InitVM: AMD cpu with erratum 170 family %x model %x stepping %x\n", u32Family, u32Model, u32Stepping)); - pVM->hwaccm.s.svm.fAlwaysFlushTLB = true; - } - - /* Allocate VMCBs for all guest CPUs. 
*/ - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - - pVCpu->hwaccm.s.svm.pMemObjVMCBHost = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.svm.pMemObjVMCB = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.svm.pMemObjMSRBitmap = NIL_RTR0MEMOBJ; - - /* Allocate one page for the host context */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjVMCBHost, 1 << PAGE_SHIFT, true /* executable R0 mapping */); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.svm.pVMCBHost = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjVMCBHost); - pVCpu->hwaccm.s.svm.pVMCBHostPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjVMCBHost, 0); - Assert(pVCpu->hwaccm.s.svm.pVMCBHostPhys < _4G); - ASMMemZeroPage(pVCpu->hwaccm.s.svm.pVMCBHost); - - /* Allocate one page for the VM control block (VMCB). */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjVMCB, 1 << PAGE_SHIFT, true /* executable R0 mapping */); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.svm.pVMCB = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjVMCB); - pVCpu->hwaccm.s.svm.pVMCBPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjVMCB, 0); - Assert(pVCpu->hwaccm.s.svm.pVMCBPhys < _4G); - ASMMemZeroPage(pVCpu->hwaccm.s.svm.pVMCB); - - /* Allocate 8 KB for the MSR bitmap (doesn't seem to be a way to convince SVM not to use it) */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, 2 << PAGE_SHIFT, true /* executable R0 mapping */); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.svm.pMSRBitmap = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap); - pVCpu->hwaccm.s.svm.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, 0); - /* Set all bits to intercept all MSR accesses. */ - ASMMemFill32(pVCpu->hwaccm.s.svm.pMSRBitmap, 2 << PAGE_SHIFT, 0xffffffff); - } - - return VINF_SUCCESS; -} - - -/** - * Does Ring-0 per VM AMD-V termination. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) SVMR0TermVM(PVM pVM) -{ - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - - if (pVCpu->hwaccm.s.svm.pMemObjVMCBHost != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjVMCBHost, false); - pVCpu->hwaccm.s.svm.pVMCBHost = 0; - pVCpu->hwaccm.s.svm.pVMCBHostPhys = 0; - pVCpu->hwaccm.s.svm.pMemObjVMCBHost = NIL_RTR0MEMOBJ; - } - - if (pVCpu->hwaccm.s.svm.pMemObjVMCB != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjVMCB, false); - pVCpu->hwaccm.s.svm.pVMCB = 0; - pVCpu->hwaccm.s.svm.pVMCBPhys = 0; - pVCpu->hwaccm.s.svm.pMemObjVMCB = NIL_RTR0MEMOBJ; - } - if (pVCpu->hwaccm.s.svm.pMemObjMSRBitmap != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, false); - pVCpu->hwaccm.s.svm.pMSRBitmap = 0; - pVCpu->hwaccm.s.svm.pMSRBitmapPhys = 0; - pVCpu->hwaccm.s.svm.pMemObjMSRBitmap = NIL_RTR0MEMOBJ; - } - } - if (pVM->hwaccm.s.svm.pMemObjIOBitmap != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVM->hwaccm.s.svm.pMemObjIOBitmap, false); - pVM->hwaccm.s.svm.pIOBitmap = 0; - pVM->hwaccm.s.svm.pIOBitmapPhys = 0; - pVM->hwaccm.s.svm.pMemObjIOBitmap = NIL_RTR0MEMOBJ; - } - return VINF_SUCCESS; -} - - -/** - * Sets up AMD-V for the specified VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. 
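The erratum 170 check in SVMR0InitVM above decodes the AMD family/model/stepping from CPUID leaf 1 EAX; the following self-contained sketch reproduces that decoding (the function name is illustrative).

#include <stdint.h>

/* Decode family/model/stepping from CPUID.01H:EAX as the erratum 170 check
   does: the extended family/model fields only contribute when the base family
   is 0xF. */
static void svmDecodeFamilyModelStepping(uint32_t uVersion, uint32_t *puFamily,
                                         uint32_t *puModel, uint32_t *puStepping)
{
    uint32_t uBaseFamily = (uVersion >> 8) & 0xf;
    *puFamily   = uBaseFamily + (uBaseFamily == 0xf ? ((uVersion >> 20) & 0x7f) : 0);
    *puModel    = ((uVersion >> 4) & 0xf)
                | ((uBaseFamily == 0xf ? (uVersion >> 16) & 0x0f : 0) << 4);
    *puStepping = uVersion & 0xf;
}

Family 0xF parts whose model/stepping are not in the fixed BH-G1/2 / DH-G1/2 list make SVMR0InitVM set fAlwaysFlushTLB, which forces a full TLB flush on every world switch.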
- */ -VMMR0DECL(int) SVMR0SetupVM(PVM pVM) -{ - int rc = VINF_SUCCESS; - - AssertReturn(pVM, VERR_INVALID_PARAMETER); - Assert(pVM->hwaccm.s.svm.fSupported); - - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - SVM_VMCB *pVMCB = (SVM_VMCB *)pVM->aCpus[i].hwaccm.s.svm.pVMCB; - - AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB); - - /* - * Program the control fields. Most of them never have to be changed again. - * CR0/4 reads must be intercepted, our shadow values are not necessarily the same as the guest's. - * Note: CR0 & CR4 can be safely read when guest and shadow copies are identical. - */ - pVMCB->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4); - - /* CR0/4 writes must be intercepted for obvious reasons. */ - pVMCB->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4); - - /* Intercept all DRx reads and writes by default. Changed later on. */ - pVMCB->ctrl.u16InterceptRdDRx = 0xFFFF; - pVMCB->ctrl.u16InterceptWrDRx = 0xFFFF; - - /* Intercept traps; only #NM is always intercepted. */ - pVMCB->ctrl.u32InterceptException = RT_BIT(X86_XCPT_NM); -#ifdef VBOX_ALWAYS_TRAP_PF - pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF); -#endif -#ifdef VBOX_STRICT - pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_BP) - | RT_BIT(X86_XCPT_DB) - | RT_BIT(X86_XCPT_DE) - | RT_BIT(X86_XCPT_UD) - | RT_BIT(X86_XCPT_NP) - | RT_BIT(X86_XCPT_SS) - | RT_BIT(X86_XCPT_GP) - | RT_BIT(X86_XCPT_MF) - ; -#endif - - /* Set up instruction and miscellaneous intercepts. */ - pVMCB->ctrl.u32InterceptCtrl1 = SVM_CTRL1_INTERCEPT_INTR - | SVM_CTRL1_INTERCEPT_VINTR - | SVM_CTRL1_INTERCEPT_NMI - | SVM_CTRL1_INTERCEPT_SMI - | SVM_CTRL1_INTERCEPT_INIT - | SVM_CTRL1_INTERCEPT_RDPMC - | SVM_CTRL1_INTERCEPT_CPUID - | SVM_CTRL1_INTERCEPT_RSM - | SVM_CTRL1_INTERCEPT_HLT - | SVM_CTRL1_INTERCEPT_INOUT_BITMAP - | SVM_CTRL1_INTERCEPT_MSR_SHADOW - | SVM_CTRL1_INTERCEPT_INVLPGA /* AMD only */ - | SVM_CTRL1_INTERCEPT_SHUTDOWN /* fatal */ - | SVM_CTRL1_INTERCEPT_FERR_FREEZE; /* Legacy FPU FERR handling. */ - ; - pVMCB->ctrl.u32InterceptCtrl2 = SVM_CTRL2_INTERCEPT_VMRUN /* required */ - | SVM_CTRL2_INTERCEPT_VMMCALL - | SVM_CTRL2_INTERCEPT_VMLOAD - | SVM_CTRL2_INTERCEPT_VMSAVE - | SVM_CTRL2_INTERCEPT_STGI - | SVM_CTRL2_INTERCEPT_CLGI - | SVM_CTRL2_INTERCEPT_SKINIT - | SVM_CTRL2_INTERCEPT_WBINVD - | SVM_CTRL2_INTERCEPT_MONITOR - | SVM_CTRL2_INTERCEPT_MWAIT_UNCOND; /* don't execute mwait or else we'll idle inside the - guest (host thinks the cpu load is high) */ - - Log(("pVMCB->ctrl.u32InterceptException = %x\n", pVMCB->ctrl.u32InterceptException)); - Log(("pVMCB->ctrl.u32InterceptCtrl1 = %x\n", pVMCB->ctrl.u32InterceptCtrl1)); - Log(("pVMCB->ctrl.u32InterceptCtrl2 = %x\n", pVMCB->ctrl.u32InterceptCtrl2)); - - /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */ - pVMCB->ctrl.IntCtrl.n.u1VIrqMasking = 1; - - /* Ignore the priority in the TPR; just deliver it when we tell it to. */ - pVMCB->ctrl.IntCtrl.n.u1IgnoreTPR = 1; - - /* Set IO and MSR bitmap addresses. */ - pVMCB->ctrl.u64IOPMPhysAddr = pVM->hwaccm.s.svm.pIOBitmapPhys; - pVMCB->ctrl.u64MSRPMPhysAddr = pVCpu->hwaccm.s.svm.pMSRBitmapPhys; - - /* No LBR virtualization. */ - pVMCB->ctrl.u64LBRVirt = 0; - - /* The ASID must start at 1; the host uses 0. */ - pVMCB->ctrl.TLBCtrl.n.u32ASID = 1; - - /* - * Setup the PAT MSR (nested paging only) - * The default value should be 0x0007040600070406ULL, but we want to treat all guest memory as WB, - * so choose type 6 for all PAT slots. 
- */ - pVMCB->guest.u64GPAT = 0x0006060606060606ULL; - - /* If nested paging is not in use, additional intercepts have to be set up. */ - if (!pVM->hwaccm.s.fNestedPaging) - { - /* CR3 reads/writes must be intercepted; our shadow values are different from guest's. */ - pVMCB->ctrl.u16InterceptRdCRx |= RT_BIT(3); - pVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(3); - - /* - * We must also intercept: - * - INVLPG (must go through shadow paging) - * - task switches (may change CR3/EFLAGS/LDT) - */ - pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_INVLPG - | SVM_CTRL1_INTERCEPT_TASK_SWITCH; - - /* Page faults must be intercepted to implement shadow paging. */ - pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF); - } - - /* - * The following MSRs are saved automatically by vmload/vmsave, so we allow the guest - * to modify them directly. - */ - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_CSTAR, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K6_STAR, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true); - hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true); - } - - return rc; -} - - -/** - * Sets the permission bits for the specified MSR. - * - * @param pVCpu Pointer to the VMCPU. - * @param ulMSR MSR value. - * @param fRead Whether reading is allowed. - * @param fWrite Whether writing is allowed. - */ -static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite) -{ - unsigned ulBit; - uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.svm.pMSRBitmap; - - if (ulMSR <= 0x00001FFF) - { - /* Pentium-compatible MSRs */ - ulBit = ulMSR * 2; - } - else if ( ulMSR >= 0xC0000000 - && ulMSR <= 0xC0001FFF) - { - /* AMD Sixth Generation x86 Processor MSRs and SYSCALL */ - ulBit = (ulMSR - 0xC0000000) * 2; - pMSRBitmap += 0x800; - } - else if ( ulMSR >= 0xC0010000 - && ulMSR <= 0xC0011FFF) - { - /* AMD Seventh and Eighth Generation Processor MSRs */ - ulBit = (ulMSR - 0xC0001000) * 2; - pMSRBitmap += 0x1000; - } - else - { - AssertFailed(); - return; - } - Assert(ulBit < 16 * 1024 - 1); - if (fRead) - ASMBitClear(pMSRBitmap, ulBit); - else - ASMBitSet(pMSRBitmap, ulBit); - - if (fWrite) - ASMBitClear(pMSRBitmap, ulBit + 1); - else - ASMBitSet(pMSRBitmap, ulBit + 1); -} - - -/** - * Injects an event (trap or external interrupt). - * - * @param pVCpu Pointer to the VMCPU. - * @param pVMCB Pointer to the VMCB. - * @param pCtx Pointer to the guest CPU context. - * @param pIntInfo Pointer to the SVM interrupt info. 
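hmR0SvmSetMSRPermission above packs two bits per MSR into the 8 KB bitmap (read intercept first, then write intercept), with the 0xC0000000 and 0xC0010000 ranges living in their own 2 KB sub-tables; below is a simplified, self-contained sketch of the indexing for the Pentium-compatible range only, with an illustrative helper name and a caller-supplied bitmap.

#include <stdbool.h>
#include <stdint.h>

/* Clearing a bit permits the access, setting it intercepts it -- the same
   convention as the deleted function. Only MSRs <= 0x1FFF are handled here;
   the AMD ranges sit at byte offsets 0x800 and 0x1000 of the same bitmap. */
static void svmModelSetMsrPermission(uint8_t *pbMsrBitmap, uint32_t uMsr,
                                     bool fAllowRead, bool fAllowWrite)
{
    if (uMsr > 0x00001FFF)
        return;                              /* sketch: low range only */
    uint32_t uBit = uMsr * 2;                /* two bits per MSR */
    if (fAllowRead)
        pbMsrBitmap[uBit / 8] &= (uint8_t)~(1u << (uBit % 8));
    else
        pbMsrBitmap[uBit / 8] |= (uint8_t)(1u << (uBit % 8));
    uBit++;                                  /* the write-intercept bit follows */
    if (fAllowWrite)
        pbMsrBitmap[uBit / 8] &= (uint8_t)~(1u << (uBit % 8));
    else
        pbMsrBitmap[uBit / 8] |= (uint8_t)(1u << (uBit % 8));
}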
- */ -DECLINLINE(void) hmR0SvmInjectEvent(PVMCPU pVCpu, SVM_VMCB *pVMCB, CPUMCTX *pCtx, SVM_EVENT *pEvent) -{ -#ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]); -#endif - -#ifdef VBOX_STRICT - if (pEvent->n.u8Vector == 0xE) - { - Log(("SVM: Inject int %d at %RGv error code=%02x CR2=%RGv intInfo=%08x\n", pEvent->n.u8Vector, - (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode, (RTGCPTR)pCtx->cr2, pEvent->au64[0])); - } - else if (pEvent->n.u8Vector < 0x20) - Log(("SVM: Inject int %d at %RGv error code=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode)); - else - { - Log(("INJ-EI: %x at %RGv\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip)); - Assert(!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); - Assert(pCtx->eflags.u32 & X86_EFL_IF); - } -#endif - - /* Set event injection state. */ - pVMCB->ctrl.EventInject.au64[0] = pEvent->au64[0]; -} - - -/** - * Checks for pending guest interrupts and injects them. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pVMCB Pointer to the VMCB. - * @param pCtx Pointer to the guest CPU Context. - */ -static int hmR0SvmCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, SVM_VMCB *pVMCB, CPUMCTX *pCtx) -{ - int rc; - NOREF(pVM); - - /* - * Dispatch any pending interrupts (injected before, but a VM-exit occurred prematurely). - */ - if (pVCpu->hwaccm.s.Event.fPending) - { - SVM_EVENT Event; - - Log(("Reinjecting event %08x %08x at %RGv\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, - (RTGCPTR)pCtx->rip)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject); - Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo; - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - - pVCpu->hwaccm.s.Event.fPending = false; - return VINF_SUCCESS; - } - - /* - * If an active trap is already pending, we must forward it first! - */ - if (!TRPMHasTrap(pVCpu)) - { - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI)) - { - SVM_EVENT Event; - - Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu)); - Event.n.u8Vector = X86_XCPT_NMI; - Event.n.u1Valid = 1; - Event.n.u32ErrorCode = 0; - Event.n.u3Type = SVM_EVENT_NMI; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - return VINF_SUCCESS; - } - - /** @todo SMI interrupts. */ - - /* - * When external interrupts are pending, we should exit the VM when IF is set. - */ - if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC))) - { - if ( !(pCtx->eflags.u32 & X86_EFL_IF) - || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - { - if (!pVMCB->ctrl.IntCtrl.n.u1VIrqValid) - { - if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - LogFlow(("Enable irq window exit!\n")); - else - { - Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS -> irq window exit\n", - (RTGCPTR)pCtx->rip)); - } - - /** @todo Use virtual interrupt method to inject a pending IRQ; dispatched as - * soon as guest.IF is set. 
*/ - pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_VINTR; - pVMCB->ctrl.IntCtrl.n.u1VIrqValid = 1; - pVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0; /* don't care */ - } - } - else - { - uint8_t u8Interrupt; - - rc = PDMGetInterrupt(pVCpu, &u8Interrupt); - Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc\n", u8Interrupt, u8Interrupt, rc)); - if (RT_SUCCESS(rc)) - { - rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT); - AssertRC(rc); - } - else - { - /* Can only happen in rare cases where a pending interrupt is cleared behind our back */ - Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC))); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq); - /* Just continue */ - } - } - } - } - -#ifdef VBOX_STRICT - if (TRPMHasTrap(pVCpu)) - { - uint8_t u8Vector; - rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0); - AssertRC(rc); - } -#endif - - if ( (pCtx->eflags.u32 & X86_EFL_IF) - && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - && TRPMHasTrap(pVCpu) - ) - { - uint8_t u8Vector; - TRPMEVENT enmType; - SVM_EVENT Event; - RTGCUINT u32ErrorCode; - - Event.au64[0] = 0; - - /* If a new event is pending, then dispatch it now. */ - rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &u32ErrorCode, 0); - AssertRC(rc); - Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP); - Assert(enmType != TRPM_SOFTWARE_INT); - - /* Clear the pending trap. */ - rc = TRPMResetTrap(pVCpu); - AssertRC(rc); - - Event.n.u8Vector = u8Vector; - Event.n.u1Valid = 1; - Event.n.u32ErrorCode = u32ErrorCode; - - if (enmType == TRPM_TRAP) - { - switch (u8Vector) - { - case X86_XCPT_DF: - case X86_XCPT_TS: - case X86_XCPT_NP: - case X86_XCPT_SS: - case X86_XCPT_GP: - case X86_XCPT_PF: - case X86_XCPT_AC: - /* Valid error codes. */ - Event.n.u1ErrorCodeValid = 1; - break; - default: - break; - } - if (u8Vector == X86_XCPT_NMI) - Event.n.u3Type = SVM_EVENT_NMI; - else - Event.n.u3Type = SVM_EVENT_EXCEPTION; - } - else - Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject); - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - } /* if (interrupts can be dispatched) */ - - return VINF_SUCCESS; -} - - -/** - * Save the host state. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu) -{ - NOREF(pVM); - NOREF(pVCpu); - /* Nothing to do here. */ - return VINF_SUCCESS; -} - - -/** - * Loads the guest state. - * - * NOTE: Don't do anything here that can cause a jump back to ring-3!!! - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - RTGCUINTPTR val; - SVM_VMCB *pVMCB; - - if (pVM == NULL) - return VERR_INVALID_PARAMETER; - - /* Setup AMD SVM. */ - Assert(pVM->hwaccm.s.svm.fSupported); - - pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; - AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB); - - /* Guest CPU context: ES, CS, SS, DS, FS, GS. */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS) - { - SVM_WRITE_SELREG(CS, cs); - SVM_WRITE_SELREG(SS, ss); - SVM_WRITE_SELREG(DS, ds); - SVM_WRITE_SELREG(ES, es); - SVM_WRITE_SELREG(FS, fs); - SVM_WRITE_SELREG(GS, gs); - } - - /* Guest CPU context: LDTR. 
*/ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR) - { - SVM_WRITE_SELREG(LDTR, ldtr); - } - - /* Guest CPU context: TR. */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR) - { - SVM_WRITE_SELREG(TR, tr); - } - - /* Guest CPU context: GDTR. */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR) - { - pVMCB->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt; - pVMCB->guest.GDTR.u64Base = pCtx->gdtr.pGdt; - } - - /* Guest CPU context: IDTR. */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR) - { - pVMCB->guest.IDTR.u32Limit = pCtx->idtr.cbIdt; - pVMCB->guest.IDTR.u64Base = pCtx->idtr.pIdt; - } - - /* - * Sysenter MSRs (unconditional) - */ - pVMCB->guest.u64SysEnterCS = pCtx->SysEnter.cs; - pVMCB->guest.u64SysEnterEIP = pCtx->SysEnter.eip; - pVMCB->guest.u64SysEnterESP = pCtx->SysEnter.esp; - - /* Control registers */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0) - { - val = pCtx->cr0; - if (!CPUMIsGuestFPUStateActive(pVCpu)) - { - /* Always use #NM exceptions to load the FPU/XMM state on demand. */ - val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP; - } - else - { - /** @todo check if we support the old style mess correctly. */ - if (!(val & X86_CR0_NE)) - { - Log(("Forcing X86_CR0_NE!!!\n")); - - /* Also catch floating point exceptions as we need to report them to the guest in a different way. */ - if (!pVCpu->hwaccm.s.fFPUOldStyleOverride) - { - pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_MF); - pVCpu->hwaccm.s.fFPUOldStyleOverride = true; - } - } - val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */ - } - /* Always enable caching. */ - val &= ~(X86_CR0_CD|X86_CR0_NW); - - /* - * Note: WP is not relevant in nested paging mode as we catch accesses on the (guest) physical level. - * Note: In nested paging mode, the guest is allowed to run with paging disabled; the guest-physical to host-physical - * translation will remain active. - */ - if (!pVM->hwaccm.s.fNestedPaging) - { - val |= X86_CR0_PG; /* Paging is always enabled; even when the guest is running in real mode or PE without paging. */ - val |= X86_CR0_WP; /* Must set this as we rely on protecting various pages and supervisor writes must be caught. */ - } - pVMCB->guest.u64CR0 = val; - } - /* CR2 as well */ - pVMCB->guest.u64CR2 = pCtx->cr2; - - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3) - { - /* Save our shadow CR3 register. */ - if (pVM->hwaccm.s.fNestedPaging) - { - PGMMODE enmShwPagingMode; - -#if HC_ARCH_BITS == 32 - if (CPUMIsGuestInLongModeEx(pCtx)) - enmShwPagingMode = PGMMODE_AMD64_NX; - else -#endif - enmShwPagingMode = PGMGetHostMode(pVM); - - pVMCB->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode); - Assert(pVMCB->ctrl.u64NestedPagingCR3); - pVMCB->guest.u64CR3 = pCtx->cr3; - } - else - { - pVMCB->guest.u64CR3 = PGMGetHyperCR3(pVCpu); - Assert(pVMCB->guest.u64CR3 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)); - } - } - - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4) - { - val = pCtx->cr4; - if (!pVM->hwaccm.s.fNestedPaging) - { - switch (pVCpu->hwaccm.s.enmShadowMode) - { - case PGMMODE_REAL: - case PGMMODE_PROTECTED: /* Protected mode, no paging. */ - AssertFailed(); - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; - - case PGMMODE_32_BIT: /* 32-bit paging. */ - val &= ~X86_CR4_PAE; - break; - - case PGMMODE_PAE: /* PAE paging. 
*/ - case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */ - /** Must use PAE paging as we could use physical memory > 4 GB */ - val |= X86_CR4_PAE; - break; - - case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ - case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */ -#ifdef VBOX_ENABLE_64_BITS_GUESTS - break; -#else - AssertFailed(); - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -#endif - - default: /* shut up gcc */ - AssertFailed(); - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; - } - } - pVMCB->guest.u64CR4 = val; - } - - /* Debug registers. */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG) - { - pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */ - pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */ - - pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */ - pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */ - pCtx->dr[7] |= 0x400; /* must be one */ - - pVMCB->guest.u64DR7 = pCtx->dr[7]; - pVMCB->guest.u64DR6 = pCtx->dr[6]; - -#ifdef DEBUG - /* Sync the hypervisor debug state now if any breakpoint is armed. */ - if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD) - && !CPUMIsHyperDebugStateActive(pVCpu) - && !DBGFIsStepping(pVCpu)) - { - /* Save the host and load the hypervisor debug state. */ - int rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc); - - /* DRx intercepts remain enabled. */ - - /* Override dr6 & dr7 with the hypervisor values. */ - pVMCB->guest.u64DR7 = CPUMGetHyperDR7(pVCpu); - pVMCB->guest.u64DR6 = CPUMGetHyperDR6(pVCpu); - } - else -#endif - /* Sync the debug state now if any breakpoint is armed. */ - if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD)) - && !CPUMIsGuestDebugStateActive(pVCpu) - && !DBGFIsStepping(pVCpu)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed); - - /* Disable drx move intercepts. */ - pVMCB->ctrl.u16InterceptRdDRx = 0; - pVMCB->ctrl.u16InterceptWrDRx = 0; - - /* Save the host and load the guest debug state. */ - int rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc); - } - } - - /* EIP, ESP and EFLAGS */ - pVMCB->guest.u64RIP = pCtx->rip; - pVMCB->guest.u64RSP = pCtx->rsp; - pVMCB->guest.u64RFlags = pCtx->eflags.u32; - - /* Set CPL */ - pVMCB->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl; - - /* RAX/EAX too, as VMRUN uses RAX as an implicit parameter. */ - pVMCB->guest.u64RAX = pCtx->rax; - - /* vmrun will fail without MSR_K6_EFER_SVME. */ - pVMCB->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME; - - /* 64 bits guest mode? */ - if (CPUMIsGuestInLongModeEx(pCtx)) - { -#if !defined(VBOX_ENABLE_64_BITS_GUESTS) - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64; -#else -# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (!pVM->hwaccm.s.fAllow64BitGuests) - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -# endif - pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMRun64; -#endif - /* Unconditionally update these as wrmsr might have changed them. (HWACCM_CHANGED_GUEST_SEGMENT_REGS will not be set) */ - pVMCB->guest.FS.u64Base = pCtx->fs.u64Base; - pVMCB->guest.GS.u64Base = pCtx->gs.u64Base; - } - else - { - /* Filter out the MSR_K6_LME bit or else AMD-V expects amd64 shadow paging. */ - pVMCB->guest.u64EFER &= ~MSR_K6_EFER_LME; - - pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMRun; - } - - /* TSC offset. 
*/ - if (TMCpuTickCanUseRealTSC(pVCpu, &pVMCB->ctrl.u64TSCOffset)) - { - uint64_t u64CurTSC = ASMReadTSC(); - if (u64CurTSC + pVMCB->ctrl.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) - { - pVMCB->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_RDTSC; - pVMCB->ctrl.u32InterceptCtrl2 &= ~SVM_CTRL2_INTERCEPT_RDTSCP; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset); - } - else - { - /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */ - LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, - pVMCB->ctrl.u64TSCOffset, u64CurTSC + pVMCB->ctrl.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), - TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVMCB->ctrl.u64TSCOffset, TMCpuTickGet(pVCpu))); - pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC; - pVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow); - } - } - else - { - pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC; - pVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept); - } - - /* Sync the various MSRs for 64-bit mode. */ - pVMCB->guest.u64STAR = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */ - pVMCB->guest.u64LSTAR = pCtx->msrLSTAR; /* 64-bit mode syscall rip */ - pVMCB->guest.u64CSTAR = pCtx->msrCSTAR; /* compatibility mode syscall rip */ - pVMCB->guest.u64SFMASK = pCtx->msrSFMASK; /* syscall flag mask */ - pVMCB->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE; /* SWAPGS exchange value */ - -#ifdef DEBUG - /* Intercept X86_XCPT_DB if stepping is enabled */ - if ( DBGFIsStepping(pVCpu) - || CPUMIsHyperDebugStateActive(pVCpu)) - pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_DB); - else - pVMCB->ctrl.u32InterceptException &= ~RT_BIT(X86_XCPT_DB); -#endif - - /* Done. */ - pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST; - - return VINF_SUCCESS; -} - - -/** - * Setup TLB for ASID. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -static void hmR0SvmSetupTLB(PVM pVM, PVMCPU pVCpu) -{ - PHMGLOBLCPUINFO pCpu; - - AssertPtr(pVM); - AssertPtr(pVCpu); - - SVM_VMCB *pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; - pCpu = HWACCMR0GetCurrentCpu(); - - /* - * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last. - * This can happen both for start & resume due to long jumps back to ring-3. - * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB, - * so we cannot reuse the ASIDs without flushing. - */ - bool fNewASID = false; - if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu - || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - { - pVCpu->hwaccm.s.fForceTLBFlush = true; - fNewASID = true; - } - - /* - * Set TLB flush state as checked until we return from the world switch. - */ - ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true); - - /* - * Check for TLB shootdown flushes. - */ - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) - pVCpu->hwaccm.s.fForceTLBFlush = true; - - pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu; - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING; - - if (RT_UNLIKELY(pVM->hwaccm.s.svm.fAlwaysFlushTLB)) - { - /* - * This is the AMD erratum 170. We need to flush the entire TLB for each world switch. Sad. 
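The TSC handling in SVMR0LoadGuestState above only leaves RDTSC/RDTSCP unintercepted when the offsetted TSC cannot appear to go backwards to the guest; a tiny model of that decision (names are illustrative):

#include <stdbool.h>
#include <stdint.h>

/* True when the guest-visible TSC (host TSC + VMCB offset) is not below the
   last value the guest has already seen; only then may RDTSC and RDTSCP be
   passed straight through, otherwise the intercepts stay set and the reads
   are emulated. */
static bool svmModelCanOffsetTsc(uint64_t uHostTsc, uint64_t uTscOffset, uint64_t uLastSeenByGuest)
{
    return uHostTsc + uTscOffset >= uLastSeenByGuest;
}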
- */ - pCpu->uCurrentASID = 1; - pVCpu->hwaccm.s.uCurrentASID = 1; - pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes; - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; - } - else if (pVCpu->hwaccm.s.fForceTLBFlush) - { - if (fNewASID) - { - ++pCpu->uCurrentASID; - bool fHitASIDLimit = false; - if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID) - { - pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */ - pCpu->cTLBFlushes++; - fHitASIDLimit = true; - - if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) - { - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; - pCpu->fFlushASIDBeforeUse = true; - } - else - { - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; - pCpu->fFlushASIDBeforeUse = false; - } - } - - if ( !fHitASIDLimit - && pCpu->fFlushASIDBeforeUse) - { - if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; - else - { - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; - pCpu->fFlushASIDBeforeUse = false; - } - } - - pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID; - pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes; - } - else - { - if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID) - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT; - else - pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE; - } - - pVCpu->hwaccm.s.fForceTLBFlush = false; - } - else - { - /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should - * not be executed. See hwaccmQueueInvlPage() where it is commented - * out. Support individual entry flushing someday. */ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) - { - /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown); - for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++) - SVMR0InvlpgA(pVCpu->hwaccm.s.TlbShootdown.aPages[i], pVMCB->ctrl.TLBCtrl.n.u32ASID); - } - } - - pVCpu->hwaccm.s.TlbShootdown.cPages = 0; - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); - - /* Update VMCB with the ASID. */ - pVMCB->ctrl.TLBCtrl.n.u32ASID = pVCpu->hwaccm.s.uCurrentASID; - - AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, - ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes)); - AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID)); - AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID)); - -#ifdef VBOX_WITH_STATISTICS - if (pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING) - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch); - else if ( pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT - || pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID); - } - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch); -#endif -} - - -/** - * Runs guest code in an AMD-V VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. 
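hmR0SvmSetupTLB above only hands out a fresh ASID when the VCPU migrated to another host CPU or the flush generation changed; the following condensed, self-contained sketch models the ASID wrap-around (the structure and function names are illustrative, and the flush-by-ASID refinement is folded into a single "flush before use" result):

#include <stdbool.h>
#include <stdint.h>

typedef struct
{
    uint32_t uCurrentAsid;  /* per host CPU; ASID 0 is reserved for the host */
    uint32_t cTlbFlushes;   /* bumped whenever the ASID space wraps */
} SVMMODELHOSTCPU;

/* Returns true when the TLB must be flushed before the returned ASID is used,
   mirroring the fForceTLBFlush / fNewASID handling above in simplified form. */
static bool svmModelAssignAsid(SVMMODELHOSTCPU *pHostCpu, uint32_t uMaxAsid,
                               bool fCpuOrFlushCountChanged, uint32_t *puAsid)
{
    if (!fCpuOrFlushCountChanged)
    {
        *puAsid = pHostCpu->uCurrentAsid;    /* keep the ASID, nothing to flush */
        return false;
    }
    if (++pHostCpu->uCurrentAsid >= uMaxAsid)
    {
        pHostCpu->uCurrentAsid = 1;          /* wrap around; start over at 1 */
        pHostCpu->cTlbFlushes++;
    }
    *puAsid = pHostCpu->uCurrentAsid;
    return true;                             /* new ASID: flush before first use */
}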
- */ -VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); - STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1); - STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2); - - VBOXSTRICTRC rc = VINF_SUCCESS; - int rc2; - uint64_t exitCode = (uint64_t)SVM_EXIT_INVALID; - SVM_VMCB *pVMCB = NULL; - bool fSyncTPR = false; - unsigned cResume = 0; - uint8_t u8LastTPR = 0; /* Initialized for potentially stupid compilers. */ - uint32_t u32HostExtFeatures = 0; - PHMGLOBLCPUINFO pCpu = 0; - RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0; -#ifdef VBOX_STRICT - RTCPUID idCpuCheck; -#endif -#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - uint64_t u64LastTime = RTTimeMilliTS(); -#endif - - pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; - AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB); - - /* - * We can jump to this point to resume execution after determining that a VM-exit is innocent. - */ -ResumeExecution: - if (!STAM_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry)) - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x); - Assert(!HWACCMR0SuspendPending()); - - /* - * Safety precaution; looping for too long here can have a very bad effect on the host. - */ - if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume); - rc = VINF_EM_RAW_INTERRUPT; - goto end; - } - - /* - * Check for IRQ inhibition due to instruction fusing (sti, mov ss). - */ - if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - { - Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu))); - if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu)) - { - /* - * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here. - * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might - * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could - * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think. - */ - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); - /* Irq inhibition is no longer active; clear the corresponding SVM state. */ - pVMCB->ctrl.u64IntShadow = 0; - } - } - else - { - /* Irq inhibition is no longer active; clear the corresponding SVM state. */ - pVMCB->ctrl.u64IntShadow = 0; - } - -#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - if (RT_UNLIKELY((cResume & 0xf) == 0)) - { - uint64_t u64CurTime = RTTimeMilliTS(); - - if (RT_UNLIKELY(u64CurTime > u64LastTime)) - { - u64LastTime = u64CurTime; - TMTimerPollVoid(pVM, pVCpu); - } - } -#endif - - /* - * Check for pending actions that force us to go back to ring-3. - */ - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA) - || VMCPU_FF_ISPENDING(pVCpu, - VMCPU_FF_HWACCM_TO_R3_MASK - | VMCPU_FF_PGM_SYNC_CR3 - | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL - | VMCPU_FF_REQUEST)) - { - /* Check if a sync operation is pending. 
*/ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) - { - rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); - AssertRC(VBOXSTRICTRC_VAL(rc)); - if (rc != VINF_SUCCESS) - { - Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc))); - goto end; - } - } - -#ifdef DEBUG - /* Intercept X86_XCPT_DB if stepping is enabled */ - if (!DBGFIsStepping(pVCpu)) -#endif - { - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK) - || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3); - rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; - goto end; - } - } - - /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */ - if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST) - || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST)) - { - rc = VINF_EM_PENDING_REQUEST; - goto end; - } - - /* Check if a pgm pool flush is in progress. */ - if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) - { - rc = VINF_PGM_POOL_FLUSH_PENDING; - goto end; - } - - /* Check if DMA work is pending (2nd+ run). */ - if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1) - { - rc = VINF_EM_RAW_TO_R3; - goto end; - } - } - -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* - * Exit to ring-3 preemption/work is pending. - * - * Interrupts are disabled before the call to make sure we don't miss any interrupt - * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this - * further down, but hmR0SvmCheckPendingInterrupt makes that impossible.) - * - * Note! Interrupts must be disabled done *before* we check for TLB flushes; TLB - * shootdowns rely on this. - */ - uOldEFlags = ASMIntDisableFlags(); - if (RTThreadPreemptIsPending(NIL_RTTHREAD)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending); - rc = VINF_EM_RAW_INTERRUPT; - goto end; - } - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); -#endif - - /* - * When external interrupts are pending, we should exit the VM when IF is set. - * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!! - */ - rc = hmR0SvmCheckPendingInterrupt(pVM, pVCpu, pVMCB, pCtx); - if (RT_FAILURE(rc)) - goto end; - - /* - * TPR caching using CR8 is only available in 64-bit mode or with 32-bit guests when X86_CPUID_AMD_FEATURE_ECX_CR8L is - * supported. - * Note: we can't do this in LoddGuestState as PDMApicGetTPR can jump back to ring 3 (lock)! (no longer true) - */ - /** @todo query and update the TPR only when it could have been changed (mmio access) - */ - if (pVM->hwaccm.s.fHasIoApic) - { - /* TPR caching in CR8 */ - bool fPending; - rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); - AssertRC(rc2); - - if (pVM->hwaccm.s.fTPRPatchingActive) - { - /* Our patch code uses LSTAR for TPR caching. */ - pCtx->msrLSTAR = u8LastTPR; - - if (fPending) - { - /* A TPR change could activate a pending interrupt, so catch lstar writes. */ - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false); - } - else - { - /* - * No interrupts are pending, so we don't need to be explicitely notified. - * There are enough world switches for detecting pending interrupts. - */ - hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true); - } - } - else - { - /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. 
*/ - pVMCB->ctrl.IntCtrl.n.u8VTPR = (u8LastTPR >> 4); - - if (fPending) - { - /* A TPR change could activate a pending interrupt, so catch cr8 writes. */ - pVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(8); - } - else - { - /* - * No interrupts are pending, so we don't need to be explicitly notified. - * There are enough world switches for detecting pending interrupts. - */ - pVMCB->ctrl.u16InterceptWrCRx &= ~RT_BIT(8); - } - } - fSyncTPR = !fPending; - } - - /* All done! Let's start VM execution. */ - - /* Enable nested paging if necessary (disabled each time after #VMEXIT). */ - pVMCB->ctrl.NestedPaging.n.u1NestedPaging = pVM->hwaccm.s.fNestedPaging; - -#ifdef LOG_ENABLED - pCpu = HWACCMR0GetCurrentCpu(); - if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu) - LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu)); - else if (pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes)); - else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH)) - LogFlow(("Manual TLB flush\n")); -#endif - - /* - * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3! - * (until the actual world switch) - */ -#ifdef VBOX_STRICT - idCpuCheck = RTMpCpuId(); -#endif - VMMR0LogFlushDisable(pVCpu); - - /* - * Load the guest state; *must* be here as it sets up the shadow CR0 for lazy FPU syncing! - */ - rc = SVMR0LoadGuestState(pVM, pVCpu, pCtx); - if (RT_UNLIKELY(rc != VINF_SUCCESS)) - { - VMMR0LogFlushEnable(pVCpu); - goto end; - } - -#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* - * Disable interrupts to make sure a poke will interrupt execution. - * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this. - */ - uOldEFlags = ASMIntDisableFlags(); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); -#endif - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x); - - /* Setup TLB control and ASID in the VMCB. */ - hmR0SvmSetupTLB(pVM, pVCpu); - - /* In case we execute a goto ResumeExecution later on. */ - pVCpu->hwaccm.s.fResumeVM = true; - pVCpu->hwaccm.s.fForceTLBFlush = pVM->hwaccm.s.svm.fAlwaysFlushTLB; - - Assert(sizeof(pVCpu->hwaccm.s.svm.pVMCBPhys) == 8); - Assert(pVMCB->ctrl.IntCtrl.n.u1VIrqMasking); - Assert(pVMCB->ctrl.u64IOPMPhysAddr == pVM->hwaccm.s.svm.pIOBitmapPhys); - Assert(pVMCB->ctrl.u64MSRPMPhysAddr == pVCpu->hwaccm.s.svm.pMSRBitmapPhys); - Assert(pVMCB->ctrl.u64LBRVirt == 0); - -#ifdef VBOX_STRICT - Assert(idCpuCheck == RTMpCpuId()); -#endif - TMNotifyStartOfExecution(pVCpu); - - /* - * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that - * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}. 
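The @bugref{3324} comment above is about swapping MSR_K8_TSC_AUX around the world switch so that an unintercepted RDTSCP returns the guest's value; here is a tiny model of that save/write/restore pattern over a fake MSR cell (the store and helper names are invented for illustration).

#include <stdint.h>

/* Stand-ins for rdmsr/wrmsr on MSR_K8_TSC_AUX. */
static uint64_t g_uFakeTscAuxMsr;
static uint64_t fakeRdTscAux(void)              { return g_uFakeTscAuxMsr; }
static void     fakeWrTscAux(uint64_t uValue)   { g_uFakeTscAuxMsr = uValue; }

/* Mirrors the pattern above: remember the host TSC_AUX, expose the guest value
   while guest code runs, then restore the host value after the VM-exit. */
static void svmModelRunWithGuestTscAux(uint64_t uGuestTscAux, void (*pfnRunGuest)(void))
{
    uint64_t uHostTscAux = fakeRdTscAux();   /* save host value */
    fakeWrTscAux(uGuestTscAux);              /* RDTSCP in the guest now sees this */
    pfnRunGuest();                           /* the VMRUN world switch goes here */
    fakeWrTscAux(uHostTscAux);               /* put the host value back */
}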
- */ - u32HostExtFeatures = pVM->hwaccm.s.cpuid.u32AMDFeatureEDX; - if ( (u32HostExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) - && !(pVMCB->ctrl.u32InterceptCtrl2 & SVM_CTRL2_INTERCEPT_RDTSCP)) - { - pVCpu->hwaccm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX); - uint64_t u64GuestTSCAux = 0; - rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux); - AssertRC(rc2); - ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux); - } - -#ifdef VBOX_WITH_KERNEL_USING_XMM - hwaccmR0SVMRunWrapXMM(pVCpu->hwaccm.s.svm.pVMCBHostPhys, pVCpu->hwaccm.s.svm.pVMCBPhys, pCtx, pVM, pVCpu, - pVCpu->hwaccm.s.svm.pfnVMRun); -#else - pVCpu->hwaccm.s.svm.pfnVMRun(pVCpu->hwaccm.s.svm.pVMCBHostPhys, pVCpu->hwaccm.s.svm.pVMCBPhys, pCtx, pVM, pVCpu); -#endif - ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false); - ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits); - /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */ - if (!(pVMCB->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_RDTSC)) - { - /* Restore host's TSC_AUX. */ - if (u32HostExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) - ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hwaccm.s.u64HostTSCAux); - - TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + - pVMCB->ctrl.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */); - } - TMNotifyEndOfExecution(pVCpu); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x); - ASMSetFlags(uOldEFlags); -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - uOldEFlags = ~(RTCCUINTREG)0; -#endif - - /* - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING-3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - */ - - /* Reason for the VM exit */ - exitCode = pVMCB->ctrl.u64ExitCode; - - if (RT_UNLIKELY(exitCode == (uint64_t)SVM_EXIT_INVALID)) /* Invalid guest state. 
*/ - { - HWACCMDumpRegs(pVM, pVCpu, pCtx); -#ifdef DEBUG - Log(("ctrl.u16InterceptRdCRx %x\n", pVMCB->ctrl.u16InterceptRdCRx)); - Log(("ctrl.u16InterceptWrCRx %x\n", pVMCB->ctrl.u16InterceptWrCRx)); - Log(("ctrl.u16InterceptRdDRx %x\n", pVMCB->ctrl.u16InterceptRdDRx)); - Log(("ctrl.u16InterceptWrDRx %x\n", pVMCB->ctrl.u16InterceptWrDRx)); - Log(("ctrl.u32InterceptException %x\n", pVMCB->ctrl.u32InterceptException)); - Log(("ctrl.u32InterceptCtrl1 %x\n", pVMCB->ctrl.u32InterceptCtrl1)); - Log(("ctrl.u32InterceptCtrl2 %x\n", pVMCB->ctrl.u32InterceptCtrl2)); - Log(("ctrl.u64IOPMPhysAddr %RX64\n", pVMCB->ctrl.u64IOPMPhysAddr)); - Log(("ctrl.u64MSRPMPhysAddr %RX64\n", pVMCB->ctrl.u64MSRPMPhysAddr)); - Log(("ctrl.u64TSCOffset %RX64\n", pVMCB->ctrl.u64TSCOffset)); - - Log(("ctrl.TLBCtrl.u32ASID %x\n", pVMCB->ctrl.TLBCtrl.n.u32ASID)); - Log(("ctrl.TLBCtrl.u8TLBFlush %x\n", pVMCB->ctrl.TLBCtrl.n.u8TLBFlush)); - Log(("ctrl.TLBCtrl.u24Reserved %x\n", pVMCB->ctrl.TLBCtrl.n.u24Reserved)); - - Log(("ctrl.IntCtrl.u8VTPR %x\n", pVMCB->ctrl.IntCtrl.n.u8VTPR)); - Log(("ctrl.IntCtrl.u1VIrqValid %x\n", pVMCB->ctrl.IntCtrl.n.u1VIrqValid)); - Log(("ctrl.IntCtrl.u7Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u7Reserved)); - Log(("ctrl.IntCtrl.u4VIrqPriority %x\n", pVMCB->ctrl.IntCtrl.n.u4VIrqPriority)); - Log(("ctrl.IntCtrl.u1IgnoreTPR %x\n", pVMCB->ctrl.IntCtrl.n.u1IgnoreTPR)); - Log(("ctrl.IntCtrl.u3Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u3Reserved)); - Log(("ctrl.IntCtrl.u1VIrqMasking %x\n", pVMCB->ctrl.IntCtrl.n.u1VIrqMasking)); - Log(("ctrl.IntCtrl.u7Reserved2 %x\n", pVMCB->ctrl.IntCtrl.n.u7Reserved2)); - Log(("ctrl.IntCtrl.u8VIrqVector %x\n", pVMCB->ctrl.IntCtrl.n.u8VIrqVector)); - Log(("ctrl.IntCtrl.u24Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u24Reserved)); - - Log(("ctrl.u64IntShadow %RX64\n", pVMCB->ctrl.u64IntShadow)); - Log(("ctrl.u64ExitCode %RX64\n", pVMCB->ctrl.u64ExitCode)); - Log(("ctrl.u64ExitInfo1 %RX64\n", pVMCB->ctrl.u64ExitInfo1)); - Log(("ctrl.u64ExitInfo2 %RX64\n", pVMCB->ctrl.u64ExitInfo2)); - Log(("ctrl.ExitIntInfo.u8Vector %x\n", pVMCB->ctrl.ExitIntInfo.n.u8Vector)); - Log(("ctrl.ExitIntInfo.u3Type %x\n", pVMCB->ctrl.ExitIntInfo.n.u3Type)); - Log(("ctrl.ExitIntInfo.u1ErrorCodeValid %x\n", pVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid)); - Log(("ctrl.ExitIntInfo.u19Reserved %x\n", pVMCB->ctrl.ExitIntInfo.n.u19Reserved)); - Log(("ctrl.ExitIntInfo.u1Valid %x\n", pVMCB->ctrl.ExitIntInfo.n.u1Valid)); - Log(("ctrl.ExitIntInfo.u32ErrorCode %x\n", pVMCB->ctrl.ExitIntInfo.n.u32ErrorCode)); - Log(("ctrl.NestedPaging %RX64\n", pVMCB->ctrl.NestedPaging.au64)); - Log(("ctrl.EventInject.u8Vector %x\n", pVMCB->ctrl.EventInject.n.u8Vector)); - Log(("ctrl.EventInject.u3Type %x\n", pVMCB->ctrl.EventInject.n.u3Type)); - Log(("ctrl.EventInject.u1ErrorCodeValid %x\n", pVMCB->ctrl.EventInject.n.u1ErrorCodeValid)); - Log(("ctrl.EventInject.u19Reserved %x\n", pVMCB->ctrl.EventInject.n.u19Reserved)); - Log(("ctrl.EventInject.u1Valid %x\n", pVMCB->ctrl.EventInject.n.u1Valid)); - Log(("ctrl.EventInject.u32ErrorCode %x\n", pVMCB->ctrl.EventInject.n.u32ErrorCode)); - - Log(("ctrl.u64NestedPagingCR3 %RX64\n", pVMCB->ctrl.u64NestedPagingCR3)); - Log(("ctrl.u64LBRVirt %RX64\n", pVMCB->ctrl.u64LBRVirt)); - - Log(("guest.CS.u16Sel %04X\n", pVMCB->guest.CS.u16Sel)); - Log(("guest.CS.u16Attr %04X\n", pVMCB->guest.CS.u16Attr)); - Log(("guest.CS.u32Limit %X\n", pVMCB->guest.CS.u32Limit)); - Log(("guest.CS.u64Base %RX64\n", pVMCB->guest.CS.u64Base)); - Log(("guest.DS.u16Sel %04X\n", pVMCB->guest.DS.u16Sel)); - Log(("guest.DS.u16Attr 
%04X\n", pVMCB->guest.DS.u16Attr)); - Log(("guest.DS.u32Limit %X\n", pVMCB->guest.DS.u32Limit)); - Log(("guest.DS.u64Base %RX64\n", pVMCB->guest.DS.u64Base)); - Log(("guest.ES.u16Sel %04X\n", pVMCB->guest.ES.u16Sel)); - Log(("guest.ES.u16Attr %04X\n", pVMCB->guest.ES.u16Attr)); - Log(("guest.ES.u32Limit %X\n", pVMCB->guest.ES.u32Limit)); - Log(("guest.ES.u64Base %RX64\n", pVMCB->guest.ES.u64Base)); - Log(("guest.FS.u16Sel %04X\n", pVMCB->guest.FS.u16Sel)); - Log(("guest.FS.u16Attr %04X\n", pVMCB->guest.FS.u16Attr)); - Log(("guest.FS.u32Limit %X\n", pVMCB->guest.FS.u32Limit)); - Log(("guest.FS.u64Base %RX64\n", pVMCB->guest.FS.u64Base)); - Log(("guest.GS.u16Sel %04X\n", pVMCB->guest.GS.u16Sel)); - Log(("guest.GS.u16Attr %04X\n", pVMCB->guest.GS.u16Attr)); - Log(("guest.GS.u32Limit %X\n", pVMCB->guest.GS.u32Limit)); - Log(("guest.GS.u64Base %RX64\n", pVMCB->guest.GS.u64Base)); - - Log(("guest.GDTR.u32Limit %X\n", pVMCB->guest.GDTR.u32Limit)); - Log(("guest.GDTR.u64Base %RX64\n", pVMCB->guest.GDTR.u64Base)); - - Log(("guest.LDTR.u16Sel %04X\n", pVMCB->guest.LDTR.u16Sel)); - Log(("guest.LDTR.u16Attr %04X\n", pVMCB->guest.LDTR.u16Attr)); - Log(("guest.LDTR.u32Limit %X\n", pVMCB->guest.LDTR.u32Limit)); - Log(("guest.LDTR.u64Base %RX64\n", pVMCB->guest.LDTR.u64Base)); - - Log(("guest.IDTR.u32Limit %X\n", pVMCB->guest.IDTR.u32Limit)); - Log(("guest.IDTR.u64Base %RX64\n", pVMCB->guest.IDTR.u64Base)); - - Log(("guest.TR.u16Sel %04X\n", pVMCB->guest.TR.u16Sel)); - Log(("guest.TR.u16Attr %04X\n", pVMCB->guest.TR.u16Attr)); - Log(("guest.TR.u32Limit %X\n", pVMCB->guest.TR.u32Limit)); - Log(("guest.TR.u64Base %RX64\n", pVMCB->guest.TR.u64Base)); - - Log(("guest.u8CPL %X\n", pVMCB->guest.u8CPL)); - Log(("guest.u64CR0 %RX64\n", pVMCB->guest.u64CR0)); - Log(("guest.u64CR2 %RX64\n", pVMCB->guest.u64CR2)); - Log(("guest.u64CR3 %RX64\n", pVMCB->guest.u64CR3)); - Log(("guest.u64CR4 %RX64\n", pVMCB->guest.u64CR4)); - Log(("guest.u64DR6 %RX64\n", pVMCB->guest.u64DR6)); - Log(("guest.u64DR7 %RX64\n", pVMCB->guest.u64DR7)); - - Log(("guest.u64RIP %RX64\n", pVMCB->guest.u64RIP)); - Log(("guest.u64RSP %RX64\n", pVMCB->guest.u64RSP)); - Log(("guest.u64RAX %RX64\n", pVMCB->guest.u64RAX)); - Log(("guest.u64RFlags %RX64\n", pVMCB->guest.u64RFlags)); - - Log(("guest.u64SysEnterCS %RX64\n", pVMCB->guest.u64SysEnterCS)); - Log(("guest.u64SysEnterEIP %RX64\n", pVMCB->guest.u64SysEnterEIP)); - Log(("guest.u64SysEnterESP %RX64\n", pVMCB->guest.u64SysEnterESP)); - - Log(("guest.u64EFER %RX64\n", pVMCB->guest.u64EFER)); - Log(("guest.u64STAR %RX64\n", pVMCB->guest.u64STAR)); - Log(("guest.u64LSTAR %RX64\n", pVMCB->guest.u64LSTAR)); - Log(("guest.u64CSTAR %RX64\n", pVMCB->guest.u64CSTAR)); - Log(("guest.u64SFMASK %RX64\n", pVMCB->guest.u64SFMASK)); - Log(("guest.u64KernelGSBase %RX64\n", pVMCB->guest.u64KernelGSBase)); - Log(("guest.u64GPAT %RX64\n", pVMCB->guest.u64GPAT)); - Log(("guest.u64DBGCTL %RX64\n", pVMCB->guest.u64DBGCTL)); - Log(("guest.u64BR_FROM %RX64\n", pVMCB->guest.u64BR_FROM)); - Log(("guest.u64BR_TO %RX64\n", pVMCB->guest.u64BR_TO)); - Log(("guest.u64LASTEXCPFROM %RX64\n", pVMCB->guest.u64LASTEXCPFROM)); - Log(("guest.u64LASTEXCPTO %RX64\n", pVMCB->guest.u64LASTEXCPTO)); -#endif - rc = VERR_SVM_UNABLE_TO_START_VM; - VMMR0LogFlushEnable(pVCpu); - goto end; - } - - /* Let's first sync back EIP, ESP, and EFLAGS. 
*/ - pCtx->rip = pVMCB->guest.u64RIP; - pCtx->rsp = pVMCB->guest.u64RSP; - pCtx->eflags.u32 = pVMCB->guest.u64RFlags; - /* eax is saved/restore across the vmrun instruction */ - pCtx->rax = pVMCB->guest.u64RAX; - - /* - * Save all the MSRs that can be changed by the guest without causing a world switch. - * FS & GS base are saved with SVM_READ_SELREG. - */ - pCtx->msrSTAR = pVMCB->guest.u64STAR; /* legacy syscall eip, cs & ss */ - pCtx->msrLSTAR = pVMCB->guest.u64LSTAR; /* 64-bit mode syscall rip */ - pCtx->msrCSTAR = pVMCB->guest.u64CSTAR; /* compatibility mode syscall rip */ - pCtx->msrSFMASK = pVMCB->guest.u64SFMASK; /* syscall flag mask */ - pCtx->msrKERNELGSBASE = pVMCB->guest.u64KernelGSBase; /* swapgs exchange value */ - pCtx->SysEnter.cs = pVMCB->guest.u64SysEnterCS; - pCtx->SysEnter.eip = pVMCB->guest.u64SysEnterEIP; - pCtx->SysEnter.esp = pVMCB->guest.u64SysEnterESP; - - /* Can be updated behind our back in the nested paging case. */ - pCtx->cr2 = pVMCB->guest.u64CR2; - - /* Guest CPU context: ES, CS, SS, DS, FS, GS. */ - SVM_READ_SELREG(SS, ss); - SVM_READ_SELREG(CS, cs); - SVM_READ_SELREG(DS, ds); - SVM_READ_SELREG(ES, es); - SVM_READ_SELREG(FS, fs); - SVM_READ_SELREG(GS, gs); - - /* - * Correct the hidden CS granularity flag. Haven't seen it being wrong in any other - * register (yet). - */ - if ( !pCtx->cs.Attr.n.u1Granularity - && pCtx->cs.Attr.n.u1Present - && pCtx->cs.u32Limit > UINT32_C(0xfffff)) - { - Assert((pCtx->cs.u32Limit & 0xfff) == 0xfff); - pCtx->cs.Attr.n.u1Granularity = 1; - } -#define SVM_ASSERT_SEL_GRANULARITY(reg) \ - AssertMsg( !pCtx->reg.Attr.n.u1Present \ - || ( pCtx->reg.Attr.n.u1Granularity \ - ? (pCtx->reg.u32Limit & 0xfff) == 0xfff \ - : pCtx->reg.u32Limit <= 0xfffff), \ - ("%#x %#x %#llx\n", pCtx->reg.u32Limit, pCtx->reg.Attr.u, pCtx->reg.u64Base)) - SVM_ASSERT_SEL_GRANULARITY(ss); - SVM_ASSERT_SEL_GRANULARITY(cs); - SVM_ASSERT_SEL_GRANULARITY(ds); - SVM_ASSERT_SEL_GRANULARITY(es); - SVM_ASSERT_SEL_GRANULARITY(fs); - SVM_ASSERT_SEL_GRANULARITY(gs); -#undef SVM_ASSERT_SEL_GRANULARITY - - /* - * Correct the hidden SS DPL field. It can be wrong on certain CPUs - * sometimes (seen it on AMD Fusion CPUs with 64-bit guests). The CPU - * always uses the CPL field in the VMCB instead of the DPL in the hidden - * SS (chapter AMD spec. 15.5.1 Basic operation). - */ - Assert(!(pVMCB->guest.u8CPL & ~0x3)); - pCtx->ss.Attr.n.u2Dpl = pVMCB->guest.u8CPL & 0x3; - - /* - * Remaining guest CPU context: TR, IDTR, GDTR, LDTR; - * must sync everything otherwise we can get out of sync when jumping back to ring-3. - */ - SVM_READ_SELREG(LDTR, ldtr); - SVM_READ_SELREG(TR, tr); - - pCtx->gdtr.cbGdt = pVMCB->guest.GDTR.u32Limit; - pCtx->gdtr.pGdt = pVMCB->guest.GDTR.u64Base; - - pCtx->idtr.cbIdt = pVMCB->guest.IDTR.u32Limit; - pCtx->idtr.pIdt = pVMCB->guest.IDTR.u64Base; - - /* - * No reason to sync back the CRx and DRx registers as they cannot be changed by the guest - * unless in the nested paging case where CR3 can be changed by the guest. - */ - if ( pVM->hwaccm.s.fNestedPaging - && pCtx->cr3 != pVMCB->guest.u64CR3) - { - CPUMSetGuestCR3(pVCpu, pVMCB->guest.u64CR3); - PGMUpdateCR3(pVCpu, pVMCB->guest.u64CR3); - } - - /* Note! NOW IT'S SAFE FOR LOGGING! */ - VMMR0LogFlushEnable(pVCpu); - - /* Take care of instruction fusing (sti, mov ss) (see AMD spec. 
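The granularity fix-up and the SVM_ASSERT_SEL_GRANULARITY checks above encode an architectural invariant: a page-granular segment limit (G=1) always reads back with its low 12 bits set, while a byte-granular limit (G=0) can never exceed 0xfffff. A minimal standalone sketch of that check, using plain integer types instead of the VMCB selector fields (the helper name is illustrative, not a VirtualBox API):

#include <stdbool.h>
#include <stdint.h>

/* True if a present segment's 32-bit limit is consistent with its G bit:
   page-granular limits have the low 12 bits all set, byte-granular limits
   fit in 20 bits.  Not-present (unusable) segments are not checked. */
static bool IsSelLimitConsistent(bool fPresent, bool fGranularity, uint32_t u32Limit)
{
    if (!fPresent)
        return true;
    if (fGranularity)
        return (u32Limit & 0xfff) == 0xfff;
    return u32Limit <= UINT32_C(0xfffff);
}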
15.20.5 Interrupt Shadows) */ - if (pVMCB->ctrl.u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE) - { - Log(("uInterruptState %x rip=%RGv\n", pVMCB->ctrl.u64IntShadow, (RTGCPTR)pCtx->rip)); - EMSetInhibitInterruptsPC(pVCpu, pCtx->rip); - } - else - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); - - Log2(("exitCode = %x\n", exitCode)); - - /* Sync back DR6 as it could have been changed by hitting breakpoints. */ - pCtx->dr[6] = pVMCB->guest.u64DR6; - /* DR7.GD can be cleared by debug exceptions, so sync it back as well. */ - pCtx->dr[7] = pVMCB->guest.u64DR7; - - /* Check if an injected event was interrupted prematurely. */ - pVCpu->hwaccm.s.Event.intInfo = pVMCB->ctrl.ExitIntInfo.au64[0]; - if ( pVMCB->ctrl.ExitIntInfo.n.u1Valid - /* we don't care about 'int xx' as the instruction will be restarted. */ - && pVMCB->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT) - { - Log(("Pending inject %RX64 at %RGv exit=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitCode)); - -#ifdef LOG_ENABLED - SVM_EVENT Event; - Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo; - - if ( exitCode == SVM_EXIT_EXCEPTION_E - && Event.n.u8Vector == 0xE) - { - Log(("Double fault!\n")); - } -#endif - - pVCpu->hwaccm.s.Event.fPending = true; - /* Error code present? (redundant) */ - if (pVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid) - pVCpu->hwaccm.s.Event.errCode = pVMCB->ctrl.ExitIntInfo.n.u32ErrorCode; - else - pVCpu->hwaccm.s.Event.errCode = 0; - } -#ifdef VBOX_WITH_STATISTICS - if (exitCode == SVM_EXIT_NPF) - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF); - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitCode & MASK_EXITREASON_STAT]); -#endif - - /* Sync back the TPR if it was changed. */ - if (fSyncTPR) - { - if (pVM->hwaccm.s.fTPRPatchingActive) - { - if ((pCtx->msrLSTAR & 0xff) != u8LastTPR) - { - /* Our patch code uses LSTAR for TPR caching. */ - rc2 = PDMApicSetTPR(pVCpu, pCtx->msrLSTAR & 0xff); - AssertRC(rc2); - } - } - else - { - if ((uint8_t)(u8LastTPR >> 4) != pVMCB->ctrl.IntCtrl.n.u8VTPR) - { - /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ - rc2 = PDMApicSetTPR(pVCpu, pVMCB->ctrl.IntCtrl.n.u8VTPR << 4); - AssertRC(rc2); - } - } - } - -#ifdef DBGFTRACE_ENABLED /** @todo DTrace */ - RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x at %04:%08RX64 %RX64 %RX64 %RX64", - exitCode, pCtx->cs.Sel, pCtx->rip, - pVMCB->ctrl.u64ExitInfo1, pVMCB->ctrl.u64ExitInfo2, pVMCB->ctrl.ExitIntInfo.au64[0]); -#endif -#if ARCH_BITS == 64 /* for the time being */ - VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, exitCode, pVMCB->ctrl.u64ExitInfo1, pVMCB->ctrl.u64ExitInfo2, - pVMCB->ctrl.ExitIntInfo.au64[0], UINT64_MAX); -#endif - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x); - - /* Deal with the reason of the VM-exit. 
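The TPR write-back just above relies on the fixed relationship between CR8 and the local APIC task-priority register: CR8 carries only the priority class, i.e. bits 7:4 of the 8-bit TPR, which is why the code shifts u8VTPR left by four before handing it to PDMApicSetTPR. A small sketch of that mapping (helper names are illustrative only):

#include <stdint.h>

/* CR8[3:0] corresponds to TPR[7:4]; the low TPR nibble is not visible
   through CR8, so a round trip loses it by design. */
static uint8_t ApicTprFromCr8(uint8_t uCr8) { return (uint8_t)(uCr8 << 4); }
static uint8_t Cr8FromApicTpr(uint8_t uTpr) { return (uint8_t)(uTpr >> 4); }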
*/ - switch (exitCode) - { - case SVM_EXIT_EXCEPTION_0: case SVM_EXIT_EXCEPTION_1: case SVM_EXIT_EXCEPTION_2: case SVM_EXIT_EXCEPTION_3: - case SVM_EXIT_EXCEPTION_4: case SVM_EXIT_EXCEPTION_5: case SVM_EXIT_EXCEPTION_6: case SVM_EXIT_EXCEPTION_7: - case SVM_EXIT_EXCEPTION_8: case SVM_EXIT_EXCEPTION_9: case SVM_EXIT_EXCEPTION_A: case SVM_EXIT_EXCEPTION_B: - case SVM_EXIT_EXCEPTION_C: case SVM_EXIT_EXCEPTION_D: case SVM_EXIT_EXCEPTION_E: case SVM_EXIT_EXCEPTION_F: - case SVM_EXIT_EXCEPTION_10: case SVM_EXIT_EXCEPTION_11: case SVM_EXIT_EXCEPTION_12: case SVM_EXIT_EXCEPTION_13: - case SVM_EXIT_EXCEPTION_14: case SVM_EXIT_EXCEPTION_15: case SVM_EXIT_EXCEPTION_16: case SVM_EXIT_EXCEPTION_17: - case SVM_EXIT_EXCEPTION_18: case SVM_EXIT_EXCEPTION_19: case SVM_EXIT_EXCEPTION_1A: case SVM_EXIT_EXCEPTION_1B: - case SVM_EXIT_EXCEPTION_1C: case SVM_EXIT_EXCEPTION_1D: case SVM_EXIT_EXCEPTION_1E: case SVM_EXIT_EXCEPTION_1F: - { - /* Pending trap. */ - SVM_EVENT Event; - uint32_t vector = exitCode - SVM_EXIT_EXCEPTION_0; - - Log2(("Hardware/software interrupt %d\n", vector)); - switch (vector) - { - case X86_XCPT_DB: - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB); - - /* Note that we don't support guest and host-initiated debugging at the same time. */ - Assert(DBGFIsStepping(pVCpu) || CPUMIsHyperDebugStateActive(pVCpu)); - - rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pCtx->dr[6]); - if (rc == VINF_EM_RAW_GUEST_TRAP) - { - Log(("Trap %x (debug) at %016RX64\n", vector, pCtx->rip)); - - /* Reinject the exception. */ - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */ - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_DB; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } - /* Return to ring 3 to deal with the debug exit code. */ - Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc))); - break; - } - - case X86_XCPT_NM: - { - Log(("#NM fault at %RGv\n", (RTGCPTR)pCtx->rip)); - - /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */ - /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */ - rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx); - if (rc == VINF_SUCCESS) - { - Assert(CPUMIsGuestFPUStateActive(pVCpu)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM); - - /* Continue execution. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - - goto ResumeExecution; - } - - Log(("Forward #NM fault to the guest\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM); - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_NM; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } - - case X86_XCPT_PF: /* Page fault */ - { - uint32_t errCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - RTGCUINTPTR uFaultAddress = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */ - -#ifdef VBOX_ALWAYS_TRAP_PF - if (pVM->hwaccm.s.fNestedPaging) - { - /* - * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution. - */ - Log(("Guest page fault at %04X:%RGv cr2=%RGv error code %x rsp=%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip, - uFaultAddress, errCode, (RTGCPTR)pCtx->rsp)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF); - - /* Now we must update CR2. 
*/ - pCtx->cr2 = uFaultAddress; - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_PF; - Event.n.u1ErrorCodeValid = 1; - Event.n.u32ErrorCode = errCode; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } -#endif - Assert(!pVM->hwaccm.s.fNestedPaging); - -#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING - /* Shortcut for APIC TPR reads and writes; 32 bits guests only */ - if ( pVM->hwaccm.s.fTRPPatchingAllowed - && (uFaultAddress & 0xfff) == 0x080 - && !(errCode & X86_TRAP_PF_P) /* not present */ - && CPUMGetGuestCPL(pVCpu) == 0 - && !CPUMIsGuestInLongModeEx(pCtx) - && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches)) - { - RTGCPHYS GCPhysApicBase, GCPhys; - PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - - rc = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL, &GCPhys); - if ( rc == VINF_SUCCESS - && GCPhys == GCPhysApicBase) - { - /* Only attempt to patch the instruction once. */ - PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip); - if (!pPatch) - { - rc = VINF_EM_HWACCM_PATCH_TPR_INSTR; - break; - } - } - } -#endif - - Log2(("Page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode)); - /* Exit qualification contains the linear address of the page fault. */ - TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); - TRPMSetErrorCode(pVCpu, errCode); - TRPMSetFaultAddress(pVCpu, uFaultAddress); - - /* Forward it to our trap handler first, in case our shadow pages are out of sync. */ - rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress); - Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); - if (rc == VINF_SUCCESS) - { - /* We've successfully synced our shadow pages, so let's just continue execution. */ - Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF); - - TRPMResetTrap(pVCpu); - goto ResumeExecution; - } - else if (rc == VINF_EM_RAW_GUEST_TRAP) - { - /* - * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution. - */ - Log2(("Forward page fault to the guest\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF); - /* The error code might have been changed. */ - errCode = TRPMGetErrorCode(pVCpu); - - TRPMResetTrap(pVCpu); - - /* Now we must update CR2. */ - pCtx->cr2 = uFaultAddress; - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_PF; - Event.n.u1ErrorCodeValid = 1; - Event.n.u32ErrorCode = errCode; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } -#ifdef VBOX_STRICT - if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK) - LogFlow(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc))); -#endif - /* Need to go back to the recompiler to emulate the instruction. */ - TRPMResetTrap(pVCpu); - break; - } - - case X86_XCPT_MF: /* Floating point exception. */ - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF); - if (!(pCtx->cr0 & X86_CR0_NE)) - { - /* old style FPU error reporting needs some extra work. */ - /** @todo don't fall back to the recompiler, but do it manually. 
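Both the guest #PF path above and the nested-paging path further down key off individual bits of the x86 page-fault error code. A short reference sketch of the bits being tested (the macro names are illustrative stand-ins for the X86_TRAP_PF_* constants):

#include <stdbool.h>
#include <stdint.h>

#define PF_ERR_P     UINT32_C(0x01)   /* 0 = page not present, 1 = protection violation */
#define PF_ERR_RW    UINT32_C(0x02)   /* write access */
#define PF_ERR_US    UINT32_C(0x04)   /* user-mode access */
#define PF_ERR_RSVD  UINT32_C(0x08)   /* reserved bit set in a paging entry */
#define PF_ERR_ID    UINT32_C(0x10)   /* instruction fetch */

/* The APIC TPR patching shortcut only considers not-present faults. */
static bool IsNotPresentFault(uint32_t uErrCode)
{
    return !(uErrCode & PF_ERR_P);
}

/* The nested-paging MMIO fast path looks for P together with RSVD. */
static bool IsMmioMisconfig(uint32_t uErrCode)
{
    return (uErrCode & (PF_ERR_P | PF_ERR_RSVD)) == (PF_ERR_P | PF_ERR_RSVD);
}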
*/ - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - Log(("Trap %x at %RGv\n", vector, (RTGCPTR)pCtx->rip)); - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_MF; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } - -#ifdef VBOX_STRICT - case X86_XCPT_BP: /* Breakpoint. */ - case X86_XCPT_GP: /* General protection failure exception.*/ - case X86_XCPT_UD: /* Unknown opcode exception. */ - case X86_XCPT_DE: /* Divide error. */ - case X86_XCPT_SS: /* Stack segment exception. */ - case X86_XCPT_NP: /* Segment not present exception. */ - { - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = vector; - - switch (vector) - { - case X86_XCPT_GP: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP); - Event.n.u1ErrorCodeValid = 1; - Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - break; - case X86_XCPT_BP: - /** Saves the wrong EIP on the stack (pointing to the int3 instead of the next instruction. */ - break; - case X86_XCPT_DE: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE); - break; - case X86_XCPT_UD: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD); - break; - case X86_XCPT_SS: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS); - Event.n.u1ErrorCodeValid = 1; - Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - break; - case X86_XCPT_NP: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP); - Event.n.u1ErrorCodeValid = 1; - Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - break; - } - Log(("Trap %x at %04x:%RGv esi=%x\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pCtx->esi)); - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } -#endif - default: - AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector)); - rc = VERR_HMSVM_UNEXPECTED_XCPT_EXIT; - break; - - } /* switch (vector) */ - break; - } - - case SVM_EXIT_NPF: - { - /* EXITINFO1 contains fault errorcode; EXITINFO2 contains the guest physical address causing the fault. */ - uint32_t errCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - RTGCPHYS GCPhysFault = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */ - PGMMODE enmShwPagingMode; - - Assert(pVM->hwaccm.s.fNestedPaging); - LogFlow(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode)); - -#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING - /* Shortcut for APIC TPR reads and writes; 32 bits guests only */ - if ( pVM->hwaccm.s.fTRPPatchingAllowed - && (GCPhysFault & PAGE_OFFSET_MASK) == 0x080 - && ( !(errCode & X86_TRAP_PF_P) /* not present */ - || (errCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD) /* mmio optimization */) - && CPUMGetGuestCPL(pVCpu) == 0 - && !CPUMIsGuestInLongModeEx(pCtx) - && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches)) - { - RTGCPHYS GCPhysApicBase; - PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - - if (GCPhysFault == GCPhysApicBase + 0x80) - { - /* Only attempt to patch the instruction once. */ - PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip); - if (!pPatch) - { - rc = VINF_EM_HWACCM_PATCH_TPR_INSTR; - break; - } - } - } -#endif - - /* Handle the pagefault trap for the nested shadow table. */ -#if HC_ARCH_BITS == 32 /** @todo shadow this in a variable. 
*/ - if (CPUMIsGuestInLongModeEx(pCtx)) - enmShwPagingMode = PGMMODE_AMD64_NX; - else -#endif - enmShwPagingMode = PGMGetHostMode(pVM); - - /* MMIO optimization */ - Assert((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD); - if ((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) - { - rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmShwPagingMode, CPUMCTX2CORE(pCtx), GCPhysFault, errCode); - - /* - * If we succeed, resume execution. - * Or, if fail in interpreting the instruction because we couldn't get the guest physical address - * of the page containing the instruction via the guest's page tables (we would invalidate the guest page - * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this - * weird case. See @bugref{6043}. - */ - if ( rc == VINF_SUCCESS - || rc == VERR_PAGE_TABLE_NOT_PRESENT - || rc == VERR_PAGE_NOT_PRESENT) - { - Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip)); - goto ResumeExecution; - } - Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip)); - break; - } - - /* Exit qualification contains the linear address of the page fault. */ - TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); - TRPMSetErrorCode(pVCpu, errCode); - TRPMSetFaultAddress(pVCpu, GCPhysFault); - - rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), GCPhysFault); - Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); - - /* - * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. - */ - if ( rc == VINF_SUCCESS - || rc == VERR_PAGE_TABLE_NOT_PRESENT - || rc == VERR_PAGE_NOT_PRESENT) - { - /* We've successfully synced our shadow pages, so let's just continue execution. */ - Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF); - - TRPMResetTrap(pVCpu); - goto ResumeExecution; - } - -#ifdef VBOX_STRICT - if (rc != VINF_EM_RAW_EMULATE_INSTR) - LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc))); -#endif - /* Need to go back to the recompiler to emulate the instruction. */ - TRPMResetTrap(pVCpu); - break; - } - - case SVM_EXIT_VINTR: - /* A virtual interrupt is about to be delivered, which means IF=1. */ - Log(("SVM_EXIT_VINTR IF=%d\n", pCtx->eflags.Bits.u1IF)); - pVMCB->ctrl.IntCtrl.n.u1VIrqValid = 0; - pVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0; - goto ResumeExecution; - - case SVM_EXIT_FERR_FREEZE: - case SVM_EXIT_INTR: - case SVM_EXIT_NMI: - case SVM_EXIT_SMI: - case SVM_EXIT_INIT: - /* External interrupt; leave to allow it to be dispatched again. */ - rc = VINF_EM_RAW_INTERRUPT; - break; - - case SVM_EXIT_WBINVD: - case SVM_EXIT_INVD: /* Guest software attempted to execute INVD. */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd); - /* Skip instruction and continue directly. */ - pCtx->rip += 2; /* Note! hardcoded opcode size! */ - /* Continue execution.*/ - goto ResumeExecution; - - case SVM_EXIT_CPUID: /* Guest software attempted to execute CPUID. */ - { - Log2(("SVM: Cpuid at %RGv for %x\n", (RTGCPTR)pCtx->rip, pCtx->eax)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid); - rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 2; /* Note! hardcoded opcode size! 
*/ - goto ResumeExecution; - } - AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case SVM_EXIT_RDTSC: /* Guest software attempted to execute RDTSC. */ - { - Log2(("SVM: Rdtsc\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc); - rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 2; /* Note! hardcoded opcode size! */ - goto ResumeExecution; - } - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case SVM_EXIT_RDPMC: /* Guest software attempted to execute RDPMC. */ - { - Log2(("SVM: Rdpmc %x\n", pCtx->ecx)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc); - rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 2; /* Note! hardcoded opcode size! */ - goto ResumeExecution; - } - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case SVM_EXIT_RDTSCP: /* Guest software attempted to execute RDTSCP. */ - { - Log2(("SVM: Rdtscp\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtscp); - rc = EMInterpretRdtscp(pVM, pVCpu, pCtx); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 3; /* Note! hardcoded opcode size! */ - goto ResumeExecution; - } - AssertMsgFailed(("EMU: rdtscp failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case SVM_EXIT_INVLPG: /* Guest software attempted to execute INVLPG. */ - { - Log2(("SVM: invlpg\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvlpg); - - Assert(!pVM->hwaccm.s.fNestedPaging); - - /* Truly a pita. Why can't SVM give the same information as VT-x? */ - rc = hmR0SvmInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushPageInvlpg); - goto ResumeExecution; /* eip already updated */ - } - break; - } - - case SVM_EXIT_WRITE_CR0: case SVM_EXIT_WRITE_CR1: case SVM_EXIT_WRITE_CR2: case SVM_EXIT_WRITE_CR3: - case SVM_EXIT_WRITE_CR4: case SVM_EXIT_WRITE_CR5: case SVM_EXIT_WRITE_CR6: case SVM_EXIT_WRITE_CR7: - case SVM_EXIT_WRITE_CR8: case SVM_EXIT_WRITE_CR9: case SVM_EXIT_WRITE_CR10: case SVM_EXIT_WRITE_CR11: - case SVM_EXIT_WRITE_CR12: case SVM_EXIT_WRITE_CR13: case SVM_EXIT_WRITE_CR14: case SVM_EXIT_WRITE_CR15: - { - Log2(("SVM: %RGv mov cr%d, \n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_CR0)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[exitCode - SVM_EXIT_WRITE_CR0]); - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - - switch (exitCode - SVM_EXIT_WRITE_CR0) - { - case 0: - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - break; - case 2: - break; - case 3: - Assert(!pVM->hwaccm.s.fNestedPaging); - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3; - break; - case 4: - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4; - break; - case 8: - break; - default: - AssertFailed(); - } - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - /* Only resume if successful. 
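The rip adjustments above are hardcoded because these SVM intercepts do not report an instruction length, so the handler has to know the fixed encodings. A sketch of the sizes involved (values follow the standard opcode encodings; the enum and helper are illustrative):

#include <stdint.h>

enum
{
    kCbCpuid  = 2,  /* 0F A2    */
    kCbRdtsc  = 2,  /* 0F 31    */
    kCbRdpmc  = 2,  /* 0F 33    */
    kCbRdtscp = 3,  /* 0F 01 F9 */
    kCbWbinvd = 2,  /* 0F 09    */
    kCbInvd   = 2   /* 0F 08    */
};

/* Equivalent of the pCtx->rip += N lines above. */
static uint64_t SkipFixedSizeInstr(uint64_t uRip, unsigned cbInstr)
{
    return uRip + cbInstr;
}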
*/ - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); - break; - } - - case SVM_EXIT_READ_CR0: case SVM_EXIT_READ_CR1: case SVM_EXIT_READ_CR2: case SVM_EXIT_READ_CR3: - case SVM_EXIT_READ_CR4: case SVM_EXIT_READ_CR5: case SVM_EXIT_READ_CR6: case SVM_EXIT_READ_CR7: - case SVM_EXIT_READ_CR8: case SVM_EXIT_READ_CR9: case SVM_EXIT_READ_CR10: case SVM_EXIT_READ_CR11: - case SVM_EXIT_READ_CR12: case SVM_EXIT_READ_CR13: case SVM_EXIT_READ_CR14: case SVM_EXIT_READ_CR15: - { - Log2(("SVM: %RGv mov x, cr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_CR0)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[exitCode - SVM_EXIT_READ_CR0]); - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - /* Only resume if successful. */ - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); - break; - } - - case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3: - case SVM_EXIT_WRITE_DR4: case SVM_EXIT_WRITE_DR5: case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: - case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9: case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: - case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13: case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15: - { - Log2(("SVM: %RGv mov dr%d, x\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_DR0)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite); - - if ( !DBGFIsStepping(pVCpu) - && !CPUMIsHyperDebugStateActive(pVCpu)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch); - - /* Disable drx move intercepts. */ - pVMCB->ctrl.u16InterceptRdDRx = 0; - pVMCB->ctrl.u16InterceptWrDRx = 0; - - /* Save the host and load the guest debug state. */ - rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc2); - goto ResumeExecution; - } - - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG; - - /* Only resume if successful. */ - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); - break; - } - - case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3: - case SVM_EXIT_READ_DR4: case SVM_EXIT_READ_DR5: case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: - case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9: case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: - case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13: case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15: - { - Log2(("SVM: %RGv mov x, dr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_DR0)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead); - - if (!DBGFIsStepping(pVCpu)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch); - - /* Disable DRx move intercepts. */ - pVMCB->ctrl.u16InterceptRdDRx = 0; - pVMCB->ctrl.u16InterceptWrDRx = 0; - - /* Save the host and load the guest debug state. */ - rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc2); - goto ResumeExecution; - } - - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - /* Only resume if successful. 
*/ - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); - break; - } - - /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */ - case SVM_EXIT_IOIO: /* I/O instruction. */ - { - SVM_IOIO_EXIT IoExitInfo; - - IoExitInfo.au32[0] = pVMCB->ctrl.u64ExitInfo1; - unsigned uIdx = (IoExitInfo.au32[0] >> 4) & 0x7; - uint32_t uIOSize = g_aIOSize[uIdx]; - uint32_t uAndVal = g_aIOOpAnd[uIdx]; - if (RT_UNLIKELY(!uIOSize)) - { - AssertFailed(); /* should be fatal. */ - rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo r=ramshankar: would this really fall back to the recompiler and work? */ - break; - } - - if (IoExitInfo.n.u1STR) - { - /* ins/outs */ - PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState; - - /* Disassemble manually to deal with segment prefixes. */ - rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); - if (rc == VINF_SUCCESS) - { - if (IoExitInfo.n.u1Type == 0) - { - Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite); - rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix, - (DISCPUMODE)pDis->uAddrMode, uIOSize); - } - else - { - Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead); - rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix, - (DISCPUMODE)pDis->uAddrMode, uIOSize); - } - } - else - rc = VINF_EM_RAW_EMULATE_INSTR; - } - else - { - /* Normal in/out */ - Assert(!IoExitInfo.n.u1REP); - - if (IoExitInfo.n.u1Type == 0) - { - Log2(("IOMIOPortWrite %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, - uIOSize)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite); - rc = IOMIOPortWrite(pVM, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize); - if (rc == VINF_IOM_R3_IOPORT_WRITE) - { - HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, - uAndVal, uIOSize); - } - } - else - { - uint32_t u32Val = 0; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead); - rc = IOMIOPortRead(pVM, IoExitInfo.n.u16Port, &u32Val, uIOSize); - if (IOM_SUCCESS(rc)) - { - /* Write back to the EAX register. */ - pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal); - Log2(("IOMIOPortRead %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, u32Val & uAndVal, - uIOSize)); - } - else if (rc == VINF_IOM_R3_IOPORT_READ) - { - HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, - uAndVal, uIOSize); - } - } - } - - /* - * Handled the I/O return codes. - * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.) - */ - if (IOM_SUCCESS(rc)) - { - /* Update EIP and continue execution. */ - pCtx->rip = pVMCB->ctrl.u64ExitInfo2; /* RIP/EIP of the next instruction is saved in EXITINFO2. */ - if (RT_LIKELY(rc == VINF_SUCCESS)) - { - /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */ - if (pCtx->dr[7] & X86_DR7_ENABLED_MASK) - { - /* IO operation lookup arrays. 
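The IN path above merges the port value into EAX through uAndVal so that an 8- or 16-bit read leaves the upper register bits untouched. A standalone sketch of that merge with the same 1/2/4-byte masks (the function name is illustrative):

#include <stdint.h>

/* Merge a port read of cbSize bytes (1, 2 or 4) into EAX without touching
   bits above the operand size, mirroring (eax & ~uAndVal) | (val & uAndVal). */
static uint32_t MergeIoPortRead(uint32_t uEax, uint32_t uValue, unsigned cbSize)
{
    uint32_t const uMask = cbSize == 1 ? UINT32_C(0x000000ff)
                         : cbSize == 2 ? UINT32_C(0x0000ffff)
                         :               UINT32_C(0xffffffff);
    return (uEax & ~uMask) | (uValue & uMask);
}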
*/ - static uint32_t const aIOSize[4] = { 1, 2, 0, 4 }; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck); - for (unsigned i = 0; i < 4; i++) - { - unsigned uBPLen = aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)]; - - if ( (IoExitInfo.n.u16Port >= pCtx->dr[i] && IoExitInfo.n.u16Port < pCtx->dr[i] + uBPLen) - && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i))) - && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO)) - { - SVM_EVENT Event; - - Assert(CPUMIsGuestDebugStateActive(pVCpu)); - - /* Clear all breakpoint status flags and set the one we just hit. */ - pCtx->dr[6] &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3); - pCtx->dr[6] |= (uint64_t)RT_BIT(i); - - /* - * Note: AMD64 Architecture Programmer's Manual 13.1: - * Bits 15:13 of the DR6 register is never cleared by the processor and must be cleared - * by software after the contents have been read. - */ - pVMCB->guest.u64DR6 = pCtx->dr[6]; - - /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */ - pCtx->dr[7] &= ~X86_DR7_GD; - - /* Paranoia. */ - pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */ - pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */ - pCtx->dr[7] |= 0x400; /* must be one */ - - pVMCB->guest.u64DR7 = pCtx->dr[7]; - - /* Inject the exception. */ - Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip)); - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */ - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_DB; - - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } - } - } - goto ResumeExecution; - } - Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize, - VBOXSTRICTRC_VAL(rc))); - break; - } - -#ifdef VBOX_STRICT - if (rc == VINF_IOM_R3_IOPORT_READ) - Assert(IoExitInfo.n.u1Type != 0); - else if (rc == VINF_IOM_R3_IOPORT_WRITE) - Assert(IoExitInfo.n.u1Type == 0); - else - { - AssertMsg( RT_FAILURE(rc) - || rc == VINF_EM_RAW_EMULATE_INSTR - || rc == VINF_EM_RAW_GUEST_TRAP - || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); - } -#endif - Log2(("Failed IO at %RGv %x size %d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); - break; - } - - case SVM_EXIT_HLT: - /* Check if external interrupts are pending; if so, don't switch back. */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt); - pCtx->rip++; /* skip hlt */ - if (EMShouldContinueAfterHalt(pVCpu, pCtx)) - goto ResumeExecution; - - rc = VINF_EM_HALT; - break; - - case SVM_EXIT_MWAIT_UNCOND: - Log2(("SVM: mwait\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait); - rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if ( rc == VINF_EM_HALT - || rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 3; /* Note: hardcoded opcode size assumption! */ - - /* Check if external interrupts are pending; if so, don't switch back. */ - if ( rc == VINF_SUCCESS - || ( rc == VINF_EM_HALT - && EMShouldContinueAfterHalt(pVCpu, pCtx)) - ) - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - break; - - case SVM_EXIT_MONITOR: - { - Log2(("SVM: monitor\n")); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor); - rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += 3; /* Note: hardcoded opcode size assumption! 
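The breakpoint loop above decides whether the port access trips one of the four debug registers. A condensed standalone version of that test, assuming the usual DR7 layout (L/G enable bits in the low byte, two-bit R/W and LEN fields starting at bit 16, R/W value 2 meaning an I/O breakpoint) and the same {1, 2, 0, 4} length table:

#include <stdbool.h>
#include <stdint.h>

static uint32_t const s_acbBpLen[4] = { 1, 2, 0, 4 };  /* LEN=2 is not valid for I/O */

/* True if an access to uPort matches an armed I/O breakpoint in DR0..DR3. */
static bool IoAccessHitsBreakpoint(uint64_t uDr7, uint64_t const auDr03[4], uint16_t uPort)
{
    for (unsigned i = 0; i < 4; i++)
    {
        bool     const fEnabled = (uDr7 & (UINT64_C(3) << (i * 2))) != 0;  /* L(i) or G(i) */
        uint32_t const fRw      = (uint32_t)(uDr7 >> (16 + i * 4)) & 3;    /* 2 = I/O      */
        uint32_t const cbLen    = s_acbBpLen[(uDr7 >> (18 + i * 4)) & 3];
        if (   fEnabled
            && fRw == 2
            && cbLen != 0
            && uPort >= auDr03[i]
            && uPort <  auDr03[i] + cbLen)
            return true;
    }
    return false;
}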
*/ - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - break; - } - - case SVM_EXIT_VMMCALL: - rc = hmR0SvmEmulateTprVMMCall(pVM, pVCpu, pCtx); - if (rc == VINF_SUCCESS) - { - goto ResumeExecution; /* rip already updated. */ - } - /* no break */ - - case SVM_EXIT_RSM: - case SVM_EXIT_INVLPGA: - case SVM_EXIT_VMRUN: - case SVM_EXIT_VMLOAD: - case SVM_EXIT_VMSAVE: - case SVM_EXIT_STGI: - case SVM_EXIT_CLGI: - case SVM_EXIT_SKINIT: - { - /* Unsupported instructions. */ - SVM_EVENT Event; - - Event.au64[0] = 0; - Event.n.u3Type = SVM_EVENT_EXCEPTION; - Event.n.u1Valid = 1; - Event.n.u8Vector = X86_XCPT_UD; - - Log(("Forced #UD trap at %RGv\n", (RTGCPTR)pCtx->rip)); - hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event); - goto ResumeExecution; - } - - /* Emulate in ring-3. */ - case SVM_EXIT_MSR: - { - /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */ - if ( pVM->hwaccm.s.fTPRPatchingActive - && pCtx->ecx == MSR_K8_LSTAR - && pVMCB->ctrl.u64ExitInfo1 == 1 /* wrmsr */) - { - if ((pCtx->eax & 0xff) != u8LastTPR) - { - Log(("SVM: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff)); - - /* Our patch code uses LSTAR for TPR caching. */ - rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); - AssertRC(rc2); - } - - /* Skip the instruction and continue. */ - pCtx->rip += 2; /* wrmsr = [0F 30] */ - - /* Only resume if successful. */ - goto ResumeExecution; - } - - /* - * The Intel spec. claims there's an REX version of RDMSR that's slightly different, - * so we play safe by completely disassembling the instruction. - */ - STAM_COUNTER_INC((pVMCB->ctrl.u64ExitInfo1 == 0) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr); - Log(("SVM: %s\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr")); - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - /* Only resume if successful. */ - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr", - VBOXSTRICTRC_VAL(rc))); - break; - } - - case SVM_EXIT_TASK_SWITCH: /* too complicated to emulate, so fall back to the recompiler */ - Log(("SVM_EXIT_TASK_SWITCH: exit2=%RX64\n", pVMCB->ctrl.u64ExitInfo2)); - if ( !(pVMCB->ctrl.u64ExitInfo2 & (SVM_EXIT2_TASK_SWITCH_IRET | SVM_EXIT2_TASK_SWITCH_JMP)) - && pVCpu->hwaccm.s.Event.fPending) - { - SVM_EVENT Event; - Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo; - - /* Caused by an injected interrupt. */ - pVCpu->hwaccm.s.Event.fPending = false; - switch (Event.n.u3Type) - { - case SVM_EVENT_EXTERNAL_IRQ: - case SVM_EVENT_NMI: - Log(("SVM_EXIT_TASK_SWITCH: reassert trap %d\n", Event.n.u8Vector)); - Assert(!Event.n.u1ErrorCodeValid); - rc2 = TRPMAssertTrap(pVCpu, Event.n.u8Vector, TRPM_HARDWARE_INT); - AssertRC(rc2); - break; - - default: - /* Exceptions and software interrupts can just be restarted. */ - break; - } - } - rc = VERR_EM_INTERPRETER; - break; - - case SVM_EXIT_PAUSE: - case SVM_EXIT_MWAIT_ARMED: - rc = VERR_EM_INTERPRETER; - break; - - case SVM_EXIT_SHUTDOWN: - rc = VINF_EM_RESET; /* Triple fault equals a reset. 
*/ - break; - - case SVM_EXIT_IDTR_READ: - case SVM_EXIT_GDTR_READ: - case SVM_EXIT_LDTR_READ: - case SVM_EXIT_TR_READ: - case SVM_EXIT_IDTR_WRITE: - case SVM_EXIT_GDTR_WRITE: - case SVM_EXIT_LDTR_WRITE: - case SVM_EXIT_TR_WRITE: - case SVM_EXIT_CR0_SEL_WRITE: - default: - /* Unexpected exit codes. */ - rc = VERR_HMSVM_UNEXPECTED_EXIT; - AssertMsgFailed(("Unexpected exit code %x\n", exitCode)); /* Can't happen. */ - break; - } - -end: - - /* - * We are now going back to ring-3, so clear the forced action flag. - */ - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); - - /* - * Signal changes to the recompiler. - */ - CPUMSetChangedFlags(pVCpu, - CPUM_CHANGED_SYSENTER_MSR - | CPUM_CHANGED_LDTR - | CPUM_CHANGED_GDTR - | CPUM_CHANGED_IDTR - | CPUM_CHANGED_TR - | CPUM_CHANGED_HIDDEN_SEL_REGS); - - /* - * If we executed vmrun and an external IRQ was pending, then we don't have to do a full sync the next time. - */ - if (exitCode == SVM_EXIT_INTR) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq); - /* On the next entry we'll only sync the host context. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT; - } - else - { - /* On the next entry we'll sync everything. */ - /** @todo we can do better than this */ - /* Not in the VINF_PGM_CHANGE_MODE though! */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL; - } - - /* Translate into a less severe return code */ - if (rc == VERR_EM_INTERPRETER) - rc = VINF_EM_RAW_EMULATE_INSTR; - - /* Just set the correct state here instead of trying to catch every goto above. */ - VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC); - -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* Restore interrupts if we exitted after disabling them. */ - if (uOldEFlags != ~(RTCCUINTREG)0) - ASMSetFlags(uOldEFlags); -#endif - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); - return VBOXSTRICTRC_TODO(rc); -} - - -/** - * Emulate simple mov tpr instruction. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - int rc; - - LogFlow(("Emulated VMMCall TPR access replacement at %RGv\n", pCtx->rip)); - - for (;;) - { - bool fPending; - uint8_t u8Tpr; - - PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip); - if (!pPatch) - break; - - switch (pPatch->enmType) - { - case HWACCMTPRINSTR_READ: - /* TPR caching in CR8 */ - rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPending); - AssertRC(rc); - - rc = DISWriteReg32(CPUMCTX2CORE(pCtx), pPatch->uDstOperand, u8Tpr); - AssertRC(rc); - - LogFlow(("Emulated read successfully\n")); - pCtx->rip += pPatch->cbOp; - break; - - case HWACCMTPRINSTR_WRITE_REG: - case HWACCMTPRINSTR_WRITE_IMM: - /* Fetch the new TPR value */ - if (pPatch->enmType == HWACCMTPRINSTR_WRITE_REG) - { - uint32_t val; - - rc = DISFetchReg32(CPUMCTX2CORE(pCtx), pPatch->uSrcOperand, &val); - AssertRC(rc); - u8Tpr = val; - } - else - u8Tpr = (uint8_t)pPatch->uSrcOperand; - - rc = PDMApicSetTPR(pVCpu, u8Tpr); - AssertRC(rc); - LogFlow(("Emulated write successfully\n")); - pCtx->rip += pPatch->cbOp; - break; - - default: - AssertMsgFailedReturn(("Unexpected type %d\n", pPatch->enmType), VERR_HMSVM_UNEXPECTED_PATCH_TYPE); - } - } - return VINF_SUCCESS; -} - - -/** - * Enters the AMD-V session. 
- * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCpu Pointer to the CPU info struct. - */ -VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu) -{ - Assert(pVM->hwaccm.s.svm.fSupported); - - LogFlow(("SVMR0Enter cpu%d last=%d asid=%d\n", pCpu->idCpu, pVCpu->hwaccm.s.idLastCpu, pVCpu->hwaccm.s.uCurrentASID)); - pVCpu->hwaccm.s.fResumeVM = false; - - /* Force to reload LDTR, so we'll execute VMLoad to load additional guest state. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_LDTR; - - return VINF_SUCCESS; -} - - -/** - * Leaves the AMD-V session. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) SVMR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - SVM_VMCB *pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; - - Assert(pVM->hwaccm.s.svm.fSupported); - -#ifdef DEBUG - if (CPUMIsHyperDebugStateActive(pVCpu)) - { - CPUMR0LoadHostDebugState(pVM, pVCpu); - } - else -#endif - /* Save the guest debug state if necessary. */ - if (CPUMIsGuestDebugStateActive(pVCpu)) - { - CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, false /* skip DR6 */); - - /* Intercept all DRx reads and writes again. Changed later on. */ - pVMCB->ctrl.u16InterceptRdDRx = 0xFFFF; - pVMCB->ctrl.u16InterceptWrDRx = 0xFFFF; - - /* Resync the debug registers the next time. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG; - } - else - Assert(pVMCB->ctrl.u16InterceptRdDRx == 0xFFFF && pVMCB->ctrl.u16InterceptWrDRx == 0xFFFF); - - return VINF_SUCCESS; -} - - -/** - * Worker for Interprets INVLPG. - * - * @return VBox status code. - * @param pVCpu Pointer to the VMCPU. - * @param pCpu Pointer to the CPU info struct. - * @param pRegFrame Pointer to the register frame. - */ -static int hmR0svmInterpretInvlPgEx(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame) -{ - DISQPVPARAMVAL param1; - RTGCPTR addr; - - int rc = DISQueryParamVal(pRegFrame, pCpu, &pCpu->Param1, ¶m1, DISQPVWHICH_SRC); - if (RT_FAILURE(rc)) - return VERR_EM_INTERPRETER; - - switch (param1.type) - { - case DISQPV_TYPE_IMMEDIATE: - case DISQPV_TYPE_ADDRESS: - if (!(param1.flags & (DISQPV_FLAG_32 | DISQPV_FLAG_64))) - return VERR_EM_INTERPRETER; - addr = param1.val.val64; - break; - - default: - return VERR_EM_INTERPRETER; - } - - /** @todo is addr always a flat linear address or ds based - * (in absence of segment override prefixes)???? - */ - rc = PGMInvalidatePage(pVCpu, addr); - if (RT_SUCCESS(rc)) - return VINF_SUCCESS; - - AssertRC(rc); - return rc; -} - - -/** - * Interprets INVLPG. - * - * @returns VBox status code. - * @retval VINF_* Scheduling instructions. - * @retval VERR_EM_INTERPRETER Something we can't cope with. - * @retval VERR_* Fatal errors. - * - * @param pVM Pointer to the VM. - * @param pRegFrame Pointer to the register frame. - * - * @remarks Updates the EIP if an instruction was executed successfully. - */ -static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame) -{ - /* - * Only allow 32 & 64 bit code. - */ - if (CPUMGetGuestCodeBits(pVCpu) != 16) - { - PDISSTATE pDis = &pVCpu->hwaccm.s.DisState; - int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); - if (RT_SUCCESS(rc) && pDis->pCurInstr->uOpcode == OP_INVLPG) - { - rc = hmR0svmInterpretInvlPgEx(pVCpu, pDis, pRegFrame); - if (RT_SUCCESS(rc)) - pRegFrame->rip += pDis->cbInstr; /* Move on to the next instruction. 
*/ - return rc; - } - } - return VERR_EM_INTERPRETER; -} - - -/** - * Invalidates a guest page by guest virtual address. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param GCVirt Guest virtual address of the page to invalidate. - */ -VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt) -{ - bool fFlushPending = pVM->hwaccm.s.svm.fAlwaysFlushTLB | VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH); - - /* Skip it if a TLB flush is already pending. */ - if (!fFlushPending) - { - SVM_VMCB *pVMCB; - - Log2(("SVMR0InvalidatePage %RGv\n", GCVirt)); - AssertReturn(pVM, VERR_INVALID_PARAMETER); - Assert(pVM->hwaccm.s.svm.fSupported); - - pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; - AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB); - -#if HC_ARCH_BITS == 32 - /* If we get a flush in 64 bits guest mode, then force a full TLB flush. Invlpga takes only 32 bits addresses. */ - if (CPUMIsGuestInLongMode(pVCpu)) - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - else -#endif - SVMR0InvlpgA(GCVirt, pVMCB->ctrl.TLBCtrl.n.u32ASID); - } - return VINF_SUCCESS; -} - - -#if 0 /* obsolete, but left here for clarification. */ -/** - * Invalidates a guest page by physical address. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param GCPhys Guest physical address of the page to invalidate. - */ -VMMR0DECL(int) SVMR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) -{ - Assert(pVM->hwaccm.s.fNestedPaging); - /* invlpga only invalidates TLB entries for guest virtual addresses; we have no choice but to force a TLB flush here. */ - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBInvlpga); - return VINF_SUCCESS; -} -#endif - - -#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -/** - * Prepares for and executes VMRUN (64-bit guests from a 32-bit host). - * - * @returns VBox status code. - * @param pVMCBHostPhys Physical address of host VMCB. - * @param pVMCBPhys Physical address of the VMCB. - * @param pCtx Pointer to the guest CPU context. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu) -{ - uint32_t aParam[4]; - - aParam[0] = (uint32_t)(pVMCBHostPhys); /* Param 1: pVMCBHostPhys - Lo. */ - aParam[1] = (uint32_t)(pVMCBHostPhys >> 32); /* Param 1: pVMCBHostPhys - Hi. */ - aParam[2] = (uint32_t)(pVMCBPhys); /* Param 2: pVMCBPhys - Lo. */ - aParam[3] = (uint32_t)(pVMCBPhys >> 32); /* Param 2: pVMCBPhys - Hi. */ - - return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSVMGCVMRun64, 4, &aParam[0]); -} - - -/** - * Executes the specified handler in 64-bit mode. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param pfnHandler Pointer to the RC handler function. - * @param cbParam Number of parameters. - * @param paParam Array of 32-bit parameters. - */ -VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, - uint32_t *paParam) -{ - int rc; - RTHCUINTREG uOldEFlags; - - Assert(pfnHandler); - - /* Disable interrupts. 
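SVMR0VMSwitcherRun64 above has to squeeze two 64-bit physical addresses through an array of 32-bit parameters before calling the 32-bit-to-64-bit world switcher. A sketch of the same lo/hi packing (helper names are illustrative):

#include <stdint.h>

/* Split a 64-bit physical address into the lo/hi pair stored in aParam[]. */
static void PackPhysAddr(uint64_t uPhys, uint32_t *puLo, uint32_t *puHi)
{
    *puLo = (uint32_t)uPhys;
    *puHi = (uint32_t)(uPhys >> 32);
}

/* The 64-bit side reassembles the address the same way. */
static uint64_t UnpackPhysAddr(uint32_t uLo, uint32_t uHi)
{
    return ((uint64_t)uHi << 32) | uLo;
}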
*/ - uOldEFlags = ASMIntDisableFlags(); - -#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI - RTCPUID idHostCpu = RTMpCpuId(); - CPUMR0SetLApic(pVM, idHostCpu); -#endif - - CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); - CPUMSetHyperEIP(pVCpu, pfnHandler); - for (int i = (int)cbParam - 1; i >= 0; i--) - CPUMPushHyper(pVCpu, paParam[i]); - - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); - /* Call switcher. */ - rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum)); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); - - ASMSetFlags(uOldEFlags); - return rc; -} - -#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */ - diff --git a/src/VBox/VMM/VMMR0/HWSVMR0.h b/src/VBox/VMM/VMMR0/HWSVMR0.h deleted file mode 100644 index 2d8a6729..00000000 --- a/src/VBox/VMM/VMMR0/HWSVMR0.h +++ /dev/null @@ -1,222 +0,0 @@ -/* $Id: HWSVMR0.h $ */ -/** @file - * HM SVM (AMD-V) - Internal header file. - */ - -/* - * Copyright (C) 2006-2012 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - -#ifndef ___VMMR0_HWSVMR0_h -#define ___VMMR0_HWSVMR0_h - -#include <VBox/cdefs.h> -#include <VBox/types.h> -#include <VBox/vmm/em.h> -#include <VBox/vmm/stam.h> -#include <VBox/dis.h> -#include <VBox/vmm/hwaccm.h> -#include <VBox/vmm/pgm.h> -#include <VBox/vmm/hwacc_svm.h> - -RT_C_DECLS_BEGIN - -/** @defgroup grp_svm_int Internal - * @ingroup grp_svm - * @internal - * @{ - */ - -#ifdef IN_RING0 - -/** - * Enters the AMD-V session - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCpu Pointer to the CPU info struct. - */ -VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu); - -/** - * Leaves the AMD-V session - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) SVMR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - -VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS HCPhysCpuPage, bool fEnabledBySystem); - -/** - * Deactivates AMD-V on the current CPU - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pvPageCpu Pointer to the global CPU page. - * @param pPageCpuPhys Physical address of the global CPU page. - */ -VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys); - -/** - * Does Ring-0 per VM AMD-V init. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) SVMR0InitVM(PVM pVM); - -/** - * Does Ring-0 per VM AMD-V termination. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) SVMR0TermVM(PVM pVM); - -/** - * Sets up AMD-V for the specified VM - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) SVMR0SetupVM(PVM pVM); - - -/** - * Runs guest code in an AMD-V VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. 
- * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - - -/** - * Save the host state. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu); - -/** - * Loads the guest state. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - - -#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -/** - * Prepares for and executes VMRUN (64-bit guests from a 32-bit host). - * - * @returns VBox status code. - * @param pVMCBHostPhys Physical address of host VMCB. - * @param pVMCBPhys Physical address of the VMCB. - * @param pCtx Pointer to the guest CPU context. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. (not used) - */ -DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); - -/** - * Executes the specified handler in 64-bit mode. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param pfnHandler Pointer to the RC handler function. - * @param cbParam Number of parameters. - * @param paParam Array of 32-bit parameters. - */ -VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, - uint32_t *paParam); -#endif /* HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */ - -/** - * Prepares for and executes VMRUN (32-bit guests). - * - * @returns VBox status code. - * @param pVMCBHostPhys Physical address of host VMCB. - * @param pVMCBPhys Physical address of the VMCB. - * @param pCtx Pointer to the guest CPU context. - * @param pVM Pointer to the VM. (not used) - * @param pVCpu Pointer to the VMCPU. (not used) - */ -DECLASM(int) SVMR0VMRun(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); - - -/** - * Prepares for and executes VMRUN (64-bit guests). - * - * @returns VBox status code. - * @param pVMCBHostPhys Physical address of host VMCB. - * @param pVMCBPhys Physical address of the VMCB. - * @param pCtx Pointer to the guest CPU context. - * @param pVM Pointer to the VM. (not used) - * @param pVCpu Pointer to the VMCPU. (not used) - */ -DECLASM(int) SVMR0VMRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu); - -/** - * Executes INVLPGA. - * - * @param pPageGC Virtual page to invalidate. - * @param u32ASID Tagged TLB id. - */ -DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t u32ASID); - -/** Convert hidden selector attribute word between VMX and SVM formats. 
*/ -#define SVM_HIDSEGATTR_VMX2SVM(a) (a & 0xFF) | ((a & 0xF000) >> 4) -#define SVM_HIDSEGATTR_SVM2VMX(a) (a & 0xFF) | ((a & 0x0F00) << 4) - -#define SVM_WRITE_SELREG(REG, reg) \ - do \ - { \ - Assert(pCtx->reg.fFlags & CPUMSELREG_FLAGS_VALID); \ - Assert(pCtx->reg.ValidSel == pCtx->reg.Sel); \ - pVMCB->guest.REG.u16Sel = pCtx->reg.Sel; \ - pVMCB->guest.REG.u32Limit = pCtx->reg.u32Limit; \ - pVMCB->guest.REG.u64Base = pCtx->reg.u64Base; \ - pVMCB->guest.REG.u16Attr = SVM_HIDSEGATTR_VMX2SVM(pCtx->reg.Attr.u); \ - } while (0) - -#define SVM_READ_SELREG(REG, reg) \ - do \ - { \ - pCtx->reg.Sel = pVMCB->guest.REG.u16Sel; \ - pCtx->reg.ValidSel = pVMCB->guest.REG.u16Sel; \ - pCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \ - pCtx->reg.u32Limit = pVMCB->guest.REG.u32Limit; \ - pCtx->reg.u64Base = pVMCB->guest.REG.u64Base; \ - pCtx->reg.Attr.u = SVM_HIDSEGATTR_SVM2VMX(pVMCB->guest.REG.u16Attr); \ - } while (0) - -#endif /* IN_RING0 */ - -/** @} */ - -RT_C_DECLS_END - -#endif /* !___VMMR0_HWSVMR0_h */ - diff --git a/src/VBox/VMM/VMMR0/HWVMXR0.cpp b/src/VBox/VMM/VMMR0/HWVMXR0.cpp deleted file mode 100644 index d937eb5b..00000000 --- a/src/VBox/VMM/VMMR0/HWVMXR0.cpp +++ /dev/null @@ -1,5621 +0,0 @@ -/* $Id: HWVMXR0.cpp $ */ -/** @file - * HM VMX (VT-x) - Host Context Ring-0. - */ - -/* - * Copyright (C) 2006-2012 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - - -/******************************************************************************* -* Header Files * -*******************************************************************************/ -#define LOG_GROUP LOG_GROUP_HWACCM -#include <iprt/asm-amd64-x86.h> -#include <VBox/vmm/hwaccm.h> -#include <VBox/vmm/pgm.h> -#include <VBox/vmm/dbgf.h> -#include <VBox/vmm/dbgftrace.h> -#include <VBox/vmm/selm.h> -#include <VBox/vmm/iom.h> -#ifdef VBOX_WITH_REM -# include <VBox/vmm/rem.h> -#endif -#include <VBox/vmm/tm.h> -#include "HWACCMInternal.h" -#include <VBox/vmm/vm.h> -#include <VBox/vmm/pdmapi.h> -#include <VBox/err.h> -#include <VBox/log.h> -#include <iprt/assert.h> -#include <iprt/param.h> -#include <iprt/string.h> -#include <iprt/time.h> -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION -# include <iprt/thread.h> -#endif -#include <iprt/x86.h> -#include "HWVMXR0.h" - -#include "dtrace/VBoxVMM.h" - - -/******************************************************************************* -* Defined Constants And Macros * -*******************************************************************************/ -#if defined(RT_ARCH_AMD64) -# define VMX_IS_64BIT_HOST_MODE() (true) -#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0) -#else -# define VMX_IS_64BIT_HOST_MODE() (false) -#endif - - -/******************************************************************************* -* Global Variables * -*******************************************************************************/ -/* IO operation lookup arrays. 
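The SVM_HIDSEGATTR_VMX2SVM / SVM2VMX macros above repack the hidden segment attribute word: the VMX layout keeps the access-rights bits in 0-7 and 12-15 (leaving a hole at 8-11), whereas the VMCB stores them contiguously in the low 12 bits. A function form of the same shifts, convenient for checking the round trip:

#include <stdint.h>

static uint32_t HidSegAttrVmxToSvm(uint32_t uAttr)
{
    return (uAttr & 0xff) | ((uAttr & 0xf000) >> 4);
}

static uint32_t HidSegAttrSvmToVmx(uint32_t uAttr)
{
    return (uAttr & 0xff) | ((uAttr & 0x0f00) << 4);
}
/* For any VMX attribute word with nothing stored in bits 8..11,
   HidSegAttrSvmToVmx(HidSegAttrVmxToSvm(a)) == (a & 0xf0ff). */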
*/ -static uint32_t const g_aIOSize[4] = {1, 2, 0, 4}; -static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff}; - -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL -/** See HWACCMR0A.asm. */ -extern "C" uint32_t g_fVMXIs64bitHost; -#endif - - -/******************************************************************************* -* Local Functions * -*******************************************************************************/ -static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu); -static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu); -static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu); -static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu); -static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush); -static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr); -static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); -static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite); -static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx); - - -/** - * Updates error from VMCS to HWACCMCPU's lasterror record. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param rc The error code. - */ -static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc) -{ - if (rc == VERR_VMX_GENERIC) - { - RTCCUINTREG instrError; - - VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError); - pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError; - } - pVM->hwaccm.s.lLastError = rc; -} - - -/** - * Sets up and activates VT-x on the current CPU. - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pVM Pointer to the VM. (can be NULL after a resume!!) - * @param pvCpuPage Pointer to the global CPU page. - * @param HCPhysCpuPage Physical address of the global CPU page. - * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable - * VT-x/AMD-V on the host. - */ -VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost) -{ - if (!fEnabledByHost) - { - AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); - AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); - - if (pVM) - { - /* Set revision dword at the beginning of the VMXON structure. */ - *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info); - } - - /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption. - * (which can have very bad consequences!!!) - */ - - if (ASMGetCR4() & X86_CR4_VMXE) - return VERR_VMX_IN_VMX_ROOT_MODE; - - ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */ - - /* - * Enter VM root mode. - */ - int rc = VMXEnable(HCPhysCpuPage); - if (RT_FAILURE(rc)) - { - ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); - return VERR_VMX_VMXON_FAILED; - } - } - - /* - * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that - * we can avoid an explicit flush while using new VPIDs. We would still need to flush - * each time while reusing a VPID after hitting the MaxASID limit once. 
- */ - if ( pVM - && pVM->hwaccm.s.vmx.fVPID - && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)) - { - hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */); - pCpu->fFlushASIDBeforeUse = false; - } - else - pCpu->fFlushASIDBeforeUse = true; - - /* - * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. - */ - ++pCpu->cTLBFlushes; - - return VINF_SUCCESS; -} - - -/** - * Deactivates VT-x on the current CPU. - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pvCpuPage Pointer to the global CPU page. - * @param HCPhysCpuPage Physical address of the global CPU page. - */ -VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) -{ - AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); - AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); - NOREF(pCpu); - - /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */ - if (!(ASMGetCR4() & X86_CR4_VMXE)) - return VERR_VMX_NOT_IN_VMX_ROOT_MODE; - - /* Leave VMX Root Mode. */ - VMXDisable(); - - /* And clear the X86_CR4_VMXE bit. */ - ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); - return VINF_SUCCESS; -} - - -/** - * Does Ring-0 per VM VT-x initialization. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) VMXR0InitVM(PVM pVM) -{ - int rc; - -#ifdef LOG_ENABLED - SUPR0Printf("VMXR0InitVM %p\n", pVM); -#endif - - pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ; - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW) - { - /* Allocate one page for the APIC physical page (serves for filtering accesses). */ - rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC); - pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0); - ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE); - } - else - { - pVM->hwaccm.s.vmx.pMemObjAPIC = 0; - pVM->hwaccm.s.vmx.pAPIC = 0; - pVM->hwaccm.s.vmx.pAPICPhys = 0; - } - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - { - rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch); - pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0); - - ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE); - strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic"); - *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF); - } -#endif - - /* Allocate VMCSs for all guest CPUs. */ - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - - pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ; - - /* Allocate one page for the VM control structure (VMCS). 
*/ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS); - pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0); - ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS); - - pVCpu->hwaccm.s.vmx.cr0_mask = 0; - pVCpu->hwaccm.s.vmx.cr4_mask = 0; - - /* Allocate one page for the virtual APIC page for TPR caching. */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC); - pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0); - ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC); - - /* Allocate the MSR bitmap if this feature is supported. */ - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS) - { - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap); - pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0); - memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE); - } - -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR); - pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0); - Assert(!(pVCpu->hwaccm.s.vmx.pGuestMSRPhys & 0xf)); - memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE); - - /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */ - rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */); - AssertRC(rc); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR); - pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0); - Assert(!(pVCpu->hwaccm.s.vmx.pHostMSRPhys & 0xf)); - memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE); -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - - /* Current guest paging mode. */ - pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL; - -#ifdef LOG_ENABLED - SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS); -#endif - } - - return VINF_SUCCESS; -} - - -/** - * Does Ring-0 per VM VT-x termination. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. 
- */ -VMMR0DECL(int) VMXR0TermVM(PVM pVM) -{ - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - - if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false); - pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.vmx.pvVMCS = 0; - pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0; - } - if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false); - pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.vmx.pbVAPIC = 0; - pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0; - } - if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false); - pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.vmx.pMSRBitmap = 0; - pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0; - } -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false); - pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.vmx.pHostMSR = 0; - pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0; - } - if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false); - pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ; - pVCpu->hwaccm.s.vmx.pGuestMSR = 0; - pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0; - } -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - } - if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ) - { - RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false); - pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ; - pVM->hwaccm.s.vmx.pAPIC = 0; - pVM->hwaccm.s.vmx.pAPICPhys = 0; - } -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ) - { - ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE); - RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false); - pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ; - pVM->hwaccm.s.vmx.pScratch = 0; - pVM->hwaccm.s.vmx.pScratchPhys = 0; - } -#endif - return VINF_SUCCESS; -} - - -/** - * Sets up VT-x for the specified VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) VMXR0SetupVM(PVM pVM) -{ - int rc = VINF_SUCCESS; - uint32_t val; - - AssertReturn(pVM, VERR_INVALID_PARAMETER); - - /* Initialize these always, see hwaccmR3InitFinalizeR0().*/ - pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE; - pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE; - - /* Determine optimal flush type for EPT. */ - if (pVM->hwaccm.s.fNestedPaging) - { - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT) - { - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT) - pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT; - else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS) - pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS; - else - { - /* - * Should never really happen. EPT is supported but no suitable flush types supported. - * We cannot ignore EPT at this point as we've already setup Unrestricted Guest execution. - */ - pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED; - return VERR_VMX_GENERIC; - } - } - else - { - /* - * Should never really happen. EPT is supported but INVEPT instruction is not supported. 
- */ - pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED; - return VERR_VMX_GENERIC; - } - } - - /* Determine optimal flush type for VPID. */ - if (pVM->hwaccm.s.vmx.fVPID) - { - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID) - { - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT) - pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT; - else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS) - pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS; - else - { - /* - * Neither SINGLE nor ALL context flush types for VPID supported by the CPU. - * We do not handle other flush type combinations, ignore VPID capabilities. - */ - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR) - Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n")); - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS) - Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n")); - pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED; - pVM->hwaccm.s.vmx.fVPID = false; - } - } - else - { - /* - * Should not really happen. EPT is supported but INVEPT is not supported. - * Ignore VPID capabilities as our code relies on using INVEPT for selective flushing. - */ - Log(("VMXR0SetupVM: VPID supported without INVEPT support. Ignoring VPID.\n")); - pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED; - pVM->hwaccm.s.vmx.fVPID = false; - } - } - - for (VMCPUID i = 0; i < pVM->cCpus; i++) - { - PVMCPU pVCpu = &pVM->aCpus[i]; - - AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS); - - /* Set revision dword at the beginning of the VMCS structure. */ - *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info); - - /* - * Clear and activate the VMCS. - */ - Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS)); - rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - if (RT_FAILURE(rc)) - goto vmx_end; - - rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - if (RT_FAILURE(rc)) - goto vmx_end; - - /* - * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS - * Set required bits to one and zero according to the MSR capabilities. - */ - val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0; - val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */ - | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */ - - /* - * Enable the VMX preemption timer. - */ - if (pVM->hwaccm.s.vmx.fUsePreemptTimer) - val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER; - val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1; - - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val); - AssertRC(rc); - - /* - * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS - * Set required bits to one and zero according to the MSR capabilities. - */ - val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0; - /* Program which event cause VM-exits and which features we want to use. 
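The pin-based and processor-based execution controls above (and the entry/exit controls later in this file) all follow the same VT-x convention: the capability MSR supplies a set of bits that must be 1 ("disallowed0") and a set of bits that may be 1 ("allowed1"). A minimal sketch of that adjustment, using a hypothetical helper name:

    #include <stdint.h>

    /* Start from the bits the CPU forces to 1, add the features we want,
       then drop everything the CPU does not permit. */
    static uint32_t vmx_adjust_controls(uint32_t fDisallowed0, uint32_t fAllowed1, uint32_t fDesired)
    {
        uint32_t fVal = fDisallowed0 | fDesired;
        return fVal & fAllowed1;
    }

VMXR0SetupVM applies exactly this pattern when it computes the values written to VMX_VMCS_CTRL_PIN_EXEC_CONTROLS and VMX_VMCS_CTRL_PROC_EXEC_CONTROLS.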
*/ - val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside - the guest (host thinks the cpu load is high) */ - - /* Without nested paging we should intercept invlpg and cr3 mov instructions. */ - if (!pVM->hwaccm.s.fNestedPaging) - { - val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT; - } - - /* - * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch - * failure with an invalid control fields error. (combined with some other exit reasons) - */ - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW) - { - /* CR8 reads from the APIC shadow page; writes cause an exit is they lower the TPR below the threshold */ - val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW; - Assert(pVM->hwaccm.s.vmx.pAPIC); - } - else - /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */ - val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT; - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS) - { - Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys); - val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS; - } - - /* We will use the secondary control if it's present. */ - val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL; - - /* Mask away the bits that the CPU doesn't support */ - /** @todo make sure they don't conflict with the above requirements. */ - val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1; - pVCpu->hwaccm.s.vmx.proc_ctls = val; - - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val); - AssertRC(rc); - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL) - { - /* - * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2 - * Set required bits to one and zero according to the MSR capabilities. - */ - val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0; - val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; - - if (pVM->hwaccm.s.fNestedPaging) - val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; - - if (pVM->hwaccm.s.vmx.fVPID) - val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; - - if (pVM->hwaccm.s.fHasIoApic) - val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; - - if (pVM->hwaccm.s.vmx.fUnrestrictedGuest) - val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE; - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) - val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; - - /* Mask away the bits that the CPU doesn't support */ - /** @todo make sure they don't conflict with the above requirements. */ - val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1; - pVCpu->hwaccm.s.vmx.proc_ctls2 = val; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val); - AssertRC(rc); - } - - /* - * VMX_VMCS_CTRL_CR3_TARGET_COUNT - * Set required bits to one and zero according to the MSR capabilities. - */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0); - AssertRC(rc); - - /* - * Forward all exception except #NM & #PF to the guest. - * We always need to check pagefaults since our shadow page table can be out of sync. 
- * And we always lazily sync the FPU & XMM state. . - */ - - /** @todo Possible optimization: - * Keep the FPU and XMM state current in the EM thread. That way there's no need to - * lazily sync anything, but the downside is that we can't use the FPU stack or XMM - * registers ourselves of course. - * - * Note: only possible if the current state is actually ours (X86_CR0_TS flag) - */ - - /* - * Don't filter page faults, all of them should cause a world switch. - */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0); - AssertRC(rc); - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0); - AssertRC(rc); - - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0); - AssertRC(rc); - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0); - AssertRC(rc); - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0); - AssertRC(rc); - - /* - * Set the MSR bitmap address. - */ - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS) - { - Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys); - - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys); - AssertRC(rc); - - /* - * Allow the guest to directly modify these MSRs; they are loaded/stored automatically - * using MSR-load/store areas in the VMCS. - */ - hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true); - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true); - if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true); - } - -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* - * Set the guest & host MSR load/store physical addresses. - */ - Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys); - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys); - AssertRC(rc); - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys); - AssertRC(rc); - Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys); - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys); - AssertRC(rc); -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - - rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0); - AssertRC(rc); - rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0); - AssertRC(rc); - rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0); - AssertRC(rc); - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW) - { - Assert(pVM->hwaccm.s.vmx.pMemObjAPIC); - /* Optional */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0); - rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC); - - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) - rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys); - - AssertRC(rc); - } - - /* Set link pointer to -1. Not currently used. */ - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL); - AssertRC(rc); - - /* - * Clear VMCS, marking it inactive. 
Clear implementation specific data and writing back - * VMCS data back to memory. - */ - rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - AssertRC(rc); - - /* - * Configure the VMCS read cache. - */ - PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache; - - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE); - - VMX_SETUP_SELREG(ES, pCache); - VMX_SETUP_SELREG(SS, pCache); - VMX_SETUP_SELREG(CS, pCache); - VMX_SETUP_SELREG(DS, pCache); - VMX_SETUP_SELREG(FS, pCache); - VMX_SETUP_SELREG(GS, pCache); - VMX_SETUP_SELREG(LDTR, pCache); - VMX_SETUP_SELREG(TR, pCache); - - /* - * Status code VMCS reads. - */ - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE); - - if (pVM->hwaccm.s.fNestedPaging) - { - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3); - VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL); - pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX; - } - else - pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX; - } /* for each VMCPU */ - - /* - * Setup the right TLB function based on CPU capabilities. - */ - if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID) - pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth; - else if (pVM->hwaccm.s.fNestedPaging) - pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT; - else if (pVM->hwaccm.s.vmx.fVPID) - pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID; - else - pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy; - -vmx_end: - hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc); - return rc; -} - - -/** - * Sets the permission bits for the specified MSR. - * - * @param pVCpu Pointer to the VMCPU. - * @param ulMSR The MSR value. - * @param fRead Whether reading is allowed. - * @param fWrite Whether writing is allowed. 
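hmR0VmxSetMSRPermission below works on the 4 KB MSR bitmap whose layout is documented at the top of the function: read-intercept bits for the low MSR range at offset 0x000, for the 0xC0000000 range at 0x400, with the matching write-intercept bits 0x800 bytes further on. A standalone sketch of the offset/bit computation (hypothetical helper, illustration only):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns false for MSRs outside the two ranges covered by the bitmap
       (accesses to those always cause a VM-exit). */
    static bool vmx_msr_bitmap_slot(uint32_t idMsr, uint32_t *poffReadByte, uint32_t *pidxBit)
    {
        if (idMsr <= UINT32_C(0x00001FFF))
        {
            *poffReadByte = 0x000;                 /* write bits live at *poffReadByte + 0x800 */
            *pidxBit      = idMsr;
            return true;
        }
        if (idMsr >= UINT32_C(0xC0000000) && idMsr <= UINT32_C(0xC0001FFF))
        {
            *poffReadByte = 0x400;
            *pidxBit      = idMsr - UINT32_C(0xC0000000);
            return true;
        }
        return false;
    }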
- */ -static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite) -{ - unsigned ulBit; - uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap; - - /* - * Layout: - * 0x000 - 0x3ff - Low MSR read bits - * 0x400 - 0x7ff - High MSR read bits - * 0x800 - 0xbff - Low MSR write bits - * 0xc00 - 0xfff - High MSR write bits - */ - if (ulMSR <= 0x00001FFF) - { - /* Pentium-compatible MSRs */ - ulBit = ulMSR; - } - else if ( ulMSR >= 0xC0000000 - && ulMSR <= 0xC0001FFF) - { - /* AMD Sixth Generation x86 Processor MSRs */ - ulBit = (ulMSR - 0xC0000000); - pMSRBitmap += 0x400; - } - else - { - AssertFailed(); - return; - } - - Assert(ulBit <= 0x1fff); - if (fRead) - ASMBitClear(pMSRBitmap, ulBit); - else - ASMBitSet(pMSRBitmap, ulBit); - - if (fWrite) - ASMBitClear(pMSRBitmap + 0x800, ulBit); - else - ASMBitSet(pMSRBitmap + 0x800, ulBit); -} - - -/** - * Injects an event (trap or external interrupt). - * - * @returns VBox status code. Note that it may return VINF_EM_RESET to - * indicate a triple fault when injecting X86_XCPT_DF. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU Context. - * @param intInfo VMX interrupt info. - * @param cbInstr Opcode length of faulting instruction. - * @param errCode Error code (optional). - */ -static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode) -{ - int rc; - uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo); - -#ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]); -#endif - -#ifdef VBOX_STRICT - if (iGate == 0xE) - { - LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, - (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo)); - } - else if (iGate < 0x20) - { - LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, - errCode)); - } - else - { - LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip)); - Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW - || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); - Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW - || pCtx->eflags.u32 & X86_EFL_IF); - } -#endif - - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - RTGCPHYS GCPhysHandler; - uint16_t offset, ip; - RTSEL sel; - - /* - * Injecting events doesn't work right with real mode emulation. - * (#GP if we try to inject external hardware interrupts) - * Inject the interrupt or trap directly instead. - * - * ASSUMES no access handlers for the bits we read or write below (should be safe). - */ - Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate)); - - /* - * Check if the interrupt handler is present. - */ - if (iGate * 4 + 3 > pCtx->idtr.cbIdt) - { - Log(("IDT cbIdt violation\n")); - if (iGate != X86_XCPT_DF) - { - uint32_t intInfo2; - - intInfo2 = (iGate == X86_XCPT_GP) ? 
(uint32_t)X86_XCPT_DF : iGate; - intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; - intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */); - } - Log(("Triple fault -> reset the VM!\n")); - return VINF_EM_RESET; - } - if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW - || iGate == 3 /* Both #BP and #OF point to the instruction after. */ - || iGate == 4) - { - ip = pCtx->ip + cbInstr; - } - else - ip = pCtx->ip; - - /* - * Read the selector:offset pair of the interrupt handler. - */ - GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4; - rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc); - rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc); - - LogFlow(("IDT handler %04X:%04X\n", sel, offset)); - - /* - * Construct the stack frame. - */ - /** @todo Check stack limit. */ - pCtx->sp -= 2; - LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u)); - rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc); - pCtx->sp -= 2; - LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel)); - rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc); - pCtx->sp -= 2; - LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip)); - rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc); - - /* - * Update the CPU state for executing the handler. - */ - pCtx->rip = offset; - pCtx->cs.Sel = sel; - pCtx->cs.u64Base = sel << 4; - pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC); - - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS; - return VINF_SUCCESS; - } - - /* - * Set event injection state. - */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT)); - rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr); - rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode); - - AssertRC(rc); - return rc; -} - - -/** - * Checks for pending guest interrupts and injects them. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx) -{ - int rc; - - /* - * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely). - */ - if (pVCpu->hwaccm.s.Event.fPending) - { - Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo, - pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject); - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode); - AssertRC(rc); - - pVCpu->hwaccm.s.Event.fPending = false; - return VINF_SUCCESS; - } - - /* - * If an active trap is already pending, we must forward it first! 
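The real-mode branch of hmR0VmxInjectEvent above delivers the event the way the CPU itself would: fetch the handler's selector:offset pair from the IVT (four bytes per vector), push FLAGS, CS and the return IP, then transfer control with IF and TF cleared. A self-contained sketch of that sequence, with a flat byte array standing in for guest memory (all names hypothetical):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint16_t ip, cs, sp, ss, flags; } RMREGS;

    static void rm_deliver_interrupt(uint8_t *pbGuestMem, RMREGS *pRegs, uint8_t bVector)
    {
        uint16_t offHandler, selHandler;
        memcpy(&offHandler, pbGuestMem + bVector * 4,     sizeof(offHandler));
        memcpy(&selHandler, pbGuestMem + bVector * 4 + 2, sizeof(selHandler));

        uint32_t uStack = (uint32_t)pRegs->ss << 4;        /* real mode: base = selector * 16 */
        pRegs->sp -= 2; memcpy(pbGuestMem + uStack + pRegs->sp, &pRegs->flags, 2);
        pRegs->sp -= 2; memcpy(pbGuestMem + uStack + pRegs->sp, &pRegs->cs,    2);
        pRegs->sp -= 2; memcpy(pbGuestMem + uStack + pRegs->sp, &pRegs->ip,    2);

        pRegs->ip     = offHandler;
        pRegs->cs     = selHandler;
        pRegs->flags &= (uint16_t)~((1u << 9) | (1u << 8)); /* clear IF and TF */
    }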
- */ - if (!TRPMHasTrap(pVCpu)) - { - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI)) - { - RTGCUINTPTR intInfo; - - Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu)); - - intInfo = X86_XCPT_NMI; - intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0); - AssertRC(rc); - - return VINF_SUCCESS; - } - - /** @todo SMI interrupts. */ - - /* - * When external interrupts are pending, we should exit the VM when IF is set. - */ - if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC))) - { - if (!(pCtx->eflags.u32 & X86_EFL_IF)) - { - if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT)) - { - LogFlow(("Enable irq window exit!\n")); - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - } - /* else nothing to do but wait */ - } - else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - { - uint8_t u8Interrupt; - - rc = PDMGetInterrupt(pVCpu, &u8Interrupt); - Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu, - u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); - if (RT_SUCCESS(rc)) - { - rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT); - AssertRC(rc); - } - else - { - /* Can only happen in rare cases where a pending interrupt is cleared behind our back */ - Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC))); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq); - /* Just continue */ - } - } - else - Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip)); - } - } - -#ifdef VBOX_STRICT - if (TRPMHasTrap(pVCpu)) - { - uint8_t u8Vector; - rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0); - AssertRC(rc); - } -#endif - - if ( (pCtx->eflags.u32 & X86_EFL_IF) - && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - && TRPMHasTrap(pVCpu) - ) - { - uint8_t u8Vector; - TRPMEVENT enmType; - RTGCUINTPTR intInfo; - RTGCUINT errCode; - - /* - * If a new event is pending, dispatch it now. - */ - rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0); - AssertRC(rc); - Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP); - Assert(enmType != TRPM_SOFTWARE_INT); - - /* - * Clear the pending trap. - */ - rc = TRPMResetTrap(pVCpu); - AssertRC(rc); - - intInfo = u8Vector; - intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - - if (enmType == TRPM_TRAP) - { - switch (u8Vector) - { - case X86_XCPT_DF: - case X86_XCPT_TS: - case X86_XCPT_NP: - case X86_XCPT_SS: - case X86_XCPT_GP: - case X86_XCPT_PF: - case X86_XCPT_AC: - { - /* Valid error codes. 
*/ - intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; - break; - } - - default: - break; - } - - if ( u8Vector == X86_XCPT_BP - || u8Vector == X86_XCPT_OF) - { - intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - } - else - intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - } - else - intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject); - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode); - AssertRC(rc); - } /* if (interrupts can be dispatched) */ - - return VINF_SUCCESS; -} - - -/** - * Save the host state into the VMCS. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu) -{ - int rc = VINF_SUCCESS; - NOREF(pVM); - - /* - * Host CPU Context. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT) - { - RTIDTR idtr; - RTGDTR gdtr; - RTSEL SelTR; - PCX86DESCHC pDesc; - uintptr_t trBase; - RTSEL cs; - RTSEL ss; - uint64_t cr3; - - /* - * Control registers. - */ - rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0()); - Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0())); -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (VMX_IS_64BIT_HOST_MODE()) - { - cr3 = hwaccmR0Get64bitCR3(); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3); - } - else -#endif - { - cr3 = ASMGetCR3(); - rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3); - } - Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3)); - rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4()); - Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4())); - AssertRC(rc); - - /* - * Selector registers. - */ -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (VMX_IS_64BIT_HOST_MODE()) - { - cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS; - ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS; - } - else - { - /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */ - cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS; - ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS; - } -#else - cs = ASMGetCS(); - ss = ASMGetSS(); -#endif - Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0); - Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0); - rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs); - /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */ - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0); - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0); -#if HC_ARCH_BITS == 32 - if (!VMX_IS_64BIT_HOST_MODE()) - { - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0); - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0); - } -#endif - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss); - SelTR = ASMGetTR(); - rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR); - AssertRC(rc); - Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS())); - Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS())); - Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES())); - Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS())); - Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS())); - Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS())); - Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR())); - - /* - * GDTR & IDTR. 
- */ -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (VMX_IS_64BIT_HOST_MODE()) - { - X86XDTR64 gdtr64, idtr64; - hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64); - rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, gdtr64.uAddr); - AssertRC(rc); - Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr)); - Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr)); - gdtr.cbGdt = gdtr64.cb; - gdtr.pGdt = (uintptr_t)gdtr64.uAddr; - } - else -#endif - { - ASMGetGDTR(&gdtr); - rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt); - ASMGetIDTR(&idtr); - rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt); - AssertRC(rc); - Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt)); - Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt)); - } - - /* - * Save the base address of the TR selector. - */ - if (SelTR > gdtr.cbGdt) - { - AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt)); - return VERR_VMX_INVALID_HOST_STATE; - } - - pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK)); -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (VMX_IS_64BIT_HOST_MODE()) - { - uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc); - rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64); - Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64)); - AssertRC(rc); - } - else -#endif - { -#if HC_ARCH_BITS == 64 - trBase = X86DESC64_BASE(pDesc); -#else - trBase = X86DESC_BASE(pDesc); -#endif - rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase); - AssertRC(rc); - Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase)); - } - - /* - * FS base and GS base. - */ -#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (VMX_IS_64BIT_HOST_MODE()) - { - Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE))); - Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE))); - rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE)); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE)); - } -#endif - AssertRC(rc); - - /* - * Sysenter MSRs. - */ - /** @todo expensive!! 
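The TR base written above is recovered by indexing the host GDT with (SelTR & X86_SEL_MASK) and letting X86DESC_BASE() reassemble the scattered base bytes of the descriptor. A minimal sketch of that extraction for a legacy 8-byte descriptor (64-bit system descriptors such as the TSS carry four further base bytes in their second half):

    #include <stdint.h>

    /* Descriptor bytes 2..4 hold base 23:0, byte 7 holds base 31:24. */
    static uint32_t desc_base32(const uint8_t *pbDesc)
    {
        return (uint32_t)pbDesc[2]
             | ((uint32_t)pbDesc[3] << 8)
             | ((uint32_t)pbDesc[4] << 16)
             | ((uint32_t)pbDesc[7] << 24);
    }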
*/ - rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); - Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS))); -#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (VMX_IS_64BIT_HOST_MODE()) - { - Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP))); - Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP))); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); - } - else - { - rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); - rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); - Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP))); - Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP))); - } -#elif HC_ARCH_BITS == 32 - rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); - rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); - Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP))); - Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP))); -#else - Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP))); - Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP))); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); - rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); -#endif - AssertRC(rc); - - -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* - * Store all host MSRs in the VM-Exit load area, so they will be reloaded after - * the world switch back to the host. - */ - PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR; - unsigned idxMsr = 0; - - uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001); - if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)) - { -#if 0 - pMsr->u32IndexMSR = MSR_K6_EFER; - pMsr->u32Reserved = 0; -# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (CPUMIsGuestInLongMode(pVCpu)) - { - /* Must match the EFER value in our 64 bits switcher. */ - pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE; - } - else -# endif - pMsr->u64Value = ASMRdMsr(MSR_K6_EFER); - pMsr++; idxMsr++; -#endif - } - -# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (VMX_IS_64BIT_HOST_MODE()) - { - pMsr->u32IndexMSR = MSR_K6_STAR; - pMsr->u32Reserved = 0; - pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */ - pMsr++; idxMsr++; - pMsr->u32IndexMSR = MSR_K8_LSTAR; - pMsr->u32Reserved = 0; - pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */ - pMsr++; idxMsr++; - pMsr->u32IndexMSR = MSR_K8_SF_MASK; - pMsr->u32Reserved = 0; - pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */ - pMsr++; idxMsr++; - - /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. 
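The host MSR area populated above is simply an array of 16-byte records -- MSR index, a reserved dword and the 64-bit value -- and only the area's physical address plus the entry count end up in the VMCS. A minimal sketch of the record format and the fill loop (struct and helper names are illustrative, not the VirtualBox ones):

    #include <stdint.h>

    typedef struct AUTOLOADMSR
    {
        uint32_t u32Msr;       /* e.g. 0xC0000081 for STAR */
        uint32_t u32Reserved;  /* must be zero */
        uint64_t u64Value;
    } AUTOLOADMSR;

    static unsigned fill_msr_area(AUTOLOADMSR *paArea, const uint32_t *paIds,
                                  const uint64_t *paValues, unsigned cMsrs)
    {
        for (unsigned i = 0; i < cMsrs; i++)
        {
            paArea[i].u32Msr      = paIds[i];
            paArea[i].u32Reserved = 0;
            paArea[i].u64Value    = paValues[i];
        }
        return cMsrs;          /* the count written to VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT */
    }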
See @bugref{6208} */ -#if 0 - pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE; - pMsr->u32Reserved = 0; - pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */ - pMsr++; idxMsr++; -#endif - } -# endif - - if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) - { - pMsr->u32IndexMSR = MSR_K8_TSC_AUX; - pMsr->u32Reserved = 0; - pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX); - pMsr++; idxMsr++; - } - - /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr - * range. */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr); - AssertRC(rc); -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - - pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT; - } - return rc; -} - - -/** - * Loads the 4 PDPEs into the guest state when nested paging is used and the - * guest operates in PAE mode. - * - * @returns VBox status code. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx) -{ - if (CPUMIsGuestInPAEModeEx(pCtx)) - { - X86PDPE aPdpes[4]; - int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]); - AssertRCReturn(rc, rc); - - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc); - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc); - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc); - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc); - } - return VINF_SUCCESS; -} - - -/** - * Saves the 4 PDPEs into the guest state when nested paging is used and the - * guest operates in PAE mode. - * - * @returns VBox status code. - * @param pVCpu Pointer to the VM CPU. - * @param pCtx Pointer to the guest CPU context. - * - * @remarks Tell PGM about CR3 changes before calling this helper. - */ -static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx) -{ - if (CPUMIsGuestInPAEModeEx(pCtx)) - { - int rc; - X86PDPE aPdpes[4]; - rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc); - rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc); - rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc); - rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc); - - rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]); - AssertRCReturn(rc, rc); - } - return VINF_SUCCESS; -} - - -/** - * Update the exception bitmap according to the current CPU state. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - uint32_t u32TrapMask; - Assert(pCtx); - - /* - * Set up a mask for intercepting traps. - */ - /** @todo Do we really need to always intercept #DB? */ - u32TrapMask = RT_BIT(X86_XCPT_DB) - | RT_BIT(X86_XCPT_NM) -#ifdef VBOX_ALWAYS_TRAP_PF - | RT_BIT(X86_XCPT_PF) -#endif -#ifdef VBOX_STRICT - | RT_BIT(X86_XCPT_BP) - | RT_BIT(X86_XCPT_DB) - | RT_BIT(X86_XCPT_DE) - | RT_BIT(X86_XCPT_NM) - | RT_BIT(X86_XCPT_UD) - | RT_BIT(X86_XCPT_NP) - | RT_BIT(X86_XCPT_SS) - | RT_BIT(X86_XCPT_GP) - | RT_BIT(X86_XCPT_MF) -#endif - ; - - /* - * Without nested paging, #PF must be intercepted to implement shadow paging. - */ - /** @todo NP state won't change so maybe we should build the initial trap mask up front? 
*/ - if (!pVM->hwaccm.s.fNestedPaging) - u32TrapMask |= RT_BIT(X86_XCPT_PF); - - /* Catch floating point exceptions if we need to report them to the guest in a different way. */ - if (!(pCtx->cr0 & X86_CR0_NE)) - u32TrapMask |= RT_BIT(X86_XCPT_MF); - -#ifdef VBOX_STRICT - Assert(u32TrapMask & RT_BIT(X86_XCPT_GP)); -#endif - - /* - * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). - */ - /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - u32TrapMask |= RT_BIT(X86_XCPT_DE) - | RT_BIT(X86_XCPT_DB) - | RT_BIT(X86_XCPT_NMI) - | RT_BIT(X86_XCPT_BP) - | RT_BIT(X86_XCPT_OF) - | RT_BIT(X86_XCPT_BR) - | RT_BIT(X86_XCPT_UD) - | RT_BIT(X86_XCPT_DF) - | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) - | RT_BIT(X86_XCPT_TS) - | RT_BIT(X86_XCPT_NP) - | RT_BIT(X86_XCPT_SS) - | RT_BIT(X86_XCPT_GP) - | RT_BIT(X86_XCPT_MF) - | RT_BIT(X86_XCPT_AC) - | RT_BIT(X86_XCPT_MC) - | RT_BIT(X86_XCPT_XF) - ; - } - - int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask); - AssertRC(rc); -} - - -/** - * Loads a minimal guest state. - * - * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!! - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - int rc; - X86EFLAGS eflags; - - Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST)); - - /* - * Load EIP, ESP and EFLAGS. - */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp); - AssertRC(rc); - - /* - * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. - */ - eflags = pCtx->eflags; - eflags.u32 &= VMX_EFLAGS_RESERVED_0; - eflags.u32 |= VMX_EFLAGS_RESERVED_1; - - /* - * Check if real mode emulation using v86 mode. - */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags; - - eflags.Bits.u1VM = 1; - eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */ - } - rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32); - AssertRC(rc); -} - - -/** - * Loads the guest state. - * - * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!! - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - int rc = VINF_SUCCESS; - RTGCUINTPTR val; - - /* - * VMX_VMCS_CTRL_ENTRY_CONTROLS - * Set required bits to one and zero according to the MSR capabilities. - */ - val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0; - - /* - * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR). - * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs - */ - val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG; - - if (CPUMIsGuestInLongModeEx(pCtx)) - val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE; - /* else Must be zero when AMD64 is not available. */ - - /* - * Mask away the bits that the CPU doesn't support. 
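VMXR0LoadMinimalGuestState above squares RFLAGS with the architectural reserved bits before VM-entry, since the guest-state checks reject anything else: bit 1 must be 1, while bits 3, 5, 15 and 22..31 must be 0. A minimal sketch of that sanitising step (hypothetical helper name):

    #include <stdint.h>

    static uint32_t sanitize_guest_eflags(uint32_t fEFlags)
    {
        fEFlags &= ~(UINT32_C(0xFFC00000) | (1u << 15) | (1u << 5) | (1u << 3)); /* reserved-0 */
        fEFlags |= (1u << 1);                                                    /* reserved-1 */
        return fEFlags;
    }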
- */ - val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val); - AssertRC(rc); - - /* - * VMX_VMCS_CTRL_EXIT_CONTROLS - * Set required bits to one and zero according to the MSR capabilities. - */ - val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0; - - /* - * Save debug controls (DR7 & IA32_DEBUGCTL_MSR) - * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs - */ - val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG; - -#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (VMX_IS_64BIT_HOST_MODE()) - val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; - /* else Must be zero when AMD64 is not available. */ -#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) - if (CPUMIsGuestInLongModeEx(pCtx)) - val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */ - else - Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64)); -#endif - val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1; - - /* - * Don't acknowledge external interrupts on VM-exit. - */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val); - AssertRC(rc); - - /* - * Guest CPU context: ES, CS, SS, DS, FS, GS. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS) - { - if (pVM->hwaccm.s.vmx.pRealModeTSS) - { - PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu); - if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode) - { - /* - * Correct weird requirements for switching to protected mode. - */ - if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL - && enmGuestMode >= PGMMODE_PROTECTED) - { -#ifdef VBOX_WITH_REM - /* - * Flush the recompiler code cache as it's not unlikely the guest will rewrite code - * it will later execute in real mode (OpenBSD 4.0 is one such example) - */ - REMFlushTBs(pVM); -#endif - - /* - * DPL of all hidden selector registers must match the current CPL (0). - */ - pCtx->cs.Attr.n.u2Dpl = 0; - pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC; - - pCtx->ds.Attr.n.u2Dpl = 0; - pCtx->es.Attr.n.u2Dpl = 0; - pCtx->fs.Attr.n.u2Dpl = 0; - pCtx->gs.Attr.n.u2Dpl = 0; - pCtx->ss.Attr.n.u2Dpl = 0; - } - pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode; - } - else if ( CPUMIsGuestInRealModeEx(pCtx) - && pCtx->cs.u64Base == 0xffff0000) - { - /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */ - pCtx->cs.u64Base = 0xf0000; - pCtx->cs.Sel = 0xf000; - } - } - - VMX_WRITE_SELREG(ES, es); - AssertRC(rc); - - VMX_WRITE_SELREG(CS, cs); - AssertRC(rc); - - VMX_WRITE_SELREG(SS, ss); - AssertRC(rc); - - VMX_WRITE_SELREG(DS, ds); - AssertRC(rc); - - VMX_WRITE_SELREG(FS, fs); - AssertRC(rc); - - VMX_WRITE_SELREG(GS, gs); - AssertRC(rc); - } - - /* - * Guest CPU context: LDTR. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR) - { - if (pCtx->ldtr.Sel == 0) - { - rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0); - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0); - /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. 
*/ - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */); - } - else - { - rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel); - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u); - } - AssertRC(rc); - } - - /* - * Guest CPU context: TR. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR) - { - /* - * Real mode emulation using v86 mode with CR4.VME (interrupt redirection - * using the int bitmap in the TSS). - */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - RTGCPHYS GCPhys; - - /* We convert it here every time as PCI regions could be reconfigured. */ - rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys); - AssertRC(rc); - - rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0); - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */); - - X86DESCATTR attr; - - attr.u = 0; - attr.n.u1Present = 1; - attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY; - val = attr.u; - } - else - { - rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel); - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->tr.u64Base); - - val = pCtx->tr.Attr.u; - - /* The TSS selector must be busy (REM bugs? see defect #XXXX). */ - if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK)) - { - if (val & 0xf) - val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK; - else - /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */ - val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY; - } - AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, - ("%#x\n", val)); - } - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val); - AssertRC(rc); - } - - /* - * Guest CPU context: GDTR. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR) - { - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt); - AssertRC(rc); - } - - /* - * Guest CPU context: IDTR. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR) - { - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt); - AssertRC(rc); - } - - /* - * Sysenter MSRs. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR) - { - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp); - AssertRC(rc); - } - - /* - * Guest CPU context: Control registers. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0) - { - val = pCtx->cr0; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val); - Log2(("Guest CR0-shadow %08x\n", val)); - if (CPUMIsGuestFPUStateActive(pVCpu) == false) - { - /* Always use #NM exceptions to load the FPU/XMM state on demand. */ - val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP; - } - else - { - /** @todo check if we support the old style mess correctly. 
*/ - if (!(val & X86_CR0_NE)) - Log(("Forcing X86_CR0_NE!!!\n")); - - val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */ - } - /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */ - if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest) - val |= X86_CR0_PE | X86_CR0_PG; - - if (pVM->hwaccm.s.fNestedPaging) - { - if (CPUMIsGuestInPagedProtectedModeEx(pCtx)) - { - /* Disable CR3 read/write monitoring as we don't need it for EPT. */ - pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT); - } - else - { - /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */ - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT; - } - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - } - else - { - /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */ - val |= X86_CR0_WP; - } - - /* Always enable caching. */ - val &= ~(X86_CR0_CD|X86_CR0_NW); - - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val); - Log2(("Guest CR0 %08x\n", val)); - - /* - * CR0 flags owned by the host; if the guests attempts to change them, then the VM will exit. - */ - val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */ - | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */ - | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */ - | X86_CR0_CD /* Bit not restored during VM-exit! */ - | X86_CR0_NW /* Bit not restored during VM-exit! */ - | X86_CR0_NE; - - /* - * When the guest's FPU state is active, then we no longer care about the FPU related bits. - */ - if (CPUMIsGuestFPUStateActive(pVCpu) == false) - val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP; - - pVCpu->hwaccm.s.vmx.cr0_mask = val; - - rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val); - Log2(("Guest CR0-mask %08x\n", val)); - AssertRC(rc); - } - - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4) - { - rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4); - Log2(("Guest CR4-shadow %08x\n", pCtx->cr4)); - /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */ - val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0; - - if (!pVM->hwaccm.s.fNestedPaging) - { - switch (pVCpu->hwaccm.s.enmShadowMode) - { - case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */ - case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */ - case PGMMODE_32_BIT: /* 32-bit paging. */ - val &= ~X86_CR4_PAE; - break; - - case PGMMODE_PAE: /* PAE paging. */ - case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */ - /** Must use PAE paging as we could use physical memory > 4 GB */ - val |= X86_CR4_PAE; - break; - - case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ - case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. 
*/ -#ifdef VBOX_ENABLE_64_BITS_GUESTS - break; -#else - AssertFailed(); - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -#endif - default: /* shut up gcc */ - AssertFailed(); - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; - } - } - else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx) - && !pVM->hwaccm.s.vmx.fUnrestrictedGuest) - { - /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */ - val |= X86_CR4_PSE; - /* Our identity mapping is a 32 bits page directory. */ - val &= ~X86_CR4_PAE; - } - - /* - * Turn off VME if we're in emulated real mode. - */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - val &= ~X86_CR4_VME; - } - - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val); - Log2(("Guest CR4 %08x\n", val)); - - /* - * CR4 flags owned by the host; if the guests attempts to change them, then the VM will exit. - */ - val = 0 - | X86_CR4_VME - | X86_CR4_PAE - | X86_CR4_PGE - | X86_CR4_PSE - | X86_CR4_VMXE; - pVCpu->hwaccm.s.vmx.cr4_mask = val; - - rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val); - Log2(("Guest CR4-mask %08x\n", val)); - AssertRC(rc); - } - -#if 0 - /* Enable single stepping if requested and CPU supports it. */ - if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG) - if (DBGFIsStepping(pVCpu)) - { - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - } -#endif - - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3) - { - if (pVM->hwaccm.s.fNestedPaging) - { - Assert(PGMGetHyperCR3(pVCpu)); - pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu); - - Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff)); - /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */ - pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB - | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT); - - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP); - AssertRC(rc); - - if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx) - && !pVM->hwaccm.s.vmx.fUnrestrictedGuest) - { - RTGCPHYS GCPhys; - - /* We convert it here every time as PCI regions could be reconfigured. */ - rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys); - AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable)); - - /* - * We use our identity mapping page table here as we need to map guest virtual to - * guest physical addresses; EPT will take care of the translation to host physical addresses. - */ - val = GCPhys; - } - else - { - /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */ - val = pCtx->cr3; - rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx); - AssertRCReturn(rc, rc); - } - } - else - { - val = PGMGetHyperCR3(pVCpu); - Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)); - } - - /* Save our shadow CR3 register. */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val); - AssertRC(rc); - } - - /* - * Guest CPU context: Debug registers. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG) - { - pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */ - pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. 
*/ - - pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */ - pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */ - pCtx->dr[7] |= 0x400; /* must be one */ - - /* Resync DR7 */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); - AssertRC(rc); - -#ifdef DEBUG - /* Sync the hypervisor debug state now if any breakpoint is armed. */ - if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD) - && !CPUMIsHyperDebugStateActive(pVCpu) - && !DBGFIsStepping(pVCpu)) - { - /* Save the host and load the hypervisor debug state. */ - rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */); - AssertRC(rc); - - /* DRx intercepts remain enabled. */ - - /* Override dr7 with the hypervisor value. */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu)); - AssertRC(rc); - } - else -#endif - /* Sync the debug state now if any breakpoint is armed. */ - if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD)) - && !CPUMIsGuestDebugStateActive(pVCpu) - && !DBGFIsStepping(pVCpu)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed); - - /* Disable DRx move intercepts. */ - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - - /* Save the host and load the guest debug state. */ - rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */); - AssertRC(rc); - } - - /* IA32_DEBUGCTL MSR. */ - rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0); - AssertRC(rc); - - /** @todo do we really ever need this? */ - rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0); - AssertRC(rc); - } - - /* - * 64-bit guest mode. - */ - if (CPUMIsGuestInLongModeEx(pCtx)) - { -#if !defined(VBOX_ENABLE_64_BITS_GUESTS) - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64; -#else -# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL - if (!pVM->hwaccm.s.fAllow64BitGuests) - return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; -# endif - pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64; -#endif - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR) - { - /* Update these as wrmsr might have changed them. */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fs.u64Base); - AssertRC(rc); - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gs.u64Base); - AssertRC(rc); - } - } - else - { - pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32; - } - - hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx); - -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* - * Store all guest MSRs in the VM-entry load area, so they will be loaded - * during VM-entry and restored into the VM-exit store area during VM-exit. - */ - PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR; - unsigned idxMsr = 0; - - uint32_t u32GstExtFeatures; - uint32_t u32Temp; - CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures); - - if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)) - { -#if 0 - pMsr->u32IndexMSR = MSR_K6_EFER; - pMsr->u32Reserved = 0; - pMsr->u64Value = pCtx->msrEFER; - /* VT-x will complain if only MSR_K6_EFER_LME is set. 
*/ - if (!CPUMIsGuestInLongModeEx(pCtx)) - pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME); - pMsr++; idxMsr++; -#endif - - if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) - { - pMsr->u32IndexMSR = MSR_K8_LSTAR; - pMsr->u32Reserved = 0; - pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */ - pMsr++; idxMsr++; - pMsr->u32IndexMSR = MSR_K6_STAR; - pMsr->u32Reserved = 0; - pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */ - pMsr++; idxMsr++; - pMsr->u32IndexMSR = MSR_K8_SF_MASK; - pMsr->u32Reserved = 0; - pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */ - pMsr++; idxMsr++; - - /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */ -#if 0 - pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE; - pMsr->u32Reserved = 0; - pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */ - pMsr++; idxMsr++; -#endif - } - } - - if ( pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP - && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)) - { - pMsr->u32IndexMSR = MSR_K8_TSC_AUX; - pMsr->u32Reserved = 0; - rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value); - AssertRC(rc); - pMsr++; idxMsr++; - } - - pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr; - - rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr); - AssertRC(rc); - - rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr); - AssertRC(rc); -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - - bool fOffsettedTsc; - if (pVM->hwaccm.s.vmx.fUsePreemptTimer) - { - uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset); - - /* Make sure the returned values have sane upper and lower boundaries. */ - uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage); - - cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */ - cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */ - - cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift; - uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount); - AssertRC(rc); - } - else - fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset); - - if (fOffsettedTsc) - { - uint64_t u64CurTSC = ASMReadTSC(); - if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) - { - /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */ - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset); - AssertRC(rc); - - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset); - } - else - { - /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. 
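A minimal standalone sketch of the preemption-timer clamping and TSC-offsetting decision removed in the hunk above; the helper names are hypothetical, not VirtualBox APIs, and the constants (1/64 s, 1/2048 s, UINT32_MAX - 16) are taken directly from the removed code.

#include <stdbool.h>
#include <stdint.h>

/* Clamp the ticks-until-deadline for the VMX preemption timer the way the
 * removed code does: between 1/2048 and 1/64 of a second, scaled down by the
 * CPU-specific timer shift, and capped so it fits the 32-bit VMCS field. */
static uint32_t preempt_timer_value(uint64_t ticks_to_deadline, uint64_t cpu_hz,
                                    unsigned timer_shift)
{
    if (ticks_to_deadline > cpu_hz / 64)     /* at most 1/64th of a second    */
        ticks_to_deadline = cpu_hz / 64;
    if (ticks_to_deadline < cpu_hz / 2048)   /* at least 1/2048th of a second */
        ticks_to_deadline = cpu_hz / 2048;
    ticks_to_deadline >>= timer_shift;
    if (ticks_to_deadline > UINT32_MAX - 16)
        ticks_to_deadline = UINT32_MAX - 16;
    return (uint32_t)ticks_to_deadline;
}

/* TSC offsetting is only kept when the guest cannot observe the TSC going
 * backwards; otherwise RDTSC/RDTSCP exiting is re-enabled and emulated. */
static bool can_use_tsc_offsetting(uint64_t host_tsc, uint64_t tsc_offset,
                                   uint64_t last_seen_guest_tsc)
{
    return host_tsc + tsc_offset >= last_seen_guest_tsc;
}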
*/ - LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, - pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, - TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, - TMCpuTickGet(pVCpu))); - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow); - } - } - else - { - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept); - } - - /* Done with the major changes */ - pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST; - - /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */ - VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx); - return rc; -} - - -/** - * Syncs back the guest state from VMCS. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - RTGCUINTREG val, valShadow; - RTGCUINTPTR uInterruptState; - int rc; - - /* First sync back EIP, ESP, and EFLAGS. */ - rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val); - AssertRC(rc); - pCtx->rip = val; - rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val); - AssertRC(rc); - pCtx->rsp = val; - rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val); - AssertRC(rc); - pCtx->eflags.u32 = val; - - /* Take care of instruction fusing (sti, mov ss) */ - rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val); - uInterruptState = val; - if (uInterruptState != 0) - { - Assert(uInterruptState <= 2); /* only sti & mov ss */ - Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip)); - EMSetInhibitInterruptsPC(pVCpu, pCtx->rip); - } - else - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); - - /* Control registers. */ - VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow); - VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val); - val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask); - CPUMSetGuestCR0(pVCpu, val); - - VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow); - VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val); - val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask); - CPUMSetGuestCR4(pVCpu, val); - - /* - * No reason to sync back the CRx registers. They can't be changed by the guest unless in - * the nested paging case where CR3 & CR4 can be changed by the guest. - */ - if ( pVM->hwaccm.s.fNestedPaging - && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */ - { - PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache; - - /* Can be updated behind our back in the nested paging case. */ - CPUMSetGuestCR2(pVCpu, pCache->cr2); - - VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val); - - if (val != pCtx->cr3) - { - CPUMSetGuestCR3(pVCpu, val); - PGMUpdateCR3(pVCpu, val); - } - rc = hmR0VmxSavePaePdpes(pVCpu, pCtx); - AssertRCReturn(rc, rc); - } - - /* Sync back DR7. */ - VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val); - pCtx->dr[7] = val; - - /* Guest CPU context: ES, CS, SS, DS, FS, GS. 
*/ - VMX_READ_SELREG(ES, es); - VMX_READ_SELREG(SS, ss); - VMX_READ_SELREG(CS, cs); - VMX_READ_SELREG(DS, ds); - VMX_READ_SELREG(FS, fs); - VMX_READ_SELREG(GS, gs); - - /* System MSRs */ - VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val); - pCtx->SysEnter.cs = val; - VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val); - pCtx->SysEnter.eip = val; - VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val); - pCtx->SysEnter.esp = val; - - /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */ - VMX_READ_SELREG(LDTR, ldtr); - - VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val); - pCtx->gdtr.cbGdt = val; - VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val); - pCtx->gdtr.pGdt = val; - - VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val); - pCtx->idtr.cbIdt = val; - VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val); - pCtx->idtr.pIdt = val; - - /* Real mode emulation using v86 mode. */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - /* Hide our emulation flags */ - pCtx->eflags.Bits.u1VM = 0; - - /* Restore original IOPL setting as we always use 0. */ - pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL; - - /* Force a TR resync every time in case we switch modes. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR; - } - else - { - /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */ - VMX_READ_SELREG(TR, tr); - } - -#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* - * Save the possibly changed MSRs that we automatically restore and save during a world switch. - */ - for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++) - { - PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR; - pMsr += i; - - switch (pMsr->u32IndexMSR) - { - case MSR_K8_LSTAR: - pCtx->msrLSTAR = pMsr->u64Value; - break; - case MSR_K6_STAR: - pCtx->msrSTAR = pMsr->u64Value; - break; - case MSR_K8_SF_MASK: - pCtx->msrSFMASK = pMsr->u64Value; - break; - /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */ -#if 0 - case MSR_K8_KERNEL_GS_BASE: - pCtx->msrKERNELGSBASE = pMsr->u64Value; - break; -#endif - case MSR_K8_TSC_AUX: - CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value); - break; -#if 0 - case MSR_K6_EFER: - /* EFER can't be changed without causing a VM-exit. */ - /* Assert(pCtx->msrEFER == pMsr->u64Value); */ - break; -#endif - default: - AssertFailed(); - return VERR_HM_UNEXPECTED_LD_ST_MSR; - } - } -#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - return VINF_SUCCESS; -} - - -/** - * Dummy placeholder for TLB flush handling before VM-entry. Used in the case - * where neither EPT nor VPID is supported by the CPU. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu) -{ - NOREF(pVM); - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); - pVCpu->hwaccm.s.TlbShootdown.cPages = 0; - return; -} - - -/** - * Setup the tagged TLB for EPT+VPID. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. 
- */ -static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu) -{ - PHMGLOBLCPUINFO pCpu; - - Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID); - - pCpu = HWACCMR0GetCurrentCpu(); - - /* - * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last - * This can happen both for start & resume due to long jumps back to ring-3. - * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB - * or the host Cpu is online after a suspend/resume, so we cannot reuse the current ASID anymore. - */ - bool fNewASID = false; - if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu - || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - { - pVCpu->hwaccm.s.fForceTLBFlush = true; - fNewASID = true; - } - - /* - * Check for explicit TLB shootdowns. - */ - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) - pVCpu->hwaccm.s.fForceTLBFlush = true; - - pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu; - - if (pVCpu->hwaccm.s.fForceTLBFlush) - { - if (fNewASID) - { - ++pCpu->uCurrentASID; - if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID) - { - pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */ - pCpu->cTLBFlushes++; - pCpu->fFlushASIDBeforeUse = true; - } - - pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID; - if (pCpu->fFlushASIDBeforeUse) - { - hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */); -#ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID); -#endif - } - } - else - { - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT) - hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */); - else - hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT); - -#ifdef VBOX_WITH_STATISTICS - /* - * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these - * as ASID flushes too, better than including them under StatFlushTLBWorldSwitch. - */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID); -#endif - } - - pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes; - pVCpu->hwaccm.s.fForceTLBFlush = false; - } - else - { - AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID, - ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n", - pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes, - pCpu->uCurrentASID, pCpu->cTLBFlushes)); - - /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should - * not be executed. See hwaccmQueueInvlPage() where it is commented - * out. Support individual entry flushing someday. */ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown); - - /* - * Flush individual guest entries using VPID from the TLB or as little as possible with EPT - * as supported by the CPU. 
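A minimal standalone sketch of the per-CPU ASID/VPID rotation used by the tagged-TLB setup in the hunk above; tlb_cpu_state and next_asid() are hypothetical stand-ins for the per-CPU HM data, with max_asid coming from the VMX capabilities.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical per-CPU state mirroring the fields used above. */
struct tlb_cpu_state {
    uint32_t current_asid;     /* last ASID handed out on this CPU        */
    uint32_t flush_count;      /* bumped whenever the ASID space wraps     */
    bool     flush_before_use; /* stale translations may exist after wrap  */
};

/* Hand out an ASID, wrapping back to 1 (0 is reserved for the host) and
 * requesting a full flush when the ASID space is exhausted -- the same
 * policy as the removed code. */
static uint32_t next_asid(struct tlb_cpu_state *cpu, uint32_t max_asid)
{
    if (++cpu->current_asid >= max_asid)
    {
        cpu->current_asid = 1;        /* start over at 1; host uses 0 */
        cpu->flush_count++;
        cpu->flush_before_use = true;
    }
    return cpu->current_asid;
}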
- */ - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR) - { - for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++) - hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]); - } - else - hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT); - } - else - { -#ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch); -#endif - } - } - pVCpu->hwaccm.s.TlbShootdown.cPages = 0; - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); - - AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, - ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes)); - AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID)); - AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID)); - - /* Update VMCS with the VPID. */ - int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID); - AssertRC(rc); -} - - -/** - * Setup the tagged TLB for EPT only. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu) -{ - PHMGLOBLCPUINFO pCpu; - - Assert(pVM->hwaccm.s.fNestedPaging); - Assert(!pVM->hwaccm.s.vmx.fVPID); - - pCpu = HWACCMR0GetCurrentCpu(); - - /* - * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last - * This can happen both for start & resume due to long jumps back to ring-3. - * A change in the TLB flush count implies the host Cpu is online after a suspend/resume. - */ - if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu - || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - { - pVCpu->hwaccm.s.fForceTLBFlush = true; - } - - /* - * Check for explicit TLB shootdown flushes. - */ - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) - pVCpu->hwaccm.s.fForceTLBFlush = true; - - pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu; - pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes; - - if (pVCpu->hwaccm.s.fForceTLBFlush) - hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT); - else - { - /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should - * not be executed. See hwaccmQueueInvlPage() where it is commented - * out. Support individual entry flushing someday. */ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) - { - /* - * We cannot flush individual entries without VPID support. Flush using EPT. - */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown); - hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT); - } - } - pVCpu->hwaccm.s.TlbShootdown.cPages= 0; - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); - -#ifdef VBOX_WITH_STATISTICS - if (pVCpu->hwaccm.s.fForceTLBFlush) - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch); - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch); -#endif -} - - -/** - * Setup the tagged TLB for VPID. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. 
- */ -static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu) -{ - PHMGLOBLCPUINFO pCpu; - - Assert(pVM->hwaccm.s.vmx.fVPID); - Assert(!pVM->hwaccm.s.fNestedPaging); - - pCpu = HWACCMR0GetCurrentCpu(); - - /* - * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last - * This can happen both for start & resume due to long jumps back to ring-3. - * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB - * or the host Cpu is online after a suspend/resume, so we cannot reuse the current ASID anymore. - */ - if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu - || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - { - /* Force a TLB flush on VM entry. */ - pVCpu->hwaccm.s.fForceTLBFlush = true; - } - - /* - * Check for explicit TLB shootdown flushes. - */ - if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) - pVCpu->hwaccm.s.fForceTLBFlush = true; - - pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu; - - if (pVCpu->hwaccm.s.fForceTLBFlush) - { - ++pCpu->uCurrentASID; - if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID) - { - pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */ - pCpu->cTLBFlushes++; - pCpu->fFlushASIDBeforeUse = true; - } - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID); - - pVCpu->hwaccm.s.fForceTLBFlush = false; - pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes; - pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID; - if (pCpu->fFlushASIDBeforeUse) - hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */); - } - else - { - AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID, - ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n", - pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes, - pCpu->uCurrentASID, pCpu->cTLBFlushes)); - - /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should - * not be executed. See hwaccmQueueInvlPage() where it is commented - * out. Support individual entry flushing someday. */ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) - { - /* - * Flush individual guest entries using VPID from the TLB or as little as possible with EPT - * as supported by the CPU. 
- */ - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR) - { - for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++) - hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]); - } - else - hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */); - } - } - pVCpu->hwaccm.s.TlbShootdown.cPages = 0; - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); - - AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, - ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes)); - AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID)); - AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, - ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID)); - - int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID); - AssertRC(rc); - -# ifdef VBOX_WITH_STATISTICS - if (pVCpu->hwaccm.s.fForceTLBFlush) - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch); - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch); -# endif -} - - -/** - * Runs guest code in a VT-x VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); - STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1); - STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2); - - VBOXSTRICTRC rc = VINF_SUCCESS; - int rc2; - RTGCUINTREG val; - RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID; - RTGCUINTREG instrError, cbInstr; - RTGCUINTPTR exitQualification = 0; - RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */ - RTGCUINTPTR errCode, instrInfo; - bool fSetupTPRCaching = false; - uint64_t u64OldLSTAR = 0; - uint8_t u8LastTPR = 0; - RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0; - unsigned cResume = 0; -#ifdef VBOX_STRICT - RTCPUID idCpuCheck; - bool fWasInLongMode = false; -#endif -#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - uint64_t u64LastTime = RTTimeMilliTS(); -#endif - - Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) - || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC)); - - /* - * Check if we need to use TPR shadowing. 
- */ - if ( CPUMIsGuestInLongModeEx(pCtx) - || ( (( pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) - || pVM->hwaccm.s.fTRPPatchingAllowed) - && pVM->hwaccm.s.fHasIoApic) - ) - { - fSetupTPRCaching = true; - } - - Log2(("\nE")); - -#ifdef VBOX_STRICT - { - RTCCUINTREG val2; - - rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2); - AssertRC(rc2); - Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2)); - - /* allowed zero */ - if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) - Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n")); - - /* allowed one */ - if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0) - Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n")); - - rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2); - AssertRC(rc2); - Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2)); - - /* - * Must be set according to the MSR, but can be cleared if nested paging is used. - */ - if (pVM->hwaccm.s.fNestedPaging) - { - val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT - | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT; - } - - /* allowed zero */ - if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) - Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n")); - - /* allowed one */ - if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0) - Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n")); - - rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2); - AssertRC(rc2); - Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2)); - - /* allowed zero */ - if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) - Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n")); - - /* allowed one */ - if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0) - Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n")); - - rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2); - AssertRC(rc2); - Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2)); - - /* allowed zero */ - if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) - Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n")); - - /* allowed one */ - if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0) - Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n")); - } - fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx); -#endif /* VBOX_STRICT */ - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS(); -#endif - - /* - * We can jump to this point to resume execution after determining that a VM-exit is innocent. - */ -ResumeExecution: - if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry)) - STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x); - AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(), - ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n", - (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification)); - Assert(!HWACCMR0SuspendPending()); - /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */ - Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx)); - - /* - * Safety precaution; looping for too long here can have a very bad effect on the host. 
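A minimal standalone sketch of the allowed-0/allowed-1 check performed by the VBOX_STRICT block above; vmx_ctrl_valid() is a hypothetical helper, assuming the IA32_VMX_*_CTLS capability MSR layout (must-be-one bits in the low dword, may-be-one bits in the high dword).

#include <stdbool.h>
#include <stdint.h>

/* A VMX execution-control value is valid only if every bit the capability MSR
 * requires to be 1 is set, and no bit it forbids is set -- the same two checks
 * logged as "zero" and "one" violations in the removed code. */
static bool vmx_ctrl_valid(uint32_t ctrl, uint64_t cap_msr)
{
    uint32_t must_be_one = (uint32_t)cap_msr;         /* "disallowed-zero" bits */
    uint32_t may_be_one  = (uint32_t)(cap_msr >> 32); /* "allowed-one" bits     */

    if ((ctrl & must_be_one) != must_be_one)  /* a required bit is clear */
        return false;
    if (ctrl & ~may_be_one)                   /* a forbidden bit is set  */
        return false;
    return true;
}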
- */ - if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume); - rc = VINF_EM_RAW_INTERRUPT; - goto end; - } - - /* - * Check for IRQ inhibition due to instruction fusing (sti, mov ss). - */ - if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) - { - Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu))); - if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu)) - { - /* - * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here. - * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might - * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could - * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think. - */ - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); - /* Irq inhibition is no longer active; clear the corresponding VMX state. */ - rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); - AssertRC(rc2); - } - } - else - { - /* Irq inhibition is no longer active; clear the corresponding VMX state. */ - rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); - AssertRC(rc2); - } - -#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - if (RT_UNLIKELY((cResume & 0xf) == 0)) - { - uint64_t u64CurTime = RTTimeMilliTS(); - - if (RT_UNLIKELY(u64CurTime > u64LastTime)) - { - u64LastTime = u64CurTime; - TMTimerPollVoid(pVM, pVCpu); - } - } -#endif - - /* - * Check for pending actions that force us to go back to ring-3. - */ - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA) - || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST)) - { - /* Check if a sync operation is pending. */ - if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) - { - rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); - if (rc != VINF_SUCCESS) - { - AssertRC(VBOXSTRICTRC_VAL(rc)); - Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc))); - goto end; - } - } - -#ifdef DEBUG - /* Intercept X86_XCPT_DB if stepping is enabled */ - if (!DBGFIsStepping(pVCpu)) -#endif - { - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK) - || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3); - rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; - goto end; - } - } - - /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */ - if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST) - || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST)) - { - rc = VINF_EM_PENDING_REQUEST; - goto end; - } - - /* Check if a pgm pool flush is in progress. */ - if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) - { - rc = VINF_PGM_POOL_FLUSH_PENDING; - goto end; - } - - /* Check if DMA work is pending (2nd+ run). */ - if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1) - { - rc = VINF_EM_RAW_TO_R3; - goto end; - } - } - -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* - * Exit to ring-3 preemption/work is pending. 
- * - * Interrupts are disabled before the call to make sure we don't miss any interrupt - * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this - * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.) - * - * Note! Interrupts must be disabled done *before* we check for TLB flushes; TLB - * shootdowns rely on this. - */ - uOldEFlags = ASMIntDisableFlags(); - if (RTThreadPreemptIsPending(NIL_RTTHREAD)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending); - rc = VINF_EM_RAW_INTERRUPT; - goto end; - } - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); -#endif - - /* - * When external interrupts are pending, we should exit the VM when IF is et. - * Note: *After* VM_FF_INHIBIT_INTERRUPTS check! - */ - rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx); - if (RT_FAILURE(rc)) - goto end; - - /** @todo check timers?? */ - - /* - * TPR caching using CR8 is only available in 64-bit mode. - * Note: The 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but this appears missing in Intel CPUs. - * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true) . - */ - /** @todo query and update the TPR only when it could have been changed (mmio - * access & wrsmr (x2apic) */ - if (fSetupTPRCaching) - { - /* TPR caching in CR8 */ - bool fPending; - - rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); - AssertRC(rc2); - /* The TPR can be found at offset 0x80 in the APIC mmio page. */ - pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR; - - /* - * Two options here: - * - external interrupt pending, but masked by the TPR value. - * -> a CR8 update that lower the current TPR value should cause an exit - * - no pending interrupts - * -> We don't need to be explicitely notified. There are enough world switches for detecting pending interrupts. - */ - - /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ - rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); - AssertRC(VBOXSTRICTRC_VAL(rc)); - - if (pVM->hwaccm.s.fTPRPatchingActive) - { - Assert(!CPUMIsGuestInLongModeEx(pCtx)); - /* Our patch code uses LSTAR for TPR caching. */ - pCtx->msrLSTAR = u8LastTPR; - - /** @todo r=ramshankar: we should check for MSR-bitmap support here. */ - if (fPending) - { - /* A TPR change could activate a pending interrupt, so catch lstar writes. */ - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false); - } - else - { - /* - * No interrupts are pending, so we don't need to be explicitely notified. - * There are enough world switches for detecting pending interrupts. - */ - hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true); - } - } - } - -#ifdef LOG_ENABLED - if ( pVM->hwaccm.s.fNestedPaging - || pVM->hwaccm.s.vmx.fVPID) - { - PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu(); - if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu) - { - LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, - pCpu->idCpu)); - } - else if (pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes) - { - LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, - pCpu->cTLBFlushes)); - } - else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH)) - LogFlow(("Manual TLB flush\n")); - } -#endif -#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 - PGMRZDynMapFlushAutoSet(pVCpu); -#endif - - /* - * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3! 
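A minimal sketch of the CR8/TPR-threshold mapping used by the TPR-caching code above; tpr_threshold() is a hypothetical helper. The TPR is read from offset 0x80 of the virtual-APIC page, and CR8 only exposes its upper nibble, which is why the threshold is the TPR shifted right by four when an interrupt is pending and zero otherwise.

#include <stdbool.h>
#include <stdint.h>

/* Program a threshold so a guest CR8 write that drops the priority below a
 * pending interrupt causes a VM-exit; with nothing pending, no exit is needed
 * because ordinary world switches are frequent enough to notice new interrupts. */
static uint8_t tpr_threshold(uint8_t tpr_mmio_value, bool irq_pending)
{
    return irq_pending ? (uint8_t)(tpr_mmio_value >> 4) : 0;
}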
- * (until the actual world switch) - */ -#ifdef VBOX_STRICT - idCpuCheck = RTMpCpuId(); -#endif -#ifdef LOG_ENABLED - VMMR0LogFlushDisable(pVCpu); -#endif - - /* - * Save the host state first. - */ - if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT) - { - rc = VMXR0SaveHostState(pVM, pVCpu); - if (RT_UNLIKELY(rc != VINF_SUCCESS)) - { - VMMR0LogFlushEnable(pVCpu); - goto end; - } - } - - /* - * Load the guest state. - */ - if (!pVCpu->hwaccm.s.fContextUseFlags) - { - VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal); - } - else - { - rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx); - if (RT_UNLIKELY(rc != VINF_SUCCESS)) - { - VMMR0LogFlushEnable(pVCpu); - goto end; - } - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull); - } - -#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* - * Disable interrupts to make sure a poke will interrupt execution. - * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this. - */ - uOldEFlags = ASMIntDisableFlags(); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); -#endif - - /* Non-register state Guest Context */ - /** @todo change me according to cpu state */ - rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE); - AssertRC(rc2); - - /* Set TLB flush state as checked until we return from the world switch. */ - ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true); - /* Deal with tagged TLB setup and invalidation. */ - pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu); - - /* - * Manual save and restore: - * - General purpose registers except RIP, RSP - * - * Trashed: - * - CR2 (we don't care) - * - LDTR (reset to 0) - * - DRx (presumably not changed at all) - * - DR7 (reset to 0x400) - * - EFLAGS (reset to RT_BIT(1); not relevant) - */ - - /* All done! Let's start VM execution. */ - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x); - Assert(idCpuCheck == RTMpCpuId()); - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume; - pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS(); -#endif - - /* - * Save the current TPR value in the LSTAR MSR so our patches can access it. - */ - if (pVM->hwaccm.s.fTPRPatchingActive) - { - Assert(pVM->hwaccm.s.fTPRPatchingActive); - u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR); - ASMWrMsr(MSR_K8_LSTAR, u8LastTPR); - } - - TMNotifyStartOfExecution(pVCpu); - -#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* - * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that - * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}. 
- */ - if ( (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) - && !(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT)) - { - pVCpu->hwaccm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX); - uint64_t u64GuestTSCAux = 0; - rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux); - AssertRC(rc2); - ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux); - } -#endif - -#ifdef VBOX_WITH_KERNEL_USING_XMM - rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM); -#else - rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu); -#endif - ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false); - ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits); - - /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */ - if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT)) - { -#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE - /* Restore host's TSC_AUX. */ - if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) - ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hwaccm.s.u64HostTSCAux); -#endif - - TMCpuTickSetLastSeen(pVCpu, - ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */); - } - - TMNotifyEndOfExecution(pVCpu); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); - Assert(!(ASMGetFlags() & X86_EFL_IF)); - - /* - * Restore the host LSTAR MSR if the guest could have changed it. - */ - if (pVM->hwaccm.s.fTPRPatchingActive) - { - Assert(pVM->hwaccm.s.fTPRPatchingActive); - pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR); - ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR); - } - - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x); - ASMSetFlags(uOldEFlags); -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - uOldEFlags = ~(RTCCUINTREG)0; -#endif - - AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", - pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries)); - - /* In case we execute a goto ResumeExecution later on. */ - pVCpu->hwaccm.s.fResumeVM = true; - pVCpu->hwaccm.s.fForceTLBFlush = false; - - /* - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE - * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - */ - - if (RT_UNLIKELY(rc != VINF_SUCCESS)) - { - hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx); - VMMR0LogFlushEnable(pVCpu); - goto end; - } - - /* Success. Query the guest state and figure out what has happened. */ - - /* Investigate why there was a VM-exit. */ - rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]); - - exitReason &= 0xffff; /* bit 0-15 contain the exit code. */ - rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError); - rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr); - rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo); - /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. 
*/ - rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode); - rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo); - rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification); - AssertRC(rc2); - - /* - * Sync back the guest state. - */ - rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx); - AssertRC(rc2); - - /* Note! NOW IT'S SAFE FOR LOGGING! */ - VMMR0LogFlushEnable(pVCpu); - Log2(("Raw exit reason %08x\n", exitReason)); -#if ARCH_BITS == 64 /* for the time being */ - VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason); -#endif - - /* - * Check if an injected event was interrupted prematurely. - */ - rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val); - AssertRC(rc2); - pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val); - if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo) - /* Ignore 'int xx' as they'll be restarted anyway. */ - && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW - /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */ - && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT) - { - Assert(!pVCpu->hwaccm.s.Event.fPending); - pVCpu->hwaccm.s.Event.fPending = true; - /* Error code present? */ - if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo)) - { - rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val); - AssertRC(rc2); - pVCpu->hwaccm.s.Event.errCode = val; - Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", - pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val)); - } - else - { - Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, - (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification)); - pVCpu->hwaccm.s.Event.errCode = 0; - } - } -#ifdef VBOX_STRICT - else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo) - /* Ignore software exceptions (such as int3) as they're reoccur when we restart the instruction anyway. */ - && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT) - { - Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", - pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification)); - } - - if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE) - HWACCMDumpRegs(pVM, pVCpu, pCtx); -#endif - - Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); - Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification)); - Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr)); - Log2(("Interruption error code %d\n", (uint32_t)errCode)); - Log2(("IntInfo = %08x\n", (uint32_t)intInfo)); - - /* - * Sync back the TPR if it was changed. - */ - if ( fSetupTPRCaching - && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]) - { - rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]); - AssertRC(rc2); - } - -#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. 
*/ - RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64", - exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo); -#endif - STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x); - - /* Some cases don't need a complete resync of the guest CPU state; handle them here. */ - Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */ - switch (exitReason) - { - case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */ - case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */ - { - uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo); - - if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo)) - { - Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ); -#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION - if ( RTThreadPreemptIsPendingTrusty() - && !RTThreadPreemptIsPending(NIL_RTTHREAD)) - goto ResumeExecution; -#endif - /* External interrupt; leave to allow it to be dispatched again. */ - rc = VINF_EM_RAW_INTERRUPT; - break; - } - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo)) - { - case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */ - /* External interrupt; leave to allow it to be dispatched again. */ - rc = VINF_EM_RAW_INTERRUPT; - break; - - case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */ - AssertFailed(); /* can't come here; fails the first check. */ - break; - - case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */ - case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */ - Assert(vector == 1 || vector == 3 || vector == 4); - /* no break */ - case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */ - Log2(("Hardware/software interrupt %d\n", vector)); - switch (vector) - { - case X86_XCPT_NM: - { - Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode)); - - /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */ - /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */ - rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx); - if (rc == VINF_SUCCESS) - { - Assert(CPUMIsGuestFPUStateActive(pVCpu)); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM); - - /* Continue execution. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - - Log(("Forward #NM fault to the guest\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, 0); - AssertRC(rc2); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - - case X86_XCPT_PF: /* Page fault */ - { -#ifdef VBOX_ALWAYS_TRAP_PF - if (pVM->hwaccm.s.fNestedPaging) - { - /* - * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution. - */ - Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, - errCode, (RTGCPTR)pCtx->rsp)); - - Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx)); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF); - - /* Now we must update CR2. 
*/ - pCtx->cr2 = exitQualification; - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } -#else - Assert(!pVM->hwaccm.s.fNestedPaging); -#endif - -#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING - /* Shortcut for APIC TPR reads and writes; 32 bits guests only */ - if ( pVM->hwaccm.s.fTRPPatchingAllowed - && pVM->hwaccm.s.pGuestPatchMem - && (exitQualification & 0xfff) == 0x080 - && !(errCode & X86_TRAP_PF_P) /* not present */ - && CPUMGetGuestCPL(pVCpu) == 0 - && !CPUMIsGuestInLongModeEx(pCtx) - && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches)) - { - RTGCPHYS GCPhysApicBase, GCPhys; - PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - - rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys); - if ( rc == VINF_SUCCESS - && GCPhys == GCPhysApicBase) - { - /* Only attempt to patch the instruction once. */ - PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip); - if (!pPatch) - { - rc = VINF_EM_HWACCM_PATCH_TPR_INSTR; - break; - } - } - } -#endif - - Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode)); - /* Exit qualification contains the linear address of the page fault. */ - TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); - TRPMSetErrorCode(pVCpu, errCode); - TRPMSetFaultAddress(pVCpu, exitQualification); - - /* Shortcut for APIC TPR reads and writes. */ - if ( (exitQualification & 0xfff) == 0x080 - && !(errCode & X86_TRAP_PF_P) /* not present */ - && fSetupTPRCaching - && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) - { - RTGCPHYS GCPhysApicBase, GCPhys; - PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - - rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys); - if ( rc == VINF_SUCCESS - && GCPhys == GCPhysApicBase) - { - Log(("Enable VT-x virtual APIC access filtering\n")); - rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); - AssertRC(rc2); - } - } - - /* Forward it to our trap handler first, in case our shadow pages are out of sync. */ - rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification); - Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); - - if (rc == VINF_SUCCESS) - { /* We've successfully synced our shadow pages, so let's just continue execution. */ - Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF); - - TRPMResetTrap(pVCpu); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - else if (rc == VINF_EM_RAW_GUEST_TRAP) - { - /* - * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution. - */ - Log2(("Forward page fault to the guest\n")); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF); - /* The error code might have been changed. */ - errCode = TRPMGetErrorCode(pVCpu); - - TRPMResetTrap(pVCpu); - - /* Now we must update CR2. 
*/ - pCtx->cr2 = exitQualification; - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } -#ifdef VBOX_STRICT - if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK) - Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc))); -#endif - /* Need to go back to the recompiler to emulate the instruction. */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM); - TRPMResetTrap(pVCpu); - break; - } - - case X86_XCPT_MF: /* Floating point exception. */ - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF); - if (!(pCtx->cr0 & X86_CR0_NE)) - { - /* old style FPU error reporting needs some extra work. */ - /** @todo don't fall back to the recompiler, but do it manually. */ - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - - case X86_XCPT_DB: /* Debug exception. */ - { - uint64_t uDR6; - - /* - * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet. - * - * Exit qualification bits: - * 3:0 B0-B3 which breakpoint condition was met - * 12:4 Reserved (0) - * 13 BD - debug register access detected - * 14 BS - single step execution or branch taken - * 63:15 Reserved (0) - */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB); - - /* Note that we don't support guest and host-initiated debugging at the same time. */ - - uDR6 = X86_DR6_INIT_VAL; - uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS)); - rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6); - if (rc == VINF_EM_RAW_GUEST_TRAP) - { - /* Update DR6 here. */ - pCtx->dr[6] = uDR6; - - /* Resync DR6 if the debug state is active. */ - if (CPUMIsGuestDebugStateActive(pVCpu)) - ASMSetDR6(pCtx->dr[6]); - - /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */ - pCtx->dr[7] &= ~X86_DR7_GD; - - /* Paranoia. */ - pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */ - pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */ - pCtx->dr[7] |= 0x400; /* must be one */ - - /* Resync DR7 */ - rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); - AssertRC(rc2); - - Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip, - exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7])); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - /* Return to ring 3 to deal with the debug exit code. */ - Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc))); - break; - } - - case X86_XCPT_BP: /* Breakpoint. 
*/ - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP); - rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_EM_RAW_GUEST_TRAP) - { - Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip)); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - if (rc == VINF_SUCCESS) - { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc))); - break; - } - - case X86_XCPT_GP: /* General protection failure exception. */ - { - uint32_t cbOp; - PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP); -#ifdef VBOX_STRICT - if ( !CPUMIsGuestInRealModeEx(pCtx) - || !pVM->hwaccm.s.vmx.pRealModeTSS) - { - Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode)); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } -#endif - Assert(CPUMIsGuestInRealModeEx(pCtx)); - - LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); - - rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); - if (RT_SUCCESS(rc2)) - { - bool fUpdateRIP = true; - - rc = VINF_SUCCESS; - Assert(cbOp == pDis->cbInstr); - switch (pDis->pCurInstr->uOpcode) - { - case OP_CLI: - pCtx->eflags.Bits.u1IF = 0; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli); - break; - - case OP_STI: - pCtx->eflags.Bits.u1IF = 1; - EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr); - Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); - rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, - VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); - AssertRC(rc2); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti); - break; - - case OP_HLT: - fUpdateRIP = false; - rc = VINF_EM_HALT; - pCtx->rip += pDis->cbInstr; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt); - break; - - case OP_POPF: - { - RTGCPTR GCPtrStack; - uint32_t cbParm; - uint32_t uMask; - X86EFLAGS eflags; - - if (pDis->fPrefix & DISPREFIX_OPSIZE) - { - cbParm = 4; - uMask = 0xffffffff; - } - else - { - cbParm = 2; - uMask = 0xffff; - } - - rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - eflags.u = 0; - rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask)); - pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) - | (eflags.u & X86_EFL_POPF_BITS & uMask); - /* RF cleared when popped in real mode; see pushf description in AMD manual. 
*/ - pCtx->eflags.Bits.u1RF = 0; - pCtx->esp += cbParm; - pCtx->esp &= uMask; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf); - break; - } - - case OP_PUSHF: - { - RTGCPTR GCPtrStack; - uint32_t cbParm; - uint32_t uMask; - X86EFLAGS eflags; - - if (pDis->fPrefix & DISPREFIX_OPSIZE) - { - cbParm = 4; - uMask = 0xffffffff; - } - else - { - cbParm = 2; - uMask = 0xffff; - } - - rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, - &GCPtrStack); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - eflags = pCtx->eflags; - /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */ - eflags.Bits.u1RF = 0; - eflags.Bits.u1VM = 0; - - rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack)); - pCtx->esp -= cbParm; - pCtx->esp &= uMask; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf); - break; - } - - case OP_IRET: - { - RTGCPTR GCPtrStack; - uint32_t uMask = 0xffff; - uint16_t aIretFrame[3]; - - if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE)) - { - rc = VERR_EM_INTERPRETER; - break; - } - - rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame)); - if (RT_FAILURE(rc2)) - { - rc = VERR_EM_INTERPRETER; - break; - } - pCtx->ip = aIretFrame[0]; - pCtx->cs.Sel = aIretFrame[1]; - pCtx->cs.ValidSel = aIretFrame[1]; - pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4; - pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) - | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask); - pCtx->sp += sizeof(aIretFrame); - - LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip)); - fUpdateRIP = false; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret); - break; - } - - case OP_INT: - { - uint32_t intInfo2; - - LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff)); - intInfo2 = pDis->Param1.uValue & 0xff; - intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(VBOXSTRICTRC_VAL(rc)); - fUpdateRIP = false; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); - break; - } - - case OP_INTO: - { - if (pCtx->eflags.Bits.u1OF) - { - uint32_t intInfo2; - - LogFlow(("Realmode: INTO\n")); - intInfo2 = X86_XCPT_OF; - intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(VBOXSTRICTRC_VAL(rc)); - fUpdateRIP = false; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); - } - break; - } - - case OP_INT3: - { - uint32_t intInfo2; - - LogFlow(("Realmode: INT 3\n")); - intInfo2 = 3; - intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(VBOXSTRICTRC_VAL(rc)); - fUpdateRIP = false; - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); - break; - } - - default: - rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR); - fUpdateRIP = false; - break; - } - - if 
(rc == VINF_SUCCESS) - { - if (fUpdateRIP) - pCtx->rip += cbOp; /* Move on to the next instruction. */ - - /* - * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the - * whole context to be done with it. - */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL; - - /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - } - else - rc = VERR_EM_INTERPRETER; - - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, - ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc))); - break; - } - -#ifdef VBOX_STRICT - case X86_XCPT_XF: /* SIMD exception. */ - case X86_XCPT_DE: /* Divide error. */ - case X86_XCPT_UD: /* Unknown opcode exception. */ - case X86_XCPT_SS: /* Stack segment exception. */ - case X86_XCPT_NP: /* Segment not present exception. */ - { - switch (vector) - { - case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE); break; - case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD); break; - case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS); break; - case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP); break; - case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF); break; - } - - Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip)); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } -#endif - default: - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk); - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode)); - rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - cbInstr, errCode); - AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */ - - /* Go back to ring-3 in case of a triple fault. */ - if ( vector == X86_XCPT_DF - && rc == VINF_EM_RESET) - { - break; - } - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - goto ResumeExecution; - } - AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector)); - rc = VERR_VMX_UNEXPECTED_EXCEPTION; - break; - } /* switch (vector) */ - - break; - - default: - rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE; - AssertMsgFailed(("Unexpected interruption code %x\n", intInfo)); - break; - } - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); - break; - } - - /* - * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed - * by the configuration of the EPT paging structures. - */ - case VMX_EXIT_EPT_VIOLATION: - { - RTGCPHYS GCPhys; - - Assert(pVM->hwaccm.s.fNestedPaging); - - rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); - AssertRC(rc2); - Assert(((exitQualification >> 7) & 3) != 2); - - /* Determine the kind of violation. */ - errCode = 0; - if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH) - errCode |= X86_TRAP_PF_ID; - - if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE) - errCode |= X86_TRAP_PF_RW; - - /* If the page is present, then it's a page level protection fault. */ - if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT) - errCode |= X86_TRAP_PF_P; - else - { - /* Shortcut for APIC TPR reads and writes. 
*/ - if ( (GCPhys & 0xfff) == 0x080 - && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */ - && fSetupTPRCaching - && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) - { - RTGCPHYS GCPhysApicBase; - PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - if (GCPhys == GCPhysApicBase + 0x80) - { - Log(("Enable VT-x virtual APIC access filtering\n")); - rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); - AssertRC(rc2); - } - } - } - Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode)); - - /* GCPhys contains the guest physical address of the page fault. */ - TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); - TRPMSetErrorCode(pVCpu, errCode); - TRPMSetFaultAddress(pVCpu, GCPhys); - - /* Handle the pagefault trap for the nested shadow table. */ - rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys); - - /* - * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}. - */ - if ( rc == VINF_SUCCESS - || rc == VERR_PAGE_TABLE_NOT_PRESENT - || rc == VERR_PAGE_NOT_PRESENT) - { - /* We've successfully synced our shadow pages, so let's just continue execution. */ - Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF); - - TRPMResetTrap(pVCpu); - goto ResumeExecution; - } - -#ifdef VBOX_STRICT - if (rc != VINF_EM_RAW_EMULATE_INSTR) - LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); -#endif - /* Need to go back to the recompiler to emulate the instruction. */ - TRPMResetTrap(pVCpu); - break; - } - - case VMX_EXIT_EPT_MISCONFIG: - { - RTGCPHYS GCPhys; - - Assert(pVM->hwaccm.s.fNestedPaging); - - rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); - AssertRC(rc2); - Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys)); - - /* Shortcut for APIC TPR reads and writes. */ - if ( (GCPhys & 0xfff) == 0x080 - && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */ - && fSetupTPRCaching - && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) - { - RTGCPHYS GCPhysApicBase; - PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */ - GCPhysApicBase &= PAGE_BASE_GC_MASK; - if (GCPhys == GCPhysApicBase + 0x80) - { - Log(("Enable VT-x virtual APIC access filtering\n")); - rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); - AssertRC(rc2); - } - } - - rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX); - - /* - * If we succeed, resume execution. - * Or, if fail in interpreting the instruction because we couldn't get the guest physical address - * of the page containing the instruction via the guest's page tables (we would invalidate the guest page - * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this - * weird case. See @bugref{6043}. 
- */ - if ( rc == VINF_SUCCESS - || rc == VERR_PAGE_TABLE_NOT_PRESENT - || rc == VERR_PAGE_NOT_PRESENT) - { - Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip)); - goto ResumeExecution; - } - - Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); - break; - } - - case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */ - /* Clear VM-exit on IF=1 change. */ - LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, - VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF)); - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT; - rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc2); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow); - goto ResumeExecution; /* we check for pending guest interrupts there */ - - case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */ - case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd); - /* Skip instruction and continue directly. */ - pCtx->rip += cbInstr; - /* Continue execution.*/ - goto ResumeExecution; - - case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */ - { - Log2(("VMX: Cpuid %x\n", pCtx->eax)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid); - rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - Assert(cbInstr == 2); - pCtx->rip += cbInstr; - goto ResumeExecution; - } - AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */ - { - Log2(("VMX: Rdpmc %x\n", pCtx->ecx)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc); - rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - Assert(cbInstr == 2); - pCtx->rip += cbInstr; - goto ResumeExecution; - } - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */ - { - Log2(("VMX: Rdtsc\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc); - rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - Assert(cbInstr == 2); - pCtx->rip += cbInstr; - goto ResumeExecution; - } - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */ - { - Log2(("VMX: Rdtscp\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtscp); - rc = EMInterpretRdtscp(pVM, pVCpu, pCtx); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - Assert(cbInstr == 3); - pCtx->rip += cbInstr; - goto ResumeExecution; - } - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - - case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */ - { - Log2(("VMX: invlpg\n")); - Assert(!pVM->hwaccm.s.fNestedPaging); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvlpg); - rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. 
*/ - pCtx->rip += cbInstr; - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc))); - break; - } - - case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */ - { - Log2(("VMX: monitor\n")); - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor); - rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if (rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += cbInstr; - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - break; - } - - case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */ - /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */ - if ( pVM->hwaccm.s.fTPRPatchingActive - && pCtx->ecx == MSR_K8_LSTAR) - { - Assert(!CPUMIsGuestInLongModeEx(pCtx)); - if ((pCtx->eax & 0xff) != u8LastTPR) - { - Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff)); - - /* Our patch code uses LSTAR for TPR caching. */ - rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); - AssertRC(rc2); - } - - /* Skip the instruction and continue. */ - pCtx->rip += cbInstr; /* wrmsr = [0F 30] */ - - /* Only resume if successful. */ - goto ResumeExecution; - } - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR; - /* no break */ - case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */ - { - STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr); - - /* - * Note: The Intel spec. claims there's an REX version of RDMSR that's slightly different, - * so we play safe by completely disassembling the instruction. - */ - Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr")); - rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0); - if (rc == VINF_SUCCESS) - { - /* EIP has been updated already. */ - /* Only resume if successful. */ - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", - (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc))); - break; - } - - case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. 
*/ - { - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2); - - switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification)) - { - case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: - { - Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]); - rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx), - VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification), - VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification)); - switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)) - { - case 0: - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3; - break; - case 2: - break; - case 3: - Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx)); - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3; - break; - case 4: - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4; - break; - case 8: - /* CR8 contains the APIC TPR */ - Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 - & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)); - break; - - default: - AssertFailed(); - break; - } - break; - } - - case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: - { - Log2(("VMX: mov x, crx\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]); - - Assert( !pVM->hwaccm.s.fNestedPaging - || !CPUMIsGuestInPagedProtectedModeEx(pCtx) - || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3); - - /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */ - Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 - || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)); - - rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx), - VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification), - VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)); - break; - } - - case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: - { - Log2(("VMX: clts\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS); - rc = EMInterpretCLTS(pVM, pVCpu); - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - break; - } - - case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: - { - Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification))); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW); - rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)); - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; - break; - } - } - - /* Update EIP if no error occurred. */ - if (RT_SUCCESS(rc)) - pCtx->rip += cbInstr; - - if (rc == VINF_SUCCESS) - { - /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2); - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2); - break; - } - - case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */ - { - if ( !DBGFIsStepping(pVCpu) - && !CPUMIsHyperDebugStateActive(pVCpu)) - { - /* Disable DRx move intercepts. */ - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT; - rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc2); - - /* Save the host and load the guest debug state. 
*/ - rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */); - AssertRC(rc2); - -#ifdef LOG_ENABLED - if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) - { - Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), - VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification))); - } - else - Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification))); -#endif - -#ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch); - if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite); - else - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead); -#endif - - goto ResumeExecution; - } - - /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first - * time and restore DRx registers afterwards */ - if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) - { - Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), - VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification))); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite); - rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx), - VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), - VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)); - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG; - Log2(("DR7=%08x\n", pCtx->dr[7])); - } - else - { - Log2(("VMX: mov x, DRx\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead); - rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx), - VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification), - VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)); - } - /* Update EIP if no error occurred. */ - if (RT_SUCCESS(rc)) - pCtx->rip += cbInstr; - - if (rc == VINF_SUCCESS) - { - /* Only resume if successful. */ - goto ResumeExecution; - } - Assert(rc == VERR_EM_INTERPRETER); - break; - } - - /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */ - case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */ - { - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - uint32_t uPort; - uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification); - bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT); - - /** @todo necessary to make the distinction? */ - if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX) - uPort = pCtx->edx & 0xffff; - else - uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */ - - if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */ - { - rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ; - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - break; - } - - uint32_t cbSize = g_aIOSize[uIOWidth]; - if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification)) - { - /* ins/outs */ - PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState; - - /* Disassemble manually to deal with segment prefixes. */ - /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */ - /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. 
*/ - rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); - if (RT_SUCCESS(rc)) - { - if (fIOWrite) - { - Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite); - rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize); - } - else - { - Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize)); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead); - rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize); - } - } - else - rc = VINF_EM_RAW_EMULATE_INSTR; - } - else - { - /* Normal in/out */ - uint32_t uAndVal = g_aIOOpAnd[uIOWidth]; - - Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification)); - - if (fIOWrite) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite); - rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize); - if (rc == VINF_IOM_R3_IOPORT_WRITE) - HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize); - } - else - { - uint32_t u32Val = 0; - - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead); - rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize); - if (IOM_SUCCESS(rc)) - { - /* Write back to the EAX register. */ - pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal); - } - else - if (rc == VINF_IOM_R3_IOPORT_READ) - HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize); - } - } - - /* - * Handled the I/O return codes. - * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.) - */ - if (IOM_SUCCESS(rc)) - { - /* Update EIP and continue execution. */ - pCtx->rip += cbInstr; - if (RT_LIKELY(rc == VINF_SUCCESS)) - { - /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */ - if (pCtx->dr[7] & X86_DR7_ENABLED_MASK) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck); - for (unsigned i = 0; i < 4; i++) - { - unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)]; - - if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen) - && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i))) - && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO)) - { - uint64_t uDR6; - - Assert(CPUMIsGuestDebugStateActive(pVCpu)); - - uDR6 = ASMGetDR6(); - - /* Clear all breakpoint status flags and set the one we just hit. */ - uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3); - uDR6 |= (uint64_t)RT_BIT(i); - - /* - * Note: AMD64 Architecture Programmer's Manual 13.1: - * Bits 15:13 of the DR6 register is never cleared by the processor and must - * be cleared by software after the contents have been read. - */ - ASMSetDR6(uDR6); - - /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */ - pCtx->dr[7] &= ~X86_DR7_GD; - - /* Paranoia. */ - pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */ - pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */ - pCtx->dr[7] |= 0x400; /* must be one */ - - /* Resync DR7 */ - rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); - AssertRC(rc2); - - /* Construct inject info. 
*/ - intInfo = X86_XCPT_DB; - intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT); - intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); - - Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip)); - rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), - 0 /* cbInstr */, 0 /* errCode */); - AssertRC(rc2); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - goto ResumeExecution; - } - } - } - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - goto ResumeExecution; - } - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - break; - } - -#ifdef VBOX_STRICT - if (rc == VINF_IOM_R3_IOPORT_READ) - Assert(!fIOWrite); - else if (rc == VINF_IOM_R3_IOPORT_WRITE) - Assert(fIOWrite); - else - { - AssertMsg( RT_FAILURE(rc) - || rc == VINF_EM_RAW_EMULATE_INSTR - || rc == VINF_EM_RAW_GUEST_TRAP - || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); - } -#endif - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); - break; - } - - case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */ - LogFlow(("VMX_EXIT_TPR\n")); - /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */ - goto ResumeExecution; - - case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address - on the APIC-access page. */ - { - LogFlow(("VMX_EXIT_APIC_ACCESS\n")); - unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification); - - switch (uAccessType) - { - case VMX_APIC_ACCESS_TYPE_LINEAR_READ: - case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE: - { - RTGCPHYS GCPhys; - PDMApicGetBase(pVM, &GCPhys); - GCPhys &= PAGE_BASE_GC_MASK; - GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification); - - LogFlow(("Apic access at %RGp\n", GCPhys)); - rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, - CPUMCTX2CORE(pCtx), GCPhys); - if (rc == VINF_SUCCESS) - goto ResumeExecution; /* rip already updated */ - break; - } - - default: - rc = VINF_EM_RAW_EMULATE_INSTR; - break; - } - break; - } - - case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */ - if (!TMTimerPollBool(pVM, pVCpu)) - goto ResumeExecution; - rc = VINF_EM_RAW_TIMER_PENDING; - break; - - default: - /* The rest is handled after syncing the entire CPU state. */ - break; - } - - - /* - * Note: The guest state is not entirely synced back at this stage! - */ - - /* Investigate why there was a VM-exit. (part 2) */ - switch (exitReason) - { - case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */ - case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */ - case VMX_EXIT_EPT_VIOLATION: - case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */ - case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */ - /* Already handled above. */ - break; - - case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */ - rc = VINF_EM_RESET; /* Triple fault equals a reset. */ - break; - - case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */ - case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */ - rc = VINF_EM_RAW_INTERRUPT; - AssertFailed(); /* Can't happen. Yet. */ - break; - - case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */ - case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. 
*/ - rc = VINF_EM_RAW_INTERRUPT; - AssertFailed(); /* Can't happen afaik. */ - break; - - case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */ - Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification)); - if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT) - && pVCpu->hwaccm.s.Event.fPending) - { - /* Caused by an injected interrupt. */ - pVCpu->hwaccm.s.Event.fPending = false; - - Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo))); - Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo)); - rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT); - AssertRC(rc2); - } - /* else Exceptions and software interrupts can just be restarted. */ - rc = VERR_EM_INTERPRETER; - break; - - case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */ - /* Check if external interrupts are pending; if so, don't switch back. */ - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt); - pCtx->rip++; /* skip hlt */ - if (EMShouldContinueAfterHalt(pVCpu, pCtx)) - goto ResumeExecution; - - rc = VINF_EM_HALT; - break; - - case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */ - Log2(("VMX: mwait\n")); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait); - rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx)); - if ( rc == VINF_EM_HALT - || rc == VINF_SUCCESS) - { - /* Update EIP and continue execution. */ - pCtx->rip += cbInstr; - - /* Check if external interrupts are pending; if so, don't switch back. */ - if ( rc == VINF_SUCCESS - || ( rc == VINF_EM_HALT - && EMShouldContinueAfterHalt(pVCpu, pCtx)) - ) - goto ResumeExecution; - } - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); - break; - - case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */ - AssertFailed(); /* can't happen. */ - rc = VERR_EM_INTERPRETER; - break; - - case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */ - LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip)); - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG; - rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc2); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF); -#if 0 - DBGFDoneStepping(pVCpu); -#endif - rc = VINF_EM_DBG_STOP; - break; - - case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */ - case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */ - case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */ - case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */ - case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */ - case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */ - case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */ - case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */ - case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */ - case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */ - /** @todo inject #UD immediately */ - rc = VERR_EM_INTERPRETER; - break; - - case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */ - case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */ - case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. 
*/ - case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */ - case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */ - case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */ - case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */ - case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */ - /* already handled above */ - AssertMsg( rc == VINF_PGM_CHANGE_MODE - || rc == VINF_EM_RAW_INTERRUPT - || rc == VERR_EM_INTERPRETER - || rc == VINF_EM_RAW_EMULATE_INSTR - || rc == VINF_PGM_SYNC_CR3 - || rc == VINF_IOM_R3_IOPORT_READ - || rc == VINF_IOM_R3_IOPORT_WRITE - || rc == VINF_EM_RAW_GUEST_TRAP - || rc == VINF_TRPM_XCPT_DISPATCHED - || rc == VINF_EM_RESCHEDULE_REM, - ("rc = %d\n", VBOXSTRICTRC_VAL(rc))); - break; - - case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */ - case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */ - case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */ - case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */ - case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */ - case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address - on the APIC-access page. */ - { - /* - * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base) - */ - rc = VERR_EM_INTERPRETER; - break; - } - - case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */ - Assert(rc == VINF_EM_RAW_INTERRUPT); - break; - - case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */ - { -#ifdef VBOX_STRICT - RTCCUINTREG val2 = 0; - - Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n")); - - VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2); - Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2)); - - VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2); - Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2)); - - VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2); - Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2)); - - VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2); - Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2)); - - VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2); - Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2)); - - VMX_LOG_SELREG(CS, "CS", val2); - VMX_LOG_SELREG(DS, "DS", val2); - VMX_LOG_SELREG(ES, "ES", val2); - VMX_LOG_SELREG(FS, "FS", val2); - VMX_LOG_SELREG(GS, "GS", val2); - VMX_LOG_SELREG(SS, "SS", val2); - VMX_LOG_SELREG(TR, "TR", val2); - VMX_LOG_SELREG(LDTR, "LDTR", val2); - - VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2); - Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2)); - VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2); - Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2)); -#endif /* VBOX_STRICT */ - rc = VERR_VMX_INVALID_GUEST_STATE; - break; - } - - case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */ - case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */ - default: - rc = VERR_VMX_UNEXPECTED_EXIT_CODE; - AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */ - break; - - } - -end: - /* We now going back to ring-3, so clear the action flag. */ - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); - - /* - * Signal changes for the recompiler. 
- */ - CPUMSetChangedFlags(pVCpu, - CPUM_CHANGED_SYSENTER_MSR - | CPUM_CHANGED_LDTR - | CPUM_CHANGED_GDTR - | CPUM_CHANGED_IDTR - | CPUM_CHANGED_TR - | CPUM_CHANGED_HIDDEN_SEL_REGS); - - /* - * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time. - */ - if ( exitReason == VMX_EXIT_EXTERNAL_IRQ - && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo)) - { - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq); - /* On the next entry we'll only sync the host context. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT; - } - else - { - /* On the next entry we'll sync everything. */ - /** @todo we can do better than this */ - /* Not in the VINF_PGM_CHANGE_MODE though! */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL; - } - - /* Translate into a less severe return code */ - if (rc == VERR_EM_INTERPRETER) - rc = VINF_EM_RAW_EMULATE_INSTR; - else if (rc == VERR_VMX_INVALID_VMCS_PTR) - { - /* Try to extract more information about what might have gone wrong here. */ - VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys); - pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS; - pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu; - pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId(); - } - - /* Just set the correct state here instead of trying to catch every goto above. */ - VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC); - -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - /* Restore interrupts if we exited after disabling them. */ - if (uOldEFlags != ~(RTCCUINTREG)0) - ASMSetFlags(uOldEFlags); -#endif - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); - Log2(("X")); - return VBOXSTRICTRC_TODO(rc); -} - - -/** - * Enters the VT-x session. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCpu Pointer to the CPU info struct. - */ -VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu) -{ - Assert(pVM->hwaccm.s.vmx.fSupported); - NOREF(pCpu); - - unsigned cr4 = ASMGetCR4(); - if (!(cr4 & X86_CR4_VMXE)) - { - AssertMsgFailed(("X86_CR4_VMXE should be set!\n")); - return VERR_VMX_X86_CR4_VMXE_CLEARED; - } - - /* Activate the VMCS. */ - int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - if (RT_FAILURE(rc)) - return rc; - - pVCpu->hwaccm.s.fResumeVM = false; - return VINF_SUCCESS; -} - - -/** - * Leaves the VT-x session. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guests CPU context. - */ -VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) -{ - Assert(pVM->hwaccm.s.vmx.fSupported); - -#ifdef DEBUG - if (CPUMIsHyperDebugStateActive(pVCpu)) - { - CPUMR0LoadHostDebugState(pVM, pVCpu); - Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT); - } - else -#endif - - /* - * Save the guest debug state if necessary. - */ - if (CPUMIsGuestDebugStateActive(pVCpu)) - { - CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */); - - /* Enable DRx move intercepts again. 
*/ - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT; - int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - - /* Resync the debug registers the next time. */ - pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG; - } - else - Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT); - - /* - * Clear VMCS, marking it inactive, clearing implementation-specific data and writing - * VMCS data back to memory. - */ - int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - AssertRC(rc); - - return VINF_SUCCESS; -} - - -/** - * Flush the TLB using EPT. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param enmFlush Type of flush. - */ -static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush) -{ - uint64_t descriptor[2]; - - LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush)); - Assert(pVM->hwaccm.s.fNestedPaging); - descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP; - descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */ - int rc = VMXR0InvEPT(enmFlush, &descriptor[0]); - AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc)); -} - - -/** - * Flush the TLB using VPID. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a - * enmFlush). - * @param enmFlush Type of flush. - * @param GCPtr Virtual address of the page to flush (can be 0 depending - * on @a enmFlush). - */ -static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr) -{ - uint64_t descriptor[2]; - - Assert(pVM->hwaccm.s.vmx.fVPID); - if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS) - { - descriptor[0] = 0; - descriptor[1] = 0; - } - else - { - AssertPtr(pVCpu); - AssertMsg(pVCpu->hwaccm.s.uCurrentASID != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID)); - AssertMsg(pVCpu->hwaccm.s.uCurrentASID <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID)); - descriptor[0] = pVCpu->hwaccm.s.uCurrentASID; - descriptor[1] = GCPtr; - } - int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc); - AssertMsg(rc == VINF_SUCCESS, - ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc)); -} - - -/** - * Invalidates a guest page by guest virtual address. Only relevant for - * EPT/VPID, otherwise there is nothing really to invalidate. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param GCVirt Guest virtual address of the page to invalidate. - */ -VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt) -{ - bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH); - - Log2(("VMXR0InvalidatePage %RGv\n", GCVirt)); - - if (!fFlushPending) - { - /* - * We must invalidate the guest TLB entry in either case, we cannot ignore it even for the EPT case - * See @bugref{6043} and @bugref{6177} - * - * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this - * function maybe called in a loop with individual addresses. - */ - if (pVM->hwaccm.s.vmx.fVPID) - { - /* If we can flush just this page do it, otherwise flush as little as possible. 
*/ - if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR) - hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt); - else - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - } - else if (pVM->hwaccm.s.fNestedPaging) - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - } - - return VINF_SUCCESS; -} - - -/** - * Invalidates a guest page by physical address. Only relevant for EPT/VPID, - * otherwise there is nothing really to invalidate. - * - * NOTE: Assumes the current instruction references this physical page though a virtual address!! - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param GCPhys Guest physical address of the page to invalidate. - */ -VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) -{ - LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys)); - - /* - * We cannot flush a page by guest-physical address. invvpid takes only a linear address - * while invept only flushes by EPT not individual addresses. We update the force flag here - * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop. - */ - VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); - return VINF_SUCCESS; -} - - -/** - * Report world switch error and dump some useful debug info. - * - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param rc Return code. - * @param pCtx Pointer to the current guest CPU context (not updated). - */ -static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx) -{ - NOREF(pVM); - - switch (VBOXSTRICTRC_VAL(rc)) - { - case VERR_VMX_INVALID_VMXON_PTR: - AssertFailed(); - break; - - case VERR_VMX_UNABLE_TO_START_VM: - case VERR_VMX_UNABLE_TO_RESUME_VM: - { - int rc2; - RTCCUINTREG exitReason, instrError; - - rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason); - rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError); - AssertRC(rc2); - if (rc2 == VINF_SUCCESS) - { - Log(("Unable to start/resume VM for reason: %x. 
Instruction error %x\n", (uint32_t)exitReason, - (uint32_t)instrError)); - Log(("Current stack %08x\n", &rc2)); - - pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError; - pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason; - -#ifdef VBOX_STRICT - RTGDTR gdtr; - PCX86DESCHC pDesc; - RTCCUINTREG val; - - ASMGetGDTR(&gdtr); - - VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val); - Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val)); - VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val); - Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val)); - VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val); - Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val)); - VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val); - Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val)); - VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val); - Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val)); - - VMXReadVMCS(VMX_VMCS_HOST_CR0, &val); - Log(("VMX_VMCS_HOST_CR0 %08x\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_CR3, &val); - Log(("VMX_VMCS_HOST_CR3 %08x\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_CR4, &val); - Log(("VMX_VMCS_HOST_CR4 %08x\n", val)); - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val); - Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val)); - VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val); - Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val)); - - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "CS: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val); - Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "DS: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val); - Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "ES: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val); - Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "FS: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val); - Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "GS: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val); - Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "SS: "); - } - - VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val); - Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val)); - if (val < gdtr.cbGdt) - { - pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK)); - HWACCMR0DumpDescriptor(pDesc, val, "TR: "); - } - - VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val); - Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val); - Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val); - Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val)); - VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val); - Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val); - Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val); - Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_RSP, &val); - Log(("VMX_VMCS_HOST_RSP %RHv\n", val)); - VMXReadVMCS(VMX_VMCS_HOST_RIP, 
&val); - Log(("VMX_VMCS_HOST_RIP %RHv\n", val)); -# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) - if (VMX_IS_64BIT_HOST_MODE()) - { - Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER))); - Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR))); - Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR))); - Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR))); - Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK))); - Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE))); - } -# endif -#endif /* VBOX_STRICT */ - } - break; - } - - default: - /* impossible */ - AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc))); - break; - } -} - - -#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -/** - * Prepares for and executes VMLAUNCH (64 bits guest mode). - * - * @returns VBox status code. - * @param fResume Whether to vmlauch/vmresume. - * @param pCtx Pointer to the guest CPU context. - * @param pCache Pointer to the VMCS cache. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu) -{ - uint32_t aParam[6]; - PHMGLOBLCPUINFO pCpu; - RTHCPHYS HCPhysCpuPage; - int rc; - - pCpu = HWACCMR0GetCurrentCpu(); - HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - pCache->uPos = 1; - pCache->interPD = PGMGetInterPaeCR3(pVM); - pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0; -#endif - -#ifdef DEBUG - pCache->TestIn.HCPhysCpuPage= 0; - pCache->TestIn.HCPhysVMCS = 0; - pCache->TestIn.pCache = 0; - pCache->TestOut.HCPhysVMCS = 0; - pCache->TestOut.pCache = 0; - pCache->TestOut.pCtx = 0; - pCache->TestOut.eflags = 0; -#endif - - aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */ - aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */ - aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */ - aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. 
*/ - aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache); - aParam[5] = 0; - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8; - *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1; -#endif - rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]); - -#ifdef VBOX_WITH_CRASHDUMP_MAGIC - Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5); - Assert(pCtx->dr[4] == 10); - *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff; -#endif - -#ifdef DEBUG - AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage)); - AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, - pVCpu->hwaccm.s.vmx.HCPhysVMCS)); - AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, - pCache->TestOut.HCPhysVMCS)); - AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, - pCache->TestOut.pCache)); - AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), - ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache))); - AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, - pCache->TestOut.pCtx)); - Assert(!(pCache->TestOut.eflags & X86_EFL_IF)); -#endif - return rc; -} - - -# ifdef VBOX_STRICT -static bool hmR0VmxIsValidReadField(uint32_t idxField) -{ - switch (idxField) - { - case VMX_VMCS64_GUEST_RIP: - case VMX_VMCS64_GUEST_RSP: - case VMX_VMCS_GUEST_RFLAGS: - case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE: - case VMX_VMCS_CTRL_CR0_READ_SHADOW: - case VMX_VMCS64_GUEST_CR0: - case VMX_VMCS_CTRL_CR4_READ_SHADOW: - case VMX_VMCS64_GUEST_CR4: - case VMX_VMCS64_GUEST_DR7: - case VMX_VMCS32_GUEST_SYSENTER_CS: - case VMX_VMCS64_GUEST_SYSENTER_EIP: - case VMX_VMCS64_GUEST_SYSENTER_ESP: - case VMX_VMCS32_GUEST_GDTR_LIMIT: - case VMX_VMCS64_GUEST_GDTR_BASE: - case VMX_VMCS32_GUEST_IDTR_LIMIT: - case VMX_VMCS64_GUEST_IDTR_BASE: - case VMX_VMCS16_GUEST_FIELD_CS: - case VMX_VMCS32_GUEST_CS_LIMIT: - case VMX_VMCS64_GUEST_CS_BASE: - case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_DS: - case VMX_VMCS32_GUEST_DS_LIMIT: - case VMX_VMCS64_GUEST_DS_BASE: - case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_ES: - case VMX_VMCS32_GUEST_ES_LIMIT: - case VMX_VMCS64_GUEST_ES_BASE: - case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_FS: - case VMX_VMCS32_GUEST_FS_LIMIT: - case VMX_VMCS64_GUEST_FS_BASE: - case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_GS: - case VMX_VMCS32_GUEST_GS_LIMIT: - case VMX_VMCS64_GUEST_GS_BASE: - case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_SS: - case VMX_VMCS32_GUEST_SS_LIMIT: - case VMX_VMCS64_GUEST_SS_BASE: - case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_LDTR: - case VMX_VMCS32_GUEST_LDTR_LIMIT: - case VMX_VMCS64_GUEST_LDTR_BASE: - case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS: - case VMX_VMCS16_GUEST_FIELD_TR: - case VMX_VMCS32_GUEST_TR_LIMIT: - case VMX_VMCS64_GUEST_TR_BASE: - case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS: - case VMX_VMCS32_RO_EXIT_REASON: - case VMX_VMCS32_RO_VM_INSTR_ERROR: - case VMX_VMCS32_RO_EXIT_INSTR_LENGTH: - case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE: - case 
VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO: - case VMX_VMCS32_RO_EXIT_INSTR_INFO: - case VMX_VMCS_RO_EXIT_QUALIFICATION: - case VMX_VMCS32_RO_IDT_INFO: - case VMX_VMCS32_RO_IDT_ERRCODE: - case VMX_VMCS64_GUEST_CR3: - case VMX_VMCS_EXIT_PHYS_ADDR_FULL: - return true; - } - return false; -} - - -static bool hmR0VmxIsValidWriteField(uint32_t idxField) -{ - switch (idxField) - { - case VMX_VMCS64_GUEST_LDTR_BASE: - case VMX_VMCS64_GUEST_TR_BASE: - case VMX_VMCS64_GUEST_GDTR_BASE: - case VMX_VMCS64_GUEST_IDTR_BASE: - case VMX_VMCS64_GUEST_SYSENTER_EIP: - case VMX_VMCS64_GUEST_SYSENTER_ESP: - case VMX_VMCS64_GUEST_CR0: - case VMX_VMCS64_GUEST_CR4: - case VMX_VMCS64_GUEST_CR3: - case VMX_VMCS64_GUEST_DR7: - case VMX_VMCS64_GUEST_RIP: - case VMX_VMCS64_GUEST_RSP: - case VMX_VMCS64_GUEST_CS_BASE: - case VMX_VMCS64_GUEST_DS_BASE: - case VMX_VMCS64_GUEST_ES_BASE: - case VMX_VMCS64_GUEST_FS_BASE: - case VMX_VMCS64_GUEST_GS_BASE: - case VMX_VMCS64_GUEST_SS_BASE: - return true; - } - return false; -} -# endif /* VBOX_STRICT */ - - -/** - * Executes the specified handler in 64-bit mode. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param pfnHandler Pointer to the RC handler function. - * @param cbParam Number of parameters. - * @param paParam Array of 32-bit parameters. - */ -VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, - uint32_t *paParam) -{ - int rc, rc2; - PHMGLOBLCPUINFO pCpu; - RTHCPHYS HCPhysCpuPage; - RTHCUINTREG uOldEFlags; - - AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER); - Assert(pfnHandler); - Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField)); - Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField)); - -#ifdef VBOX_STRICT - for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++) - Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i])); - - for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++) - Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i])); -#endif - - /* Disable interrupts. */ - uOldEFlags = ASMIntDisableFlags(); - -#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI - RTCPUID idHostCpu = RTMpCpuId(); - CPUMR0SetLApic(pVM, idHostCpu); -#endif - - pCpu = HWACCMR0GetCurrentCpu(); - HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); - - /* Clear VMCS. Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */ - VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - - /* Leave VMX Root Mode. */ - VMXDisable(); - - ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); - - CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); - CPUMSetHyperEIP(pVCpu, pfnHandler); - for (int i=(int)cbParam-1;i>=0;i--) - CPUMPushHyper(pVCpu, paParam[i]); - - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); - - /* Call switcher. */ - rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum)); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z); - - /* Make sure the VMX instructions don't cause #UD faults. 
*/ - ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); - - /* Enter VMX Root Mode */ - rc2 = VMXEnable(HCPhysCpuPage); - if (RT_FAILURE(rc2)) - { - ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); - ASMSetFlags(uOldEFlags); - return VERR_VMX_VMXON_FAILED; - } - - rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS); - AssertRC(rc2); - Assert(!(ASMGetFlags() & X86_EFL_IF)); - ASMSetFlags(uOldEFlags); - return rc; -} -#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */ - - -#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) -/** - * Executes VMWRITE. - * - * @returns VBox status code - * @param pVCpu Pointer to the VMCPU. - * @param idxField VMCS field index. - * @param u64Val 16, 32 or 64 bits value. - */ -VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) -{ - int rc; - switch (idxField) - { - case VMX_VMCS_CTRL_TSC_OFFSET_FULL: - case VMX_VMCS_CTRL_IO_BITMAP_A_FULL: - case VMX_VMCS_CTRL_IO_BITMAP_B_FULL: - case VMX_VMCS_CTRL_MSR_BITMAP_FULL: - case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL: - case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL: - case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL: - case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL: - case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL: - case VMX_VMCS_GUEST_LINK_PTR_FULL: - case VMX_VMCS_GUEST_PDPTR0_FULL: - case VMX_VMCS_GUEST_PDPTR1_FULL: - case VMX_VMCS_GUEST_PDPTR2_FULL: - case VMX_VMCS_GUEST_PDPTR3_FULL: - case VMX_VMCS_GUEST_DEBUGCTL_FULL: - case VMX_VMCS_GUEST_EFER_FULL: - case VMX_VMCS_CTRL_EPTP_FULL: - /* These fields consist of two parts, which are both writable in 32 bits mode. */ - rc = VMXWriteVMCS32(idxField, u64Val); - rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL)); - AssertRC(rc); - return rc; - - case VMX_VMCS64_GUEST_LDTR_BASE: - case VMX_VMCS64_GUEST_TR_BASE: - case VMX_VMCS64_GUEST_GDTR_BASE: - case VMX_VMCS64_GUEST_IDTR_BASE: - case VMX_VMCS64_GUEST_SYSENTER_EIP: - case VMX_VMCS64_GUEST_SYSENTER_ESP: - case VMX_VMCS64_GUEST_CR0: - case VMX_VMCS64_GUEST_CR4: - case VMX_VMCS64_GUEST_CR3: - case VMX_VMCS64_GUEST_DR7: - case VMX_VMCS64_GUEST_RIP: - case VMX_VMCS64_GUEST_RSP: - case VMX_VMCS64_GUEST_CS_BASE: - case VMX_VMCS64_GUEST_DS_BASE: - case VMX_VMCS64_GUEST_ES_BASE: - case VMX_VMCS64_GUEST_FS_BASE: - case VMX_VMCS64_GUEST_GS_BASE: - case VMX_VMCS64_GUEST_SS_BASE: - /* Queue a 64 bits value as we can't set it in 32 bits host mode. */ - if (u64Val >> 32ULL) - rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val); - else - rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val); - - return rc; - - default: - AssertMsgFailed(("Unexpected field %x\n", idxField)); - return VERR_INVALID_PARAMETER; - } -} - - -/** - * Cache VMCS writes for performance reasons (Darwin) and for running 64 bits guests on 32 bits hosts. - * - * @param pVCpu Pointer to the VMCPU. - * @param idxField VMCS field index. - * @param u64Val 16, 32 or 64 bits value. - */ -VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) -{ - PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache; - - AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, - ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED); - - /* Make sure there are no duplicates. 
*/ - for (unsigned i = 0; i < pCache->Write.cValidEntries; i++) - { - if (pCache->Write.aField[i] == idxField) - { - pCache->Write.aFieldVal[i] = u64Val; - return VINF_SUCCESS; - } - } - - pCache->Write.aField[pCache->Write.cValidEntries] = idxField; - pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val; - pCache->Write.cValidEntries++; - return VINF_SUCCESS; -} - -#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */ - diff --git a/src/VBox/VMM/VMMR0/HWVMXR0.h b/src/VBox/VMM/VMMR0/HWVMXR0.h deleted file mode 100644 index e81cf1d0..00000000 --- a/src/VBox/VMM/VMMR0/HWVMXR0.h +++ /dev/null @@ -1,378 +0,0 @@ -/* $Id: HWVMXR0.h $ */ -/** @file - * HM VMX (VT-x) - Internal header file. - */ - -/* - * Copyright (C) 2006-2012 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - -#ifndef ___HWVMXR0_h -#define ___HWVMXR0_h - -#include <VBox/cdefs.h> -#include <VBox/types.h> -#include <VBox/vmm/em.h> -#include <VBox/vmm/stam.h> -#include <VBox/dis.h> -#include <VBox/vmm/hwaccm.h> -#include <VBox/vmm/pgm.h> -#include <VBox/vmm/hwacc_vmx.h> - -RT_C_DECLS_BEGIN - -/** @defgroup grp_vmx_int Internal - * @ingroup grp_vmx - * @internal - * @{ - */ - -/* Read cache indices. */ -#define VMX_VMCS64_GUEST_RIP_CACHE_IDX 0 -#define VMX_VMCS64_GUEST_RSP_CACHE_IDX 1 -#define VMX_VMCS_GUEST_RFLAGS_CACHE_IDX 2 -#define VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE_CACHE_IDX 3 -#define VMX_VMCS_CTRL_CR0_READ_SHADOW_CACHE_IDX 4 -#define VMX_VMCS64_GUEST_CR0_CACHE_IDX 5 -#define VMX_VMCS_CTRL_CR4_READ_SHADOW_CACHE_IDX 6 -#define VMX_VMCS64_GUEST_CR4_CACHE_IDX 7 -#define VMX_VMCS64_GUEST_DR7_CACHE_IDX 8 -#define VMX_VMCS32_GUEST_SYSENTER_CS_CACHE_IDX 9 -#define VMX_VMCS64_GUEST_SYSENTER_EIP_CACHE_IDX 10 -#define VMX_VMCS64_GUEST_SYSENTER_ESP_CACHE_IDX 11 -#define VMX_VMCS32_GUEST_GDTR_LIMIT_CACHE_IDX 12 -#define VMX_VMCS64_GUEST_GDTR_BASE_CACHE_IDX 13 -#define VMX_VMCS32_GUEST_IDTR_LIMIT_CACHE_IDX 14 -#define VMX_VMCS64_GUEST_IDTR_BASE_CACHE_IDX 15 -#define VMX_VMCS16_GUEST_FIELD_CS_CACHE_IDX 16 -#define VMX_VMCS32_GUEST_CS_LIMIT_CACHE_IDX 17 -#define VMX_VMCS64_GUEST_CS_BASE_CACHE_IDX 18 -#define VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS_CACHE_IDX 19 -#define VMX_VMCS16_GUEST_FIELD_DS_CACHE_IDX 20 -#define VMX_VMCS32_GUEST_DS_LIMIT_CACHE_IDX 21 -#define VMX_VMCS64_GUEST_DS_BASE_CACHE_IDX 22 -#define VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS_CACHE_IDX 23 -#define VMX_VMCS16_GUEST_FIELD_ES_CACHE_IDX 24 -#define VMX_VMCS32_GUEST_ES_LIMIT_CACHE_IDX 25 -#define VMX_VMCS64_GUEST_ES_BASE_CACHE_IDX 26 -#define VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS_CACHE_IDX 27 -#define VMX_VMCS16_GUEST_FIELD_FS_CACHE_IDX 28 -#define VMX_VMCS32_GUEST_FS_LIMIT_CACHE_IDX 29 -#define VMX_VMCS64_GUEST_FS_BASE_CACHE_IDX 30 -#define VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS_CACHE_IDX 31 -#define VMX_VMCS16_GUEST_FIELD_GS_CACHE_IDX 32 -#define VMX_VMCS32_GUEST_GS_LIMIT_CACHE_IDX 33 -#define VMX_VMCS64_GUEST_GS_BASE_CACHE_IDX 34 -#define VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS_CACHE_IDX 35 -#define VMX_VMCS16_GUEST_FIELD_SS_CACHE_IDX 36 -#define VMX_VMCS32_GUEST_SS_LIMIT_CACHE_IDX 37 -#define 
VMX_VMCS64_GUEST_SS_BASE_CACHE_IDX 38 -#define VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS_CACHE_IDX 39 -#define VMX_VMCS16_GUEST_FIELD_TR_CACHE_IDX 40 -#define VMX_VMCS32_GUEST_TR_LIMIT_CACHE_IDX 41 -#define VMX_VMCS64_GUEST_TR_BASE_CACHE_IDX 42 -#define VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS_CACHE_IDX 43 -#define VMX_VMCS16_GUEST_FIELD_LDTR_CACHE_IDX 44 -#define VMX_VMCS32_GUEST_LDTR_LIMIT_CACHE_IDX 45 -#define VMX_VMCS64_GUEST_LDTR_BASE_CACHE_IDX 46 -#define VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS_CACHE_IDX 47 -#define VMX_VMCS32_RO_EXIT_REASON_CACHE_IDX 48 -#define VMX_VMCS32_RO_VM_INSTR_ERROR_CACHE_IDX 49 -#define VMX_VMCS32_RO_EXIT_INSTR_LENGTH_CACHE_IDX 50 -#define VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE_CACHE_IDX 51 -#define VMX_VMCS32_RO_EXIT_INSTR_INFO_CACHE_IDX 52 -#define VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO_CACHE_IDX 53 -#define VMX_VMCS_RO_EXIT_QUALIFICATION_CACHE_IDX 54 -#define VMX_VMCS32_RO_IDT_INFO_CACHE_IDX 55 -#define VMX_VMCS32_RO_IDT_ERRCODE_CACHE_IDX 56 -#define VMX_VMCS_MAX_CACHE_IDX (VMX_VMCS32_RO_IDT_ERRCODE_CACHE_IDX+1) -#define VMX_VMCS64_GUEST_CR3_CACHE_IDX 57 -#define VMX_VMCS_EXIT_PHYS_ADDR_FULL_CACHE_IDX 58 -#define VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX (VMX_VMCS_EXIT_PHYS_ADDR_FULL_CACHE_IDX+1) - - -#ifdef IN_RING0 - -/** - * Enters the VT-x session. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VM CPU. - * @param pCpu Pointer to the CPU info struct. - */ -VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu); - -/** - * Leaves the VT-x session. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - -VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys, bool fEnabledBySystem); - -/** - * Deactivates VT-x on the current CPU. - * - * @returns VBox status code. - * @param pCpu Pointer to the CPU info struct. - * @param pvPageCpu Pointer to the global CPU page. - * @param pPageCpuPhys Physical address of the global CPU page. - */ -VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys); - -/** - * Does Ring-0 per VM VT-x initialization. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) VMXR0InitVM(PVM pVM); - -/** - * Does Ring-0 per VM VT-x termination. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) VMXR0TermVM(PVM pVM); - -/** - * Sets up VT-x for the specified VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - */ -VMMR0DECL(int) VMXR0SetupVM(PVM pVM); - - -/** - * Save the host state. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu); - -/** - * Loads the guest state. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - */ -VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - - -/** - * Runs guest code in a VT-x VM. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. 
- */ -VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); - - -# if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -/** - * Executes the specified handler in 64-bit mode. - * - * @returns VBox status code. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - * @param pCtx Pointer to the guest CPU context. - * @param pfnHandler Pointer to the RC handler function. - * @param cbParam Number of parameters. - * @param paParam Array of 32-bit parameters. - */ -VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, - uint32_t *paParam); -# endif - -# define VMX_WRITE_SELREG(REG, reg) \ - do \ - { \ - rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_##REG, pCtx->reg.Sel); \ - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_##REG##_LIMIT, pCtx->reg.u32Limit); \ - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_##REG##_BASE, pCtx->reg.u64Base); \ - if ((pCtx->eflags.u32 & X86_EFL_VM)) \ - { \ - /* Must override this or else VT-x will fail with invalid guest state errors. */ \ - /* DPL=3, present, code/data, r/w/accessed. */ \ - val = (pCtx->reg.Attr.u & ~0xFF) | 0xF3; \ - } \ - else \ - if ( CPUMIsGuestInRealModeEx(pCtx) \ - && !pVM->hwaccm.s.vmx.fUnrestrictedGuest) \ - { \ - /* Must override this or else VT-x will fail with invalid guest state errors. */ \ - /* DPL=3, present, code/data, r/w/accessed. */ \ - val = 0xf3; \ - } \ - else \ - if ( ( pCtx->reg.Sel \ - || !CPUMIsGuestInPagedProtectedModeEx(pCtx) \ - || (!pCtx->cs.Attr.n.u1DefBig && !CPUMIsGuestIn64BitCodeEx(pCtx)) \ - ) \ - && pCtx->reg.Attr.n.u1Present == 1) \ - { \ - val = pCtx->reg.Attr.u | X86_SEL_TYPE_ACCESSED; \ - } \ - else \ - val = 0x10000; /* Invalid guest state error otherwise. (BIT(16) = Unusable) */ \ - \ - rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, val); \ - } while (0) - -# define VMX_READ_SELREG(REG, reg) \ - do \ - { \ - VMXReadCachedVMCS(VMX_VMCS16_GUEST_FIELD_##REG, &val); \ - pCtx->reg.Sel = val; \ - pCtx->reg.ValidSel = val; \ - pCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \ - VMXReadCachedVMCS(VMX_VMCS32_GUEST_##REG##_LIMIT, &val); \ - pCtx->reg.u32Limit = val; \ - VMXReadCachedVMCS(VMX_VMCS64_GUEST_##REG##_BASE, &val); \ - pCtx->reg.u64Base = val; \ - VMXReadCachedVMCS(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, &val); \ - pCtx->reg.Attr.u = val; \ - } while (0) - -/* Don't read from the cache in this macro; used only in case of failure where the cache is out of sync. */ -# define VMX_LOG_SELREG(REG, szSelReg, val) \ - do \ - { \ - VMXReadVMCS(VMX_VMCS16_GUEST_FIELD_##REG, &(val)); \ - Log(("%s Selector %x\n", szSelReg, (val))); \ - VMXReadVMCS(VMX_VMCS32_GUEST_##REG##_LIMIT, &(val)); \ - Log(("%s Limit %x\n", szSelReg, (val))); \ - VMXReadVMCS(VMX_VMCS64_GUEST_##REG##_BASE, &(val)); \ - Log(("%s Base %RX64\n", szSelReg, (uint64_t)(val))); \ - VMXReadVMCS(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, &(val)); \ - Log(("%s Attributes %x\n", szSelReg, (val))); \ - } while (0) - -/** - * Cache VMCS writes for performance reasons (Darwin) and for running 64 bits - * guests on 32-bit hosts. - * - * @param pVCpu Pointer to the VMCPU. - * @param idxField VMCS field index. - * @param u64Val 16, 32 or 64 bits value. - */ -VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val); - -#ifdef VMX_USE_CACHED_VMCS_ACCESSES -/** - * Return value of cached VMCS read for performance reasons (Darwin) and for running 64 bits guests on 32 bits hosts. 
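The read-cache indices and VMXReadCachedVMCSEx removed in this hunk implement a simple idea: every VMCS field the exit path cares about gets a fixed slot, the slots are filled in bulk around the world switch, and the handlers then read plain memory instead of issuing VMREAD. A minimal, self-contained sketch of that pattern, assuming illustrative names rather than the real VMCSCACHE layout:

    #include <stdint.h>

    /* One fixed slot per cached VMCS field, mirroring the *_CACHE_IDX defines above. */
    #define MY_CACHE_IDX_GUEST_RIP  0
    #define MY_CACHE_IDX_GUEST_RSP  1
    #define MY_CACHE_MAX            2

    typedef struct MYVMCSREADCACHE
    {
        uint32_t aField[MY_CACHE_MAX];    /* VMCS encodings, set up once at init.  */
        uint64_t aFieldVal[MY_CACHE_MAX]; /* Values captured in bulk at VM-exit.   */
    } MYVMCSREADCACHE;

    /* Counterpart of VMXReadCachedVMCSEx: a cached read is just an array access. */
    static inline int myReadCachedVmcs(MYVMCSREADCACHE const *pCache, uint32_t idxCache, uint64_t *pVal)
    {
        if (idxCache >= MY_CACHE_MAX)
            return -1;                          /* caller passed a field encoding, not a cache index */
        *pVal = pCache->aFieldVal[idxCache];
        return 0;                               /* VINF_SUCCESS equivalent */
    }
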
- * - * @param pVCpu Pointer to the VMCPU. - * @param idxField VMCS cache index (not VMCS field index!) - * @param pVal 16, 32 or 64 bits value. - */ -DECLINLINE(int) VMXReadCachedVMCSEx(PVMCPU pVCpu, uint32_t idxCache, RTGCUINTREG *pVal) -{ - Assert(idxCache <= VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX); - *pVal = pVCpu->hwaccm.s.vmx.VMCSCache.Read.aFieldVal[idxCache]; - return VINF_SUCCESS; -} -#endif - -/** - * Return value of cached VMCS read for performance reasons (Darwin) and for - * running 64 bits guests on 32-bit hosts. - * - * @param idxField VMCS field index. - * @param pVal Value pointer (out). - */ -#ifdef VMX_USE_CACHED_VMCS_ACCESSES -# define VMXReadCachedVMCS(idxField, pVal) VMXReadCachedVMCSEx(pVCpu, idxField##_CACHE_IDX, pVal) -#else -# define VMXReadCachedVMCS(idxField, pVal) VMXReadVMCS(idxField, pVal) -#endif - -/** - * Setup cached VMCS for performance reasons (Darwin) and for running 64-bit - * guests on 32-bit hosts. - * - * @param pCache The cache. - * @param idxField VMCS field index. - */ -#define VMXSetupCachedReadVMCS(pCache, idxField) \ -{ \ - Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \ - pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \ - pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \ -} - -#define VMX_SETUP_SELREG(REG, pCache) \ -{ \ - VMXSetupCachedReadVMCS(pCache, VMX_VMCS16_GUEST_FIELD_##REG); \ - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_##REG##_LIMIT); \ - VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_##REG##_BASE); \ - VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS); \ -} - -/** - * Prepares for and executes VMLAUNCH (32-bit guest mode). - * - * @returns VBox status code. - * @param fResume Whether to vmlauch/vmresume. - * @param pCtx Pointer to the guest CPU context. - * @param pCache Pointer to the VMCS cache. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -DECLASM(int) VMXR0StartVM32(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); - -/** - * Prepares for and executes VMLAUNCH (64-bit guest mode). - * - * @returns VBox status code. - * @param fResume Whether to vmlauch/vmresume. - * @param pCtx Pointer to the guest CPU context. - * @param pCache Pointer to the VMCS cache. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -DECLASM(int) VMXR0StartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); - -# if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) -/** - * Prepares for and executes VMLAUNCH (64-bit guest mode). - * - * @returns VBox status code - * @param fResume Whether to vmlauch/vmresume. - * @param pCtx Pointer to the guest CPU context. - * @param pCache Pointer to the VMCS cache. - * @param pVM Pointer to the VM. - * @param pVCpu Pointer to the VMCPU. - */ -DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu); -# endif - -#endif /* IN_RING0 */ - -/** @} */ - -RT_C_DECLS_END - -#endif /* ___HWVMXR0_h */ - diff --git a/src/VBox/VMM/VMMR0/PDMNetShaperR0.cpp b/src/VBox/VMM/VMMR0/PDMNetShaperR0.cpp deleted file mode 100644 index e6da8ac3..00000000 --- a/src/VBox/VMM/VMMR0/PDMNetShaperR0.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* $Id: PDMNetShaperR0.cpp $ */ -/** @file - * PDM Network Shaper - Limit network traffic according to bandwidth - * group settings [R0 part]. 
- */ - -/* - * Copyright (C) 2011-2012 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - - -/******************************************************************************* -* Header Files * -*******************************************************************************/ -#define LOG_GROUP LOG_GROUP_NET_SHAPER - -#include <VBox/vmm/pdm.h> -#include <VBox/log.h> -#include <iprt/time.h> - -#include <VBox/vmm/pdmnetshaper.h> -#include <VBox/vmm/pdmnetshaperint.h> - - -/** - * Obtain bandwidth in a bandwidth group (R0 version). - * - * @returns VBox status code. - * @param pFilter Pointer to the filter that allocates bandwidth. - * @param cbTransfer Number of bytes to allocate. - */ -VMMR0DECL(bool) PDMR0NsAllocateBandwidth(PPDMNSFILTER pFilter, size_t cbTransfer) -{ - return pdmNsAllocateBandwidth(pFilter, cbTransfer); -} diff --git a/src/VBox/VMM/VMMR0/PDMR0Device.cpp b/src/VBox/VMM/VMMR0/PDMR0Device.cpp index 47455809..94abdda0 100644 --- a/src/VBox/VMM/VMMR0/PDMR0Device.cpp +++ b/src/VBox/VMM/VMMR0/PDMR0Device.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2006-2011 Oracle Corporation + * Copyright (C) 2006-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -27,7 +27,7 @@ #include <VBox/vmm/vm.h> #include <VBox/vmm/vmm.h> #include <VBox/vmm/patm.h> -#include <VBox/vmm/hwaccm.h> +#include <VBox/vmm/hm.h> #include <VBox/log.h> #include <VBox/err.h> @@ -56,13 +56,6 @@ RT_C_DECLS_END /******************************************************************************* -* Prototypes * -*******************************************************************************/ -static DECLCALLBACK(int) pdmR0DevHlp_PhysRead(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead); -static DECLCALLBACK(int) pdmR0DevHlp_PhysWrite(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite); - - -/******************************************************************************* * Internal Functions * *******************************************************************************/ static bool pdmR0IsaSetIrq(PVM pVM, int iIrq, int iLevel, uint32_t uTagSrc); @@ -77,22 +70,23 @@ static bool pdmR0IsaSetIrq(PVM pVM, int iIrq, int iLevel, uint32_t uTagSrc); static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysRead(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead) { PDMDEV_ASSERT_DEVINS(pDevIns); - LogFlow(("pdmR0DevHlp_PCIPhysRead: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbRead=%#x\n", - pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbRead)); - PCIDevice *pPciDev = pDevIns->Internal.s.pPciDeviceR0; - AssertPtrReturn(pPciDev, VERR_INVALID_POINTER); +#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT + /* + * Just check the busmaster setting here and forward the request to the generic read helper. 
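The reworked R0 PCI access helpers in this hunk refuse DMA for devices whose bus-master enable bit is clear, returning VERR_PDM_NOT_PCI_BUS_MASTER instead of the old informational status, and otherwise forward to the generic pfnPhysRead/pfnPhysWrite device helpers. A stripped-down sketch of that gate, with hypothetical names standing in for the PDM types:

    #include <stdint.h>
    #include <stddef.h>

    #define MY_PCI_CMD_BUS_MASTER   0x0004   /* Bus Master Enable bit in the PCI command register. */
    #define MY_ERR_NOT_BUS_MASTER   (-2400)  /* stand-in for VERR_PDM_NOT_PCI_BUS_MASTER */

    typedef struct MYPCIDEV { uint16_t u16Command; } MYPCIDEV;

    /* Stand-in for the generic device-helper physical read (pfnPhysRead). */
    static int myGenericPhysRead(uint64_t GCPhys, void *pvBuf, size_t cbRead)
    {
        (void)GCPhys; (void)pvBuf; (void)cbRead;
        return 0;
    }

    /* Gate a device-initiated read on the bus-master bit, as the new helper does. */
    static int myPciPhysRead(MYPCIDEV const *pPciDev, uint64_t GCPhys, void *pvBuf, size_t cbRead)
    {
        if (!(pPciDev->u16Command & MY_PCI_CMD_BUS_MASTER))
            return MY_ERR_NOT_BUS_MASTER;                /* the device may not DMA right now */
        return myGenericPhysRead(GCPhys, pvBuf, cbRead); /* forward to the generic helper */
    }
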
+ */ + PPCIDEVICE pPciDev = pDevIns->Internal.s.pPciDeviceR0; + AssertReleaseMsg(pPciDev, ("No PCI device registered!\n")); if (!PCIDevIsBusmaster(pPciDev)) { -#ifdef DEBUG - LogFlow(("%s: %RU16:%RU16: No bus master (anymore), skipping read %p (%z)\n", __FUNCTION__, - PCIDevGetVendorId(pPciDev), PCIDevGetDeviceId(pPciDev), pvBuf, cbRead)); -#endif - return VINF_PDM_PCI_PHYS_READ_BM_DISABLED; + Log(("pdmRCDevHlp_PCIPhysRead: caller=%p/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbRead=%#zx\n", + pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbRead)); + return VERR_PDM_NOT_PCI_BUS_MASTER; } +#endif - return pdmR0DevHlp_PhysRead(pDevIns, GCPhys, pvBuf, cbRead); + return pDevIns->pHlpR0->pfnPhysRead(pDevIns, GCPhys, pvBuf, cbRead); } @@ -100,22 +94,23 @@ static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysRead(PPDMDEVINS pDevIns, RTGCPHYS GC static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysWrite(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite) { PDMDEV_ASSERT_DEVINS(pDevIns); - LogFlow(("pdmR0DevHlp_PCIPhysWrite: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbWrite=%#x\n", - pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbWrite)); - PCIDevice *pPciDev = pDevIns->Internal.s.pPciDeviceR0; - AssertPtrReturn(pPciDev, VERR_INVALID_POINTER); +#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT + /* + * Just check the busmaster setting here and forward the request to the generic read helper. + */ + PPCIDEVICE pPciDev = pDevIns->Internal.s.pPciDeviceR0; + AssertReleaseMsg(pPciDev, ("No PCI device registered!\n")); if (!PCIDevIsBusmaster(pPciDev)) { -#ifdef DEBUG - LogFlow(("%s: %RU16:%RU16: No bus master (anymore), skipping write %p (%z)\n", __FUNCTION__, - PCIDevGetVendorId(pPciDev), PCIDevGetDeviceId(pPciDev), pvBuf, cbWrite)); -#endif - return VINF_PDM_PCI_PHYS_WRITE_BM_DISABLED; + Log(("pdmRCDevHlp_PCIPhysWrite: caller=%p/%d: returns %Rrc - Not bus master! 
GCPhys=%RGp cbWrite=%#zx\n", + pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbWrite)); + return VERR_PDM_NOT_PCI_BUS_MASTER; } +#endif - return pdmR0DevHlp_PhysWrite(pDevIns, GCPhys, pvBuf, cbWrite); + return pDevIns->pHlpR0->pfnPhysWrite(pDevIns, GCPhys, pvBuf, cbWrite); } @@ -373,7 +368,7 @@ static DECLCALLBACK(bool) pdmR0DevHlp_CanEmulateIoBlock(PPDMDEVINS pDevIns) { PDMDEV_ASSERT_DEVINS(pDevIns); LogFlow(("pdmR0DevHlp_GetVM: caller='%p'/%d\n", pDevIns, pDevIns->iInstance)); - return HWACCMCanEmulateIoBlock(VMMGetCpu(pDevIns->Internal.s.pVMR0)); + return HMCanEmulateIoBlock(VMMGetCpu(pDevIns->Internal.s.pVMR0)); } @@ -433,7 +428,7 @@ static DECLCALLBACK(void) pdmR0PicHlp_SetInterruptFF(PPDMDEVINS pDevIns) PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */ LogFlow(("pdmR0PicHlp_SetInterruptFF: caller=%p/%d: VMCPU_FF_INTERRUPT_PIC %d -> 1\n", - pDevIns, pDevIns->iInstance, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INTERRUPT_PIC))); + pDevIns, pDevIns->iInstance, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC))); VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC); } @@ -458,7 +453,7 @@ static DECLCALLBACK(void) pdmR0PicHlp_ClearInterruptFF(PPDMDEVINS pDevIns) PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */ LogFlow(("pdmR0PicHlp_ClearInterruptFF: caller=%p/%d: VMCPU_FF_INTERRUPT_PIC %d -> 0\n", - pDevIns, pDevIns->iInstance, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INTERRUPT_PIC))); + pDevIns, pDevIns->iInstance, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC))); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_PIC); } @@ -512,7 +507,7 @@ static DECLCALLBACK(void) pdmR0ApicHlp_SetInterruptFF(PPDMDEVINS pDevIns, PDMAPI AssertReturnVoid(idCpu < pVM->cCpus); LogFlow(("pdmR0ApicHlp_SetInterruptFF: CPU%d=caller=%p/%d: VM_FF_INTERRUPT %d -> 1 (CPU%d)\n", - VMMGetCpuId(pVM), pDevIns, pDevIns->iInstance, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INTERRUPT_APIC), idCpu)); + VMMGetCpuId(pVM), pDevIns, pDevIns->iInstance, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC), idCpu)); switch (enmType) { @@ -533,7 +528,7 @@ static DECLCALLBACK(void) pdmR0ApicHlp_SetInterruptFF(PPDMDEVINS pDevIns, PDMAPI break; } - /* We need to wait up the target CPU. */ + /* We need to wake up the target CPU. */ if (VMMGetCpuId(pVM) != idCpu) { switch (VMCPU_GET_STATE(pVCpu)) @@ -563,7 +558,7 @@ static DECLCALLBACK(void) pdmR0ApicHlp_ClearInterruptFF(PPDMDEVINS pDevIns, PDMA AssertReturnVoid(idCpu < pVM->cCpus); LogFlow(("pdmR0ApicHlp_ClearInterruptFF: caller=%p/%d: VM_FF_INTERRUPT %d -> 0\n", - pDevIns, pDevIns->iInstance, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INTERRUPT_APIC))); + pDevIns, pDevIns->iInstance, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC))); /* Note: NMI/SMI can't be cleared. */ switch (enmType) diff --git a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp index 90cf31e7..fb9781fc 100644 --- a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp +++ b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2010 Oracle Corporation + * Copyright (C) 2010-2012 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. 
This file is free software; diff --git a/src/VBox/VMM/VMMR0/PGMR0.cpp b/src/VBox/VMM/VMMR0/PGMR0.cpp index 371aec25..69ef6c75 100644 --- a/src/VBox/VMM/VMMR0/PGMR0.cpp +++ b/src/VBox/VMM/VMMR0/PGMR0.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2007-2011 Oracle Corporation + * Copyright (C) 2007-2012 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/PGMR0Bth.h b/src/VBox/VMM/VMMR0/PGMR0Bth.h index 6c0bb8de..ca4a5a89 100644 --- a/src/VBox/VMM/VMMR0/PGMR0Bth.h +++ b/src/VBox/VMM/VMMR0/PGMR0Bth.h @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2006-2007 Oracle Corporation + * Copyright (C) 2006-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp index 8bc97f53..f70c0638 100644 --- a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp +++ b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp @@ -107,7 +107,7 @@ VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHA bool fFlush = false; rc = pgmPoolTrackUpdateGCPhys(pVM, PageDesc.GCPhys, pPage, true /* clear the entries */, &fFlush); Assert( rc == VINF_SUCCESS - || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3) + || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3) && (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))); if (rc == VINF_SUCCESS) fFlushTLBs |= fFlush; diff --git a/src/VBox/VMM/VMMR0/TRPMR0A.asm b/src/VBox/VMM/VMMR0/TRPMR0A.asm index 64da5a37..55adbccc 100644 --- a/src/VBox/VMM/VMMR0/TRPMR0A.asm +++ b/src/VBox/VMM/VMMR0/TRPMR0A.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2006-2007 Oracle Corporation +; Copyright (C) 2006-2011 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/VMMR0.cpp b/src/VBox/VMM/VMMR0/VMMR0.cpp index d8f32add..95641b3d 100644 --- a/src/VBox/VMM/VMMR0/VMMR0.cpp +++ b/src/VBox/VMM/VMMR0/VMMR0.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2006-2011 Oracle Corporation + * Copyright (C) 2006-2012 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -36,7 +36,7 @@ #include <VBox/vmm/gvmm.h> #include <VBox/vmm/gmm.h> #include <VBox/intnet.h> -#include <VBox/vmm/hwaccm.h> +#include <VBox/vmm/hm.h> #include <VBox/param.h> #include <VBox/err.h> #include <VBox/version.h> @@ -115,8 +115,18 @@ DECLEXPORT(int) ModuleInit(void *hMod) #endif LogFlow(("ModuleInit:\n")); +#ifdef VBOX_WITH_64ON32_CMOS_DEBUG /* - * Initialize the VMM, GVMM, GMM, HWACCM, PGM (Darwin) and INTNET. + * Display the CMOS debug code. + */ + ASMOutU8(0x72, 0x03); + uint8_t bDebugCode = ASMInU8(0x73); + LogRel(("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode)); + RTLogComPrintf("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode); +#endif + + /* + * Initialize the VMM, GVMM, GMM, HM, PGM (Darwin) and INTNET. 
*/ int rc = vmmInitFormatTypes(); if (RT_SUCCESS(rc)) @@ -127,7 +137,7 @@ DECLEXPORT(int) ModuleInit(void *hMod) rc = GMMR0Init(); if (RT_SUCCESS(rc)) { - rc = HWACCMR0Init(); + rc = HMR0Init(); if (RT_SUCCESS(rc)) { rc = PGMRegisterStringFormatTypes(); @@ -187,10 +197,10 @@ DECLEXPORT(int) ModuleInit(void *hMod) } else LogRel(("ModuleInit: PGMRegisterStringFormatTypes -> %Rrc\n", rc)); - HWACCMR0Term(); + HMR0Term(); } else - LogRel(("ModuleInit: HWACCMR0Init -> %Rrc\n", rc)); + LogRel(("ModuleInit: HMR0Init -> %Rrc\n", rc)); GMMR0Term(); } else @@ -230,7 +240,7 @@ DECLEXPORT(void) ModuleTerm(void *hMod) IntNetR0Term(); /* - * PGM (Darwin), HWACCM and PciRaw global cleanup. + * PGM (Darwin), HM and PciRaw global cleanup. */ #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE PGMR0DynMapTerm(); @@ -239,7 +249,7 @@ DECLEXPORT(void) ModuleTerm(void *hMod) PciRawR0Term(); #endif PGMDeregisterStringFormatTypes(); - HWACCMR0Term(); + HMR0Term(); #ifdef VBOX_WITH_TRIPLE_FAULT_HACK vmmR0TripleFaultHackTerm(); #endif @@ -263,12 +273,13 @@ DECLEXPORT(void) ModuleTerm(void *hMod) * * @param pVM Pointer to the VM. * @param uSvnRev The SVN revision of the ring-3 part. + * @param uBuildType Build type indicator. * @thread EMT. */ -static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) +static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev, uint32_t uBuildType) { /* - * Match the SVN revisions. + * Match the SVN revisions and build type. */ if (uSvnRev != VMMGetSvnRev()) { @@ -276,10 +287,17 @@ static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) SUPR0Printf("VMMR0InitVM: Revision mismatch, r3=%d r0=%d\n", uSvnRev, VMMGetSvnRev()); return VERR_VMM_R0_VERSION_MISMATCH; } + if (uBuildType != vmmGetBuildType()) + { + LogRel(("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType())); + SUPR0Printf("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType()); + return VERR_VMM_R0_VERSION_MISMATCH; + } if ( !VALID_PTR(pVM) || pVM->pVMR0 != pVM) return VERR_INVALID_PARAMETER; + #ifdef LOG_ENABLED /* * Register the EMT R0 logger instance for VCPU 0. @@ -341,12 +359,12 @@ static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) if (RT_SUCCESS(rc)) { /* - * Init HWACCM, CPUM and PGM (Darwin only). + * Init HM, CPUM and PGM (Darwin only). */ - rc = HWACCMR0InitVM(pVM); + rc = HMR0InitVM(pVM); if (RT_SUCCESS(rc)) { - rc = CPUMR0Init(pVM); /** @todo rename to CPUMR0InitVM */ + rc = CPUMR0InitVM(pVM); if (RT_SUCCESS(rc)) { #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE @@ -369,7 +387,7 @@ static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) #ifdef VBOX_WITH_PCI_PASSTHROUGH PciRawR0TermVM(pVM); #endif - HWACCMR0TermVM(pVM); + HMR0TermVM(pVM); } } @@ -398,7 +416,6 @@ VMMR0DECL(int) VMMR0TermVM(PVM pVM, PGVM pGVM) PciRawR0TermVM(pVM); #endif - /* * Tell GVMM what we're up to and check that we only do this once. */ @@ -409,7 +426,7 @@ VMMR0DECL(int) VMMR0TermVM(PVM pVM, PGVM pGVM) #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE PGMR0DynMapTermVM(pVM); #endif - HWACCMR0TermVM(pVM); + HMR0TermVM(pVM); } /* @@ -420,6 +437,158 @@ VMMR0DECL(int) VMMR0TermVM(PVM pVM, PGVM pGVM) } +/** + * Creates R0 thread-context hooks for the current EMT thread. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
+ * + * @thread EMT(pVCpu) + */ +VMMR0DECL(int) VMMR0ThreadCtxHooksCreate(PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + Assert(pVCpu->vmm.s.hR0ThreadCtx == NIL_RTTHREADCTX); +#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) + int rc = RTThreadCtxHooksCreate(&pVCpu->vmm.s.hR0ThreadCtx); + if ( RT_FAILURE(rc) + && rc != VERR_NOT_SUPPORTED) + { + Log(("RTThreadCtxHooksCreate failed! rc=%Rrc pVCpu=%p idCpu=%RU32\n", rc, pVCpu, pVCpu->idCpu)); + return rc; + } +#endif + + return VINF_SUCCESS; +} + + +/** + * Releases the object reference for the thread-context hook. + * + * @param pVCpu Pointer to the VMCPU. + * @remarks Can be called from any thread. + */ +VMMR0DECL(void) VMMR0ThreadCtxHooksRelease(PVMCPU pVCpu) +{ + RTThreadCtxHooksRelease(pVCpu->vmm.s.hR0ThreadCtx); +} + + +/** + * Registers the thread-context hook for this VCPU. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pfnThreadHook Pointer to the thread-context callback. + * + * @thread EMT(pVCpu) + */ +VMMR0DECL(int) VMMR0ThreadCtxHooksRegister(PVMCPU pVCpu, PFNRTTHREADCTXHOOK pfnThreadHook) +{ + VMCPU_ASSERT_EMT(pVCpu); + return RTThreadCtxHooksRegister(pVCpu->vmm.s.hR0ThreadCtx, pfnThreadHook, pVCpu); +} + + +/** + * Deregisters the thread-context hook for this VCPU. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * + * @thread EMT(pVCpu) + */ +VMMR0DECL(int) VMMR0ThreadCtxHooksDeregister(PVMCPU pVCpu) +{ + return RTThreadCtxHooksDeregister(pVCpu->vmm.s.hR0ThreadCtx); +} + + +/** + * Whether thread-context hooks are created (implying they're supported) on this + * platform. + * + * @returns true if the hooks are created, false otherwise. + * @param pVCpu Pointer to the VMCPU. + */ +VMMR0DECL(bool) VMMR0ThreadCtxHooksAreCreated(PVMCPU pVCpu) +{ + return pVCpu->vmm.s.hR0ThreadCtx != NIL_RTTHREADCTX; +} + + +/** + * Whether thread-context hooks are registered for this VCPU. + * + * @returns true if registered, false otherwise. + * @param pVCpu Pointer to the VMCPU. + */ +VMMR0DECL(bool) VMMR0ThreadCtxHooksAreRegistered(PVMCPU pVCpu) +{ + return RTThreadCtxHooksAreRegistered(pVCpu->vmm.s.hR0ThreadCtx); +} + + +/** + * VMM ring-0 thread-context callback. + * + * This does common HM state updating and calls the HM-specific thread-context + * callback. + * + * @param enmEvent The thread-context event. + * @param pvUser Opaque pointer to the VMCPU. + * + * @thread EMT(pvUser) + */ +static DECLCALLBACK(void) vmmR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + + switch (enmEvent) + { + case RTTHREADCTXEVENT_RESUMED: + { + /** @todo Linux may call us with preemption enabled (really!) but technically we + * cannot get preempted here, otherwise we end up in an infinite recursion + * scenario (i.e. preempted in resume hook -> preempt hook -> resume hook... ad + * infinitum). Let's just disable preemption for now... + */ + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* We need to update the VCPU <-> host CPU mapping. */ + RTCPUID idHostCpu = RTMpCpuId(); + ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu); + + /* Invoke the HM-specific thread-context callback. */ + HMR0ThreadCtxCallback(enmEvent, pvUser); + + /* Restore preemption. */ + HM_RESTORE_PREEMPT_IF_NEEDED(); + break; + } + + case RTTHREADCTXEVENT_PREEMPTING: + { + /* Invoke the HM-specific thread-context callback. */ + HMR0ThreadCtxCallback(enmEvent, pvUser); + + /* + * Sigh. See VMMGetCpu() used by VMCPU_ASSERT_EMT(). 
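The new VMMR0ThreadCtxHooks* wrappers added here are thin shims over the IPRT thread-context hook API: a hook object is created once per EMT, registered with a callback that receives RTTHREADCTXEVENT_PREEMPTING/RESUMED, and used to keep the EMT-to-host-CPU mapping honest. A compact sketch of that wiring, using the IPRT calls exactly as they appear in this patch (later IPRT versions rename this API, so treat availability as version-specific; error handling is trimmed and a plain struct stands in for the VMCPU):

    #include <iprt/thread.h>
    #include <iprt/mp.h>
    #include <iprt/asm.h>
    #include <iprt/err.h>

    typedef struct MYEMTSTATE
    {
        RTTHREADCTX       hCtxHooks;   /* NIL_RTTHREADCTX until created. */
        uint32_t volatile idHostCpu;   /* which host CPU this EMT currently runs on */
    } MYEMTSTATE;

    /* Context-switch callback: keep the EMT <-> host CPU mapping in sync. */
    static DECLCALLBACK(void) myThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser)
    {
        MYEMTSTATE *pState = (MYEMTSTATE *)pvUser;
        switch (enmEvent)
        {
            case RTTHREADCTXEVENT_RESUMED:
                /* Rescheduled, possibly on another CPU: republish the mapping. */
                ASMAtomicWriteU32(&pState->idHostCpu, RTMpCpuId());
                break;

            case RTTHREADCTXEVENT_PREEMPTING:
                /* Being scheduled out: clear it so two EMTs never claim one CPU. */
                ASMAtomicWriteU32(&pState->idHostCpu, NIL_RTCPUID);
                break;

            default:
                break;
        }
    }

    /* Create the per-thread hook object and register the callback (EMT only). */
    static int myInstallThreadCtxHooks(MYEMTSTATE *pState)
    {
        int rc = RTThreadCtxHooksCreate(&pState->hCtxHooks);
        if (RT_SUCCESS(rc))
            rc = RTThreadCtxHooksRegister(pState->hCtxHooks, myThreadCtxCallback, pState);
        return rc; /* VERR_NOT_SUPPORTED is expected on hosts without hook support. */
    }
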
We cannot let several VCPUs + * have the same host CPU associated with it. + */ + ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); + break; + } + + default: + /* Invoke the HM-specific thread-context callback. */ + HMR0ThreadCtxCallback(enmEvent, pvUser); + break; + } +} + + #ifdef VBOX_WITH_STATISTICS /** * Record return code statistics @@ -525,21 +694,21 @@ static void vmmR0RecordRC(PVM pVM, PVMCPU pVCpu, int rc) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRescheduleREM); break; case VINF_EM_RAW_TO_R3: - if (VM_FF_ISPENDING(pVM, VM_FF_TM_VIRTUAL_SYNC)) + if (VM_FF_IS_PENDING(pVM, VM_FF_TM_VIRTUAL_SYNC)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3TMVirt); - else if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NEED_HANDY_PAGES)) + else if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_NEED_HANDY_PAGES)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3HandyPages); - else if (VM_FF_ISPENDING(pVM, VM_FF_PDM_QUEUES)) + else if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_QUEUES)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3PDMQueues); - else if (VM_FF_ISPENDING(pVM, VM_FF_EMT_RENDEZVOUS)) + else if (VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Rendezvous); - else if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA)) + else if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3DMA); - else if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TIMER)) + else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TIMER)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Timer); - else if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PDM_CRITSECT)) + else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PDM_CRITSECT)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3CritSect); - else if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TO_R3)) + else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TO_R3)) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3); else STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Unknown); @@ -602,7 +771,7 @@ static void vmmR0RecordRC(PVM pVM, PVMCPU pVCpu, int rc) case VINF_EM_PENDING_REQUEST: STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPendingRequest); break; - case VINF_EM_HWACCM_PATCH_TPR_INSTR: + case VINF_EM_HM_PATCH_TPR_INSTR: STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchTPR); break; default: @@ -659,102 +828,83 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati */ case VMMR0_DO_RAW_RUN: { - /* Some safety precautions first. */ #ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 - if (RT_LIKELY( !pVM->vmm.s.fSwitcherDisabled /* hwaccm */ - && pVM->cCpus == 1 /* !smp */ - && PGMGetHyperCR3(pVCpu))) -#else - if (RT_LIKELY( !pVM->vmm.s.fSwitcherDisabled - && pVM->cCpus == 1)) -#endif + /* Some safety precautions first. */ + if (RT_UNLIKELY(!PGMGetHyperCR3(pVCpu))) { - /* Disable preemption and update the periodic preemption timer. */ - RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; - RTThreadPreemptDisable(&PreemptState); - RTCPUID idHostCpu = RTMpCpuId(); + pVCpu->vmm.s.iLastGZRc = VERR_PGM_NO_CR3_SHADOW_ROOT; + break; + } +#endif + + /* Disable preemption and update the periodic preemption timer. 
*/ + RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; + RTThreadPreemptDisable(&PreemptState); + RTCPUID idHostCpu = RTMpCpuId(); #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI - CPUMR0SetLApic(pVM, idHostCpu); + CPUMR0SetLApic(pVCpu, idHostCpu); #endif - ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu); - if (pVM->vmm.s.fUsePeriodicPreemptionTimers) - GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu)); + ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu); + if (pVM->vmm.s.fUsePeriodicPreemptionTimers) + GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu)); - /* We might need to disable VT-x if the active switcher turns off paging. */ - bool fVTxDisabled; - int rc = HWACCMR0EnterSwitcher(pVM, &fVTxDisabled); - if (RT_SUCCESS(rc)) - { - RTCCUINTREG uFlags = ASMIntDisableFlags(); + /* We might need to disable VT-x if the active switcher turns off paging. */ + bool fVTxDisabled; + int rc = HMR0EnterSwitcher(pVM, pVM->vmm.s.enmSwitcher, &fVTxDisabled); + if (RT_SUCCESS(rc)) + { + RTCCUINTREG uFlags = ASMIntDisableFlags(); - for (;;) - { - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); - TMNotifyStartOfExecution(pVCpu); + for (;;) + { + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); + TMNotifyStartOfExecution(pVCpu); - rc = pVM->vmm.s.pfnR0ToRawMode(pVM); - pVCpu->vmm.s.iLastGZRc = rc; + rc = pVM->vmm.s.pfnR0ToRawMode(pVM); + pVCpu->vmm.s.iLastGZRc = rc; - TMNotifyEndOfExecution(pVCpu); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); + TMNotifyEndOfExecution(pVCpu); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); - if (rc != VINF_VMM_CALL_TRACER) - break; - SUPR0TracerUmodProbeFire(pVM->pSession, &pVCpu->vmm.s.TracerCtx); - } + if (rc != VINF_VMM_CALL_TRACER) + break; + SUPR0TracerUmodProbeFire(pVM->pSession, &pVCpu->vmm.s.TracerCtx); + } - /* Re-enable VT-x if previously turned off. */ - HWACCMR0LeaveSwitcher(pVM, fVTxDisabled); + /* Re-enable VT-x if previously turned off. */ + HMR0LeaveSwitcher(pVM, fVTxDisabled); - if ( rc == VINF_EM_RAW_INTERRUPT - || rc == VINF_EM_RAW_INTERRUPT_HYPER) - TRPMR0DispatchHostInterrupt(pVM); + if ( rc == VINF_EM_RAW_INTERRUPT + || rc == VINF_EM_RAW_INTERRUPT_HYPER) + TRPMR0DispatchHostInterrupt(pVM); - ASMSetFlags(uFlags); + ASMSetFlags(uFlags); #ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); - vmmR0RecordRC(pVM, pVCpu, rc); + STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); + vmmR0RecordRC(pVM, pVCpu, rc); #endif - } - else - pVCpu->vmm.s.iLastGZRc = rc; - ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); - RTThreadPreemptRestore(&PreemptState); } else - { - Assert(!pVM->vmm.s.fSwitcherDisabled); - pVCpu->vmm.s.iLastGZRc = VERR_NOT_SUPPORTED; - if (pVM->cCpus != 1) - pVCpu->vmm.s.iLastGZRc = VERR_RAW_MODE_INVALID_SMP; -#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 - if (!PGMGetHyperCR3(pVCpu)) - pVCpu->vmm.s.iLastGZRc = VERR_PGM_NO_CR3_SHADOW_ROOT; -#endif - } + pVCpu->vmm.s.iLastGZRc = rc; + ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); + RTThreadPreemptRestore(&PreemptState); break; } /* * Run guest code using the available hardware acceleration technology. - * - * Disable interrupts before we do anything interesting. On Windows we avoid - * this by having the support driver raise the IRQL before calling us, this way - * we hope to get away with page faults and later calling into the kernel. 
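Both run paths now share the pinning discipline visible in this hunk: disable preemption first, publish the current host CPU id for the EMT, do the switch-related work, then clear the mapping and restore preemption. A bare-bones sketch of just that bracket; the RTThreadPreempt*, RTMpCpuId and ASMAtomicWriteU32 calls are the ones used in the patch, while the pinned work itself is elided:

    #include <iprt/thread.h>
    #include <iprt/mp.h>
    #include <iprt/asm.h>

    static uint32_t volatile g_idHostCpu = NIL_RTCPUID;

    static void myDoPinnedWork(void)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        RTThreadPreemptDisable(&PreemptState);

        /* The EMT <-> host CPU mapping is only meaningful while we cannot migrate. */
        ASMAtomicWriteU32(&g_idHostCpu, RTMpCpuId());

        /* ... enter the switcher / run guest code here ... */

        ASMAtomicWriteU32(&g_idHostCpu, NIL_RTCPUID);
        RTThreadPreemptRestore(&PreemptState);
    }
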
*/ - case VMMR0_DO_HWACC_RUN: + case VMMR0_DO_HM_RUN: { -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION + Assert(!VMMR0ThreadCtxHooksAreRegistered(pVCpu)); RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; RTThreadPreemptDisable(&PreemptState); -#elif !defined(RT_OS_WINDOWS) - RTCCUINTREG uFlags = ASMIntDisableFlags(); -#endif + + /* Update the VCPU <-> host CPU mapping before doing anything else. */ ASMAtomicWriteU32(&pVCpu->idHostCpu, RTMpCpuId()); if (pVM->vmm.s.fUsePeriodicPreemptionTimers) GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu)); - #ifdef LOG_ENABLED if (pVCpu->idCpu > 0) { @@ -768,15 +918,53 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati } } #endif - int rc; - if (!HWACCMR0SuspendPending()) + + int rc; + bool fPreemptRestored = false; + if (!HMR0SuspendPending()) { - rc = HWACCMR0Enter(pVM, pVCpu); + /* Register thread-context hooks if required. */ + if ( VMMR0ThreadCtxHooksAreCreated(pVCpu) + && !VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + { + rc = VMMR0ThreadCtxHooksRegister(pVCpu, vmmR0ThreadCtxCallback); + AssertRC(rc); + } + + /* Enter HM context. */ + rc = HMR0Enter(pVM, pVCpu); if (RT_SUCCESS(rc)) { - rc = vmmR0CallRing3SetJmp(&pVCpu->vmm.s.CallRing3JmpBufR0, HWACCMR0RunGuestCode, pVM, pVCpu); /* this may resume code. */ - int rc2 = HWACCMR0Leave(pVM, pVCpu); - AssertRC(rc2); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + + /* When preemption hooks are in place, enable preemption now that we're in HM context. */ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + { + fPreemptRestored = true; + RTThreadPreemptRestore(&PreemptState); + } + + /* Setup the longjmp machinery and execute guest code. */ + rc = vmmR0CallRing3SetJmp(&pVCpu->vmm.s.CallRing3JmpBufR0, HMR0RunGuestCode, pVM, pVCpu); + + /* Manual assert as normal assertions are going to crash in this case. */ + if (RT_UNLIKELY( VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_HM + && RT_SUCCESS_NP(rc) && rc != VINF_VMM_CALL_HOST )) + { + pVM->vmm.s.szRing0AssertMsg1[0] = '\0'; + RTStrPrintf(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2), + "Got VMCPU state %d expected %d.\n", VMCPU_GET_STATE(pVCpu), VMCPUSTATE_STARTED_HM); + rc = VERR_VMM_WRONG_HM_VMCPU_STATE; + } + else if (RT_UNLIKELY(VMMR0ThreadCtxHooksAreRegistered(pVCpu))) + { + pVM->vmm.s.szRing0AssertMsg1[0] = '\0'; + RTStrPrintf(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2), + "Thread-context hooks still registered! VCPU=%p Id=%u rc=%d.\n", pVCpu, pVCpu->idCpu, rc); + rc = VERR_INVALID_STATE; + } + + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); } STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); } @@ -787,12 +975,11 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati } pVCpu->vmm.s.iLastGZRc = rc; + /* Clear the VCPU <-> host CPU mapping as we've left HM context. */ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION - RTThreadPreemptRestore(&PreemptState); -#elif !defined(RT_OS_WINDOWS) - ASMSetFlags(uFlags); -#endif + + if (!fPreemptRestored) + RTThreadPreemptRestore(&PreemptState); #ifdef VBOX_WITH_STATISTICS vmmR0RecordRC(pVM, pVCpu, rc); @@ -952,7 +1139,7 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio * Initialize the R0 part of a VM instance. 
*/ case VMMR0_DO_VMMR0_INIT: - return vmmR0InitVM(pVM, (uint32_t)u64Arg); + return vmmR0InitVM(pVM, RT_LODWORD(u64Arg), RT_HIDWORD(u64Arg)); /* * Terminate the R0 part of a VM instance. @@ -961,16 +1148,16 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio return VMMR0TermVM(pVM, NULL); /* - * Attempt to enable hwacc mode and check the current setting. + * Attempt to enable hm mode and check the current setting. */ - case VMMR0_DO_HWACC_ENABLE: - return HWACCMR0EnableAllCpus(pVM); + case VMMR0_DO_HM_ENABLE: + return HMR0EnableAllCpus(pVM); /* * Setup the hardware accelerated session. */ - case VMMR0_DO_HWACC_SETUP_VM: - return HWACCMR0SetupVM(pVM); + case VMMR0_DO_HM_SETUP_VM: + return HMR0SetupVM(pVM); /* * Switch to RC to execute Hypervisor function. @@ -980,11 +1167,6 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio int rc; bool fVTxDisabled; - /* Safety precaution as HWACCM can disable the switcher. */ - Assert(!pVM->vmm.s.fSwitcherDisabled); - if (RT_UNLIKELY(pVM->vmm.s.fSwitcherDisabled)) - return VERR_NOT_SUPPORTED; - #ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 if (RT_UNLIKELY(!PGMGetHyperCR3(VMMGetCpu0(pVM)))) return VERR_PGM_NO_CR3_SHADOW_ROOT; @@ -994,18 +1176,18 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI RTCPUID idHostCpu = RTMpCpuId(); - CPUMR0SetLApic(pVM, idHostCpu); + CPUMR0SetLApic(&pVM->aCpus[0], idHostCpu); #endif /* We might need to disable VT-x if the active switcher turns off paging. */ - rc = HWACCMR0EnterSwitcher(pVM, &fVTxDisabled); + rc = HMR0EnterSwitcher(pVM, pVM->vmm.s.enmSwitcher, &fVTxDisabled); if (RT_FAILURE(rc)) return rc; rc = pVM->vmm.s.pfnR0ToRawMode(pVM); /* Re-enable VT-x if previously turned off. */ - HWACCMR0LeaveSwitcher(pVM, fVTxDisabled); + HMR0LeaveSwitcher(pVM, fVTxDisabled); /** @todo dispatch interrupts? */ ASMSetFlags(fFlags); @@ -1283,7 +1465,7 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio case VMMR0_DO_TEST_SWITCHER3264: if (idCpu == NIL_VMCPUID) return VERR_INVALID_CPU_ID; - return HWACCMR0TestSwitcher3264(pVM); + return HMR0TestSwitcher3264(pVM); #endif default: /* @@ -1386,6 +1568,40 @@ VMMR0DECL(int) VMMR0EntryEx(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperation, return vmmR0EntryExWorker(pVM, idCpu, enmOperation, pReq, u64Arg, pSession); } + +/** + * Checks whether we've armed the ring-0 long jump machinery. + * + * @returns @c true / @c false + * @param pVCpu Pointer to the VMCPU. + * @thread EMT + * @sa VMMIsLongJumpArmed + */ +VMMR0_INT_DECL(bool) VMMR0IsLongJumpArmed(PVMCPU pVCpu) +{ +#ifdef RT_ARCH_X86 + return pVCpu->vmm.s.CallRing3JmpBufR0.eip + && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call; +#else + return pVCpu->vmm.s.CallRing3JmpBufR0.rip + && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call; +#endif +} + + +/** + * Checks whether we've done a ring-3 long jump. + * + * @returns @c true / @c false + * @param pVCpu Pointer to the VMCPU. + * @thread EMT + */ +VMMR0_INT_DECL(bool) VMMR0IsInRing3LongJump(PVMCPU pVCpu) +{ + return pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call; +} + + /** * Internal R0 logger worker: Flush logger. * @@ -1507,6 +1723,18 @@ VMMR0DECL(void) VMMR0LogFlushEnable(PVMCPU pVCpu) pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = false; } + +/** + * Checks if log flushing is disabled or not. + * + * @param pVCpu Pointer to the VMCPU. 
+ */ +VMMR0DECL(bool) VMMR0IsLogFlushDisabled(PVMCPU pVCpu) +{ + if (pVCpu->vmm.s.pR0LoggerR0) + return pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled; + return true; +} #endif /* LOG_ENABLED */ /** @@ -1609,9 +1837,16 @@ DECLEXPORT(void) RTCALL RTAssertMsg2WeakV(const char *pszFormat, va_list va) va_list vaCopy; /* - * Push the message to the logger. + * Push the message to the loggers. */ - PRTLOGGER pLog = RTLogDefaultInstance(); /** @todo we want this for release as well! */ + PRTLOGGER pLog = RTLogGetDefaultInstance(); /* Don't initialize it here... */ + if (pLog) + { + va_copy(vaCopy, va); + RTLogFormatV(rtLogOutput, pLog, pszFormat, vaCopy); + va_end(vaCopy); + } + pLog = RTLogRelDefaultInstance(); if (pLog) { va_copy(vaCopy, va); diff --git a/src/VBox/VMM/VMMR0/VMMR0.def b/src/VBox/VMM/VMMR0/VMMR0.def index bf8ffcdf..fc11689f 100644 --- a/src/VBox/VMM/VMMR0/VMMR0.def +++ b/src/VBox/VMM/VMMR0/VMMR0.def @@ -3,7 +3,7 @@ ; VMM Ring 0 DLL - Definition file. ; -; Copyright (C) 2006-2009 Oracle Corporation +; Copyright (C) 2006-2013 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; @@ -79,7 +79,7 @@ EXPORTS IntNetR0IfWait ; Network Shaper - PDMR0NsAllocateBandwidth + PDMNsAllocateBandwidth ; runtime RTAssertMsg1Weak diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm index 31c565b6..6c3116fd 100644 --- a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm +++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2006-2009 Oracle Corporation +; Copyright (C) 2006-2011 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm index dd657f38..8a1c9db9 100644 --- a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm +++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2006-2009 Oracle Corporation +; Copyright (C) 2006-2011 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm index 1ab44d62..eff2e3a0 100644 --- a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm +++ b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm @@ -4,7 +4,7 @@ ; ; -; Copyright (C) 2011 Oracle Corporation +; Copyright (C) 2011-2012 Oracle Corporation ; ; This file is part of VirtualBox Open Source Edition (OSE), as ; available from http://www.virtualbox.org. This file is free software; |
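One small calling-convention change in this patch is easy to miss: VMMR0_DO_VMMR0_INIT now packs two 32-bit values into its single 64-bit argument, the ring-3 SVN revision in the low dword and the build type in the high dword, which vmmR0EntryExWorker unpacks with RT_LODWORD/RT_HIDWORD. A caller-side sketch of the packing; RT_MAKE_U64 is the usual IPRT helper for this, but treat the exact macro as an assumption:

    #include <iprt/cdefs.h>
    #include <iprt/types.h>

    /* Pack the two dwords the way the ring-0 side expects to unpack them. */
    static uint64_t myPackVmmR0InitArg(uint32_t uSvnRev, uint32_t uBuildType)
    {
        return RT_MAKE_U64(uSvnRev /* low dword */, uBuildType /* high dword */);
    }

    /* Ring-0 side, as in vmmR0EntryExWorker:
     *     vmmR0InitVM(pVM, RT_LODWORD(u64Arg), RT_HIDWORD(u64Arg));
     */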
