Diffstat (limited to 'src/VBox/VMM/VMMR0/HMVMXR0.cpp')
-rw-r--r--    src/VBox/VMM/VMMR0/HMVMXR0.cpp    10993
1 file changed, 10993 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp new file mode 100644 index 00000000..afd83258 --- /dev/null +++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp @@ -0,0 +1,10993 @@ +/* $Id: HMVMXR0.cpp $ */ +/** @file + * HM VMX (Intel VT-x) - Host Context Ring-0. + */ + +/* + * Copyright (C) 2012-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#define LOG_GROUP LOG_GROUP_HM +#include <iprt/asm-amd64-x86.h> +#include <iprt/thread.h> +#include <iprt/string.h> + +#include "HMInternal.h" +#include <VBox/vmm/vm.h> +#include "HMVMXR0.h" +#include <VBox/vmm/pdmapi.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/vmm/iem.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/selm.h> +#include <VBox/vmm/tm.h> +#ifdef VBOX_WITH_REM +# include <VBox/vmm/rem.h> +#endif +#ifdef DEBUG_ramshankar +#define HMVMX_SAVE_FULL_GUEST_STATE +#define HMVMX_SYNC_FULL_GUEST_STATE +#define HMVMX_ALWAYS_CHECK_GUEST_STATE +#define HMVMX_ALWAYS_TRAP_ALL_XCPTS +#define HMVMX_ALWAYS_TRAP_PF +#define HMVMX_ALWAYS_SWAP_FPU_STATE +#endif + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +#if defined(RT_ARCH_AMD64) +# define HMVMX_IS_64BIT_HOST_MODE() (true) +typedef RTHCUINTREG HMVMXHCUINTREG; +#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +extern "C" uint32_t g_fVMXIs64bitHost; +# define HMVMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0) +typedef uint64_t HMVMXHCUINTREG; +#else +# define HMVMX_IS_64BIT_HOST_MODE() (false) +typedef RTHCUINTREG HMVMXHCUINTREG; +#endif + +/** Use the function table. */ +#define HMVMX_USE_FUNCTION_TABLE + +/** Determine which tagged-TLB flush handler to use. */ +#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0 +#define HMVMX_FLUSH_TAGGED_TLB_EPT 1 +#define HMVMX_FLUSH_TAGGED_TLB_VPID 2 +#define HMVMX_FLUSH_TAGGED_TLB_NONE 3 + +/** @name Updated-guest-state flags. 
+ * @{ */ +#define HMVMX_UPDATED_GUEST_RIP RT_BIT(0) +#define HMVMX_UPDATED_GUEST_RSP RT_BIT(1) +#define HMVMX_UPDATED_GUEST_RFLAGS RT_BIT(2) +#define HMVMX_UPDATED_GUEST_CR0 RT_BIT(3) +#define HMVMX_UPDATED_GUEST_CR3 RT_BIT(4) +#define HMVMX_UPDATED_GUEST_CR4 RT_BIT(5) +#define HMVMX_UPDATED_GUEST_GDTR RT_BIT(6) +#define HMVMX_UPDATED_GUEST_IDTR RT_BIT(7) +#define HMVMX_UPDATED_GUEST_LDTR RT_BIT(8) +#define HMVMX_UPDATED_GUEST_TR RT_BIT(9) +#define HMVMX_UPDATED_GUEST_SEGMENT_REGS RT_BIT(10) +#define HMVMX_UPDATED_GUEST_DEBUG RT_BIT(11) +#define HMVMX_UPDATED_GUEST_FS_BASE_MSR RT_BIT(12) +#define HMVMX_UPDATED_GUEST_GS_BASE_MSR RT_BIT(13) +#define HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR RT_BIT(14) +#define HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR RT_BIT(15) +#define HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR RT_BIT(16) +#define HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS RT_BIT(17) +#define HMVMX_UPDATED_GUEST_ACTIVITY_STATE RT_BIT(18) +#define HMVMX_UPDATED_GUEST_APIC_STATE RT_BIT(19) +#define HMVMX_UPDATED_GUEST_ALL ( HMVMX_UPDATED_GUEST_RIP \ + | HMVMX_UPDATED_GUEST_RSP \ + | HMVMX_UPDATED_GUEST_RFLAGS \ + | HMVMX_UPDATED_GUEST_CR0 \ + | HMVMX_UPDATED_GUEST_CR3 \ + | HMVMX_UPDATED_GUEST_CR4 \ + | HMVMX_UPDATED_GUEST_GDTR \ + | HMVMX_UPDATED_GUEST_IDTR \ + | HMVMX_UPDATED_GUEST_LDTR \ + | HMVMX_UPDATED_GUEST_TR \ + | HMVMX_UPDATED_GUEST_SEGMENT_REGS \ + | HMVMX_UPDATED_GUEST_DEBUG \ + | HMVMX_UPDATED_GUEST_FS_BASE_MSR \ + | HMVMX_UPDATED_GUEST_GS_BASE_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR \ + | HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR \ + | HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS \ + | HMVMX_UPDATED_GUEST_ACTIVITY_STATE \ + | HMVMX_UPDATED_GUEST_APIC_STATE) +/** @} */ + +/** @name + * Flags to skip redundant reads of some common VMCS fields that are not part of + * the guest-CPU state but are in the transient structure. + */ +#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO RT_BIT(0) +#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE RT_BIT(1) +#define HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION RT_BIT(2) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN RT_BIT(3) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO RT_BIT(4) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE RT_BIT(5) +#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO RT_BIT(6) +/** @} */ + +/** @name + * States of the VMCS. + * + * This does not reflect all possible VMCS states but currently only those + * needed for maintaining the VMCS consistently even when thread-context hooks + * are used. Maybe later this can be extended (i.e. Nested Virtualization). + */ +#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0) +#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1) +#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2) +/** @} */ + +/** + * Exception bitmap mask for real-mode guests (real-on-v86). + * + * We need to intercept all exceptions manually (except #PF). #NM is also + * handled separately, see hmR0VmxLoadSharedCR0(). #PF need not be intercepted + * even in real-mode if we have Nested Paging support. 
+ */ +#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) | RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NMI) \ + | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \ + | RT_BIT(X86_XCPT_UD) /* RT_BIT(X86_XCPT_NM) */ | RT_BIT(X86_XCPT_DF) \ + | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \ + | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \ + | RT_BIT(X86_XCPT_MF) | RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_MC) \ + | RT_BIT(X86_XCPT_XF)) + +/** + * Exception bitmap mask for all contributory exceptions. + * + * Page fault is deliberately excluded here as it's conditional as to whether + * it's contributory or benign. Page faults are handled separately. + */ +#define HMVMX_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \ + | RT_BIT(X86_XCPT_DE)) + +/** Maximum VM-instruction error number. */ +#define HMVMX_INSTR_ERROR_MAX 28 + +/** Profiling macro. */ +#ifdef HM_PROFILE_EXIT_DISPATCH +# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed) +# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed) +#else +# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0) +# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0) +#endif + +/** Assert that preemption is disabled or covered by thread-context hooks. */ +#define HMVMX_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + +/** Assert that we haven't migrated CPUs when thread-context hooks are not + * used. */ +#define HMVMX_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \ + || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \ + ("Illegal migration! Entered on CPU %u Current %u\n", \ + pVCpu->hm.s.idEnteredCpu, RTMpCpuId())); \ + +/** Helper macro for VM-exit handlers called unexpectedly. */ +#define HMVMX_RETURN_UNEXPECTED_EXIT() \ + do { \ + pVCpu->hm.s.u32HMError = pVmxTransient->uExitReason; \ + return VERR_VMX_UNEXPECTED_EXIT; \ + } while (0) + + +/******************************************************************************* +* Structures and Typedefs * +*******************************************************************************/ +/** + * VMX transient state. + * + * A state structure for holding miscellaneous information across + * VMX non-root operation and restored after the transition. + */ +typedef struct VMXTRANSIENT +{ + /** The host's rflags/eflags. */ + RTCCUINTREG uEflags; +#if HC_ARCH_BITS == 32 + uint32_t u32Alignment0; +#endif + /** The guest's LSTAR MSR value used for TPR patching for 32-bit guests. */ + uint64_t u64LStarMsr; + /** The guest's TPR value used for TPR shadowing. */ + uint8_t u8GuestTpr; + /** Alignment. */ + uint8_t abAlignment0[7]; + + /** The basic VM-exit reason. */ + uint16_t uExitReason; + /** Alignment. */ + uint16_t u16Alignment0; + /** The VM-exit interruption error code. */ + uint32_t uExitIntErrorCode; + /** The VM-exit exit qualification. */ + uint64_t uExitQualification; + + /** The VM-exit interruption-information field. */ + uint32_t uExitIntInfo; + /** The VM-exit instruction-length field. */ + uint32_t cbInstr; + /** The VM-exit instruction-information field. */ + union + { + /** Plain unsigned int representation. */ + uint32_t u; + /** INS and OUTS information. */ + struct + { + uint32_t u6Reserved0 : 7; + /** The address size; 0=16-bit, 1=32-bit, 2=64-bit, rest undefined. 
*/ + uint32_t u3AddrSize : 3; + uint32_t u5Reserved1 : 5; + /** The segment register (X86_SREG_XXX). */ + uint32_t iSegReg : 3; + uint32_t uReserved2 : 14; + } StrIo; + } ExitInstrInfo; + /** Whether the VM-entry failed or not. */ + bool fVMEntryFailed; + /** Alignment. */ + uint8_t abAlignment1[3]; + + /** The VM-entry interruption-information field. */ + uint32_t uEntryIntInfo; + /** The VM-entry exception error code field. */ + uint32_t uEntryXcptErrorCode; + /** The VM-entry instruction length field. */ + uint32_t cbEntryInstr; + + /** IDT-vectoring information field. */ + uint32_t uIdtVectoringInfo; + /** IDT-vectoring error code. */ + uint32_t uIdtVectoringErrorCode; + + /** Mask of currently read VMCS fields; HMVMX_UPDATED_TRANSIENT_*. */ + uint32_t fVmcsFieldsRead; + + /** Whether the guest FPU was active at the time of VM-exit. */ + bool fWasGuestFPUStateActive; + /** Whether the guest debug state was active at the time of VM-exit. */ + bool fWasGuestDebugStateActive; + /** Whether the hyper debug state was active at the time of VM-exit. */ + bool fWasHyperDebugStateActive; + /** Whether TSC-offsetting should be setup before VM-entry. */ + bool fUpdateTscOffsettingAndPreemptTimer; + /** Whether the VM-exit was caused by a page-fault during delivery of a + * contributory exception or a page-fault. */ + bool fVectoringPF; +} VMXTRANSIENT; +AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t)); +AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestFPUStateActive, sizeof(uint64_t)); +AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t)); +/** Pointer to VMX transient state. */ +typedef VMXTRANSIENT *PVMXTRANSIENT; + + +/** + * MSR-bitmap read permissions. + */ +typedef enum VMXMSREXITREAD +{ + /** Reading this MSR causes a VM-exit. */ + VMXMSREXIT_INTERCEPT_READ = 0xb, + /** Reading this MSR does not cause a VM-exit. */ + VMXMSREXIT_PASSTHRU_READ +} VMXMSREXITREAD; + +/** + * MSR-bitmap write permissions. + */ +typedef enum VMXMSREXITWRITE +{ + /** Writing to this MSR causes a VM-exit. */ + VMXMSREXIT_INTERCEPT_WRITE = 0xd, + /** Writing to this MSR does not cause a VM-exit. */ + VMXMSREXIT_PASSTHRU_WRITE +} VMXMSREXITWRITE; + +/** + * VMX VM-exit handler. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required + * fields before using them. + * @param pVmxTransient Pointer to the VMX-transient structure. + */ +#ifndef HMVMX_USE_FUNCTION_TABLE +typedef int FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +#else +typedef DECLCALLBACK(int) FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +/** Pointer to VM-exit handler. 
*/ +typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER; +#endif + + +/******************************************************************************* +* Internal Functions * +*******************************************************************************/ +static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush); +static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr); +static void hmR0VmxClearEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx); +static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr, + uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntState); +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu); +#endif +#ifndef HMVMX_USE_FUNCTION_TABLE +DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason); +# define HMVMX_EXIT_DECL static int +#else +# define HMVMX_EXIT_DECL static DECLCALLBACK(int) +#endif + +/** @name VM-exit handlers. + * @{ + */ +static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi; +static FNVMXEXITHANDLER hmR0VmxExitExtInt; +static FNVMXEXITHANDLER hmR0VmxExitTripleFault; +static FNVMXEXITHANDLER hmR0VmxExitInitSignal; +static FNVMXEXITHANDLER hmR0VmxExitSipi; +static FNVMXEXITHANDLER hmR0VmxExitIoSmi; +static FNVMXEXITHANDLER hmR0VmxExitSmi; +static FNVMXEXITHANDLER hmR0VmxExitIntWindow; +static FNVMXEXITHANDLER hmR0VmxExitNmiWindow; +static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch; +static FNVMXEXITHANDLER hmR0VmxExitCpuid; +static FNVMXEXITHANDLER hmR0VmxExitGetsec; +static FNVMXEXITHANDLER hmR0VmxExitHlt; +static FNVMXEXITHANDLER hmR0VmxExitInvd; +static FNVMXEXITHANDLER hmR0VmxExitInvlpg; +static FNVMXEXITHANDLER hmR0VmxExitRdpmc; +static FNVMXEXITHANDLER hmR0VmxExitRdtsc; +static FNVMXEXITHANDLER hmR0VmxExitRsm; +static FNVMXEXITHANDLER hmR0VmxExitSetPendingXcptUD; +static FNVMXEXITHANDLER hmR0VmxExitMovCRx; +static FNVMXEXITHANDLER hmR0VmxExitMovDRx; +static FNVMXEXITHANDLER hmR0VmxExitIoInstr; +static FNVMXEXITHANDLER hmR0VmxExitRdmsr; +static FNVMXEXITHANDLER hmR0VmxExitWrmsr; +static FNVMXEXITHANDLER hmR0VmxExitErrInvalidGuestState; +static FNVMXEXITHANDLER hmR0VmxExitErrMsrLoad; +static FNVMXEXITHANDLER hmR0VmxExitErrUndefined; +static FNVMXEXITHANDLER hmR0VmxExitMwait; +static FNVMXEXITHANDLER hmR0VmxExitMtf; +static FNVMXEXITHANDLER hmR0VmxExitMonitor; +static FNVMXEXITHANDLER hmR0VmxExitPause; +static FNVMXEXITHANDLER hmR0VmxExitErrMachineCheck; +static FNVMXEXITHANDLER hmR0VmxExitTprBelowThreshold; +static FNVMXEXITHANDLER hmR0VmxExitApicAccess; +static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess; +static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess; +static FNVMXEXITHANDLER hmR0VmxExitEptViolation; +static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig; +static FNVMXEXITHANDLER hmR0VmxExitRdtscp; +static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer; +static FNVMXEXITHANDLER hmR0VmxExitWbinvd; +static FNVMXEXITHANDLER hmR0VmxExitXsetbv; +static FNVMXEXITHANDLER hmR0VmxExitRdrand; +static FNVMXEXITHANDLER hmR0VmxExitInvpcid; +/** @} */ + +static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptBP(PVMCPU pVCpu, 
PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient); +static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx); + +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +#ifdef HMVMX_USE_FUNCTION_TABLE + +/** + * VMX_EXIT dispatch table. + */ +static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] = +{ + /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi, + /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt, + /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault, + /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal, + /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi, + /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi, + /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi, + /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow, + /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow, + /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch, + /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid, + /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec, + /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt, + /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd, + /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg, + /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc, + /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc, + /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm, + /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitSetPendingXcptUD, + /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD, + /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD, + /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD, + /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD, + /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD, + /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD, + /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD, + /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD, + /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD, + /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx, + /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx, + /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr, + /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr, + /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr, + /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState, + /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad, + /* 35 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait, + /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf, + /* 38 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor, + /* 40 UNDEFINED */ hmR0VmxExitPause, + /* 41 VMX_EXIT_PAUSE */ hmR0VmxExitErrMachineCheck, + /* 42 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrUndefined, + /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold, + /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess, + /* 45 UNDEFINED */ hmR0VmxExitErrUndefined, + /* 46 VMX_EXIT_XDTR_ACCESS */ hmR0VmxExitXdtrAccess, + /* 47 VMX_EXIT_TR_ACCESS */ hmR0VmxExitXdtrAccess, + /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation, + /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig, + /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD, + /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp, + /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer, + /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD, + /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd, + /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv, + /* 56 UNDEFINED */ 
hmR0VmxExitErrUndefined, + /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand, + /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid, + /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD +}; +#endif /* HMVMX_USE_FUNCTION_TABLE */ + +#ifdef VBOX_STRICT +static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] = +{ + /* 0 */ "(Not Used)", + /* 1 */ "VMCALL executed in VMX root operation.", + /* 2 */ "VMCLEAR with invalid physical address.", + /* 3 */ "VMCLEAR with VMXON pointer.", + /* 4 */ "VMLAUNCH with non-clear VMCS.", + /* 5 */ "VMRESUME with non-launched VMCS.", + /* 6 */ "VMRESUME after VMXOFF", + /* 7 */ "VM entry with invalid control fields.", + /* 8 */ "VM entry with invalid host state fields.", + /* 9 */ "VMPTRLD with invalid physical address.", + /* 10 */ "VMPTRLD with VMXON pointer.", + /* 11 */ "VMPTRLD with incorrect revision identifier.", + /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.", + /* 13 */ "VMWRITE to read-only VMCS component.", + /* 14 */ "(Not Used)", + /* 15 */ "VMXON executed in VMX root operation.", + /* 16 */ "VM entry with invalid executive-VMCS pointer.", + /* 17 */ "VM entry with non-launched executing VMCS.", + /* 18 */ "VM entry with executive-VMCS pointer not VMXON pointer.", + /* 19 */ "VMCALL with non-clear VMCS.", + /* 20 */ "VMCALL with invalid VM-exit control fields.", + /* 21 */ "(Not Used)", + /* 22 */ "VMCALL with incorrect MSEG revision identifier.", + /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.", + /* 24 */ "VMCALL with invalid SMM-monitor features.", + /* 25 */ "VM entry with invalid VM-execution control fields in executive VMCS.", + /* 26 */ "VM entry with events blocked by MOV SS.", + /* 27 */ "(Not Used)", + /* 28 */ "Invalid operand to INVEPT/INVVPID." +}; +#endif /* VBOX_STRICT */ + + + +/** + * Updates the VM's last error record. If there was a VMX instruction error, + * reads the error data from the VMCS and updates VCPU's last error record as + * well. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU (can be NULL if @a rc is not + * VERR_VMX_UNABLE_TO_START_VM or + * VERR_VMX_INVALID_VMCS_FIELD). + * @param rc The error code. + */ +static void hmR0VmxUpdateErrorRecord(PVM pVM, PVMCPU pVCpu, int rc) +{ + AssertPtr(pVM); + if ( rc == VERR_VMX_INVALID_VMCS_FIELD + || rc == VERR_VMX_UNABLE_TO_START_VM) + { + AssertPtrReturnVoid(pVCpu); + VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); + } + pVM->hm.s.lLastError = rc; +} + + +/** + * Reads the VM-entry interruption-information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-entry exception error code field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! 
+ */ +DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-entry exception error code field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr); + AssertRCReturn(rc, rc); + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit interruption-information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit interruption error code from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit instruction length field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN; + } + return VINF_SUCCESS; +} + + +/** + * Reads the VM-exit instruction-information field from the VMCS into + * the VMX transient structure. + * + * @returns VBox status code. + * @param pVCpu The cross context per CPU structure. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the exit qualification from the VMCS into the VMX transient structure. 
+ * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadExitQualificationVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION)) + { + int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQualification); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION; + } + return VINF_SUCCESS; +} + + +/** + * Reads the IDT-vectoring information field from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_INFO, &pVmxTransient->uIdtVectoringInfo); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO; + } + return VINF_SUCCESS; +} + + +/** + * Reads the IDT-vectoring error code from the VMCS into the VMX + * transient structure. + * + * @returns VBox status code. + * @param pVmxTransient Pointer to the VMX transient structure. + */ +DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) +{ + if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE)) + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode); + AssertRCReturn(rc, rc); + pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE; + } + return VINF_SUCCESS; +} + + +/** + * Enters VMX root mode operation on the current CPU. + * + * @returns VBox status code. + * @param pVM Pointer to the VM (optional, can be NULL, after + * a resume). + * @param HCPhysCpuPage Physical address of the VMXON region. + * @param pvCpuPage Pointer to the VMXON region. + */ +static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage) +{ + AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + if (pVM) + { + /* Write the VMCS revision dword to the VMXON region. */ + *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo); + } + + /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* Enable the VMX bit in CR4 if necessary. */ + RTCCUINTREG uCr4 = ASMGetCR4(); + if (!(uCr4 & X86_CR4_VMXE)) + ASMSetCR4(uCr4 | X86_CR4_VMXE); + + /* Enter VMX root mode. */ + int rc = VMXEnable(HCPhysCpuPage); + if (RT_FAILURE(rc)) + ASMSetCR4(uCr4); + + /* Restore interrupts. */ + ASMSetFlags(uEflags); + return rc; +} + + +/** + * Exits VMX root mode operation on the current CPU. + * + * @returns VBox status code. + */ +static int hmR0VmxLeaveRootMode(void) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* Paranoid: Disable interrupts as, in theory, interrupts handlers might mess with CR4. */ + RTCCUINTREG uEflags = ASMIntDisableFlags(); + + /* If we're for some reason not in VMX root mode, then don't leave it. 
*/ + RTCCUINTREG uHostCR4 = ASMGetCR4(); + + int rc; + if (uHostCR4 & X86_CR4_VMXE) + { + /* Exit VMX root mode and clear the VMX bit in CR4. */ + VMXDisable(); + ASMSetCR4(uHostCR4 & ~X86_CR4_VMXE); + rc = VINF_SUCCESS; + } + else + rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE; + + /* Restore interrupts. */ + ASMSetFlags(uEflags); + return rc; +} + + +/** + * Allocates and maps one physically contiguous page. The allocated page is + * zero'd out. (Used by various VT-x structures). + * + * @returns IPRT status code. + * @param pMemObj Pointer to the ring-0 memory object. + * @param ppVirt Where to store the virtual address of the + * allocation. + * @param pPhys Where to store the physical address of the + * allocation. + */ +DECLINLINE(int) hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) +{ + AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER); + AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER); + AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER); + + int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */); + if (RT_FAILURE(rc)) + return rc; + *ppVirt = RTR0MemObjAddress(*pMemObj); + *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */); + ASMMemZero32(*ppVirt, PAGE_SIZE); + return VINF_SUCCESS; +} + + +/** + * Frees and unmaps an allocated physical page. + * + * @param pMemObj Pointer to the ring-0 memory object. + * @param ppVirt Where to re-initialize the virtual address of + * allocation as 0. + * @param pHCPhys Where to re-initialize the physical address of the + * allocation as 0. + */ +DECLINLINE(void) hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) +{ + AssertPtr(pMemObj); + AssertPtr(ppVirt); + AssertPtr(pHCPhys); + if (*pMemObj != NIL_RTR0MEMOBJ) + { + int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */); + AssertRC(rc); + *pMemObj = NIL_RTR0MEMOBJ; + *ppVirt = 0; + *pHCPhys = 0; + } +} + + +/** + * Worker function to free VT-x related structures. + * + * @returns IPRT status code. + * @param pVM Pointer to the VM. + */ +static void hmR0VmxStructsFree(PVM pVM) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); +#endif + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap); + + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, &pVCpu->hm.s.vmx.HCPhysVirtApic); + hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); + } + + hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess); +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); +#endif +} + + +/** + * Worker function to allocate VT-x related VM structures. + * + * @returns IPRT status code. + * @param pVM Pointer to the VM. + */ +static int hmR0VmxStructsAlloc(PVM pVM) +{ + /* + * Initialize members up-front so we can cleanup properly on allocation failure. 
+ */ +#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \ + pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ + pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ + pVM->hm.s.vmx.HCPhys##a_Name = 0; + +#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \ + pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ + pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ + pVCpu->hm.s.vmx.HCPhys##a_Name = 0; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv); +#endif + VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb); + + AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus)); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv); + VMXLOCAL_INIT_VMCPU_MEMOBJ(VirtApic, pb); + VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv); +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv); + VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv); +#endif + } +#undef VMXLOCAL_INIT_VMCPU_MEMOBJ +#undef VMXLOCAL_INIT_VM_MEMOBJ + + /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */ + AssertReturnStmt(MSR_IA32_VMX_BASIC_INFO_VMCS_SIZE(pVM->hm.s.vmx.Msrs.u64BasicInfo) <= PAGE_SIZE, + (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE, + VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO); + + /* + * Allocate all the VT-x structures. + */ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); + if (RT_FAILURE(rc)) + goto cleanup; + strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic"); + *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef); +#endif + + /* Allocate the APIC-access page for trapping APIC accesses from the guest. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) + { + rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, + &pVM->hm.s.vmx.HCPhysApicAccess); + if (RT_FAILURE(rc)) + goto cleanup; + } + + /* + * Initialize per-VCPU VT-x structures. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + + /* Allocate the VM control structure (VMCS). */ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); + if (RT_FAILURE(rc)) + goto cleanup; + + /* Allocate the Virtual-APIC page for transparent TPR accesses. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, + &pVCpu->hm.s.vmx.HCPhysVirtApic); + if (RT_FAILURE(rc)) + goto cleanup; + } + + /* Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for transparent accesses of specific MSRs. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + { + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, + &pVCpu->hm.s.vmx.HCPhysMsrBitmap); + if (RT_FAILURE(rc)) + goto cleanup; + memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE); + } + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); + if (RT_FAILURE(rc)) + goto cleanup; + + /* Allocate the VM-exit MSR-load page for the host MSRs. 
*/ + rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); + if (RT_FAILURE(rc)) + goto cleanup; +#endif + } + + return VINF_SUCCESS; + +cleanup: + hmR0VmxStructsFree(pVM); + return rc; +} + + +/** + * Does global VT-x initialization (called during module initialization). + * + * @returns VBox status code. + */ +VMMR0DECL(int) VMXR0GlobalInit(void) +{ +#ifdef HMVMX_USE_FUNCTION_TABLE + AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers)); +# ifdef VBOX_STRICT + for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++) + Assert(g_apfnVMExitHandlers[i]); +# endif +#endif + return VINF_SUCCESS; +} + + +/** + * Does global VT-x termination (called during module termination). + */ +VMMR0DECL(void) VMXR0GlobalTerm() +{ + /* Nothing to do currently. */ +} + + +/** + * Sets up and activates VT-x on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the global CPU info struct. + * @param pVM Pointer to the VM (can be NULL after a host resume + * operation). + * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a + * fEnabledByHost is true). + * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if + * @a fEnabledByHost is true). + * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to + * enable VT-x on the host. + * @param pvMsrs Opaque pointer to VMXMSRS struct. + */ +VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost, + void *pvMsrs) +{ + AssertReturn(pCpu, VERR_INVALID_PARAMETER); + AssertReturn(pvMsrs, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* Enable VT-x if it's not already enabled by the host. */ + if (!fEnabledByHost) + { + int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been using EPTPs) so + * we don't retain any stale guest-physical mappings which won't get invalidated when flushing by VPID. + */ + PVMXMSRS pMsrs = (PVMXMSRS)pvMsrs; + if (pMsrs->u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) + { + hmR0VmxFlushEpt(NULL /* pVCpu */, VMX_FLUSH_EPT_ALL_CONTEXTS); + pCpu->fFlushAsidBeforeUse = false; + } + else + pCpu->fFlushAsidBeforeUse = true; + + /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */ + ++pCpu->cTlbFlushes; + + return VINF_SUCCESS; +} + + +/** + * Deactivates VT-x on the current CPU. + * + * @returns VBox status code. + * @param pCpu Pointer to the global CPU info struct. + * @param pvCpuPage Pointer to the VMXON region. + * @param HCPhysCpuPage Physical address of the VMXON region. + * + * @remarks This function should never be called when SUPR0EnableVTx() or + * similar was used to enable VT-x on the host. + */ +VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) +{ + NOREF(pCpu); + NOREF(pvCpuPage); + NOREF(HCPhysCpuPage); + + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + return hmR0VmxLeaveRootMode(); +} + + +/** + * Sets the permission bits for the specified MSR in the MSR bitmap. + * + * @param pVCpu Pointer to the VMCPU. + * @param uMSR The MSR value. + * @param enmRead Whether reading this MSR causes a VM-exit. + * @param enmWrite Whether writing this MSR causes a VM-exit. 
+ */ +static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite) +{ + int32_t iBit; + uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap; + + /* + * Layout: + * 0x000 - 0x3ff - Low MSR read bits + * 0x400 - 0x7ff - High MSR read bits + * 0x800 - 0xbff - Low MSR write bits + * 0xc00 - 0xfff - High MSR write bits + */ + if (uMsr <= 0x00001FFF) + iBit = uMsr; + else if ( uMsr >= 0xC0000000 + && uMsr <= 0xC0001FFF) + { + iBit = (uMsr - 0xC0000000); + pbMsrBitmap += 0x400; + } + else + { + AssertMsgFailed(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr)); + return; + } + + Assert(iBit <= 0x1fff); + if (enmRead == VMXMSREXIT_INTERCEPT_READ) + ASMBitSet(pbMsrBitmap, iBit); + else + ASMBitClear(pbMsrBitmap, iBit); + + if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE) + ASMBitSet(pbMsrBitmap + 0x800, iBit); + else + ASMBitClear(pbMsrBitmap + 0x800, iBit); +} + + +/** + * Flushes the TLB using EPT. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a + * enmFlush). + * @param enmFlush Type of flush. + * + * @remarks Caller is responsible for making sure this function is called only + * when NestedPaging is supported and providing @a enmFlush that is + * supported by the CPU. + * @remarks Can be called with interrupts disabled. + */ +static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush) +{ + uint64_t au64Descriptor[2]; + if (enmFlush == VMX_FLUSH_EPT_ALL_CONTEXTS) + au64Descriptor[0] = 0; + else + { + Assert(pVCpu); + au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP; + } + au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */ + + int rc = VMXR0InvEPT(enmFlush, &au64Descriptor[0]); + AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0, + rc)); + if ( RT_SUCCESS(rc) + && pVCpu) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging); + } +} + + +/** + * Flushes the TLB using VPID. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a + * enmFlush). + * @param enmFlush Type of flush. + * @param GCPtr Virtual address of the page to flush (can be 0 depending + * on @a enmFlush). + * + * @remarks Can be called with interrupts disabled. + */ +static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr) +{ + AssertPtr(pVM); + Assert(pVM->hm.s.vmx.fVpid); + + uint64_t au64Descriptor[2]; + if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS) + { + au64Descriptor[0] = 0; + au64Descriptor[1] = 0; + } + else + { + AssertPtr(pVCpu); + AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); + au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid; + au64Descriptor[1] = GCPtr; + } + + int rc = VMXR0InvVPID(enmFlush, &au64Descriptor[0]); NOREF(rc); + AssertMsg(rc == VINF_SUCCESS, + ("VMXR0InvVPID %#x %u %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc)); + if ( RT_SUCCESS(rc) + && pVCpu) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid); + } +} + + +/** + * Invalidates a guest page by guest virtual address. Only relevant for + * EPT/VPID, otherwise there is nothing really to invalidate. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. 
+ * @param GCVirt Guest virtual address of the page to invalidate. + */ +VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + LogFlowFunc(("pVM=%p pVCpu=%p GCVirt=%RGv\n", pVM, pVCpu, GCVirt)); + + bool fFlushPending = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_FLUSH); + if (!fFlushPending) + { + /* + * We must invalidate the guest TLB entry in either case, we cannot ignore it even for the EPT case + * See @bugref{6043} and @bugref{6177}. + * + * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() as this + * function maybe called in a loop with individual addresses. + */ + if (pVM->hm.s.vmx.fVpid) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt); + } + else + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + } + else if (pVM->hm.s.fNestedPaging) + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + } + + return VINF_SUCCESS; +} + + +/** + * Invalidates a guest page by physical address. Only relevant for EPT/VPID, + * otherwise there is nothing really to invalidate. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param GCPhys Guest physical address of the page to invalidate. + */ +VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) +{ + LogFlowFunc(("%RGp\n", GCPhys)); + + /* + * We cannot flush a page by guest-physical address. invvpid takes only a linear address while invept only flushes + * by EPT not individual addresses. We update the force flag here and flush before the next VM-entry in hmR0VmxFlushTLB*(). + * This function might be called in a loop. This should cause a flush-by-EPT if EPT is in use. See @bugref{6568}. + */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgPhys); + return VINF_SUCCESS; +} + + +/** + * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the + * case where neither EPT nor VPID is supported by the CPU. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbNone(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVCpu); + AssertPtr(pCpu); + NOREF(pVM); + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); + + /** @todo TLB shootdown is currently not used. See hmQueueInvlPage(). */ +#if 0 + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + pVCpu->hm.s.TlbShootdown.cPages = 0; +#endif + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + pVCpu->hm.s.fForceTLBFlush = false; + return; +} + + +/** + * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * @remarks All references to "ASID" in this function pertains to "VPID" in + * Intel's nomenclature. The reason is, to avoid confusion in compare + * statements since the host-CPU copies are named "ASID". + * + * @remarks Called with interrupts disabled. 
+ */ +static void hmR0VmxFlushTaggedTlbBoth(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ +#ifdef VBOX_WITH_STATISTICS + bool fTlbFlushed = false; +# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0) +# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \ + if (!fTlbFlushed) \ + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \ + } while (0) +#else +# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0) +# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0) +#endif + + AssertPtr(pVM); + AssertPtr(pCpu); + AssertPtr(pVCpu); + AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid, + ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled." + "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid)); + + + /* + * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. + * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB + * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + ++pCpu->uCurrentAsid; + if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) + { + pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */ + pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ + pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ + } + + pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid; + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + + /* + * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also + * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}. + */ + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + HMVMX_SET_TAGGED_TLB_FLUSHED(); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); /* Already flushed-by-EPT, skip doing it again below. */ + } + + /* Check for explicit TLB shootdowns. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + /* + * Changes to the EPT paging structure by VMM requires flushing by EPT as the CPU creates + * guest-physical (only EPT-tagged) mappings while traversing the EPT tables when EPT is in use. + * Flushing by VPID will only flush linear (only VPID-tagged) and combined (EPT+VPID tagged) mappings + * but not guest-physical mappings. + * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". See @bugref{6568}. + */ + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + HMVMX_SET_TAGGED_TLB_FLUSHED(); + } + + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown); + + /* + * Flush individual guest entries using VPID from the TLB or as little as possible with EPT + * as supported by the CPU. 
+ */ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++) + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]); + } + else + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + + HMVMX_SET_TAGGED_TLB_FLUSHED(); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + pVCpu->hm.s.TlbShootdown.cPages = 0; + } +#endif + + pVCpu->hm.s.fForceTLBFlush = false; + + HMVMX_UPDATE_FLUSH_SKIPPED_STAT(); + + Assert(pVCpu->hm.s.idLastCpu == pCpu->idCpu); + Assert(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes); + AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes, + ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes)); + AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); + + /* Update VMCS with the VPID. */ + int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid); + AssertRC(rc); + +#undef HMVMX_SET_TAGGED_TLB_FLUSHED +} + + +/** + * Flushes the tagged-TLB entries for EPT CPUs as necessary. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbEpt(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pCpu); + AssertMsg(pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with NestedPaging disabled.")); + AssertMsg(!pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID enabled.")); + + /* + * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. + * A change in the TLB flush count implies the host CPU is online after a suspend/resume. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + } + + /* Check for explicit TLB shootdown flushes. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + } + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + + if (pVCpu->hm.s.fForceTLBFlush) + { + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + pVCpu->hm.s.fForceTLBFlush = false; + } + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + else + { + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + /* We cannot flush individual entries without VPID support. Flush using EPT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown); + hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt); + } + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); + + pVCpu->hm.s.TlbShootdown.cPages = 0; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + } +#endif +} + + +/** + * Flushes the tagged-TLB entries for VPID CPUs as necessary. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. 
+ * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + * + * @remarks Called with interrupts disabled. + */ +static void hmR0VmxFlushTaggedTlbVpid(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pCpu); + AssertMsg(pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked with VPID disabled.")); + AssertMsg(!pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging enabled")); + + /* + * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last. + * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB + * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore. + */ + if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu + || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes) + { + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); + } + + /* Check for explicit TLB shootdown flushes. */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) + { + /* + * If we ever support VPID flush combinations other than ALL or SINGLE-context (see hmR0VmxSetupTaggedTlb()) + * we would need to explicitly flush in this case (add an fExplicitFlush = true here and change the + * pCpu->fFlushAsidBeforeUse check below to include fExplicitFlush's too) - an obscure corner case. + */ + pVCpu->hm.s.fForceTLBFlush = true; + STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); + } + + pVCpu->hm.s.idLastCpu = pCpu->idCpu; + if (pVCpu->hm.s.fForceTLBFlush) + { + ++pCpu->uCurrentAsid; + if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) + { + pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */ + pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ + pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ + } + + pVCpu->hm.s.fForceTLBFlush = false; + pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes; + pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid; + if (pCpu->fFlushAsidBeforeUse) + hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */); + } + /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage() + * where it is commented out. Support individual entry flushing + * someday. */ +#if 0 + else + { + AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid, + ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n", + pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes, + pCpu->uCurrentAsid, pCpu->cTlbFlushes)); + + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN)) + { + /* Flush individual guest entries using VPID or as little as possible with EPT as supported by the CPU. 
*/ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + { + for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++) + hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]); + } + else + hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */); + + pVCpu->hm.s.TlbShootdown.cPages = 0; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN); + } + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); + } +#endif + + AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes, + ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes)); + AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid)); + AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, + ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); + + int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid); + AssertRC(rc); +} + + +/** + * Flushes the guest TLB entry based on CPU capabilities. + * + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the global HM CPU struct. + */ +DECLINLINE(void) hmR0VmxFlushTaggedTlb(PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + switch (pVM->hm.s.vmx.uFlushTaggedTlb) + { + case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_EPT: hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_VPID: hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break; + case HMVMX_FLUSH_TAGGED_TLB_NONE: hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break; + default: + AssertMsgFailed(("Invalid flush-tag function identifier\n")); + break; + } +} + + +/** + * Sets up the appropriate tagged TLB-flush level and handler for flushing guest + * TLB entries from the host TLB before VM-entry. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +static int hmR0VmxSetupTaggedTlb(PVM pVM) +{ + /* + * Determine optimal flush type for Nested Paging. + * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup unrestricted + * guest execution (see hmR3InitFinalizeR0()). + */ + if (pVM->hm.s.fNestedPaging) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT) + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT; + else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS; + else + { + /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Make sure the write-back cacheable memory type for EPT is supported. */ + if (!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB)) + { + LogRel(("hmR0VmxSetupTaggedTlb: Unsupported EPTP memory type %#x.\n", pVM->hm.s.vmx.Msrs.u64EptVpidCaps)); + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + } + else + { + /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. 
*/ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + } + + /* + * Determine optimal flush type for VPID. + */ + if (pVM->hm.s.vmx.fVpid) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID) + { + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT) + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT; + else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS) + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS; + else + { + /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */ + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) + LogRel(("hmR0VmxSetupTaggedTlb: Only INDIV_ADDR supported. Ignoring VPID.\n")); + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS) + LogRel(("hmR0VmxSetupTaggedTlb: Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n")); + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED; + pVM->hm.s.vmx.fVpid = false; + } + } + else + { + /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */ + Log4(("hmR0VmxSetupTaggedTlb: VPID supported without INVEPT support. Ignoring VPID.\n")); + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED; + pVM->hm.s.vmx.fVpid = false; + } + } + + /* + * Setup the handler for flushing tagged-TLBs. + */ + if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID; + else if (pVM->hm.s.fNestedPaging) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT; + else if (pVM->hm.s.vmx.fVpid) + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID; + else + pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE; + return VINF_SUCCESS; +} + + +/** + * Sets up pin-based VM-execution controls in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxSetupPinCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + uint32_t val = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0; /* Bits set here must always be set. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1; /* Bits cleared here must always be cleared. */ + + val |= VMX_VMCS_CTRL_PIN_EXEC_EXT_INT_EXIT /* External interrupts causes a VM-exits. */ + | VMX_VMCS_CTRL_PIN_EXEC_NMI_EXIT; /* Non-maskable interrupts causes a VM-exit. */ + Assert(!(val & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)); + + /* Enable the VMX preemption timer. */ + if (pVM->hm.s.vmx.fUsePreemptTimer) + { + Assert(pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER); + val |= VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER; + } + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupPinCtls: invalid pin-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n", + pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set pin-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32PinCtls = val; + return rc; +} + + +/** + * Sets up processor-based VM-execution controls in the VMCS. + * + * @returns VBox status code. 
+ * @param pVM Pointer to the VM. + * @param pVMCPU Pointer to the VMCPU. + */ +static int hmR0VmxSetupProcCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + int rc = VERR_INTERNAL_ERROR_5; + uint32_t val = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + val |= VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT /* HLT causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING /* Use TSC-offsetting. */ + | VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_RDPMC_EXIT /* RDPMC causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_MONITOR_EXIT /* MONITOR causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_MWAIT_EXIT; /* MWAIT causes a VM-exit. */ + + /* We toggle VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */ + if ( !(pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT) + || (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT)) + { + LogRel(("hmR0VmxSetupProcCtls: unsupported VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT combo!")); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */ + if (!pVM->hm.s.fNestedPaging) + { + Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */ + val |= VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + } + + /* Use TPR shadowing if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); + Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0); + rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic); + AssertRCReturn(rc, rc); + + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */ + /* CR8 writes causes a VM-exit based on TPR threshold. */ + Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT)); + Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT)); + } + else + { + /* + * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs. + * Set this control only for 64-bit guests. + */ + if (pVM->hm.s.fAllow64BitGuests) + { + val |= VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT /* CR8 reads causes a VM-exit. */ + | VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT; /* CR8 writes causes a VM-exit. */ + } + } + + /* Use MSR-bitmaps if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + { + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS; + + Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap); + Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap); + AssertRCReturn(rc, rc); + + /* + * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored + * automatically (either as part of the MSR-load/store areas or dedicated fields in the VMCS). 
+ */ + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + } + + /* Use the secondary processor-based VM-execution controls if supported by the CPU. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL) + val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL; + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid processor-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n", + pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set processor-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32ProcCtls = val; + + /* + * Secondary processor-based VM-execution controls. + */ + if (RT_LIKELY(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)) + { + val = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0; /* Bits set here must be set in the VMCS. */ + zap = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT) + val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; /* WBINVD causes a VM-exit. */ + + if (pVM->hm.s.fNestedPaging) + val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; /* Enable EPT. */ + else + { + /* + * Without Nested Paging, INVPCID should cause a VM-exit. Enabling this bit causes the CPU to refer to + * VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT when INVPCID is executed by the guest. + * See Intel spec. 25.4 "Changes to instruction behaviour in VMX non-root operation". + */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_INVPCID) + val |= VMX_VMCS_CTRL_PROC_EXEC2_INVPCID; + } + + if (pVM->hm.s.vmx.fVpid) + val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; /* Enable VPID. */ + + if (pVM->hm.s.vmx.fUnrestrictedGuest) + val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST; /* Enable Unrestricted Execution. */ + + /* Enable Virtual-APIC page accesses if supported by the CPU. This is essentially where the TPR shadow resides. */ + /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be + * done dynamically. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) + { + Assert(pVM->hm.s.vmx.HCPhysApicAccess); + Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */ + val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; /* Virtualize APIC accesses. 
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess); + AssertRCReturn(rc, rc); + } + + if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; /* Enable RDTSCP support. */ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS) + hmR0VmxSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); + } + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid secondary processor-based VM-execution controls combo! " + "cpu=%#RX64 val=%#RX64 zap=%#RX64\n", pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0, val, zap)); + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set secondary processor-based VM-execution controls. */ + pVCpu->hm.s.vmx.u32ProcCtls2 = val; + } + else if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest)) + { + LogRel(("hmR0VmxSetupProcCtls: Unrestricted Guest set as true when secondary processor-based VM-execution controls not " + "available\n")); + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + return VINF_SUCCESS; +} + + +/** + * Sets up miscellaneous (everything other than Pin & Processor-based + * VM-execution) control fields in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxSetupMiscCtls(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + int rc = VERR_GENERAL_FAILURE; + + /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ +#if 0 + /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxLoadGuestCR3AndCR4())*/ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); AssertRCReturn(rc, rc); + + /* + * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is) + * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit. + * We thus use the exception bitmap to control it rather than use both. + */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); AssertRCReturn(rc, rc); + + /** @todo Explore possibility of using IO-bitmaps. */ + /* All IO & IOIO instructions cause VM-exits. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); AssertRCReturn(rc, rc); + + /* Initialize the MSR-bitmap area. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc); +#endif + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Setup MSR autoloading/storing. */ + Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr); + Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. 
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); + AssertRCReturn(rc, rc); + + Assert(pVCpu->hm.s.vmx.HCPhysHostMsr); + Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr); + AssertRCReturn(rc, rc); +#endif + + /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */ + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff)); + AssertRCReturn(rc, rc); + + /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ +#if 0 + /* Setup debug controls */ + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); /** @todo We don't support IA32_DEBUGCTL MSR. Should we? */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0); + AssertRCReturn(rc, rc); +#endif + + return rc; +} + + +/** + * Sets up the initial exception bitmap in the VMCS based on static conditions + * (i.e. conditions that cannot ever change after starting the VM). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitXcptBitmap(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + uint32_t u32XcptBitmap = 0; + + /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */ + if (!pVM->hm.s.fNestedPaging) + u32XcptBitmap |= RT_BIT(X86_XCPT_PF); + + pVCpu->hm.s.vmx.u32XcptBitmap = u32XcptBitmap; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Sets up the initial guest-state mask. The guest-state mask is consulted + * before reading guest-state fields from the VMCS as VMREADs can be expensive + * for the nested virtualization case (as it would cause a VM-exit). + * + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitUpdatedGuestStateMask(PVMCPU pVCpu) +{ + /* Initially the guest-state is up-to-date as there is nothing in the VMCS. */ + HMVMXCPU_GST_RESET_TO(pVCpu, HMVMX_UPDATED_GUEST_ALL); + return VINF_SUCCESS; +} + + +/** + * Does per-VM VT-x initialization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) VMXR0InitVM(PVM pVM) +{ + LogFlowFunc(("pVM=%p\n", pVM)); + + int rc = hmR0VmxStructsAlloc(pVM); + if (RT_FAILURE(rc)) + { + LogRel(("VMXR0InitVM: hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc)); + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Does per-VM VT-x termination. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + */ +VMMR0DECL(int) VMXR0TermVM(PVM pVM) +{ + LogFlowFunc(("pVM=%p\n", pVM)); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ) + ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE); +#endif + hmR0VmxStructsFree(pVM); + return VINF_SUCCESS; +} + + +/** + * Sets up the VM for execution under VT-x. + * This function is only called once per-VM during initialization. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. 
+ */ +VMMR0DECL(int) VMXR0SetupVM(PVM pVM) +{ + AssertPtrReturn(pVM, VERR_INVALID_PARAMETER); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + LogFlowFunc(("pVM=%p\n", pVM)); + + /* + * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be allocated. + * We no longer support the highly unlikely case of UnrestrictedGuest without pRealModeTSS. See hmR3InitFinalizeR0(). + */ + /* -XXX- change hmR3InitFinalizeR0Intel() to fail if pRealModeTSS alloc fails. */ + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable + || !pVM->hm.s.vmx.pRealModeTSS)) + { + LogRel(("VMXR0SetupVM: invalid real-on-v86 state.\n")); + return VERR_INTERNAL_ERROR; + } + +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + /* + * This is for the darwin 32-bit/PAE kernels trying to execute 64-bit guests. We don't bother with + * the 32<->64 switcher in this case. This is a rare, legacy use-case with barely any test coverage. + */ + if ( pVM->hm.s.fAllow64BitGuests + && !HMVMX_IS_64BIT_HOST_MODE()) + { + LogRel(("VMXR0SetupVM: Unsupported guest and host paging mode combination.\n")); + return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE; + } +#endif + + /* Initialize these always, see hmR3InitFinalizeR0().*/ + pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE; + pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE; + + /* Setup the tagged-TLB flush handlers. */ + int rc = hmR0VmxSetupTaggedTlb(pVM); + if (RT_FAILURE(rc)) + { + LogRel(("VMXR0SetupVM: hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc)); + return rc; + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvVmcs); + + /* Log the VCPU pointers, useful for debugging SMP VMs. */ + Log4(("VMXR0SetupVM: pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu)); + + /* Set revision dword at the beginning of the VMCS structure. */ + *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo); + + /* Initialize our VMCS region in memory, set the VMCS launch state to "clear". */ + rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + /* Load this VMCS as the current VMCS. */ + rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupPinCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupProcCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxSetupMiscCtls(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxInitXcptBitmap(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + rc = hmR0VmxInitUpdatedGuestStateMask(pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitUpdatedGuestStateMask failed! 
rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + rc = hmR0VmxInitVmcsReadCache(pVM, pVCpu); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); +#endif + + /* Re-sync the CPU's internal data into our VMCS memory region & reset the launch state to "clear". */ + rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc (pVM=%p)\n", rc, pVM), + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc); + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + + hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc); + } + + return VINF_SUCCESS; +} + + +/** + * Saves the host control registers (CR0, CR3, CR4) into the host-state area in + * the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(int) hmR0VmxSaveHostControlRegs(PVM pVM, PVMCPU pVCpu) +{ + RTCCUINTREG uReg = ASMGetCR0(); + int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg); + AssertRCReturn(rc, rc); + +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + /* For the darwin 32-bit hybrid kernel, we need the 64-bit CR3 as it uses 64-bit paging. */ + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uint64_t uRegCR3 = HMR0Get64bitCR3(); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_CR3, uRegCR3); + } + else +#endif + { + uReg = ASMGetCR3(); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg); + } + AssertRCReturn(rc, rc); + + uReg = ASMGetCR4(); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg); + AssertRCReturn(rc, rc); + return rc; +} + + +#if HC_ARCH_BITS == 64 +/** + * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry + * requirements. See hmR0VmxSaveHostSegmentRegs(). + */ +# define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \ + if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \ + { \ + bool fValidSelector = true; \ + if ((selValue) & X86_SEL_LDT) \ + { \ + uint32_t uAttr = ASMGetSegAttr((selValue)); \ + fValidSelector = RT_BOOL(uAttr != ~0U && (uAttr & X86_DESC_P)); \ + } \ + if (fValidSelector) \ + { \ + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \ + pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \ + } \ + (selValue) = 0; \ + } +#endif + + +/** + * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into + * the host-state area in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLINLINE(int) hmR0VmxSaveHostSegmentRegs(PVM pVM, PVMCPU pVCpu) +{ + int rc = VERR_INTERNAL_ERROR_5; + + /* + * Host DS, ES, FS and GS segment registers. + */ +#if HC_ARCH_BITS == 64 + RTSEL uSelDS = ASMGetDS(); + RTSEL uSelES = ASMGetES(); + RTSEL uSelFS = ASMGetFS(); + RTSEL uSelGS = ASMGetGS(); +#else + RTSEL uSelDS = 0; + RTSEL uSelES = 0; + RTSEL uSelFS = 0; + RTSEL uSelGS = 0; +#endif + + /* Recalculate which host-state bits need to be manually restored. */ + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + + /* + * Host CS and SS segment registers. + */ +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + RTSEL uSelCS; + RTSEL uSelSS; + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uSelCS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS; + uSelSS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS; + } + else + { + /* Seems darwin uses the LDT (TI flag is set) in the CS & SS selectors which VT-x doesn't like. 
*/ + uSelCS = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS; + uSelSS = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS; + } +#else + RTSEL uSelCS = ASMGetCS(); + RTSEL uSelSS = ASMGetSS(); +#endif + + /* + * Host TR segment register. + */ + RTSEL uSelTR = ASMGetTR(); + +#if HC_ARCH_BITS == 64 + /* + * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to gain VM-entry and restore them + * before we get preempted. See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers". + */ + VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS); + VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES); + VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS); + VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS); +# undef VMXLOCAL_ADJUST_HOST_SEG +#endif + + /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */ + Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT)); + Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT)); + Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT)); + Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT)); + Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT)); + Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT)); + Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT)); + Assert(uSelCS); + Assert(uSelTR); + + /* Assertion is right but we would not have updated u32ExitCtls yet. */ +#if 0 + if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)) + Assert(uSelSS != 0); +#endif + + /* Write these host selector fields into the host-state area in the VMCS. */ + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_CS, uSelCS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_SS, uSelSS); AssertRCReturn(rc, rc); +#if HC_ARCH_BITS == 64 + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_DS, uSelDS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_ES, uSelES); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_FS, uSelFS); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_GS, uSelGS); AssertRCReturn(rc, rc); +#endif + rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_TR, uSelTR); AssertRCReturn(rc, rc); + + /* + * Host GDTR and IDTR. + */ + RTGDTR Gdtr; + RT_ZERO(Gdtr); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + X86XDTR64 Gdtr64; + X86XDTR64 Idtr64; + HMR0Get64bitGdtrAndIdtr(&Gdtr64, &Idtr64); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, Gdtr64.uAddr); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, Idtr64.uAddr); AssertRCReturn(rc, rc); + + Gdtr.cbGdt = Gdtr64.cb; + Gdtr.pGdt = (uintptr_t)Gdtr64.uAddr; + } + else +#endif + { + RTIDTR Idtr; + ASMGetGDTR(&Gdtr); + ASMGetIDTR(&Idtr); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc); + +#if HC_ARCH_BITS == 64 + /* + * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps them to the + * maximum limit (0xffff) on every VM-exit. + */ + if (Gdtr.cbGdt != 0xffff) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR; + AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64)); + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64)); + } + + /* + * IDT limit is practically 0xfff. Therefore if the host has the limit as 0xfff, VT-x bloating the limit to 0xffff + * is not a problem as it's not possible to get at them anyway. See Intel spec. 
6.14.1 "64-Bit Mode IDT" and + * Intel spec. 6.2 "Exception and Interrupt Vectors". + */ + if (Idtr.cbIdt < 0x0fff) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR; + AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64)); + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64)); + } +#endif + } + + /* + * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI and RPL bits + * is effectively what the CPU does for "scaling by 8". TI is always 0 and RPL should be too in most cases. + */ + if ((uSelTR & X86_SEL_MASK) > Gdtr.cbGdt) + { + AssertMsgFailed(("hmR0VmxSaveHostSegmentRegs: TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt)); + return VERR_VMX_INVALID_HOST_STATE; + } + + PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK)); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + /* We need the 64-bit TR base for hybrid darwin. */ + uint64_t u64TRBase = X86DESC64_BASE((PX86DESC64)pDesc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, u64TRBase); + } + else +#endif + { + uintptr_t uTRBase; +#if HC_ARCH_BITS == 64 + uTRBase = X86DESC64_BASE(pDesc); + + /* + * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on all VM-exits. + * The type is the same for 64-bit busy TSS[1]. The limit needs manual restoration if the host has something else. + * Task switching is not supported in 64-bit mode[2], but the limit still matters as IOPM is supported in 64-bit mode. + * Restoring the limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0. + * + * [1] See Intel spec. 3.5 "System Descriptor Types". + * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode". + */ + Assert(pDesc->System.u4Type == 11); + if ( pDesc->System.u16LimitLow != 0x67 + || pDesc->System.u4LimitHigh) + { + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR; + pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR; + + /* Store the GDTR here as we need it while restoring TR. */ + memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64)); + } +#else + uTRBase = X86DESC_BASE(pDesc); +#endif + rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase); + } + AssertRCReturn(rc, rc); + + /* + * Host FS base and GS base. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE); + uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc); + +# if HC_ARCH_BITS == 64 + /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */ + if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS) + pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase; + if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS) + pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase; +# endif + } +#endif + return rc; +} + + +/** + * Saves certain host MSRs in the VM-Exit MSR-load area and some in the + * host-state area of the VMCS. Theses MSRs will be automatically restored on + * the host after every successful VM exit. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. 
+ */ +DECLINLINE(int) hmR0VmxSaveHostMsrs(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvHostMsr); + + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; + uint32_t cHostMsrs = 0; + uint32_t u32HostExtFeatures = pVM->hm.s.cpuid.u32AMDFeatureEDX; + + if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)) + { + uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER); + +# if HC_ARCH_BITS == 64 + /* Paranoia. 64-bit code requires these bits to be set always. */ + Assert((u64HostEfer & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)); + + /* + * We currently do not save/restore host EFER, we just make sure it doesn't get modified by VT-x operation. + * All guest accesses (read, write) on EFER cause VM-exits. If we are to conditionally load the guest EFER for + * some reason (e.g. allow transparent reads) we would activate the code below. + */ +# if 0 + /* All our supported 64-bit host platforms must have NXE bit set. Otherwise we can change the below code to save EFER. */ + Assert(u64HostEfer & (MSR_K6_EFER_NXE)); + /* The SCE bit is only applicable in 64-bit mode. Save EFER if it doesn't match what the guest has. + See Intel spec. 30.10.4.3 "Handling the SYSCALL and SYSRET Instructions". */ + if (CPUMIsGuestInLongMode(pVCpu)) + { + uint64_t u64GuestEfer; + rc = CPUMQueryGuestMsr(pVCpu, MSR_K6_EFER, &u64GuestEfer); + AssertRC(rc); + + if ((u64HostEfer & MSR_K6_EFER_SCE) != (u64GuestEfer & MSR_K6_EFER_SCE)) + { + pHostMsr->u32Msr = MSR_K6_EFER; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = u64HostEfer; + pHostMsr++; cHostMsrs++; + } + } +# endif +# else /* HC_ARCH_BITS != 64 */ + pHostMsr->u32Msr = MSR_K6_EFER; + pHostMsr->u32Reserved = 0; +# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongMode(pVCpu)) + { + /* Must match the EFER value in our 64 bits switcher. */ + pHostMsr->u64Value = u64HostEfer | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE; + } + else +# endif + pHostMsr->u64Value = u64HostEfer; + pHostMsr++; cHostMsrs++; +# endif /* HC_ARCH_BITS == 64 */ + } + +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + pHostMsr->u32Msr = MSR_K6_STAR; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_LSTAR; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64-bit mode syscall rip */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_SF_MASK; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */ + pHostMsr++; cHostMsrs++; + pHostMsr->u32Msr = MSR_K8_KERNEL_GS_BASE; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */ + pHostMsr++; cHostMsrs++; + } +# endif + + /* Host TSC AUX MSR must be restored since we always load/store guest TSC AUX MSR. */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pHostMsr->u32Msr = MSR_K8_TSC_AUX; + pHostMsr->u32Reserved = 0; + pHostMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX); + pHostMsr++; cHostMsrs++; + } + + /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. 
*/ + if (RT_UNLIKELY(cHostMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))) + { + LogRel(("cHostMsrs=%u Cpu=%u\n", cHostMsrs, (unsigned)MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))); + pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_HOST_MSR_STORAGE; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cHostMsrs); +#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ + + /* + * Host Sysenter MSRs. + */ + rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); + AssertRCReturn(rc, rc); +#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); + } + else + { + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); + } +#elif HC_ARCH_BITS == 32 + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); +#else + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); +#endif + AssertRCReturn(rc, rc); + + /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT, IA32_EFER, also see + * hmR0VmxSetupExitCtls() !! */ + return rc; +} + + +/** + * Sets up VM-entry controls in the VMCS. These controls can affect things done + * on VM-exit; e.g. "load debug controls", see Intel spec. 24.8.1 "VM-entry + * controls". + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxLoadGuestEntryCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + uint32_t val = pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supports the 1-setting of this bit. */ + val |= VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG; + + /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */ + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + val |= VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST; + else + Assert(!(val & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST)); + + /* + * The following should -not- be set (since we're not in SMM mode): + * - VMX_VMCS_CTRL_ENTRY_ENTRY_SMM + * - VMX_VMCS_CTRL_ENTRY_DEACTIVATE_DUALMON + */ + + /** @todo VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR, + * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR, + * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR */ + + if ((val & zap) != val) + { + LogRel(("hmR0VmxLoadGuestEntryCtls: invalid VM-entry controls combo! 
cpu=%RX64 val=%RX64 zap=%RX64\n", + pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set VM-exit controls. */ + pVCpu->hm.s.vmx.u32EntryCtls = val; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS); + } + return rc; +} + + +/** + * Sets up the VM-exit controls in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks requires EFER. + */ +DECLINLINE(int) hmR0VmxLoadGuestExitCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_EXIT_CTLS)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + uint32_t val = pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0; /* Bits set here must be set in the VMCS. */ + uint32_t zap = pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ + + /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */ + val |= VMX_VMCS_CTRL_EXIT_SAVE_DEBUG; + + /* + * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary. + * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in hmR0VmxSaveHostMsrs(). + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; + else + Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)); +#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; /* The switcher goes to long mode. */ + else + Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE)); +#endif + + /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */ + Assert(!(val & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT)); + + /** @todo VMX_VMCS_CTRL_EXIT_LOAD_PERF_MSR, + * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_PAT_MSR, + * VMX_VMCS_CTRL_EXIT_LOAD_HOST_PAT_MSR, + * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR, + * VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR. */ + + if (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER) + val |= VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER; + + if ((val & zap) != val) + { + LogRel(("hmR0VmxSetupProcCtls: invalid VM-exit controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n", + pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0, val, zap)); + pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, val); + AssertRCReturn(rc, rc); + + /* Update VCPU with the currently set VM-exit controls. */ + pVCpu->hm.s.vmx.u32ExitCtls = val; + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_EXIT_CTLS); + } + return rc; +} + + +/** + * Loads the guest APIC and related state. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. 
+ */ +DECLINLINE(int) hmR0VmxLoadGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE)) + { + /* Setup TPR shadowing. Also setup TPR patching for 32-bit guests. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + { + Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); + + bool fPendingIntr = false; + uint8_t u8Tpr = 0; + uint8_t u8PendingIntr = 0; + rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr); + AssertRCReturn(rc, rc); + + /* + * If there are external interrupts pending but masked by the TPR value, instruct VT-x to cause a VM-exit when + * the guest lowers its TPR below the highest-priority pending interrupt and we can deliver the interrupt. + * If there are no external interrupts pending, set threshold to 0 to not cause a VM-exit. We will eventually deliver + * the interrupt when we VM-exit for other reasons. + */ + pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8Tpr; /* Offset 0x80 is TPR in the APIC MMIO range. */ + uint32_t u32TprThreshold = 0; + if (fPendingIntr) + { + /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR (which is the Task-Priority Class). */ + const uint8_t u8PendingPriority = (u8PendingIntr >> 4) & 0xf; + const uint8_t u8TprPriority = (u8Tpr >> 4) & 0xf; + if (u8PendingPriority <= u8TprPriority) + u32TprThreshold = u8PendingPriority; + else + u32TprThreshold = u8TprPriority; /* Required for Vista 64-bit guest, see @bugref{6398}. */ + } + Assert(!(u32TprThreshold & 0xfffffff0)); /* Bits 31:4 MBZ. */ + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold); + AssertRCReturn(rc, rc); + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + return rc; +} + + +/** + * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it). + * + * @returns Guest's interruptibility-state. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag. + */ +DECLINLINE(uint32_t) hmR0VmxGetGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* + * Instructions like STI and MOV SS inhibit interrupts till the next instruction completes. Check if we should + * inhibit interrupts or clear any existing interrupt-inhibition. + */ + uint32_t uIntrState = 0; + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) + { + /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */ + AssertMsg(HMVMXCPU_GST_IS_SET(pVCpu, HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS), + ("%#x\n", HMVMXCPU_GST_VALUE(pVCpu))); + if (pMixedCtx->rip != EMGetInhibitInterruptsPC(pVCpu)) + { + /* + * We can clear the inhibit force flag as even if we go back to the recompiler without executing guest code in + * VT-x, the flag's condition to be cleared is met and thus the cleared state is correct. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + else if (pMixedCtx->eflags.Bits.u1IF) + uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI; + else + uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS; + } + return uIntrState; +} + + +/** + * Loads the guest's interruptibility-state into the guest-state area in the + * VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
+ * @param uIntrState The interruptibility-state to set. + */ +static int hmR0VmxLoadGuestIntrState(PVMCPU pVCpu, uint32_t uIntrState) +{ + AssertMsg(!(uIntrState & 0xfffffff0), ("%#x\n", uIntrState)); /* Bits 31:4 MBZ. */ + Assert((uIntrState & 0x3) != 0x3); /* Block-by-STI and MOV SS cannot be simultaneously set. */ + int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, uIntrState); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest's RIP into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RIP)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pMixedCtx->rip); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RIP); + Log4(("Load: VMX_VMCS_GUEST_RIP=%#RX64 fContextUseFlags=%#RX32\n", pMixedCtx->rip, HMCPU_CF_VALUE(pVCpu))); + } + return rc; +} + + +/** + * Loads the guest's RSP into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RSP)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pMixedCtx->rsp); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RSP); + Log4(("Load: VMX_VMCS_GUEST_RSP=%#RX64\n", pMixedCtx->rsp)); + } + return rc; +} + + +/** + * Loads the guest's RFLAGS into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS)) + { + /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ). + Let us assert it as such and use 32-bit VMWRITE. */ + Assert(!(pMixedCtx->rflags.u64 >> 32)); + X86EFLAGS Eflags = pMixedCtx->eflags; + Eflags.u32 &= VMX_EFLAGS_RESERVED_0; /* Bits 22-31, 15, 5 & 3 MBZ. */ + Eflags.u32 |= VMX_EFLAGS_RESERVED_1; /* Bit 1 MB1. */ + + /* + * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so we can restore them on VM exit. + * Modify the real-mode guest's eflags so that VT-x can run the real-mode guest code under Virtual 8086 mode. + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + pVCpu->hm.s.vmx.RealMode.Eflags.u32 = Eflags.u32; /* Save the original eflags of the real-mode guest. */ + Eflags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */ + Eflags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. 
*/ + } + + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, Eflags.u32); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RFLAGS); + Log4(("Load: VMX_VMCS_GUEST_RFLAGS=%#RX32\n", Eflags.u32)); + } + return rc; +} + + +/** + * Loads the guest RIP, RSP and RFLAGS into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxLoadGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxLoadGuestRsp(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxLoadGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest CR0 control register into the guest-state area in the VMCS. + * CR0 is partially shared with the host and we have to consider the FPU bits. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadSharedCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* + * Guest CR0. + * Guest FPU. + */ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + { + Assert(!(pMixedCtx->cr0 >> 32)); + uint32_t u32GuestCR0 = pMixedCtx->cr0; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* The guest's view (read access) of its CR0 is unblemished. */ + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32GuestCR0); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR0_READ_SHADOW=%#RX32\n", u32GuestCR0)); + + /* Setup VT-x's view of the guest CR0. */ + /* Minimize VM-exits due to CR3 changes when we have NestedPaging. */ + if (pVM->hm.s.fNestedPaging) + { + if (CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + /* The guest has paging enabled, let it access CR3 without causing a VM exit if supported. */ + pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT); + } + else + { + /* The guest doesn't have paging enabled, make CR3 access to cause VM exits to update our shadow. */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT + | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + } + + /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */ + if (pVM->hm.s.vmx.fUnrestrictedGuest) + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT; + + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + } + else + u32GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */ + + /* + * Guest FPU bits. + * Intel spec. 23.8 "Restrictions on VMX operation" mentions that CR0.NE bit must always be set on the first + * CPUs to support VT-x and no mention of with regards to UX in VM-entry checks. + */ + u32GuestCR0 |= X86_CR0_NE; + bool fInterceptNM = false; + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + fInterceptNM = false; /* Guest FPU active, no need to VM-exit on #NM. */ + /* The guest should still get #NM exceptions when it expects it to, so we should not clear TS & MP bits here. 
+ We're only concerned about -us- not intercepting #NMs when the guest-FPU is active. Not the guest itself! */ + } + else + { + fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */ + u32GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */ + | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */ + } + + /* Catch floating point exceptions if we need to report them to the guest in a different way. */ + bool fInterceptMF = false; + if (!(pMixedCtx->cr0 & X86_CR0_NE)) + fInterceptMF = true; + + /* Finally, intercept all exceptions as we cannot directly inject them in real-mode, see hmR0VmxInjectEventVmcs(). */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(PDMVmmDevHeapIsEnabled(pVM)); + Assert(pVM->hm.s.vmx.pRealModeTSS); + pVCpu->hm.s.vmx.u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK; + fInterceptNM = true; + fInterceptMF = true; + } + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK; + + if (fInterceptNM) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_NM); + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_NM); + + if (fInterceptMF) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_MF); + else + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_MF); + + /* Additional intercepts for debugging, define these yourself explicitly. */ +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap |= 0 + | RT_BIT(X86_XCPT_BP) + | RT_BIT(X86_XCPT_DB) + | RT_BIT(X86_XCPT_DE) + | RT_BIT(X86_XCPT_NM) + | RT_BIT(X86_XCPT_UD) + | RT_BIT(X86_XCPT_NP) + | RT_BIT(X86_XCPT_SS) + | RT_BIT(X86_XCPT_GP) + | RT_BIT(X86_XCPT_PF) + | RT_BIT(X86_XCPT_MF) + ; +#elif defined(HMVMX_ALWAYS_TRAP_PF) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_PF); +#endif + + Assert(pVM->hm.s.fNestedPaging || (pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT(X86_XCPT_PF))); + + /* Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */ + uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */ + uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG); + else + Assert((uSetCR0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG)); + + u32GuestCR0 |= uSetCR0; + u32GuestCR0 &= uZapCR0; + u32GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */ + + /* Write VT-x's view of the guest CR0 into the VMCS and update the exception bitmap. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCR0); + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_GUEST_CR0=%#RX32 (uSetCR0=%#RX32 uZapCR0=%#RX32)\n", u32GuestCR0, uSetCR0, uZapCR0)); + + /* + * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed + * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits + * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables). + */ + uint32_t u32CR0Mask = 0; + u32CR0Mask = X86_CR0_PE + | X86_CR0_NE + | X86_CR0_WP + | X86_CR0_PG + | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */ + | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. 
Don't let the guest modify the host CR0.CD */ + | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */ + + /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM + * enmGuestMode to be in-sync with the current mode. See @bugref{6398} + * and @bugref{6944}. */ +#if 0 + if (pVM->hm.s.vmx.fUnrestrictedGuest) + u32CR0Mask &= ~X86_CR0_PE; +#endif + if (pVM->hm.s.fNestedPaging) + u32CR0Mask &= ~X86_CR0_WP; + + /* If the guest FPU state is active, don't need to VM-exit on writes to FPU related bits in CR0. */ + if (fInterceptNM) + { + u32CR0Mask |= X86_CR0_TS + | X86_CR0_MP; + } + + /* Write the CR0 mask into the VMCS and update the VCPU's copy of the current CR0 mask. */ + pVCpu->hm.s.vmx.u32CR0Mask = u32CR0Mask; + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32CR0Mask); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR0_MASK=%#RX32\n", u32CR0Mask)); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0); + } + return rc; +} + + +/** + * Loads the guest control registers (CR3, CR4) into the guest-state area + * in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestCR3AndCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* + * Guest CR2. + * It's always loaded in the assembler code. Nothing to do here. + */ + + /* + * Guest CR3. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3)) + { + RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS; + if (pVM->hm.s.fNestedPaging) + { + pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu); + + /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */ + Assert(pVCpu->hm.s.vmx.HCPhysEPTP); + Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000))); + Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff)); + + /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */ + pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB + | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT); + + /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */ + AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */ + && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x3f) == 0, /* Bits 6:11 MBZ. */ + ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); + + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS64_CTRL_EPTP_FULL=%#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); + + if ( pVM->hm.s.vmx.fUnrestrictedGuest + || CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. 
*/ + if (CPUMIsGuestInPAEModeEx(pMixedCtx)) + { + rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc); + } + + /* The guest's view of its CR3 is unblemished with Nested Paging when the guest is using paging or we + have Unrestricted Execution to handle the guest when it's not using paging. */ + GCPhysGuestCR3 = pMixedCtx->cr3; + } + else + { + /* + * The guest is not using paging, but the CPU (VT-x) has to. While the guest thinks it accesses physical memory + * directly, we use our identity-mapped page table to map guest-linear to guest-physical addresses. + * EPT takes care of translating it to host-physical addresses. + */ + RTGCPHYS GCPhys; + Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable); + Assert(PDMVmmDevHeapIsEnabled(pVM)); + + /* We obtain it here every time as the guest could have relocated this PCI region. */ + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys); + AssertRCReturn(rc, rc); + + GCPhysGuestCR3 = GCPhys; + } + + Log4(("Load: VMX_VMCS_GUEST_CR3=%#RGv (GstN)\n", GCPhysGuestCR3)); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3); + } + else + { + /* Non-nested paging case, just use the hypervisor's CR3. */ + RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu); + + Log4(("Load: VMX_VMCS_GUEST_CR3=%#RHv (HstN)\n", HCPhysGuestCR3)); + rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3); + } + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3); + } + + /* + * Guest CR4. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4)) + { + Assert(!(pMixedCtx->cr4 >> 32)); + uint32_t u32GuestCR4 = pMixedCtx->cr4; + + /* The guest's view of its CR4 is unblemished. */ + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32GuestCR4); + AssertRCReturn(rc, rc); + Log4(("Load: VMX_VMCS_CTRL_CR4_READ_SHADOW=%#RX32\n", u32GuestCR4)); + + /* Setup VT-x's view of the guest CR4. */ + /* + * If we're emulating real-mode using virtual-8086 mode, we want to redirect software interrupts to the 8086 program + * interrupt handler. Clear the VME bit (the interrupt redirection bitmap is already all 0, see hmR3InitFinalizeR0()) + * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode". + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVM)); + u32GuestCR4 &= ~X86_CR4_VME; + } + + if (pVM->hm.s.fNestedPaging) + { + if ( !CPUMIsGuestPagingEnabledEx(pMixedCtx) + && !pVM->hm.s.vmx.fUnrestrictedGuest) + { + /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */ + u32GuestCR4 |= X86_CR4_PSE; + /* Our identity mapping is a 32 bits page directory. */ + u32GuestCR4 &= ~X86_CR4_PAE; + } + /* else use guest CR4.*/ + } + else + { + /* + * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host + * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables. + */ + switch (pVCpu->hm.s.enmShadowMode) + { + case PGMMODE_REAL: /* Real-mode. 
*/ + case PGMMODE_PROTECTED: /* Protected mode without paging. */ + case PGMMODE_32_BIT: /* 32-bit paging. */ + { + u32GuestCR4 &= ~X86_CR4_PAE; + break; + } + + case PGMMODE_PAE: /* PAE paging. */ + case PGMMODE_PAE_NX: /* PAE paging with NX. */ + { + u32GuestCR4 |= X86_CR4_PAE; + break; + } + + case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ + case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */ +#ifdef VBOX_ENABLE_64_BITS_GUESTS + break; +#endif + default: + AssertFailed(); + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; + } + } + + /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */ + uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + u32GuestCR4 |= uSetCR4; + u32GuestCR4 &= uZapCR4; + + /* Write VT-x's view of the guest CR4 into the VMCS. */ + Log4(("Load: VMX_VMCS_GUEST_CR4=%#RX32 (Set=%#RX32 Zap=%#RX32)\n", u32GuestCR4, uSetCR4, uZapCR4)); + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCR4); + AssertRCReturn(rc, rc); + + /* Setup CR4 mask. CR4 flags owned by the host, if the guest attempts to change them, that would cause a VM exit. */ + uint32_t u32CR4Mask = 0; + u32CR4Mask = X86_CR4_VME + | X86_CR4_PAE + | X86_CR4_PGE + | X86_CR4_PSE + | X86_CR4_VMXE; + pVCpu->hm.s.vmx.u32CR4Mask = u32CR4Mask; + rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32CR4Mask); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4); + } + return rc; +} + + +/** + * Loads the guest debug registers into the guest-state area in the VMCS. + * This also sets up whether #DB and MOV DRx accesses cause VM exits. + * + * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3). + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadSharedDebugState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + return VINF_SUCCESS; + +#ifdef VBOX_STRICT + /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */ + if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + { + /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */ + Assert((pMixedCtx->dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); /* Bits 63:32, 15, 14, 12, 11 are reserved. */ + Assert((pMixedCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); /* Bit 10 is reserved (RA1). */ + } +#endif + + int rc; + PVM pVM = pVCpu->CTX_SUFF(pVM); + bool fInterceptDB = false; + bool fInterceptMovDRx = false; + if ( pVCpu->hm.s.fSingleInstruction + || DBGFIsStepping(pVCpu)) + { + /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. 
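Otherwise we fall back to setting EFL.TF on the guest and intercepting #DB, see the else branch below.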
*/ + if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG) + { + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + Assert(fInterceptDB == false); + } + else + { + pMixedCtx->eflags.u32 |= X86_EFL_TF; + pVCpu->hm.s.fClearTrapFlag = true; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS); + fInterceptDB = true; + } + } + + if ( fInterceptDB + || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK)) + { + /* + * Use the combined guest and host DRx values found in the hypervisor + * register set because the debugger has breakpoints active or someone + * is single stepping on the host side without a monitor trap flag. + * + * Note! DBGF expects a clean DR6 state before executing guest code. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pMixedCtx) + && !CPUMIsHyperDebugStateActivePending(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsHyperDebugStateActivePending(pVCpu)); + Assert(!CPUMIsGuestDebugStateActivePending(pVCpu)); + } + else +#endif + if (!CPUMIsHyperDebugStateActive(pVCpu)) + { + CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsHyperDebugStateActive(pVCpu)); + Assert(!CPUMIsGuestDebugStateActive(pVCpu)); + } + + /* Update DR7. (The other DRx values are handled by CPUM one way or the other.) */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)CPUMGetHyperDR7(pVCpu)); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.fUsingHyperDR7 = true; + fInterceptDB = true; + fInterceptMovDRx = true; + } + else + { + /* + * If the guest has enabled debug registers, we need to load them prior to + * executing guest code so they'll trigger at the right time. + */ + if (pMixedCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */ + { +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if ( CPUMIsGuestInLongModeEx(pMixedCtx) + && !CPUMIsGuestDebugStateActivePending(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActivePending(pVCpu)); + Assert(!CPUMIsHyperDebugStateActivePending(pVCpu)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + } + else +#endif + if (!CPUMIsGuestDebugStateActive(pVCpu)) + { + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActive(pVCpu)); + Assert(!CPUMIsHyperDebugStateActive(pVCpu)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); + } + Assert(!fInterceptDB); + Assert(!fInterceptMovDRx); + } + /* + * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we + * must intercept #DB in order to maintain a correct DR6 guest value. + */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + else if ( !CPUMIsGuestDebugStateActivePending(pVCpu) + && !CPUMIsGuestDebugStateActive(pVCpu)) +#else + else if (!CPUMIsGuestDebugStateActive(pVCpu)) +#endif + { + fInterceptMovDRx = true; + fInterceptDB = true; + } + + /* Update guest DR7. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, pMixedCtx->dr[7]); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.fUsingHyperDR7 = false; + } + + /* + * Update the exception bitmap regarding intercepting #DB generated by the guest. 
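It is only removed from the bitmap when fInterceptDB is false, we are not in real-on-v86 mode and we are not trapping all exceptions for debugging.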
+ */ + if (fInterceptDB) + pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_DB); + else if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB); +#endif + } + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); + + /* + * Update the processor-based VM-execution controls regarding intercepting MOV DRx instructions. + */ + if (fInterceptMovDRx) + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + else + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG); + return VINF_SUCCESS; +} + + +#ifdef VBOX_STRICT +/** + * Strict function to validate segment registers. + * + * @remarks ASSUMES CR0 is up to date. + */ +static void hmR0VmxValidateSegmentRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". */ + /* NOTE: The reason we check for attribute value 0 and not just the unusable bit here is because hmR0VmxWriteSegmentReg() + * only updates the VMCS' copy of the value with the unusable bit and doesn't change the guest-context value. */ + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && ( !CPUMIsGuestInRealModeEx(pCtx) + && !CPUMIsGuestInV86ModeEx(pCtx))) + { + /* Protected mode checks */ + /* CS */ + Assert(pCtx->cs.Attr.n.u1Present); + Assert(!(pCtx->cs.Attr.u & 0xf00)); + Assert(!(pCtx->cs.Attr.u & 0xfffe0000)); + Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff + || !(pCtx->cs.Attr.n.u1Granularity)); + Assert( !(pCtx->cs.u32Limit & 0xfff00000) + || (pCtx->cs.Attr.n.u1Granularity)); + /* CS cannot be loaded with NULL in protected mode. */ + Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS?!? */ + if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) + Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl); + else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) + Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl); + else + AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u2Dpl)); + /* SS */ + Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); + Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL)); + if ( !(pCtx->cr0 & X86_CR0_PE) + || pCtx->cs.Attr.n.u4Type == 3) + { + Assert(!pCtx->ss.Attr.n.u2Dpl); + } + if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); + Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7); + Assert(pCtx->ss.Attr.n.u1Present); + Assert(!(pCtx->ss.Attr.u & 0xf00)); + Assert(!(pCtx->ss.Attr.u & 0xfffe0000)); + Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff + || !(pCtx->ss.Attr.n.u1Granularity)); + Assert( !(pCtx->ss.u32Limit & 0xfff00000) + || (pCtx->ss.Attr.n.u1Granularity)); + } + /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). 
*/ + if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->ds.Attr.n.u1Present); + Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL)); + Assert(!(pCtx->ds.Attr.u & 0xf00)); + Assert(!(pCtx->ds.Attr.u & 0xfffe0000)); + Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff + || !(pCtx->ds.Attr.n.u1Granularity)); + Assert( !(pCtx->ds.u32Limit & 0xfff00000) + || (pCtx->ds.Attr.n.u1Granularity)); + Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->es.Attr.n.u1Present); + Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL)); + Assert(!(pCtx->es.Attr.u & 0xf00)); + Assert(!(pCtx->es.Attr.u & 0xfffe0000)); + Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff + || !(pCtx->es.Attr.n.u1Granularity)); + Assert( !(pCtx->es.u32Limit & 0xfff00000) + || (pCtx->es.Attr.n.u1Granularity)); + Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->fs.Attr.n.u1Present); + Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL)); + Assert(!(pCtx->fs.Attr.u & 0xf00)); + Assert(!(pCtx->fs.Attr.u & 0xfffe0000)); + Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff + || !(pCtx->fs.Attr.n.u1Granularity)); + Assert( !(pCtx->fs.u32Limit & 0xfff00000) + || (pCtx->fs.Attr.n.u1Granularity)); + Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) + { + Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); + Assert(pCtx->gs.Attr.n.u1Present); + Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL)); + Assert(!(pCtx->gs.Attr.u & 0xf00)); + Assert(!(pCtx->gs.Attr.u & 0xfffe0000)); + Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff + || !(pCtx->gs.Attr.n.u1Granularity)); + Assert( !(pCtx->gs.u32Limit & 0xfff00000) + || (pCtx->gs.Attr.n.u1Granularity)); + Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ)); + } + /* 64-bit capable CPUs. */ +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Assert(!(pCtx->cs.u64Base >> 32)); + Assert(!pCtx->ss.Attr.u || !(pCtx->ss.u64Base >> 32)); + Assert(!pCtx->ds.Attr.u || !(pCtx->ds.u64Base >> 32)); + Assert(!pCtx->es.Attr.u || !(pCtx->es.u64Base >> 32)); + } +# endif + } + else if ( CPUMIsGuestInV86ModeEx(pCtx) + || ( CPUMIsGuestInRealModeEx(pCtx) + && !pVM->hm.s.vmx.fUnrestrictedGuest)) + { + /* Real and v86 mode checks. */ + /* hmR0VmxWriteSegmentReg() writes the modified in VMCS. We want what we're feeding to VT-x. 
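For real-on-v86 mode that means the forced 0xf3 attributes; otherwise the guest-context attributes are used as-is.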
*/ + uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3; + } + else + { + u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u; + u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; + } + + /* CS */ + AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel)); + Assert(pCtx->cs.u32Limit == 0xffff); + Assert(u32CSAttr == 0xf3); + /* SS */ + Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4); + Assert(pCtx->ss.u32Limit == 0xffff); + Assert(u32SSAttr == 0xf3); + /* DS */ + Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4); + Assert(pCtx->ds.u32Limit == 0xffff); + Assert(u32DSAttr == 0xf3); + /* ES */ + Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4); + Assert(pCtx->es.u32Limit == 0xffff); + Assert(u32ESAttr == 0xf3); + /* FS */ + Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4); + Assert(pCtx->fs.u32Limit == 0xffff); + Assert(u32FSAttr == 0xf3); + /* GS */ + Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4); + Assert(pCtx->gs.u32Limit == 0xffff); + Assert(u32GSAttr == 0xf3); + /* 64-bit capable CPUs. */ +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Assert(!(pCtx->cs.u64Base >> 32)); + Assert(!u32SSAttr || !(pCtx->ss.u64Base >> 32)); + Assert(!u32DSAttr || !(pCtx->ds.u64Base >> 32)); + Assert(!u32ESAttr || !(pCtx->es.u64Base >> 32)); + } +# endif + } +} +#endif /* VBOX_STRICT */ + + +/** + * Writes a guest segment register into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxSel Index of the selector in the VMCS. + * @param idxLimit Index of the segment limit in the VMCS. + * @param idxBase Index of the segment base in the VMCS. + * @param idxAccess Index of the access rights of the segment in the VMCS. + * @param pSelReg Pointer to the segment selector. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxWriteSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, + uint32_t idxAccess, PCPUMSELREG pSelReg, PCPUMCTX pCtx) +{ + int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */ + AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/ + AssertRCReturn(rc, rc); + + uint32_t u32Access = pSelReg->Attr.u; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */ + u32Access = 0xf3; + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + } + else + { + /* + * The way to differentiate between whether this is really a null selector or was just a selector loaded with 0 in + * real-mode is using the segment attributes. A selector loaded in real-mode with the value 0 is valid and usable in + * protected-mode and we should -not- mark it as an unusable segment. Both the recompiler & VT-x ensures NULL selectors + * loaded in protected-mode have their attribute as 0. 
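Hence a zero attribute value here is the cue to mark the segment as unusable (X86DESCATTR_UNUSABLE) below.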
+ */ + if (!u32Access) + u32Access = X86DESCATTR_UNUSABLE; + } + + /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */ + AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED), + ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg, pSelReg->Attr.u)); + + rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */ + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Loads the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases) + * into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCPU Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks ASSUMES pMixedCtx->cr0 is up to date (strict builds validation). + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VERR_INTERNAL_ERROR_5; + PVM pVM = pVCpu->CTX_SUFF(pVM); + + /* + * Guest Segment registers: CS, SS, DS, ES, FS, GS. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS)) + { + /* Save the segment attributes for real-on-v86 mode hack, so we can restore them on VM-exit. */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + pVCpu->hm.s.vmx.RealMode.AttrCS.u = pMixedCtx->cs.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrSS.u = pMixedCtx->ss.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrDS.u = pMixedCtx->ds.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrES.u = pMixedCtx->es.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrFS.u = pMixedCtx->fs.Attr.u; + pVCpu->hm.s.vmx.RealMode.AttrGS.u = pMixedCtx->gs.Attr.u; + } + +#ifdef VBOX_WITH_REM + if (!pVM->hm.s.vmx.fUnrestrictedGuest) + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED); + if ( pVCpu->hm.s.vmx.fWasInRealMode + && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED) + { + /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute + in real-mode (e.g. 
OpenBSD 4.0) */ + REMFlushTBs(pVM); + Log4(("Load: Switch to protected mode detected!\n")); + pVCpu->hm.s.vmx.fWasInRealMode = false; + } + } +#endif + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE, + VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_SS, VMX_VMCS32_GUEST_SS_LIMIT, VMX_VMCS_GUEST_SS_BASE, + VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS, &pMixedCtx->ss, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_DS, VMX_VMCS32_GUEST_DS_LIMIT, VMX_VMCS_GUEST_DS_BASE, + VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS, &pMixedCtx->ds, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_ES, VMX_VMCS32_GUEST_ES_LIMIT, VMX_VMCS_GUEST_ES_BASE, + VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS, &pMixedCtx->es, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_FS, VMX_VMCS32_GUEST_FS_LIMIT, VMX_VMCS_GUEST_FS_BASE, + VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS, &pMixedCtx->fs, pMixedCtx); + AssertRCReturn(rc, rc); + rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_GS, VMX_VMCS32_GUEST_GS_LIMIT, VMX_VMCS_GUEST_GS_BASE, + VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS, &pMixedCtx->gs, pMixedCtx); + AssertRCReturn(rc, rc); + +#ifdef VBOX_STRICT + /* Validate. */ + hmR0VmxValidateSegmentRegs(pVM, pVCpu, pMixedCtx); +#endif + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); + Log4(("Load: CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pMixedCtx->cs.Sel, pMixedCtx->cs.u64Base, + pMixedCtx->cs.u32Limit, pMixedCtx->cs.Attr.u)); + } + + /* + * Guest TR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_TR)) + { + /* + * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is achieved + * using the interrupt redirection bitmap (all bits cleared to let the guest handle INT-n's) in the TSS. + * See hmR3InitFinalizeR0() to see how pRealModeTSS is setup. + */ + uint16_t u16Sel = 0; + uint32_t u32Limit = 0; + uint64_t u64Base = 0; + uint32_t u32AccessRights = 0; + + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u16Sel = pMixedCtx->tr.Sel; + u32Limit = pMixedCtx->tr.u32Limit; + u64Base = pMixedCtx->tr.u64Base; + u32AccessRights = pMixedCtx->tr.Attr.u; + } + else + { + Assert(pVM->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMR3CanExecuteGuest() -XXX- what about inner loop changes? */ + + /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */ + RTGCPHYS GCPhys; + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys); + AssertRCReturn(rc, rc); + + X86DESCATTR DescAttr; + DescAttr.u = 0; + DescAttr.n.u1Present = 1; + DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY; + + u16Sel = 0; + u32Limit = HM_VTX_TSS_SIZE; + u64Base = GCPhys; /* in real-mode phys = virt. */ + u32AccessRights = DescAttr.u; + } + + /* Validate. */ + Assert(!(u16Sel & RT_BIT(2))); + AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY + || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights)); + AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights)); + Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/ + Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/ + Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. 
*/ + Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */ + Assert( (u32Limit & 0xfff) == 0xfff + || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */ + Assert( !(pMixedCtx->tr.u32Limit & 0xfff00000) + || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */ + + rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_TR, u16Sel); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_TR); + Log4(("Load: VMX_VMCS_GUEST_TR_BASE=%#RX64\n", u64Base)); + } + + /* + * Guest GDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_GDTR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pMixedCtx->gdtr.cbGdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pMixedCtx->gdtr.pGdt); AssertRCReturn(rc, rc); + + /* Validate. */ + Assert(!(pMixedCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_GDTR); + Log4(("Load: VMX_VMCS_GUEST_GDTR_BASE=%#RX64\n", pMixedCtx->gdtr.pGdt)); + } + + /* + * Guest LDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LDTR)) + { + /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */ + uint32_t u32Access = 0; + if (!pMixedCtx->ldtr.Attr.u) + u32Access = X86DESCATTR_UNUSABLE; + else + u32Access = pMixedCtx->ldtr.Attr.u; + + rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_LDTR, pMixedCtx->ldtr.Sel); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pMixedCtx->ldtr.u32Limit); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pMixedCtx->ldtr.u64Base); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRCReturn(rc, rc); + + /* Validate. */ + if (!(u32Access & X86DESCATTR_UNUSABLE)) + { + Assert(!(pMixedCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */ + Assert(pMixedCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */ + Assert(!pMixedCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */ + Assert(pMixedCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */ + Assert(!pMixedCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */ + Assert(!(pMixedCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */ + Assert( (pMixedCtx->ldtr.u32Limit & 0xfff) == 0xfff + || !pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */ + Assert( !(pMixedCtx->ldtr.u32Limit & 0xfff00000) + || pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */ + } + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_LDTR); + Log4(("Load: VMX_VMCS_GUEST_LDTR_BASE=%#RX64\n", pMixedCtx->ldtr.u64Base)); + } + + /* + * Guest IDTR. + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_IDTR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pMixedCtx->idtr.cbIdt); AssertRCReturn(rc, rc); + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pMixedCtx->idtr.pIdt); AssertRCReturn(rc, rc); + + /* Validate. */ + Assert(!(pMixedCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_IDTR); + Log4(("Load: VMX_VMCS_GUEST_IDTR_BASE=%#RX64\n", pMixedCtx->idtr.pIdt)); + } + + return VINF_SUCCESS; +} + + +/** + * Loads certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store + * areas. 
These MSRs will automatically be loaded to the host CPU on every + * successful VM entry and stored from the host CPU on every successful VM exit. + * Also loads the sysenter MSRs into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + AssertPtr(pVCpu); + AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr); + + /* + * MSRs covered by Auto-load/store: EFER, LSTAR, STAR, SF_MASK, TSC_AUX (RDTSCP). + */ + int rc = VINF_SUCCESS; + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS)) + { +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + PVM pVM = pVCpu->CTX_SUFF(pVM); + PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; + uint32_t cGuestMsrs = 0; + + /* See Intel spec. 4.1.4 "Enumeration of Paging Features by CPUID". */ + /** @todo r=ramshankar: Optimize this further to do lazy restoration and only + * when the guest really is in 64-bit mode. */ + bool fSupportsLongMode = CPUMGetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE); + if (fSupportsLongMode) + { + pGuestMsr->u32Msr = MSR_K8_LSTAR; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrLSTAR; /* 64 bits mode syscall rip */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K6_STAR; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrSTAR; /* legacy syscall eip, cs & ss */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K8_SF_MASK; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrSFMASK; /* syscall flag mask */ + pGuestMsr++; cGuestMsrs++; + pGuestMsr->u32Msr = MSR_K8_KERNEL_GS_BASE; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = pMixedCtx->msrKERNELGSBASE; /* swapgs exchange value */ + pGuestMsr++; cGuestMsrs++; + } + + /* + * RDTSCP requires the TSC_AUX MSR. Host and guest share the physical MSR. So we have to + * load the guest's copy always (since the MSR bitmap allows passthru unconditionally). + */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pGuestMsr->u32Msr = MSR_K8_TSC_AUX; + pGuestMsr->u32Reserved = 0; + pGuestMsr->u64Value = CPUMR0GetGuestTscAux(pVCpu); + pGuestMsr++; cGuestMsrs++; + } + + /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */ + if (cGuestMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)) + { + LogRel(("CPU autoload/store MSR count in VMCS exceeded cGuestMsrs=%u.\n", cGuestMsrs)); + pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE; + return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; + } + + /* Update the VCPU's copy of the guest MSR count. */ + pVCpu->hm.s.vmx.cGuestMsrs = cGuestMsrs; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cGuestMsrs); AssertRCReturn(rc, rc); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cGuestMsrs); AssertRCReturn(rc, rc); +#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); + } + + /* + * Guest Sysenter MSRs. + * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause + * VM-exits on WRMSRs for these MSRs. 
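Each of SYSENTER_CS, SYSENTER_EIP and SYSENTER_ESP is written to the VMCS below only when its corresponding HM_CHANGED flag is pending.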
+ */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR)) + { + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pMixedCtx->SysEnter.cs); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pMixedCtx->SysEnter.eip); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR)) + { + rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pMixedCtx->SysEnter.esp); AssertRCReturn(rc, rc); + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); + } + + return rc; +} + + +/** + * Loads the guest activity state into the guest-state area in the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestActivityState(PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /** @todo See if we can make use of other states, e.g. + * VMX_VMCS_GUEST_ACTIVITY_SHUTDOWN or HLT. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE)) + { + int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE); + AssertRCReturn(rc, rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE); + } + return VINF_SUCCESS; +} + + +/** + * Sets up the appropriate function to run guest code. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + { +#ifndef VBOX_ENABLE_64_BITS_GUESTS + return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; +#endif + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */ +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */ + if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64) + { + pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64; + HMCPU_CF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS); + } +#else + /* 64-bit host or hybrid host. */ + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64; +#endif + } + else + { + /* Guest is not in long mode, use the 32-bit handler. */ +#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32) + { + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; + HMCPU_CF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS); + } +#else + pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; +#endif + } + Assert(pVCpu->hm.s.vmx.pfnStartVM); + return VINF_SUCCESS; +} + + +/** + * Wrapper for running the guest code in VT-x. + * + * @returns VBox strict status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! 
+ */ +DECLINLINE(int) hmR0VmxRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + /* + * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations + * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper. + * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details. + */ + const bool fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED); + /** @todo Add stats for resume vs launch. */ +#ifdef VBOX_WITH_KERNEL_USING_XMM + return HMR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM); +#else + return pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu); +#endif +} + + +/** + * Reports world-switch error and dumps some useful debug info. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param rcVMRun The return code from VMLAUNCH/VMRESUME. + * @param pCtx Pointer to the guest-CPU context. + * @param pVmxTransient Pointer to the VMX transient structure (only + * exitReason updated). + */ +static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(pVM); + Assert(pVCpu); + Assert(pCtx); + Assert(pVmxTransient); + HMVMX_ASSERT_PREEMPT_SAFE(); + + Log4(("VM-entry failure: %Rrc\n", rcVMRun)); + switch (rcVMRun) + { + case VERR_VMX_INVALID_VMXON_PTR: + AssertFailed(); + break; + case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */ + case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */ + { + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason); + rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRC(rc); + + pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; + /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted(). + Cannot do it here as we may have been long preempted. */ + +#ifdef VBOX_STRICT + Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason, + pVmxTransient->uExitReason)); + Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQualification)); + Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError)); + if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX) + Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError])); + else + Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX)); + Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu)); + Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu)); + + /* VMX control bits. 
*/ + uint32_t u32Val; + uint64_t u64Val; + HMVMXHCUINTREG uHCReg; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val)); + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); + Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); + + /* Guest bits. 
*/ + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc); + Log4(("Old Guest Rip %#RX64 New %#RX64\n", pCtx->rip, u64Val)); + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc); + Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pCtx->rsp, u64Val)); + rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc); + Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pCtx->eflags.u32, u32Val)); + rc = VMXReadVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS16_GUEST_FIELD_VPID %u\n", u32Val)); + + /* Host bits. */ + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc); + Log4(("Host CR0 %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc); + Log4(("Host CR3 %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc); + Log4(("Host CR4 %#RHr\n", uHCReg)); + + RTGDTR HostGdtr; + PCX86DESCHC pDesc; + ASMGetGDTR(&HostGdtr); + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_CS, &u32Val); AssertRC(rc); + Log4(("Host CS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "CS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_DS, &u32Val); AssertRC(rc); + Log4(("Host DS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "DS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_ES, &u32Val); AssertRC(rc); + Log4(("Host ES %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "ES: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_FS, &u32Val); AssertRC(rc); + Log4(("Host FS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "FS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_GS, &u32Val); AssertRC(rc); + Log4(("Host GS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "GS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_SS, &u32Val); AssertRC(rc); + Log4(("Host SS %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "SS: "); + } + + rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_TR, &u32Val); AssertRC(rc); + Log4(("Host TR %#08x\n", u32Val)); + if (u32Val < HostGdtr.cbGdt) + { + pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); + HMR0DumpDescriptor(pDesc, u32Val, "TR: "); + } + + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host TR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host GDTR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc); + Log4(("Host IDTR Base %#RHv\n", uHCReg)); + rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc); + Log4(("Host SYSENTER CS %#08x\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc); + Log4(("Host SYSENTER EIP %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc); + Log4(("Host SYSENTER ESP %#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc); + Log4(("Host RSP 
%#RHv\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc); + Log4(("Host RIP %#RHv\n", uHCReg)); +# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER))); + Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR))); + Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR))); + Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR))); + Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK))); + Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE))); + } +# endif +#endif /* VBOX_STRICT */ + break; + } + + default: + /* Impossible */ + AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun)); + break; + } + NOREF(pVM); +} + + +#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) +#ifndef VMX_USE_CACHED_VMCS_ACCESSES +# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!" +#endif +#ifdef VBOX_STRICT +static bool hmR0VmxIsValidWriteField(uint32_t idxField) +{ + switch (idxField) + { + case VMX_VMCS_GUEST_RIP: + case VMX_VMCS_GUEST_RSP: + case VMX_VMCS_GUEST_SYSENTER_EIP: + case VMX_VMCS_GUEST_SYSENTER_ESP: + case VMX_VMCS_GUEST_GDTR_BASE: + case VMX_VMCS_GUEST_IDTR_BASE: + case VMX_VMCS_GUEST_CS_BASE: + case VMX_VMCS_GUEST_DS_BASE: + case VMX_VMCS_GUEST_ES_BASE: + case VMX_VMCS_GUEST_FS_BASE: + case VMX_VMCS_GUEST_GS_BASE: + case VMX_VMCS_GUEST_SS_BASE: + case VMX_VMCS_GUEST_LDTR_BASE: + case VMX_VMCS_GUEST_TR_BASE: + case VMX_VMCS_GUEST_CR3: + return true; + } + return false; +} + +static bool hmR0VmxIsValidReadField(uint32_t idxField) +{ + switch (idxField) + { + /* Read-only fields. */ + case VMX_VMCS_RO_EXIT_QUALIFICATION: + return true; + } + /* Remaining readable fields should also be writable. */ + return hmR0VmxIsValidWriteField(idxField); +} +#endif /* VBOX_STRICT */ + + +/** + * Executes the specified handler in 64-bit mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest CPU context. + * @param enmOp The operation to perform. + * @param cbParam Number of parameters. + * @param paParam Array of 32-bit parameters. + */ +VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam, + uint32_t *paParam) +{ + int rc, rc2; + PHMGLOBALCPUINFO pCpu; + RTHCPHYS HCPhysCpuPage; + RTCCUINTREG uOldEflags; + + AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER); + Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END); + Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField)); + Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField)); + +#ifdef VBOX_STRICT + for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++) + Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i])); + + for (uint32_t i = 0; i <pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++) + Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i])); +#endif + + /* Disable interrupts. */ + uOldEflags = ASMIntDisableFlags(); + +#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI + RTCPUID idHostCpu = RTMpCpuId(); + CPUMR0SetLApic(pVCpu, idHostCpu); +#endif + + pCpu = HMR0GetCurrentCpu(); + HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); + + /* Clear VMCS. 
Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */ + VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + + /* Leave VMX Root Mode. */ + VMXDisable(); + + ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); + + CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); + CPUMSetHyperEIP(pVCpu, enmOp); + for (int i = (int)cbParam - 1; i >= 0; i--) + CPUMPushHyper(pVCpu, paParam[i]); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z); + + /* Call the switcher. */ + rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum)); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z); + + /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */ + /* Make sure the VMX instructions don't cause #UD faults. */ + ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); + + /* Re-enter VMX Root Mode */ + rc2 = VMXEnable(HCPhysCpuPage); + if (RT_FAILURE(rc2)) + { + ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE); + ASMSetFlags(uOldEflags); + return rc2; + } + + rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRC(rc2); + Assert(!(ASMGetFlags() & X86_EFL_IF)); + ASMSetFlags(uOldEflags); + return rc; +} + + +/** + * Prepares for and executes VMLAUNCH (64 bits guests) for 32-bit hosts + * supporting 64-bit guests. + * + * @returns VBox status code. + * @param fResume Whether to VMLAUNCH or VMRESUME. + * @param pCtx Pointer to the guest-CPU context. + * @param pCache Pointer to the VMCS cache. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu) +{ + uint32_t aParam[6]; + PHMGLOBALCPUINFO pCpu = NULL; + RTHCPHYS HCPhysCpuPage = 0; + int rc = VERR_INTERNAL_ERROR_5; + + pCpu = HMR0GetCurrentCpu(); + HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + pCache->uPos = 1; + pCache->interPD = PGMGetInterPaeCR3(pVM); + pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0; +#endif + +#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) + pCache->TestIn.HCPhysCpuPage = 0; + pCache->TestIn.HCPhysVmcs = 0; + pCache->TestIn.pCache = 0; + pCache->TestOut.HCPhysVmcs = 0; + pCache->TestOut.pCache = 0; + pCache->TestOut.pCtx = 0; + pCache->TestOut.eflags = 0; +#endif + + aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */ + aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */ + aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */ + aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. 
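The 64-bit physical addresses are passed as Lo/Hi 32-bit halves since the switcher parameters are 32-bit.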
*/ + aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache); + aParam[5] = 0; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8; + *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1; +#endif + rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_VMXRCStartVM64, 6, &aParam[0]); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5); + Assert(pCtx->dr[4] == 10); + *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff; +#endif + +#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) + AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage)); + AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, + pVCpu->hm.s.vmx.HCPhysVmcs)); + AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, + pCache->TestOut.HCPhysVmcs)); + AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, + pCache->TestOut.pCache)); + AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache), + ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache))); + AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, + pCache->TestOut.pCtx)); + Assert(!(pCache->TestOut.eflags & X86_EFL_IF)); +#endif + return rc; +} + + +/** + * Initialize the VMCS-Read cache. The VMCS cache is used for 32-bit hosts + * running 64-bit guests (except 32-bit Darwin which runs with 64-bit paging in + * 32-bit mode) for 64-bit fields that cannot be accessed in 32-bit mode. Some + * 64-bit fields -can- be accessed (those that have a 32-bit FULL & HIGH part). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + */ +static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu) +{ +#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \ +{ \ + Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \ + pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \ + pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \ + ++cReadFields; \ +} + + AssertPtr(pVM); + AssertPtr(pVCpu); + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + uint32_t cReadFields = 0; + + /* + * Don't remove the #if 0'd fields in this code. They're listed here for consistency + * and serve to indicate exceptions to the rules. + */ + + /* Guest-natural selector base fields. */ +#if 0 + /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". 
*/ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4); +#endif + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP); +#if 0 + /* Unused natural width guest-state fields. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */ +#endif + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP); + + /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */ +#if 0 + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL); + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL); +#endif + + /* Natural width guest-state fields. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION); +#if 0 + /* Currently unused field. */ + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR); +#endif + + if (pVM->hm.s.fNestedPaging) + { + VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); + AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, + VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX)); + pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX; + } + else + { + AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX)); + pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX; + } + +#undef VMXLOCAL_INIT_READ_CACHE_FIELD + return VINF_SUCCESS; +} + + +/** + * Writes a field into the VMCS. This can either directly invoke a VMWRITE or + * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except + * darwin, running 64-bit guests). + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxField The VMCS field encoding. + * @param u64Val 16, 32 or 64 bits value. + */ +VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) +{ + int rc; + switch (idxField) + { + /* + * These fields consists of a "FULL" and a "HIGH" part which can be written to individually. 
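The low dword goes to idxField and the high dword to idxField + 1, e.g. writing 0x1111222233334444 amounts to VMXWriteVmcs32(idxField, 0x33334444) followed by VMXWriteVmcs32(idxField + 1, 0x11112222).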
+ */ + /* 64-bit Control fields. */ + case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL: + case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL: + case VMX_VMCS64_CTRL_MSR_BITMAP_FULL: + case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL: + case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL: + case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL: + case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL: + case VMX_VMCS64_CTRL_TSC_OFFSET_FULL: + case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL: + case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL: + case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL: + case VMX_VMCS64_CTRL_EPTP_FULL: + case VMX_VMCS64_CTRL_EPTP_LIST_FULL: + /* 64-bit Guest-state fields. */ + case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL: + case VMX_VMCS64_GUEST_DEBUGCTL_FULL: + case VMX_VMCS64_GUEST_PAT_FULL: + case VMX_VMCS64_GUEST_EFER_FULL: + case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL: + case VMX_VMCS64_GUEST_PDPTE0_FULL: + case VMX_VMCS64_GUEST_PDPTE1_FULL: + case VMX_VMCS64_GUEST_PDPTE2_FULL: + case VMX_VMCS64_GUEST_PDPTE3_FULL: + /* 64-bit Host-state fields. */ + case VMX_VMCS64_HOST_FIELD_PAT_FULL: + case VMX_VMCS64_HOST_FIELD_EFER_FULL: + case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL: + { + rc = VMXWriteVmcs32(idxField, u64Val); + rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32)); + break; + } + + /* + * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit + * values). When we switch the host to 64-bit mode for running 64-bit guests, these VMWRITEs get executed then. + */ + /* Natural-width Guest-state fields. */ + case VMX_VMCS_GUEST_CR3: + case VMX_VMCS_GUEST_ES_BASE: + case VMX_VMCS_GUEST_CS_BASE: + case VMX_VMCS_GUEST_SS_BASE: + case VMX_VMCS_GUEST_DS_BASE: + case VMX_VMCS_GUEST_FS_BASE: + case VMX_VMCS_GUEST_GS_BASE: + case VMX_VMCS_GUEST_LDTR_BASE: + case VMX_VMCS_GUEST_TR_BASE: + case VMX_VMCS_GUEST_GDTR_BASE: + case VMX_VMCS_GUEST_IDTR_BASE: + case VMX_VMCS_GUEST_RSP: + case VMX_VMCS_GUEST_RIP: + case VMX_VMCS_GUEST_SYSENTER_ESP: + case VMX_VMCS_GUEST_SYSENTER_EIP: + { + if (!(u64Val >> 32)) + { + /* If this field is 64-bit, VT-x will zero out the top bits. */ + rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val); + } + else + { + /* Assert that only the 32->64 switcher case should ever come here. */ + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); + rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val); + } + break; + } + + default: + { + AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val)); + rc = VERR_INVALID_PARAMETER; + break; + } + } + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Queue up a VMWRITE by using the VMCS write cache. This is only used on 32-bit + * hosts (except darwin) for 64-bit guests. + * + * @param pVCpu Pointer to the VMCPU. + * @param idxField The VMCS field encoding. + * @param u64Val 16, 32 or 64 bits value. + */ +VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) +{ + AssertPtr(pVCpu); + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + + AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, + ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED); + + /* Make sure there are no duplicates. 
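+       A field that is already queued is updated in place rather than added a second time; e.g.
+       (illustrative) two successive calls for VMX_VMCS_GUEST_RIP leave exactly one cache entry,
+       holding the value passed in the second call.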
*/ + for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++) + { + if (pCache->Write.aField[i] == idxField) + { + pCache->Write.aFieldVal[i] = u64Val; + return VINF_SUCCESS; + } + } + + pCache->Write.aField[pCache->Write.cValidEntries] = idxField; + pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val; + pCache->Write.cValidEntries++; + return VINF_SUCCESS; +} + +/* Enable later when the assembly code uses these as callbacks. */ +#if 0 +/* + * Loads the VMCS write-cache into the CPU (by executing VMWRITEs). + * + * @param pVCpu Pointer to the VMCPU. + * @param pCache Pointer to the VMCS cache. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(void) VMXWriteCachedVmcsLoad(PVMCPU pVCpu, PVMCSCACHE pCache) +{ + AssertPtr(pCache); + for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++) + { + int rc = VMXWriteVmcs64(pCache->Write.aField[i], pCache->Write.aFieldVal[i]); + AssertRC(rc); + } + pCache->Write.cValidEntries = 0; +} + + +/** + * Stores the VMCS read-cache from the CPU (by executing VMREADs). + * + * @param pVCpu Pointer to the VMCPU. + * @param pCache Pointer to the VMCS cache. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(void) VMXReadCachedVmcsStore(PVMCPU pVCpu, PVMCSCACHE pCache) +{ + AssertPtr(pCache); + for (uint32_t i = 0; i < pCache->Read.cValidEntries; i++) + { + int rc = VMXReadVmcs64(pCache->Read.aField[i], &pCache->Read.aFieldVal[i]); + AssertRC(rc); + } +} +#endif +#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */ + + +/** + * Sets up the usage of TSC-offsetting and updates the VMCS. If offsetting is + * not possible, cause VM-exits on RDTSC(P)s. Also sets up the VMX preemption + * timer. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VERR_INTERNAL_ERROR_5; + bool fOffsettedTsc = false; + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (pVM->hm.s.vmx.fUsePreemptTimer) + { + uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset); + + /* Make sure the returned values have sane upper and lower boundaries. */ + uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage); + cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */ + cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */ + cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift; + + uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); + rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc); + } + else + fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset); + + if (fOffsettedTsc) + { + uint64_t u64CurTSC = ASMReadTSC(); + if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) + { + /* Note: VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. 
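+                     With RDTSC exiting disabled the guest reads host-TSC + u64TSCOffset, so offsetting
+                     is only safe while (ASMReadTSC() + u64TSCOffset) >= TMCpuTickGetLastSeen(pVCpu),
+                     which is the check made just above (illustrative restatement, no new logic).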
*/ + rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset); AssertRC(rc); + + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset); + } + else + { + /* VM-exit on RDTSC(P) as we would otherwise pass decreasing TSC values to the guest. */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow); + } + } + else + { + /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */ + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept); + } +} + + +/** + * Determines if an exception is a contributory exception. Contributory + * exceptions are ones which can cause double-faults. Page-fault is + * intentionally not included here as it's a conditional contributory exception. + * + * @returns true if the exception is contributory, false otherwise. + * @param uVector The exception vector. + */ +DECLINLINE(bool) hmR0VmxIsContributoryXcpt(const uint32_t uVector) +{ + switch (uVector) + { + case X86_XCPT_GP: + case X86_XCPT_SS: + case X86_XCPT_NP: + case X86_XCPT_TS: + case X86_XCPT_DE: + return true; + default: + break; + } + return false; +} + + +/** + * Sets an event as a pending event to be injected into the guest. + * + * @param pVCpu Pointer to the VMCPU. + * @param u32IntInfo The VM-entry interruption-information field. + * @param cbInstr The VM-entry instruction length in bytes (for software + * interrupts, exceptions and privileged software + * exceptions). + * @param u32ErrCode The VM-entry exception error code. + * @param GCPtrFaultAddress The fault-address (CR2) in case it's a + * page-fault. + * + * @remarks Statistics counter assumes this is a guest event being injected or + * re-injected into the guest, i.e. 'StatInjectPendingReflect' is + * always incremented. + */ +DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode, + RTGCUINTPTR GCPtrFaultAddress) +{ + Assert(!pVCpu->hm.s.Event.fPending); + pVCpu->hm.s.Event.fPending = true; + pVCpu->hm.s.Event.u64IntInfo = u32IntInfo; + pVCpu->hm.s.Event.u32ErrCode = u32ErrCode; + pVCpu->hm.s.Event.cbInstr = cbInstr; + pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress; + + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Sets a double-fault (#DF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Handle a condition that occurred while delivering an event through the guest + * IDT. 
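+ * For example, a #PF that hits while delivering a #PF is flagged as a vectoring #PF, while a
+ * contributory exception (e.g. #GP) raised while delivering another contributory exception or a
+ * #PF is reflected back to the guest as a double fault (#DF), provided we are intercepting the
+ * contributory exceptions; see the VMXREFLECTXCPT handling below.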
+ * + * @returns VBox status code (informational error codes included). + * @retval VINF_SUCCESS if we should continue handling the VM-exit. + * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to + * continue execution of the guest which will delivery the #DF. + * @retval VINF_EM_RESET if we detected a triple-fault condition. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo)) + { + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); + uint32_t uExitVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntInfo); + uint32_t uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); + + typedef enum + { + VMXREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */ + VMXREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */ + VMXREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */ + VMXREFLECTXCPT_NONE /* Nothing to reflect. */ + } VMXREFLECTXCPT; + + /* See Intel spec. 30.7.1.1 "Reflecting Exceptions to Guest Software". */ + VMXREFLECTXCPT enmReflect = VMXREFLECTXCPT_NONE; + if (VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo)) + { + if (uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT) + { + enmReflect = VMXREFLECTXCPT_XCPT; +#ifdef VBOX_STRICT + if ( hmR0VmxIsContributoryXcpt(uIdtVector) + && uExitVector == X86_XCPT_PF) + { + Log4(("IDT: vcpu[%RU32] Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2)); + } +#endif + if ( uExitVector == X86_XCPT_PF + && uIdtVector == X86_XCPT_PF) + { + pVmxTransient->fVectoringPF = true; + Log4(("IDT: vcpu[%RU32] Vectoring #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2)); + } + else if ( (pVCpu->hm.s.vmx.u32XcptBitmap & HMVMX_CONTRIBUTORY_XCPT_MASK) + && hmR0VmxIsContributoryXcpt(uExitVector) + && ( hmR0VmxIsContributoryXcpt(uIdtVector) + || uIdtVector == X86_XCPT_PF)) + { + enmReflect = VMXREFLECTXCPT_DF; + } + else if (uIdtVector == X86_XCPT_DF) + enmReflect = VMXREFLECTXCPT_TF; + } + else if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI) + { + /* + * Ignore software interrupts (INT n), software exceptions (#BP, #OF) and privileged software exception + * (whatever they are) as they reoccur when restarting the instruction. + */ + enmReflect = VMXREFLECTXCPT_XCPT; + } + } + else + { + /* + * If event delivery caused an EPT violation/misconfig or APIC access VM-exit, then the VM-exit + * interruption-information will not be valid and we end up here. In such cases, it is sufficient to reflect the + * original exception to the guest after handling the VM-exit. 
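+             * For example, if delivering a guest exception has to touch a guest page (IDT, TSS or
+             * stack) that is not yet mapped by the nested page tables, we get an EPT violation
+             * VM-exit without valid exit interruption-info; the original event is then simply
+             * re-injected once the EPT violation has been handled.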
+ */ + if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT + || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI) + { + enmReflect = VMXREFLECTXCPT_XCPT; + } + } + + switch (enmReflect) + { + case VMXREFLECTXCPT_XCPT: + { + Assert( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT); + + uint32_t u32ErrCode = 0; + if (VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo)) + { + rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + u32ErrCode = pVmxTransient->uIdtVectoringErrorCode; + } + + /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF. See hmR0VmxExitXcptPF(). */ + hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo), + 0 /* cbInstr */, u32ErrCode, pMixedCtx->cr2); + rc = VINF_SUCCESS; + Log4(("IDT: vcpu[%RU32] Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->idCpu, + pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.u32ErrCode)); + + break; + } + + case VMXREFLECTXCPT_DF: + { + hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx); + rc = VINF_HM_DOUBLE_FAULT; + Log4(("IDT: vcpu[%RU32] Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->idCpu, + pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector)); + + break; + } + + case VMXREFLECTXCPT_TF: + { + rc = VINF_EM_RESET; + Log4(("IDT: vcpu[%RU32] Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", pVCpu->idCpu, uIdtVector, + uExitVector)); + break; + } + + default: + Assert(rc == VINF_SUCCESS); + break; + } + } + Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET); + return rc; +} + + +/** + * Saves the guest's CR0 register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + NOREF(pMixedCtx); + + /* + * While in the middle of saving guest-CR0, we could get preempted and re-invoked from the preemption hook, + * see hmR0VmxLeave(). Safer to just make this code non-preemptible. + */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)) + { + uint32_t uVal = 0; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &uVal); + AssertRCReturn(rc, rc); + + uint32_t uShadow = 0; + rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uShadow); + AssertRCReturn(rc, rc); + + uVal = (uShadow & pVCpu->hm.s.vmx.u32CR0Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR0Mask); + CPUMSetGuestCR0(pVCpu, uVal); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0); + } + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Saves the guest's CR4 register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! 
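+ *
+ * @note    The value handed to CPUMSetGuestCR4() merges the guest-visible bits kept in the CR4
+ *          read shadow with the bits actually loaded into the VMCS, roughly:
+ *              cr4 = (shadow & u32CR4Mask) | (vmcs_cr4 & ~u32CR4Mask)
+ *          (an illustrative restatement of the code below, not additional logic).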
+ */ +static int hmR0VmxSaveGuestCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4)) + { + uint32_t uVal = 0; + uint32_t uShadow = 0; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &uVal); + AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uShadow); + AssertRCReturn(rc, rc); + + uVal = (uShadow & pVCpu->hm.s.vmx.u32CR4Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR4Mask); + CPUMSetGuestCR4(pVCpu, uVal); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4); + } + return rc; +} + + +/** + * Saves the guest's RIP register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); + AssertRCReturn(rc, rc); + + pMixedCtx->rip = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP); + } + return rc; +} + + +/** + * Saves the guest's RSP register from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); + AssertRCReturn(rc, rc); + + pMixedCtx->rsp = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP); + } + return rc; +} + + +/** + * Saves the guest's RFLAGS from the VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)) + { + uint32_t uVal = 0; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &uVal); + AssertRCReturn(rc, rc); + + pMixedCtx->eflags.u32 = uVal; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) /* Undo our real-on-v86-mode changes to eflags if necessary. */ + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Log4(("Saving real-mode EFLAGS VT-x view=%#RX32\n", pMixedCtx->eflags.u32)); + + pMixedCtx->eflags.Bits.u1VM = 0; + pMixedCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL; + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS); + } + return VINF_SUCCESS; +} + + +/** + * Wrapper for saving the guest's RIP, RSP and RFLAGS from the VMCS into the + * guest-CPU context. 
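+ *
+ * @returns VBox status code.
+ * @param   pVCpu       Pointer to the VMCPU.
+ * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
+ *                      out-of-sync. Make sure to update the required fields
+ *                      before using them.
+ *
+ * @remarks No-long-jump zone!!!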
+ */ +DECLINLINE(int) hmR0VmxSaveGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + return rc; +} + + +/** + * Saves the guest's interruptibility-state ("interrupt shadow" as AMD calls it) + * from the guest-state area in the VMCS. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxSaveGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t uIntrState = 0; + int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState); + AssertRC(rc); + + if (!uIntrState) + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + else + { + Assert( uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + || uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + AssertRC(rc); + rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* for hmR0VmxGetGuestIntrState(). */ + AssertRC(rc); + + EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip); + Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); + } +} + + +/** + * Saves the guest's activity state. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestActivityState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Nothing to do for now until we make use of different guest-CPU activity state. Just update the flag. */ + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_ACTIVITY_STATE); + return VINF_SUCCESS; +} + + +/** + * Saves the guest SYSENTER MSRs (SYSENTER_CS, SYSENTER_EIP, SYSENTER_ESP) from + * the current VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestSysenterMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR)) + { + uint32_t u32Val = 0; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.cs = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR); + } + + uint64_t u64Val = 0; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.eip = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR); + } + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->SysEnter.esp = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR); + } + return rc; +} + + +/** + * Saves the guest FS_BASE MSRs from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
+ * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestFSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_FS_BASE_MSR)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_FS_BASE, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->fs.u64Base = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_FS_BASE_MSR); + } + return rc; +} + + +/** + * Saves the guest GS_BASE MSRs from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestGSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GS_BASE_MSR)) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GS_BASE, &u64Val); AssertRCReturn(rc, rc); + pMixedCtx->gs.u64Base = u64Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GS_BASE_MSR); + } + return rc; +} + + +/** + * Saves the auto load/store'd guest MSRs from the current VMCS into the + * guest-CPU context. Currently these are LSTAR, STAR, SFMASK, KERNEL-GS BASE + * and TSC_AUX. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestAutoLoadStoreMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS)) + return VINF_SUCCESS; + +#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++) + { + PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; + pMsr += i; + switch (pMsr->u32Msr) + { + case MSR_K8_LSTAR: pMixedCtx->msrLSTAR = pMsr->u64Value; break; + case MSR_K6_STAR: pMixedCtx->msrSTAR = pMsr->u64Value; break; + case MSR_K8_SF_MASK: pMixedCtx->msrSFMASK = pMsr->u64Value; break; + case MSR_K8_TSC_AUX: CPUMR0SetGuestTscAux(pVCpu, pMsr->u64Value); break; + case MSR_K8_KERNEL_GS_BASE: pMixedCtx->msrKERNELGSBASE = pMsr->u64Value; break; + case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit. */ break; + default: + { + AssertFailed(); + return VERR_HM_UNEXPECTED_LD_ST_MSR; + } + } + } +#endif + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS); + return VINF_SUCCESS; +} + + +/** + * Saves the guest control registers from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestControlRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Guest CR0. Guest FPU. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* Guest CR4. */ + rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* Guest CR2 - updated always during the world-switch or in #PF. 
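+       CR2 is not part of the VMCS guest-state area, so there is nothing to VMREAD for it here.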
*/ + /* Guest CR3. Only changes with Nested Paging. This must be done -after- saving CR0 and CR4 from the guest! */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3)) + { + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + if ( pVM->hm.s.vmx.fUnrestrictedGuest + || ( pVM->hm.s.fNestedPaging + && CPUMIsGuestPagingEnabledEx(pMixedCtx))) + { + uint64_t u64Val = 0; + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val); + if (pMixedCtx->cr3 != u64Val) + { + CPUMSetGuestCR3(pVCpu, u64Val); + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + PGMUpdateCR3(pVCpu, u64Val); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + } + else + { + /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMUpdateCR3().*/ + VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3); + } + } + + /* If the guest is in PAE mode, sync back the PDPE's into the guest state. */ + if (CPUMIsGuestInPAEModeEx(pMixedCtx)) /* Reads CR0, CR4 and EFER MSR (EFER is always up-to-date). */ + { + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc); + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc); + + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + else + { + /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMGstUpdatePaePdpes(). */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES); + } + } + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3); + } + + /* + * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback() + * -> VMMRZCallRing3Disable() -> hmR0VmxSaveGuestState() -> Set VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp + * -> continue with VM-exit handling -> hmR0VmxSaveGuestControlRegs() and here we are. + * + * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus + * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that + * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should + * -NOT- check if HMVMX_UPDATED_GUEST_CR3 is already set or not! + * + * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here. + */ + if (VMMRZCallRing3IsEnabled(pVCpu)) + { + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) + PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); + + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + + return rc; +} + + +/** + * Reads a guest segment register from the current VMCS into the guest-CPU + * context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param idxSel Index of the selector in the VMCS. 
+ * @param   idxLimit    Index of the segment limit in the VMCS.
+ * @param   idxBase     Index of the segment base in the VMCS.
+ * @param   idxAccess   Index of the access rights of the segment in the VMCS.
+ * @param   pSelReg     Pointer to the segment selector.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Never call this function directly!!! Use the VMXLOCAL_READ_SEG()
+ *          macro as that takes care of whether to read from the VMCS cache or
+ *          not.
+ */
+DECLINLINE(int) hmR0VmxReadSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
+                                      PCPUMSELREG pSelReg)
+{
+    uint32_t u32Val = 0;
+    int rc = VMXReadVmcs32(idxSel, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->Sel      = (uint16_t)u32Val;
+    pSelReg->ValidSel = (uint16_t)u32Val;
+    pSelReg->fFlags   = CPUMSELREG_FLAGS_VALID;
+
+    rc = VMXReadVmcs32(idxLimit, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->u32Limit = u32Val;
+
+    uint64_t u64Val = 0;
+    rc = VMXReadVmcsGstNByIdxVal(idxBase, &u64Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->u64Base = u64Val;
+
+    rc = VMXReadVmcs32(idxAccess, &u32Val);
+    AssertRCReturn(rc, rc);
+    pSelReg->Attr.u = u32Val;
+
+    /*
+     * If VT-x marks the segment as unusable, most other bits remain undefined:
+     *   - For CS the L, D and G bits have meaning.
+     *   - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
+     *   - For the remaining data segments no bits are defined.
+     *
+     * The present bit and the unusable bit have been observed to be set at the
+     * same time (the selector was supposed to be invalid as we started executing
+     * a V8086 interrupt in ring-0).
+     *
+     * What should be important for the rest of the VBox code is that the P bit is
+     * cleared. Some of the other VBox code recognizes the unusable bit, but
+     * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
+     * safe side here, we'll strip off P and other bits we don't care about. If
+     * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
+     *
+     * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
+     */
+    if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
+    {
+        Assert(idxSel != VMX_VMCS16_GUEST_FIELD_TR);          /* TR is the only selector that can never be unusable. */
+
+        /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
+        pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
+                         | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
+
+        Log4(("hmR0VmxReadSegmentReg: Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Val, pSelReg->Attr.u));
+#ifdef DEBUG_bird
+        AssertMsg((u32Val & ~X86DESCATTR_P) == pSelReg->Attr.u,
+                  ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
+                   idxSel, u32Val, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
+#endif
+    }
+    return VINF_SUCCESS;
+}
+
+
+#ifdef VMX_USE_CACHED_VMCS_ACCESSES
+# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
+    hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+                          VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
+#else
+# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
+    hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+                          VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
+#endif
+
+
+/**
+ * Saves the guest segment registers from the current VMCS into the guest-CPU
+ * context.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       Pointer to the VMCPU.
+ * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Guest segment registers. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS)) + { + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(CS, cs); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(SS, ss); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(DS, ds); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(ES, es); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(FS, fs); AssertRCReturn(rc, rc); + rc = VMXLOCAL_READ_SEG(GS, gs); AssertRCReturn(rc, rc); + + /* Restore segment attributes for real-on-v86 mode hack. */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + pMixedCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u; + pMixedCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u; + pMixedCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u; + pMixedCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u; + pMixedCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u; + pMixedCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u; + } + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS); + } + + return VINF_SUCCESS; +} + + +/** + * Saves the guest descriptor table registers and task register from the current + * VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestTableRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + int rc = VINF_SUCCESS; + + /* Guest LDTR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR)) + { + rc = VMXLOCAL_READ_SEG(LDTR, ldtr); + AssertRCReturn(rc, rc); + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR); + } + + /* Guest GDTR. */ + uint64_t u64Val = 0; + uint32_t u32Val = 0; + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->gdtr.pGdt = u64Val; + pMixedCtx->gdtr.cbGdt = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR); + } + + /* Guest IDTR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR)) + { + rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCReturn(rc, rc); + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->idtr.pIdt = u64Val; + pMixedCtx->idtr.cbIdt = u32Val; + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR); + } + + /* Guest TR. */ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR)) + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* For real-mode emulation using virtual-8086 mode we have the fake TSS (pRealModeTSS) in TR, don't save the fake one. */ + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + rc = VMXLOCAL_READ_SEG(TR, tr); + AssertRCReturn(rc, rc); + } + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR); + } + return rc; +} + +#undef VMXLOCAL_READ_SEG + + +/** + * Saves the guest debug-register DR7 from the current VMCS into the guest-CPU + * context. 
+ * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestDR7(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG)) + { + if (!pVCpu->hm.s.fUsingHyperDR7) + { + /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */ + uint32_t u32Val; + int rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); AssertRCReturn(rc, rc); + pMixedCtx->dr[7] = u32Val; + } + + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG); + } + return VINF_SUCCESS; +} + + +/** + * Saves the guest APIC state from the current VMCS into the guest-CPU context. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + /* Updating TPR is already done in hmR0VmxPostRunGuest(). Just update the flag. */ + HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_APIC_STATE); + return VINF_SUCCESS; +} + + +/** + * Saves the entire guest state from the currently active VMCS into the + * guest-CPU context. This essentially VMREADs all guest-data. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(pVCpu); + Assert(pMixedCtx); + + if (HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL) + return VINF_SUCCESS; + + /* Though we can longjmp to ring-3 due to log-flushes here and get recalled + again on the ring-3 callback path, there is no real need to. */ + if (VMMRZCallRing3IsEnabled(pVCpu)) + VMMR0LogFlushDisable(pVCpu); + else + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + Log4Func(("vcpu[%RU32]\n", pVCpu->idCpu)); + + int rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestRipRspRflags failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestControlRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSegmentRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestTableRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestDR7 failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestSysenterMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSysenterMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestFSBaseMsr(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestFSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestGSBaseMsr(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestGSBaseMsr failed! 
rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestAutoLoadStoreMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestActivityState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestActivityState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + rc = hmR0VmxSaveGuestApicState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestApicState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc); + + AssertMsg(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL, + ("Missed guest state bits while saving state; residue %RX32\n", HMVMXCPU_GST_VALUE(pVCpu))); + + if (VMMRZCallRing3IsEnabled(pVCpu)) + VMMR0LogFlushEnable(pVCpu); + + return rc; +} + + +/** + * Check per-VM and per-VCPU force flag actions that require us to go back to + * ring-3 for one reason or another. + * + * @returns VBox status code (information status code included). + * @retval VINF_SUCCESS if we don't have any actions that require going back to + * ring-3. + * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync. + * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware + * interrupts) + * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires + * all EMTs to be in ring-3. + * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests. + * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return + * to the EM loop. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction + ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction + ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) ) + { + /* We need the control registers now, make sure the guest-CPU context is updated. */ + int rc3 = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc3, rc3); + + /* Pending HM CR3 sync. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) + { + int rc2 = PGMUpdateCR3(pVCpu, pMixedCtx->cr3); + AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3, + ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + } + + /* Pending HM PAE PDPEs. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) + { + PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); + Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + + /* Pending PGM C3 sync. */ + if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + int rc2 = PGMSyncCR3(pVCpu, pMixedCtx->cr0, pMixedCtx->cr3, pMixedCtx->cr4, + VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (rc2 != VINF_SUCCESS) + { + AssertRC(rc2); + Log4(("hmR0VmxCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", rc2)); + return rc2; + } + } + + /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) 
*/ + if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + int rc2 = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; + Log4(("hmR0VmxCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2)); + return rc2; + } + + /* Pending VM request packets, such as hardware interrupts. */ + if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST)) + { + Log4(("hmR0VmxCheckForceFlags: Pending VM request forcing us back to ring-3\n")); + return VINF_EM_PENDING_REQUEST; + } + + /* Pending PGM pool flushes. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) + { + Log4(("hmR0VmxCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n")); + return VINF_PGM_POOL_FLUSH_PENDING; + } + + /* Pending DMA requests. */ + if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA)) + { + Log4(("hmR0VmxCheckForceFlags: Pending DMA request forcing us back to ring-3\n")); + return VINF_EM_RAW_TO_R3; + } + } + + return VINF_SUCCESS; +} + + +/** + * Converts any TRPM trap into a pending HM event. This is typically used when + * entering from ring-3 (not longjmp returns). + * + * @param pVCpu Pointer to the VMCPU. + */ +static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu) +{ + Assert(TRPMHasTrap(pVCpu)); + Assert(!pVCpu->hm.s.Event.fPending); + + uint8_t uVector; + TRPMEVENT enmTrpmEvent; + RTGCUINT uErrCode; + RTGCUINTPTR GCPtrFaultAddress; + uint8_t cbInstr; + + int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr); + AssertRC(rc); + + /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */ + uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID; + if (enmTrpmEvent == TRPM_TRAP) + { + switch (uVector) + { + case X86_XCPT_BP: + case X86_XCPT_OF: + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + break; + + case X86_XCPT_PF: + case X86_XCPT_DF: + case X86_XCPT_TS: + case X86_XCPT_NP: + case X86_XCPT_SS: + case X86_XCPT_GP: + case X86_XCPT_AC: + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + /* no break! */ + default: + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + break; + } + } + else if (enmTrpmEvent == TRPM_HARDWARE_INT) + { + if (uVector == X86_XCPT_NMI) + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + else + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + } + else if (enmTrpmEvent == TRPM_SOFTWARE_INT) + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + else + AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent)); + + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n", + u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress)); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect); +} + + +/** + * Converts any pending HM event into a TRPM trap. Typically used when leaving + * VT-x to execute any instruction. + * + * @param pvCpu Pointer to the VMCPU. 
+ */ +static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu) +{ + Assert(pVCpu->hm.s.Event.fPending); + + uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); + uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo); + bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntInfo); + uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode; + + /* If a trap was already pending, we did something wrong! */ + Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP); + + TRPMEVENT enmTrapType; + switch (uVectorType) + { + case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT: + case VMX_IDT_VECTORING_INFO_TYPE_NMI: + enmTrapType = TRPM_HARDWARE_INT; + break; + + case VMX_IDT_VECTORING_INFO_TYPE_SW_INT: + enmTrapType = TRPM_SOFTWARE_INT; + break; + + case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT: + case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */ + case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT: + enmTrapType = TRPM_TRAP; + break; + + default: + AssertMsgFailed(("Invalid trap type %#x\n", uVectorType)); + enmTrapType = TRPM_32BIT_HACK; + break; + } + + Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType)); + + int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType); + AssertRC(rc); + + if (fErrorCodeValid) + TRPMSetErrorCode(pVCpu, uErrorCode); + + if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress); + } + else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT + || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) + { + AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT + || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF), + ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType)); + TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr); + } + pVCpu->hm.s.Event.fPending = false; +} + + +/** + * Does the necessary state syncing before returning to ring-3 for any reason + * (longjmp, preemption, voluntary exits to ring-3) from VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may + * be out-of-sync. Make sure to update the required + * fields before using them. + * @param fSaveGuestState Whether to save the guest state or not. + * + * @remarks If you modify code here, make sure to check whether + * hmR0VmxCallRing3Callback() needs to be updated too. + * @remarks No-long-jmp zone!!! + */ +static int hmR0VmxLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fSaveGuestState) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + RTCPUID idCpu = RTMpCpuId(); + Log4Func(("HostCpuId=%u\n", idCpu)); + + /* Save the guest state if necessary. */ + if ( fSaveGuestState + && HMVMXCPU_GST_VALUE(pVCpu) != HMVMX_UPDATED_GUEST_ALL) + { + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL); + } + + /* Restore host FPU state if necessary and resync on next R0 reentry .*/ + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + /* We shouldn't reload CR0 without saving it first. 
*/ + if (!fSaveGuestState) + { + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + } + CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx); + Assert(!CPUMIsGuestFPUStateActive(pVCpu)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + } + + /* Restore host debug registers if necessary and resync on next R0 reentry. */ +#ifdef VBOX_STRICT + if (CPUMIsHyperDebugStateActive(pVCpu)) + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT); +#endif + if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu)); + Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu)); + +#if HC_ARCH_BITS == 64 + /* Restore host-state bits that VT-x only restores partially. */ + if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) + && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) + { + Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu)); + VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + } +#endif + + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3); + + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); + + /** @todo This kinda defeats the purpose of having preemption hooks. + * The problem is, deregistering the hooks should be moved to a place that + * lasts until the EMT is about to be destroyed not everytime while leaving HM + * context. + */ + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE) + { + int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu)); + } + Assert(!(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED)); + NOREF(idCpu); + + return VINF_SUCCESS; +} + + +/** + * Leaves the VT-x session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jmp zone!!! + */ +DECLINLINE(int) hmR0VmxLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HM_DISABLE_PREEMPT_IF_NEEDED(); + HMVMX_ASSERT_CPU_SAFE(); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before + and done this from the VMXR0ThreadCtxCallback(). */ + if (!pVCpu->hm.s.fLeaveDone) + { + int rc2 = hmR0VmxLeave(pVM, pVCpu, pMixedCtx, true /* fSaveGuestState */); + AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT_IF_NEEDED(), rc2); + pVCpu->hm.s.fLeaveDone = true; + } + + /* Deregister hook now that we've left HM context before re-enabling preemption. */ + /** @todo This is bad. 
Deregistering here means we need to VMCLEAR always + * (longjmp/exit-to-r3) in VT-x which is not efficient. */ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + VMMR0ThreadCtxHooksDeregister(pVCpu); + + /* Leave HM context. This takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + + return rc; +} + + +/** + * Does the necessary state syncing before doing a longjmp to ring-3. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jmp zone!!! + */ +DECLINLINE(int) hmR0VmxLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + return hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx); +} + + +/** + * Take necessary actions before going back to ring-3. + * + * An action requires us to go back to ring-3. This function does the necessary + * steps before we can safely return to ring-3. This is not the same as longjmps + * to ring-3, this is voluntary and prepares the guest so it may continue + * executing outside HM (recompiler/IEM). + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param rcExit The reason for exiting to ring-3. Can be + * VINF_VMM_UNKNOWN_RING3_CALL. + */ +static int hmR0VmxExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, int rcExit) +{ + Assert(pVM); + Assert(pVCpu); + Assert(pMixedCtx); + HMVMX_ASSERT_PREEMPT_SAFE(); + + if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR)) + { + VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VMCSPhys); + pVCpu->hm.s.vmx.LastError.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs; + pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; + /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */ + } + + /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */ + VMMRZCallRing3Disable(pVCpu); + Log4(("hmR0VmxExitToRing3: pVCpu=%p idCpu=%RU32 rcExit=%d\n", pVCpu, pVCpu->idCpu, rcExit)); + + /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */ + if (pVCpu->hm.s.Event.fPending) + { + hmR0VmxPendingEventToTrpmTrap(pVCpu); + Assert(!pVCpu->hm.s.Event.fPending); + } + + /* Save guest state and restore host state bits. */ + int rc = hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3); + + /* Sync recompiler state. */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR + | CPUM_CHANGED_LDTR + | CPUM_CHANGED_GDTR + | CPUM_CHANGED_IDTR + | CPUM_CHANGED_TR + | CPUM_CHANGED_HIDDEN_SEL_REGS); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0)); + if ( pVM->hm.s.fNestedPaging + && CPUMIsGuestPagingEnabledEx(pMixedCtx)) + { + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); + } + + /* + * Clear the X86_EFL_TF if necessary. 
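+     * fClearTrapFlag is presumably set by the single-stepping path (fSingleInstruction) when it
+     * forces EFL.TF on; undo that change before handing the context back to ring-3.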
+ */ + if (pVCpu->hm.s.fClearTrapFlag) + { + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + pMixedCtx->eflags.Bits.u1TF = 0; + pVCpu->hm.s.fClearTrapFlag = false; + } + /** @todo there seems to be issues with the resume flag when the monitor trap + * flag is pending without being used. Seen early in bios init when + * accessing APIC page in prot mode. */ + + /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */ + if (rcExit != VINF_EM_RAW_INTERRUPT) + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3); + + /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */ + VMMRZCallRing3RemoveNotification(pVCpu); + VMMRZCallRing3Enable(pVCpu); + + return rc; +} + + +/** + * VMMRZCallRing3() callback wrapper which saves the guest state before we + * longjump to ring-3 and possibly get preempted. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param enmOperation The operation causing the ring-3 longjump. + * @param pvUser Opaque pointer to the guest-CPU context. The data + * may be out-of-sync. Make sure to update the required + * fields before using them. + * @remarks If you modify code here, make sure to check whether + * hmR0VmxLeave() needs to be updated too. + */ +DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser) +{ + if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION) + { + VMMRZCallRing3RemoveNotification(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* If anything here asserts or fails, good luck. */ + if (CPUMIsGuestFPUStateActive(pVCpu)) + CPUMR0SaveGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser); + + CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); + +#if HC_ARCH_BITS == 64 + /* Restore host-state bits that VT-x only restores partially. */ + if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) + && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) + { + VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); + pVCpu->hm.s.vmx.fRestoreHostFlags = 0; + } +#endif + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE) + { + VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR; + } + + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + VMMR0ThreadCtxHooksDeregister(pVCpu); + + HMR0LeaveCpu(pVCpu); + HM_RESTORE_PREEMPT_IF_NEEDED(); + return VINF_SUCCESS; + } + + Assert(pVCpu); + Assert(pvUser); + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + HMVMX_ASSERT_PREEMPT_SAFE(); + + VMMRZCallRing3Disable(pVCpu); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + + Log4(("hmR0VmxCallRing3Callback->hmR0VmxLongJmpToRing3 pVCpu=%p idCpu=%RU32\n enmOperation=%d", pVCpu, pVCpu->idCpu, + enmOperation)); + + int rc = hmR0VmxLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser); + AssertRCReturn(rc, rc); + + VMMRZCallRing3Enable(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to + * cause a VM-exit as soon as the guest is in a state to receive interrupts. + * + * @param pVCpu Pointer to the VMCPU. 
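+ *
+ * @remarks A rough usage sketch, mirroring hmR0VmxEvaluatePendingEvent(): when
+ *          an event is pending but delivery is currently blocked (EFLAGS.IF,
+ *          block-by-STI or block-by-MOV SS), the caller requests an
+ *          interrupt-window VM-exit instead of injecting right away:
+ * @code
+ *     if (!fBlockInt && !fBlockSti && !fBlockMovSS)
+ *         hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0, 0, 0);
+ *     else
+ *         hmR0VmxSetIntWindowExitVmcs(pVCpu);  // exit as soon as injection is possible
+ * @endcode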
+ */ +DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu) +{ + if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)) + { + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)) + { + pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRC(rc); + } + } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */ +} + + +/** + * Evaluates the event to be delivered to the guest and sets it as the pending + * event. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static void hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(!pVCpu->hm.s.Event.fPending); + + /* Get the current interruptibility-state of the guest and then figure out what can be injected. */ + uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx); + bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + + Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/ + && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); + Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */ + Assert(!TRPMHasTrap(pVCpu)); + + /** @todo SMI. SMIs take priority over NMIs. */ + if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts . */ + { + /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */ + if ( !fBlockMovSS + && !fBlockSti) + { + /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */ + Log4(("Pending NMI vcpu[%RU32]\n", pVCpu->idCpu)); + uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddres */); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + } + else + hmR0VmxSetIntWindowExitVmcs(pVCpu); + } + else if ( VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)) + && !pVCpu->hm.s.fSingleInstruction) + { + /* + * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver + * the interrupt ASAP. We must not execute any guest code until we inject the interrupt which is why it is + * evaluated here and not set as pending, solely based on the force-flags. 
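+ *
+ * The vector returned by PDMGetInterrupt() below is packed into the VM-entry
+ * interruption-information format of Intel spec. 24.8.3 "VM-Entry Controls for
+ * Event Injection", roughly: bits 7:0 = vector, bits 10:8 = type (external
+ * interrupt here), bit 11 = error-code valid (unused here), bit 31 = valid.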
+ */ + int rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRC(rc); + const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF); + if ( !fBlockInt + && !fBlockSti + && !fBlockMovSS) + { + uint8_t u8Interrupt; + rc = PDMGetInterrupt(pVCpu, &u8Interrupt); + if (RT_SUCCESS(rc)) + { + Log4(("Pending interrupt vcpu[%RU32] u8Interrupt=%#x \n", pVCpu->idCpu, u8Interrupt)); + uint32_t u32IntInfo = u8Interrupt | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrfaultAddress */); + } + else + { + /** @todo Does this actually happen? If not turn it into an assertion. */ + Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq); + } + } + else + hmR0VmxSetIntWindowExitVmcs(pVCpu); + } +} + + +/** + * Sets a pending-debug exception to be delivered to the guest if the guest is + * single-stepping. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingDebugXcpt(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + if (pMixedCtx->eflags.Bits.u1TF) /* We don't have any IA32_DEBUGCTL MSR for guests. Treat as all bits 0. */ + { + int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, VMX_VMCS_GUEST_DEBUG_EXCEPTIONS_BS); + AssertRC(rc); + } +} + + +/** + * Injects any pending events into the guest if the guest is in a state to + * receive them. + * + * @returns VBox status code (informational status codes included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +static int hmR0VmxInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HMVMX_ASSERT_PREEMPT_SAFE(); + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + + /* Get the current interruptibility-state of the guest and then figure out what can be injected. */ + uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx); + bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + + Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); + Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/ + && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); + Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. 
*/ + Assert(!TRPMHasTrap(pVCpu)); + + int rc = VINF_SUCCESS; + if (pVCpu->hm.s.Event.fPending) + { +#if defined(VBOX_STRICT) || defined(VBOX_WITH_STATISTICS) + uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF); + Assert(!fBlockInt); + Assert(!fBlockSti); + Assert(!fBlockMovSS); + } + else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + Assert(!fBlockSti); + Assert(!fBlockMovSS); + } +#endif + Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo)); + rc = hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr, + pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, &uIntrState); + AssertRCReturn(rc, rc); + + /* Update the interruptibility-state as it could have been changed by + hmR0VmxInjectEventVmcs() (e.g. real-on-v86 guest injecting software interrupts) */ + fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS); + fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + +#ifdef VBOX_WITH_STATISTICS + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt); + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt); +#endif + } + + /* Deliver pending debug exception if the guest is single-stepping. Evaluate and set the BS bit. */ + if ( fBlockSti + || fBlockMovSS) + { + if ( !pVCpu->hm.s.fSingleInstruction + && !DBGFIsStepping(pVCpu)) + { + /* + * The pending-debug exceptions field is cleared on all VM-exits except VMX_EXIT_TPR_BELOW_THRESHOLD, + * VMX_EXIT_MTF, VMX_EXIT_APIC_WRITE and VMX_EXIT_VIRTUALIZED_EOI. + * See Intel spec. 27.3.4 "Saving Non-Register State". + */ + int rc2 = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + } + else if (pMixedCtx->eflags.Bits.u1TF) + { + /* + * We are single-stepping in the hypervisor debugger using EFLAGS.TF. Clear interrupt inhibition as setting the + * BS bit would mean delivering a #DB to the guest upon VM-entry when it shouldn't be. + */ + Assert(!(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG)); + uIntrState = 0; + } + } + + /* + * There's no need to clear the VM entry-interruption information field here if we're not injecting anything. + * VT-x clears the valid bit on every VM-exit. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection". + */ + int rc2 = hmR0VmxLoadGuestIntrState(pVCpu, uIntrState); + AssertRC(rc2); + + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET); + return rc; +} + + +/** + * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_UD | VMX_EXIT_INTERRUPTION_INFO_VALID; + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Injects a double-fault (#DF) exception into the VM. 
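+ * (#DF is vector 8 and is delivered as a hardware exception whose error code
+ * is always zero, which is why the implementation below hard-codes the error
+ * code to 0.)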
+ * + * @returns VBox status code (informational status code included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(int) hmR0VmxInjectXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t *puIntrState) +{ + uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */, + puIntrState); +} + + +/** + * Sets a debug (#DB) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + uint32_t u32IntInfo = X86_XCPT_DB | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Sets an overflow (#OF) exception as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param cbInstr The value of RIP that is to be pushed on the guest + * stack. + */ +DECLINLINE(void) hmR0VmxSetPendingXcptOF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t cbInstr) +{ + uint32_t u32IntInfo = X86_XCPT_OF | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Injects a general-protection (#GP) fault into the VM. + * + * @returns VBox status code (informational status code included). + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param u32ErrorCode The error code associated with the #GP. + */ +DECLINLINE(int) hmR0VmxInjectXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fErrorCodeValid, uint32_t u32ErrorCode, + uint32_t *puIntrState) +{ + uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID; + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + if (fErrorCodeValid) + u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */, + puIntrState); +} + + +/** + * Sets a software interrupt (INTn) as pending-for-injection into the VM. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param uVector The software interrupt vector number. + * @param cbInstr The value of RIP that is to be pushed on the guest + * stack. 
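+ *
+ * @remarks Illustrative only: intercepting a 2-byte "INT 21h" while running a
+ *          real-on-v86 guest might queue the event as
+ * @code
+ *     hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, 0x21, 2);  // vector 0x21, 2-byte INT imm8
+ * @endcode
+ *          INT3 and INTO (vectors 3 and 4) are flagged as software exceptions
+ *          rather than software interrupts, as the vector check below shows.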
+ */ +DECLINLINE(void) hmR0VmxSetPendingIntN(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint16_t uVector, uint32_t cbInstr) +{ + uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID; + if ( uVector == X86_XCPT_BP + || uVector == X86_XCPT_OF) + { + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + } + else + u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); + hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); +} + + +/** + * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's + * stack. + * + * @returns VBox status code (information status code included). + * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault. + * @param pVM Pointer to the VM. + * @param pMixedCtx Pointer to the guest-CPU context. + * @param uValue The value to push to the guest stack. + */ +DECLINLINE(int) hmR0VmxRealModeGuestStackPush(PVM pVM, PCPUMCTX pMixedCtx, uint16_t uValue) +{ + /* + * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in + * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". + * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound". + */ + if (pMixedCtx->sp == 1) + return VINF_EM_RESET; + pMixedCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */ + int rc = PGMPhysSimpleWriteGCPhys(pVM, pMixedCtx->ss.u64Base + pMixedCtx->sp, &uValue, sizeof(uint16_t)); + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Injects an event into the guest upon VM-entry by updating the relevant fields + * in the VM-entry area in the VMCS. + * + * @returns VBox status code (informational error codes included). + * @retval VINF_SUCCESS if the event is successfully injected into the VMCS. + * @retval VINF_EM_RESET if event injection resulted in a triple-fault. + * + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may + * be out-of-sync. Make sure to update the required + * fields before using them. + * @param u64IntInfo The VM-entry interruption-information field. + * @param cbInstr The VM-entry instruction length in bytes (for + * software interrupts, exceptions and privileged + * software exceptions). + * @param u32ErrCode The VM-entry exception error code. + * @param GCPtrFaultAddress The page-fault address for #PF exceptions. + * @param puIntrState Pointer to the current guest interruptibility-state. + * This interruptibility-state will be updated if + * necessary. This cannot not be NULL. + * + * @remarks Requires CR0! + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr, + uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntrState) +{ + /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */ + AssertMsg(u64IntInfo >> 32 == 0, ("%#RX64\n", u64IntInfo)); + Assert(puIntrState); + uint32_t u32IntInfo = (uint32_t)u64IntInfo; + + const uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(u32IntInfo); + const uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo); + +#ifdef VBOX_STRICT + /* Validate the error-code-valid bit for hardware exceptions. 
*/ + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT) + { + switch (uVector) + { + case X86_XCPT_PF: + case X86_XCPT_DF: + case X86_XCPT_TS: + case X86_XCPT_NP: + case X86_XCPT_SS: + case X86_XCPT_GP: + case X86_XCPT_AC: + AssertMsg(VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo), + ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector)); + /* fallthru */ + default: + break; + } + } +#endif + + /* Cannot inject an NMI when block-by-MOV SS is in effect. */ + Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI + || !(*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)); + + STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]); + + /* We require CR0 to check if the guest is in real-mode. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* + * Hardware interrupts & exceptions cannot be delivered through the software interrupt redirection bitmap to the real + * mode task in virtual-8086 mode. We must jump to the interrupt handler in the (real-mode) guest. + * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode" for interrupt & exception classes. + * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling. + */ + if (CPUMIsGuestInRealModeEx(pMixedCtx)) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (!pVM->hm.s.vmx.fUnrestrictedGuest) + { + Assert(PDMVmmDevHeapIsEnabled(pVM)); + Assert(pVM->hm.s.vmx.pRealModeTSS); + + /* We require RIP, RSP, RFLAGS, CS, IDTR. Save the required ones from the VMCS. */ + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP)); + + /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */ + const size_t cbIdtEntry = sizeof(X86IDTR16); + if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pMixedCtx->idtr.cbIdt) + { + /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */ + if (uVector == X86_XCPT_DF) + return VINF_EM_RESET; + else if (uVector == X86_XCPT_GP) + { + /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */ + return hmR0VmxInjectXcptDF(pVCpu, pMixedCtx, puIntrState); + } + + /* If we're injecting an interrupt/exception with no valid IDT entry, inject a general-protection fault. */ + /* No error codes for exceptions in real-mode. See Intel spec. 20.1.4 "Interrupt and Exception Handling" */ + return hmR0VmxInjectXcptGP(pVCpu, pMixedCtx, false /* fErrCodeValid */, 0 /* u32ErrCode */, puIntrState); + } + + /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */ + uint16_t uGuestIp = pMixedCtx->ip; + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT) + { + Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF); + /* #BP and #OF are both benign traps, we need to resume the next instruction. */ + uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr; + } + else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT) + uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr; + + /* Get the code segment selector and offset from the IDT entry for the interrupt handler. 
*/ + X86IDTR16 IdtEntry; + RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pMixedCtx->idtr.pIdt + uVector * cbIdtEntry; + rc = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry); + AssertRCReturn(rc, rc); + + /* Construct the stack frame for the interrupt/exception handler. */ + rc = hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->eflags.u32); + rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->cs.Sel); + rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, uGuestIp); + AssertRCReturn(rc, rc); + + /* Clear the required eflag bits and jump to the interrupt/exception handler. */ + if (rc == VINF_SUCCESS) + { + pMixedCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC); + pMixedCtx->rip = IdtEntry.offSel; + pMixedCtx->cs.Sel = IdtEntry.uSel; + pMixedCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry; + if ( uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pMixedCtx->cr2 = GCPtrFaultAddress; + } + + /* If any other guest-state bits are changed here, make sure to update + hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS + | HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_GUEST_RSP); + + /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */ + if (*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + { + Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI + && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT); + Log4(("Clearing inhibition due to STI.\n")); + *puIntrState &= ~VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI; + } + Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x instrlen=%#x\n", u32IntInfo, u32ErrCode, cbInstr)); + + /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo' + it, if we are returning to ring-3 before executing guest code. */ + pVCpu->hm.s.Event.fPending = false; + } + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET); + return rc; + } + else + { + /* + * For unrestricted execution enabled CPUs running real-mode guests, we must not set the deliver-error-code bit. + * See Intel spec. 26.2.1.3 "VM-Entry Control Fields". + */ + u32IntInfo &= ~VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID; + } + } + + /* Validate. */ + Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */ + Assert(!VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK(u32IntInfo)); /* Bit 12 MBZ. */ + Assert(!(u32IntInfo & 0x7ffff000)); /* Bits 30:12 MBZ. */ + + /* Inject. */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo); + if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo)) + rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode); + rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr); + + if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pMixedCtx->cr2 = GCPtrFaultAddress; + } + + Log4(("Injecting vcpu[%RU32] u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x pMixedCtx->uCR2=%#RX64\n", pVCpu->idCpu, + u32IntInfo, u32ErrCode, cbInstr, pMixedCtx->cr2)); + + AssertRCReturn(rc, rc); + return rc; +} + + +/** + * Clears the interrupt-window exiting control in the VMCS and if necessary + * clears the current event in the VMCS as well. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. 
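+ *
+ * @remarks Callers such as hmR0VmxPreRunGuest() use this to undo a pending
+ *          injection when force-flags or a pending host interrupt abort the
+ *          VM-entry at the last moment.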
+ * + * @remarks Use this function only to clear events that have not yet been + * delivered to the guest but are injected in the VMCS! + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxClearEventVmcs(PVMCPU pVCpu) +{ + int rc; + Log4Func(("vcpu[%d]\n", pVCpu->idCpu)); + + /* Clear interrupt-window exiting control. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT) + { + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRC(rc); + } + + if (!pVCpu->hm.s.Event.fPending) + return; + +#ifdef VBOX_STRICT + uint32_t u32EntryInfo; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo); + AssertRC(rc); + Assert(VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)); +#endif + + /* Clear the entry-interruption field (including the valid bit). */ + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); + AssertRC(rc); + + /* Clear the pending debug exception field. */ + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0); + AssertRC(rc); +} + + +/** + * Enters the VT-x session. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCpu Pointer to the CPU info struct. + */ +VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + Assert(pVM->hm.s.vmx.fSupported); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + NOREF(pCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + +#ifdef VBOX_STRICT + /* Make sure we're in VMX root mode. */ + RTCCUINTREG u32HostCR4 = ASMGetCR4(); + if (!(u32HostCR4 & X86_CR4_VMXE)) + { + LogRel(("VMXR0Enter: X86_CR4_VMXE bit in CR4 is not set!\n")); + return VERR_VMX_X86_CR4_VMXE_CLEARED; + } +#endif + + /* + * Load the VCPU's VMCS as the current (and active) one. + */ + Assert(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR); + int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + if (RT_FAILURE(rc)) + return rc; + + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE; + pVCpu->hm.s.fLeaveDone = false; + Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); + + return VINF_SUCCESS; +} + + +/** + * The thread-context callback (only on platforms which support it). + * + * @param enmEvent The thread-context event. + * @param pVCpu Pointer to the VMCPU. + * @param fGlobalInit Whether global VT-x/AMD-V init. was used. + * @thread EMT(pVCpu) + */ +VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit) +{ + switch (enmEvent) + { + case RTTHREADCTXEVENT_PREEMPTING: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + PCPUMCTX pMixedCtx = CPUMQueryGuestCtxPtr(pVCpu); + + /* No longjmps (logger flushes, locks) in this fragile context. */ + VMMRZCallRing3Disable(pVCpu); + Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId())); + + /* + * Restore host-state (FPU, debug etc.) + */ + if (!pVCpu->hm.s.fLeaveDone) + { + /* Do -not- save guest-state here as we might already be in the middle of saving it (esp. bad if we are + holding the PGM lock while saving the guest state (see hmR0VmxSaveGuestControlRegs()). 
*/ + hmR0VmxLeave(pVM, pVCpu, pMixedCtx, false /* fSaveGuestState */); + pVCpu->hm.s.fLeaveDone = true; + } + + /* Leave HM context, takes care of local init (term). */ + int rc = HMR0LeaveCpu(pVCpu); + AssertRC(rc); NOREF(rc); + + /* Restore longjmp state. */ + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptPreempting); + break; + } + + case RTTHREADCTXEVENT_RESUMED: + { + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu)); + VMCPU_ASSERT_EMT(pVCpu); + + /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */ + VMMRZCallRing3Disable(pVCpu); + Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId())); + + /* Initialize the bare minimum state required for HM. This takes care of + initializing VT-x if necessary (onlined CPUs, local init etc.) */ + int rc = HMR0EnterCpu(pVCpu); + AssertRC(rc); + Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)); + + /* Load the active VMCS as the current one. */ + if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR) + { + rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); + AssertRC(rc); NOREF(rc); + pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE; + Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); + } + pVCpu->hm.s.fLeaveDone = false; + + /* Restore longjmp state. */ + VMMRZCallRing3Enable(pVCpu); + break; + } + + default: + break; + } +} + + +/** + * Saves the host state in the VMCS host-state. + * Sets up the VM-exit MSR-load area. + * + * The CPU state will be loaded from these fields on every successful VM-exit. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxSaveHostState(PVM pVM, PVMCPU pVCpu) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)) + return VINF_SUCCESS; + + int rc = hmR0VmxSaveHostControlRegs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostControlRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxSaveHostSegmentRegs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostSegmentRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxSaveHostMsrs(pVM, pVCpu); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostMsrs failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT); + return rc; +} + + +/** + * Saves the host state in the VMCS host-state. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * + * @remarks No-long-jump zone!!! + */ +VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + /* Save the host state here while entering HM context. When thread-context hooks are used, we might get preempted + and have to resave the host state but most of the time we won't be, so do it here before we disable interrupts. */ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + return hmR0VmxSaveHostState(pVM, pVCpu); +} + + +/** + * Loads the guest state into the VMCS guest-state area. The CPU state will be + * loaded from these fields on every successful VM-entry. + * + * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. + * Sets up the VM-entry controls. 
+ * Sets up the appropriate VMX non-root function to execute guest code based on + * the guest CPU mode. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * + * @remarks No-long-jump zone!!! + */ +static int hmR0VmxLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + AssertPtr(pVM); + AssertPtr(pVCpu); + AssertPtr(pMixedCtx); + HMVMX_ASSERT_PREEMPT_SAFE(); + +#ifdef LOG_ENABLED + /** @todo r=ramshankar: I'm not able to use VMMRZCallRing3Disable() here, + * probably not initialized yet? Anyway this will do for now. + * + * Update: Should be possible once VMXR0LoadGuestState() is removed as an + * interface and disable ring-3 calls when thread-context hooks are not + * available. */ + bool fCallerDisabledLogFlush = VMMR0IsLogFlushDisabled(pVCpu); + VMMR0LogFlushDisable(pVCpu); +#endif + + LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu)); + + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x); + + /* Determine real-on-v86 mode. */ + pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false; + if ( !pVM->hm.s.vmx.fUnrestrictedGuest + && CPUMIsGuestInRealModeEx(pMixedCtx)) + { + pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true; + } + + /* + * Load the guest-state into the VMCS. + * Any ordering dependency among the sub-functions below must be explicitly stated using comments. + * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it. + */ + int rc = hmR0VmxSetupVMRunHandler(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */ + rc = hmR0VmxLoadGuestEntryCtls(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestEntryCtls! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */ + rc = hmR0VmxLoadGuestExitCtls(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupExitCtls failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestActivityState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestActivityState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestCR3AndCR4(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestCR3AndCR4: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* Assumes pMixedCtx->cr0 is up-to-date (strict builds require CR0 for segment register validation checks). */ + rc = hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestSegmentRegs: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestMsrs(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestMsrs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + rc = hmR0VmxLoadGuestApicState(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* + * Loading Rflags here is fine, even though Rflags.TF might depend on guest debug state (which is not loaded here). + * It is re-evaluated and updated if necessary in hmR0VmxLoadSharedState(). 
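+ * (I.e. the RFLAGS value loaded here may still be overridden via
+ * HM_CHANGED_GUEST_RFLAGS from hmR0VmxLoadSharedDebugState() before the
+ * actual VM-entry, see hmR0VmxLoadSharedState() below.)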
+ */ + rc = hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx); + AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestRipRspRflags! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc); + + /* Clear any unused and reserved bits. */ + HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2); + +#ifdef LOG_ENABLED + /* Only reenable log-flushing if the caller has it enabled. */ + if (!fCallerDisabledLogFlush) + VMMR0LogFlushEnable(pVCpu); +#endif + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x); + return rc; +} + + +/** + * Loads the state shared between the host and guest into the VMCS. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxLoadSharedState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)) + { + int rc = hmR0VmxLoadSharedCR0(pVCpu, pCtx); + AssertRC(rc); + } + + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG)) + { + int rc = hmR0VmxLoadSharedDebugState(pVCpu, pCtx); + AssertRC(rc); + + /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS)) + { + rc = hmR0VmxLoadGuestRflags(pVCpu, pCtx); + AssertRC(rc); + } + } + + AssertMsg(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); +} + + +/** + * Worker for loading the guest-state bits in the inner VT-x execution loop. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + */ +DECLINLINE(void) hmR0VmxLoadGuestStateOptimal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx) +{ + HMVMX_ASSERT_PREEMPT_SAFE(); + + Log5(("LoadFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); +#ifdef HMVMX_SYNC_FULL_GUEST_STATE + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); +#endif + + if (HMCPU_CF_IS_SET_ONLY(pVCpu, HM_CHANGED_GUEST_RIP)) + { + int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal); + } + else if (HMCPU_CF_VALUE(pVCpu)) + { + int rc = hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull); + } + + /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */ + AssertMsg( !HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST) + || HMCPU_CF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE), + ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + +#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE + uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVM, pVCpu, pMixedCtx); + if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND) + Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason)); +#endif +} + + +/** + * Does the preparations before executing guest code in VT-x. + * + * This may cause longjmps to ring-3 and may even result in rescheduling to the + * recompiler. We must be cautious what we do here regarding committing + * guest-state information into the VMCS assuming we assuredly execute the + * guest in VT-x mode. 
If we fall back to the recompiler after updating the VMCS + * and clearing the common-state (TRPM/forceflags), we must undo those changes + * so that the recompiler can (and should) use them when it resumes guest + * execution. Otherwise such operations must be done when we can no longer + * exit to ring-3. + * + * @returns Strict VBox status code. + * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts + * have been disabled. + * @retval VINF_EM_RESET if a triple-fault occurs while injecting a + * double-fault into the guest. + * @retval VINF_* scheduling changes, we have to go back to ring-3. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks Called with preemption disabled. In the VINF_SUCCESS return case + * interrupts will be disabled. + */ +static int hmR0VmxPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PGMRZDynMapFlushAutoSet(pVCpu); +#endif + + /* Check force flag actions that might require us to go back to ring-3. */ + int rc = hmR0VmxCheckForceFlags(pVM, pVCpu, pMixedCtx); + if (rc != VINF_SUCCESS) + return rc; + +#ifndef IEM_VERIFICATION_MODE_FULL + /* Setup the Virtualized APIC accesses. pMixedCtx->msrApicBase is always up-to-date. It's not part of the VMCS. */ + if ( pVCpu->hm.s.vmx.u64MsrApicBase != pMixedCtx->msrApicBase + && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) + { + Assert(pVM->hm.s.vmx.HCPhysApicAccess); + RTGCPHYS GCPhysApicBase; + GCPhysApicBase = pMixedCtx->msrApicBase; + GCPhysApicBase &= PAGE_BASE_GC_MASK; + + /* Unalias any existing mapping. */ + rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase); + AssertRCReturn(rc, rc); + + /* Map the HC APIC-access page into the GC space, this also updates the shadow page tables if necessary. */ + Log4(("Mapped HC APIC-access page into GC: GCPhysApicBase=%#RGv\n", GCPhysApicBase)); + rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P); + AssertRCReturn(rc, rc); + + pVCpu->hm.s.vmx.u64MsrApicBase = pMixedCtx->msrApicBase; + } +#endif /* !IEM_VERIFICATION_MODE_FULL */ + + /* Load the guest state bits, we can handle longjmps/getting preempted here. */ + hmR0VmxLoadGuestStateOptimal(pVM, pVCpu, pMixedCtx); + + /* + * Evaluate events as pending-for-injection into the guest. Toggling of force-flags here is safe as long as + * we update TRPM on premature exits to ring-3 before executing guest code. We must NOT restore the force-flags. + */ + if (TRPMHasTrap(pVCpu)) + hmR0VmxTrpmTrapToPendingEvent(pVCpu); + else if (!pVCpu->hm.s.Event.fPending) + hmR0VmxEvaluatePendingEvent(pVCpu, pMixedCtx); + + /* + * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus needs to be done with + * longjmps or interrupts + preemption enabled. Event injection might also result in triple-faulting the VM. + */ + rc = hmR0VmxInjectPendingEvent(pVCpu, pMixedCtx); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + Assert(rc == VINF_EM_RESET); + return rc; + } + + /* + * No longjmps to ring-3 from this point on!!! + * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic. 
+ * This also disables flushing of the R0-logger instance (if any). + */ + VMMRZCallRing3Disable(pVCpu); + + /* + * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.) + * when thread-context hooks aren't used and we've been running with preemption disabled for a while. + * + * We need to check for force-flags that could've possible been altered since we last checked them (e.g. + * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}). + * + * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before + * executing guest code. + */ + pVmxTransient->uEflags = ASMIntDisableFlags(); + if ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC) + || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) + { + hmR0VmxClearEventVmcs(pVCpu); + ASMSetFlags(pVmxTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); + return VINF_EM_RAW_TO_R3; + } + if (RTThreadPreemptIsPending(NIL_RTTHREAD)) + { + hmR0VmxClearEventVmcs(pVCpu); + ASMSetFlags(pVmxTransient->uEflags); + VMMRZCallRing3Enable(pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq); + return VINF_EM_RAW_INTERRUPT; + } + + /* We've injected any pending events. This is really the point of no return (to ring-3). */ + pVCpu->hm.s.Event.fPending = false; + + return VINF_SUCCESS; +} + + +/** + * Prepares to run guest code in VT-x and we've committed to doing so. This + * means there is no backing out to ring-3 or anywhere else at this + * point. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data may be + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * + * @remarks Called with preemption disabled. + * @remarks No-long-jump zone!!! + */ +static void hmR0VmxPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + Assert(VMMR0IsLogFlushDisabled(pVCpu)); + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */ + + /* + * If we are injecting events to a real-on-v86 mode guest, we may have to update + * RIP and some other registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs(). + * Reload only the necessary state, the assertion will catch if other parts of the code + * change. + */ + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx); + hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx); + } + +#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE + if (!CPUMIsGuestFPUStateActive(pVCpu)) + CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); +#endif + + /* + * Load the host state bits as we may've been preempted (only happens when + * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM). + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)) + { + /* This ASSUMES that pfnStartVM has been set up already. 
*/ + int rc = hmR0VmxSaveHostState(pVM, pVCpu); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptSaveHostState); + } + Assert(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT)); + + /* + * Load the state shared between host and guest (FPU, debug). + */ + if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE)) + hmR0VmxLoadSharedState(pVM, pVCpu, pMixedCtx); + AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu))); + + /* Store status of the shared guest-host state at the time of VM-entry. */ +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (CPUMIsGuestInLongModeEx(pMixedCtx)) + { + pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu); + pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu); + } + else +#endif + { + pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu); + pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu); + } + pVmxTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu); + + /* + * Cache the TPR-shadow for checking on every VM-exit if it might have changed. + */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[0x80]; + + PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu(); + RTCPUID idCurrentCpu = pCpu->idCpu; + if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer + || idCurrentCpu != pVCpu->hm.s.idLastCpu) + { + hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pMixedCtx); + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false; + } + + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */ + hmR0VmxFlushTaggedTlb(pVCpu, pCpu); /* Invalidate the appropriate guest entries from the TLB. */ + Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu); + pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */ + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x); + + TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about + to start executing. */ + +#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* + * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that + * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}. + */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + { + pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX); + uint64_t u64HostTscAux = CPUMR0GetGuestTscAux(pVCpu); + ASMWrMsr(MSR_K8_TSC_AUX, u64HostTscAux); + } +#endif +} + + +/** + * Performs some essential restoration of state after running guest code in + * VT-x. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. + * @param rcVMRun Return code of VMLAUNCH/VMRESUME. + * + * @remarks Called with interrupts disabled, and returns with interrups enabled! + * + * @remarks No-long-jump zone!!! This function will however re-enable longjmps + * unconditionally when it is safe to do so. 
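+ *
+ * @remarks When RDTSC exiting is not active, guest TSC reads return roughly
+ *          host TSC + pVCpu->hm.s.vmx.u64TSCOffset; the code below therefore
+ *          seeds TM with ASMReadTSC() + u64TSCOffset minus a small (0x400
+ *          tick) guesstimate of the world-switch overhead.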
+ */ +static void hmR0VmxPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, int rcVMRun) +{ + Assert(!VMMRZCallRing3IsEnabled(pVCpu)); + + ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */ + ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */ + HMVMXCPU_GST_RESET_TO(pVCpu, 0); /* Exits/longjmps to ring-3 requires saving the guest state. */ + pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */ + pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */ + + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT)) + { +#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE + /* Restore host's TSC_AUX. */ + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP) + ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux); +#endif + /** @todo Find a way to fix hardcoding a guestimate. */ + TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */); + } + + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x); + TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */ + Assert(!(ASMGetFlags() & X86_EFL_IF)); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); + +#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE + if (CPUMIsGuestFPUStateActive(pVCpu)) + { + hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + } +#endif + + pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */ + pVCpu->hm.s.vmx.uVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */ + ASMSetFlags(pVmxTransient->uEflags); /* Enable interrupts. */ + VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */ + + /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */ + uint32_t uExitReason; + int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason); + rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); + AssertRC(rc); + pVmxTransient->uExitReason = (uint16_t)VMX_EXIT_REASON_BASIC(uExitReason); + pVmxTransient->fVMEntryFailed = VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uEntryIntInfo); + + /* If the VMLAUNCH/VMRESUME failed, we can bail out early. This does -not- cover VMX_EXIT_ERR_*. */ + if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS)) + { + Log4(("VM-entry failure: pVCpu=%p idCpu=%RU32 rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", pVCpu, pVCpu->idCpu, rcVMRun, + pVmxTransient->fVMEntryFailed)); + return; + } + + if (RT_LIKELY(!pVmxTransient->fVMEntryFailed)) + { + /* Update the guest interruptibility-state from the VMCS. */ + hmR0VmxSaveGuestIntrState(pVCpu, pMixedCtx); +#if defined(HMVMX_SYNC_FULL_GUEST_STATE) || defined(HMVMX_SAVE_FULL_GUEST_STATE) + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRC(rc); +#endif + /* + * If the TPR was raised by the guest, it wouldn't cause a VM-exit immediately. Instead we sync the TPR lazily whenever + * we eventually get a VM-exit for any reason. 
This maybe expensive as PDMApicSetTPR() can longjmp to ring-3 and which is + * why it's done here as it's easier and no less efficient to deal with it here than making hmR0VmxSaveGuestState() + * cope with longjmps safely (see VMCPU_FF_HM_UPDATE_CR3 handling). + */ + if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[0x80]) + { + rc = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]); + AssertRC(rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + } +} + + + +/** + * Runs the guest code using VT-x the normal way. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @note Mostly the same as hmR0VmxRunGuestCodeStep. + * @remarks Called with preemption disabled. + */ +static int hmR0VmxRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + VMXTRANSIENT VmxTransient; + VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; + int rc = VERR_INTERNAL_ERROR_5; + uint32_t cLoops = 0; + + for (;; cLoops++) + { + Assert(!HMR0SuspendPending()); + HMVMX_ASSERT_CPU_SAFE(); + + /* Preparatory work for running guest code, this may force us to return + to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); + rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient); + if (rc != VINF_SUCCESS) + break; + + hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient); + rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx); + /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */ + + /* Restore any residual host-state and save any bits shared between host + and guest into the guest-CPU state. Re-enables interrupts! */ + hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc); + + /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x); + hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient); + return rc; + } + + /* Handle the VM-exit. */ + AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); + STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x); + HMVMX_START_EXIT_DISPATCH_PROF(); +#ifdef HMVMX_USE_FUNCTION_TABLE + rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient); +#else + rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason); +#endif + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x); + if (rc != VINF_SUCCESS) + break; + else if (cLoops > pVM->hm.s.cMaxResumeLoops) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume); + rc = VINF_EM_RAW_INTERRUPT; + break; + } + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); + return rc; +} + + +/** + * Single steps guest code using VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @note Mostly the same as hmR0VmxRunGuestCodeNormal. + * @remarks Called with preemption disabled. 
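+ *
+ * @remarks Roughly, the "step" is detected by snapshotting CS:RIP before the
+ *          run loop and bailing out once either changes:
+ * @code
+ *     if (pCtx->rip != uRipStart || pCtx->cs.Sel != uCsStart)
+ *         rc = VINF_EM_DBG_STEPPED;   // return to the debugger in ring-3
+ * @endcode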
+ */ +static int hmR0VmxRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + VMXTRANSIENT VmxTransient; + VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; + int rc = VERR_INTERNAL_ERROR_5; + uint32_t cLoops = 0; + uint16_t uCsStart = pCtx->cs.Sel; + uint64_t uRipStart = pCtx->rip; + + for (;; cLoops++) + { + Assert(!HMR0SuspendPending()); + HMVMX_ASSERT_CPU_SAFE(); + + /* Preparatory work for running guest code, this may force us to return + to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); + rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient); + if (rc != VINF_SUCCESS) + break; + + hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient); + rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx); + /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */ + + /* Restore any residual host-state and save any bits shared between host + and guest into the guest-CPU state. Re-enables interrupts! */ + hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc); + + /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x); + hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient); + return rc; + } + + /* Handle the VM-exit. */ + AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); + STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x); + HMVMX_START_EXIT_DISPATCH_PROF(); +#ifdef HMVMX_USE_FUNCTION_TABLE + rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient); +#else + rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason); +#endif + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x); + if (rc != VINF_SUCCESS) + break; + else if (cLoops > pVM->hm.s.cMaxResumeLoops) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume); + rc = VINF_EM_RAW_INTERRUPT; + break; + } + + /* + * Did the RIP change, if so, consider it a single step. + * Otherwise, make sure one of the TFs gets set. + */ + int rc2 = hmR0VmxSaveGuestRip(pVCpu, pCtx); + rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pCtx); + AssertRCReturn(rc2, rc2); + if ( pCtx->rip != uRipStart + || pCtx->cs.Sel != uCsStart) + { + rc = VINF_EM_DBG_STEPPED; + break; + } + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); + return rc; +} + + +/** + * Runs the guest code using VT-x. + * + * @returns VBox status code. + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU context. + * + * @remarks Called with preemption disabled. 
+ */ +VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + Assert(VMMRZCallRing3IsEnabled(pVCpu)); + Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL); + HMVMX_ASSERT_PREEMPT_SAFE(); + + VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx); + + int rc; + if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu)) + rc = hmR0VmxRunGuestCodeNormal(pVM, pVCpu, pCtx); + else + rc = hmR0VmxRunGuestCodeStep(pVM, pVCpu, pCtx); + + if (rc == VERR_EM_INTERPRETER) + rc = VINF_EM_RAW_EMULATE_INSTR; + else if (rc == VINF_EM_RESET) + rc = VINF_EM_TRIPLE_FAULT; + + int rc2 = hmR0VmxExitToRing3(pVM, pVCpu, pCtx, rc); + if (RT_FAILURE(rc2)) + { + pVCpu->hm.s.u32HMError = rc; + rc = rc2; + } + Assert(!VMMRZCallRing3IsNotificationSet(pVCpu)); + return rc; +} + + +#ifndef HMVMX_USE_FUNCTION_TABLE +DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason) +{ + int rc; + switch (rcReason) + { + case VMX_EXIT_EPT_MISCONFIG: rc = hmR0VmxExitEptMisconfig(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_EPT_VIOLATION: rc = hmR0VmxExitEptViolation(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_IO_INSTR: rc = hmR0VmxExitIoInstr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_CPUID: rc = hmR0VmxExitCpuid(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDTSC: rc = hmR0VmxExitRdtsc(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDTSCP: rc = hmR0VmxExitRdtscp(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_APIC_ACCESS: rc = hmR0VmxExitApicAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_XCPT_OR_NMI: rc = hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MOV_CRX: rc = hmR0VmxExitMovCRx(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_EXT_INT: rc = hmR0VmxExitExtInt(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INT_WINDOW: rc = hmR0VmxExitIntWindow(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MWAIT: rc = hmR0VmxExitMwait(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MONITOR: rc = hmR0VmxExitMonitor(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TASK_SWITCH: rc = hmR0VmxExitTaskSwitch(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_PREEMPT_TIMER: rc = hmR0VmxExitPreemptTimer(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDMSR: rc = hmR0VmxExitRdmsr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_WRMSR: rc = hmR0VmxExitWrmsr(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MOV_DRX: rc = hmR0VmxExitMovDRx(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TPR_BELOW_THRESHOLD: rc = hmR0VmxExitTprBelowThreshold(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_HLT: rc = hmR0VmxExitHlt(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVD: rc = hmR0VmxExitInvd(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVLPG: rc = hmR0VmxExitInvlpg(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RSM: rc = hmR0VmxExitRsm(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_MTF: rc = hmR0VmxExitMtf(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_PAUSE: rc = hmR0VmxExitPause(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_XDTR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_TR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_WBINVD: rc = hmR0VmxExitWbinvd(pVCpu, pMixedCtx, pVmxTransient); break; + case 
VMX_EXIT_XSETBV: rc = hmR0VmxExitXsetbv(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDRAND: rc = hmR0VmxExitRdrand(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INVPCID: rc = hmR0VmxExitInvpcid(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_GETSEC: rc = hmR0VmxExitGetsec(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_RDPMC: rc = hmR0VmxExitRdpmc(pVCpu, pMixedCtx, pVmxTransient); break; + + case VMX_EXIT_TRIPLE_FAULT: rc = hmR0VmxExitTripleFault(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_NMI_WINDOW: rc = hmR0VmxExitNmiWindow(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_INIT_SIGNAL: rc = hmR0VmxExitInitSignal(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_SIPI: rc = hmR0VmxExitSipi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_IO_SMI: rc = hmR0VmxExitIoSmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_SMI: rc = hmR0VmxExitSmi(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_MSR_LOAD: rc = hmR0VmxExitErrMsrLoad(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_INVALID_GUEST_STATE: rc = hmR0VmxExitErrInvalidGuestState(pVCpu, pMixedCtx, pVmxTransient); break; + case VMX_EXIT_ERR_MACHINE_CHECK: rc = hmR0VmxExitErrMachineCheck(pVCpu, pMixedCtx, pVmxTransient); break; + + case VMX_EXIT_VMCALL: + case VMX_EXIT_VMCLEAR: + case VMX_EXIT_VMLAUNCH: + case VMX_EXIT_VMPTRLD: + case VMX_EXIT_VMPTRST: + case VMX_EXIT_VMREAD: + case VMX_EXIT_VMRESUME: + case VMX_EXIT_VMWRITE: + case VMX_EXIT_VMXOFF: + case VMX_EXIT_VMXON: + case VMX_EXIT_INVEPT: + case VMX_EXIT_INVVPID: + case VMX_EXIT_VMFUNC: + rc = hmR0VmxExitSetPendingXcptUD(pVCpu, pMixedCtx, pVmxTransient); + break; + default: + rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient); + break; + } + return rc; +} +#endif + +#ifdef DEBUG +/* Is there some generic IPRT define for this that are not in Runtime/internal/\* ?? */ +# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \ + RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId() + +# define HMVMX_ASSERT_PREEMPT_CPUID() \ + do \ + { \ + RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \ + AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \ + } while (0) + +# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \ + do { \ + AssertPtr(pVCpu); \ + AssertPtr(pMixedCtx); \ + AssertPtr(pVmxTransient); \ + Assert(pVmxTransient->fVMEntryFailed == false); \ + Assert(ASMIntAreEnabled()); \ + HMVMX_ASSERT_PREEMPT_SAFE(); \ + HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \ + Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", pVCpu->idCpu)); \ + HMVMX_ASSERT_PREEMPT_SAFE(); \ + if (VMMR0IsLogFlushDisabled(pVCpu)) \ + HMVMX_ASSERT_PREEMPT_CPUID(); \ + HMVMX_STOP_EXIT_DISPATCH_PROF(); \ + } while (0) + +# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() \ + do { \ + Log4Func(("\n")); \ + } while(0) +#else /* Release builds */ +# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() do { HMVMX_STOP_EXIT_DISPATCH_PROF(); } while(0) +# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() do { } while(0) +#endif + + +/** + * Advances the guest RIP after reading it from the VMCS. + * + * @returns VBox status code. + * @param pVCpu Pointer to the VMCPU. + * @param pMixedCtx Pointer to the guest-CPU context. The data maybe + * out-of-sync. Make sure to update the required fields + * before using them. + * @param pVmxTransient Pointer to the VMX transient structure. 
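+ *
+ * @remarks Besides advancing RIP, this also raises a pending #DB for the guest
+ * when it is single-stepping (see the hmR0VmxSetPendingDebugXcpt() call
+ * below).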
+ * + * @remarks No-long-jump zone!!! + */ +DECLINLINE(int) hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + pMixedCtx->rip += pVmxTransient->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + + /* + * Deliver a debug exception to the guest if it is single-stepping. Don't directly inject a #DB but use the + * pending debug exception field as it takes care of priority of events. + * + * See Intel spec. 32.2.1 "Debug Exceptions". + */ + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + return rc; +} + + +/** + * Tries to determine what part of the guest-state VT-x has deemed as invalid + * and update error record fields accordingly. + * + * @return VMX_IGS_* return codes. + * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything + * wrong with the guest state. + * + * @param pVM Pointer to the VM. + * @param pVCpu Pointer to the VMCPU. + * @param pCtx Pointer to the guest-CPU state. + */ +static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ +#define HMVMX_ERROR_BREAK(err) { uError = (err); break; } +#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \ + uError = (err); \ + break; \ + } else do {} while (0) +/* Duplicate of IEM_IS_CANONICAL(). */ +#define HMVMX_IS_CANONICAL(a_u64Addr) ((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000) < UINT64_C(0x1000000000000)) + + int rc; + uint32_t uError = VMX_IGS_ERROR; + uint32_t u32Val; + bool fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest; + + do + { + /* + * CR0. + */ + uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); + /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). + See Intel spec. 26.3.1 "Checks on guest Guest Control Registers, Debug Registers and MSRs." */ + if (fUnrestrictedGuest) + uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG); + + uint32_t u32GuestCR0; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCR0); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK((u32GuestCR0 & uSetCR0) == uSetCR0, VMX_IGS_CR0_FIXED1); + HMVMX_CHECK_BREAK(!(u32GuestCR0 & ~uZapCR0), VMX_IGS_CR0_FIXED0); + if ( !fUnrestrictedGuest + && (u32GuestCR0 & X86_CR0_PG) + && !(u32GuestCR0 & X86_CR0_PE)) + { + HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO); + } + + /* + * CR4. + */ + uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); + + uint32_t u32GuestCR4; + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCR4); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK((u32GuestCR4 & uSetCR4) == uSetCR4, VMX_IGS_CR4_FIXED1); + HMVMX_CHECK_BREAK(!(u32GuestCR4 & ~uZapCR4), VMX_IGS_CR4_FIXED0); + + /* + * IA32_DEBUGCTL MSR. + */ + uint64_t u64Val; + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val); + AssertRCBreak(rc); + if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. 
*/ + { + HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED); + } + uint64_t u64DebugCtlMsr = u64Val; + +#ifdef VBOX_STRICT + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); + AssertRCBreak(rc); + Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls); +#endif + bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST); + + /* + * RIP and RFLAGS. + */ + uint32_t u32Eflags; +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val); + AssertRCBreak(rc); + /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */ + if ( !fLongModeGuest + || !pCtx->cs.Attr.n.u1Long) + { + HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID); + } + /** @todo If the processor supports N < 64 linear-address bits, bits 63:N + * must be identical if the "IA32e mode guest" VM-entry control is 1 + * and CS.L is 1. No check applies if the CPU supports 64 + * linear-address bits. */ + + /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */ + rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */ + VMX_IGS_RFLAGS_RESERVED); + HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ + u32Eflags = u64Val; + } + else +#endif + { + rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */ + HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ + } + + if ( fLongModeGuest + || ( fUnrestrictedGuest + && !(u32GuestCR0 & X86_CR0_PE))) + { + HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID); + } + + uint32_t u32EntryInfo; + rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo); + AssertRCBreak(rc); + if ( VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo) + && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID); + } + + /* + * 64-bit checks. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + if ( fLongModeGuest + && !fUnrestrictedGuest) + { + HMVMX_CHECK_BREAK(u32GuestCR0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE); + HMVMX_CHECK_BREAK(u32GuestCR4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE); + } + + if ( !fLongModeGuest + && (u32GuestCR4 & X86_CR4_PCIDE)) + { + HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE); + } + + /** @todo CR3 field must be such that bits 63:52 and bits in the range + * 51:32 beyond the processor's physical-address width are 0. */ + + if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG) + && (pCtx->dr[7] & X86_DR7_MBZ_MASK)) + { + HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED); + } + + rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL); + + rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL); + } +#endif + + /* + * PERF_GLOBAL MSR. 
 */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
+ VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
+ }
+
+ /*
+ * PAT MSR.
+ */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xf8f8f8f8f8f8f8f8)), VMX_IGS_PAT_MSR_RESERVED); /* Bits 7:3 of each PAT entry (byte) MBZ. */
+ for (unsigned i = 0; i < 8; i++)
+ {
+ uint8_t u8Val = (u64Val & 0xff);
+ if ( u8Val != 0 /* UC */
+ && u8Val != 1 /* WC */
+ && u8Val != 4 /* WT */
+ && u8Val != 5 /* WP */
+ && u8Val != 6 /* WB */
+ && u8Val != 7 /* UC- */)
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
+ }
+ u64Val >>= 8;
+ }
+ }
+
+ /*
+ * EFER MSR.
+ */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
+ VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
+ HMVMX_CHECK_BREAK((u64Val & MSR_K6_EFER_LMA) == (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST),
+ VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
+ HMVMX_CHECK_BREAK( fUnrestrictedGuest
+ || (u64Val & MSR_K6_EFER_LMA) == (u32GuestCR0 & X86_CR0_PG), VMX_IGS_EFER_LMA_PG_MISMATCH);
+ }
+
+ /*
+ * Segment registers.
+ */
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
+ if (!(u32Eflags & X86_EFL_VM))
+ {
+ /* CS */
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
+ || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ /* CS cannot be loaded with NULL in protected mode.
*/ + HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE); + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID); + if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL); + else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH); + else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3) + HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID); + else + HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID); + + /* SS */ + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL); + if ( !(pCtx->cr0 & X86_CR0_PE) + || pCtx->cs.Attr.n.u4Type == 3) + { + HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID); + } + if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff + || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000) + || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); + } + + /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). 
*/ + if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->ds.Attr.n.u4Type > 11 + || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff + || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000) + || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID); + } + if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->es.Attr.n.u4Type > 11 + || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff + || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000) + || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID); + } + if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->fs.Attr.n.u4Type > 11 + || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff + || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000) + || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID); + } + if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) + { + HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID); + HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID); + HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest + || pCtx->gs.Attr.n.u4Type > 11 + || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL); + HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED); + HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED); + HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff + || !(pCtx->gs.Attr.n.u1Granularity), 
VMX_IGS_GS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000) + || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) + || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID); + } + /* 64-bit capable CPUs. */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) + || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32), + VMX_IGS_LONGMODE_SS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32), + VMX_IGS_LONGMODE_DS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32), + VMX_IGS_LONGMODE_ES_BASE_INVALID); + } +#endif + } + else + { + /* V86 mode checks. */ + uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + u32CSAttr = 0xf3; u32SSAttr = 0xf3; + u32DSAttr = 0xf3; u32ESAttr = 0xf3; + u32FSAttr = 0xf3; u32GSAttr = 0xf3; + } + else + { + u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; + u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u; + u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; + } + + /* CS */ + HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID); + /* SS */ + HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID); + /* DS */ + HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID); + /* ES */ + HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID); + /* FS */ + HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID); + /* GS */ + HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID); + HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID); + HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID); + /* 64-bit capable CPUs. 
*/ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) + || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); + HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32), + VMX_IGS_LONGMODE_SS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32), + VMX_IGS_LONGMODE_DS_BASE_INVALID); + HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32), + VMX_IGS_LONGMODE_ES_BASE_INVALID); + } +#endif + } + + /* + * TR. + */ + HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID); + /* 64-bit capable CPUs. */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL); + } +#endif + if (fLongModeGuest) + { + HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */ + VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID); + } + else + { + HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */ + || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/ + VMX_IGS_TR_ATTR_TYPE_INVALID); + } + HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID); + HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */ + HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff + || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000) + || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); + HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE); + + /* + * GDTR and IDTR. + */ +#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL) + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL); + + rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL); + } +#endif + + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ + + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ + + /* + * Guest Non-Register State. + */ + /* Activity State. 
*/ + uint32_t u32ActivityState; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK( !u32ActivityState + || (u32ActivityState & MSR_IA32_VMX_MISC_ACTIVITY_STATES(pVM->hm.s.vmx.Msrs.u64Misc)), + VMX_IGS_ACTIVITY_STATE_INVALID); + HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl) + || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID); + uint32_t u32IntrState; + rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &u32IntrState); + AssertRCBreak(rc); + if ( u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS + || u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + { + HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID); + } + + /** @todo Activity state and injecting interrupts. Left as a todo since we + * currently don't use activity states but ACTIVE. */ + + HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM) + || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID); + + /* Guest interruptibility-state. */ + HMVMX_CHECK_BREAK(!(u32IntrState & 0xfffffff0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED); + HMVMX_CHECK_BREAK((u32IntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)) + != ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI + | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID); + HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF) + || !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID); + if (VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)) + { + if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT) + { + HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + && !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID); + } + else if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS), + VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID); + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI), + VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID); + } + } + /** @todo Assumes the processor is not in SMM. */ + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI), + VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID); + HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM) + || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI), + VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID); + if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI) + && VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo) + && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI), + VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID); + } + + /* Pending debug exceptions. */ + if (HMVMX_IS_64BIT_HOST_MODE()) + { + rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u64Val); + AssertRCBreak(rc); + /* Bits 63:15, Bit 13, Bits 11:4 MBZ. 
*/ + HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED); + u32Val = u64Val; /* For pending debug exceptions checks below. */ + } + else + { + rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u32Val); + AssertRCBreak(rc); + /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */ + HMVMX_CHECK_BREAK(!(u64Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED); + } + + if ( (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI) + || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS) + || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT) + { + if ( (u32Eflags & X86_EFL_TF) + && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */ + { + /* Bit 14 is PendingDebug.BS. */ + HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET); + } + if ( !(u32Eflags & X86_EFL_TF) + || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */ + { + /* Bit 14 is PendingDebug.BS. */ + HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR); + } + } + + /* VMCS link pointer. */ + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val); + AssertRCBreak(rc); + if (u64Val != UINT64_C(0xffffffffffffffff)) + { + HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED); + /** @todo Bits beyond the processor's physical-address width MBZ. */ + /** @todo 32-bit located in memory referenced by value of this field (as a + * physical address) must contain the processor's VMCS revision ID. */ + /** @todo SMM checks. */ + } + + /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is + * not using Nested Paging? */ + if ( pVM->hm.s.fNestedPaging + && !fLongModeGuest + && CPUMIsGuestInPAEModeEx(pCtx)) + { + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); + + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); + + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); + + rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val); + AssertRCBreak(rc); + HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); + } + + /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */ + if (uError == VMX_IGS_ERROR) + uError = VMX_IGS_REASON_NOT_FOUND; + } while (0); + + pVCpu->hm.s.u32HMError = uError; + return uError; + +#undef HMVMX_ERROR_BREAK +#undef HMVMX_CHECK_BREAK +#undef HMVMX_IS_CANONICAL +} + +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ +/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */ +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ + +/** @name VM-exit handlers. + * @{ + */ + +/** + * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT). + */ +HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt); + /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. 
*/ + if (VMMR0ThreadCtxHooksAreRegistered(pVCpu)) + return VINF_SUCCESS; + return VINF_EM_RAW_INTERRUPT; +} + + +/** + * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI). + */ +HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3); + + int rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntInfo); + Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT) + && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT); + Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo)); + + if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI) + { + /* + * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we injected it ourselves and + * anything we inject is not going to cause a VM-exit directly for the event being injected. + * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery". + * + * Dispatch the NMI to the host. See Intel spec. 27.5.5 "Updating Non-Register State". + */ + VMXDispatchHostNmi(); + STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC); + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return VINF_SUCCESS; + } + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return VINF_SUCCESS; + } + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return rc; + } + + uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo; + uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(uExitIntInfo); + switch (uIntType) + { + case VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT: /* Privileged software exception. (#DB from ICEBP) */ + Assert(uVector == X86_XCPT_DB); + /* no break */ + case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. 
(#BP or #OF) */ + Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT); + /* no break */ + case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT: + { + switch (uVector) + { + case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_NM: rc = hmR0VmxExitXcptNM(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pMixedCtx, pVmxTransient); break; +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; + case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); + rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break; +#endif + default: + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk); + if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); + Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); + Assert(CPUMIsGuestInRealModeEx(pMixedCtx)); + + rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, + 0 /* GCPtrFaultAddress */); + AssertRCReturn(rc, rc); + } + else + { + AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector)); + pVCpu->hm.s.u32HMError = uVector; + rc = VERR_VMX_UNEXPECTED_EXCEPTION; + } + break; + } + } + break; + } + + default: + { + pVCpu->hm.s.u32HMError = uExitIntInfo; + rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE; + AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INTERRUPTION_INFO_TYPE(uExitIntInfo))); + break; + } + } + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); + return rc; +} + + +/** + * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW). + */ +HMVMX_EXIT_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */ + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT); + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + /* Deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and resume guest execution. 
*/ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW). + */ +HMVMX_EXIT_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + AssertMsgFailed(("Unexpected NMI-window exit.\n")); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd); + return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); +} + + +/** + * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd); + return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); +} + + +/** + * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + else + { + AssertMsgFailed(("hmR0VmxExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid); + return rc; +} + + +/** + * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + if (pMixedCtx->cr4 & X86_CR4_SMXE) + return VINF_EM_RAW_EMULATE_INSTR; + + AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n")); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING) + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + } + else + { + AssertMsgFailed(("hmR0VmxExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc); + return rc; +} + + +/** + * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); /* For MSR_K8_TSC_AUX */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdtscp(pVM, pVCpu, pMixedCtx); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 3); + /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING) + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + } + else + { + AssertMsgFailed(("hmR0VmxExitRdtscp: EMInterpretRdtscp failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc); + return rc; +} + + +/** + * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */ + rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); /** @todo review if CR0 is really required by EM. */ + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + else + { + AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc); + return rc; +} + + +/** + * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Assert(!pVM->hm.s.fNestedPaging); + + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), pVmxTransient->uExitQualification); + rc = VBOXSTRICTRC_VAL(rc2); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitInvlpg: EMInterpretInvlpg %#RX64 failed with %Rrc\n", + pVmxTransient->uExitQualification, rc)); + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg); + return rc; +} + + +/** + * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor); + return rc; +} + + +/** + * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + rc = VBOXSTRICTRC_VAL(rc2); + if (RT_LIKELY( rc == VINF_SUCCESS + || rc == VINF_EM_HALT)) + { + int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc3, rc3); + + if ( rc == VINF_EM_HALT + && EMMonitorWaitShouldContinue(pVCpu, pMixedCtx)) + { + rc = VINF_SUCCESS; + } + } + else + { + AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc)); + rc = VERR_EM_INTERPRETER; + } + AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER, + ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait); + return rc; +} + + +/** + * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root mode. In theory, we should never + * get this VM-exit. This can happen only if dual-monitor treatment of SMI and VMX is enabled, which can (only?) be done by + * executing VMCALL in VMX root operation. If we get here, something funny is going on. + * See Intel spec. "33.15.5 Enabling the Dual-Monitor Treatment". + */ + AssertMsgFailed(("Unexpected RSM VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * This can only happen if we support dual-monitor treatment of SMI, which can be activated by executing VMCALL in VMX + * root operation. Only an STM (SMM transfer monitor) would get this exit when we (the executive monitor) execute a VMCALL + * in VMX root mode or receive an SMI. If we get here, something funny is going on. + * See Intel spec. "33.15.6 Activating the Dual-Monitor Treatment" and Intel spec. 25.3 "Other Causes of VM-Exits" + */ + AssertMsgFailed(("Unexpected SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */ + AssertMsgFailed(("Unexpected IO SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. We currently + * don't make use of it (see hmR0VmxLoadGuestActivityState()) as our guests don't have direct access to the host LAPIC. + * See Intel spec. 25.3 "Other Causes of VM-exits". + */ + AssertMsgFailed(("Unexpected SIPI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + /* + * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM. + * See Intel spec. 33.14.1 Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON". + * + * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits. + * See Intel spec. "23.8 Restrictions on VMX operation". + */ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + return VINF_EM_RESET; +} + + +/** + * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT); + int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + pMixedCtx->rip++; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + if (EMShouldContinueAfterHalt(pVCpu, pMixedCtx)) /* Requires eflags. */ + rc = VINF_SUCCESS; + else + rc = VINF_EM_HALT; + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); + return rc; +} + + +/** + * VM-exit handler for instructions that result in a #UD exception delivered to + * the guest. + */ +HMVMX_EXIT_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for expiry of the VMX preemption timer. + */ +HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */ + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + + /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + bool fTimersPending = TMTimerPollBool(pVM, pVCpu); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer); + return fTimersPending ? 
VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS; +} + + +/** + * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* We expose XSETBV to the guest, fallback to the recompiler for emulation. */ + /** @todo check if XSETBV is supported by the recompiler. */ + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* The guest should not invalidate the host CPU's TLBs, fallback to recompiler. */ + /** @todo implement EMInterpretInvpcid() */ + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE). + * Error VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx); + NOREF(uInvalidReason); + +#ifdef VBOX_STRICT + uint32_t uIntrState; + HMVMXHCUINTREG uHCReg; + uint64_t u64Val; + uint32_t u32Val; + + rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); + rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient); + rc |= hmR0VmxReadEntryInstrLenVmcs(pVCpu, pVmxTransient); + rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState); + AssertRCReturn(rc, rc); + + Log4(("uInvalidReason %u\n", uInvalidReason)); + Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo)); + Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode)); + Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr)); + Log4(("VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE %#RX32\n", uIntrState)); + + rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc); + Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); + rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); + Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); + rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); + Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); +#endif + + PVM pVM = pVCpu->CTX_SUFF(pVM); + HMDumpRegs(pVM, pVCpu, pMixedCtx); + + return VERR_VMX_INVALID_GUEST_STATE; +} + + +/** + * VM-exit handler for VM-entry failure due to an MSR-load + * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Unexpected MSR-load exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for VM-entry failure due to a machine-check event + * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Unexpected machine-check event exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for all undefined reasons. Should never ever happen.. in + * theory. + */ +HMVMX_EXIT_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + AssertMsgFailed(("Huh!? Undefined VM-exit reason %d. pVCpu=%p pMixedCtx=%p\n", pVmxTransient->uExitReason, pVCpu, pMixedCtx)); + return VERR_VMX_UNDEFINED_EXIT_CODE; +} + + +/** + * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses + * (VMX_EXIT_XDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR). + * Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess); + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected XDTR access. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdrand); + if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected RDRAND exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for RDMSR (VMX_EXIT_RDMSR). + */ +HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* EMInterpretRdmsr() requires CR0, Eflags and SS segment register. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("CS:RIP=%04x:%#RX64 ECX=%X\n", pMixedCtx->cs.Sel, pMixedCtx->rip, pMixedCtx->ecx)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, + ("hmR0VmxExitRdmsr: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr); + + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + Assert(pVmxTransient->cbInstr == 2); + } + return rc; +} + + +/** + * VM-exit handler for WRMSR (VMX_EXIT_WRMSR). + */ +HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = VINF_SUCCESS; + + /* EMInterpretWrmsr() requires CR0, EFLAGS and SS segment register. 
*/ + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("ecx=%#RX32\n", pMixedCtx->ecx)); + + rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitWrmsr: failed, invalid error code %Rrc\n", rc)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr); + + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + + /* If this is an X2APIC WRMSR access, update the APIC state as well. */ + if ( pMixedCtx->ecx >= MSR_IA32_X2APIC_START + && pMixedCtx->ecx <= MSR_IA32_X2APIC_END) + { + /* We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register + * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before + EMInterpretWrmsr() changes it. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + } + else if (pMixedCtx->ecx == MSR_K6_EFER) /* EFER is the only MSR we auto-load but don't allow write-passthrough. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); + else if (pMixedCtx->ecx == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */ + pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; + + /* Update MSRs that are part of the VMCS when MSR-bitmaps are not supported. */ + if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)) + { + switch (pMixedCtx->ecx) + { + case MSR_IA32_SYSENTER_CS: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break; + case MSR_IA32_SYSENTER_EIP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break; + case MSR_IA32_SYSENTER_ESP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break; + case MSR_K8_FS_BASE: /* no break */ + case MSR_K8_GS_BASE: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); break; + case MSR_K8_KERNEL_GS_BASE: HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); break; + } + } +#ifdef VBOX_STRICT + else + { + /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */ + switch (pMixedCtx->ecx) + { + case MSR_IA32_SYSENTER_CS: + case MSR_IA32_SYSENTER_EIP: + case MSR_IA32_SYSENTER_ESP: + case MSR_K8_FS_BASE: + case MSR_K8_GS_BASE: + { + AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", pMixedCtx->ecx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + + case MSR_K8_LSTAR: + case MSR_K6_STAR: + case MSR_K8_SF_MASK: + case MSR_K8_TSC_AUX: + case MSR_K8_KERNEL_GS_BASE: + { + AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", + pMixedCtx->ecx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + } + } +#endif /* VBOX_STRICT */ + } + return rc; +} + + +/** + * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPause); + if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT) + return VERR_EM_INTERPRETER; + AssertMsgFailed(("Unexpected PAUSE exit. 
pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); +} + + +/** + * VM-exit handler for when the TPR value is lowered below the specified + * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW); + + /* + * The TPR has already been updated, see hmR0VMXPostRunGuest(). RIP is also updated as part of the VM-exit by VT-x. Update + * the threshold in the VMCS, deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and + * resume guest execution. + */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold); + return VINF_SUCCESS; +} + + +/** + * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional + * VM-exit. + * + * @retval VINF_SUCCESS when guest execution can continue. + * @retval VINF_PGM_CHANGE_MODE when shadow paging mode changed, back to ring-3. + * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3. + * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to + * recompiler. + */ +HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2); + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + const RTGCUINTPTR uExitQualification = pVmxTransient->uExitQualification; + const uint32_t uAccessType = VMX_EXIT_QUALIFICATION_CRX_ACCESS(uExitQualification); + PVM pVM = pVCpu->CTX_SUFF(pVM); + switch (uAccessType) + { + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: /* MOV to CRx */ + { +#if 0 + /* EMInterpretCRxWrite() references a lot of guest state (EFER, RFLAGS, Segment Registers, etc.) Sync entire state */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); +#else + rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), + VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification)); + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); + + switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)) + { + case 0: /* CR0 */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + Log4(("CRX CR0 write rc=%d CR0=%#RX64\n", rc, pMixedCtx->cr0)); + break; + case 2: /* CR2 */ + /* Nothing to do here, CR2 it's not part of the VMCS. */ + break; + case 3: /* CR3 */ + Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(pMixedCtx)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR3); + Log4(("CRX CR3 write rc=%d CR3=%#RX64\n", rc, pMixedCtx->cr3)); + break; + case 4: /* CR4 */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR4); + Log4(("CRX CR4 write rc=%d CR4=%#RX64\n", rc, pMixedCtx->cr4)); + break; + case 8: /* CR8 */ + Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)); + /* CR8 contains the APIC TPR. Was updated by EMInterpretCRxWrite(). 
*/ + HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE); + break; + default: + AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification))); + break; + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: /* MOV from CRx */ + { + /* EMInterpretCRxRead() requires EFER MSR, CS. */ + rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Assert( !pVM->hm.s.fNestedPaging + || !CPUMIsGuestPagingEnabledEx(pMixedCtx) + || VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 3); + + /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */ + Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 8 + || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)); + + rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification), + VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)); + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]); + Log4(("CRX CR%d Read access rc=%d\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), rc)); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */ + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = EMInterpretCLTS(pVM, pVCpu); + AssertRCReturn(rc, rc); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts); + Log4(("CRX CLTS write rc=%d\n", rc)); + break; + } + + case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */ + { + rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(uExitQualification)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw); + Log4(("CRX LMSW write rc=%d\n", rc)); + break; + } + + default: + { + AssertMsgFailed(("Invalid access-type in Mov CRx exit qualification %#x\n", uAccessType)); + rc = VERR_VMX_UNEXPECTED_EXCEPTION; + } + } + + /* Validate possible error codes. */ + Assert(rc == VINF_SUCCESS || rc == VINF_PGM_CHANGE_MODE || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_SYNC_CR3 + || rc == VERR_VMX_UNEXPECTED_EXCEPTION); + if (RT_SUCCESS(rc)) + { + int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc2, rc2); + } + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2); + return rc; +} + + +/** + * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1); + + int rc2 = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc2 |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc2 |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx); + rc2 |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* Eflag checks in EMInterpretDisasCurrent(). */ + rc2 |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); /* CR0 checks & PGM* in EMInterpretDisasCurrent(). 
*/ + rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* SELM checks in EMInterpretDisasCurrent(). */ + /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */ + AssertRCReturn(rc2, rc2); + + /* Refer Intel spec. 27-5. "Exit Qualifications for I/O Instructions" for the format. */ + uint32_t uIOPort = VMX_EXIT_QUALIFICATION_IO_PORT(pVmxTransient->uExitQualification); + uint8_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(pVmxTransient->uExitQualification); + bool fIOWrite = ( VMX_EXIT_QUALIFICATION_IO_DIRECTION(pVmxTransient->uExitQualification) + == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT); + bool fIOString = VMX_EXIT_QUALIFICATION_IO_IS_STRING(pVmxTransient->uExitQualification); + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_HMVMX_IPE_1); + + /* I/O operation lookup arrays. */ + static const uint32_t s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */ + static const uint32_t s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving the result (in AL/AX/EAX). */ + + VBOXSTRICTRC rcStrict; + const uint32_t cbValue = s_aIOSizes[uIOWidth]; + const uint32_t cbInstr = pVmxTransient->cbInstr; + bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (fIOString) + { +#if 0 /* Not yet ready. IEM gurus with debian 32-bit guest without NP (on ATA reads). See @bugref{5752#c158}*/ + /* + * INS/OUTS - I/O String instruction. + * + * Use instruction-information if available, otherwise fall back on + * interpreting the instruction. + */ + Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c str\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); + AssertReturn(pMixedCtx->dx == uIOPort, VERR_HMVMX_IPE_2); + if (MSR_IA32_VMX_BASIC_INFO_VMCS_INS_OUTS(pVM->hm.s.vmx.Msrs.u64BasicInfo)) + { + rc2 = hmR0VmxReadExitInstrInfoVmcs(pVCpu, pVmxTransient); + /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */ + rc2 |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_HMVMX_IPE_3); + AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2); + IEMMODE enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize; + bool fRep = VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification); + if (fIOWrite) + { + rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, + pVmxTransient->ExitInstrInfo.StrIo.iSegReg); + } + else + { + /* + * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES. + * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS. + * See Intel Instruction spec. for "INS". + * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS". + */ + rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr); + } + } + else + { + /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */ + rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + rcStrict = IEMExecOne(pVCpu); + } + /** @todo IEM needs to be setting these flags somehow. 
*/ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + fUpdateRipAlready = true; +#else + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL); + if (RT_SUCCESS(rcStrict)) + { + if (fIOWrite) + { + rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite); + } + else + { + rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix, + (DISCPUMODE)pDis->uAddrMode, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead); + } + } + else + { + AssertMsg(rcStrict == VERR_EM_INTERPRETER, ("rcStrict=%Rrc RIP %#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->rip)); + rcStrict = VINF_EM_RAW_EMULATE_INSTR; + } +#endif + } + else + { + /* + * IN/OUT - I/O instruction. + */ + Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); + const uint32_t uAndVal = s_aIOOpAnd[uIOWidth]; + Assert(!VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification)); + if (fIOWrite) + { + rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pMixedCtx->eax & uAndVal, cbValue); + if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + HMR0SavePendingIOPortWrite(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite); + } + else + { + uint32_t u32Result = 0; + rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue); + if (IOM_SUCCESS(rcStrict)) + { + /* Save result of I/O IN instr. in AL/AX/EAX. */ + pMixedCtx->eax = (pMixedCtx->eax & ~uAndVal) | (u32Result & uAndVal); + } + else if (rcStrict == VINF_IOM_R3_IOPORT_READ) + HMR0SavePendingIOPortRead(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead); + } + } + + if (IOM_SUCCESS(rcStrict)) + { + if (!fUpdateRipAlready) + { + pMixedCtx->rip += cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + } + + /* INS & OUTS with REP prefix modify RFLAGS. */ + if (fIOString) + { + /** @todo Single-step for INS/OUTS with REP prefix? */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS); + } + else if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + /* + * If any I/O breakpoints are armed, we need to check if one triggered + * and take appropriate action. + * Note that the I/O breakpoint type is undefined if CR4.DE is 0. + */ + rc2 = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc2, rc2); + + /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the + * execution engines about whether hyper BPs and such are pending. */ + uint32_t const uDr7 = pMixedCtx->dr[7]; + if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK) + && X86_DR7_ANY_RW_IO(uDr7) + && (pMixedCtx->cr4 & X86_CR4_DE)) + || DBGFBpIsHwIoArmed(pVM))) + { + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck); + + /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /*fDr6*/); + + VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pMixedCtx, uIOPort, cbValue); + if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP) + { + /* Raise #DB. 
*/ + if (fIsGuestDbgActive) + ASMSetDR6(pMixedCtx->dr[6]); + if (pMixedCtx->dr[7] != uDr7) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + + hmR0VmxSetPendingXcptDB(pVCpu, pMixedCtx); + } + /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */ + else if ( rcStrict2 != VINF_SUCCESS + && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict)) + rcStrict = rcStrict2; + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + } + } + +#ifdef DEBUG + if (rcStrict == VINF_IOM_R3_IOPORT_READ) + Assert(!fIOWrite); + else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE) + Assert(fIOWrite); + else + { + /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST + * statuses, that the VMM device and some others may return. See + * IOM_SUCCESS() for guidance. */ + AssertMsg( RT_FAILURE(rcStrict) + || rcStrict == VINF_SUCCESS + || rcStrict == VINF_EM_RAW_EMULATE_INSTR + || rcStrict == VINF_EM_DBG_BREAKPOINT + || rcStrict == VINF_EM_RAW_GUEST_TRAP + || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } +#endif + + STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* Check if this task-switch occurred while delivery an event through the guest IDT. */ + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT) + { + rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo)) + { + uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); + + /* Software interrupts and exceptions will be regenerated when the recompiler restarts the instruction. */ + if ( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT + && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) + { + uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); + bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo); + + /* Save it as a pending event and it'll be converted to a TRPM event on the way out to ring-3. */ + Assert(!pVCpu->hm.s.Event.fPending); + pVCpu->hm.s.Event.fPending = true; + pVCpu->hm.s.Event.u64IntInfo = pVmxTransient->uIdtVectoringInfo; + rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); + AssertRCReturn(rc, rc); + if (fErrorCodeValid) + pVCpu->hm.s.Event.u32ErrCode = pVmxTransient->uIdtVectoringErrorCode; + else + pVCpu->hm.s.Event.u32ErrCode = 0; + if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT + && uVector == X86_XCPT_PF) + { + pVCpu->hm.s.Event.GCPtrFaultAddress = pMixedCtx->cr2; + } + + Log4(("Pending event on TaskSwitch uIntType=%#x uVector=%#x\n", uIntType, uVector)); + } + } + } + + /** @todo Emulate task switch someday, currently just going back to ring-3 for + * emulation. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch); + return VERR_EM_INTERPRETER; +} + + +/** + * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG); + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG; + int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf); + return VINF_EM_DBG_STEPPED; +} + + +/** + * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + +#if 0 + /** @todo Investigate if IOMMMIOPhysHandler() requires a lot of state, for now + * just sync the whole thing. */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); +#else + /* Aggressive state sync. for now. */ + rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Phyiscal Addresses" */ + uint32_t uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(pVmxTransient->uExitQualification); + switch (uAccessType) + { + case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE: + case VMX_APIC_ACCESS_TYPE_LINEAR_READ: + { + if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW) + && VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification) == 0x80) + { + AssertMsgFailed(("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n")); + } + + RTGCPHYS GCPhys = pMixedCtx->msrApicBase; /* Always up-to-date, msrApicBase is not part of the VMCS. */ + GCPhys &= PAGE_BASE_GC_MASK; + GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification); + PVM pVM = pVCpu->CTX_SUFF(pVM); + Log4(("ApicAccess uAccessType=%#x GCPhys=%#RGv Off=%#x\n", uAccessType, GCPhys, + VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification))); + + VBOXSTRICTRC rc2 = IOMMMIOPhysHandler(pVM, pVCpu, + (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, + CPUMCTX2CORE(pMixedCtx), GCPhys); + rc = VBOXSTRICTRC_VAL(rc2); + Log4(("ApicAccess rc=%d\n", rc)); + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_VMX_GUEST_APIC_STATE); + rc = VINF_SUCCESS; + } + break; + } + + default: + Log4(("ApicAccess uAccessType=%#x\n", uAccessType)); + rc = VINF_EM_RAW_EMULATE_INSTR; + break; + } + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess); + return rc; +} + + +/** + * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional + * VM-exit. 
+ */ +HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + + /* We should -not- get this VM-exit if the guest's debug registers were active. */ + if (pVmxTransient->fWasGuestDebugStateActive) + { + AssertMsgFailed(("Unexpected MOV DRx exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); + HMVMX_RETURN_UNEXPECTED_EXIT(); + } + + int rc = VERR_INTERNAL_ERROR_5; + if ( !DBGFIsStepping(pVCpu) + && !pVCpu->hm.s.fSingleInstruction + && !pVmxTransient->fWasHyperDebugStateActive) + { + /* Don't intercept MOV DRx and #DB any more. */ + pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT; + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); + AssertRCReturn(rc, rc); + + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB); + rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); + AssertRCReturn(rc, rc); +#endif + } + + /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); + Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + +#ifdef VBOX_WITH_STATISTICS + rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); + else + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); +#endif + STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch); + return VINF_SUCCESS; + } + + /* + * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date, see + * hmR0VmxSaveGuestAutoLoadStoreMsrs(). Update only the segment registers from the CPU. + */ + rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("CS:RIP=%04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip)); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) + { + rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification), + VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification)); + if (RT_SUCCESS(rc)) + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); + } + else + { + rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), + VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification), + VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); + } + + Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); + if (RT_SUCCESS(rc)) + { + int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient); + AssertRCReturn(rc2, rc2); + } + return rc; +} + + +/** + * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG). 
+ * Conditional VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + + RTGCPHYS GCPhys = 0; + rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys); + +#if 0 + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */ +#else + /* Aggressive state sync. for now. */ + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + /* + * If we succeed, resume guest execution. + * If we fail in interpreting the instruction because we couldn't get the guest physical address + * of the page containing the instruction via the guest's page tables (we would invalidate the guest page + * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this + * weird case. See @bugref{6043}. + */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pMixedCtx), GCPhys, UINT32_MAX); + rc = VBOXSTRICTRC_VAL(rc2); + Log4(("EPT misconfig at %#RGv RIP=%#RX64 rc=%d\n", GCPhys, pMixedCtx->rip, rc)); + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* Successfully handled MMIO operation. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS + | HM_CHANGED_VMX_GUEST_APIC_STATE); + rc = VINF_SUCCESS; + } + return rc; +} + + +/** + * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional + * VM-exit. + */ +HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(); + Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); + + /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ + int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient); + if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT)) + return VINF_SUCCESS; + else if (RT_UNLIKELY(rc == VINF_EM_RESET)) + return rc; + + RTGCPHYS GCPhys = 0; + rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys); + rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); +#if 0 + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */ +#else + /* Aggressive state sync. for now. */ + rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); + rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); +#endif + AssertRCReturn(rc, rc); + + /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". 
*/ + AssertMsg(((pVmxTransient->uExitQualification >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQualification)); + + RTGCUINT uErrorCode = 0; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH) + uErrorCode |= X86_TRAP_PF_ID; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE) + uErrorCode |= X86_TRAP_PF_RW; + if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT) + uErrorCode |= X86_TRAP_PF_P; + + TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode); + + Log4(("EPT violation %#x at %#RX64 ErrorCode %#x CS:EIP=%04x:%#RX64\n", pVmxTransient->uExitQualification, GCPhys, + uErrorCode, pMixedCtx->cs.Sel, pMixedCtx->rip)); + + /* Handle the pagefault trap for the nested shadow table. */ + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pMixedCtx), GCPhys); + TRPMResetTrap(pVCpu); + + /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */ + if ( rc == VINF_SUCCESS + || rc == VERR_PAGE_TABLE_NOT_PRESENT + || rc == VERR_PAGE_NOT_PRESENT) + { + /* Successfully synced our nested page tables. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + return VINF_SUCCESS; + } + + Log4(("EPT return to ring-3 rc=%d\n")); + return rc; +} + +/** @} */ + +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */ +/* -=-=-=-=-=-=-=-=-=- VM-exit Exception Handlers -=-=-=-=-=-=-=-=-=-=- */ +/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */ + +/** @name VM-exit exception handlers. + * @{ + */ + +/** + * VM-exit exception handler for #MF (Math Fault: floating point exception). + */ +static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); + + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + if (!(pMixedCtx->cr0 & X86_CR0_NE)) + { + /* Old-style FPU error reporting needs some extra work. */ + /** @todo don't fall back to the recompiler, but do it manually. */ + return VERR_EM_INTERPRETER; + } + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return rc; +} + + +/** + * VM-exit exception handler for #BP (Breakpoint exception). + */ +static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); + + /** @todo Try optimize this by not saving the entire guest state unless + * really needed. 
*/ + int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx)); + if (rc == VINF_EM_RAW_GUEST_TRAP) + { + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + } + + Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT); + return rc; +} + + +/** + * VM-exit exception handler for #DB (Debug exception). + */ +static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); + Log6(("XcptDB\n")); + + /* + * Get the DR6-like values from the exit qualification and pass it to DBGF + * for processing. + */ + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + + /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */ + uint64_t uDR6 = X86_DR6_INIT_VAL; + uDR6 |= ( pVmxTransient->uExitQualification + & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS)); + + rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pMixedCtx), uDR6, pVCpu->hm.s.fSingleInstruction); + if (rc == VINF_EM_RAW_GUEST_TRAP) + { + /* + * The exception was for the guest. Update DR6, DR7.GD and + * IA32_DEBUGCTL.LBR before forwarding it. + * (See Intel spec. 27.1 "Architectural State before a VM-Exit".) + */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + pMixedCtx->dr[6] &= ~X86_DR6_B_MASK; + pMixedCtx->dr[6] |= uDR6; + if (CPUMIsGuestDebugStateActive(pVCpu)) + ASMSetDR6(pMixedCtx->dr[6]); + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */ + pMixedCtx->dr[7] &= ~X86_DR7_GD; + + /* Paranoia. */ + pMixedCtx->dr[7] &= ~X86_DR7_RAZ_MASK; + pMixedCtx->dr[7] |= X86_DR7_RA1_MASK; + + rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pMixedCtx->dr[7]); + AssertRCReturn(rc, rc); + + /* + * Raise #DB in the guest. + * + * It is important to reflect what the VM-exit gave us (preserving the interruption-type) rather than use + * hmR0VmxSetPendingXcptDB() as the #DB could've been raised while executing ICEBP and not the 'normal' #DB. + * Thus it -may- trigger different handling in the CPU (like skipped DPL checks). See @bugref{6398}. + * + * Since ICEBP isn't documented on Intel, see AMD spec. 15.20 "Event Injection". + */ + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return VINF_SUCCESS; + } + + /* + * Not a guest trap, must be a hypervisor related debug event then. + * Update DR6 in case someone is interested in it. 
+ */ + AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc)); + AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5); + CPUMSetHyperDR6(pVCpu, uDR6); + + return rc; +} + + +/** + * VM-exit exception handler for #NM (Device-not-available exception: floating + * point exception). + */ +static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + + /* We require CR0 and EFER. EFER is always up-to-date. */ + int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + /* We're playing with the host CPU state here, have to disable preemption or longjmp. */ + VMMRZCallRing3Disable(pVCpu); + HM_DISABLE_PREEMPT_IF_NEEDED(); + + /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */ + if (pVmxTransient->fWasGuestFPUStateActive) + { + rc = VINF_EM_RAW_GUEST_TRAP; + Assert(CPUMIsGuestFPUStateActive(pVCpu) || HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0)); + } + else + { +#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS + Assert(!pVmxTransient->fWasGuestFPUStateActive); +#endif + rc = CPUMR0Trap07Handler(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx); + Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu))); + } + + HM_RESTORE_PREEMPT_IF_NEEDED(); + VMMRZCallRing3Enable(pVCpu); + + if (rc == VINF_SUCCESS) + { + /* Guest FPU state was activated, we'll want to change CR0 FPU intercepts before the next VM-reentry. */ + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM); + } + else + { + /* Forward #NM to the guest. */ + Assert(rc == VINF_EM_RAW_GUEST_TRAP); + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, 0 /* error code */, 0 /* GCPtrFaultAddress */); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM); + } + + return VINF_SUCCESS; +} + + +/** + * VM-exit exception handler for #GP (General-protection exception). + * + * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date. + */ +static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); + + int rc = VERR_INTERNAL_ERROR_5; + if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) + { +#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS + /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */ + rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + Log4(("#GP Gst: RIP %#RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u\n", pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, + pMixedCtx->cr0, CPUMGetGuestCPL(pVCpu))); + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return rc; +#else + /* We don't intercept #GP. 
*/ + AssertMsgFailed(("Unexpected VM-exit caused by #GP exception\n")); + return VERR_VMX_UNEXPECTED_EXCEPTION; +#endif + } + + Assert(CPUMIsGuestInRealModeEx(pMixedCtx)); + Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest); + + /* EMInterpretDisasCurrent() requires a lot of the state, save the entire state. */ + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + uint32_t cbOp = 0; + PVM pVM = pVCpu->CTX_SUFF(pVM); + rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); + if (RT_SUCCESS(rc)) + { + rc = VINF_SUCCESS; + Assert(cbOp == pDis->cbInstr); + Log4(("#GP Disas OpCode=%u CS:EIP %04x:%#RX64\n", pDis->pCurInstr->uOpcode, pMixedCtx->cs.Sel, pMixedCtx->rip)); + switch (pDis->pCurInstr->uOpcode) + { + case OP_CLI: + { + pMixedCtx->eflags.Bits.u1IF = 0; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli); + break; + } + + case OP_STI: + { + pMixedCtx->eflags.Bits.u1IF = 1; + pMixedCtx->rip += pDis->cbInstr; + EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip); + Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti); + break; + } + + case OP_HLT: + { + rc = VINF_EM_HALT; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); + break; + } + + case OP_POPF: + { + Log4(("POPF CS:RIP %04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip)); + uint32_t cbParm; + uint32_t uMask; + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + if (pDis->fPrefix & DISPREFIX_OPSIZE) + { + cbParm = 4; + uMask = 0xffffffff; + } + else + { + cbParm = 2; + uMask = 0xffff; + } + + /* Get the stack pointer & pop the contents of the stack onto Eflags. */ + RTGCPTR GCPtrStack = 0; + X86EFLAGS Eflags; + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0, + &GCPtrStack); + if (RT_SUCCESS(rc)) + { + Assert(sizeof(Eflags.u32) >= cbParm); + Eflags.u32 = 0; + rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u32, cbParm); + } + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + Log4(("POPF %#x -> %#RX64 mask=%#x RIP=%#RX64\n", Eflags.u, pMixedCtx->rsp, uMask, pMixedCtx->rip)); + pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask)) + | (Eflags.u32 & X86_EFL_POPF_BITS & uMask); + pMixedCtx->eflags.Bits.u1RF = 0; /* The RF bit is always cleared by POPF; see Intel Instruction reference. */ + pMixedCtx->esp += cbParm; + pMixedCtx->esp &= uMask; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + /* Generate a pending-debug exception when stepping over POPF regardless of how POPF modifies EFLAGS.TF. */ + if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf); + break; + } + + case OP_PUSHF: + { + uint32_t cbParm; + uint32_t uMask; + if (pDis->fPrefix & DISPREFIX_OPSIZE) + { + cbParm = 4; + uMask = 0xffffffff; + } + else + { + cbParm = 2; + uMask = 0xffff; + } + + /* Get the stack pointer & push the contents of eflags onto the stack. 
*/ + RTGCPTR GCPtrStack = 0; + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), (pMixedCtx->esp - cbParm) & uMask, + SELMTOFLAT_FLAGS_CPL0, &GCPtrStack); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + X86EFLAGS Eflags = pMixedCtx->eflags; + /* The RF & VM bits are cleared on image stored on stack; see Intel Instruction reference for PUSHF. */ + Eflags.Bits.u1RF = 0; + Eflags.Bits.u1VM = 0; + + rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u, cbParm); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + Log4(("PUSHF %#x -> %#RGv\n", Eflags.u, GCPtrStack)); + pMixedCtx->esp -= cbParm; + pMixedCtx->esp &= uMask; + pMixedCtx->rip += pDis->cbInstr; + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP); + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf); + break; + } + + case OP_IRET: + { + /** @todo Handle 32-bit operand sizes and check stack limits. See Intel + * instruction reference. */ + RTGCPTR GCPtrStack = 0; + uint32_t uMask = 0xffff; + bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF); + uint16_t aIretFrame[3]; + if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE)) + { + rc = VERR_EM_INTERPRETER; + break; + } + rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0, + &GCPtrStack); + if (RT_SUCCESS(rc)) + rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame)); + if (RT_FAILURE(rc)) + { + rc = VERR_EM_INTERPRETER; + break; + } + pMixedCtx->eip = 0; + pMixedCtx->ip = aIretFrame[0]; + pMixedCtx->cs.Sel = aIretFrame[1]; + pMixedCtx->cs.ValidSel = aIretFrame[1]; + pMixedCtx->cs.u64Base = (uint64_t)pMixedCtx->cs.Sel << 4; + pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask)) + | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask); + pMixedCtx->sp += sizeof(aIretFrame); + HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP + | HM_CHANGED_GUEST_SEGMENT_REGS + | HM_CHANGED_GUEST_RSP + | HM_CHANGED_GUEST_RFLAGS); + /* Generate a pending-debug exception when stepping over IRET regardless of how IRET modifies EFLAGS.TF. */ + if (fStepping) + hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx); + Log4(("IRET %#RX32 to %04x:%x\n", GCPtrStack, pMixedCtx->cs.Sel, pMixedCtx->ip)); + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret); + break; + } + + case OP_INT: + { + uint16_t uVector = pDis->Param1.uValue & 0xff; + hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, uVector, pDis->cbInstr); + /* INT clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt); + break; + } + + case OP_INTO: + { + if (pMixedCtx->eflags.Bits.u1OF) + { + hmR0VmxSetPendingXcptOF(pVCpu, pMixedCtx, pDis->cbInstr); + /* INTO clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */ + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt); + } + break; + } + + default: + { + VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pMixedCtx), 0 /* pvFault */, + EMCODETYPE_SUPERVISOR); + rc = VBOXSTRICTRC_VAL(rc2); + HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); + /** @todo We have to set pending-debug exceptions here when the guest is + * single-stepping depending on the instruction that was interpreted. 
*/ + Log4(("#GP rc=%Rrc\n", rc)); + break; + } + } + } + else + rc = VERR_EM_INTERPRETER; + + AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, + ("#GP Unexpected rc=%Rrc\n", rc)); + return rc; +} + + +/** + * VM-exit exception handler wrapper for generic exceptions. Simply re-injects + * the exception reported in the VMX transient structure back into the VM. + * + * @remarks Requires uExitIntInfo in the VMX transient structure to be + * up-to-date. + */ +static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + + /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in + hmR0VmxCheckExitDueToEventDelivery(). */ + int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + Assert(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO); + + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); + return VINF_SUCCESS; +} + + +/** + * VM-exit exception handler for #PF (Page-fault exception). + */ +static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient) +{ + HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(); + PVM pVM = pVCpu->CTX_SUFF(pVM); + int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient); + rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient); + AssertRCReturn(rc, rc); + +#if defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) || defined(HMVMX_ALWAYS_TRAP_PF) + if (pVM->hm.s.fNestedPaging) + { + pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */ + if (RT_LIKELY(!pVmxTransient->fVectoringPF)) + { + pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */ + hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), + 0 /* cbInstr */, pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQualification); + } + else + { + /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ + hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx); + Log4(("Pending #DF due to vectoring #PF. NP\n")); + } + STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); + return rc; + } +#else + Assert(!pVM->hm.s.fNestedPaging); +#endif + + rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); + AssertRCReturn(rc, rc); + + Log4(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQualification, + pMixedCtx->cs.Sel, pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, pMixedCtx->cr3)); + + TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQualification, (RTGCUINT)pVmxTransient->uExitIntErrorCode); + rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pMixedCtx), + (RTGCPTR)pVmxTransient->uExitQualification); + + Log4(("#PF: rc=%Rrc\n", rc)); + if (rc == VINF_SUCCESS) + { + /* Successfully synced shadow pages tables or emulated an MMIO instruction. */ + /** @todo this isn't quite right, what if guest does lgdt with some MMIO + * memory? We don't update the whole state here... 
*/
+ HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
+ | HM_CHANGED_GUEST_RSP
+ | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_VMX_GUEST_APIC_STATE);
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ return rc;
+ }
+ else if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ if (!pVmxTransient->fVectoringPF)
+ {
+ /* It's a guest page fault and needs to be reflected to the guest. */
+ uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu);
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
+ pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQualification);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
+ hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
+ Log4(("#PF: Pending #DF due to vectoring #PF\n"));
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
+ return rc;
+}
+
+/** @} */
+
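
For reference, a minimal standalone sketch of how the I/O-instruction exit qualification used by hmR0VmxExitIoInstr() above breaks down. Field positions follow Intel SDM Vol. 3, Table 27-5; the IOEXITINFO structure and DecodeIoExitQual() are illustrative names for this sketch only, not VirtualBox APIs, and the lookup tables mirror s_aIOSizes/s_aIOOpAnd in the handler.

#include <stdbool.h>
#include <stdint.h>

typedef struct IOEXITINFO
{
    uint16_t uPort;       /* Bits 31:16 of the exit qualification. */
    uint32_t cbAccess;    /* 1, 2 or 4 bytes (width encodings 0, 1, 3). */
    uint32_t fAndMask;    /* Mask for merging an IN result into AL/AX/EAX. */
    bool     fWrite;      /* OUT/OUTS when set, IN/INS when clear. */
    bool     fString;     /* INS/OUTS. */
    bool     fRep;        /* REP prefix present. */
} IOEXITINFO;

static bool DecodeIoExitQual(uint64_t uExitQual, IOEXITINFO *pInfo)
{
    /* Same lookup tables as the handler; width encoding 2 is not defined. */
    static const uint32_t s_acbSizes[4]  = { 1,    2,      0, 4 };
    static const uint32_t s_afAndMask[4] = { 0xff, 0xffff, 0, 0xffffffff };

    unsigned const uWidth = (unsigned)(uExitQual & 0x7);   /* Bits 2:0 - access size. */
    if (uWidth == 2 || uWidth > 3)
        return false;                                      /* Mirrors the VERR_HMVMX_IPE_1 assertion. */

    pInfo->cbAccess = s_acbSizes[uWidth];
    pInfo->fAndMask = s_afAndMask[uWidth];
    pInfo->fWrite   = !((uExitQual >> 3) & 1);             /* Bit 3: 0 = OUT, 1 = IN. */
    pInfo->fString  =  (uExitQual >> 4) & 1;               /* Bit 4: string instruction. */
    pInfo->fRep     =  (uExitQual >> 5) & 1;               /* Bit 5: REP prefixed. */
    pInfo->uPort    = (uint16_t)(uExitQual >> 16);         /* Bits 31:16: port number. */
    return true;
}

Rejecting width encoding 2 up front is what lets the handler index the size and AND-mask tables directly without any further range checks.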
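
Similarly, a hedged sketch of the #PF-style error code that hmR0VmxExitEptViolation() above assembles from the EPT-violation exit qualification. Bit positions are taken from Intel SDM Vol. 3, Table 27-7; the PF_ERR_* constants stand in for the X86_TRAP_PF_* values (bits 0, 1 and 4 of the architectural page-fault error code) and EptViolationToPfErrCode() is a made-up name.

#include <stdint.h>

#define PF_ERR_P   UINT32_C(0x01)   /* Translation was present. */
#define PF_ERR_RW  UINT32_C(0x02)   /* Faulting access was a write. */
#define PF_ERR_ID  UINT32_C(0x10)   /* Faulting access was an instruction fetch. */

static uint32_t EptViolationToPfErrCode(uint64_t uExitQual)
{
    uint32_t uErr = 0;
    if (uExitQual & (UINT64_C(1) << 1))   /* Bit 1: data write access. */
        uErr |= PF_ERR_RW;
    if (uExitQual & (UINT64_C(1) << 2))   /* Bit 2: instruction fetch. */
        uErr |= PF_ERR_ID;
    if (uExitQual & (UINT64_C(1) << 3))   /* Bit 3: guest-physical address was readable. */
        uErr |= PF_ERR_P;
    return uErr;   /* What the handler feeds to TRPMAssertXcptPF() and the nested-paging handler. */
}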
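
Finally, a small sketch of the DR6 value that hmR0VmxExitXcptDB() above derives from the #DB exit qualification before handing it to DBGF. The exit qualification reuses DR6 bit positions for B0-B3, BD and BS (Intel SDM Vol. 3, Table 27-1); 0xFFFF0FF0 is assumed here as the architectural DR6 base value and Dr6FromDbExitQual() is an illustrative name, not a VirtualBox function.

#include <stdint.h>

static uint64_t Dr6FromDbExitQual(uint64_t uExitQual)
{
    const uint64_t fB0toB3 = UINT64_C(0x0000000F);  /* Bits 3:0  - breakpoint condition hits. */
    const uint64_t fBD     = UINT64_C(1) << 13;     /* Debug-register access detected.        */
    const uint64_t fBS     = UINT64_C(1) << 14;     /* Single-step.                           */

    uint64_t uDr6 = UINT64_C(0xFFFF0FF0);           /* Architecturally-set DR6 bits.          */
    uDr6 |= uExitQual & (fB0toB3 | fBD | fBS);
    return uDr6;
}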
