Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC (revision 271152) +++ sys/amd64/conf/GENERIC (working copy) @@ -356,3 +356,7 @@ # VMware support device vmx # VMware VMXNET3 Ethernet + +options KTR +options KTR_MASK=(KTR_GEN) +options KTR_ENTRIES=(1024*1024) Index: sys/amd64/vmm/amd/svm.c =================================================================== --- sys/amd64/vmm/amd/svm.c (revision 271203) +++ sys/amd64/vmm/amd/svm.c (working copy) @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -80,8 +81,11 @@ #define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. */ #define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */ -#define VMCB_CACHE_DEFAULT \ - (VMCB_CACHE_ASID | VMCB_CACHE_IOPM | VMCB_CACHE_NP) +#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \ + VMCB_CACHE_IOPM | \ + VMCB_CACHE_I | \ + VMCB_CACHE_TPR | \ + VMCB_CACHE_NP) MALLOC_DEFINE(M_SVM, "svm", "svm"); MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic"); @@ -114,8 +118,9 @@ */ static struct svm_regctx host_ctx[MAXCPU]; -static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid VMCB EXITINTINFO"); -static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "VMM pending exception injected"); +static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery"); +static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry"); +static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window"); /* * Common function to enable or disabled SVM for a CPU. @@ -389,11 +394,146 @@ } static __inline void -vcpu_set_dirty(struct svm_vcpu *vcpustate, uint32_t dirtybits) +vcpu_set_dirty(struct svm_softc *sc, int vcpu, uint32_t dirtybits) { + struct svm_vcpu *vcpustate; + + vcpustate = svm_get_vcpu(sc, vcpu); + vcpustate->dirty |= dirtybits; } +static __inline int +svm_get_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask) +{ + struct vmcb_ctrl *ctrl; + + KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + return (ctrl->intercept[idx] & bitmask ? 
1 : 0); +} + +static __inline void +svm_set_intercept(struct svm_softc *sc, int vcpu, int idx, uint32_t bitmask, + int enabled) +{ + struct vmcb_ctrl *ctrl; + uint32_t oldval; + + KASSERT(idx >=0 && idx < 5, ("invalid intercept index %d", idx)); + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + oldval = ctrl->intercept[idx]; + + if (enabled) + ctrl->intercept[idx] |= bitmask; + else + ctrl->intercept[idx] &= ~bitmask; + + if (ctrl->intercept[idx] != oldval) { + vcpu_set_dirty(sc, vcpu, VMCB_CACHE_I); + VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified " + "from %#x to %#x", idx, oldval, ctrl->intercept[idx]); + } +} + +static __inline void +svm_disable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) +{ + svm_set_intercept(sc, vcpu, off, bitmask, 0); +} + +static __inline void +svm_enable_intercept(struct svm_softc *sc, int vcpu, int off, uint32_t bitmask) +{ + svm_set_intercept(sc, vcpu, off, bitmask, 1); +} + +static void +vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, + uint64_t msrpm_base_pa, uint64_t np_pml4) +{ + struct vmcb_ctrl *ctrl; + struct vmcb_state *state; + uint32_t mask; + int n; + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + state = svm_get_vmcb_state(sc, vcpu); + + ctrl->iopm_base_pa = iopm_base_pa; + ctrl->msrpm_base_pa = msrpm_base_pa; + + /* Enable nested paging */ + ctrl->np_enable = 1; + ctrl->n_cr3 = np_pml4; + + /* + * Intercept accesses to the control registers that are not shadowed + * in the VMCB - i.e. all except cr0, cr2, cr3, cr4 and cr8. + */ + for (n = 0; n < 16; n++) { + mask = (BIT(n) << 16) | BIT(n); + if (n == 0 || n == 2 || n == 3 || n == 4 || n == 8) + svm_disable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); + else + svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); + } + + /* Intercept Machine Check exceptions. */ + svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC)); + + /* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */ + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_MSR); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_HLT); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_CPUID); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INTR); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_INIT); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_NMI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SMI); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_SHUTDOWN); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_FERR_FREEZE); + + /* + * From section "Canonicalization and Consistency Checks" in APMv2 + * the VMRUN intercept bit must be set to pass the consistency check. + */ + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMRUN); + + /* + * The ASID will be set to a non-zero value just before VMRUN. + */ + ctrl->asid = 0; + + /* + * Section 15.21.1, Interrupt Masking in EFLAGS + * Section 15.21.2, Virtualizing APIC.TPR + * + * This must be set for %rflag and %cr8 isolation of guest and host. 
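+ * With V_INTR_MASKING set the guest's RFLAGS.IF and CR8 (TPR) gate only virtual interrupts; physical interrupt masking stays under host control.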
+ */ + ctrl->v_intr_masking = 1; + + /* Enable Last Branch Record aka LBR for debugging */ + ctrl->lbr_virt_en = 1; + state->dbgctl = BIT(0); + + /* EFER_SVM must always be set when the guest is executing */ + state->efer = EFER_SVM; + + /* Set up the PAT to power-on state */ + state->g_pat = PAT_VALUE(0, PAT_WRITE_BACK) | + PAT_VALUE(1, PAT_WRITE_THROUGH) | + PAT_VALUE(2, PAT_UNCACHED) | + PAT_VALUE(3, PAT_UNCACHEABLE) | + PAT_VALUE(4, PAT_WRITE_BACK) | + PAT_VALUE(5, PAT_WRITE_THROUGH) | + PAT_VALUE(6, PAT_UNCACHED) | + PAT_VALUE(7, PAT_UNCACHEABLE); +} + /* * Initialise a virtual machine. */ @@ -451,7 +591,7 @@ vcpu = svm_get_vcpu(svm_sc, i); vcpu->lastcpu = NOCPU; vcpu->vmcb_pa = vtophys(&vcpu->vmcb); - svm_init_vmcb(&vcpu->vmcb, iopm_pa, msrpm_pa, pml4_pa); + vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa); } return (svm_sc); } @@ -591,7 +731,7 @@ struct svm_regctx *regs; struct vm_inout_str *vis; uint64_t info1; - + state = svm_get_vmcb_state(svm_sc, vcpu); ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); regs = svm_get_guest_regctx(svm_sc, vcpu); @@ -686,12 +826,11 @@ } /* - * Special handling of EFER MSR. - * SVM guest must have SVM EFER bit set, prohibit guest from cleareing SVM - * enable bit in EFER. + * Intercept access to MSR_EFER to prevent the guest from clearing the + * SVM enable bit. */ static void -svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write) +svm_efer(struct svm_softc *svm_sc, int vcpu, uint64_t write) { struct svm_regctx *swctx; struct vmcb_state *state; @@ -706,9 +845,72 @@ state->rax = (uint32_t)state->efer; swctx->e.g.sctx_rdx = (uint32_t)(state->efer >> 32); } + VCPU_CTR2(svm_sc->vm, vcpu, "%s guest EFER %#lx", + write ? "wrmsr" : "rdmsr", state->efer); } +#ifdef KTR +static const char * +intrtype_to_str(int intr_type) +{ + switch (intr_type) { + case VMCB_EVENTINJ_TYPE_INTR: + return ("hwintr"); + case VMCB_EVENTINJ_TYPE_NMI: + return ("nmi"); + case VMCB_EVENTINJ_TYPE_INTn: + return ("swintr"); + case VMCB_EVENTINJ_TYPE_EXCEPTION: + return ("exception"); + default: + panic("%s: unknown intr_type %d", __func__, intr_type); + } +} +#endif + +/* + * Inject an event to vcpu as described in section 15.20, "Event injection". 
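+ * The event descriptor is written into the VMCB EVENTINJ field and is delivered by the processor on the next VMRUN.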
+ */ static void +svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector, + uint32_t error, bool ec_valid) +{ + struct vmcb_ctrl *ctrl; + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + + KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, + ("%s: event already pending %#lx", __func__, ctrl->eventinj)); + + KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d", + __func__, vector)); + + switch (intr_type) { + case VMCB_EVENTINJ_TYPE_INTR: + case VMCB_EVENTINJ_TYPE_NMI: + case VMCB_EVENTINJ_TYPE_INTn: + break; + case VMCB_EVENTINJ_TYPE_EXCEPTION: + if (vector >= 0 && vector <= 31 && vector != 2) + break; + /* FALLTHROUGH */ + default: + panic("%s: invalid intr_type/vector: %d/%d", __func__, + intr_type, vector); + } + ctrl->eventinj = vector | (intr_type << 8) | VMCB_EVENTINJ_VALID; + if (ec_valid) { + ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; + ctrl->eventinj |= (uint64_t)error << 32; + VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %#x", + intrtype_to_str(intr_type), vector, error); + } else { + VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d", + intrtype_to_str(intr_type), vector); + } +} + +static void svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) { struct vmcb_ctrl *ctrl; @@ -731,6 +933,100 @@ vm_exit_intinfo(svm_sc->vm, vcpu, intinfo); } +static __inline void +enable_intr_window_exiting(struct svm_softc *sc, int vcpu) +{ + struct vmcb_ctrl *ctrl; + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + + if (ctrl->v_irq == 0) { + VCPU_CTR0(sc->vm, vcpu, "Enable intr window exiting"); + ctrl->v_irq = 1; + ctrl->v_ign_tpr = 1; + vcpu_set_dirty(sc, vcpu, VMCB_CACHE_TPR); + svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_VINTR); + } else { + VCPU_CTR0(sc->vm, vcpu, "intr window exiting already enabled"); + } +} + +static __inline void +disable_intr_window_exiting(struct svm_softc *sc, int vcpu) +{ + struct vmcb_ctrl *ctrl; + + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + + if (ctrl->v_irq) { + VCPU_CTR0(sc->vm, vcpu, "Disable intr window exiting"); + ctrl->v_irq = 0; + vcpu_set_dirty(sc, vcpu, VMCB_CACHE_TPR); + svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_VINTR); + } else { + VCPU_CTR0(sc->vm, vcpu, "intr window exiting already disabled"); + } +} + +static int +nmi_blocked(struct svm_softc *sc, int vcpu) +{ + /* XXX need to track NMI blocking */ + return (0); +} + +static void +enable_nmi_blocking(struct svm_softc *sc, int vcpu) +{ + /* XXX enable iret intercept */ +} + +#ifdef notyet +static void +clear_nmi_blocking(struct svm_softc *sc, int vcpu) +{ + /* XXX disable iret intercept */ +} +#endif + +#ifdef KTR +static const char * +exit_reason_to_str(uint64_t reason) +{ + static char reasonbuf[32]; + + switch (reason) { + case VMCB_EXIT_INVALID: + return ("invalvmcb"); + case VMCB_EXIT_SHUTDOWN: + return ("shutdown"); + case VMCB_EXIT_NPF: + return ("nptfault"); + case VMCB_EXIT_PAUSE: + return ("pause"); + case VMCB_EXIT_HLT: + return ("hlt"); + case VMCB_EXIT_CPUID: + return ("cpuid"); + case VMCB_EXIT_IO: + return ("inout"); + case VMCB_EXIT_MC: + return ("mchk"); + case VMCB_EXIT_INTR: + return ("extintr"); + case VMCB_EXIT_VINTR: + return ("vintr"); + case VMCB_EXIT_MSR: + return ("msr"); + default: + snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason); + return (reasonbuf); + } +} +#endif /* KTR */ + /* * Determine the cause of virtual cpu exit and handle VMEXIT. 
* Return: false - Break vcpu execution loop and handle vmexit @@ -758,16 +1054,27 @@ update_rip = true; loop = true; - vmexit->exitcode = VM_EXITCODE_VMX; + vmexit->exitcode = VM_EXITCODE_VMX; /* XXX fixme */ vmexit->u.vmx.status = 0; + vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1); + KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event " "injection valid bit is set %#lx", __func__, ctrl->eventinj)); svm_save_intinfo(svm_sc, vcpu); + /* + * XXX exit on reads and writes to %cr8 since V_TPR is not accurate. + */ + switch (code) { + case VMCB_EXIT_VINTR: + update_rip = false; + vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1); + break; case VMCB_EXIT_MC: /* Machine Check. */ + /* XXX fixme machine check != MTRAP */ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1); vmexit->exitcode = VM_EXITCODE_MTRAP; loop = false; @@ -779,7 +1086,6 @@ edx = ctx->e.g.sctx_rdx; if (ecx == MSR_EFER) { - VCPU_CTR0(svm_sc->vm, vcpu,"VMEXIT EFER\n"); svm_efer(svm_sc, vcpu, info1); break; } @@ -799,6 +1105,8 @@ } else loop = retu ? false : true; + /* XXX see vmx.c to add an assertion */ + VCPU_CTR3(svm_sc->vm, vcpu, "VMEXIT WRMSR(%s handling) 0x%lx @0x%x", loop ? "kernel" : "user", val, ecx); @@ -812,6 +1120,9 @@ loop = false; } else loop = retu ? false : true; + + /* XXX see vmx.c to add an assertion */ + VCPU_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR" " MSB=0x%08x, LSB=%08x @0x%x", ctx->e.g.sctx_rdx, state->rax, ecx); @@ -835,8 +1146,6 @@ * interrupt, local APIC will inject event in guest. */ update_rip = false; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt" - " RIP:0x%lx.\n", state->rip); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1); break; @@ -852,29 +1161,16 @@ (uint32_t *)&ctx->sctx_rbx, (uint32_t *)&ctx->sctx_rcx, (uint32_t *)&ctx->e.g.sctx_rdx); - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n"); break; case VMCB_EXIT_HLT: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1); - if (ctrl->v_irq) { - /* Interrupt is pending, can't halt guest. */ - vmm_stat_incr(svm_sc->vm, vcpu, - VMEXIT_HLT_IGNORED, 1); - VCPU_CTR0(svm_sc->vm, vcpu, - "VMEXIT halt ignored."); - } else { - VCPU_CTR0(svm_sc->vm, vcpu, - "VMEXIT halted CPU."); - vmexit->exitcode = VM_EXITCODE_HLT; - vmexit->u.hlt.rflags = state->rflags; - loop = false; - - } + vmexit->exitcode = VM_EXITCODE_HLT; + vmexit->u.hlt.rflags = state->rflags; + loop = false; break; case VMCB_EXIT_PAUSE: - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT pause"); vmexit->exitcode = VM_EXITCODE_PAUSE; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1); @@ -912,16 +1208,14 @@ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1); } - + break; case VMCB_EXIT_SHUTDOWN: - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT shutdown."); loop = false; break; case VMCB_EXIT_INVALID: - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID."); loop = false; break; @@ -939,9 +1233,14 @@ break; } + VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx nrip %#lx", + loop ? "handled" : "unhandled", exit_reason_to_str(code), + state->rip, update_rip ? ctrl->nrip : state->rip); + vmexit->rip = state->rip; if (update_rip) { if (ctrl->nrip == 0) { + /* XXX fixme */ VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set " "for RIP0x%lx.\n", state->rip); vmexit->exitcode = VM_EXITCODE_VMX; @@ -954,40 +1253,9 @@ state->rip = vmexit->rip; } - if (state->rip == 0) { - VCPU_CTR0(svm_sc->vm, vcpu, "SVM_ERR:RIP is NULL\n"); - vmexit->exitcode = VM_EXITCODE_VMX; - } - return (loop); } -/* - * Inject NMI to virtual cpu. 
- */ -static int -svm_inject_nmi(struct svm_softc *svm_sc, int vcpu) -{ - struct vmcb_ctrl *ctrl; - - KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); - - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - /* Can't inject another NMI if last one is pending.*/ - if (!vm_nmi_pending(svm_sc->vm, vcpu)) - return (0); - - /* Inject NMI, vector number is not used.*/ - vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_NMI, IDT_NMI, 0, false); - - /* Acknowledge the request is accepted.*/ - vm_nmi_clear(svm_sc->vm, vcpu); - - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Injected NMI.\n"); - - return (1); -} - static void svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu) { @@ -1002,7 +1270,7 @@ KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not " "valid: %#lx", __func__, intinfo)); - vmcb_eventinject(ctrl, VMCB_EXITINTINFO_TYPE(intinfo), + svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo), VMCB_EXITINTINFO_VECTOR(intinfo), VMCB_EXITINTINFO_EC(intinfo), VMCB_EXITINTINFO_EC_VALID(intinfo)); @@ -1014,76 +1282,143 @@ * Inject event to virtual cpu. */ static void -svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu, struct vlapic *vlapic) +svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; int extint_pending; - int vector; - - KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); + int vector, need_intr_window; - state = svm_get_vmcb_state(svm_sc, vcpu); - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); + KASSERT(vcpu < sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); - svm_inj_intinfo(svm_sc, vcpu); + state = svm_get_vmcb_state(sc, vcpu); + ctrl = svm_get_vmcb_ctrl(sc, vcpu); - /* Can't inject multiple events at once. */ - if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { - VCPU_CTR1(svm_sc->vm, vcpu, - "SVM:Last event(0x%lx) is pending.\n", ctrl->eventinj); - return ; - } + need_intr_window = 0; - /* Wait for guest to come out of interrupt shadow. */ - if (ctrl->intr_shadow) { - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n"); - return; - } + svm_inj_intinfo(sc, vcpu); - /* NMI event has priority over interrupts.*/ - if (svm_inject_nmi(svm_sc, vcpu)) { - return; + /* NMI event has priority over interrupts. */ + if (vm_nmi_pending(sc->vm, vcpu)) { + if (nmi_blocked(sc, vcpu)) { + /* + * Can't inject another NMI if the guest has not + * yet executed an "iret" after the last NMI. + */ + VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due " + "to NMI-blocking"); + } else if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { + /* + * If there is already an exception/interrupt pending + * then defer the NMI until after that. + */ + VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to " + "eventinj %#lx", ctrl->eventinj); + + /* + * Use self-IPI to trigger a VM-exit as soon as + * possible after the event injection is completed. + * + * This works only if the external interrupt exiting + * is at a lower priority than the event injection. + * + * Although not explicitly stated in the APMv2 the + * relative priorities were verified empirically. 
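+ * The IPI stays pending across VMRUN (interrupts are disabled around it) and forces a #VMEXIT right after the pending event is injected, so the NMI can be injected on the next entry.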
+ */ + ipi_cpu(curcpu, IPI_AST); + } else { + vm_nmi_clear(sc->vm, vcpu); + + /* Inject NMI, vector number is not used */ + svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_NMI, + IDT_NMI, 0, false); + + /* virtual NMI blocking is now in effect */ + enable_nmi_blocking(sc, vcpu); + + VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI"); + } } - extint_pending = vm_extint_pending(svm_sc->vm, vcpu); + extint_pending = vm_extint_pending(sc->vm, vcpu); if (!extint_pending) { /* Ask the local apic for a vector to inject */ - if (!vlapic_pending_intr(vlapic, &vector)) - return; + if (!vlapic_pending_intr(vlapic, &vector)) { + goto done; /* nothing to inject */ + } + KASSERT(vector >= 16 && vector <= 255, + ("invalid vector %d from local APIC", vector)); } else { /* Ask the legacy pic for a vector to inject */ - vatpic_pending_intr(svm_sc->vm, &vector); + vatpic_pending_intr(sc->vm, &vector); + KASSERT(vector >= 0 && vector <= 255, + ("invalid vector %d from local APIC", vector)); } - if (vector < 32 || vector > 255) { - VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:Event injection" - "invalid vector=%d.\n", vector); - ERR("SVM_ERR:Event injection invalid vector=%d.\n", vector); - return; + /* + * If the guest has disabled interrupts or is in an interrupt shadow + * then we cannot inject the pending interrupt. + */ + if ((state->rflags & PSL_I) == 0) { + VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " + "rflags %#lx", vector, state->rflags); + need_intr_window = 1; + goto done; } - if ((state->rflags & PSL_I) == 0) { - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n"); - return; + if (ctrl->intr_shadow) { + VCPU_CTR1(sc->vm, vcpu, "Cannot inject vector %d due to " + "interrupt shadow", vector); + need_intr_window = 1; + goto done; } - vmcb_eventinject(ctrl, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false); + if (ctrl->eventinj & VMCB_EVENTINJ_VALID) { + VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to " + "eventinj %#lx", vector, ctrl->eventinj); + need_intr_window = 1; + goto done; + } + svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false); + if (!extint_pending) { /* Update the Local APIC ISR */ vlapic_intr_accepted(vlapic, vector); } else { - vm_extint_clear(svm_sc->vm, vcpu); - vatpic_intr_accepted(svm_sc->vm, vector); - - /* - * XXX need to recheck exting_pending ala VT-x + vm_extint_clear(sc->vm, vcpu); + vatpic_intr_accepted(sc->vm, vector); + /* + * Force a VM-exit as soon as the vcpu is ready to accept + * another interrupt. This is done because the PIC might + * have another vector that it wants to inject. Also, if + * the vlapic has a pending interrupt that is preempted + * by the ExtInt, then it allows us to inject the APIC + * vector as soon as possible. */ + need_intr_window = 1; } - - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector); +done: + if (need_intr_window) { + /* + * We use V_IRQ in conjunction with the VINTR intercept to + * trap into the hypervisor as soon as a virtual interrupt + * can be delivered. + * + * Since injected events are not subject to intercept checks + * we need to ensure that the V_IRQ is not actually going to + * be delivered on VM entry. The KASSERT below enforces this. 
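+ * Once the guest is again able to take interrupts the VINTR intercept fires (#VMEXIT 0x64) and the pending vector is injected on the following entry.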
+ */ + KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 || + (state->rflags & PSL_I) == 0 || ctrl->intr_shadow, + ("Bogus intr_window_exiting: eventinj (%#lx), " + "intr_shadow (%u), rflags (%#lx)", + ctrl->eventinj, ctrl->intr_shadow, state->rflags)); + enable_intr_window_exiting(sc, vcpu); + } else { + disable_intr_window_exiting(sc, vcpu); + } } static __inline void @@ -1190,7 +1525,7 @@ vcpustate->asid.num = asid[thiscpu].num; ctrl->asid = vcpustate->asid.num; - vcpu_set_dirty(vcpustate, VMCB_CACHE_ASID); + vcpu_set_dirty(sc, vcpuid, VMCB_CACHE_ASID); /* * If this cpu supports "flush-by-asid" then the TLB * was not flushed after the generation bump. The TLB @@ -1257,7 +1592,7 @@ /* * Invalidate the VMCB state cache by marking all fields dirty. */ - vcpu_set_dirty(vcpustate, 0xffffffff); + vcpu_set_dirty(svm_sc, vcpu, 0xffffffff); /* * XXX @@ -1273,10 +1608,6 @@ vmm_stat_incr(vm, vcpu, VCPU_MIGRATIONS, 1); } - VCPU_CTR3(vm, vcpu, "SVM:Enter vmrun RIP:0x%lx" - " inst len=%d/%d\n", - rip, vmexit->inst_length, - vmexit->u.inst_emul.vie.num_valid); /* Update Guest RIP */ state->rip = rip; @@ -1300,12 +1631,7 @@ if (vcpu_rendezvous_pending(rend_cookie)) { enable_gintr(); - vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; - vmm_stat_incr(vm, vcpu, VMEXIT_RENDEZVOUS, 1); - VCPU_CTR1(vm, vcpu, - "SVM: VCPU rendezvous, RIP:0x%lx\n", - state->rip); - vmexit->rip = state->rip; + vm_exit_rendezvous(vm, vcpu, state->rip); break; } @@ -1312,11 +1638,7 @@ /* We are asked to give the cpu by scheduler. */ if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) { enable_gintr(); - vmexit->exitcode = VM_EXITCODE_BOGUS; - vmm_stat_incr(vm, vcpu, VMEXIT_ASTPENDING, 1); - VCPU_CTR1(vm, vcpu, - "SVM: ASTPENDING, RIP:0x%lx\n", state->rip); - vmexit->rip = state->rip; + vm_exit_astpending(vm, vcpu, state->rip); break; } @@ -1333,8 +1655,10 @@ ctrl->vmcb_clean = VMCB_CACHE_DEFAULT & ~vcpustate->dirty; vcpustate->dirty = 0; + VCPU_CTR1(vm, vcpu, "vmcb clean %#x", ctrl->vmcb_clean); /* Launch Virtual Machine. */ + VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); svm_launch(vmcb_pa, gctx, hctx); CPU_CLR_ATOMIC(thiscpu, &pmap->pm_active); @@ -1365,8 +1689,6 @@ /* Handle #VMEXIT and if required return to user space. */ loop = svm_vmexit(svm_sc, vcpu, vmexit); - vcpustate->loop++; - vmm_stat_incr(vm, vcpu, VMEXIT_COUNT, 1); } while (loop); return (0); @@ -1580,99 +1902,58 @@ static int svm_setcap(void *arg, int vcpu, int type, int val) { - struct svm_softc *svm_sc; - struct vmcb_ctrl *ctrl; - int ret = ENOENT; + struct svm_softc *sc; + int error; - svm_sc = arg; - KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); - - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - + sc = arg; + error = 0; switch (type) { - case VM_CAP_HALT_EXIT: - if (val) - ctrl->ctrl1 |= VMCB_INTCPT_HLT; - else - ctrl->ctrl1 &= ~VMCB_INTCPT_HLT; - ret = 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Set_gap:Halt exit %s.\n", - val ? "enabled": "disabled"); - break; - - case VM_CAP_PAUSE_EXIT: - if (val) - ctrl->ctrl1 |= VMCB_INTCPT_PAUSE; - else - ctrl->ctrl1 &= ~VMCB_INTCPT_PAUSE; - ret = 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Set_gap:Pause exit %s.\n", - val ? "enabled": "disabled"); - break; - - case VM_CAP_MTRAP_EXIT: - if (val) - ctrl->exception |= BIT(IDT_MC); - else - ctrl->exception &= ~BIT(IDT_MC); - ret = 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:Set_gap:MC exit %s.\n", - val ? 
"enabled": "disabled"); - break; - - case VM_CAP_UNRESTRICTED_GUEST: - /* SVM doesn't need special capability for SMP.*/ - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:Set_gap:Unrestricted " - "always enabled.\n"); - ret = 0; - break; - - default: - break; - } - - return (ret); + case VM_CAP_HALT_EXIT: + svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_HLT, val); + break; + case VM_CAP_PAUSE_EXIT: + svm_set_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_PAUSE, val); + break; + case VM_CAP_UNRESTRICTED_GUEST: + /* Unrestricted guest execution cannot be disabled in SVM */ + if (val == 0) + error = EINVAL; + break; + default: + error = ENOENT; + break; + } + return (error); } static int svm_getcap(void *arg, int vcpu, int type, int *retval) { - struct svm_softc *svm_sc; - struct vmcb_ctrl *ctrl; + struct svm_softc *sc; + int error; - svm_sc = arg; - KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); + sc = arg; + error = 0; - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - switch (type) { - case VM_CAP_HALT_EXIT: - *retval = (ctrl->ctrl1 & VMCB_INTCPT_HLT) ? 1 : 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Halt exit %s.\n", - *retval ? "enabled": "disabled"); + case VM_CAP_HALT_EXIT: + *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_HLT); break; - - case VM_CAP_PAUSE_EXIT: - *retval = (ctrl->ctrl1 & VMCB_INTCPT_PAUSE) ? 1 : 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:Pause exit %s.\n", - *retval ? "enabled": "disabled"); + case VM_CAP_PAUSE_EXIT: + *retval = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, + VMCB_INTCPT_PAUSE); break; - - case VM_CAP_MTRAP_EXIT: - *retval = (ctrl->exception & BIT(IDT_MC)) ? 1 : 0; - VCPU_CTR1(svm_sc->vm, vcpu, "SVM:get_cap:MC exit %s.\n", - *retval ? "enabled": "disabled"); - break; - case VM_CAP_UNRESTRICTED_GUEST: - VCPU_CTR0(svm_sc->vm, vcpu, "SVM:get_cap:Unrestricted.\n"); - *retval = 1; + *retval = 1; /* unrestricted guest is always enabled */ break; - default: + default: + error = ENOENT; break; } - - return (0); + return (error); } static struct vlapic * Index: sys/amd64/vmm/amd/svm_softc.h =================================================================== --- sys/amd64/vmm/amd/svm_softc.h (revision 271203) +++ sys/amd64/vmm/amd/svm_softc.h (working copy) @@ -44,7 +44,6 @@ struct vmcb vmcb; /* hardware saved vcpu context */ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ - uint64_t loop; /* loop count for vcpu */ int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ Index: sys/amd64/vmm/amd/vmcb.c =================================================================== --- sys/amd64/vmm/amd/vmcb.c (revision 271203) +++ sys/amd64/vmm/amd/vmcb.c (working copy) @@ -49,87 +49,6 @@ */ /* - * Initialize SVM h/w context i.e. the VMCB control and saved state areas. - */ -void -svm_init_vmcb(struct vmcb *vmcb, uint64_t iopm_base_pa, uint64_t msrpm_base_pa, - uint64_t np_pml4) -{ - struct vmcb_ctrl *ctrl; - struct vmcb_state *state; - uint16_t cr_shadow; - - ctrl = &vmcb->ctrl; - state = &vmcb->state; - - ctrl->iopm_base_pa = iopm_base_pa; - ctrl->msrpm_base_pa = msrpm_base_pa; - - /* Enable nested paging */ - ctrl->np_enable = 1; - ctrl->n_cr3 = np_pml4; - - /* - * Intercept accesses to the control registers that are not shadowed - * in the VMCB - i.e. all except cr0, cr2, cr3, cr4 and cr8. 
- */ - cr_shadow = BIT(0) | BIT(2) | BIT(3) | BIT(4) | BIT(8); - ctrl->cr_write = ctrl->cr_read = ~cr_shadow; - - /* Intercept Machine Check exceptions. */ - ctrl->exception = BIT(IDT_MC); - - /* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */ - ctrl->ctrl1 = VMCB_INTCPT_IO | - VMCB_INTCPT_MSR | - VMCB_INTCPT_HLT | - VMCB_INTCPT_CPUID | - VMCB_INTCPT_INTR | - VMCB_INTCPT_VINTR | - VMCB_INTCPT_INIT | - VMCB_INTCPT_NMI | - VMCB_INTCPT_SMI | - VMCB_INTCPT_FERR_FREEZE | - VMCB_INTCPT_SHUTDOWN; - - /* - * From section "Canonicalization and Consistency Checks" in APMv2 - * the VMRUN intercept bit must be set to pass the consistency check. - */ - ctrl->ctrl2 = VMCB_INTCPT_VMRUN; - - /* - * The ASID will be set to a non-zero value just before VMRUN. - */ - ctrl->asid = 0; - - /* - * Section 15.21.1, Interrupt Masking in EFLAGS - * Section 15.21.2, Virtualizing APIC.TPR - * - * This must be set for %rflag and %cr8 isolation of guest and host. - */ - ctrl->v_intr_masking = 1; - - /* Enable Last Branch Record aka LBR for debugging */ - ctrl->lbr_virt_en = 1; - state->dbgctl = BIT(0); - - /* EFER_SVM must always be set when the guest is executing */ - state->efer = EFER_SVM; - - /* Set up the PAT to power-on state */ - state->g_pat = PAT_VALUE(0, PAT_WRITE_BACK) | - PAT_VALUE(1, PAT_WRITE_THROUGH) | - PAT_VALUE(2, PAT_UNCACHED) | - PAT_VALUE(3, PAT_UNCACHEABLE) | - PAT_VALUE(4, PAT_WRITE_BACK) | - PAT_VALUE(5, PAT_WRITE_THROUGH) | - PAT_VALUE(6, PAT_UNCACHED) | - PAT_VALUE(7, PAT_UNCACHEABLE); -} - -/* * Read from segment selector, control and general purpose register of VMCB. */ int @@ -352,36 +271,3 @@ return (seg); } - -/* - * Inject an event to vcpu as described in section 15.20, "Event injection". - */ -void -vmcb_eventinject(struct vmcb_ctrl *ctrl, int intr_type, int vector, - uint32_t error, bool ec_valid) -{ - KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, - ("%s: event already pending %#lx", __func__, ctrl->eventinj)); - - KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d", - __func__, vector)); - - switch (intr_type) { - case VMCB_EVENTINJ_TYPE_INTR: - case VMCB_EVENTINJ_TYPE_NMI: - case VMCB_EVENTINJ_TYPE_INTn: - break; - case VMCB_EVENTINJ_TYPE_EXCEPTION: - if (vector >= 0 && vector <= 31 && vector != 2) - break; - /* FALLTHROUGH */ - default: - panic("%s: invalid intr_type/vector: %d/%d", __func__, - intr_type, vector); - } - ctrl->eventinj = vector | (intr_type << 8) | VMCB_EVENTINJ_VALID; - if (ec_valid) { - ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID; - ctrl->eventinj |= (uint64_t)error << 32; - } -} Index: sys/amd64/vmm/amd/vmcb.h =================================================================== --- sys/amd64/vmm/amd/vmcb.h (revision 271203) +++ sys/amd64/vmm/amd/vmcb.h (working copy) @@ -34,7 +34,14 @@ * Layout of VMCB: AMD64 Programmer's Manual Vol2, Appendix B */ -/* VMCB Control offset 0xC */ +/* vmcb_ctrl->intercept[] array indices */ +#define VMCB_CR_INTCPT 0 +#define VMCB_DR_INTCPT 1 +#define VMCB_EXC_INTCPT 2 +#define VMCB_CTRL1_INTCPT 3 +#define VMCB_CTRL2_INTCPT 4 + +/* intercept[VMCB_CTRL1_INTCPT] fields */ #define VMCB_INTCPT_INTR BIT(0) #define VMCB_INTCPT_NMI BIT(1) #define VMCB_INTCPT_SMI BIT(2) @@ -68,7 +75,7 @@ #define VMCB_INTCPT_FERR_FREEZE BIT(30) #define VMCB_INTCPT_SHUTDOWN BIT(31) -/* VMCB Control offset 0x10 */ +/* intercept[VMCB_CTRL2_INTCPT] fields */ #define VMCB_INTCPT_VMRUN BIT(0) #define VMCB_INTCPT_VMMCALL BIT(1) #define VMCB_INTCPT_VMLOAD BIT(2) @@ -91,18 +98,18 @@ #define VMCB_TLB_FLUSH_GUEST_NONGLOBAL 7 /* Flush 
guest non-PG entries */ /* VMCB state caching */ -#define VMCB_CACHE_NONE 0 /* No caching */ -#define VMCB_CACHE_I BIT(0) /* Cache vectors, TSC offset */ -#define VMCB_CACHE_IOPM BIT(1) /* I/O and MSR permission */ -#define VMCB_CACHE_ASID BIT(2) /* ASID */ -#define VMCB_CACHE_TPR BIT(3) /* V_TPR to V_INTR_VECTOR */ -#define VMCB_CACHE_NP BIT(4) /* Nested Paging */ -#define VMCB_CACHE_CR BIT(5) /* CR0, CR3, CR4 & EFER */ -#define VMCB_CACHE_DR BIT(6) /* Debug registers */ -#define VMCB_CACHE_DT BIT(7) /* GDT/IDT */ -#define VMCB_CACHE_SEG BIT(8) /* User segments, CPL */ -#define VMCB_CACHE_CR2 BIT(9) /* page fault address */ -#define VMCB_CACHE_LBR BIT(10) /* Last branch */ +#define VMCB_CACHE_NONE 0 /* No caching */ +#define VMCB_CACHE_I BIT(0) /* Intercept, TSC off, Pause filter */ +#define VMCB_CACHE_IOPM BIT(1) /* I/O and MSR permission */ +#define VMCB_CACHE_ASID BIT(2) /* ASID */ +#define VMCB_CACHE_TPR BIT(3) /* V_TPR to V_INTR_VECTOR */ +#define VMCB_CACHE_NP BIT(4) /* Nested Paging */ +#define VMCB_CACHE_CR BIT(5) /* CR0, CR3, CR4 & EFER */ +#define VMCB_CACHE_DR BIT(6) /* Debug registers */ +#define VMCB_CACHE_DT BIT(7) /* GDT/IDT */ +#define VMCB_CACHE_SEG BIT(8) /* User segments, CPL */ +#define VMCB_CACHE_CR2 BIT(9) /* page fault address */ +#define VMCB_CACHE_LBR BIT(10) /* Last branch */ /* VMCB control event injection */ #define VMCB_EVENTINJ_EC_VALID BIT(11) /* Error Code valid */ @@ -117,6 +124,7 @@ /* VMCB exit code, APM vol2 Appendix C */ #define VMCB_EXIT_MC 0x52 #define VMCB_EXIT_INTR 0x60 +#define VMCB_EXIT_VINTR 0x64 #define VMCB_EXIT_PUSHF 0x70 #define VMCB_EXIT_POPF 0x71 #define VMCB_EXIT_CPUID 0x72 @@ -174,13 +182,7 @@ /* VMCB control area - padded up to 1024 bytes */ struct vmcb_ctrl { - uint16_t cr_read; /* Offset 0, CR0-15 read/write */ - uint16_t cr_write; - uint16_t dr_read; /* Offset 4, DR0-DR15 */ - uint16_t dr_write; - uint32_t exception; /* Offset 8, bit mask for exceptions. */ - uint32_t ctrl1; /* Offset 0xC, intercept events1 */ - uint32_t ctrl2; /* Offset 0x10, intercept event2 */ + uint32_t intercept[5]; /* all intercepts */ uint8_t pad1[0x28]; /* Offsets 0x14-0x3B are reserved. */ uint16_t pause_filthresh; /* Offset 0x3C, PAUSE filter threshold */ uint16_t pause_filcnt; /* Offset 0x3E, PAUSE filter count */ @@ -276,12 +278,8 @@ CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); CTASSERT(offsetof(struct vmcb, state) == 0x400); -void svm_init_vmcb(struct vmcb *vmcb, uint64_t iopm_base_pa, - uint64_t msrpm_base_pa, uint64_t np_pml4); int vmcb_read(struct vmcb *vmcb, int ident, uint64_t *retval); int vmcb_write(struct vmcb *vmcb, int ident, uint64_t val); struct vmcb_segment *vmcb_seg(struct vmcb *vmcb, int type); -void vmcb_eventinject(struct vmcb_ctrl *ctrl, int type, int vector, - uint32_t error, bool ec_valid); #endif /* _VMCB_H_ */ Index: sys/amd64/vmm/io/vlapic.c =================================================================== --- sys/amd64/vmm/io/vlapic.c (revision 271152) +++ sys/amd64/vmm/io/vlapic.c (working copy) @@ -1051,7 +1051,8 @@ vmexit->exitcode = VM_EXITCODE_SPINUP_AP; vmexit->u.spinup_ap.vcpu = dest; vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; - + VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "spinup ap %d " + "at rip %#lx", dest, vec << PAGE_SHIFT); return (0); } }
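
For reference, a minimal stand-alone sketch (illustration only, not part of the patch) of the VMCB clean-bits bookkeeping introduced above: intercept, TPR and ASID updates mark the corresponding VMCB_CACHE_* bit dirty via vcpu_set_dirty(), and just before VMRUN the clean field is computed as VMCB_CACHE_DEFAULT with the dirty bits removed. The bit values mirror vmcb.h; the svm_softc/vcpu plumbing is collapsed into a single per-vcpu word.

/*
 * Stand-alone sketch of the VMCB state-cache (clean bits) bookkeeping.
 * Bit values mirror vmcb.h; the softc/vcpu plumbing is omitted and
 * replaced by a single "dirty" word.
 */
#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1U << (n))
#define VMCB_CACHE_I		BIT(0)	/* Intercept, TSC off, Pause filter */
#define VMCB_CACHE_IOPM		BIT(1)	/* I/O and MSR permission */
#define VMCB_CACHE_ASID		BIT(2)	/* ASID */
#define VMCB_CACHE_TPR		BIT(3)	/* V_TPR to V_INTR_VECTOR */
#define VMCB_CACHE_NP		BIT(4)	/* Nested Paging */

#define VMCB_CACHE_DEFAULT	(VMCB_CACHE_ASID |	\
				 VMCB_CACHE_IOPM |	\
				 VMCB_CACHE_I |		\
				 VMCB_CACHE_TPR |	\
				 VMCB_CACHE_NP)

static uint32_t dirty;			/* per-vcpu state in the real code */

static void
vcpu_set_dirty(uint32_t dirtybits)
{

	dirty |= dirtybits;
}

static uint32_t
pre_vmrun_clean_bits(void)
{
	uint32_t clean;

	/* Fields marked dirty must be reloaded from memory by the CPU. */
	clean = VMCB_CACHE_DEFAULT & ~dirty;
	dirty = 0;
	return (clean);
}

int
main(void)
{

	/* e.g. an intercept was toggled and the ASID was reassigned */
	vcpu_set_dirty(VMCB_CACHE_I);
	vcpu_set_dirty(VMCB_CACHE_ASID);
	printf("vmcb clean %#x\n", pre_vmrun_clean_bits());

	/* nothing changed since the last run: everything may be cached */
	printf("vmcb clean %#x\n", pre_vmrun_clean_bits());
	return (0);
}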