Index: sys/amd64/include/vmm.h =================================================================== --- sys/amd64/include/vmm.h (revision 283640) +++ sys/amd64/include/vmm.h (working copy) @@ -120,13 +120,18 @@ struct vm_guest_paging; struct pmap; +struct vm_eventinfo { + void *rptr; /* rendezvous cookie */ + int *sptr; /* suspend cookie */ + int *iptr; /* reqidle cookie */ +}; + typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, - struct pmap *pmap, void *rendezvous_cookie, - void *suspend_cookie); + struct pmap *pmap, struct vm_eventinfo *info); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); @@ -208,6 +213,7 @@ void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); #ifdef _SYS__CPUSET_H_ /* @@ -232,19 +238,26 @@ #endif /* _SYS__CPUSET_H_ */ static __inline int -vcpu_rendezvous_pending(void *rendezvous_cookie) +vcpu_rendezvous_pending(struct vm_eventinfo *info) { - return (*(uintptr_t *)rendezvous_cookie != 0); + return (*((uintptr_t *)(info->rptr)) != 0); } static __inline int -vcpu_suspended(void *suspend_cookie) +vcpu_suspended(struct vm_eventinfo *info) { - return (*(int *)suspend_cookie); + return (*info->sptr); } +static __inline int +vcpu_reqidle(struct vm_eventinfo *info) +{ + + return (*info->iptr); +} + /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. @@ -506,6 +519,7 @@ VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, + VM_EXITCODE_REQIDLE, VM_EXITCODE_MAX }; Index: sys/amd64/vmm/amd/svm.c =================================================================== --- sys/amd64/vmm/amd/svm.c (revision 283640) +++ sys/amd64/vmm/amd/svm.c (working copy) @@ -1900,7 +1900,7 @@ */ static int svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, - void *rend_cookie, void *suspended_cookie) + struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; struct svm_softc *svm_sc; @@ -1975,18 +1975,24 @@ */ disable_gintr(); - if (vcpu_suspended(suspended_cookie)) { + if (vcpu_suspended(evinfo)) { enable_gintr(); vm_exit_suspended(vm, vcpu, state->rip); break; } - if (vcpu_rendezvous_pending(rend_cookie)) { + if (vcpu_rendezvous_pending(evinfo)) { enable_gintr(); vm_exit_rendezvous(vm, vcpu, state->rip); break; } + if (vcpu_reqidle(evinfo)) { + enable_gintr(); + vm_exit_reqidle(vm, vcpu, state->rip); + break; + } + /* We are asked to give the cpu by scheduler. */ if (vcpu_should_yield(vm, vcpu)) { enable_gintr(); Index: sys/amd64/vmm/intel/vmx.c =================================================================== --- sys/amd64/vmm/intel/vmx.c (revision 283640) +++ sys/amd64/vmm/intel/vmx.c (working copy) @@ -2554,7 +2554,7 @@ static int vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, - void *rendezvous_cookie, void *suspend_cookie) + struct vm_eventinfo *evinfo) { int rc, handled, launched; struct vmx *vmx; @@ -2623,18 +2623,24 @@ * vmx_inject_interrupts() can suspend the vcpu due to a * triple fault. */ - if (vcpu_suspended(suspend_cookie)) { + if (vcpu_suspended(evinfo)) { enable_intr(); vm_exit_suspended(vmx->vm, vcpu, rip); break; } - if (vcpu_rendezvous_pending(rendezvous_cookie)) { + if (vcpu_rendezvous_pending(evinfo)) { enable_intr(); vm_exit_rendezvous(vmx->vm, vcpu, rip); break; } + if (vcpu_reqidle(evinfo)) { + enable_intr(); + vm_exit_reqidle(vmx->vm, vcpu, rip); + break; + } + if (vcpu_should_yield(vm, vcpu)) { enable_intr(); vm_exit_astpending(vmx->vm, vcpu, rip); Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c (revision 283640) +++ sys/amd64/vmm/vmm.c (working copy) @@ -95,6 +95,7 @@ struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ int hostcpu; /* (o) vcpu's host cpu */ + int reqidle; /* (i) request vcpu to idle */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ uint64_t exitintinfo; /* (i) events pending at VM exit */ @@ -164,8 +165,8 @@ #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) -#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \ - (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO) +#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \ + (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO) #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) #define VMSPACE_ALLOC(min, max) \ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) @@ -221,6 +222,28 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, "Force use of I/O MMU even if no passthrough devices were found."); +static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); + +#ifdef KTR +static const char * +vcpu_state2str(enum vcpu_state state) +{ + + switch (state) { + case VCPU_IDLE: + return ("idle"); + case VCPU_FROZEN: + return ("frozen"); + case VCPU_RUNNING: + return ("running"); + case VCPU_SLEEPING: + return ("sleeping"); + default: + return ("unknown"); + } +} +#endif + static void vcpu_cleanup(struct vm *vm, int i, bool destroy) { @@ -255,6 +278,7 @@ vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); + vcpu->reqidle = 0; vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; vcpu->extint_pending = 0; @@ -980,11 +1004,13 @@ static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); static int -vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, +vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate, bool from_idle) { + struct vcpu *vcpu; int error; + vcpu = &vm->vcpu[vcpuid]; vcpu_assert_locked(vcpu); /* @@ -993,8 +1019,13 @@ * ioctl() operating on a vcpu at any point. */ if (from_idle) { - while (vcpu->state != VCPU_IDLE) + while (vcpu->state != VCPU_IDLE) { + vcpu->reqidle = 1; + vcpu_notify_event_locked(vcpu, false); + VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to " + "idle requested", vcpu_state2str(vcpu->state)); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); + } } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); @@ -1031,6 +1062,9 @@ if (error) return (EBUSY); + VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s", + vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); + vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; @@ -1053,11 +1087,11 @@ } static void -vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) +vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate) { int error; - if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) + if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } @@ -1145,7 +1179,7 @@ * vcpu returned from VMRUN() and before it acquired the * vcpu lock above. */ - if (vm->rendezvous_func != NULL || vm->suspend) + if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) break; if (vm_nmi_pending(vm, vcpuid)) break; @@ -1182,13 +1216,13 @@ } t = ticks; - vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. */ msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); - vcpu_require_state_locked(vcpu, VCPU_FROZEN); + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); } @@ -1350,9 +1384,9 @@ if (vm->rendezvous_func == NULL) { VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); - vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); - vcpu_require_state_locked(vcpu, VCPU_FROZEN); + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); } else { VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); vcpu_unlock(vcpu); @@ -1375,6 +1409,19 @@ return (0); } +static int +vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu) +{ + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); + vcpu->reqidle = 0; + vcpu_unlock(vcpu); + *retu = true; + return (0); +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1432,6 +1479,18 @@ } void +vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip) +{ + struct vm_exit *vmexit; + + vmexit = vm_exitinfo(vm, vcpuid); + vmexit->rip = rip; + vmexit->inst_length = 0; + vmexit->exitcode = VM_EXITCODE_REQIDLE; + vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1); +} + +void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; @@ -1446,6 +1505,7 @@ int vm_run(struct vm *vm, struct vm_run *vmrun) { + struct vm_eventinfo evinfo; int error, vcpuid; struct vcpu *vcpu; struct pcb *pcb; @@ -1453,7 +1513,6 @@ struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; - void *rptr, *sptr; vcpuid = vmrun->cpuid; @@ -1466,11 +1525,12 @@ if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - rptr = &vm->rendezvous_func; - sptr = &vm->suspend; pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; + evinfo.rptr = &vm->rendezvous_func; + evinfo.sptr = &vm->suspend; + evinfo.iptr = &vcpu->reqidle; restart: critical_enter(); @@ -1485,7 +1545,7 @@ restore_guest_fpustate(vcpu); vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr); + error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); save_guest_fpustate(vcpu); @@ -1498,6 +1558,9 @@ retu = false; vcpu->nextrip = vme->rip + vme->inst_length; switch (vme->exitcode) { + case VM_EXITCODE_REQIDLE: + error = vm_handle_reqidle(vm, vcpuid, &retu); + break; case VM_EXITCODE_SUSPENDED: error = vm_handle_suspend(vm, vcpuid, &retu); break; @@ -1536,6 +1599,8 @@ if (error == 0 && retu == false) goto restart; + VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); + /* copy the exit information */ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); return (error); @@ -2072,7 +2137,7 @@ vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); - error = vcpu_set_state_locked(vcpu, newstate, from_idle); + error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle); vcpu_unlock(vcpu); return (error); @@ -2168,15 +2233,11 @@ * - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. */ -void -vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) +static void +vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) { int hostcpu; - struct vcpu *vcpu; - vcpu = &vm->vcpu[vcpuid]; - - vcpu_lock(vcpu); hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); @@ -2201,6 +2262,15 @@ if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } +} + +void +vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) +{ + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + vcpu_notify_event_locked(vcpu, lapic_intr); vcpu_unlock(vcpu); } Index: sys/amd64/vmm/vmm_stat.c =================================================================== --- sys/amd64/vmm/vmm_stat.c (revision 283640) +++ sys/amd64/vmm/vmm_stat.c (working copy) @@ -164,6 +164,7 @@ VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); +VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); Index: sys/amd64/vmm/vmm_stat.h =================================================================== --- sys/amd64/vmm/vmm_stat.h (revision 283640) +++ sys/amd64/vmm/vmm_stat.h (working copy) @@ -157,4 +157,5 @@ VMM_STAT_DECLARE(VMEXIT_USERSPACE); VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); VMM_STAT_DECLARE(VMEXIT_EXCEPTION); +VMM_STAT_DECLARE(VMEXIT_REQIDLE); #endif Index: usr.sbin/bhyve/bhyverun.c =================================================================== --- usr.sbin/bhyve/bhyverun.c (revision 283640) +++ usr.sbin/bhyve/bhyverun.c (working copy) @@ -100,7 +100,7 @@ struct bhyvestats { uint64_t vmexit_bogus; - uint64_t vmexit_bogus_switch; + uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; @@ -461,6 +461,17 @@ } static int +vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + assert(vmexit->inst_length == 0); + + stats.vmexit_reqidle++; + + return (VMEXIT_CONTINUE); +} + +static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -571,6 +582,7 @@ [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap,