Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h	(revision 259401)
+++ sys/amd64/include/vmm.h	(working copy)
@@ -146,7 +146,8 @@ enum vcpu_state {
 	VCPU_SLEEPING,
 };
 
-int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state oldstate,
+    enum vcpu_state newstate, bool sleepok);
 enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
 
 static int __inline
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c	(revision 259401)
+++ sys/amd64/vmm/vmm.c	(working copy)
@@ -804,19 +804,38 @@ save_guest_fpustate(struct vcpu *vcpu)
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state oldstate,
+    enum vcpu_state newstate, bool sleepok)
 {
 	int error;
 
+restart:
 	vcpu_assert_locked(vcpu);
+	if (vcpu->state != oldstate) {
+		if (sleepok) {
+			KASSERT(oldstate == VCPU_IDLE, ("vcpu sleeping when "
+			    "changing state from %d to %d", oldstate,
+			    newstate));
+			/*
+			 * XXX msleep_spin is not interruptible so use the
+			 * timeout to put an upper bound on the sleep.
+			 */
+			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+			goto restart;
+		} else {
+			return (EBUSY);
+		}
+	}
+	KASSERT(vcpu->state == oldstate, ("vcpu state %d and oldstate %d "
+	    "mismatch", vcpu->state, oldstate));
 
 	/*
 	 * The following state transitions are allowed:
 	 * IDLE -> FROZEN -> IDLE
 	 * FROZEN -> RUNNING -> FROZEN
 	 * FROZEN -> SLEEPING -> FROZEN
 	 */
-	switch (vcpu->state) {
+	switch (oldstate) {
 	case VCPU_IDLE:
 	case VCPU_RUNNING:
 	case VCPU_SLEEPING:
@@ -830,30 +849,40 @@ static int
 		break;
 	}
 
-	if (error == 0)
-		vcpu->state = newstate;
-	else
-		error = EBUSY;
+	if (error)
+		return (EBUSY);
 
-	return (error);
+	vcpu->state = newstate;
+	if (newstate == VCPU_IDLE)
+		wakeup(&vcpu->state);
+
+	return (0);
 }
 
 static void
-vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state oldstate,
+    enum vcpu_state newstate)
 {
 	int error;
 
-	if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0)
-		panic("Error %d setting state to %d\n", error, newstate);
+	error = vcpu_set_state(vm, vcpuid, oldstate, newstate, false);
+	if (error != 0) {
+		panic("Error %d changing state from %d to %d", error, oldstate,
+		    newstate);
+	}
 }
 
 static void
-vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state oldstate,
+    enum vcpu_state newstate)
 {
 	int error;
 
-	if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0)
-		panic("Error %d setting state to %d", error, newstate);
+	error = vcpu_set_state_locked(vcpu, oldstate, newstate, false);
+	if (error != 0) {
+		panic("Error %d changing state from %d to %d", error, oldstate,
+		    newstate);
+	}
 }
 
 /*
@@ -880,7 +909,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr
 	if (!vm_nmi_pending(vm, vcpuid) &&
 	    (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
 		t = ticks;
-		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+		vcpu_require_state_locked(vcpu, VCPU_FROZEN, VCPU_SLEEPING);
 		if (vlapic_enabled(vcpu->vlapic)) {
 			/*
 			 * XXX msleep_spin() is not interruptible so use the
@@ -898,7 +927,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr
 			vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
 			VCPU_CTR0(vm, vcpuid, "spinning down cpu");
 		}
-		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+		vcpu_require_state_locked(vcpu, VCPU_SLEEPING, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 	vcpu_unlock(vcpu);
@@ -1030,11 +1059,11 @@ restart:
 	restore_guest_msrs(vm, vcpuid);
 	restore_guest_fpustate(vcpu);
 
-	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
+	vcpu_require_state(vm, vcpuid, VCPU_FROZEN, VCPU_RUNNING);
 	vcpu->hostcpu = curcpu;
 	error = VMRUN(vm->cookie, vcpuid, rip, pmap);
 	vcpu->hostcpu = NOCPU;
-	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
+	vcpu_require_state(vm, vcpuid, VCPU_RUNNING, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
 	restore_host_msrs(vm, vcpuid);
@@ -1235,7 +1264,8 @@ vm_iommu_domain(struct vm *vm)
 }
 
 int
-vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state oldstate,
+    enum vcpu_state newstate, bool sleepok)
 {
 	int error;
 	struct vcpu *vcpu;
@@ -1246,7 +1276,7 @@ int
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
-	error = vcpu_set_state_locked(vcpu, newstate);
+	error = vcpu_set_state_locked(vcpu, oldstate, newstate, sleepok);
 	vcpu_unlock(vcpu);
 
 	return (error);
Index: sys/amd64/vmm/vmm_dev.c
===================================================================
--- sys/amd64/vmm/vmm_dev.c	(revision 259401)
+++ sys/amd64/vmm/vmm_dev.c	(working copy)
@@ -144,7 +144,7 @@ static int
 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	     struct thread *td)
 {
-	int error, vcpu, state_changed;
+	int error, err2, vcpu, state_changed;
 	struct vmmdev_softc *sc;
 	struct vm_memory_segment *seg;
 	struct vm_register *vmreg;
@@ -196,7 +196,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_
 			goto done;
 		}
 
-		error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
+		error = vcpu_set_state(sc->vm, vcpu, VCPU_IDLE,
+		    VCPU_FROZEN, true);
 		if (error)
 			goto done;
 
@@ -213,14 +214,19 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_
 		 */
 		error = 0;
 		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
-			error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
+			error = vcpu_set_state(sc->vm, vcpu, VCPU_IDLE,
+			    VCPU_FROZEN, true);
			if (error)
				break;
		}

		if (error) {
-			while (--vcpu >= 0)
-				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+			while (--vcpu >= 0) {
+				err2 = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN,
+				    VCPU_IDLE, false);
+				KASSERT(err2 == 0, ("error %d idling a vcpu",
+				    err2));
+			}
 			goto done;
 		}
 
@@ -377,10 +383,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_
 	}
 
 	if (state_changed == 1) {
-		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+		err2 = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, VCPU_IDLE,
+		    false);
+		KASSERT(err2 == 0, ("error %d idling a vcpu", err2));
 	} else if (state_changed == 2) {
-		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
-			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+			err2 = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN,
+			    VCPU_IDLE, false);
+			KASSERT(err2 == 0, ("error %d idling a vcpu", err2));
+		}
 	}
 
 done:
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c	(revision 259401)
+++ usr.sbin/bhyve/bhyverun.c	(working copy)
@@ -460,19 +460,8 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
 
 	while (1) {
 		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
 
-		if (error != 0) {
-			/*
-			 * It is possible that 'vmmctl' or some other process
-			 * has transitioned the vcpu to CANNOT_RUN state right
-			 * before we tried to transition it to RUNNING.
-			 *
-			 * This is expected to be temporary so just retry.
-			 */
-			if (errno == EBUSY)
-				continue;
-			else
-				break;
-		}
+		if (error != 0)
+			break;
 
 		prevcpu = vcpu;
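For readers following the interface change: the patch makes every caller name the transition it expects (oldstate -> newstate) and say whether it is willing to sleep until the vcpu becomes IDLE. Below is a small, self-contained userspace sketch of that discipline, not part of the patch; a pthread mutex and condition variable stand in for the vcpu spin lock and msleep_spin()/wakeup(), and all names are illustrative only.

/*
 * Userspace sketch of the vcpu_set_state(oldstate, newstate, sleepok)
 * discipline introduced by the patch.  Compile with: cc -o vcpu vcpu.c -lpthread
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

enum vcpu_state { VCPU_IDLE, VCPU_FROZEN, VCPU_RUNNING, VCPU_SLEEPING };

struct vcpu {
	pthread_mutex_t	mtx;		/* stands in for the vcpu spin lock */
	pthread_cond_t	idle_cv;	/* stands in for wakeup(&vcpu->state) */
	enum vcpu_state	state;
};

static int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state oldstate,
    enum vcpu_state newstate, int sleepok)
{
	int error = 0;

	pthread_mutex_lock(&vcpu->mtx);

	/*
	 * Wait (or fail with EBUSY) until the vcpu is in the state the
	 * caller expects.  Sleeping callers always expect IDLE, mirroring
	 * the KASSERT in the patched vcpu_set_state_locked().
	 */
	while (vcpu->state != oldstate) {
		if (!sleepok) {
			pthread_mutex_unlock(&vcpu->mtx);
			return (EBUSY);
		}
		pthread_cond_wait(&vcpu->idle_cv, &vcpu->mtx);
	}

	/*
	 * Only IDLE <-> FROZEN, FROZEN <-> RUNNING and FROZEN <-> SLEEPING
	 * are legal, the same transitions listed in the kernel comment.
	 */
	switch (oldstate) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	}
	if (error) {
		pthread_mutex_unlock(&vcpu->mtx);
		return (EBUSY);
	}

	vcpu->state = newstate;
	if (newstate == VCPU_IDLE)
		pthread_cond_broadcast(&vcpu->idle_cv);	/* like wakeup() */

	pthread_mutex_unlock(&vcpu->mtx);
	return (0);
}

int
main(void)
{
	struct vcpu vcpu = {
		.mtx = PTHREAD_MUTEX_INITIALIZER,
		.idle_cv = PTHREAD_COND_INITIALIZER,
		.state = VCPU_IDLE,
	};

	/* An ioctl-style caller freezes the vcpu, works on it, then idles it. */
	printf("freeze:   %d\n", vcpu_set_state(&vcpu, VCPU_IDLE, VCPU_FROZEN, 1));
	/* A second freeze attempt with sleepok=0 fails immediately (EBUSY). */
	printf("refreeze: %d\n", vcpu_set_state(&vcpu, VCPU_IDLE, VCPU_FROZEN, 0));
	printf("idle:     %d\n", vcpu_set_state(&vcpu, VCPU_FROZEN, VCPU_IDLE, 0));
	return (0);
}

The sketch also shows why bhyverun.c can drop its EBUSY retry loop: a vmm_dev.c ioctl now passes sleepok=true and blocks until the vcpu returns to IDLE, so vm_run() no longer fails transiently with EBUSY.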