--- //depot/vendor/freebsd/src/sys/amd64/vmm/intel/vmx.c +++ //depot/user/jhb/bhyve/sys/amd64/vmm/intel/vmx.c @@ -1217,6 +1217,40 @@ } static int +vmx_emulate_xsetbv(struct vmx *vmx, int vcpu) +{ + struct vmxctx *vmxctx; + uint64_t xcrval; + const struct xsave_limits *limits; + + vmxctx = &vmx->ctx[vcpu]; + limits = vmm_get_xsave_limits(); + + /* We only handle xcr0 if the host has XSAVE enabled. */ + if (vmxctx->guest_rcx != 0 || !limits->xsave_enabled) + return (UNHANDLED); + + xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff); + if ((xcrval & ~limits->xcr0_allowed) != 0) + return (UNHANDLED); + + if (!(xcrval & XFEATURE_ENABLED_X87)) + return (UNHANDLED); + + if ((xcrval & (XFEATURE_ENABLED_AVX | XFEATURE_ENABLED_SSE)) == + XFEATURE_ENABLED_AVX) + return (UNHANDLED); + + /* + * This runs "inside" vmrun() with the guest's FPU state, so + * modifying xcr0 directly modifies the guest's xcr0, not the + * host's. + */ + load_xcr(0, xcrval); + return (HANDLED); +} + +static int vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { int cr, vmcs_guest_cr, vmcs_shadow_cr; @@ -1743,6 +1777,9 @@ vlapic = vm_lapic(vmx->vm, vcpu); handled = vmx_handle_apic_write(vlapic, qual); break; + case EXIT_REASON_XSETBV: + handled = vmx_emulate_xsetbv(vmx, vcpu); + break; default: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); break; --- //depot/vendor/freebsd/src/sys/amd64/vmm/vmm.c +++ //depot/user/jhb/bhyve/sys/amd64/vmm/vmm.c @@ -89,6 +89,7 @@ struct vlapic *vlapic; int vcpuid; struct savefpu *guestfpu; /* guest fpu state */ + uint64_t guest_xcr0; void *stats; struct vm_exit exitinfo; enum x2apic_state x2apic_state; @@ -206,6 +207,7 @@ vcpu->vcpuid = vcpu_id; vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); + vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; vcpu->guestfpu = fpu_save_area_alloc(); fpu_save_area_reset(vcpu->guestfpu); vcpu->stats = vmm_stat_alloc(); @@ -815,6 +817,10 @@ 
fpu_stop_emulating(); fpurestore(vcpu->guestfpu); + /* restore guest XCR0 if XSAVE is enabled in the host */ + if (rcr4() & CR4_XSAVE) + load_xcr(0, vcpu->guest_xcr0); + /* * The FPU is now "dirty" with the guest's state so turn on emulation * to trap any access to the FPU by the host. @@ -829,6 +835,12 @@ if ((rcr0() & CR0_TS) == 0) panic("fpu emulation not enabled in host!"); + /* save guest XCR0 and restore host XCR0 */ + if (rcr4() & CR4_XSAVE) { + vcpu->guest_xcr0 = rxcr(0); + load_xcr(0, vmm_get_host_xcr0()); + } + /* save guest FPU state */ fpu_stop_emulating(); fpusave(vcpu->guestfpu); --- //depot/vendor/freebsd/src/sys/amd64/vmm/vmm_host.c +++ //depot/user/jhb/bhyve/sys/amd64/vmm/vmm_host.c @@ -38,11 +38,14 @@ #include "vmm_host.h" -static uint64_t vmm_host_efer, vmm_host_pat, vmm_host_cr0, vmm_host_cr4; +static uint64_t vmm_host_efer, vmm_host_pat, vmm_host_cr0, vmm_host_cr4, + vmm_host_xcr0; +static struct xsave_limits vmm_xsave_limits; void vmm_host_state_init(void) { + int regs[4]; vmm_host_efer = rdmsr(MSR_EFER); vmm_host_pat = rdmsr(MSR_PAT); @@ -57,6 +60,21 @@ vmm_host_cr0 = rcr0() | CR0_TS; vmm_host_cr4 = rcr4(); + + /* + * Only permit a guest to use XSAVE if the host is using + * XSAVE. Only permit a guest to use XSAVE features supported + * by the host. This ensures that the FPU state used by the + * guest is always a subset of the saved guest FPU state. 
+ */ + if (vmm_host_cr4 & CR4_XSAVE) { + vmm_xsave_limits.xsave_enabled = 1; + vmm_host_xcr0 = rxcr(0); + vmm_xsave_limits.xcr0_allowed = vmm_host_xcr0; + + cpuid_count(0xd, 0x0, regs); + vmm_xsave_limits.xsave_max_size = regs[1]; + } } uint64_t @@ -88,6 +106,13 @@ } uint64_t +vmm_get_host_xcr0(void) +{ + + return (vmm_host_xcr0); +} + +uint64_t vmm_get_host_datasel(void) { @@ -122,3 +147,10 @@ return (r_idt.rd_base); } + +const struct xsave_limits * +vmm_get_xsave_limits(void) +{ + + return (&vmm_xsave_limits); +} --- //depot/vendor/freebsd/src/sys/amd64/vmm/vmm_host.h +++ //depot/user/jhb/bhyve/sys/amd64/vmm/vmm_host.h @@ -33,17 +33,25 @@ #error "no user-servicable parts inside" #endif +struct xsave_limits { + int xsave_enabled; + uint64_t xcr0_allowed; + uint32_t xsave_max_size; +}; + void vmm_host_state_init(void); uint64_t vmm_get_host_pat(void); uint64_t vmm_get_host_efer(void); uint64_t vmm_get_host_cr0(void); uint64_t vmm_get_host_cr4(void); +uint64_t vmm_get_host_xcr0(void); uint64_t vmm_get_host_datasel(void); uint64_t vmm_get_host_codesel(void); uint64_t vmm_get_host_tsssel(void); uint64_t vmm_get_host_fsbase(void); uint64_t vmm_get_host_idtrbase(void); +const struct xsave_limits *vmm_get_xsave_limits(void); /* * Inline access to host state that is used on every VM entry --- //depot/vendor/freebsd/src/sys/amd64/vmm/x86.c +++ //depot/user/jhb/bhyve/sys/amd64/vmm/x86.c @@ -30,17 +30,19 @@ __FBSDID("$FreeBSD: head/sys/amd64/vmm/x86.c 256645 2013-10-16 18:20:27Z neel $"); #include -#include +#include #include #include #include #include #include +#include #include #include +#include "vmm_host.h" #include "x86.h" #define CPUID_VM_HIGH 0x40000000 @@ -53,6 +55,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { + const struct xsave_limits *limits; + uint64_t cr4; int error, enable_invpcid; unsigned int func, regs[4]; enum x2apic_state x2apic_state; @@ -145,13 +149,29 @@ if (x2apic_state != 
X2APIC_DISABLED) regs[2] |= CPUID2_X2APIC; + + /* + * Only advertise CPUID2_XSAVE in the guest if + * the host is using XSAVE. + */ + if (!(regs[2] & CPUID2_OSXSAVE)) + regs[2] &= ~CPUID2_XSAVE; /* - * Hide xsave/osxsave/avx until the FPU save/restore - * issues are resolved + * If CPUID2_XSAVE is being advertised and the + * guest has set CR4_XSAVE, set + * CPUID2_OSXSAVE. */ - regs[2] &= ~(CPUID2_XSAVE | CPUID2_OSXSAVE | - CPUID2_AVX); + regs[2] &= ~CPUID2_OSXSAVE; + if (regs[2] & CPUID2_XSAVE) { + error = vm_get_register(vm, vcpu_id, + VM_REG_GUEST_CR4, &cr4); + if (error) + panic("x86_emulate_cpuid: error %d " + "fetching %%cr4", error); + if (cr4 & CR4_XSAVE) + regs[2] |= CPUID2_OSXSAVE; + } /* * Hide monitor/mwait until we know how to deal with @@ -219,7 +243,6 @@ case CPUID_0000_0006: case CPUID_0000_000A: - case CPUID_0000_000D: /* * Handle the access, but report 0 for * all options @@ -240,6 +263,57 @@ regs[3] = vcpu_id; break; + case CPUID_0000_000D: + limits = vmm_get_xsave_limits(); + if (!limits->xsave_enabled) { + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + break; + } + + cpuid_count(*eax, *ecx, regs); + switch (*ecx) { + case 0: + /* + * Only permit the guest to use bits + * that are active in the host in + * %xcr0. Also, claim that the + * maximum save area size is + * equivalent to the host's current + * save area size. Since this runs + * "inside" of vmrun(), it runs with + * the guest's xcr0, so the current + * save area size is correct as-is. + */ + regs[0] &= limits->xcr0_allowed; + regs[2] = limits->xsave_max_size; + regs[3] &= (limits->xcr0_allowed >> 32); + break; + case 1: + /* Only permit XSAVEOPT. */ + regs[0] &= CPUID_EXTSTATE_XSAVEOPT; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + break; + default: + /* + * If the leaf is for a permitted feature, + * pass through as-is, otherwise return + * all zeroes. 
+ * Sub-leaves >= 64 have no %xcr0 bit, so zero them too. */ + if (*ecx >= 64 || !(limits->xcr0_allowed & (1ul << *ecx))) { + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + } + break; + } + break; + case 0x40000000: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, &regs[1], 4);