diff --git a/sys/amd64/include/pvclock.h b/sys/amd64/include/pvclock.h new file mode 100644 index 0000000..f01fac6 --- /dev/null +++ b/sys/amd64/include/pvclock.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. + */ +/* $FreeBSD$ */ + +#include diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index bbbe827..7d85742 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -555,13 +555,17 @@ x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci +x86/x86/bhyve.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt +x86/x86/hypervisor.c standard x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c standard +x86/x86/kvm.c standard +x86/x86/kvm_clock.c standard x86/x86/legacy.c standard x86/x86/local_apic.c standard x86/x86/mca.c standard @@ -569,8 +573,10 @@ x86/x86/mptable.c optional mptable x86/x86/mptable_pci.c optional mptable pci x86/x86/msi.c optional pci x86/x86/nexus.c standard +x86/x86/pvclock.c standard x86/x86/tsc.c standard x86/x86/delay.c standard +x86/x86/vmware.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xen | xenhvm x86/xen/pv.c optional xenhvm diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 96879b8..ca83c4c 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -573,13 +573,17 @@ x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci +x86/x86/bhyve.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt +x86/x86/hypervisor.c standard x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic +x86/x86/kvm.c standard +x86/x86/kvm_clock.c standard x86/x86/legacy.c optional native x86/x86/local_apic.c optional apic x86/x86/mca.c standard @@ -588,7 
+592,9 @@ x86/x86/mptable_pci.c optional apic native pci x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/x86/pvclock.c standard x86/x86/delay.c standard +x86/x86/vmware.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xen | xenhvm x86/xen/xen_apic.c optional xenhvm diff --git a/sys/dev/xen/timer/timer.c b/sys/dev/xen/timer/timer.c index 5743076..53aff0a 100644 --- a/sys/dev/xen/timer/timer.c +++ b/sys/dev/xen/timer/timer.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -95,9 +96,6 @@ struct xentimer_softc { struct eventtimer et; }; -/* Last time; this guarantees a monotonically increasing clock. */ -volatile uint64_t xen_timer_last_time = 0; - static void xentimer_identify(driver_t *driver, device_t parent) { @@ -148,128 +146,20 @@ xentimer_probe(device_t dev) return (BUS_PROBE_NOWILDCARD); } -/* - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, - * yielding a 64-bit result. 
- */ -static inline uint64_t -scale_delta(uint64_t delta, uint32_t mul_frac, int shift) -{ - uint64_t product; - - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - -#if defined(__i386__) - { - uint32_t tmp1, tmp2; - - /** - * For i386, the formula looks like: - * - * lower = (mul_frac * (delta & UINT_MAX)) >> 32 - * upper = mul_frac * (delta >> 32) - * product = lower + upper - */ - __asm__ ( - "mul %5 ; " - "mov %4,%%eax ; " - "mov %%edx,%4 ; " - "mul %5 ; " - "xor %5,%5 ; " - "add %4,%%eax ; " - "adc %5,%%edx ; " - : "=A" (product), "=r" (tmp1), "=r" (tmp2) - : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), - "2" (mul_frac) ); - } -#elif defined(__amd64__) - { - unsigned long tmp; - - __asm__ ( - "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" - : [lo]"=a" (product), [hi]"=d" (tmp) - : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac)); - } -#else -#error "xentimer: unsupported architecture" -#endif - - return (product); -} - -static uint64_t -get_nsec_offset(struct vcpu_time_info *tinfo) -{ - - return (scale_delta(rdtsc() - tinfo->tsc_timestamp, - tinfo->tsc_to_system_mul, tinfo->tsc_shift)); -} - -/* - * Read the current hypervisor system uptime value from Xen. - * See for a description of how this works. - */ -static uint32_t -xen_fetch_vcpu_tinfo(struct vcpu_time_info *dst, struct vcpu_time_info *src) -{ - - do { - dst->version = src->version; - rmb(); - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_time = src->system_time; - dst->tsc_to_system_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); - } while ((src->version & 1) | (dst->version ^ src->version)); - - return (dst->version); -} - /** * \brief Get the current time, in nanoseconds, since the hypervisor booted. * * \param vcpu vcpu_info structure to fetch the time from. * - * \note This function returns the current CPU's idea of this value, unless - * it happens to be less than another CPU's previously determined value. 
*/ static uint64_t xen_fetch_vcpu_time(struct vcpu_info *vcpu) { - struct vcpu_time_info dst; - struct vcpu_time_info *src; - uint32_t pre_version; - uint64_t now; - volatile uint64_t last; - - src = &vcpu->time; - - do { - pre_version = xen_fetch_vcpu_tinfo(&dst, src); - barrier(); - now = dst.system_time + get_nsec_offset(&dst); - barrier(); - } while (pre_version != src->version); + struct pvclock_vcpu_time_info *time; - /* - * Enforce a monotonically increasing clock time across all - * VCPUs. If our time is too old, use the last time and return. - * Otherwise, try to update the last time. - */ - do { - last = xen_timer_last_time; - if (last > now) { - now = last; - break; - } - } while (!atomic_cmpset_64(&xen_timer_last_time, last, now)); + time = (struct pvclock_vcpu_time_info *) &vcpu->time; - return (now); + return (pvclock_get_timecount(time)); } static uint32_t @@ -302,15 +192,11 @@ static void xen_fetch_wallclock(struct timespec *ts) { shared_info_t *src = HYPERVISOR_shared_info; - uint32_t version = 0; + struct pvclock_wall_clock *wc; - do { - version = src->wc_version; - rmb(); - ts->tv_sec = src->wc_sec; - ts->tv_nsec = src->wc_nsec; - rmb(); - } while ((src->wc_version & 1) | (version ^ src->wc_version)); + wc = (struct pvclock_wall_clock *) &src->wc_version; + + pvclock_get_wallclock(wc, ts); } static void @@ -574,7 +460,7 @@ xentimer_resume(device_t dev) } /* Reset the last uptime value */ - xen_timer_last_time = 0; + pvclock_resume(); /* Reset the RTC clock */ inittodr(time_second); diff --git a/sys/i386/include/pvclock.h b/sys/i386/include/pvclock.h new file mode 100644 index 0000000..f01fac6 --- /dev/null +++ b/sys/i386/include/pvclock.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. 
+ */ +/* $FreeBSD$ */ + +#include diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index 95f3250..5332055 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -159,6 +159,8 @@ static const char *const vm_guest_sysctl_names[] = { "xen", "hv", "vmware", + "bhyve", + "kvm", NULL }; CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST); diff --git a/sys/sys/systm.h b/sys/sys/systm.h index d3833d0..50a49d2 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -73,7 +73,7 @@ extern int vm_guest; /* Running as virtual machine guest? */ * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, - VM_GUEST_VMWARE, VM_LAST }; + VM_GUEST_VMWARE, VM_GUEST_BHYVE, VM_GUEST_KVM, VM_LAST }; #if defined(WITNESS) || defined(INVARIANTS) void kassert_panic(const char *fmt, ...) __printflike(1, 2); diff --git a/sys/x86/include/hypervisor.h b/sys/x86/include/hypervisor.h new file mode 100644 index 0000000..d5d30eb --- /dev/null +++ b/sys/x86/include/hypervisor.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _X86_HYPERVISOR_H_ +#define _X86_HYPERVISOR_H_ + +#include +#include + +typedef void hypervisor_init_func_t(void); + +/* + * Hypervisor guest support may provide paravirtualized implementations of, + * or impose special requirements on, various operations. The callbacks are + * provided when a hypervisor is detected and registered. + */ +struct hypervisor_ops { +}; + +void hypervisor_sysinit(void *func); +void hypervisor_register(const char *vendor, enum VM_GUEST guest, + struct hypervisor_ops *ops); +int hypervisor_cpuid_base(const char *signature, int leaves, + uint32_t *base, uint32_t *high); +void hypervisor_print_info(void); + +#define HYPERVISOR_SYSINIT(name, func) \ + SYSINIT(name ## _hypervisor_sysinit, SI_SUB_HYPERVISOR, \ + SI_ORDER_FIRST, hypervisor_sysinit, func) + +#endif /* !_X86_HYPERVISOR_H_ */ diff --git a/sys/x86/include/kvm.h b/sys/x86/include/kvm.h new file mode 100644 index 0000000..b539038 --- /dev/null +++ b/sys/x86/include/kvm.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _X86_KVM_H_ +#define _X86_KVM_H_ + +#define KVM_CPUID_FEATURES_LEAF 0x40000001 + +#define KVM_FEATURE_CLOCKSOURCE 0x00000001 +#define KVM_FEATURE_CLOCKSOURCE2 0x00000008 + +/* Deprecated: for the CLOCKSOURCE feature. */ +#define KVM_MSR_WALL_CLOCK 0x11 +#define KVM_MSR_SYSTEM_TIME 0x12 + +#define KVM_MSR_WALL_CLOCK_NEW 0x4b564d00 +#define KVM_MSR_SYSTEM_TIME_NEW 0x4b564d01 + +int kvm_paravirt_supported(void); +uint32_t kvm_get_features(void); + +uint64_t kvm_clock_tsc_freq(void); + +#endif /* !_X86_KVM_H_ */ diff --git a/sys/x86/include/pvclock.h b/sys/x86/include/pvclock.h new file mode 100644 index 0000000..25aba99 --- /dev/null +++ b/sys/x86/include/pvclock.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2014, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef X86_PVCLOCK +#define X86_PVCLOCK + +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __packed; + +#define PVCLOCK_FLAG_TSC_STABLE 0x01 +#define PVCLOCK_FLAG_GUEST_PASUED 0x02 + +struct pvclock_wall_clock { + uint32_t version; + uint32_t sec; + uint32_t nsec; +} __packed; + +void pvclock_resume(void); +uint64_t pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti); +uint64_t pvclock_get_timecount(struct pvclock_vcpu_time_info *ti); +void pvclock_get_wallclock(struct pvclock_wall_clock *wc, + struct timespec *ts); + +#endif diff --git a/sys/x86/include/vmware.h b/sys/x86/include/vmware.h index c72f48d..89616c5 100644 --- a/sys/x86/include/vmware.h +++ b/sys/x86/include/vmware.h @@ -44,4 +44,6 @@ vmware_hvcall(u_int cmd, u_int *p) : "memory"); } +uint64_t vmware_tsc_freq(void); + #endif /* !_X86_VMWARE_H_ */ diff --git a/sys/x86/x86/bhyve.c b/sys/x86/x86/bhyve.c new file mode 100644 index 0000000..d21e808 --- /dev/null +++ b/sys/x86/x86/bhyve.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +static uint32_t bhyve_cpuid_base = -1; /* -1 until a matching leaf is found */ +static uint32_t bhyve_cpuid_high = -1; + +static int +bhyve_cpuid_identify(void) +{ + + if (bhyve_cpuid_base == -1) { + hypervisor_cpuid_base("bhyve bhyve ", 0, &bhyve_cpuid_base, /* 12-byte signature, trailing space included */ + &bhyve_cpuid_high); + } + + return (bhyve_cpuid_base != -1); /* unsigned: "> 0" would also accept the -1 sentinel */ +} + +static void +bhyve_init(void) +{ + + if (bhyve_cpuid_identify() != 0) + hypervisor_register("bhyve", VM_GUEST_BHYVE, NULL); +} + +HYPERVISOR_SYSINIT(bhyve, bhyve_init); diff --git a/sys/x86/x86/hypervisor.c b/sys/x86/x86/hypervisor.c new file mode 100644 index 0000000..30c70df --- /dev/null +++ b/sys/x86/x86/hypervisor.c @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include + +#include + +char hv_vendor[16]; +SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD, hv_vendor, 0, + "Hypervisor vendor"); + +void +hypervisor_sysinit(void *func) +{ + hypervisor_init_func_t *init; + + init = func; + + /* + * Call the init function only if no specific hypervisor has been + * identified yet. We assume that detectable hypervisors announce + * their presence via the CPUID2_HV feature bit. + */ + if (vm_guest == VM_GUEST_VM && cpu_feature2 & CPUID2_HV) + (*init)(); +} + +void +hypervisor_register(const char *vendor, enum VM_GUEST guest, + struct hypervisor_ops *ops) +{ + + strlcpy(hv_vendor, vendor, sizeof(hv_vendor)); + vm_guest = guest; +} + +/* + * [RFC] CPUID usage for interaction between Hypervisors and Linux. 
+ * http://lkml.org/lkml/2008/10/1/246 + */ +int +hypervisor_cpuid_base(const char *signature, int leaves, uint32_t *base, + uint32_t *high) +{ + uint32_t leaf, regs[4]; + + for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { + do_cpuid(leaf, regs); + if (!memcmp(signature, &regs[1], 12) && /* regs[1..3] hold the 12-byte vendor signature */ + (leaves == 0 || (regs[0] - leaf >= leaves))) { + *base = leaf; + *high = regs[0]; + return (0); + } + } + + return (1); /* no match: *base and *high are left untouched */ +} + +void +hypervisor_print_info(void) +{ + + if (*hv_vendor) + printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); +} diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index bae430a..c28390c 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #ifdef __i386__ @@ -78,7 +79,6 @@ static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); -static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); @@ -123,11 +123,6 @@ static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); -u_int hv_high; -char hv_vendor[16]; -SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD, hv_vendor, 0, - "Hypervisor vendor"); - static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; @@ -985,7 +980,7 @@ printcpuinfo(void) #endif } - print_hypervisor_info(); + hypervisor_print_info(); } void @@ -1218,25 +1213,12 @@ identify_hypervisor(void) int i; /* - * [RFC] CPUID usage for interaction between Hypervisors and Linux. - * http://lkml.org/lkml/2008/10/1/246 - * - * KB1009458: Mechanisms to determine if software is running in - * a VMware virtual machine - * http://kb.vmware.com/kb/1009458 + * Modern hypervisors set the HV present feature bit and are then + * identifiable through a special CPUID leaf. 
Hypervisors we know + * about are later detected via the SI_SUB_HYPERVISOR SYSINIT(). */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; - do_cpuid(0x40000000, regs); - if (regs[0] >= 0x40000000) { - hv_high = regs[0]; - ((u_int *)&hv_vendor)[0] = regs[1]; - ((u_int *)&hv_vendor)[1] = regs[2]; - ((u_int *)&hv_vendor)[2] = regs[3]; - hv_vendor[12] = '\0'; - if (strcmp(hv_vendor, "VMwareVMware") == 0) - vm_guest = VM_GUEST_VMWARE; - } return; } @@ -2150,11 +2132,3 @@ print_vmx_info(void) ); } } - -static void -print_hypervisor_info(void) -{ - - if (*hv_vendor) - printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); -} diff --git a/sys/x86/x86/kvm.c b/sys/x86/x86/kvm.c new file mode 100644 index 0000000..b47eb76 --- /dev/null +++ b/sys/x86/x86/kvm.c @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +#include +#include + +static int kvm_cpuid_identify(void); + +static uint32_t kvm_cpuid_base = -1; /* -1 until a matching leaf is found */ +static uint32_t kvm_cpuid_high = -1; + +static int +kvm_cpuid_identify(void) +{ + + if (kvm_cpuid_base == -1) { + hypervisor_cpuid_base("KVMKVMKVM\0\0", 0, &kvm_cpuid_base, + &kvm_cpuid_high); + } + + return (kvm_cpuid_base != -1); /* unsigned: "> 0" would also accept the -1 sentinel */ +} + +int +kvm_paravirt_supported(void) +{ + + return (kvm_cpuid_base != -1); /* false until kvm_cpuid_identify() has found the KVM leaf */ +} + +uint32_t +kvm_get_features(void) +{ + u_int regs[4]; + + if (kvm_paravirt_supported()) + do_cpuid(kvm_cpuid_base | KVM_CPUID_FEATURES_LEAF, regs); + else + regs[0] = 0; + + return (regs[0]); +} + +static void +kvm_init(void) +{ + + if (kvm_cpuid_identify() != 0) + hypervisor_register("KVM", VM_GUEST_KVM, NULL); +} + +HYPERVISOR_SYSINIT(kvm, kvm_init); diff --git a/sys/x86/x86/kvm_clock.c b/sys/x86/x86/kvm_clock.c new file mode 100644 index 0000000..7da6363 --- /dev/null +++ b/sys/x86/x86/kvm_clock.c @@ -0,0 +1,132 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static u_int kvm_clock_get_timecounter(struct timecounter *); +static void kvm_clock_pcpu_system_time(void *); + +DPCPU_DEFINE(struct pvclock_vcpu_time_info, kvm_clock_vcpu_time_info); + +static struct timecounter kvm_clock_timecounter = { + kvm_clock_get_timecounter, + NULL, + ~0u, + 1000000000ULL, + "KVMCLOCK", + 1000, +}; + +static int kvm_clock_registered; +static uint32_t kvm_clock_wall_clock_msr; +static uint32_t kvm_clock_system_time_msr; + +uint64_t +kvm_clock_tsc_freq(void) +{ + struct pvclock_vcpu_time_info *ti; + uint64_t freq; + + critical_enter(); + ti = DPCPU_PTR(kvm_clock_vcpu_time_info); + freq = pvclock_tsc_freq(ti); + critical_exit(); + + return (freq); +} + +static u_int +kvm_clock_get_timecounter(struct timecounter *tc) +{ + struct pvclock_vcpu_time_info *ti; + uint64_t time; + + critical_enter(); + ti = 
DPCPU_PTR(kvm_clock_vcpu_time_info); + time = pvclock_get_timecount(ti); + critical_exit(); + + return (time & UINT_MAX); +} + +static void +kvm_clock_pcpu_system_time(void *arg) +{ + uint64_t data; + int enable; + + enable = *(int *) arg; + + if (enable != 0) + data = vtophys(DPCPU_PTR(kvm_clock_vcpu_time_info)) | 1; + else + data = 0; + + wrmsr(kvm_clock_system_time_msr, data); +} + +static void +kvm_clock_init(void) +{ + uint32_t features; + + if (vm_guest != VM_GUEST_KVM || !kvm_paravirt_supported()) + return; + + features = kvm_get_features(); + + if (features & KVM_FEATURE_CLOCKSOURCE2) { + kvm_clock_wall_clock_msr = KVM_MSR_WALL_CLOCK_NEW; + kvm_clock_system_time_msr = KVM_MSR_SYSTEM_TIME_NEW; + } else if (features & KVM_FEATURE_CLOCKSOURCE) { + kvm_clock_wall_clock_msr = KVM_MSR_WALL_CLOCK; + kvm_clock_system_time_msr = KVM_MSR_SYSTEM_TIME; + } else + return; + + kvm_clock_registered = 1; + smp_rendezvous(smp_no_rendevous_barrier, kvm_clock_pcpu_system_time, + smp_no_rendevous_barrier, &kvm_clock_registered); + + tc_init(&kvm_clock_timecounter); +} + +SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init, NULL); diff --git a/sys/x86/x86/pvclock.c b/sys/x86/x86/pvclock.c new file mode 100644 index 0000000..d0eef185 --- /dev/null +++ b/sys/x86/x86/pvclock.c @@ -0,0 +1,197 @@ +/*- + * Copyright (c) 2009 Adrian Chadd + * Copyright (c) 2012 Spectra Logic Corporation + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include + +/* + * Last time; this guarantees a monotonically increasing clock for when + * a stable TSC is not provided. + */ +static volatile uint64_t pvclock_last_cycles; + +void +pvclock_resume(void) +{ + + atomic_store_rel_64(&pvclock_last_cycles, 0); +} + +uint64_t +pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti) +{ + uint64_t freq; + + freq = (1000000000ULL << 32) / ti->tsc_to_system_mul; + + if (ti->tsc_shift < 0) + freq <<= -ti->tsc_shift; + else + freq >>= ti->tsc_shift; + + return (freq); +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. 
+ */ +static inline uint64_t +pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#if defined(__i386__) + { + uint32_t tmp1, tmp2; + + /** + * For i386, the formula looks like: + * + * lower = (mul_frac * (delta & UINT_MAX)) >> 32 + * upper = mul_frac * (delta >> 32) + * product = lower + upper + */ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), + "2" (mul_frac) ); + } +#elif defined(__amd64__) + { + unsigned long tmp; + + __asm__ ( + "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a" (product), [hi]"=d" (tmp) + : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac)); + } +#else +#error "pvclock: unsupported x86 architecture?" +#endif + + return (product); +} + +static uint64_t +pvclock_get_nsec_offset(struct pvclock_vcpu_time_info *ti) +{ + uint64_t delta; + + delta = rdtsc() - ti->tsc_timestamp; + + return (pvclock_scale_delta(delta, ti->tsc_to_system_mul, + ti->tsc_shift)); +} + +static void +pvclock_read_time_info(struct pvclock_vcpu_time_info *ti, + uint64_t *cycles, uint8_t *flags) +{ + uint32_t version; + + do { + version = ti->version; + rmb(); + *cycles = ti->system_time + pvclock_get_nsec_offset(ti); + *flags = ti->flags; + rmb(); + } while ((ti->version & 1) != 0 || ti->version != version); +} + +static void +pvclock_read_wall_clock(struct pvclock_wall_clock *wc, uint32_t *sec, + uint32_t *nsec) +{ + uint32_t version; + + do { + version = wc->version; + rmb(); + *sec = wc->sec; + *nsec = wc->nsec; + rmb(); + } while ((wc->version & 1) != 0 || wc->version != version); +} + +uint64_t +pvclock_get_timecount(struct pvclock_vcpu_time_info *ti) +{ + uint64_t now; + uint8_t flags; + volatile uint64_t last; + + pvclock_read_time_info(ti, &now, &flags); + + if (flags & 
PVCLOCK_FLAG_TSC_STABLE) + return (now); + + /* + * Enforce a monotonically increasing clock time across all VCPUs. + * If our time is too old, use the last time and return. Otherwise, + * try to update the last time. + */ + do { + last = atomic_load_acq_64(&pvclock_last_cycles); + if (last > now) + return (last); + } while (!atomic_cmpset_64(&pvclock_last_cycles, last, now)); + + return (now); +} + +void +pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts) +{ + uint32_t sec, nsec; + + pvclock_read_wall_clock(wc, &sec, &nsec); + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c index 4ca574e..a834cb5 100644 --- a/sys/x86/x86/tsc.c +++ b/sys/x86/x86/tsc.c @@ -104,22 +104,6 @@ static struct timecounter tsc_timecounter = { }; static void -tsc_freq_vmware(void) -{ - u_int regs[4]; - - if (hv_high >= 0x40000010) { - do_cpuid(0x40000010, regs); - tsc_freq = regs[0] * 1000; - } else { - vmware_hvcall(VMW_HVCMD_GETHZ, regs); - if (regs[1] != UINT_MAX) - tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); - } - tsc_is_invariant = 1; -} - -static void tsc_freq_intel(void) { char brand[48]; @@ -201,7 +185,8 @@ probe_tsc_freq(void) } if (vm_guest == VM_GUEST_VMWARE) { - tsc_freq_vmware(); + tsc_freq = vmware_tsc_freq(); + tsc_is_invariant = 1; return; } diff --git a/sys/x86/x86/vmware.c b/sys/x86/x86/vmware.c new file mode 100644 index 0000000..e16acbb --- /dev/null +++ b/sys/x86/x86/vmware.c @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include + +static uint32_t vmware_cpuid_base = -1; /* -1 until a matching leaf is found */ +static uint32_t vmware_cpuid_high = -1; + +static int +vmware_cpuid_identify(void) +{ + + /* + * KB1009458: Mechanisms to determine if software is running in a + * VMware virtual machine: http://kb.vmware.com/kb/1009458 + */ + if (vmware_cpuid_base == -1) { + hypervisor_cpuid_base("VMwareVMware", 0, &vmware_cpuid_base, + &vmware_cpuid_high); + } + + return (vmware_cpuid_base != -1); /* unsigned: "> 0" would also accept the -1 sentinel */ +} + +uint64_t +vmware_tsc_freq(void) +{ + uint64_t freq; + u_int regs[4]; + + if (vmware_cpuid_high >= 0x40000010) { + do_cpuid(0x40000010, regs); + freq = (uint64_t)regs[0] * 1000; /* widen before scaling to avoid 32-bit wrap */ + } else { + vmware_hvcall(VMW_HVCMD_GETHZ, regs); + if (regs[1] != UINT_MAX) + freq = regs[0] | ((uint64_t)regs[1] << 32); + else + freq = 0; /* hypercall reported failure */ + } + + return (freq); +} + +static void +vmware_init(void) +{ + + if (vmware_cpuid_identify() != 0) + hypervisor_register("VMware", VM_GUEST_VMWARE, NULL); +} + 
+HYPERVISOR_SYSINIT(vmware, vmware_init);