Index: usr.sbin/bhyve/pmtmr.c
===================================================================
--- usr.sbin/bhyve/pmtmr.c	(revision 248935)
+++ usr.sbin/bhyve/pmtmr.c	(working copy)
@@ -35,6 +35,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -53,35 +54,108 @@
 #define	PMTMR_FREQ	3579545		/* 3.579545MHz */
 
 static pthread_mutex_t pmtmr_mtx;
+
+static uint64_t	pmtmr_old;
+
 static uint64_t	pmtmr_tscf;
-static uint64_t	pmtmr_old;
 static uint64_t	pmtmr_tsc_old;
 
+static clockid_t clockid = CLOCK_UPTIME_FAST;
+static struct timespec pmtmr_uptime_old;
+
+#define	timespecsub(vvp, uvp)						\
+	do {								\
+		(vvp)->tv_sec -= (uvp)->tv_sec;				\
+		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
+		if ((vvp)->tv_nsec < 0) {				\
+			(vvp)->tv_sec--;				\
+			(vvp)->tv_nsec += 1000000000;			\
+		}							\
+	} while (0)
+
+static uint64_t
+timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold)
+{
+	struct timespec tsdiff;
+	int64_t nsecs;
+
+	tsdiff = *tsnew;
+	timespecsub(&tsdiff, tsold);
+	nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec;
+	assert(nsecs >= 0);
+
+	return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old);
+}
+
+static uint64_t
+tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old)
+{
+
+	return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old);
+}
+
+static void
+pmtmr_init(void)
+{
+	size_t len;
+	int smp_tsc, err;
+	struct timespec tsnew, tsold = { 0 };
+
+	len = sizeof(smp_tsc);
+	err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0);
+	assert(err == 0);
+
+	if (smp_tsc) {
+		len = sizeof(pmtmr_tscf);
+		err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
+		    NULL, 0);
+		assert(err == 0);
+
+		pmtmr_tsc_old = rdtsc();
+		pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
+	} else {
+		if (getenv("BHYVE_PMTMR_PRECISE") != NULL)
+			clockid = CLOCK_UPTIME;
+
+		err = clock_gettime(clockid, &tsnew);
+		assert(err == 0);
+
+		pmtmr_uptime_old = tsnew;
+		pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
+	}
+}
+
 static uint32_t
 pmtmr_val(void)
 {
+	struct timespec tsnew;
 	uint64_t pmtmr_tsc_new;
 	uint64_t pmtmr_new;
+	int error;
+
 	static int inited = 0;
 
 	if (!inited) {
-		size_t len;
-
+		pthread_mutex_init(&pmtmr_mtx, NULL);
+		pmtmr_init();
 		inited = 1;
-		pthread_mutex_init(&pmtmr_mtx, NULL);
-		len = sizeof(pmtmr_tscf);
-		sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
-		    NULL, 0);
-		pmtmr_tsc_old = rdtsc();
-		pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ;
 	}
 
 	pthread_mutex_lock(&pmtmr_mtx);
-	pmtmr_tsc_new = rdtsc();
-	pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf +
-	    pmtmr_old;
+
+	if (pmtmr_tscf) {
+		pmtmr_tsc_new = rdtsc();
+		pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
+		pmtmr_tsc_old = pmtmr_tsc_new;
+	} else {
+		error = clock_gettime(clockid, &tsnew);
+		assert(error == 0);
+
+		pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old);
+		pmtmr_uptime_old = tsnew;
+	}
 	pmtmr_old = pmtmr_new;
-	pmtmr_tsc_old = pmtmr_tsc_new;
+
 	pthread_mutex_unlock(&pmtmr_mtx);
 
 	return (pmtmr_new);
@@ -102,4 +176,3 @@
 }
 
 INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
-
Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h	(revision 248935)
+++ sys/amd64/include/clock.h	(working copy)
@@ -20,6 +20,9 @@
 extern uint64_t	tsc_freq;
 extern int	tsc_is_invariant;
 extern int	tsc_perf_stat;
+#ifdef SMP
+extern int	smp_tsc;
+#endif
 
 void	i8254_init(void);
Index: sys/amd64/vmm/x86.c
===================================================================
--- sys/amd64/vmm/x86.c	(revision 248935)
+++ sys/amd64/vmm/x86.c	(working copy)
@@ -34,6 +34,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -89,11 +90,27 @@
 	case CPUID_8000_0003:
 	case CPUID_8000_0004:
 	case CPUID_8000_0006:
-	case CPUID_8000_0007:
 	case CPUID_8000_0008:
 		cpuid_count(*eax, *ecx, regs);
 		break;
 
+	case CPUID_8000_0007:
+		cpuid_count(*eax, *ecx, regs);
+		/*
+		 * If the host TSCs are not synchronized across
+		 * physical cpus then we cannot advertise an
+		 * invariant tsc to a vcpu.
+		 *
+		 * XXX This still falls short because the vcpu
+		 * can observe the TSC moving backwards as it
+		 * migrates across physical cpus. But at least
+		 * it should discourage the guest from using the
+		 * TSC to keep track of time.
+		 */
+		if (!smp_tsc)
+			regs[3] &= ~AMDPM_TSC_INVARIANT;
+		break;
+
 	case CPUID_0000_0001:
 		do_cpuid(1, regs);
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c	(revision 248935)
+++ sys/x86/x86/tsc.c	(working copy)
@@ -61,7 +61,7 @@
 TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
 
 #ifdef SMP
-static int	smp_tsc;
+int	smp_tsc;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
     "Indicates whether the TSC is safe to use in SMP mode");
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);