Index: x86/x86/tsc.c =================================================================== --- x86/x86/tsc.c (revision 249482) +++ x86/x86/tsc.c (working copy) @@ -67,6 +67,17 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, C TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); #endif +int smp_tsc_adjust = 1; +#ifdef SMP +SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN, + &smp_tsc_adjust, 0, + "Indicates whether the TSC is SMP-adjusted"); +TUNABLE_INT("kern.timecounter.smp_tsc_adjust", &smp_tsc_adjust); +#endif + +static DPCPU_DEFINE(int64_t, smp_tsc_adj); +static DPCPU_DEFINE(int32_t, smp_tsc_adj32); + static int tsc_shift = 1; SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN, &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency"); @@ -87,10 +98,13 @@ static void tsc_freq_changed(void *arg, const stru static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status); static unsigned tsc_get_timecount(struct timecounter *tc); +static inline unsigned tsc_get_timecount_adj(struct timecounter *tc); static inline unsigned tsc_get_timecount_low(struct timecounter *tc); static unsigned tsc_get_timecount_lfence(struct timecounter *tc); +static unsigned tsc_get_timecount_adj_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_mfence(struct timecounter *tc); +static unsigned tsc_get_timecount_adj_mfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc); static void tsc_levels_changed(void *arg, int unit); @@ -385,17 +399,19 @@ static void comp_smp_tsc(void *arg) { uint64_t *tsc; - int64_t d1, d2; + int64_t a1, a2, d1, d2; u_int cpu = PCPU_GET(cpuid); u_int i, j, size; + a1 = DPCPU_GET(smp_tsc_adj); size = (mp_maxid + 1) * 3; for (i = 0, tsc = arg; i < N; i++, tsc += size) CPU_FOREACH(j) { if (j == cpu) continue; - d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; - d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 
1]; + a2 = DPCPU_ID_GET(j, smp_tsc_adj); + d1 = tsc[cpu * 3 + 1] + a1 - tsc[j * 3] - a2; + d2 = tsc[cpu * 3 + 2] + a1 - tsc[j * 3 + 1] - a2; if (d1 <= 0 || d2 <= 0) { smp_tsc = 0; return; @@ -403,6 +419,42 @@ comp_smp_tsc(void *arg) } } +static void +calc_smp_tsc_adj(uint64_t *data) +{ + uint64_t *tsc; + int64_t d, min, max; + u_int cpu, first, i, size; + + size = (mp_maxid + 1) * 3; + first = CPU_FIRST(); + CPU_FOREACH(cpu) { + if (cpu == first) + continue; + min = INT64_MIN; + max = INT64_MAX; + for (i = 0, tsc = data; i < N; i++, tsc += size) { + d = tsc[first * 3] - tsc[cpu * 3 + 1]; + if (d > min) + min = d; + d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2]; + if (d > min) + min = d; + d = tsc[first * 3 + 1] - tsc[cpu * 3]; + if (d < max) + max = d; + d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1]; + if (d < max) + max = d; + } + if (min > max) { + smp_tsc = 0; + return; + } + DPCPU_ID_SET(cpu, smp_tsc_adj, max / 2 + min / 2); + } +} + static int test_tsc(void) { @@ -418,10 +470,28 @@ test_tsc(void) smp_tsc = 1; /* XXX */ smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc, smp_no_rendevous_barrier, data); + if (smp_tsc || !smp_tsc_adjust) { + smp_tsc_adjust = 0; + goto done; + } + smp_tsc = 1; + calc_smp_tsc_adj(data); + if (!smp_tsc) { + printf("SMP: TSC SMP adjustment failed\n"); + goto done; + } + smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc, + smp_no_rendevous_barrier, data); + if (smp_tsc == 0) + printf("SMP: TSC SMP post-adjustment check failed\n"); +done: + if (!smp_tsc) + smp_tsc_adjust = 0; free(data, M_TEMP); if (bootverbose) - printf("SMP: %sed TSC synchronization test\n", - smp_tsc ? "pass" : "fail"); + printf("SMP: %sed TSC synchronization test%s\n", + smp_tsc ? "pass" : "fail", + smp_tsc_adjust ? 
" with adjustments" : ""); if (smp_tsc && tsc_is_invariant) { switch (cpu_vendor_id) { case CPU_VENDOR_AMD: @@ -468,7 +538,7 @@ static void init_TSC_tc(void) { uint64_t max_freq; - int shift; + int cpu, shift; if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return; @@ -524,20 +594,29 @@ init_TSC_tc(void) max_freq >>= tsc_shift; init: + if (mp_ncpus == 1) + smp_tsc_adjust = 0; for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++) ; + CPU_FOREACH(cpu) { + *DPCPU_ID_PTR(cpu, smp_tsc_adj) >>= shift; + DPCPU_ID_SET(cpu, smp_tsc_adj32, DPCPU_ID_GET(cpu, smp_tsc_adj)); + } if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) { if (cpu_vendor_id == CPU_VENDOR_AMD) { - tsc_timecounter.tc_get_timecount = shift > 0 ? + tsc_timecounter.tc_get_timecount = smp_tsc_adjust ? + tsc_get_timecount_adj_mfence : shift > 0 ? tsc_get_timecount_low_mfence : tsc_get_timecount_mfence; } else { - tsc_timecounter.tc_get_timecount = shift > 0 ? + tsc_timecounter.tc_get_timecount = smp_tsc_adjust ? + tsc_get_timecount_adj_lfence : shift > 0 ? tsc_get_timecount_low_lfence : tsc_get_timecount_lfence; } } else { - tsc_timecounter.tc_get_timecount = shift > 0 ? + tsc_timecounter.tc_get_timecount = smp_tsc_adjust ? + tsc_get_timecount_adj : shift > 0 ? 
tsc_get_timecount_low : tsc_get_timecount; } if (shift > 0) { @@ -653,6 +732,19 @@ tsc_get_timecount(struct timecounter *tc __unused) } static inline u_int +tsc_get_timecount_adj(struct timecounter *tc) +{ + uint32_t rv; + + critical_enter(); + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); + rv += DPCPU_GET(smp_tsc_adj32); + critical_exit(); + return (rv); +} + +static inline u_int tsc_get_timecount_low(struct timecounter *tc) { uint32_t rv; @@ -671,6 +763,14 @@ tsc_get_timecount_lfence(struct timecounter *tc __ } static u_int +tsc_get_timecount_adj_lfence(struct timecounter *tc) +{ + + lfence(); + return (tsc_get_timecount_adj(tc)); +} + +static u_int tsc_get_timecount_low_lfence(struct timecounter *tc) { @@ -687,6 +787,14 @@ tsc_get_timecount_mfence(struct timecounter *tc __ } static u_int +tsc_get_timecount_adj_mfence(struct timecounter *tc) +{ + + mfence(); + return (tsc_get_timecount_adj(tc)); +} + +static u_int tsc_get_timecount_low_mfence(struct timecounter *tc) { @@ -700,7 +808,7 @@ cpu_fill_vdso_timehands(struct vdso_timehands *vds vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); - return (timecounter == &tsc_timecounter); + return (timecounter == &tsc_timecounter && !smp_tsc_adjust); } #ifdef COMPAT_FREEBSD32 @@ -710,6 +818,6 @@ cpu_fill_vdso_timehands32(struct vdso_timehands32 vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); - return (timecounter == &tsc_timecounter); + return (timecounter == &tsc_timecounter && !smp_tsc_adjust); } #endif