diff -ruNp --exclude compile sys.prev/amd64/amd64/machdep.c sys/amd64/amd64/machdep.c --- sys.prev/amd64/amd64/machdep.c 2010-09-03 02:18:44.000000000 +0300 +++ sys/amd64/amd64/machdep.c 2010-09-03 13:18:44.000000000 +0300 @@ -662,11 +662,13 @@ cpu_idle_amdc1e(int busy) if (sched_runnable()) enable_intr(); else { - uint64_t msr; + if (cpu_disable_deep_sleep) { + uint64_t msr; - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); + msr = rdmsr(MSR_AMDK8_IPM); + if (msr & AMDK8_CMPHALT) + wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); + } if (cpu_idle_hook) cpu_idle_hook(); @@ -678,6 +680,8 @@ cpu_idle_amdc1e(int busy) static void cpu_idle_spin(int busy) { + + cpu_spinwait(); return; } @@ -690,7 +694,19 @@ cpu_idle(int busy) if (mp_grab_cpu_hlt()) return; #endif + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", + busy, curcpu); + if (!busy) { + critical_enter(); + cpu_idleclock(); + } cpu_idle_fn(busy); + if (!busy) { + cpu_activeclock(); + critical_exit(); + } + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", + busy, curcpu); } /* diff -ruNp --exclude compile sys.prev/amd64/amd64/mp_machdep.c sys/amd64/amd64/mp_machdep.c --- sys.prev/amd64/amd64/mp_machdep.c 2010-09-03 02:18:44.000000000 +0300 +++ sys/amd64/amd64/mp_machdep.c 2010-09-03 22:44:42.000000000 +0300 @@ -118,7 +118,6 @@ u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; -static u_long *ipi_statclock_counts[MAXCPU]; #endif extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); @@ -1219,12 +1218,6 @@ ipi_bitmap_handler(struct trapframe fram #endif hardclockintr(&frame); } - if (ipi_bitmap & (1 << IPI_STATCLOCK)) { -#ifdef COUNT_IPIS - (*ipi_statclock_counts[cpu])++; -#endif - statclockintr(&frame); - } } /* @@ -1579,8 +1572,6 @@ mp_ipi_intrcnt(void *dummy) intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); 
intrcnt_add(buf, &ipi_hardclock_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:statclock", i); - intrcnt_add(buf, &ipi_statclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); diff -ruNp --exclude compile sys.prev/amd64/include/apicvar.h sys/amd64/include/apicvar.h --- sys.prev/amd64/include/apicvar.h 2010-09-03 02:18:42.000000000 +0300 +++ sys/amd64/include/apicvar.h 2010-09-03 22:44:25.000000000 +0300 @@ -123,8 +123,7 @@ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 -#define IPI_STATCLOCK 3 -#define IPI_BITMAP_LAST IPI_STATCLOCK +#define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ diff -ruNp --exclude compile sys.prev/dev/acpica/acpi_cpu.c sys/dev/acpica/acpi_cpu.c --- sys.prev/dev/acpica/acpi_cpu.c 2010-09-03 02:16:33.000000000 +0300 +++ sys/dev/acpica/acpi_cpu.c 2010-09-03 13:18:44.000000000 +0300 @@ -900,7 +900,13 @@ acpi_cpu_idle() /* Find the lowest state that has small enough latency. */ cx_next_idx = 0; - for (i = sc->cpu_cx_lowest; i >= 0; i--) { +#ifndef __ia64__ + if (cpu_disable_deep_sleep) + i = sc->cpu_non_c3; + else +#endif + i = sc->cpu_cx_lowest; + for (; i >= 0; i--) { if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) { cx_next_idx = i; break; @@ -929,15 +935,17 @@ acpi_cpu_idle() /* * Execute HLT (or equivalent) and wait for an interrupt. We can't * precisely calculate the time spent in C1 since the place we wake up - * is an ISR. Assume we slept no more then half of quantum. + * is an ISR. Assume we slept no more then half of quantum, unless + * we are called inside critical section, delaying context switch. 
*/ if (cx_next->type == ACPI_STATE_C1) { AcpiHwRead(&start_time, &AcpiGbl_FADT.XPmTimerBlock); acpi_cpu_c1(); AcpiHwRead(&end_time, &AcpiGbl_FADT.XPmTimerBlock); - end_time = acpi_TimerDelta(end_time, start_time); - sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + - min(PM_USEC(end_time), 500000 / hz)) / 4; + end_time = PM_USEC(acpi_TimerDelta(end_time, start_time)); + if (curthread->td_critnest == 0) + end_time = min(end_time, 500000 / hz); + sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + end_time) / 4; return; } diff -ruNp --exclude compile sys.prev/dev/acpica/acpi_hpet.c sys/dev/acpica/acpi_hpet.c --- sys.prev/dev/acpica/acpi_hpet.c 2010-09-03 02:16:33.000000000 +0300 +++ sys/dev/acpica/acpi_hpet.c 2010-09-03 13:18:44.000000000 +0300 @@ -612,15 +645,15 @@ hpet_detach(device_t dev) static int hpet_suspend(device_t dev) { - struct hpet_softc *sc; +// struct hpet_softc *sc; /* * Disable the timer during suspend. The timer will not lose * its state in S1 or S2, but we are required to disable * it. 
*/ - sc = device_get_softc(dev); - hpet_disable(sc); +// sc = device_get_softc(dev); +// hpet_disable(sc); return (0); } diff -ruNp --exclude compile sys.prev/i386/i386/machdep.c sys/i386/i386/machdep.c --- sys.prev/i386/i386/machdep.c 2010-09-03 02:17:59.000000000 +0300 +++ sys/i386/i386/machdep.c 2010-09-03 21:42:54.000000000 +0300 @@ -1281,17 +1281,21 @@ cpu_probe_amdc1e(void) static void cpu_idle_amdc1e(int busy) { + uint64_t msr; disable_intr(); if (sched_runnable()) enable_intr(); else { - uint64_t msr; - - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); - +#ifndef XEN + if (cpu_disable_deep_sleep) { +#endif + msr = rdmsr(MSR_AMDK8_IPM); + if (msr & AMDK8_CMPHALT) + wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); +#ifndef XEN + } +#endif if (cpu_idle_hook) cpu_idle_hook(); else @@ -1302,6 +1306,8 @@ cpu_idle_amdc1e(int busy) static void cpu_idle_spin(int busy) { + + cpu_spinwait(); return; } @@ -1318,7 +1324,23 @@ cpu_idle(int busy) if (mp_grab_cpu_hlt()) return; #endif + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", + busy, curcpu); +#ifndef XEN + if (!busy) { + critical_enter(); + cpu_idleclock(); + } +#endif cpu_idle_fn(busy); +#ifndef XEN + if (!busy) { + cpu_activeclock(); + critical_exit(); + } +#endif + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", + busy, curcpu); } /* diff -ruNp --exclude compile sys.prev/i386/i386/mp_machdep.c sys/i386/i386/mp_machdep.c --- sys.prev/i386/i386/mp_machdep.c 2010-09-03 02:17:59.000000000 +0300 +++ sys/i386/i386/mp_machdep.c 2010-09-03 22:43:25.000000000 +0300 @@ -167,7 +167,6 @@ u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; -static u_long *ipi_statclock_counts[MAXCPU]; #endif /* @@ -1307,12 +1306,6 @@ ipi_bitmap_handler(struct trapframe fram #endif hardclockintr(&frame); } - if (ipi_bitmap & (1 << IPI_STATCLOCK)) { -#ifdef COUNT_IPIS - (*ipi_statclock_counts[cpu])++; 
-#endif - statclockintr(&frame); - } } /* @@ -1627,8 +1620,6 @@ mp_ipi_intrcnt(void *dummy) intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:statclock", i); - intrcnt_add(buf, &ipi_statclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); diff -ruNp --exclude compile sys.prev/i386/include/apicvar.h sys/i386/include/apicvar.h --- sys.prev/i386/include/apicvar.h 2010-09-03 02:17:57.000000000 +0300 +++ sys/i386/include/apicvar.h 2010-09-03 22:44:06.000000000 +0300 @@ -124,8 +124,7 @@ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 -#define IPI_STATCLOCK 3 -#define IPI_BITMAP_LAST IPI_STATCLOCK +#define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ @@ -152,8 +151,7 @@ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 -#define IPI_STATCLOCK 3 -#define IPI_BITMAP_LAST IPI_STATCLOCK +#define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ diff -ruNp --exclude compile sys.prev/kern/kern_clock.c sys/kern/kern_clock.c --- sys.prev/kern/kern_clock.c 2010-09-03 02:15:53.000000000 +0300 +++ sys/kern/kern_clock.c 2010-09-03 13:18:44.000000000 +0300 @@ -373,12 +373,6 @@ int profprocs; int ticks; int psratio; -int timer1hz; -int timer2hz; -static DPCPU_DEFINE(u_int, hard_cnt); -static DPCPU_DEFINE(u_int, stat_cnt); -static DPCPU_DEFINE(u_int, prof_cnt); - /* * Initialize clock frequencies and start both clocks running. 
*/ @@ -408,52 +402,6 @@ initclocks(dummy) #endif } -void -timer1clock(int usermode, uintfptr_t pc) -{ - u_int *cnt; - - cnt = DPCPU_PTR(hard_cnt); - *cnt += hz; - if (*cnt >= timer1hz) { - *cnt -= timer1hz; - if (*cnt >= timer1hz) - *cnt = 0; - if (PCPU_GET(cpuid) == 0) - hardclock(usermode, pc); - else - hardclock_cpu(usermode); - } - if (timer2hz == 0) - timer2clock(usermode, pc); -} - -void -timer2clock(int usermode, uintfptr_t pc) -{ - u_int *cnt; - int t2hz = timer2hz ? timer2hz : timer1hz; - - cnt = DPCPU_PTR(stat_cnt); - *cnt += stathz; - if (*cnt >= t2hz) { - *cnt -= t2hz; - if (*cnt >= t2hz) - *cnt = 0; - statclock(usermode); - } - if (profprocs == 0) - return; - cnt = DPCPU_PTR(prof_cnt); - *cnt += profhz; - if (*cnt >= t2hz) { - *cnt -= t2hz; - if (*cnt >= t2hz) - *cnt = 0; - profclock(usermode, pc); - } -} - /* * Each time the real-time timer fires, this function is called on all CPUs. * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only @@ -494,6 +442,8 @@ hardclock_cpu(int usermode) if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); #endif + PCPU_SET(idlecallsprev, PCPU_GET(idlecalls)); + PCPU_SET(idlecalls, 0); callout_tick(); } diff -ruNp --exclude compile sys.prev/kern/kern_clocksource.c sys/kern/kern_clocksource.c --- sys.prev/kern/kern_clocksource.c 2010-09-03 02:15:53.000000000 +0300 +++ sys/kern/kern_clocksource.c 2010-09-03 23:23:30.000000000 +0300 @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD: head/sys/kern/kern_c #include #include #include +#include #include #include #include @@ -59,28 +60,61 @@ __FBSDID("$FreeBSD: head/sys/kern/kern_c cyclic_clock_func_t cyclic_clock_func[MAXCPU]; #endif -static void cpu_restartclocks(void); -static void timercheck(void); -inline static int doconfigtimer(int i); -static void configtimer(int i); - -static struct eventtimer *timer[2] = { NULL, NULL }; -static int timertest = 0; -static int timerticks[2] = { 0, 0 }; -static int profiling_on = 0; 
-static struct bintime timerperiod[2]; - -static char timername[2][32]; -TUNABLE_STR("kern.eventtimer.timer1", timername[0], sizeof(*timername)); -TUNABLE_STR("kern.eventtimer.timer2", timername[1], sizeof(*timername)); +int cpu_disable_deep_sleep = 0; /* Timer dies in C3. */ -static u_int singlemul = 0; +static void setuptimer(void); +static void loadtimer(struct bintime *now, int first); +static int doconfigtimer(void); +static void configtimer(int start); +static int round_freq(struct eventtimer *et, int freq); + +static void getnextcpuevent(struct bintime *event, int idle); +static void getnextevent(struct bintime *event); +static int handleevents(struct bintime *now, int fake); + +static struct mtx et_hw_mtx; +MTX_SYSINIT(et_hw_init, &et_hw_mtx, "et_hw_mtx", MTX_SPIN); +#define ET_HW_LOCK() mtx_lock_spin(&et_hw_mtx) +#define ET_HW_UNLOCK() mtx_unlock_spin(&et_hw_mtx) + +static struct eventtimer *timer = NULL; +static struct bintime timerperiod; /* Timer period for periodic mode. */ +static struct bintime hardperiod; /* hardclock() events period. */ +static struct bintime statperiod; /* statclock() events period. */ +static struct bintime profperiod; /* profclock() events period. */ +static struct bintime nexttick; /* Last tick time. */ +static u_int busy = 0; /* Reconfiguration is in progress. */ +static int profiling = 0; /* Profiling events enabled. */ + +static char timername[32]; /* Wanted timer. */ +TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername)); + +static u_int singlemul = 0; /* Multiplier for periodic mode. */ TUNABLE_INT("kern.eventtimer.singlemul", &singlemul); SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul, - 0, "Multiplier, used in single timer mode"); + 0, "Multiplier for periodic mode"); + +static u_int idletick = 0; /* Idle mode allowed. 
*/ +TUNABLE_INT("kern.eventtimer.idletick", &singlemul); +SYSCTL_INT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick, + 0, "Run periodic events when idle"); + +static int periodic = 0; /* Periodic or one-shot mode. */ +TUNABLE_INT("kern.eventtimer.periodic", &periodic); + +struct pcpu_state { + u_int action; /* Reconfiguration resuests. */ + struct bintime now; /* Last tick time. */ + struct bintime nextevent; /* Next scheduled event on this CPU. */ + struct bintime nexttick; /* Next timer tick time. */ + struct bintime nexthard; /* Next hardlock() event. */ + struct bintime nextstat; /* Next statclock() event. */ + struct bintime nextprof; /* Next profclock() event. */ + int ipi; /* This CPU needs IPI. */ + int idle; /* This CPU is in idle mode. */ +}; -typedef u_int tc[2]; -static DPCPU_DEFINE(tc, configtimer); +static DPCPU_DEFINE(struct pcpu_state, timerstate); #define FREQ2BT(freq, bt) \ { \ @@ -91,159 +125,305 @@ static DPCPU_DEFINE(tc, configtimer); (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ ((bt)->frac >> 1)) -/* Per-CPU timer1 handler. */ -static int -hardclockhandler(struct trapframe *frame) +/* + * Timer broadcast IPI handler. + */ +int +hardclockintr(struct trapframe *frame) { + struct trapframe *oldframe; + struct pcpu_state *state; + struct thread *td; + int done; + if (doconfigtimer()) + return (FILTER_HANDLED); + critical_enter(); + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + state = DPCPU_PTR(timerstate); + CTR4(KTR_SPARE2, "ipi at %d: now %d.%08x%08x", + curcpu, state->now.sec, (unsigned int)(state->now.frac >> 32), + (unsigned int)(state->now.frac & 0xffffffff)); + done = handleevents(&state->now, 0); + td->td_intr_frame = oldframe; + td->td_intr_nesting_level--; + critical_exit(); + return (done ? 
FILTER_HANDLED : FILTER_STRAY); +} + +/* + * Handle all events for specified time on this CPU + */ +static int +handleevents(struct bintime *now, int fake) +{ + struct bintime t; + struct trapframe *frame; + struct pcpu_state *state; + uintfptr_t pc; + int usermode; + int done; + + CTR4(KTR_SPARE2, "handle at %d: now %d.%08x%08x", + curcpu, now->sec, (unsigned int)(now->frac >> 32), + (unsigned int)(now->frac & 0xffffffff)); + done = 0; + if (fake) { + frame = NULL; + usermode = 0; + pc = 0; + } else { + frame = curthread->td_intr_frame; + usermode = TRAPF_USERMODE(frame); + pc = TRAPF_PC(frame); + } #ifdef KDTRACE_HOOKS /* * If the DTrace hooks are configured and a callback function * has been registered, then call it to process the high speed * timers. */ - int cpu = curcpu; - if (cyclic_clock_func[cpu] != NULL) - (*cyclic_clock_func[cpu])(frame); + if (!fake && cyclic_clock_func[curcpu] != NULL) + (*cyclic_clock_func[curcpu])(frame); #endif - - timer1clock(TRAPF_USERMODE(frame), TRAPF_PC(frame)); - return (FILTER_HANDLED); -} - -/* Per-CPU timer2 handler. */ -static int -statclockhandler(struct trapframe *frame) -{ - - timer2clock(TRAPF_USERMODE(frame), TRAPF_PC(frame)); - return (FILTER_HANDLED); -} - -/* timer1 broadcast IPI handler. 
*/ -int -hardclockintr(struct trapframe *frame) -{ - - if (doconfigtimer(0)) - return (FILTER_HANDLED); - return (hardclockhandler(frame)); + state = DPCPU_PTR(timerstate); + while (bintime_cmp(now, &state->nexthard, >=)) { + if (curcpu == 0) + hardclock(usermode, pc); + else + hardclock_cpu(usermode); + bintime_add(&state->nexthard, &hardperiod); + done = 1; + } + while (bintime_cmp(now, &state->nextstat, >=)) { + statclock(usermode); + bintime_add(&state->nextstat, &statperiod); + done = 1; + } + if (profiling && !fake) { + while (bintime_cmp(now, &state->nextprof, >=)) { + profclock(usermode, pc); + bintime_add(&state->nextprof, &profperiod); + done = 1; + } + } + getnextcpuevent(&t, 0); + ET_HW_LOCK(); + state->idle = 0; + state->nextevent = t; + if (!busy) + loadtimer(now, 0); + ET_HW_UNLOCK(); + return (done); } -/* timer2 broadcast IPI handler. */ -int -statclockintr(struct trapframe *frame) +/* + * Schedule binuptime of the next event on current CPU. + */ +static void +getnextcpuevent(struct bintime *event, int idle) { - - if (doconfigtimer(1)) - return (FILTER_HANDLED); - return (statclockhandler(frame)); + struct bintime tmp; + struct pcpu_state *state; + int skip; + + state = DPCPU_PTR(timerstate); + *event = state->nexthard; + if (idle) { /* If CPU is idle - ask callouts for how long. */ + skip = callout_cpuidleticks(); + CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip); + tmp = hardperiod; + bintime_mul(&tmp, skip); + bintime_add(event, &tmp); + } else { /* If CPU is active - handle all types of events. */ + if (bintime_cmp(event, &state->nextstat, >)) + *event = state->nextstat; + if (profiling && + bintime_cmp(event, &state->nextprof, >)) + *event = state->nextprof; + } } -/* timer1 callback. */ +/* + * Schedule binuptime of the next event on all CPUs. 
+ */ static void -timer1cb(struct eventtimer *et, void *arg) +getnextevent(struct bintime *event) { - + struct pcpu_state *state; #ifdef SMP - /* Broadcast interrupt to other CPUs for non-per-CPU timers */ - if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0) - ipi_all_but_self(IPI_HARDCLOCK); + int cpu; #endif - if (timertest) { - if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) { - timerticks[0]++; - if (timerticks[0] >= timer1hz) { - ET_LOCK(); - timercheck(); - ET_UNLOCK(); + int c; + + state = DPCPU_PTR(timerstate); + *event = state->nextevent; + c = curcpu; +#ifdef SMP + if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { + CPU_FOREACH(cpu) { + if (curcpu == cpu) + continue; + state = DPCPU_ID_PTR(cpu, timerstate); + if (bintime_cmp(event, &state->nextevent, >)) { + *event = state->nextevent; + c = cpu; } } } - hardclockhandler(curthread->td_intr_frame); +#endif + CTR5(KTR_SPARE2, "next at %d: next %d.%08x%08x by %d", + curcpu, event->sec, (unsigned int)(event->frac >> 32), + (unsigned int)(event->frac & 0xffffffff), c); } -/* timer2 callback. */ +/* Hardware timer callback function. */ static void -timer2cb(struct eventtimer *et, void *arg) +timercb(struct eventtimer *et, void *arg) { + struct bintime now; + struct bintime *next; + struct pcpu_state *state; +#ifdef SMP + int cpu, bcast; +#endif + + /* Update present and next tick times. */ + state = DPCPU_PTR(timerstate); + if (et->et_flags & ET_FLAGS_PERCPU) { + next = &state->nexttick; + } else + next = &nexttick; + if (periodic) { + now = *next; /* Ex-next tick time becomes present time. */ + bintime_add(next, &timerperiod); /* Next tick in 1 period. */ + } else { + binuptime(&now); /* Get present time from hardware. */ + next->sec = -1; /* Next tick is not scheduled yet. 
*/ + } + state->now = now; + CTR4(KTR_SPARE2, "intr at %d: now %d.%08x%08x", + curcpu, now.sec, (unsigned int)(now.frac >> 32), + (unsigned int)(now.frac & 0xffffffff)); #ifdef SMP - /* Broadcast interrupt to other CPUs for non-per-CPU timers */ - if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0) - ipi_all_but_self(IPI_STATCLOCK); + /* Prepare broadcasting to other CPUs for non-per-CPU timers. */ + bcast = 0; + if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) { + ET_HW_LOCK(); + CPU_FOREACH(cpu) { + if (curcpu == cpu) + continue; + state = DPCPU_ID_PTR(cpu, timerstate); + state->now = now; + if (bintime_cmp(&now, &state->nextevent, >=)) { + state->nextevent.sec++; + state->ipi = 1; + bcast = 1; + } + } + ET_HW_UNLOCK(); + } #endif - if (timertest) { - if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) { - timerticks[1]++; - if (timerticks[1] >= timer2hz * 2) { - ET_LOCK(); - timercheck(); - ET_UNLOCK(); + + /* Handle events for this time on this CPU. */ + handleevents(&now, 0); + +#ifdef SMP + /* Broadcast interrupt to other CPUs for non-per-CPU timers. */ + if (bcast) { + CPU_FOREACH(cpu) { + if (curcpu == cpu) + continue; + state = DPCPU_ID_PTR(cpu, timerstate); + if (state->ipi) { + state->ipi = 0; + ipi_cpu(cpu, IPI_HARDCLOCK); } } } - statclockhandler(curthread->td_intr_frame); +#endif } /* - * Check that both timers are running with at least 1/4 of configured rate. - * If not - replace the broken one. + * Load new value into hardware timer. 
*/ static void -timercheck(void) +loadtimer(struct bintime *now, int start) { - - if (!timertest) - return; - timertest = 0; - if (timerticks[0] * 4 < timer1hz) { - printf("Event timer \"%s\" is dead.\n", timer[0]->et_name); - timer1hz = 0; - configtimer(0); - et_ban(timer[0]); - et_free(timer[0]); - timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (timer[0] == NULL) { - timer2hz = 0; - configtimer(1); - et_free(timer[1]); - timer[1] = NULL; - timer[0] = timer[1]; + struct pcpu_state *state; + struct bintime new; + struct bintime *next; + int eq; + + if (periodic) { + if (start) + et_start(timer, NULL, &timerperiod); + } else { + if (timer->et_flags & ET_FLAGS_PERCPU) { + state = DPCPU_PTR(timerstate); + next = &state->nexttick; + } else + next = &nexttick; + getnextevent(&new); + eq = bintime_cmp(&new, next, ==); + CTR5(KTR_SPARE2, "load at %d: next %d.%08x%08x eq %d", + curcpu, new.sec, (unsigned int)(new.frac >> 32), + (unsigned int)(new.frac & 0xffffffff), + eq); + if (!eq) { + *next = new; + bintime_sub(&new, now); + et_start(timer, &new, NULL); } - et_init(timer[0], timer1cb, NULL, NULL); - cpu_restartclocks(); - return; - } - if (timerticks[1] * 4 < timer2hz) { - printf("Event timer \"%s\" is dead.\n", timer[1]->et_name); - timer2hz = 0; - configtimer(1); - et_ban(timer[1]); - et_free(timer[1]); - timer[1] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (timer[1] != NULL) - et_init(timer[1], timer2cb, NULL, NULL); - cpu_restartclocks(); - return; } } /* + * Prepare event timer parameters after configuration changes. + */ +static void +setuptimer(void) +{ + int freq; + + if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) + periodic = 0; + else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) + periodic = 1; + freq = hz * singlemul; + while (freq < (profiling ? 
profhz : stathz)) + freq += hz; + freq = round_freq(timer, freq); + FREQ2BT(freq, &timerperiod); +} + +/* * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler. */ -inline static int -doconfigtimer(int i) +static int +doconfigtimer(void) { - tc *conf; + struct bintime now; + struct pcpu_state *state; - conf = DPCPU_PTR(configtimer); - if (atomic_load_acq_int(*conf + i)) { - if (i == 0 ? timer1hz : timer2hz) - et_start(timer[i], NULL, &timerperiod[i]); - else - et_stop(timer[i]); - atomic_store_rel_int(*conf + i, 0); + state = DPCPU_PTR(timerstate); + switch (atomic_load_acq_int(&state->action)) { + case 1: + binuptime(&now); + ET_HW_LOCK(); + loadtimer(&now, 1); + ET_HW_UNLOCK(); + atomic_store_rel_int(&state->action, 0); + return (1); + case 2: + ET_HW_LOCK(); + et_stop(timer); + ET_HW_UNLOCK(); + atomic_store_rel_int(&state->action, 0); return (1); } return (0); @@ -254,45 +434,77 @@ doconfigtimer(int i) * For per-CPU timers use IPI to make other CPUs to reconfigure. */ static void -configtimer(int i) +configtimer(int start) { -#ifdef SMP - tc *conf; + struct bintime now, next; + struct pcpu_state *state; int cpu; + if (start) + setuptimer(); critical_enter(); -#endif - /* Start/stop global timer or per-CPU timer of this CPU. */ - if (i == 0 ? timer1hz : timer2hz) - et_start(timer[i], NULL, &timerperiod[i]); - else - et_stop(timer[i]); + ET_HW_LOCK(); + if (start) { + /* Initialize time machine parameters. */ + binuptime(&now); + next = now; + bintime_add(&next, &timerperiod); + if (periodic) + nexttick = next; + else + nexttick.sec = -1; + CPU_FOREACH(cpu) { + state = DPCPU_ID_PTR(cpu, timerstate); + state->now = now; + state->nextevent = next; + if (periodic) + state->nexttick = next; + else + state->nexttick.sec = -1; + state->nexthard = next; + state->nextstat = next; + state->nextprof = next; + } + /* Start global timer or per-CPU timer of this CPU. 
 */ + loadtimer(&now, 1); + busy = 0; + } else { + busy = 1; + /* Stop global timer or per-CPU timer of this CPU. */ + et_stop(timer); + } + ET_HW_UNLOCK(); #ifdef SMP - if ((timer[i]->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) { + /* If timer is global or there are no other CPUs yet - we are done. */ + if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) { critical_exit(); return; } /* Set reconfigure flags for other CPUs. */ CPU_FOREACH(cpu) { - conf = DPCPU_ID_PTR(cpu, configtimer); - atomic_store_rel_int(*conf + i, (cpu == curcpu) ? 0 : 1); + state = DPCPU_ID_PTR(cpu, timerstate); + atomic_store_rel_int(&state->action, + (cpu == curcpu) ? 0 : ( start ? 1 : 2)); } - /* Send reconfigure IPI. */ - ipi_all_but_self(i == 0 ? IPI_HARDCLOCK : IPI_STATCLOCK); + /* Broadcast reconfigure IPI. */ + ipi_all_but_self(IPI_HARDCLOCK); /* Wait for reconfiguration completed. */ restart: cpu_spinwait(); CPU_FOREACH(cpu) { if (cpu == curcpu) continue; - conf = DPCPU_ID_PTR(cpu, configtimer); - if (atomic_load_acq_int(*conf + i)) + state = DPCPU_ID_PTR(cpu, timerstate); + if (atomic_load_acq_int(&state->action)) goto restart; } - critical_exit(); #endif + critical_exit(); } +/* + * Calculate nearest frequency supported by hardware timer. + */ static int round_freq(struct eventtimer *et, int freq) { @@ -314,23 +526,40 @@ round_freq(struct eventtimer *et, int fr } /* - * Configure and start event timers. + * Configure and start event timers (BSP part). */ void cpu_initclocks_bsp(void) { int base, div; - timer[0] = et_find(timername[0], ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (timer[0] == NULL) - timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (timer[0] == NULL) + /* Grab requested timer or the best of present. 
*/ + if (timername[0]) + timer = et_find(timername, 0, 0); + if (timer == NULL && periodic) { + timer = et_find(NULL, + ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); + } + if (timer == NULL) { + timer = et_find(NULL, + ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT); + } + if (timer == NULL && !periodic) { + timer = et_find(NULL, + ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); + } + if (timer == NULL) panic("No usable event timer found!"); - et_init(timer[0], timer1cb, NULL, NULL); - timer[1] = et_find(timername[1][0] ? timername[1] : NULL, - ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (timer[1]) - et_init(timer[1], timer2cb, NULL, NULL); + et_init(timer, timercb, NULL, NULL); + + /* Adapt to timer capabilities. */ + if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0) + periodic = 0; + else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0) + periodic = 1; + if (timer->et_flags & ET_FLAGS_C3STOP) + cpu_disable_deep_sleep++; + /* * We honor the requested 'hz' value. * We want to run stathz in the neighborhood of 128hz. @@ -344,8 +573,8 @@ cpu_initclocks_bsp(void) else singlemul = 4; } - if (timer[1] == NULL) { - base = round_freq(timer[0], hz * singlemul); + if (periodic) { + base = round_freq(timer, hz * singlemul); singlemul = max((base + hz / 2) / hz, 1); hz = (base + singlemul / 2) / singlemul; if (base <= 128) @@ -359,175 +588,186 @@ cpu_initclocks_bsp(void) profhz = stathz; while ((profhz + stathz) <= 128 * 64) profhz += stathz; - profhz = round_freq(timer[0], profhz); + profhz = round_freq(timer, profhz); } else { - hz = round_freq(timer[0], hz); - stathz = round_freq(timer[1], 127); - profhz = round_freq(timer[1], stathz * 64); + hz = round_freq(timer, hz); + stathz = round_freq(timer, 127); + profhz = round_freq(timer, stathz * 64); } tick = 1000000 / hz; + FREQ2BT(hz, &hardperiod); + FREQ2BT(stathz, &statperiod); + FREQ2BT(profhz, &profperiod); ET_LOCK(); - cpu_restartclocks(); + configtimer(1); ET_UNLOCK(); } -/* Start per-CPU event timers on APs. 
*/ +/* + * Start per-CPU event timers on APs. + */ void cpu_initclocks_ap(void) { + struct bintime now; - ET_LOCK(); - if (timer[0]->et_flags & ET_FLAGS_PERCPU) - et_start(timer[0], NULL, &timerperiod[0]); - if (timer[1] && timer[1]->et_flags & ET_FLAGS_PERCPU) - et_start(timer[1], NULL, &timerperiod[1]); - ET_UNLOCK(); -} - -/* Reconfigure and restart event timers after configuration changes. */ -static void -cpu_restartclocks(void) -{ - - /* Stop all event timers. */ - timertest = 0; - if (timer1hz) { - timer1hz = 0; - configtimer(0); - } - if (timer[1] && timer2hz) { - timer2hz = 0; - configtimer(1); - } - /* Calculate new event timers parameters. */ - if (timer[1] == NULL) { - timer1hz = hz * singlemul; - while (timer1hz < (profiling_on ? profhz : stathz)) - timer1hz += hz; - timer2hz = 0; - } else { - timer1hz = hz; - timer2hz = profiling_on ? profhz : stathz; - timer2hz = round_freq(timer[1], timer2hz); - } - timer1hz = round_freq(timer[0], timer1hz); - printf("Starting kernel event timers: %s @ %dHz, %s @ %dHz\n", - timer[0]->et_name, timer1hz, - timer[1] ? timer[1]->et_name : "NONE", timer2hz); - /* Restart event timers. */ - FREQ2BT(timer1hz, &timerperiod[0]); - configtimer(0); - if (timer[1]) { - timerticks[0] = 0; - timerticks[1] = 0; - FREQ2BT(timer2hz, &timerperiod[1]); - configtimer(1); - timertest = 1; + if (timer->et_flags & ET_FLAGS_PERCPU) { + binuptime(&now); + ET_HW_LOCK(); + loadtimer(&now, 1); + ET_HW_UNLOCK(); } } -/* Switch to profiling clock rates. */ +/* + * Switch to profiling clock rates. + */ void cpu_startprofclock(void) { ET_LOCK(); - profiling_on = 1; - cpu_restartclocks(); + configtimer(0); + profiling = 1; + configtimer(1); ET_UNLOCK(); } -/* Switch to regular clock rates. */ +/* + * Switch to regular clock rates. + */ void cpu_stopprofclock(void) { ET_LOCK(); - profiling_on = 0; - cpu_restartclocks(); + configtimer(0); + profiling = 0; + configtimer(1); ET_UNLOCK(); } -/* Report or change the active event timers hardware. 
*/ +/* + * Switch to idle mode (all ticks handled). + */ +void +cpu_idleclock(void) +{ + struct bintime now, t; + struct pcpu_state *state; + + if (idletick || busy || + (periodic && (timer->et_flags & ET_FLAGS_PERCPU))) + return; + state = DPCPU_PTR(timerstate); + if (periodic) + now = state->now; + else + binuptime(&now); + CTR4(KTR_SPARE2, "idle at %d: now %d.%08x%08x", + curcpu, now.sec, (unsigned int)(now.frac >> 32), + (unsigned int)(now.frac & 0xffffffff)); + getnextcpuevent(&t, 1); + ET_HW_LOCK(); + state->idle = 1; + state->nextevent = t; + if (!periodic) + loadtimer(&now, 0); + ET_HW_UNLOCK(); +} + +/* + * Switch to active mode (skip empty ticks). + */ +void +cpu_activeclock(void) +{ + struct bintime now; + struct pcpu_state *state; + struct thread *td; + + state = DPCPU_PTR(timerstate); + if (state->idle == 0) + return; + if (periodic) + now = state->now; + else + binuptime(&now); + CTR4(KTR_SPARE2, "active at %d: now %d.%08x%08x", + curcpu, now.sec, (unsigned int)(now.frac >> 32), + (unsigned int)(now.frac & 0xffffffff)); + spinlock_enter(); + td = curthread; + td->td_intr_nesting_level++; + handleevents(&now, 1); + td->td_intr_nesting_level--; + spinlock_exit(); +} + +/* + * Report or change the active event timers hardware. 
+ */ static int -sysctl_kern_eventtimer_timer1(SYSCTL_HANDLER_ARGS) +sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS) { char buf[32]; struct eventtimer *et; int error; ET_LOCK(); - et = timer[0]; + et = timer; snprintf(buf, sizeof(buf), "%s", et->et_name); ET_UNLOCK(); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); ET_LOCK(); - et = timer[0]; + et = timer; if (error != 0 || req->newptr == NULL || - strcmp(buf, et->et_name) == 0) { + strcasecmp(buf, et->et_name) == 0) { ET_UNLOCK(); return (error); } - et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); + et = et_find(buf, 0, 0); if (et == NULL) { ET_UNLOCK(); return (ENOENT); } - timer1hz = 0; configtimer(0); - et_free(timer[0]); - timer[0] = et; - et_init(timer[0], timer1cb, NULL, NULL); - cpu_restartclocks(); + et_free(timer); + if (et->et_flags & ET_FLAGS_C3STOP) + cpu_disable_deep_sleep++; + if (timer->et_flags & ET_FLAGS_C3STOP) + cpu_disable_deep_sleep--; + timer = et; + et_init(timer, timercb, NULL, NULL); + configtimer(1); ET_UNLOCK(); return (error); } -SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer1, +SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, - 0, 0, sysctl_kern_eventtimer_timer1, "A", "Primary event timer"); + 0, 0, sysctl_kern_eventtimer_timer, "A", "Kernel event timer"); +/* + * Report or change the active event timer periodicity. + */ static int -sysctl_kern_eventtimer_timer2(SYSCTL_HANDLER_ARGS) +sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS) { - char buf[32]; - struct eventtimer *et; - int error; + int error, val; - ET_LOCK(); - et = timer[1]; - if (et == NULL) - snprintf(buf, sizeof(buf), "NONE"); - else - snprintf(buf, sizeof(buf), "%s", et->et_name); - ET_UNLOCK(); - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); - ET_LOCK(); - et = timer[1]; - if (error != 0 || req->newptr == NULL || - strcmp(buf, et ? 
et->et_name : "NONE") == 0) { - ET_UNLOCK(); + val = periodic; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) return (error); - } - et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC); - if (et == NULL && strcasecmp(buf, "NONE") != 0) { - ET_UNLOCK(); - return (ENOENT); - } - if (timer[1] != NULL) { - timer2hz = 0; - configtimer(1); - et_free(timer[1]); - } - timer[1] = et; - if (timer[1] != NULL) - et_init(timer[1], timer2cb, NULL, NULL); - cpu_restartclocks(); + ET_LOCK(); + configtimer(0); + periodic = val; + configtimer(1); ET_UNLOCK(); return (error); } -SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer2, - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, - 0, 0, sysctl_kern_eventtimer_timer2, "A", "Secondary event timer"); +SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + 0, 0, sysctl_kern_eventtimer_periodic, "I", "Kernel event timer periodic"); #endif - diff -ruNp --exclude compile sys.prev/kern/kern_et.c sys/kern/kern_et.c --- sys.prev/kern/kern_et.c 2010-09-03 02:15:53.000000000 +0300 +++ sys/kern/kern_et.c 2010-09-03 13:18:44.000000000 +0300 @@ -38,7 +38,7 @@ SLIST_HEAD(et_eventtimers_list, eventtim static struct et_eventtimers_list eventtimers = SLIST_HEAD_INITIALIZER(et_eventtimers); struct mtx et_eventtimers_mtx; -MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_SPIN); +MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_DEF); SYSCTL_NODE(_kern, OID_AUTO, eventtimer, CTLFLAG_RW, 0, "Event timers"); SYSCTL_NODE(_kern_eventtimer, OID_AUTO, et, CTLFLAG_RW, 0, ""); diff -ruNp --exclude compile sys.prev/kern/kern_timeout.c sys/kern/kern_timeout.c --- sys.prev/kern/kern_timeout.c 2010-09-03 02:15:53.000000000 +0300 +++ sys/kern/kern_timeout.c 2010-09-03 13:18:44.000000000 +0300 @@ -277,6 +277,33 @@ callout_tick(void) swi_sched(cc->cc_cookie, 0); } +int +callout_cpuidleticks(void) +{ + struct callout_cpu *cc; + struct callout *c; + 
struct callout_tailq *sc; + int curticks; + int skip = 1; + + cc = CC_SELF(); + mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); + curticks = cc->cc_ticks; + while( skip < ncallout && skip < hz/8 ) { + sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; + /* search scanning ticks */ + TAILQ_FOREACH( c, sc, c_links.tqe ){ + if (c && (c->c_time <= curticks + ncallout) + && (c->c_time > 0)) + goto out; + } + skip++; + } +out: + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + return (skip - 1); +} + static struct callout_cpu * callout_lock(struct callout *c) { diff -ruNp --exclude compile sys.prev/kern/sched_4bsd.c sys/kern/sched_4bsd.c --- sys.prev/kern/sched_4bsd.c 2010-09-03 02:15:53.000000000 +0300 +++ sys/kern/sched_4bsd.c 2010-09-03 13:18:44.000000000 +0300 @@ -1541,8 +1541,11 @@ sched_idletd(void *dummy) for (;;) { mtx_assert(&Giant, MA_NOTOWNED); - while (sched_runnable() == 0) - cpu_idle(0); + while (sched_runnable() == 0) { + cpu_idle(PCPU_GET(idlecalls) + + PCPU_GET(idlecallsprev) > 10); + PCPU_INC(idlecalls); + } mtx_lock_spin(&sched_lock); mi_switch(SW_VOL | SWT_IDLE, NULL); diff -ruNp --exclude compile sys.prev/kern/sched_ule.c sys/kern/sched_ule.c --- sys.prev/kern/sched_ule.c 2010-09-03 13:16:52.000000000 +0300 +++ sys/kern/sched_ule.c 2010-09-03 13:18:44.000000000 +0300 @@ -2177,8 +2177,8 @@ sched_tick(void) * Ticks is updated asynchronously on a single cpu. Check here to * avoid incrementing ts_ticks multiple times in a single tick. 
*/ - if (ts->ts_incrtick == ticks) - return; +// if (ts->ts_incrtick == ticks) +// return; /* Adjust ticks for pctcpu */ ts->ts_ticks += 1 << SCHED_TICK_SHIFT; ts->ts_ltick = ticks; @@ -2550,9 +2550,11 @@ sched_idletd(void *dummy) cpu_spinwait(); } } - switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; - if (tdq->tdq_load == 0) - cpu_idle(switchcnt > 1); + if (tdq->tdq_load == 0) { + cpu_idle(PCPU_GET(idlecalls) + + PCPU_GET(idlecallsprev) > 10); + PCPU_INC(idlecalls); + } if (tdq->tdq_load) { thread_lock(td); mi_switch(SW_VOL | SWT_IDLE, NULL); diff -ruNp --exclude compile sys.prev/mips/include/smp.h sys/mips/include/smp.h --- sys.prev/mips/include/smp.h 2010-09-03 02:16:14.000000000 +0300 +++ sys/mips/include/smp.h 2010-09-03 22:42:12.000000000 +0300 @@ -28,7 +28,6 @@ #define IPI_STOP_HARD 0x0008 #define IPI_PREEMPT 0x0010 #define IPI_HARDCLOCK 0x0020 -#define IPI_STATCLOCK 0x0040 #ifndef LOCORE diff -ruNp --exclude compile sys.prev/mips/mips/mp_machdep.c sys/mips/mips/mp_machdep.c --- sys.prev/mips/mips/mp_machdep.c 2010-09-03 02:16:17.000000000 +0300 +++ sys/mips/mips/mp_machdep.c 2010-09-03 22:42:16.000000000 +0300 @@ -166,10 +166,6 @@ mips_ipi_handler(void *arg) CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(arg);; break; - case IPI_STATCLOCK: - CTR1(KTR_SMP, "%s: IPI_STATCLOCK", __func__); - statclockintr(arg);; - break; default: panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu); } diff -ruNp --exclude compile sys.prev/pc98/pc98/machdep.c sys/pc98/pc98/machdep.c --- sys.prev/pc98/pc98/machdep.c 2010-09-03 02:17:48.000000000 +0300 +++ sys/pc98/pc98/machdep.c 2010-09-03 13:18:44.000000000 +0300 @@ -1137,6 +1137,8 @@ cpu_idle_hlt(int busy) static void cpu_idle_spin(int busy) { + + cpu_spinwait(); return; } @@ -1149,7 +1151,19 @@ cpu_idle(int busy) if (mp_grab_cpu_hlt()) return; #endif + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", + busy, curcpu); + if (!busy) { + critical_enter(); + cpu_idleclock(); + } cpu_idle_fn(busy); + if (!busy) { + 
cpu_activeclock(); + critical_exit(); + } + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", + busy, curcpu); } /* diff -ruNp --exclude compile sys.prev/sparc64/include/intr_machdep.h sys/sparc64/include/intr_machdep.h --- sys.prev/sparc64/include/intr_machdep.h 2010-09-03 02:15:38.000000000 +0300 +++ sys/sparc64/include/intr_machdep.h 2010-09-03 22:40:12.000000000 +0300 @@ -47,7 +47,6 @@ #define PIL_STOP 5 /* stop cpu ipi */ #define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */ #define PIL_HARDCLOCK 7 /* hardclock broadcast */ -#define PIL_STATCLOCK 8 /* statclock broadcast */ #define PIL_FILTER 12 /* filter interrupts */ #define PIL_FAST 13 /* fast interrupts */ #define PIL_TICK 14 /* tick interrupts */ diff -ruNp --exclude compile sys.prev/sparc64/include/smp.h sys/sparc64/include/smp.h --- sys.prev/sparc64/include/smp.h 2010-09-03 02:15:38.000000000 +0300 +++ sys/sparc64/include/smp.h 2010-09-03 22:40:06.000000000 +0300 @@ -59,7 +59,6 @@ #define IPI_RENDEZVOUS PIL_RENDEZVOUS #define IPI_PREEMPT PIL_PREEMPT #define IPI_HARDCLOCK PIL_HARDCLOCK -#define IPI_STATCLOCK PIL_STATCLOCK #define IPI_STOP PIL_STOP #define IPI_STOP_HARD PIL_STOP diff -ruNp --exclude compile sys.prev/sparc64/sparc64/intr_machdep.c sys/sparc64/sparc64/intr_machdep.c --- sys.prev/sparc64/sparc64/intr_machdep.c 2010-09-03 02:15:38.000000000 +0300 +++ sys/sparc64/sparc64/intr_machdep.c 2010-09-03 22:39:56.000000000 +0300 @@ -97,8 +97,7 @@ static const char *const pil_names[] = { "stop", /* PIL_STOP */ "preempt", /* PIL_PREEMPT */ "hardclock", /* PIL_HARDCLOCK */ - "statclock", /* PIL_STATCLOCK */ - "stray", "stray", "stray", + "stray", "stray", "stray", "stray", "filter", /* PIL_FILTER */ "fast", /* PIL_FAST */ "tick", /* PIL_TICK */ diff -ruNp --exclude compile sys.prev/sparc64/sparc64/mp_machdep.c sys/sparc64/sparc64/mp_machdep.c --- sys.prev/sparc64/sparc64/mp_machdep.c 2010-09-03 02:15:38.000000000 +0300 +++ sys/sparc64/sparc64/mp_machdep.c 2010-09-03 22:39:22.000000000 +0300 @@ -98,7 +98,6 @@ 
__FBSDID("$FreeBSD: head/sys/sparc64/spa static ih_func_t cpu_ipi_ast; static ih_func_t cpu_ipi_hardclock; static ih_func_t cpu_ipi_preempt; -static ih_func_t cpu_ipi_statclock; static ih_func_t cpu_ipi_stop; /* @@ -292,7 +291,6 @@ cpu_mp_start(void) intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL); intr_setup(PIL_PREEMPT, cpu_ipi_preempt, -1, NULL, NULL); intr_setup(PIL_HARDCLOCK, cpu_ipi_hardclock, -1, NULL, NULL); - intr_setup(PIL_STATCLOCK, cpu_ipi_statclock, -1, NULL, NULL); cpuid_to_mid[curcpu] = PCPU_GET(mid); @@ -529,13 +527,6 @@ cpu_ipi_hardclock(struct trapframe *tf) } static void -cpu_ipi_statclock(struct trapframe *tf) -{ - - statclockintr(tf); -} - -static void spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) { u_int cpu; diff -ruNp --exclude compile sys.prev/sun4v/include/intr_machdep.h sys/sun4v/include/intr_machdep.h --- sys.prev/sun4v/include/intr_machdep.h 2010-09-03 02:17:40.000000000 +0300 +++ sys/sun4v/include/intr_machdep.h 2010-09-03 22:41:26.000000000 +0300 @@ -47,7 +47,6 @@ #define PIL_STOP 5 /* stop cpu ipi */ #define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */ #define PIL_HARDCLOCK 7 /* hardclock broadcast */ -#define PIL_STATCLOCK 8 /* statclock broadcast */ #define PIL_FAST 13 /* fast interrupts */ #define PIL_TICK 14 diff -ruNp --exclude compile sys.prev/sun4v/include/smp.h sys/sun4v/include/smp.h --- sys.prev/sun4v/include/smp.h 2010-09-03 02:17:40.000000000 +0300 +++ sys/sun4v/include/smp.h 2010-09-03 22:41:38.000000000 +0300 @@ -47,7 +47,6 @@ #define IPI_STOP_HARD PIL_STOP #define IPI_PREEMPT PIL_PREEMPT #define IPI_HARDCLOCK PIL_HARDCLOCK -#define IPI_STATCLOCK PIL_STATCLOCK #define IPI_RETRIES 5000 @@ -83,7 +82,6 @@ void cpu_ipi_ast(struct trapframe *tf); void cpu_ipi_stop(struct trapframe *tf); void cpu_ipi_preempt(struct trapframe *tf); void cpu_ipi_hardclock(struct trapframe *tf); -void cpu_ipi_statclock(struct trapframe *tf); void ipi_all_but_self(u_int ipi); void ipi_cpu(int cpu, u_int ipi); diff -ruNp 
--exclude compile sys.prev/sun4v/sun4v/intr_machdep.c sys/sun4v/sun4v/intr_machdep.c --- sys.prev/sun4v/sun4v/intr_machdep.c 2010-09-03 02:17:40.000000000 +0300 +++ sys/sun4v/sun4v/intr_machdep.c 2010-09-03 22:41:13.000000000 +0300 @@ -110,8 +110,7 @@ static char *pil_names[] = { "stop", /* PIL_STOP */ "preempt", /* PIL_PREEMPT */ "hardclock", /* PIL_HARDCLOCK */ - "statclock", /* PIL_STATCLOCK */ - "stray", "stray", "stray", "stray", + "stray", "stray", "stray", "stray", "stray", "fast", /* PIL_FAST */ "tick", /* PIL_TICK */ }; @@ -265,7 +264,6 @@ intr_init(void) intr_handlers[PIL_STOP]= cpu_ipi_stop; intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt; intr_handlers[PIL_HARDCLOCK]= cpu_ipi_hardclock; - intr_handlers[PIL_STATCLOCK]= cpu_ipi_statclock; #endif mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN); cpu_intrq_alloc(); diff -ruNp --exclude compile sys.prev/sun4v/sun4v/mp_machdep.c sys/sun4v/sun4v/mp_machdep.c --- sys.prev/sun4v/sun4v/mp_machdep.c 2010-09-03 02:17:40.000000000 +0300 +++ sys/sun4v/sun4v/mp_machdep.c 2010-09-03 22:41:35.000000000 +0300 @@ -477,13 +477,6 @@ cpu_ipi_hardclock(struct trapframe *tf) } void -cpu_ipi_statclock(struct trapframe *tf) -{ - - statclockintr(tf); -} - -void cpu_ipi_selected(int cpu_count, uint16_t *cpulist, u_long d0, u_long d1, u_long d2, uint64_t *ackmask) { diff -ruNp --exclude compile sys.prev/sys/callout.h sys/sys/callout.h --- sys.prev/sys/callout.h 2010-09-03 02:18:47.000000000 +0300 +++ sys/sys/callout.h 2010-09-03 13:18:44.000000000 +0300 @@ -96,6 +96,7 @@ int callout_schedule_on(struct callout * #define callout_stop(c) _callout_stop_safe(c, 0) int _callout_stop_safe(struct callout *, int); void callout_tick(void); +int callout_cpuidleticks(void); #endif diff -ruNp --exclude compile sys.prev/sys/pcpu.h sys/sys/pcpu.h --- sys.prev/sys/pcpu.h 2010-09-03 02:18:47.000000000 +0300 +++ sys/sys/pcpu.h 2010-09-03 13:18:44.000000000 +0300 @@ -166,6 +166,8 @@ struct pcpu { struct pcb *pc_curpcb; /* Current pcb */ uint64_t 
pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ + int pc_idlecalls; /* count of cpu_idle() calls */ + int pc_idlecallsprev; /* previous count */ u_int pc_cpuid; /* This cpu number */ cpumask_t pc_cpumask; /* This cpu mask */ cpumask_t pc_other_cpus; /* Mask of all other cpus */ diff -ruNp --exclude compile sys.prev/sys/systm.h sys/sys/systm.h --- sys.prev/sys/systm.h 2010-09-03 02:18:47.000000000 +0300 +++ sys/sys/systm.h 2010-09-03 22:38:49.000000000 +0300 @@ -241,16 +241,16 @@ void hardclock_cpu(int usermode); void softclock(void *); void statclock(int usermode); void profclock(int usermode, uintfptr_t pc); -void timer1clock(int usermode, uintfptr_t pc); -void timer2clock(int usermode, uintfptr_t pc); int hardclockintr(struct trapframe *frame); -int statclockintr(struct trapframe *frame); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); +void cpu_idleclock(void); +void cpu_activeclock(void); +extern int cpu_disable_deep_sleep; int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); diff -ruNp --exclude compile sys.prev/sys/time.h sys/sys/time.h --- sys.prev/sys/time.h 2010-09-03 02:18:47.000000000 +0300 +++ sys/sys/time.h 2010-09-03 13:18:44.000000000 +0300 @@ -90,6 +90,25 @@ bintime_sub(struct bintime *bt, const st bt->sec -= bt2->sec; } +static __inline void +bintime_mul(struct bintime *bt, u_int x) +{ + uint64_t p1, p2; + + p1 = (bt->frac & 0xffffffffllu) * x; + p2 = (bt->frac >> 32) * x + (p1 >> 32); + bt->sec *= x; + bt->sec += (p2 >> 32); + bt->frac = (p2 << 32) | (p1 & 0xffffffffllu); +} + +#define bintime_clear(a) ((a)->sec = (a)->frac = 0) +#define bintime_isset(a) ((a)->sec || (a)->frac) +#define bintime_cmp(a, b, cmp) \ + (((a)->sec == (b)->sec) ? 
\ + ((a)->frac cmp (b)->frac) : \ + ((a)->sec cmp (b)->sec)) + /*- * Background information: * diff -ruNp --exclude compile sys.prev/sys/timeet.h sys/sys/timeet.h --- sys.prev/sys/timeet.h 2010-09-03 02:18:47.000000000 +0300 +++ sys/sys/timeet.h 2010-09-03 13:18:44.000000000 +0300 @@ -83,8 +83,8 @@ struct eventtimer { }; extern struct mtx et_eventtimers_mtx; -#define ET_LOCK() mtx_lock_spin(&et_eventtimers_mtx) -#define ET_UNLOCK() mtx_unlock_spin(&et_eventtimers_mtx) +#define ET_LOCK() mtx_lock(&et_eventtimers_mtx) +#define ET_UNLOCK() mtx_unlock(&et_eventtimers_mtx) /* Driver API */ int et_register(struct eventtimer *et); diff -ruNp --exclude compile sys.prev/x86/x86/local_apic.c sys/x86/x86/local_apic.c --- sys.prev/x86/x86/local_apic.c 2010-09-03 02:17:48.000000000 +0300 +++ sys/x86/x86/local_apic.c 2010-09-03 13:18:44.000000000 +0300 @@ -261,7 +261,7 @@ lapic_init(vm_paddr_t addr) lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; - lapic_et.et_quality -= 100; + lapic_et.et_quality -= 200; } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */