Index: lib/libprocstat/zfs.c
===================================================================
--- lib/libprocstat/zfs.c	(.../head)	(revision 237923)
+++ lib/libprocstat/zfs.c	(.../projects/calloutng)	(revision 237923)
@@ -35,6 +35,7 @@
 #undef lbolt
 #undef lbolt64
+#undef gethrestime
 #undef gethrestime_sec
 #include
 #include
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES	(.../head)	(revision 237923)
+++ sys/conf/NOTES	(.../projects/calloutng)	(revision 237923)
@@ -259,6 +259,8 @@ options 	SX_NOINLINE
 
 # SMP Debugging Options:
 #
+# CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
+# structure used as backend in callout(9).
 # PREEMPTION allows the threads that are in the kernel to be preempted by
 #	higher priority [interrupt] threads.  It helps with interactivity
 #	and allows interrupt threads to run sooner rather than waiting.
@@ -297,6 +299,9 @@ options 	LOCK_PROFILING
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
+# Profiling for the callout(9) backend.
+options 	CALLOUT_PROFILING
+
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
Index: sys/conf/options
===================================================================
--- sys/conf/options	(.../head)	(revision 237923)
+++ sys/conf/options	(.../projects/calloutng)	(revision 237923)
@@ -66,6 +66,7 @@ SYSCTL_DEBUG	opt_sysctl.h
 ADAPTIVE_LOCKMGRS
 ALQ
 AUDIT		opt_global.h
+CALLOUT_PROFILING
 CAPABILITIES	opt_capsicum.h
 CAPABILITY_MODE	opt_capsicum.h
 CODA_COMPAT_5	opt_coda.h
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(.../head)	(revision 237923)
+++ sys/kern/kern_timeout.c	(.../projects/calloutng)	(revision 237923)
@@ -37,6 +37,7 @@
 #include
 __FBSDID("$FreeBSD$");
 
+#include "opt_callout_profiling.h"
 #include "opt_kdtrace.h"
 
 #include
@@ -47,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,6 +70,7 @@ SDT_PROBE_DEFINE(callout_execute, kernel, , callou
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
     "struct callout *");
 
+#ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
     "Average number of items examined per softclock call. Units = 1/1000");
@@ -80,11 +83,12 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTL
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
+#endif
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  */
-int callwheelsize, callwheelbits, callwheelmask;
+int callwheelsize, callwheelmask;
 
 /*
  * The callout cpu migration entity represents informations necessary for
@@ -94,51 +98,38 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFL
  */
 struct cc_mig_ent {
 #ifdef SMP
-	void	(*ce_migration_func)(void *);
-	void	*ce_migration_arg;
-	int	ce_migration_cpu;
-	int	ce_migration_ticks;
+	void		(*ce_migration_func)(void *);
+	void		*ce_migration_arg;
+	int		ce_migration_cpu;
+	struct bintime	ce_migration_time;
#endif
 };
 
 /*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
- * In particular:
- *	cc_ticks is incremented once per tick in callout_cpu().
- *	It tracks the global 'ticks' but in a way that the individual
- *	threads should not worry about races in the order in which
- *	hardclock() and hardclock_cpu() run on the various CPUs.
- *	cc_softclock is advanced in callout_cpu() to point to the
- *	first entry in cc_callwheel that may need handling. In turn,
- *	a softclock() is scheduled so it can serve the various entries i
- *	such that cc_softclock <= i <= cc_ticks .
- *	XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- *	cc_ticks is also used in callout_reset_cpu() to determine
- *	when the callout should be served.
 */
struct callout_cpu {
	struct cc_mig_ent	cc_migrating_entity;
	struct mtx		cc_lock;
	struct callout		*cc_callout;
	struct callout_tailq	*cc_callwheel;
+	struct callout_tailq	cc_expireq;
	struct callout_list	cc_callfree;
	struct callout		*cc_next;
	struct callout		*cc_curr;
+	struct bintime		cc_firstevent;
+	struct bintime		cc_lastscan;
	void			*cc_cookie;
-	int			cc_ticks;
-	int			cc_softticks;
	int			cc_cancel;
	int			cc_waiting;
-	int			cc_firsttick;
};
 
#ifdef SMP
#define	cc_migration_func	cc_migrating_entity.ce_migration_func
#define	cc_migration_arg	cc_migrating_entity.ce_migration_arg
#define	cc_migration_cpu	cc_migrating_entity.ce_migration_cpu
-#define	cc_migration_ticks	cc_migrating_entity.ce_migration_ticks
+#define	cc_migration_time	cc_migrating_entity.ce_migration_time
 
struct callout_cpu cc_cpu[MAXCPU];
#define	CPUBLOCK	MAXCPU
@@ -152,27 +143,37 @@ struct callout_cpu cc_cpu;
#define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
#define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
+#define	C_PRECISION	0x2
 
+#define	FREQ2BT(freq, bt)						\
+{									\
+	(bt)->sec = 0;							\
+	(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1;	\
+}
+
+#define	TIME_T_MAX							\
+	(sizeof(time_t) == (sizeof(int64_t)) ? INT64_MAX : INT32_MAX)
+
static int timeout_cpu;
-void (*callout_new_inserted)(int cpu, int ticks) = NULL;
+void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL;
 
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
/**
 * Locked by cc_lock:
- *   cc_curr         - If a callout is in progress, it is curr_callout.
- *                     If curr_callout is non-NULL, threads waiting in
+ *   cc_curr         - If a callout is in progress, it is cc_curr.
+ *                     If cc_curr is non-NULL, threads waiting in
 *                     callout_drain() will be woken up as soon as the
 *                     relevant callout completes.
- *   cc_cancel       - Changing to 1 with both callout_lock and c_lock held
+ *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
 *                     guarantees that the current callout will not run.
 *                     The softclock() function sets this to 0 before it
 *                     drops callout_lock to acquire c_lock, and it calls
 *                     the handler only if curr_cancelled is still 0 after
- *                     c_lock is successfully acquired.
+ *                     cc_lock is successfully acquired.
 *   cc_waiting      - If a thread is waiting in callout_drain(), then
 *                     callout_wait is nonzero.  Set only when
- *                     curr_callout is non-NULL.
+ *                     cc_curr is non-NULL.
 */
 
/*
@@ -184,7 +185,8 @@ cc_cme_cleanup(struct callout_cpu *cc)
#ifdef SMP
	cc->cc_migration_cpu = CPUBLOCK;
-	cc->cc_migration_ticks = 0;
+	cc->cc_migration_time.sec = 0;
+	cc->cc_migration_time.frac = 0;
	cc->cc_migration_func = NULL;
	cc->cc_migration_arg = NULL;
#endif
@@ -220,10 +222,9 @@ kern_timeout_callwheel_alloc(caddr_t v)
	/*
	 * Calculate callout wheel size
	 */
-	for (callwheelsize = 1, callwheelbits = 0;
-	     callwheelsize < ncallout;
-	     callwheelsize <<= 1, ++callwheelbits)
-		;
+	callwheelsize = 1;
+	while (callwheelsize < ncallout)
+		callwheelsize <<= 1;
	callwheelmask = callwheelsize - 1;
 
	cc->cc_callout = (struct callout *)v;
@@ -244,6 +245,7 @@ callout_cpu_init(struct callout_cpu *cc)
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&cc->cc_callwheel[i]);
	}
+	TAILQ_INIT(&cc->cc_expireq);
	cc_cme_cleanup(cc);
	if (cc->cc_callout == NULL)
		return;
@@ -332,12 +334,28 @@ start_softclock(void *dummy)
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock,
    NULL);
 
+static inline int
+callout_hash(struct bintime *bt)
+{
+
+	return (int) ((bt->sec<<10)+(bt->frac>>54));
+}
+
+static inline int
+get_bucket(struct bintime *bt)
+{
+
+	return callout_hash(bt) & callwheelmask;
+}
+
 void
-callout_tick(void)
+callout_process(void)
{
+	struct bintime max, min, next, now, tmp_max, tmp_min;
+	struct callout *tmp;
	struct callout_cpu *cc;
-	int need_softclock;
-	int bucket;
+	struct callout_tailq *sc;
+	int cpu, first, future, last, need_softclock;
 
	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
@@ -346,48 +364,112 @@ void
	need_softclock = 0;
	cc = CC_SELF();
	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	cc->cc_firsttick = cc->cc_ticks = ticks;
-	for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
-		bucket = cc->cc_softticks & callwheelmask;
-		if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
-			need_softclock = 1;
+	binuptime(&now);
+	cpu = curcpu;
+	first = callout_hash(&cc->cc_lastscan);
+	last = callout_hash(&now);
+	/*
+	 * Check if we wrapped around the entire wheel from the last scan.
+	 * In that case, we need to scan the entire wheel for pending callouts.
+	 */
+	last = (last - first >= callwheelsize) ? (first - 1) & callwheelmask :
+	    last & callwheelmask;
+	first &= callwheelmask;
+	for (;;) {
+		sc = &cc->cc_callwheel[first];
+		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
+			next = tmp->c_time;
+			bintime_sub(&next, &tmp->c_precision);
+			if (bintime_cmp(&next, &now, <=)) {
+				/*
+				 * Consumer told us the callout may be run
+				 * directly from hardware interrupt context.
+				 */
+				if (tmp->c_flags & CALLOUT_DIRECT) {
+					tmp->c_func(tmp->c_arg);
+					TAILQ_REMOVE(sc, tmp, c_links.tqe);
+					tmp->c_flags &= ~CALLOUT_PENDING;
+				} else {
+					TAILQ_INSERT_TAIL(&cc->cc_expireq,
+					    tmp, c_staiter);
+					TAILQ_REMOVE(sc, tmp, c_links.tqe);
+					tmp->c_flags |= CALLOUT_PROCESSED;
+					need_softclock = 1;
+				}
+			}
+		}
+		if (first == last)
			break;
-		}
+		first = (first + 1) & callwheelmask;
	}
+	future = (last + hz / 4) & callwheelmask;
+	max.sec = min.sec = TIME_T_MAX;
+	max.frac = min.frac = UINT64_MAX;
+	/*
+	 * Look for the first bucket in the future that contains some event,
+	 * up to some point, so that we can look for aggregation.
+	 */
+	for (;;) {
+		sc = &cc->cc_callwheel[last];
+		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
+			tmp_max = tmp_min = tmp->c_time;
+			bintime_add(&tmp_max, &tmp->c_precision);
+			bintime_sub(&tmp_min, &tmp->c_precision);
+			/*
+			 * This is the first event we're going to process or
+			 * the event's maximal time is less than the present
+			 * minimal one.  In both cases, take it.
+			 */
+			if (bintime_cmp(&tmp_max, &min, <)) {
+				max = tmp_max;
+				min = tmp_min;
+				continue;
+			}
+			/*
+			 * Event minimal time is bigger than present maximal
+			 * time, so it cannot be aggregated.
+			 */
+			if (bintime_cmp(&tmp_min, &max, >))
+				continue;
+			/*
+			 * If neither of the two previous happened, just take
+			 * the intersection of events.
+			 */
+			min = (bintime_cmp(&tmp_min, &min, >)) ? tmp_min : min;
+			max = (bintime_cmp(&tmp_max, &max, >)) ? tmp_max : max;
+		}
+		if (last == future || max.sec != TIME_T_MAX)
+			break;
+		last = (last + 1) & callwheelmask;
+	}
+	if (max.sec == TIME_T_MAX) {
+		next.sec = 0;
+		next.frac = (uint64_t)1 << (64 - 2);
+		bintime_add(&next, &now);
+	} else {
+		/*
+		 * Now that we found something to aggregate, schedule an
+		 * interrupt in the middle of the previously calculated range.
+		 */
+		bintime_add(&max, &min);
+		next = max;
+		next.frac >>= 1;
+		if (next.sec & 1)
+			next.frac |= ((uint64_t)1 << 63);
+		next.sec >>= 1;
+	}
+	cc->cc_firstevent = next;
+	if (callout_new_inserted != NULL)
+		(*callout_new_inserted)(cpu, next);
+	cc->cc_lastscan = now;
	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
-	if (need_softclock)
+	if (need_softclock) {
		swi_sched(cc->cc_cookie, 0);
-}
-
-int
-callout_tickstofirst(int limit)
-{
-	struct callout_cpu *cc;
-	struct callout *c;
-	struct callout_tailq *sc;
-	int curticks;
-	int skip = 1;
-
-	cc = CC_SELF();
-	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	curticks = cc->cc_ticks;
-	while( skip < ncallout && skip < limit ) {
-		sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
-		/* search scanning ticks */
-		TAILQ_FOREACH( c, sc, c_links.tqe ){
-			if (c->c_time - curticks <= ncallout)
-				goto out;
-		}
-		skip++;
	}
-out:
-	cc->cc_firsttick = curticks + skip;
-	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	return (skip);
}
 
static struct callout_cpu *
@@ -415,25 +497,67 @@ callout_lock(struct callout *c)
}
 
static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
-    void (*func)(void *), void *arg, int cpu)
+callout_cc_add(struct callout *c, struct callout_cpu *cc,
+    struct bintime to_bintime, void (*func)(void *), void *arg, int cpu,
+    int flags)
{
-
+	struct bintime bt;
+	int bucket, r_shift;
+	uint64_t r_val;
+
	CC_LOCK_ASSERT(cc);
-
-	if (to_ticks <= 0)
-		to_ticks = 1;
+	if (bintime_cmp(&to_bintime, &cc->cc_lastscan, <))
+		to_bintime = cc->cc_lastscan;
	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	if (flags & C_DIRECT_EXEC)
+		c->c_flags |= CALLOUT_DIRECT;
+	c->c_flags &= ~CALLOUT_PROCESSED;
	c->c_func = func;
-	c->c_time = ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	c->c_time = to_bintime;
+	bintime_clear(&c->c_precision);
+	if (flags & C_PRECISION) {
+		r_shift = ((flags >> 2) & PRECISION_RANGE);
+		r_val = (r_shift != 0) ? (uint64_t)1 << (64 - r_shift) : 0;
+		/*
+		 * Round as long as the specified precision is coarse
+		 * (up to 8ms).  In order to play safe, round to half of the
+		 * interval and set half precision.
+		 */
+		if (r_shift < 6) {
+			r_val = (r_shift != 0) ? r_val >> 2 :
+			    ((uint64_t)1 << (64 - 1)) - 1;
+			/*
+			 * Round only if c_time is not a multiple of the
+			 * rounding factor.
+			 */
+			if ((c->c_time.frac & r_val) != r_val) {
+				c->c_time.frac |= r_val - 1;
+				c->c_time.frac += 1;
+				if (c->c_time.frac == 0)
+					c->c_time.sec += 1;
+			}
+		}
+		c->c_precision.frac = r_val;
+		CTR6(KTR_CALLOUT, "rounding %d.%08x%08x to %d.%08x%08x",
+		    to_bintime.sec, (u_int) (to_bintime.frac >> 32),
+		    (u_int) (to_bintime.frac & 0xffffffff), c->c_time.sec,
+		    (u_int) (c->c_time.frac >> 32),
+		    (u_int) (c->c_time.frac & 0xffffffff));
+	}
+	bucket = get_bucket(&c->c_time);
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[bucket & callwheelmask],
	    c, c_links.tqe);
-	if ((c->c_time - cc->cc_firsttick) < 0 &&
-	    callout_new_inserted != NULL) {
-		cc->cc_firsttick = c->c_time;
-		(*callout_new_inserted)(cpu,
-		    to_ticks + (ticks - cc->cc_ticks));
+	/*
+	 * Inform the eventtimers(4) subsystem there's a new callout
+	 * that has been inserted, but only if really required.
+	 */
+	bt = c->c_time;
+	bintime_add(&bt, &c->c_precision);
+	if (callout_new_inserted != NULL &&
+	    (bintime_cmp(&bt, &cc->cc_firstevent, <) ||
+	    (cc->cc_firstevent.sec == 0 && cc->cc_firstevent.frac == 0))) {
+		cc->cc_firstevent = c->c_time;
+		(*callout_new_inserted)(cpu, c->c_time);
	}
}
@@ -442,7 +566,7 @@ callout_cc_del(struct callout *c, struct callout_c
{
 
	if (cc->cc_next == c)
-		cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+		cc->cc_next = TAILQ_NEXT(c, c_staiter);
	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
		c->c_func = NULL;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
@@ -462,7 +586,8 @@ softclock_call_cc(struct callout *c, struct callou
	struct callout_cpu *new_cc;
	void (*new_func)(void *);
	void *new_arg;
-	int new_cpu, new_ticks;
+	int new_cpu;
+	struct bintime new_time;
#endif
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
@@ -471,7 +596,7 @@ softclock_call_cc(struct callout *c, struct callou
	static timeout_t *lastfunc;
#endif
 
-	cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+	cc->cc_next = TAILQ_NEXT(c, c_staiter);
	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
	sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
	c_lock = c->c_lock;
@@ -574,7 +699,7 @@ skip:
	 * migration just perform it now.
	 */
	new_cpu = cc->cc_migration_cpu;
-	new_ticks = cc->cc_migration_ticks;
+	new_time = cc->cc_migration_time;
	new_func = cc->cc_migration_func;
	new_arg = cc->cc_migration_arg;
	cc_cme_cleanup(cc);
@@ -598,8 +723,8 @@ skip:
	 * is not easy.
	 */
	new_cc = callout_cpu_switch(c, cc, new_cpu);
-	callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
-	    new_cpu);
+	callout_cc_add(c, new_cc, new_time, new_func, new_arg,
+	    new_cpu, 0);
	CC_UNLOCK(new_cc);
	CC_LOCK(cc);
#else
@@ -633,10 +758,7 @@ softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
-	struct callout_tailq *bucket;
-	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
-	int depth;
	int mpcalls;
	int lockcalls;
	int gcalls;
@@ -644,49 +766,37 @@ softclock(void *arg)
#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */
-
+ 
	mpcalls = 0;
	lockcalls = 0;
	gcalls = 0;
-	depth = 0;
	steps = 0;
	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
-	while (cc->cc_softticks - 1 != cc->cc_ticks) {
-		/*
-		 * cc_softticks may be modified by hard clock, so cache
-		 * it while we work on a given bucket.
-		 */
-		curticks = cc->cc_softticks;
-		cc->cc_softticks++;
-		bucket = &cc->cc_callwheel[curticks & callwheelmask];
-		c = TAILQ_FIRST(bucket);
-		while (c != NULL) {
-			depth++;
-			if (c->c_time != curticks) {
-				c = TAILQ_NEXT(c, c_links.tqe);
-				++steps;
-				if (steps >= MAX_SOFTCLOCK_STEPS) {
-					cc->cc_next = c;
-					/* Give interrupts a chance. */
-					CC_UNLOCK(cc);
-					;	/* nothing */
-					CC_LOCK(cc);
-					c = cc->cc_next;
-					steps = 0;
-				}
-			} else {
-				TAILQ_REMOVE(bucket, c, c_links.tqe);
-				c = softclock_call_cc(c, cc, &mpcalls,
-				    &lockcalls, &gcalls);
-				steps = 0;
-			}
-		}
-	}
+
+	c = TAILQ_FIRST(&cc->cc_expireq);
+	while (c != NULL) {
+		++steps;
+		if (steps >= MAX_SOFTCLOCK_STEPS) {
+			cc->cc_next = c;
+			/* Give interrupts a chance. */
+			CC_UNLOCK(cc);
+			;	/* nothing */
+			CC_LOCK(cc);
+			c = cc->cc_next;
+			steps = 0;
+		} else {
+			TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter);
+			c = softclock_call_cc(c, cc, &mpcalls,
+			    &lockcalls, &gcalls);
+		}
	}
+#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+#endif
	cc->cc_next = NULL;
	CC_UNLOCK(cc);
}
@@ -776,13 +886,22 @@ callout_handle_init(struct callout_handle *handle)
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
-int
-callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
-    void *arg, int cpu)
+int
+_callout_reset_on(struct callout *c, struct bintime *bt, int to_ticks,
+    void (*ftn)(void *), void *arg, int cpu, int flags)
{
+	struct bintime now, to_bt;
	struct callout_cpu *cc;
	int cancelled = 0;
+	int bucket;
 
+	if (bt == NULL) {
+		FREQ2BT(hz,&to_bt);
+		getbinuptime(&now);
+		bintime_mul(&to_bt,to_ticks);
+		bintime_add(&to_bt,&now);
+	} else
+		to_bt = *bt;
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced.
@@ -811,12 +930,17 @@ callout_handle_init(struct callout_handle *handle)
		}
	}
	if (c->c_flags & CALLOUT_PENDING) {
-		if (cc->cc_next == c) {
-			cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+		if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+			if (cc->cc_next == c)
+				cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+			bucket = get_bucket(&c->c_time);
+			TAILQ_REMOVE(&cc->cc_callwheel[bucket], c,
+			    c_links.tqe);
+		} else {
+			if (cc->cc_next == c)
+				cc->cc_next = TAILQ_NEXT(c, c_staiter);
+			TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter);
		}
-		TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
-		    c_links.tqe);
-
		cancelled = 1;
		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
	}
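The bt == NULL path of _callout_reset_on() above is the compatibility route for callers that still think in ticks: FREQ2BT() turns 1/hz into a bintime and the relative timeout is added to the current uptime. The sketch below re-implements that conversion in userland (the kernel uses bintime_mul(); the loop here only keeps the example short), so it is an illustration of the idea rather than the actual implementation.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    #define FREQ2BT(freq, bt) do {                                  \
        (bt)->sec = 0;                                              \
        (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1;  \
    } while (0)

    static void
    bintime_add(struct bintime *bt, const struct bintime *bt2)
    {
        uint64_t u = bt->frac;

        bt->frac += bt2->frac;
        if (bt->frac < u)       /* carry out of the fraction */
            bt->sec++;
        bt->sec += bt2->sec;
    }

    int
    main(void)
    {
        struct bintime now = { 100, 0 };    /* pretend uptime: 100 s */
        struct bintime period, deadline;
        int hz = 1000, to_ticks = 250, i;

        FREQ2BT(hz, &period);               /* 1/hz as a bintime */
        deadline = now;
        for (i = 0; i < to_ticks; i++)      /* now + to_ticks/hz */
            bintime_add(&deadline, &period);
        printf("deadline = %jd + %.6f s\n", (intmax_t)deadline.sec,
            (double)deadline.frac / 18446744073709551616.0);
        return (0);
    }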
"re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, to_bt, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_bt.sec), + (u_int)(to_bt.frac >> 32)); CC_UNLOCK(cc); return (cancelled); @@ -874,7 +1000,7 @@ _callout_stop_safe(c, safe) { struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; + int use_lock, sq_locked, bucket; /* * Some old subsystems don't hold Giant while running a callout_stop(), @@ -1024,8 +1150,12 @@ again: CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, + c_links.tqe); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter); callout_cc_del(c, cc); CC_UNLOCK(cc); Index: sys/kern/kern_time.c =================================================================== --- sys/kern/kern_time.c (.../head) (revision 237923) +++ sys/kern/kern_time.c (.../projects/calloutng) (revision 237923) @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -352,37 +353,38 @@ static int nanowait; int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { - struct timespec ts, ts2, ts3; - struct timeval tv; + struct timespec ts; + struct bintime bt, bt2, tmp; int error; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) return (0); - getnanouptime(&ts); - timespecadd(&ts, rqt); - TIMESPEC_TO_TIMEVAL(&tv, rqt); + binuptime(&bt); + timespec2bintime(rqt, &tmp); + bintime_add(&bt,&tmp); for (;;) { - error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp", - tvtohz(&tv)); - getnanouptime(&ts2); + sleepq_lock(&nanowait); + sleepq_add(&nanowait, NULL, "nanslp", PWAIT | PCATCH, 0); + sleepq_set_timeout_bt(&nanowait,bt); + error = sleepq_timedwait_sig(&nanowait, PWAIT | PCATCH); + binuptime(&bt2); if (error != EWOULDBLOCK) { if (error == ERESTART) error = EINTR; if (rmt != NULL) { - timespecsub(&ts, &ts2); + tmp = bt; + bintime_sub(&tmp, &bt2); + bintime2timespec(&tmp, &ts); if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } - if (timespeccmp(&ts2, &ts, >=)) + if (bintime_cmp(&bt2, &bt, >=)) return (0); - ts3 = ts; - timespecsub(&ts3, &ts2); - TIMESPEC_TO_TIMEVAL(&tv, &ts3); } } Index: sys/kern/kern_clock.c =================================================================== --- sys/kern/kern_clock.c (.../head) (revision 237923) +++ sys/kern/kern_clock.c (.../projects/calloutng) (revision 237923) @@ -459,7 +459,7 @@ hardclock_cpu(int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); + callout_process(); } /* @@ -549,7 +549,6 @@ hardclock_cnt(int cnt, int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); /* We are in charge to handle this tick duty. */ if (newticks > 0) { /* Dangerous and no need to call these things concurrently. 
Index: sys/kern/kern_clock.c
===================================================================
--- sys/kern/kern_clock.c	(.../head)	(revision 237923)
+++ sys/kern/kern_clock.c	(.../projects/calloutng)	(revision 237923)
@@ -459,7 +459,7 @@ hardclock_cpu(int usermode)
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
-	callout_tick();
+	callout_process();
}
 
/*
@@ -549,7 +549,6 @@ hardclock_cnt(int cnt, int usermode)
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
-	callout_tick();
	/* We are in charge to handle this tick duty. */
	if (newticks > 0) {
		/* Dangerous and no need to call these things concurrently. */
Index: sys/kern/kern_clocksource.c
===================================================================
--- sys/kern/kern_clocksource.c	(.../head)	(revision 237923)
+++ sys/kern/kern_clocksource.c	(.../projects/calloutng)	(revision 237923)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
@@ -71,9 +72,7 @@ static int	round_freq(struct eventtimer *et, int
static void	getnextcpuevent(struct bintime *event, int idle);
static void	getnextevent(struct bintime *event);
static int	handleevents(struct bintime *now, int fake);
-#ifdef SMP
-static void	cpu_new_callout(int cpu, int ticks);
-#endif
+static void	cpu_new_callout(int cpu, struct bintime bt);
 
static struct mtx	et_hw_mtx;
 
@@ -135,6 +134,7 @@ struct pcpu_state {
	struct bintime	nexthard;	/* Next hardlock() event. */
	struct bintime	nextstat;	/* Next statclock() event. */
	struct bintime	nextprof;	/* Next profclock() event. */
+	struct bintime	nextcall;	/* Next callout event. */
#ifdef KDTRACE_HOOKS
	struct bintime	nextcyc;	/* Next OpenSolaris cyclics event. */
#endif
@@ -168,8 +168,8 @@ hardclockintr(void)
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR4(KTR_SPARE2, "ipi at %d:    now  %d.%08x%08x",
-	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
-	    (unsigned int)(now.frac & 0xffffffff));
+	    curcpu, now.sec, (u_int)(now.frac >> 32),
+	    (u_int)(now.frac & 0xffffffff));
	done = handleevents(&now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}
@@ -188,8 +188,8 @@ handleevents(struct bintime *now, int fake)
	int done, runs;
 
	CTR4(KTR_SPARE2, "handle at %d:  now  %d.%08x%08x",
-	    curcpu, now->sec, (unsigned int)(now->frac >> 32),
-	    (unsigned int)(now->frac & 0xffffffff));
+	    curcpu, now->sec, (u_int)(now->frac >> 32),
+	    (u_int)(now->frac & 0xffffffff));
	done = 0;
	if (fake) {
		frame = NULL;
@@ -236,6 +236,11 @@ handleevents(struct bintime *now, int fake)
		}
	} else
		state->nextprof = state->nextstat;
+	if (bintime_cmp(now, &state->nextcall, >=) &&
+	    (state->nextcall.sec != -1)) {
+		state->nextcall.sec = -1;
+		callout_process();
+	}
 
#ifdef KDTRACE_HOOKS
	if (fake == 0 && cyclic_clock_func != NULL &&
@@ -267,24 +272,28 @@ handleevents(struct bintime *now, int fake)
static void
getnextcpuevent(struct bintime *event, int idle)
{
+	struct pcpu_state *state;
	struct bintime tmp;
-	struct pcpu_state *state;
-	int skip;
-
+	int hardfreq;
+
	state = DPCPU_PTR(timerstate);
-	/* Handle hardclock() events. */
+	/* Handle hardclock() events, skipping some if CPU is idle. */
	*event = state->nexthard;
	if (idle || (!activetick && !profiling &&
	    (timer->et_flags & ET_FLAGS_PERCPU) == 0)) {
-		skip = idle ? 4 : (stathz / 2);
-		if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip)
-			skip = tc_min_ticktock_freq;
-		skip = callout_tickstofirst(hz / skip) - 1;
-		CTR2(KTR_SPARE2, "skip   at %d: %d", curcpu, skip);
-		tmp = hardperiod;
-		bintime_mul(&tmp, skip);
-		bintime_add(event, &tmp);
+		hardfreq = idle ? 4 : (stathz / 2);
+		if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > hardfreq)
+			hardfreq = tc_min_ticktock_freq;
+		if (hz > hardfreq) {
+			tmp = hardperiod;
+			bintime_mul(&tmp, hz / hardfreq - 1);
+			bintime_add(event, &tmp);
+		}
	}
+	/* Handle callout events. */
+	if (state->nextcall.sec != -1 &&
+	    bintime_cmp(event, &state->nextcall, >))
+		*event = state->nextcall;
	if (!idle) { /* If CPU is active - handle other types of events. */
		if (bintime_cmp(event, &state->nextstat, >))
			*event = state->nextstat;
@@ -625,10 +634,9 @@ cpu_initclocks_bsp(void)
#ifdef KDTRACE_HOOKS
		state->nextcyc.sec = -1;
#endif
+		state->nextcall.sec = -1;
	}
-#ifdef SMP
	callout_new_inserted = cpu_new_callout;
-#endif
	periodic = want_periodic;
	/* Grab requested timer or the best of present. */
	if (timername[0])
@@ -856,52 +864,48 @@ clocksource_cyc_set(const struct bintime *t)
}
#endif
 
-#ifdef SMP
static void
-cpu_new_callout(int cpu, int ticks)
+cpu_new_callout(int cpu, struct bintime bt)
{
-	struct bintime tmp;
+	struct bintime now;
	struct pcpu_state *state;
 
-	CTR3(KTR_SPARE2, "new co at %d:    on %d in %d",
-	    curcpu, cpu, ticks);
+	CTR5(KTR_SPARE2, "new co at %d:    on %d at %d.%08x%08x",
+	    curcpu, cpu, (int)(bt.sec), (u_int)(bt.frac >> 32),
+	    (u_int)(bt.frac & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);
-	if (state->idle == 0 || busy) {
+
+	/* If there is a callout time already set earlier -- do nothing. */
+	if (state->nextcall.sec != -1 &&
+	    bintime_cmp(&bt, &state->nextcall, >=)) {
		ET_HW_UNLOCK(state);
		return;
	}
-	/*
-	 * If timer is periodic - just update next event time for target CPU.
-	 * If timer is global - there is chance it is already programmed.
-	 */
-	if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) {
-		tmp = hardperiod;
-		bintime_mul(&tmp, ticks - 1);
-		bintime_add(&tmp, &state->nexthard);
-		if (bintime_cmp(&tmp, &state->nextevent, <))
-			state->nextevent = tmp;
-		if (periodic ||
-		    bintime_cmp(&state->nextevent, &nexttick, >=)) {
-			ET_HW_UNLOCK(state);
-			return;
-		}
+	state->nextcall = bt;
+	/* If there is some other event set earlier -- do nothing. */
+	if (bintime_cmp(&state->nextcall, &state->nextevent, >=)) {
+		ET_HW_UNLOCK(state);
+		return;
	}
-	/*
-	 * Otherwise we have to wake that CPU up, as we can't get present
-	 * bintime to reprogram global timer from here. If timer is per-CPU,
-	 * we by definition can't do it from here.
-	 */
+	state->nextevent = state->nextcall;
+	/* If timer is periodic -- there is nothing to reprogram. */
+	if (periodic) {
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/* If timer is global or of the current CPU -- reprogram it. */
+	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
+		binuptime(&now);
+		loadtimer(&now, 0);
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/* Otherwise make the other CPU reprogram it. */
+	state->handle = 1;
	ET_HW_UNLOCK(state);
-	if (timer->et_flags & ET_FLAGS_PERCPU) {
-		state->handle = 1;
-		ipi_cpu(cpu, IPI_HARDCLOCK);
-	} else {
-		if (!cpu_idle_wakeup(cpu))
-			ipi_cpu(cpu, IPI_AST);
-	}
+	ipi_cpu(cpu, IPI_HARDCLOCK);
}
-#endif
 
/*
 * Report or change the active event timers hardware.
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c	(.../head)	(revision 237923)
+++ sys/kern/subr_sleepqueue.c	(.../projects/calloutng)	(revision 237923)
@@ -361,9 +361,10 @@ sleepq_add(void *wchan, struct lock_object *lock,
 * Sets a timeout that will remove the current thread from the specified
 * sleep queue after timo ticks if the thread has not already been awakened.
 */
-void
-sleepq_set_timeout(void *wchan, int timo)
+void
+_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo)
{
+	struct sleepqueue_chain *sc;
	struct thread *td;
 
@@ -373,7 +374,12 @@ sleepq_add(void *wchan, struct lock_object *lock,
	MPASS(TD_ON_SLEEPQ(td));
	MPASS(td->td_sleepqueue == NULL);
	MPASS(wchan != NULL);
-	callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
+	if (bt == NULL)
+		callout_reset_curcpu(&td->td_slpcallout, timo,
+		    sleepq_timeout, td);
+	else
+		callout_reset_bt_on(&td->td_slpcallout, bt,
+		    sleepq_timeout, td, PCPU_GET(cpuid), 0);
}
 
/*
Index: sys/kern/sys_generic.c
===================================================================
--- sys/kern/sys_generic.c	(.../head)	(revision 237923)
+++ sys/kern/sys_generic.c	(.../projects/calloutng)	(revision 237923)
@@ -102,7 +102,7 @@ static int	dofilewrite(struct thread *, int, struc
		    off_t, int);
static void	doselwakeup(struct selinfo *, int);
static void	seltdinit(struct thread *);
-static int	seltdwait(struct thread *, int);
+static int	seltdwait(struct thread *, struct bintime *, int);
static void	seltdclear(struct thread *);
 
/*
@@ -902,7 +902,8 @@ kern_select(struct thread *td, int nd, fd_set *fd_
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
-	struct timeval atv, rtv, ttv;
+	struct bintime abt, rbt;
+	struct timeval atv;
	int error, lf, ndu, timo;
	u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
 
@@ -996,33 +997,34 @@ kern_select(struct thread *td, int nd, fd_set *fd_
 
	if (tvp != NULL) {
		atv = *tvp;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 ||
+		    atv.tv_usec >= 1000000) {
			error = EINVAL;
			goto done;
		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
	}
-	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = selscan(td, ibits, obits, nd);
		if (error || td->td_retval[0] != 0)
			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
		}
-		error = seltdwait(td, timo);
+		else {
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
+		}
		if (error)
			break;
		error = selrescan(td, ibits, obits);
@@ -1254,7 +1256,8 @@ sys_poll(td, uap)
{
	struct pollfd *bits;
	struct pollfd smallbits[32];
-	struct timeval atv, rtv, ttv;
+	struct bintime abt, rbt;
+	struct timeval atv;
	int error, timo;
	u_int nfds;
	size_t ni;
@@ -1273,33 +1276,33 @@ sys_poll(td, uap)
	if (uap->timeout != INFTIM) {
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 ||
+		    atv.tv_usec >= 1000000) {
			error = EINVAL;
			goto done;
		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
	}
-	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = pollscan(td, bits, nfds);
		if (error || td->td_retval[0] != 0)
			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
+		} else {
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
		}
-		error = seltdwait(td, timo);
		if (error)
			break;
		error = pollrescan(td);
@@ -1518,7 +1521,7 @@ selsocket(struct socket *so, int events, struct ti
			timo = ttv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&ttv);
		}
-		error = seltdwait(td, timo);
+		error = seltdwait(td, NULL, timo);
		seltdclear(td);
		if (error)
			break;
@@ -1697,7 +1700,7 @@ out:
}
 
static int
-seltdwait(struct thread *td, int timo)
+seltdwait(struct thread *td, struct bintime *bt, int timo)
{
	struct seltd *stp;
	int error;
@@ -1716,9 +1719,11 @@ static int
		mtx_unlock(&stp->st_mtx);
		return (0);
	}
-	if (timo > 0)
+	if (bt == NULL && timo > 0)
		error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
-	else
+	else if (bt != NULL)
+		error = cv_timedwait_bt_sig(&stp->st_wait, &stp->st_mtx, *bt);
+	else
		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
	mtx_unlock(&stp->st_mtx);
Index: sys/kern/kern_condvar.c
===================================================================
--- sys/kern/kern_condvar.c	(.../head)	(revision 237923)
+++ sys/kern/kern_condvar.c	(.../projects/calloutng)	(revision 237923)
@@ -342,7 +342,8 @@ _cv_timedwait(struct cv *cvp, struct lock_object *
 * a signal was caught.
 */
int
-_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+    struct bintime *bt, int timo)
{
	WITNESS_SAVE_DECL(lock_witness);
	struct lock_class *class;
@@ -379,7 +380,10 @@ int
	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
	    SLEEPQ_INTERRUPTIBLE, 0);
-	sleepq_set_timeout(cvp, timo);
+	if (bt == NULL)
+		sleepq_set_timeout(cvp, timo);
+	else
+		sleepq_set_timeout_bt(cvp, *bt);
	if (lock != &Giant.lock_object) {
		if (class->lc_flags & LC_SLEEPABLE)
			sleepq_release(cvp);
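select() and poll() above now carry an absolute bintime deadline and compare it against the current uptime with bintime_cmp(). The macro below restates that comparison idiom (its shape follows sys/time.h; it is repeated here only so the example is self-contained):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    #define bintime_cmp(a, b, cmp)              \
        (((a)->sec == (b)->sec) ?               \
            ((a)->frac cmp (b)->frac) :         \
            ((a)->sec cmp (b)->sec))

    int
    main(void)
    {
        struct bintime deadline = { 10, (uint64_t)1 << 63 };    /* 10.5 s  */
        struct bintime a = { 10, (uint64_t)1 << 62 };           /* 10.25 s */
        struct bintime b = { 11, 0 };                           /* 11.0 s  */

        printf("a >= deadline: %d\n", bintime_cmp(&a, &deadline, >=));
        printf("b >= deadline: %d\n", bintime_cmp(&b, &deadline, >=));
        return (0);
    }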
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c	(.../head)	(revision 237923)
+++ sys/netinet/tcp_timer.c	(.../projects/calloutng)	(revision 237923)
@@ -667,21 +667,39 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
 
#define	ticks_to_msecs(t)	(1000*(t) / hz)
 
+static int
+delta_bintime_in_msecs(struct bintime bt, struct bintime now)
+{
+
+	bintime_sub(&bt, &now);
+	return (((uint64_t)1000 * (uint64_t)(bt.frac >> 32)) >> 32) +
+	    (bt.sec * 1000);
+}
+
 void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+    struct xtcp_timer *xtimer)
{
-	bzero(xtimer, sizeof(struct xtcp_timer));
+	struct bintime bt, now;
+
+	bzero(xtimer, sizeof(*xtimer));
	if (timer == NULL)
		return;
-	if (callout_active(&timer->tt_delack))
-		xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
-	if (callout_active(&timer->tt_rexmt))
-		xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
-	if (callout_active(&timer->tt_persist))
-		xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
-	if (callout_active(&timer->tt_keep))
-		xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
-	if (callout_active(&timer->tt_2msl))
-		xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+	bintime_clear(&bt);
+	getbinuptime(&now);
+	if (callout_active(&timer->tt_delack))
+		xtimer->tt_delack = delta_bintime_in_msecs(
+		    timer->tt_delack.c_time, now);
+	if (callout_active(&timer->tt_rexmt))
+		xtimer->tt_rexmt = delta_bintime_in_msecs(
+		    timer->tt_rexmt.c_time, now);
+	if (callout_active(&timer->tt_persist))
+		xtimer->tt_persist = delta_bintime_in_msecs(
+		    timer->tt_persist.c_time, now);
+	if (callout_active(&timer->tt_keep))
+		xtimer->tt_keep = delta_bintime_in_msecs(
+		    timer->tt_keep.c_time, now);
+	if (callout_active(&timer->tt_2msl))
+		xtimer->tt_2msl = delta_bintime_in_msecs(
+		    timer->tt_2msl.c_time, now);
	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}
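delta_bintime_in_msecs() above scales the top 32 bits of the fraction by 1000 and adds the whole seconds. The same arithmetic in a standalone form, for illustration only:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    static int
    bintime_to_msecs(const struct bintime *bt)
    {
        /* 1000 * (upper 32 fraction bits) / 2^32, plus whole seconds. */
        return ((((uint64_t)1000 * (uint32_t)(bt->frac >> 32)) >> 32) +
            bt->sec * 1000);
    }

    int
    main(void)
    {
        struct bintime bt = { 2, (uint64_t)1 << 62 };   /* 2.25 s */

        printf("%d ms\n", bintime_to_msecs(&bt));       /* 2250 */
        return (0);
    }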
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h	(.../head)	(revision 237923)
+++ sys/sys/callout.h	(.../projects/calloutng)	(revision 237923)
@@ -47,7 +47,33 @@
 #define	CALLOUT_RETURNUNLOCKED	0x0010 /* handler returns with mtx unlocked */
 #define	CALLOUT_SHAREDLOCK	0x0020 /* callout lock held in shared mode */
 #define	CALLOUT_DFRMIGRATION	0x0040 /* callout in deferred migration mode */
+#define	CALLOUT_PROCESSED	0x0080 /* callout in wheel or processing list? */
+#define	CALLOUT_DIRECT 		0x1000 /* allow exec from hw int context */
 
+#define	C_DIRECT_EXEC		0x0001 /* direct execution of callout */
+#define	C_P1S			0x0002 /* fields related to precision */
+#define	C_P500MS		0x0006
+#define	C_P250MS		0x000a
+#define	C_P125MS		0x000e
+#define	C_P64MS			0x0012
+#define	C_P32MS			0x0016
+#define	C_P16MS			0x001a
+#define	C_P8MS			0x001e
+#define	C_P4MS			0x0022
+#define	C_P2MS			0x0026
+#define	C_P1MS			0x002a
+#define	C_P500US		0x002e
+#define	C_P250US		0x0032
+#define	C_P125US		0x0036
+#define	C_P64US			0x003a
+#define	C_P32US			0x003e
+#define	C_P16US			0x0042
+#define	C_P8US			0x0046
+#define	C_P4US			0x004a
+#define	C_P2US			0x004e
+#define	PRECISION_BITS		7
+#define	PRECISION_RANGE		((1 << PRECISION_BITS) - 1)
+
 struct callout_handle {
	struct callout *callout;
 };
@@ -67,7 +93,16 @@ void	_callout_init_lock(struct callout *, struct l
	_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object :	\
	   NULL, (flags))
 #define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
-int	callout_reset_on(struct callout *, int, void (*)(void *), void *, int);
+int	_callout_reset_on(struct callout *, struct bintime *, int,
+	    void (*)(void *), void *, int, int);
+#define	callout_reset_on(c, to_ticks, fn, arg, cpu)			\
+    _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu),	\
+        (0))
+#define	callout_reset_flags_on(c, to_ticks, fn, arg, cpu, flags)	\
+    _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu),	\
+        (flags))
+#define	callout_reset_bt_on(c, bt, fn, arg, cpu, flags)			\
+    _callout_reset_on((c), (bt), (0), (fn), (arg), (cpu), (flags))
 #define	callout_reset(c, on_tick, fn, arg)				\
     callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu)
 #define	callout_reset_curcpu(c, on_tick, fn, arg)			\
@@ -78,9 +113,8 @@ int	callout_schedule_on(struct callout *, int, int
     callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
 #define	callout_stop(c)		_callout_stop_safe(c, 0)
 int	_callout_stop_safe(struct callout *, int);
-void	callout_tick(void);
-int	callout_tickstofirst(int limit);
-extern void (*callout_new_inserted)(int cpu, int ticks);
+void	callout_process(void);
+extern void (*callout_new_inserted)(int cpu, struct bintime bt);
 
 #endif
Index: sys/sys/condvar.h
===================================================================
--- sys/sys/condvar.h	(.../head)	(revision 237923)
+++ sys/sys/condvar.h	(.../projects/calloutng)	(revision 237923)
@@ -56,7 +56,8 @@ void	_cv_wait(struct cv *cvp, struct lock_object *
 void	_cv_wait_unlock(struct cv *cvp, struct lock_object *lock);
 int	_cv_wait_sig(struct cv *cvp, struct lock_object *lock);
 int	_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo);
-int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo);
+int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+	    struct bintime *bt, int timo);
 
 void	cv_signal(struct cv *cvp);
 void	cv_broadcastpri(struct cv *cvp, int pri);
@@ -70,7 +71,9 @@ void	cv_broadcastpri(struct cv *cvp, int pri);
 #define	cv_timedwait(cvp, lock, timo)					\
	_cv_timedwait((cvp), &(lock)->lock_object, (timo))
 #define	cv_timedwait_sig(cvp, lock, timo)				\
-	_cv_timedwait_sig((cvp), &(lock)->lock_object, (timo))
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, (NULL), (timo))
+#define	cv_timedwait_bt_sig(cvp, lock, bt)				\
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, (&bt), (0))
 
 #define	cv_broadcast(cvp)	cv_broadcastpri(cvp, 0)
Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h	(.../head)	(revision 237923)
+++ sys/sys/_callout.h	(.../projects/calloutng)	(revision 237923)
@@ -39,6 +39,7 @@
 #define _SYS__CALLOUT_H
 
 #include
+#include
 
 struct lock_object;
 
@@ -50,7 +51,9 @@ struct callout {
		SLIST_ENTRY(callout) sle;
		TAILQ_ENTRY(callout) tqe;
	} c_links;
-	int	c_time;				/* ticks to the event */
+	TAILQ_ENTRY(callout) c_staiter;
+	struct bintime c_time;			/* absolute time of the event */
+	struct bintime c_precision;		/* delta allowed wrt opt */
	void	*c_arg;				/* function argument */
	void	(*c_func)(void *);		/* function to call */
	struct lock_object *c_lock;		/* lock to handle */
Index: sys/sys/sleepqueue.h
===================================================================
--- sys/sys/sleepqueue.h	(.../head)	(revision 237923)
+++ sys/sys/sleepqueue.h	(.../projects/calloutng)	(revision 237923)
@@ -108,7 +108,11 @@ struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
-void	sleepq_set_timeout(void *wchan, int timo);
+void	_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo);
+#define	sleepq_set_timeout(wchan, timo)					\
+	_sleepq_set_timeout((wchan), (NULL), (timo))
+#define	sleepq_set_timeout_bt(wchan, bt)				\
+	_sleepq_set_timeout((wchan), (&bt), (0))
 u_int	sleepq_sleepcnt(void *wchan, int queue);
 int	sleepq_timedwait(void *wchan, int pri);
 int	sleepq_timedwait_sig(void *wchan, int pri);
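Finally, a hypothetical consumer of the new interface, to show how the pieces are meant to fit together. This is not part of the patch: the foo_* names are invented and the fragment only compiles inside a kernel with this change applied. It arms a self-rearming callout that may run straight from the hardware interrupt path (C_DIRECT_EXEC) and accepts about a millisecond of slop (C_P1MS) so the scheduler can aggregate it with neighbouring events.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/kernel.h>
    #include <sys/callout.h>
    #include <sys/pcpu.h>

    static struct callout foo_co;
    static int foo_ticks;

    static void
    foo_timer(void *arg)
    {
        /* May be called from hardware interrupt context (C_DIRECT_EXEC). */
        /* ... small amount of work ... */

        /* Re-arm, accepting ~1 ms of precision slop. */
        callout_reset_flags_on(&foo_co, foo_ticks, foo_timer, arg,
            PCPU_GET(cpuid), C_DIRECT_EXEC | C_P1MS);
    }

    static void
    foo_start(void)
    {
        callout_init(&foo_co, CALLOUT_MPSAFE);
        foo_ticks = hz / 100;           /* ~10 ms period */
        callout_reset_flags_on(&foo_co, foo_ticks, foo_timer, NULL,
            PCPU_GET(cpuid), C_DIRECT_EXEC | C_P1MS);
    }

    static void
    foo_stop(void)
    {
        callout_stop(&foo_co);
    }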