Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h	(revision 237202)
+++ sys/sys/_callout.h	(working copy)
@@ -53,6 +53,7 @@ struct callout {
 	} c_links;
 	TAILQ_ENTRY(callout) c_staiter;
 	struct bintime c_time;			/* ticks to the event */
+	struct bintime c_precision;		/* delta allowed wrt opt */
 	void	*c_arg;				/* function argument */
 	void	(*c_func)(void *);		/* function to call */
 	struct lock_object *c_lock;		/* lock to handle */
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h	(revision 237275)
+++ sys/sys/callout.h	(working copy)
@@ -50,12 +50,15 @@
 #define	CALLOUT_PROCESSED	0x0080 /* callout in wheel or processing list? */
 #define	CALLOUT_DIRECT		0x1000 /* allow exec from hw int context */
 
+#define	C_DIRECT_EXEC		0x0001 /* direct execution of callout */
+#define	C_10US			0x0002 /* precision field */
+#define	C_100US			0x0004 /* precision field */
+#define	C_1MS			0x0008 /* precision field */
+
 struct callout_handle {
 	struct callout *callout;
 };
 
-#define	C_DIRECT	0x0001 /* direct execution of callout */
-
 #ifdef _KERNEL
 extern int ncallout;
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(revision 237275)
+++ sys/kern/kern_timeout.c	(working copy)
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include <sys/limits.h>
 #include 
 #include 
 #include 
@@ -358,7 +359,7 @@ get_bucket(struct bintime *bt)
 void
 callout_tick(void)
 {
-	struct bintime limit, next, now;
+	struct bintime limit, max, min, next, now, tmp_max, tmp_min;
 	struct callout *tmp;
 	struct callout_cpu *cc;
 	struct callout_tailq *sc;
@@ -375,25 +376,21 @@ callout_tick(void)
 	cpu = curcpu;
 	first = callout_hash(&cc->cc_softticks);
 	last = callout_hash(&now);
-	next.sec = -1;
-	next.frac = -1;
-	future = ((last + hz/4) & callwheelmask);
 	/*
 	 * Check if we wrapped around the entire wheel from the last scan.
 	 * In case, we need to scan entirely the wheel for pending callouts.
 	 */
-	if (last - first >= callwheelsize) {
-		first &= callwheelmask;
-		last = (first - 1) & callwheelmask;
-	}
-	else {
-		first &= callwheelmask;
-		last &= callwheelmask;
-	}
+	last = (last - first >= callwheelsize) ? (first - 1) & callwheelmask :
+	    last & callwheelmask;
+	first &= callwheelmask;
 	for (;;) {
 		sc = &cc->cc_callwheel[first];
 		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
 			if (bintime_cmp(&tmp->c_time, &now, <=)) {
+				/*
+				 * Consumer told us the callout may be run
+				 * directly from hardware interrupt context.
+				 */
 				if (tmp->c_flags & CALLOUT_DIRECT) {
 					tmp->c_func(tmp->c_arg);
 					TAILQ_REMOVE(sc, tmp, c_links.tqe);
@@ -410,31 +407,69 @@
 		}
 		if (first == last)
 			break;
-		first = ((first + 1) & callwheelmask);
+		first = (first + 1) & callwheelmask;
 	}
+	future = ((last + hz/4) & callwheelmask);
+	max.sec = max.frac = INT_MAX;
+	min.sec = min.frac = INT_MAX;
 	limit.sec = 0;
 	limit.frac = (uint64_t)1 << (64 - 2);
 	bintime_add(&limit, &now);
-	for (;;) {
+	/*
+	 * Look for the first bucket in the future that contains some event,
+	 * up to a limit, so that nearby events can be aggregated.
+	 */
+	for (;;) {
 		sc = &cc->cc_callwheel[last];
 		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
-			if (bintime_cmp(&tmp->c_time, &limit, <=)) {
-				if (next.sec == -1 ||
-				    bintime_cmp(&tmp->c_time, &next, <)) {
-					next = tmp->c_time;
-					cpu = tmp->c_cpu;
-				}
+			tmp_max = tmp_min = tmp->c_time;
+			bintime_add(&tmp_max, &tmp->c_precision);
+			bintime_sub(&tmp_min, &tmp->c_precision);
+			/*
+			 * This is the first event we're going to process or
+			 * the event's maximal time is less than the present
+			 * minimal one. In both cases, take it.
+			 */
+			if (bintime_cmp(&tmp_max, &min, <)) {
+				max = tmp_max;
+				min = tmp_min;
+				continue;
 			}
-		}
-		if ((last == future) || (next.sec != -1))
+			/*
+			 * The event's minimal time is greater than the
+			 * present maximal time, so it cannot be aggregated.
+			 */
+			if (bintime_cmp(&tmp_min, &max, >))
+				continue;
+			/*
+			 * If neither of the two previous cases happened,
+			 * just take the intersection of the events' ranges.
+			 */
+			min = (bintime_cmp(&tmp_min, &min, >)) ? tmp_min : min;
+			max = (bintime_cmp(&tmp_max, &max, <)) ? tmp_max : max;
+		}
+		if (last == future ||
+		    (max.sec != INT_MAX && min.sec != INT_MAX))
 			break;
-		last = ((last + 1) & callwheelmask);
-	}
-	if (next.sec == -1) {
+		last = (last + 1) & callwheelmask;
+	}
+	if (max.sec == INT_MAX && min.sec == INT_MAX) {
 		next.sec = 0;
 		next.frac = (uint64_t)1 << (64 - 2);
 		bintime_add(&next, &now);
 	}
+	else {
+		/*
+		 * Now that we found something to aggregate, schedule an
+		 * interrupt in the middle of the previously calculated range.
+		 */
+		bintime_add(&max, &min);
+		next = max;
+		next.frac >>= 1;
+		if (next.sec & 1)
+			next.frac |= ((uint64_t)1 << 63);
+		next.sec >>= 1;
+	}
 	cc->cc_firsttick = next;
 	if (callout_new_inserted != NULL)
 		(*callout_new_inserted)(cpu, next);
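Not part of the patch, just an illustration of what the aggregation loop above computes: every pending callout is treated as a window [c_time - c_precision, c_time + c_precision], overlapping windows are narrowed to their intersection, and a single hardware interrupt is scheduled at the midpoint of whatever range is left. Below is a minimal userland sketch of that logic; struct bt and the bt_add()/bt_sub()/bt_lt() helpers are simplified stand-ins for the kernel's struct bintime and the bintime_add()/bintime_sub()/bintime_cmp() macros, and the two sample events are made up.

#include <stdint.h>
#include <stdio.h>

struct bt {
	int64_t sec;
	uint64_t frac;		/* 1 second == 2^64 fractional units */
};

static struct bt
bt_add(struct bt a, struct bt b)
{
	struct bt r;

	r.frac = a.frac + b.frac;
	r.sec = a.sec + b.sec + (r.frac < a.frac);	/* carry */
	return (r);
}

static struct bt
bt_sub(struct bt a, struct bt b)
{
	struct bt r;

	r.frac = a.frac - b.frac;
	r.sec = a.sec - b.sec - (a.frac < b.frac);	/* borrow */
	return (r);
}

static int
bt_lt(struct bt a, struct bt b)
{
	return (a.sec < b.sec || (a.sec == b.sec && a.frac < b.frac));
}

int
main(void)
{
	/* Two events: 1.50s +/- 0.10s and ~1.55s +/- ~0.02s. */
	struct bt ev[2] = {
		{ 1, (uint64_t)1 << 63 },	/* 1.50s */
		{ 1, 0x8cccccccccccccccULL },	/* ~1.55s */
	};
	struct bt prec[2] = {
		{ 0, 0x199999999999999aULL },	/* ~0.10s */
		{ 0, 0x051eb851eb851eb8ULL },	/* ~0.02s */
	};
	struct bt min = { 0, 0 }, max = { 0, 0 }, tmp_min, tmp_max, next;
	int i, first = 1;

	for (i = 0; i < 2; i++) {
		tmp_min = bt_sub(ev[i], prec[i]);
		tmp_max = bt_add(ev[i], prec[i]);
		if (first || bt_lt(tmp_max, min)) {
			/* First event, or entirely earlier: restart window. */
			min = tmp_min;
			max = tmp_max;
			first = 0;
			continue;
		}
		if (bt_lt(max, tmp_min))
			continue;	/* disjoint: cannot be aggregated */
		/* Overlap: narrow [min, max] to the intersection. */
		if (bt_lt(min, tmp_min))
			min = tmp_min;
		if (bt_lt(tmp_max, max))
			max = tmp_max;
	}
	/* Midpoint (min + max) / 2, shifting the seconds' carry into frac. */
	next = bt_add(min, max);
	next.frac >>= 1;
	if (next.sec & 1)
		next.frac |= (uint64_t)1 << 63;
	next.sec >>= 1;
	printf("wake at %.6f s\n", (double)next.sec +
	    (double)next.frac / 18446744073709551616.0);
	return (0);
}

With the sample windows [1.40s, 1.60s] and [1.53s, 1.57s] this prints a wakeup at ~1.55s, which lies inside both events' tolerances, so one interrupt serves both callouts.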
@@ -478,6 +513,7 @@ callout_cc_add(struct callout *c, struct callout_c
     struct bintime to_bintime, void (*func)(void *), void *arg, int cpu,
     int flags)
 {
+	struct timeval tv;
 	int bucket;
 
 	CC_LOCK_ASSERT(cc);
@@ -486,11 +522,28 @@
 	}
 	c->c_arg = arg;
 	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
-	if (flags & C_DIRECT)
+	if (flags & C_DIRECT_EXEC)
 		c->c_flags |= CALLOUT_DIRECT;
 	c->c_flags &= ~CALLOUT_PROCESSED;
 	c->c_func = func;
 	c->c_time = to_bintime;
+	tv.tv_sec = 0;
+	if (flags & C_10US) {
+		tv.tv_usec = 10;
+		timeval2bintime(&tv, &c->c_precision);
+	}
+	else if (flags & C_100US) {
+		tv.tv_usec = 100;
+		timeval2bintime(&tv, &c->c_precision);
+	}
+	else if (flags & C_1MS) {
+		tv.tv_usec = 1000;
+		timeval2bintime(&tv, &c->c_precision);
+	}
+	else {
+		c->c_precision.sec = 0;
+		c->c_precision.frac = 0;
+	}
 	bucket = get_bucket(&c->c_time);
 	TAILQ_INSERT_TAIL(&cc->cc_callwheel[bucket & callwheelmask], c,
 	    c_links.tqe);
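Also outside the patch: the C_10US/C_100US/C_1MS branches above always store one of three fixed deltas in c_precision. The sketch below reproduces that arithmetic in userland; tv2bt() mirrors timeval2bintime() from sys/time.h (frac = tv_usec * 2^64 / 1000000), while the table and the local struct bintime are just scaffolding for the example.

#include <stdint.h>
#include <stdio.h>

struct bintime {
	int64_t sec;
	uint64_t frac;		/* 1 second == 2^64 fractional units */
};

/*
 * Stand-in for timeval2bintime() from sys/time.h:
 * 18446744073709 == int(2^64 / 1000000).
 */
static void
tv2bt(int64_t sec, int64_t usec, struct bintime *bt)
{
	bt->sec = sec;
	bt->frac = (uint64_t)usec * 18446744073709ULL;
}

int
main(void)
{
	const struct {
		const char *flag;	/* flag names from sys/sys/callout.h */
		long usec;
	} tab[] = {
		{ "C_10US", 10 },
		{ "C_100US", 100 },
		{ "C_1MS", 1000 },
	};
	struct bintime bt;
	int i;

	for (i = 0; i < 3; i++) {
		tv2bt(0, tab[i].usec, &bt);
		printf("%-7s -> c_precision = { %jd, 0x%016jx } = %.9f s\n",
		    tab[i].flag, (intmax_t)bt.sec, (uintmax_t)bt.frac,
		    (double)bt.frac / 18446744073709551616.0);
	}
	return (0);
}

With C_1MS, for example, each side of the window gets 0.001 s, so two such callouts that fire within 2 ms of each other can be coalesced into a single interrupt by the callout_tick() changes above.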