Index: usr.bin/procstat =================================================================== --- usr.bin/procstat (.../head) (revision 239166) +++ usr.bin/procstat (.../projects/calloutng) (revision 239166) Property changes on: usr.bin/procstat ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/usr.bin/procstat:r236314-239017 Index: usr.bin/calendar =================================================================== --- usr.bin/calendar (.../head) (revision 239166) +++ usr.bin/calendar (.../projects/calloutng) (revision 239166) Property changes on: usr.bin/calendar ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/usr.bin/calendar:r236314-239017 Index: crypto/openssh =================================================================== --- crypto/openssh (.../head) (revision 239166) +++ crypto/openssh (.../projects/calloutng) (revision 239166) Property changes on: crypto/openssh ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/crypto/openssh:r236314-237808 Index: crypto/openssl =================================================================== --- crypto/openssl (.../head) (revision 239166) +++ crypto/openssl (.../projects/calloutng) (revision 239166) Property changes on: crypto/openssl ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/crypto/openssl:r236314-238495 Index: gnu/usr.bin/cc/cc_tools =================================================================== --- gnu/usr.bin/cc/cc_tools (.../head) (revision 239166) +++ gnu/usr.bin/cc/cc_tools (.../projects/calloutng) (revision 239166) Property changes on: gnu/usr.bin/cc/cc_tools ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/gnu/usr.bin/cc/cc_tools:r236314-238495 Index: gnu/lib 
=================================================================== --- gnu/lib (.../head) (revision 239166) +++ gnu/lib (.../projects/calloutng) (revision 239166) Property changes on: gnu/lib ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/gnu/lib:r236314-237202 Index: sbin/ipfw =================================================================== --- sbin/ipfw (.../head) (revision 239166) +++ sbin/ipfw (.../projects/calloutng) (revision 239166) Property changes on: sbin/ipfw ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sbin/ipfw:r236314-239017 Index: sbin =================================================================== --- sbin (.../head) (revision 239166) +++ sbin (.../projects/calloutng) (revision 239166) Property changes on: sbin ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sbin:r236314-239165 Index: contrib/gcc =================================================================== --- contrib/gcc (.../head) (revision 239166) +++ contrib/gcc (.../projects/calloutng) (revision 239166) Property changes on: contrib/gcc ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/gcc:r236314-238495 Index: contrib/compiler-rt =================================================================== --- contrib/compiler-rt (.../head) (revision 239166) +++ contrib/compiler-rt (.../projects/calloutng) (revision 239166) Property changes on: contrib/compiler-rt ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/compiler-rt:r236314-239165 Index: contrib/libc++ =================================================================== --- contrib/libc++ (.../head) (revision 239166) +++ contrib/libc++ (.../projects/calloutng) (revision 239166) Property changes on: contrib/libc++ 
___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libc++:r236314-237202 Index: contrib/groff =================================================================== --- contrib/groff (.../head) (revision 239166) +++ contrib/groff (.../projects/calloutng) (revision 239166) Property changes on: contrib/groff ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/groff:r236314-239017 Index: contrib/less =================================================================== --- contrib/less (.../head) (revision 239166) +++ contrib/less (.../projects/calloutng) (revision 239166) Property changes on: contrib/less ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/less:r236314-239017 Index: contrib/libarchive/libarchive_fe =================================================================== --- contrib/libarchive/libarchive_fe (.../head) (revision 239166) +++ contrib/libarchive/libarchive_fe (.../projects/calloutng) (revision 239166) Property changes on: contrib/libarchive/libarchive_fe ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libarchive/libarchive_fe:r236314-239017 Index: contrib/libarchive/tar =================================================================== --- contrib/libarchive/tar (.../head) (revision 239166) +++ contrib/libarchive/tar (.../projects/calloutng) (revision 239166) Property changes on: contrib/libarchive/tar ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libarchive/tar:r236314-239017 Index: contrib/libarchive/libarchive =================================================================== --- contrib/libarchive/libarchive (.../head) (revision 239166) +++ contrib/libarchive/libarchive (.../projects/calloutng) (revision 239166) Property changes on: 
contrib/libarchive/libarchive ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libarchive/libarchive:r236314-239017 Index: contrib/libarchive/cpio =================================================================== --- contrib/libarchive/cpio (.../head) (revision 239166) +++ contrib/libarchive/cpio (.../projects/calloutng) (revision 239166) Property changes on: contrib/libarchive/cpio ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libarchive/cpio:r236314-239017 Index: contrib/libarchive =================================================================== --- contrib/libarchive (.../head) (revision 239166) +++ contrib/libarchive (.../projects/calloutng) (revision 239166) Property changes on: contrib/libarchive ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libarchive:r236314-239017 Index: contrib/libstdc++ =================================================================== --- contrib/libstdc++ (.../head) (revision 239166) +++ contrib/libstdc++ (.../projects/calloutng) (revision 239166) Property changes on: contrib/libstdc++ ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/libstdc++:r236314-237202 Index: contrib/binutils =================================================================== --- contrib/binutils (.../head) (revision 239166) +++ contrib/binutils (.../projects/calloutng) (revision 239166) Property changes on: contrib/binutils ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/binutils:r236314-238495 Index: contrib/top =================================================================== --- contrib/top (.../head) (revision 239166) +++ contrib/top (.../projects/calloutng) (revision 239166) Property changes on: contrib/top 
___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/top:r236314-237808 Index: contrib/bind9 =================================================================== --- contrib/bind9 (.../head) (revision 239166) +++ contrib/bind9 (.../projects/calloutng) (revision 239166) Property changes on: contrib/bind9 ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/bind9:r236314-239017 Index: contrib/dtc =================================================================== --- contrib/dtc (.../head) (revision 239166) +++ contrib/dtc (.../projects/calloutng) (revision 239166) Property changes on: contrib/dtc ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/dtc:r236314-239017 Index: contrib/llvm/tools/clang =================================================================== --- contrib/llvm/tools/clang (.../head) (revision 239166) +++ contrib/llvm/tools/clang (.../projects/calloutng) (revision 239166) Property changes on: contrib/llvm/tools/clang ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/llvm/tools/clang:r236314-239017 Index: contrib/llvm =================================================================== --- contrib/llvm (.../head) (revision 239166) +++ contrib/llvm (.../projects/calloutng) (revision 239166) Property changes on: contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/contrib/llvm:r236314-239017 Index: share/man/man4 =================================================================== --- share/man/man4 (.../head) (revision 239166) +++ share/man/man4 (.../projects/calloutng) (revision 239166) Property changes on: share/man/man4 ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/head/share/man/man4:r236314-239165 Index: usr.sbin/ndiscvt =================================================================== --- usr.sbin/ndiscvt (.../head) (revision 239166) +++ usr.sbin/ndiscvt (.../projects/calloutng) (revision 239166) Property changes on: usr.sbin/ndiscvt ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/usr.sbin/ndiscvt:r236314-237202 Index: usr.sbin/jail =================================================================== --- usr.sbin/jail (.../head) (revision 239166) +++ usr.sbin/jail (.../projects/calloutng) (revision 239166) Property changes on: usr.sbin/jail ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/usr.sbin/jail:r236314-237808 Index: cddl/contrib/opensolaris/cmd/zfs =================================================================== --- cddl/contrib/opensolaris/cmd/zfs (.../head) (revision 239166) +++ cddl/contrib/opensolaris/cmd/zfs (.../projects/calloutng) (revision 239166) Property changes on: cddl/contrib/opensolaris/cmd/zfs ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/cddl/contrib/opensolaris/cmd/zfs:r236314-238495 Index: cddl/contrib/opensolaris/lib/libzfs =================================================================== --- cddl/contrib/opensolaris/lib/libzfs (.../head) (revision 239166) +++ cddl/contrib/opensolaris/lib/libzfs (.../projects/calloutng) (revision 239166) Property changes on: cddl/contrib/opensolaris/lib/libzfs ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/cddl/contrib/opensolaris/lib/libzfs:r236314-239017 Index: cddl/contrib/opensolaris =================================================================== --- cddl/contrib/opensolaris (.../head) (revision 239166) +++ cddl/contrib/opensolaris (.../projects/calloutng) (revision 239166) Property changes on: cddl/contrib/opensolaris 
___________________________________________________________________ Modified: svn:mergeinfo Merged /head/cddl/contrib/opensolaris:r236314-239017 Index: lib/libutil =================================================================== --- lib/libutil (.../head) (revision 239166) +++ lib/libutil (.../projects/calloutng) (revision 239166) Property changes on: lib/libutil ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libutil:r236314-237808 Index: lib/libc/stdtime =================================================================== --- lib/libc/stdtime (.../head) (revision 239166) +++ lib/libc/stdtime (.../projects/calloutng) (revision 239166) Property changes on: lib/libc/stdtime ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc/stdtime:r236314-237808 Index: lib/libc =================================================================== --- lib/libc (.../head) (revision 239166) +++ lib/libc (.../projects/calloutng) (revision 239166) Property changes on: lib/libc ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc:r236314-239165 Index: lib/libz =================================================================== --- lib/libz (.../head) (revision 239166) +++ lib/libz (.../projects/calloutng) (revision 239166) Property changes on: lib/libz ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libz:r236314-237808 Index: lib/libprocstat/zfs.c =================================================================== --- lib/libprocstat/zfs.c (.../head) (revision 239166) +++ lib/libprocstat/zfs.c (.../projects/calloutng) (revision 239166) @@ -35,6 +35,7 @@ #undef lbolt #undef lbolt64 +#undef gethrestime #undef gethrestime_sec #include #include Index: sys/conf/NOTES =================================================================== --- 
sys/conf/NOTES (.../head) (revision 239166) +++ sys/conf/NOTES (.../projects/calloutng) (revision 239166) @@ -259,6 +259,8 @@ options SX_NOINLINE # SMP Debugging Options: # +# CALLOUT_PROFILING enables rudimentary profiling of the callwheel data +# structure used as backend in callout(9). # PREEMPTION allows the threads that are in the kernel to be preempted by # higher priority [interrupt] threads. It helps with interactivity # and allows interrupt threads to run sooner rather than waiting. @@ -297,6 +299,9 @@ options LOCK_PROFILING options MPROF_BUFFERS="1536" options MPROF_HASH_SIZE="1543" +# Profiling for the callout(9) backend. +options CALLOUT_PROFILING + # Profiling for internal hash tables. options SLEEPQUEUE_PROFILING options TURNSTILE_PROFILING Index: sys/conf/options =================================================================== --- sys/conf/options (.../head) (revision 239166) +++ sys/conf/options (.../projects/calloutng) (revision 239166) @@ -66,6 +66,7 @@ SYSCTL_DEBUG opt_sysctl.h ADAPTIVE_LOCKMGRS ALQ AUDIT opt_global.h +CALLOUT_PROFILING CAPABILITIES opt_capsicum.h CAPABILITY_MODE opt_capsicum.h CODA_COMPAT_5 opt_coda.h Index: sys/conf =================================================================== --- sys/conf (.../head) (revision 239166) +++ sys/conf (.../projects/calloutng) (revision 239166) Property changes on: sys/conf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/conf:r236314-239165 Index: sys/kern/kern_timeout.c =================================================================== --- sys/kern/kern_timeout.c (.../head) (revision 239166) +++ sys/kern/kern_timeout.c (.../projects/calloutng) (revision 239166) @@ -37,6 +37,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_callout_profiling.h" #include "opt_kdtrace.h" #include @@ -47,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -68,6 +70,7 @@ 
SDT_PROBE_DEFINE(callout_execute, kernel, , callou SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0, "struct callout *"); +#ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); @@ -80,65 +83,82 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTL static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); +static int avg_depth_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, + "Average number of direct callouts examined per callout_process call. " + "Units = 1/1000"); +static int avg_lockcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, + &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + "callout_process call. Units = 1/1000"); +static int avg_mpcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, + 0, "Average number of MP direct callouts made per callout_process call. " + "Units = 1/1000"); +#endif /* * TODO: * allocate more timeout table slots when table overflows. */ -int callwheelsize, callwheelbits, callwheelmask; +int callwheelsize, callwheelmask; /* - * The callout cpu migration entity represents informations necessary for - * describing the migrating callout to the new callout cpu. + * The callout cpu exec entities represent informations necessary for + * describing the state of callouts currently running on the CPU and the ones + * necessary for migrating callouts to the new callout cpu. In particular, + * the first entry of the array cc_exec_entity holds informations for callout + * running in SWI thread context, while the second one holds informations + * for callout running directly from hardware interrupt context. 
* The cached informations are very important for deferring migration when * the migrating callout is already running. */ -struct cc_mig_ent { +struct cc_exec { + struct callout *cc_next; + struct callout *cc_curr; #ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - int ce_migration_ticks; + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + struct bintime ce_migration_time; #endif + int cc_cancel; + int cc_waiting; }; /* - * There is one struct callout_cpu per cpu, holding all relevant + * There is one struct callou_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. - * In particular: - * cc_ticks is incremented once per tick in callout_cpu(). - * It tracks the global 'ticks' but in a way that the individual - * threads should not worry about races in the order in which - * hardclock() and hardclock_cpu() run on the various CPUs. - * cc_softclock is advanced in callout_cpu() to point to the - * first entry in cc_callwheel that may need handling. In turn, - * a softclock() is scheduled so it can serve the various entries i - * such that cc_softclock <= i <= cc_ticks . - * XXX maybe cc_softclock and cc_ticks should be volatile ? - * - * cc_ticks is also used in callout_reset_cpu() to determine - * when the callout should be served. 
*/ struct callout_cpu { - struct cc_mig_ent cc_migrating_entity; + struct cc_exec cc_exec_entity[2]; struct mtx cc_lock; struct callout *cc_callout; struct callout_tailq *cc_callwheel; + struct callout_tailq cc_expireq; struct callout_list cc_callfree; - struct callout *cc_next; - struct callout *cc_curr; + struct bintime cc_firstevent; + struct bintime cc_lastscan; void *cc_cookie; - int cc_ticks; - int cc_softticks; - int cc_cancel; - int cc_waiting; - int cc_firsttick; }; +#define cc_exec_curr cc_exec_entity[0].cc_curr +#define cc_exec_next cc_exec_entity[0].cc_next +#define cc_exec_cancel cc_exec_entity[0].cc_cancel +#define cc_exec_waiting cc_exec_entity[0].cc_waiting +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr +#define cc_exec_next_dir cc_exec_entity[1].cc_next +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting + #ifdef SMP -#define cc_migration_func cc_migrating_entity.ce_migration_func -#define cc_migration_arg cc_migrating_entity.ce_migration_arg -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu -#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks +#define cc_migration_func cc_exec_entity[0].ce_migration_func +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg +#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu +#define cc_migration_time cc_exec_entity[0].ce_migration_time +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU @@ -152,41 +172,58 @@ struct callout_cpu cc_cpu; #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) +#define C_PRECISION 0x2 +#define 
FREQ2BT(freq, bt) \ +{ \ + (bt)->sec = 0; \ + (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ +} + +#define TIME_T_MAX \ + (sizeof(time_t) == (sizeof(int64_t)) ? INT64_MAX : INT32_MAX) + static int timeout_cpu; -void (*callout_new_inserted)(int cpu, int ticks) = NULL; +void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL; +static struct callout * +softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, + int *lockcalls, int *gcalls, int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is curr_callout. - * If curr_callout is non-NULL, threads waiting in + * cc_curr - If a callout is in progress, it is cc_curr. + * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and c_lock held + * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after - * c_lock is successfully acquired. + * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when - * curr_callout is non-NULL. + * cc_curr is non-NULL. */ /* * Resets the migration entity tied to a specific callout cpu. 
*/ static void -cc_cme_cleanup(struct callout_cpu *cc) +cc_cme_cleanup(struct callout_cpu *cc, int direct) { - + + cc->cc_exec_entity[direct].cc_curr = NULL; + cc->cc_exec_entity[direct].cc_next = NULL; + cc->cc_exec_entity[direct].cc_cancel = 0; + cc->cc_exec_entity[direct].cc_waiting = 0; #ifdef SMP - cc->cc_migration_cpu = CPUBLOCK; - cc->cc_migration_ticks = 0; - cc->cc_migration_func = NULL; - cc->cc_migration_arg = NULL; + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; + bintime_clear(&cc->cc_exec_entity[direct].ce_migration_time); + cc->cc_exec_entity[direct].ce_migration_func = NULL; + cc->cc_exec_entity[direct].ce_migration_arg = NULL; #endif } @@ -194,11 +231,12 @@ static void * Checks if migration is requested by a specific callout cpu. */ static int -cc_cme_migrating(struct callout_cpu *cc) +cc_cme_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP - return (cc->cc_migration_cpu != CPUBLOCK); + + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); #else return (0); #endif @@ -220,10 +258,9 @@ kern_timeout_callwheel_alloc(caddr_t v) /* * Calculate callout wheel size */ - for (callwheelsize = 1, callwheelbits = 0; - callwheelsize < ncallout; - callwheelsize <<= 1, ++callwheelbits) - ; + callwheelsize = 1; + while (callwheelsize < ncallout) + callwheelsize <<= 1; callwheelmask = callwheelsize - 1; cc->cc_callout = (struct callout *)v; @@ -244,7 +281,9 @@ callout_cpu_init(struct callout_cpu *cc) for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&cc->cc_callwheel[i]); } - cc_cme_cleanup(cc); + TAILQ_INIT(&cc->cc_expireq); + for (i = 0; i < 2; i++) + cc_cme_cleanup(cc, i); if (cc->cc_callout == NULL) return; for (i = 0; i < ncallout; i++) { @@ -332,62 +371,161 @@ start_softclock(void *dummy) SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); +static inline int +callout_hash(struct bintime *bt) +{ + + return (int) ((bt->sec<<10)+(bt->frac>>54)); +} + +static inline int +get_bucket(struct bintime *bt) 
+{ + + return callout_hash(bt) & callwheelmask; +} + void -callout_tick(void) +callout_process(struct bintime *now) { + struct bintime max, min, next, tmp_max, tmp_min; + struct callout *tmp; struct callout_cpu *cc; - int need_softclock; - int bucket; + struct callout_tailq *sc; + int cpu, depth_dir, first, future, mpcalls_dir, last, lockcalls_dir, + need_softclock; /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. */ need_softclock = 0; + depth_dir = 0; + mpcalls_dir = 0; + lockcalls_dir = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - cc->cc_firsttick = cc->cc_ticks = ticks; - for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) { - bucket = cc->cc_softticks & callwheelmask; - if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) { - need_softclock = 1; + cpu = curcpu; + first = callout_hash(&cc->cc_lastscan); + last = callout_hash(now); + /* + * Check if we wrapped around the entire wheel from the last scan. + * In case, we need to scan entirely the wheel for pending callouts. + */ + last = (last - first >= callwheelsize) ? (first - 1) & callwheelmask : + last & callwheelmask; + first &= callwheelmask; + for (;;) { + sc = &cc->cc_callwheel[first]; + tmp = TAILQ_FIRST(sc); + while (tmp != NULL) { + next = tmp->c_time; + bintime_sub(&next, &tmp->c_precision); + if (bintime_cmp(&next, now, <=)) { + /* + * Consumer told us the callout may be run + * directly from hardware interrupt context. 
+ */ + if (tmp->c_flags & CALLOUT_DIRECT) { + ++depth_dir; + TAILQ_REMOVE(sc, tmp, c_links.tqe); + tmp = softclock_call_cc(tmp, cc, + &mpcalls_dir, &lockcalls_dir, + NULL, 1); + } else { + TAILQ_INSERT_TAIL(&cc->cc_expireq, + tmp, c_staiter); + TAILQ_REMOVE(sc, tmp, c_links.tqe); + tmp->c_flags |= CALLOUT_PROCESSED; + need_softclock = 1; + tmp = TAILQ_NEXT(tmp, c_links.tqe); + } + } + else + tmp = TAILQ_NEXT(tmp, c_links.tqe); + } + if (first == last) break; - } + first = (first + 1) & callwheelmask; } + cc->cc_exec_next_dir = NULL; + future = (last + hz / 4) & callwheelmask; + max.sec = min.sec = TIME_T_MAX; + max.frac = min.frac = UINT64_MAX; + /* + * Look for the first bucket in the future that contains some event, + * up to some point, so that we can look for aggregation. + */ + for (;;) { + sc = &cc->cc_callwheel[last]; + TAILQ_FOREACH(tmp, sc, c_links.tqe) { + tmp_max = tmp_min = tmp->c_time; + if (bintime_isset(&tmp->c_precision)) { + bintime_add(&tmp_max, &tmp->c_precision); + bintime_sub(&tmp_min, &tmp->c_precision); + } + /* + * This is the fist event we're going to process or + * event maximal time is less than present minimal. + * In both cases, take it. + */ + if (bintime_cmp(&tmp_max, &min, <)) { + max = tmp_max; + min = tmp_min; + continue; + } + /* + * Event minimal time is bigger than present maximal + * time, so it cannot be aggregated. + */ + if (bintime_cmp(&tmp_min, &max, >)) + continue; + /* + * If neither of the two previous happened, just take + * the intersection of events. + */ + min = (bintime_cmp(&tmp_min, &min, >)) ? tmp_min : min; + max = (bintime_cmp(&tmp_max, &max, >)) ? tmp_max : max; + } + if (last == future || max.sec != TIME_T_MAX) + break; + last = (last + 1) & callwheelmask; + } + if (max.sec == TIME_T_MAX) { + next = *now; + bintime_addx(&next, (uint64_t)1 << (64 - 2)); + } else { + /* + * Now that we found something to aggregate, schedule an + * interrupt in the middle of the previously calculated range. 
+ */ + if (bintime_cmp(&max, &min, !=)) { + bintime_add(&max, &min); + next = max; + next.frac >>= 1; + if (next.sec & 1) + next.frac |= ((uint64_t)1 << 63); + next.sec >>= 1; + } else + next = max; + } + if (callout_new_inserted != NULL) + (*callout_new_inserted)(cpu, next); + cc->cc_firstevent = next; + cc->cc_lastscan = *now; +#ifdef CALLOUT_PROFILING + avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; + avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; + avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; +#endif mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ - if (need_softclock) + if (need_softclock) { swi_sched(cc->cc_cookie, 0); -} - -int -callout_tickstofirst(int limit) -{ - struct callout_cpu *cc; - struct callout *c; - struct callout_tailq *sc; - int curticks; - int skip = 1; - - cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - curticks = cc->cc_ticks; - while( skip < ncallout && skip < limit ) { - sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; - /* search scanning ticks */ - TAILQ_FOREACH( c, sc, c_links.tqe ){ - if (c->c_time - curticks <= ncallout) - goto out; - } - skip++; } -out: - cc->cc_firsttick = curticks + skip; - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); - return (skip); } static struct callout_cpu * @@ -415,34 +553,78 @@ callout_lock(struct callout *c) } static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks, - void (*func)(void *), void *arg, int cpu) +callout_cc_add(struct callout *c, struct callout_cpu *cc, + struct bintime to_bintime, void (*func)(void *), void *arg, int cpu, + int flags) { - + struct bintime bt; + uint64_t r_val; + int bucket, r_shift; + CC_LOCK_ASSERT(cc); - - if (to_ticks <= 0) - to_ticks = 1; + if (bintime_cmp(&to_bintime, &cc->cc_lastscan, <)) + to_bintime = cc->cc_lastscan; c->c_arg = arg; 
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + if (flags & C_DIRECT_EXEC) + c->c_flags |= CALLOUT_DIRECT; + c->c_flags &= ~CALLOUT_PROCESSED; c->c_func = func; - c->c_time = ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], - c, c_links.tqe); - if ((c->c_time - cc->cc_firsttick) < 0 && - callout_new_inserted != NULL) { - cc->cc_firsttick = c->c_time; - (*callout_new_inserted)(cpu, - to_ticks + (ticks - cc->cc_ticks)); + c->c_time = to_bintime; + bintime_clear(&c->c_precision); + if (flags & C_PRECISION) { + r_shift = ((flags >> 2) & PRECISION_RANGE); + r_val = (r_shift != 0) ? (uint64_t)1 << (64 - r_shift) : 0; + /* + * Round as far as precision specified is coarse (up to 8ms). + * In order to play safe, round to to half of the interval and + * set half precision. + */ + if (r_shift < 6) { + r_val = (r_shift != 0) ? r_val >> 2 : + ((uint64_t)1 << (64 - 1)) - 1; + /* + * Round only if c_time is not a multiple of the + * rounding factor. + */ + if ((c->c_time.frac & r_val) != r_val) { + c->c_time.frac |= r_val - 1; + c->c_time.frac += 1; + if (c->c_time.frac == 0) + c->c_time.sec += 1; + } + } + c->c_precision.frac = r_val; + CTR6(KTR_CALLOUT, "rounding %d.%08x%08x to %d.%08x%08x", + to_bintime.sec, (u_int) (to_bintime.frac >> 32), + (u_int) (to_bintime.frac & 0xffffffff), c->c_time.sec, + (u_int) (c->c_time.frac >> 32), + (u_int) (c->c_time.frac & 0xffffffff)); + } + bucket = get_bucket(&c->c_time); + TAILQ_INSERT_TAIL(&cc->cc_callwheel[bucket], c, c_links.tqe); + /* + * Inform the eventtimers(4) subsystem there's a new callout + * that has been inserted, but only if really required. 
+ */ + bt = c->c_time; + bintime_add(&bt, &c->c_precision); + if (callout_new_inserted != NULL && + (bintime_cmp(&bt, &cc->cc_firstevent, <) || + !bintime_isset(&cc->cc_firstevent))) { + cc->cc_firstevent = c->c_time; + (*callout_new_inserted)(cpu, c->c_time); } } static void -callout_cc_del(struct callout *c, struct callout_cpu *cc) +callout_cc_del(struct callout *c, struct callout_cpu *cc, int direct) { - - if (cc->cc_next == c) - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe); + else if (cc->cc_exec_next == c) + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter); if (c->c_flags & CALLOUT_LOCAL_ALLOC) { c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); @@ -451,18 +633,19 @@ static void static struct callout * softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, - int *lockcalls, int *gcalls) + int *lockcalls, int *gcalls, int direct) { void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; - int c_flags, sharedlock; + int c_flags, flags, sharedlock; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; - int new_cpu, new_ticks; + int new_cpu; + struct bintime new_time; #endif #ifdef DIAGNOSTIC struct bintime bt1, bt2; @@ -471,7 +654,10 @@ softclock_call_cc(struct callout *c, struct callou static timeout_t *lastfunc; #endif - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + if (direct) + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe); + else + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 
0 : 1; c_lock = c->c_lock; @@ -482,8 +668,8 @@ softclock_call_cc(struct callout *c, struct callou c->c_flags = CALLOUT_LOCAL_ALLOC; else c->c_flags &= ~CALLOUT_PENDING; - cc->cc_curr = c; - cc->cc_cancel = 0; + cc->cc_exec_entity[direct].cc_curr = c; + cc->cc_exec_entity[direct].cc_cancel = 0; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, sharedlock); @@ -491,14 +677,18 @@ softclock_call_cc(struct callout *c, struct callou * The callout may have been cancelled * while we switched locks. */ - if (cc->cc_cancel) { + if (cc->cc_exec_entity[direct].cc_cancel) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. */ - cc->cc_cancel = 1; - - if (c_lock == &Giant.lock_object) { + cc->cc_exec_entity[direct].cc_cancel = 1; + /* + * In case we're processing a direct callout we + * can't hold giant because holding a sleep mutex + * from hardware interrupt context is not allowed. + */ + if ((c_lock == &Giant.lock_object) && gcalls != NULL) { (*gcalls)++; CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); @@ -515,11 +705,13 @@ softclock_call_cc(struct callout *c, struct callou #ifdef DIAGNOSTIC binuptime(&bt1); #endif - THREAD_NO_SLEEPING(); + if (!direct) + THREAD_NO_SLEEPING(); SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); c_func(c_arg); SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); - THREAD_SLEEPING_OK(); + if (!direct) + THREAD_SLEEPING_OK(); #ifdef DIAGNOSTIC binuptime(&bt2); bintime_sub(&bt2, &bt1); @@ -553,31 +745,31 @@ skip: c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } - cc->cc_curr = NULL; - if (cc->cc_waiting) { + cc->cc_exec_entity[direct].cc_curr = NULL; + if (cc->cc_exec_entity[direct].cc_waiting) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. 
*/ - if (cc_cme_migrating(cc)) - cc_cme_cleanup(cc); - cc->cc_waiting = 0; + if (cc_cme_migrating(cc, direct)) + cc_cme_cleanup(cc, direct); + cc->cc_exec_entity[direct].cc_waiting = 0; CC_UNLOCK(cc); - wakeup(&cc->cc_waiting); + wakeup(&cc->cc_exec_entity[direct].cc_waiting); CC_LOCK(cc); - } else if (cc_cme_migrating(cc)) { + } else if (cc_cme_migrating(cc, direct)) { #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. */ - new_cpu = cc->cc_migration_cpu; - new_ticks = cc->cc_migration_ticks; - new_func = cc->cc_migration_func; - new_arg = cc->cc_migration_arg; - cc_cme_cleanup(cc); + new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; + new_time = cc->cc_exec_entity[direct].ce_migration_time; + new_func = cc->cc_exec_entity[direct].ce_migration_func; + new_arg = cc->cc_exec_entity[direct].ce_migration_arg; + cc_cme_cleanup(cc, direct); /* * Handle deferred callout stops @@ -586,7 +778,7 @@ skip: CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); - callout_cc_del(c, cc); + callout_cc_del(c, cc, direct); goto nextc; } @@ -598,8 +790,9 @@ skip: * is not easy. */ new_cc = callout_cpu_switch(c, cc, new_cpu); - callout_cc_add(c, new_cc, new_ticks, new_func, new_arg, - new_cpu); + flags = (direct) ? C_DIRECT_EXEC : 0; + callout_cc_add(c, new_cc, new_time, new_func, new_arg, + new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else @@ -609,7 +802,7 @@ skip: #ifdef SMP nextc: #endif - return (cc->cc_next); + return cc->cc_exec_entity[direct].cc_next; } /* @@ -633,61 +826,28 @@ softclock(void *arg) { struct callout_cpu *cc; struct callout *c; - struct callout_tailq *bucket; - int curticks; - int steps; /* #steps since we last allowed interrupts */ - int depth; - int mpcalls; - int lockcalls; - int gcalls; + int depth, gcalls, lockcalls, mpcalls; -#ifndef MAX_SOFTCLOCK_STEPS -#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. 
*/ -#endif /* MAX_SOFTCLOCK_STEPS */ - + depth = 0; mpcalls = 0; lockcalls = 0; gcalls = 0; - depth = 0; - steps = 0; cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while (cc->cc_softticks - 1 != cc->cc_ticks) { - /* - * cc_softticks may be modified by hard clock, so cache - * it while we work on a given bucket. - */ - curticks = cc->cc_softticks; - cc->cc_softticks++; - bucket = &cc->cc_callwheel[curticks & callwheelmask]; - c = TAILQ_FIRST(bucket); - while (c != NULL) { - depth++; - if (c->c_time != curticks) { - c = TAILQ_NEXT(c, c_links.tqe); - ++steps; - if (steps >= MAX_SOFTCLOCK_STEPS) { - cc->cc_next = c; - /* Give interrupts a chance. */ - CC_UNLOCK(cc); - ; /* nothing */ - CC_LOCK(cc); - c = cc->cc_next; - steps = 0; - } - } else { - TAILQ_REMOVE(bucket, c, c_links.tqe); - c = softclock_call_cc(c, cc, &mpcalls, - &lockcalls, &gcalls); - steps = 0; - } - } + c = TAILQ_FIRST(&cc->cc_expireq); + while (c != NULL) { + ++depth; + TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter); + c = softclock_call_cc(c, cc, &mpcalls, + &lockcalls, &gcalls, 0); } +#ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; - cc->cc_next = NULL; +#endif + cc->cc_exec_next = NULL; CC_UNLOCK(cc); } @@ -776,29 +936,41 @@ callout_handle_init(struct callout_handle *handle) * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ -int -callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), - void *arg, int cpu) +int +_callout_reset_on(struct callout *c, struct bintime *bt, int to_ticks, + void (*ftn)(void *), void *arg, int cpu, int flags) { + struct bintime now, to_bt; struct callout_cpu *cc; - int cancelled = 0; + int bucket, cancelled, direct; + cancelled = 0; + if (bt == NULL) { + FREQ2BT(hz,&to_bt); + 
getbinuptime(&now); + bintime_mul(&to_bt,to_ticks); + bintime_add(&to_bt,&now); + } else + to_bt = *bt; /* * Don't allow migration of pre-allocated callouts lest they * become unbalanced. */ if (c->c_flags & CALLOUT_LOCAL_ALLOC) cpu = c->c_cpu; + direct = c->c_flags & CALLOUT_DIRECT; cc = callout_lock(c); - if (cc->cc_curr == c) { + if (cc->cc_exec_entity[direct].cc_curr == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ - if (c->c_lock != NULL && !cc->cc_cancel) - cancelled = cc->cc_cancel = 1; - if (cc->cc_waiting) { + if (c->c_lock != NULL && + !cc->cc_exec_entity[direct].cc_cancel) + cancelled = + cc->cc_exec_entity[direct].cc_cancel = 1; + if (cc->cc_exec_entity[direct].cc_waiting) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. @@ -811,12 +983,18 @@ callout_handle_init(struct callout_handle *handle) } } if (c->c_flags & CALLOUT_PENDING) { - if (cc->cc_next == c) { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = TAILQ_NEXT(c, + c_links.tqe); + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, + c_links.tqe); + } else { + if (cc->cc_exec_next == c) + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter); + TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter); } - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); - cancelled = 1; c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); } @@ -828,15 +1006,17 @@ callout_handle_init(struct callout_handle *handle) * to a more appropriate moment. 
*/ if (c->c_cpu != cpu) { - if (cc->cc_curr == c) { - cc->cc_migration_cpu = cpu; - cc->cc_migration_ticks = to_ticks; - cc->cc_migration_func = ftn; - cc->cc_migration_arg = arg; + if (cc->cc_exec_entity[direct].cc_curr == c) { + cc->cc_exec_entity[direct].ce_migration_cpu = cpu; + cc->cc_exec_entity[direct].ce_migration_time + = to_bt; + cc->cc_exec_entity[direct].ce_migration_func = ftn; + cc->cc_exec_entity[direct].ce_migration_arg = arg; c->c_flags |= CALLOUT_DFRMIGRATION; - CTR5(KTR_CALLOUT, - "migration of %p func %p arg %p in %d to %u deferred", - c, c->c_func, c->c_arg, to_ticks, cpu); + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %d.%08x to %u deferred", + c, c->c_func, c->c_arg, (int)(to_bt.sec), + (u_int)(to_bt.frac >> 32), cpu); CC_UNLOCK(cc); return (cancelled); } @@ -844,9 +1024,10 @@ callout_handle_init(struct callout_handle *handle) } #endif - callout_cc_add(c, cc, to_ticks, ftn, arg, cpu); - CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d", - cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, to_bt, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? 
"re" : "", c, c->c_func, c->c_arg, (int)(to_bt.sec), + (u_int)(to_bt.frac >> 32)); CC_UNLOCK(cc); return (cancelled); @@ -874,7 +1055,7 @@ _callout_stop_safe(c, safe) { struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; + int bucket, direct, sq_locked, use_lock; /* * Some old subsystems don't hold Giant while running a callout_stop(), @@ -890,7 +1071,7 @@ _callout_stop_safe(c, safe) } } else use_lock = 0; - + direct = c->c_flags & CALLOUT_DIRECT; sq_locked = 0; old_cc = NULL; again: @@ -904,7 +1085,7 @@ again: if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); - sleepq_release(&old_cc->cc_waiting); + sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); sq_locked = 0; old_cc = NULL; goto again; @@ -925,12 +1106,13 @@ again: * If it wasn't on the queue and it isn't the current * callout, then we can't stop it, so just bail. */ - if (cc->cc_curr != c) { + if (cc->cc_exec_entity[direct].cc_curr != c) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); if (sq_locked) - sleepq_release(&cc->cc_waiting); + sleepq_release( + &cc->cc_exec_entity[direct].cc_waiting); return (0); } @@ -941,8 +1123,7 @@ again: * just wait for the current invocation to * finish. */ - while (cc->cc_curr == c) { - + while (cc->cc_exec_entity[direct].cc_curr == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid @@ -962,7 +1143,8 @@ again: */ if (!sq_locked) { CC_UNLOCK(cc); - sleepq_lock(&cc->cc_waiting); + sleepq_lock( + &cc->cc_exec_entity[direct].cc_waiting); sq_locked = 1; old_cc = cc; goto again; @@ -974,13 +1156,16 @@ again: * will be packed up, just let softclock() * take care of it. 
*/ - cc->cc_waiting = 1; + cc->cc_exec_entity[direct].cc_waiting = 1; DROP_GIANT(); CC_UNLOCK(cc); - sleepq_add(&cc->cc_waiting, - &cc->cc_lock.lock_object, "codrain", + sleepq_add( + &cc->cc_exec_entity[direct].cc_waiting, + &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); - sleepq_wait(&cc->cc_waiting, 0); + sleepq_wait( + &cc->cc_exec_entity[direct].cc_waiting, + 0); sq_locked = 0; old_cc = NULL; @@ -988,7 +1173,8 @@ again: PICKUP_GIANT(); CC_LOCK(cc); } - } else if (use_lock && !cc->cc_cancel) { + } else if (use_lock && + !cc->cc_exec_entity[direct].cc_cancel) { /* * The current callout is waiting for its * lock which we hold. Cancel the callout @@ -996,10 +1182,10 @@ again: * lock, the callout will be skipped in * softclock(). */ - cc->cc_cancel = 1; + cc->cc_exec_entity[direct].cc_cancel = 1; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - KASSERT(!cc_cme_migrating(cc), + KASSERT(!cc_cme_migrating(cc, direct), ("callout wrongly scheduled for migration")); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); @@ -1018,15 +1204,18 @@ again: return (0); } if (sq_locked) - sleepq_release(&cc->cc_waiting); - + sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); - callout_cc_del(c, cc); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, + c_links.tqe); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter); + callout_cc_del(c, cc, direct); CC_UNLOCK(cc); return (1); Index: sys/kern/kern_time.c =================================================================== --- sys/kern/kern_time.c (.../head) (revision 239166) +++ sys/kern/kern_time.c (.../projects/calloutng) (revision 239166) @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include 
#include +#include #include #include #include @@ -352,37 +353,36 @@ static int nanowait; int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { - struct timespec ts, ts2, ts3; - struct timeval tv; + struct timespec ts; + struct bintime bt, bt2, tmp; int error; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) return (0); - getnanouptime(&ts); - timespecadd(&ts, rqt); - TIMESPEC_TO_TIMEVAL(&tv, rqt); + binuptime(&bt); + timespec2bintime(rqt, &tmp); + bintime_add(&bt,&tmp); for (;;) { - error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp", - tvtohz(&tv)); - getnanouptime(&ts2); + error = tsleep_bt(&nanowait, PWAIT | PCATCH, "nanslp", &bt, + C_DIRECT_EXEC); + binuptime(&bt2); if (error != EWOULDBLOCK) { if (error == ERESTART) error = EINTR; if (rmt != NULL) { - timespecsub(&ts, &ts2); + tmp = bt; + bintime_sub(&tmp, &bt2); + bintime2timespec(&tmp, &ts); if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } - if (timespeccmp(&ts2, &ts, >=)) + if (bintime_cmp(&bt2, &bt, >=)) return (0); - ts3 = ts; - timespecsub(&ts3, &ts2); - TIMESPEC_TO_TIMEVAL(&tv, &ts3); } } Index: sys/kern/kern_synch.c =================================================================== --- sys/kern/kern_synch.c (.../head) (revision 239166) +++ sys/kern/kern_synch.c (.../projects/calloutng) (revision 239166) @@ -146,12 +146,12 @@ sleepinit(void) */ int _sleep(void *ident, struct lock_object *lock, int priority, - const char *wmesg, int timo) + const char *wmesg, int timo, struct bintime *bt, int flags) { struct thread *td; struct proc *p; struct lock_class *class; - int catch, flags, lock_state, pri, rval; + int catch, sleepq_flags, lock_state, pri, rval; WITNESS_SAVE_DECL(lock_witness); td = curthread; @@ -162,7 +162,7 @@ _sleep(void *ident, struct lock_object *lock, int #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); - KASSERT(timo != 0 
|| mtx_owned(&Giant) || lock != NULL, + KASSERT(timo != 0 || bt != NULL || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); @@ -199,13 +199,13 @@ _sleep(void *ident, struct lock_object *lock, int sleepq_remove(td, td->td_wchan); if (ident == &pause_wchan) - flags = SLEEPQ_PAUSE; + sleepq_flags = SLEEPQ_PAUSE; else - flags = SLEEPQ_SLEEP; + sleepq_flags = SLEEPQ_SLEEP; if (catch) - flags |= SLEEPQ_INTERRUPTIBLE; + sleepq_flags |= SLEEPQ_INTERRUPTIBLE; if (priority & PBDRY) - flags |= SLEEPQ_STOP_ON_BDRY; + sleepq_flags |= SLEEPQ_STOP_ON_BDRY; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", @@ -231,18 +231,20 @@ _sleep(void *ident, struct lock_object *lock, int * stopped, then td will no longer be on a sleep queue upon * return from cursig(). */ - sleepq_add(ident, lock, wmesg, flags, 0); - if (timo) - sleepq_set_timeout(ident, timo); + sleepq_add(ident, lock, wmesg, sleepq_flags, 0); + if (bt) + sleepq_set_timeout_bt(ident, bt, flags); + else if (timo) + sleepq_set_timeout_flags(ident, timo, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); sleepq_lock(ident); } - if (timo && catch) + if ((timo != 0 || bt != NULL) && catch) rval = sleepq_timedwait_sig(ident, pri); - else if (timo) + else if (timo != 0 || bt != NULL) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); Index: sys/kern/kern_clock.c =================================================================== --- sys/kern/kern_clock.c (.../head) (revision 239166) +++ sys/kern/kern_clock.c (.../projects/calloutng) (revision 239166) @@ -425,6 +425,7 @@ initclocks(dummy) void hardclock_cpu(int usermode) { + struct bintime now; struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; @@ -459,7 +460,8 @@ 
hardclock_cpu(int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); + binuptime(&now); + callout_process(&now); } /* @@ -549,7 +551,6 @@ hardclock_cnt(int cnt, int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); /* We are in charge to handle this tick duty. */ if (newticks > 0) { /* Dangerous and no need to call these things concurrently. */ Index: sys/kern/kern_clocksource.c =================================================================== --- sys/kern/kern_clocksource.c (.../head) (revision 239166) +++ sys/kern/kern_clocksource.c (.../projects/calloutng) (revision 239166) @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -71,9 +72,7 @@ static int round_freq(struct eventtimer *et, int static void getnextcpuevent(struct bintime *event, int idle); static void getnextevent(struct bintime *event); static int handleevents(struct bintime *now, int fake); -#ifdef SMP -static void cpu_new_callout(int cpu, int ticks); -#endif +static void cpu_new_callout(int cpu, struct bintime bt); static struct mtx et_hw_mtx; @@ -135,6 +134,7 @@ struct pcpu_state { struct bintime nexthard; /* Next hardlock() event. */ struct bintime nextstat; /* Next statclock() event. */ struct bintime nextprof; /* Next profclock() event. */ + struct bintime nextcall; /* Next callout event. */ #ifdef KDTRACE_HOOKS struct bintime nextcyc; /* Next OpenSolaris cyclics event. 
*/ #endif @@ -238,6 +238,11 @@ handleevents(struct bintime *now, int fake) } } else state->nextprof = state->nextstat; + if (bintime_cmp(now, &state->nextcall, >=) && + (state->nextcall.sec != -1)) { + state->nextcall.sec = -1; + callout_process(now); + } #ifdef KDTRACE_HOOKS if (fake == 0 && cyclic_clock_func != NULL && @@ -269,24 +274,28 @@ handleevents(struct bintime *now, int fake) static void getnextcpuevent(struct bintime *event, int idle) { + struct pcpu_state *state; struct bintime tmp; - struct pcpu_state *state; - int skip; - + int hardfreq; + state = DPCPU_PTR(timerstate); - /* Handle hardclock() events. */ + /* Handle hardclock() events, skipping some if CPU is idle. */ *event = state->nexthard; if (idle || (!activetick && !profiling && (timer->et_flags & ET_FLAGS_PERCPU) == 0)) { - skip = idle ? 4 : (stathz / 2); - if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip) - skip = tc_min_ticktock_freq; - skip = callout_tickstofirst(hz / skip) - 1; - CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip); - tmp = hardperiod; - bintime_mul(&tmp, skip); - bintime_add(event, &tmp); + hardfreq = idle ? 4 : (stathz / 2); + if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > hardfreq) + hardfreq = tc_min_ticktock_freq; + if (hz > hardfreq) { + tmp = hardperiod; + bintime_mul(&tmp, hz / hardfreq - 1); + bintime_add(event, &tmp); + } } + /* Handle callout events. */ + if (state->nextcall.sec != -1 && + bintime_cmp(event, &state->nextcall, >)) + *event = state->nextcall; if (!idle) { /* If CPU is active - handle other types of events. */ if (bintime_cmp(event, &state->nextstat, >)) *event = state->nextstat; @@ -626,10 +635,9 @@ cpu_initclocks_bsp(void) #ifdef KDTRACE_HOOKS state->nextcyc.sec = -1; #endif + state->nextcall.sec = -1; } -#ifdef SMP callout_new_inserted = cpu_new_callout; -#endif periodic = want_periodic; /* Grab requested timer or the best of present.
*/ if (timername[0]) @@ -853,52 +861,54 @@ clocksource_cyc_set(const struct bintime *t) } #endif -#ifdef SMP static void -cpu_new_callout(int cpu, int ticks) +cpu_new_callout(int cpu, struct bintime bt) { - struct bintime tmp; + struct bintime now; struct pcpu_state *state; - CTR3(KTR_SPARE2, "new co at %d: on %d in %d", - curcpu, cpu, ticks); + CTR5(KTR_SPARE2, "new co at %d: on %d at %d.%08x%08x", + curcpu, cpu, (int)(bt.sec), (u_int)(bt.frac >> 32), + (u_int)(bt.frac & 0xffffffff)); state = DPCPU_ID_PTR(cpu, timerstate); ET_HW_LOCK(state); - if (state->idle == 0 || busy) { + + /* + * If there is callout time already set earlier -- do nothing. + * This check may appear redundant because we check already in + * callout_process() but this double check guarantees we're safe + * with respect to race conditions between interrupts execution + * and scheduling. + */ + if (state->nextcall.sec != -1 && + bintime_cmp(&bt, &state->nextcall, >=)) { ET_HW_UNLOCK(state); return; } - /* - * If timer is periodic - just update next event time for target CPU. - * If timer is global - there is chance it is already programmed. - */ - if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) { - tmp = hardperiod; - bintime_mul(&tmp, ticks - 1); - bintime_add(&tmp, &state->nexthard); - if (bintime_cmp(&tmp, &state->nextevent, <)) - state->nextevent = tmp; - if (periodic || - bintime_cmp(&state->nextevent, &nexttick, >=)) { - ET_HW_UNLOCK(state); - return; - } + state->nextcall = bt; + /* If there is some other event set earlier -- do nothing. */ + if (bintime_cmp(&state->nextcall, &state->nextevent, >=)) { + ET_HW_UNLOCK(state); + return; } - /* - * Otherwise we have to wake that CPU up, as we can't get present - * bintime to reprogram global timer from here. If timer is per-CPU, - * we by definition can't do it from here. - */ + state->nextevent = state->nextcall; + /* If timer is periodic -- there is nothing to reprogram.
*/ + if (periodic) { + ET_HW_UNLOCK(state); + return; + } + /* If timer is global or of the current CPU -- reprogram it. */ + if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) { + binuptime(&now); + loadtimer(&now, 0); + ET_HW_UNLOCK(state); + return; + } + /* Otherwise make other CPU to reprogram it. */ + state->handle = 1; ET_HW_UNLOCK(state); - if (timer->et_flags & ET_FLAGS_PERCPU) { - state->handle = 1; - ipi_cpu(cpu, IPI_HARDCLOCK); - } else { - if (!cpu_idle_wakeup(cpu)) - ipi_cpu(cpu, IPI_AST); - } + ipi_cpu(cpu, IPI_HARDCLOCK); } -#endif /* * Report or change the active event timers hardware. Index: sys/kern/kern_event.c =================================================================== --- sys/kern/kern_event.c (.../head) (revision 239166) +++ sys/kern/kern_event.c (.../projects/calloutng) (revision 239166) @@ -517,25 +517,26 @@ knote_fork(struct knlist *list, int pid) * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ -static int -timertoticks(intptr_t data) +static struct bintime +timer2bintime(intptr_t data) { - struct timeval tv; - int tticks; + struct bintime bt, pbt; - tv.tv_sec = data / 1000; - tv.tv_usec = (data % 1000) * 1000; - tticks = tvtohz(&tv); - - return tticks; + getbinuptime(&pbt); + bt.sec = data / 1000; + bt.frac = (data % 1000) * (uint64_t)1844674407309000LL; + bintime_add(&bt, &pbt); + return bt; } static void filt_timerexpire(void *knx) { - struct knote *kn = knx; + struct bintime bt; struct callout *calloutp; + struct knote *kn; + kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ @@ -547,9 +548,10 @@ filt_timerexpire(void *knx) * when we're delayed. 
*/ if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { + bt = timer2bintime(kn->kn_sdata); calloutp = (struct callout *)kn->kn_hook; - callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1, - filt_timerexpire, kn); + callout_reset_bt_on(calloutp, &bt, filt_timerexpire, kn, + PCPU_GET(cpuid), C_P1MS); } } @@ -559,6 +561,7 @@ filt_timerexpire(void *knx) static int filt_timerattach(struct knote *kn) { + struct bintime bt; struct callout *calloutp; atomic_add_int(&kq_ncallouts, 1); @@ -573,8 +576,9 @@ filt_timerattach(struct knote *kn) calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); callout_init(calloutp, CALLOUT_MPSAFE); kn->kn_hook = calloutp; - callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata), - filt_timerexpire, kn); + bt = timer2bintime(kn->kn_sdata); + callout_reset_bt_on(calloutp, &bt, filt_timerexpire, kn, + PCPU_GET(cpuid), C_P1MS); return (0); } Index: sys/kern/subr_sleepqueue.c =================================================================== --- sys/kern/subr_sleepqueue.c (.../head) (revision 239166) +++ sys/kern/subr_sleepqueue.c (.../projects/calloutng) (revision 239166) @@ -361,9 +361,10 @@ sleepq_add(void *wchan, struct lock_object *lock, * Sets a timeout that will remove the current thread from the specified * sleep queue after timo ticks if the thread has not already been awakened. 
*/ -void -sleepq_set_timeout(void *wchan, int timo) +void +_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo, int flags) { + struct sleepqueue_chain *sc; struct thread *td; @@ -373,7 +374,12 @@ sleepq_add(void *wchan, struct lock_object *lock, MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_sleepqueue == NULL); MPASS(wchan != NULL); - callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td); + if (bt == NULL) + callout_reset_flags_on(&td->td_slpcallout, timo, + sleepq_timeout, td, PCPU_GET(cpuid), flags); + else + callout_reset_bt_on(&td->td_slpcallout, bt, + sleepq_timeout, td, PCPU_GET(cpuid), flags); } /* Index: sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c (.../head) (revision 239166) +++ sys/kern/sys_generic.c (.../projects/calloutng) (revision 239166) @@ -102,7 +102,7 @@ static int dofilewrite(struct thread *, int, struc off_t, int); static void doselwakeup(struct selinfo *, int); static void seltdinit(struct thread *); -static int seltdwait(struct thread *, int); +static int seltdwait(struct thread *, struct bintime *, int); static void seltdclear(struct thread *); /* @@ -902,7 +902,8 @@ kern_select(struct thread *td, int nd, fd_set *fd_ */ fd_mask s_selbits[howmany(2048, NFDBITS)]; fd_mask *ibits[3], *obits[3], *selbits, *sbp; - struct timeval atv, rtv, ttv; + struct bintime abt, rbt; + struct timeval atv; int error, lf, ndu, timo; u_int nbufbytes, ncpbytes, ncpubytes, nfdbits; @@ -996,33 +997,34 @@ kern_select(struct thread *td, int nd, fd_set *fd_ if (tvp != NULL) { atv = *tvp; - if (itimerfix(&atv)) { + if (atv.tv_sec < 0 || atv.tv_usec < 0 || + atv.tv_usec >= 1000000) { error = EINVAL; goto done; } - getmicrouptime(&rtv); - timevaladd(&atv, &rtv); + binuptime(&rbt); + timeval2bintime(&atv, &abt); + bintime_add(&abt, &rbt); } else { - atv.tv_sec = 0; - atv.tv_usec = 0; + abt.sec = 0; + abt.frac = 0; } - timo = 0; seltdinit(td); /* Iterate until the timeout expires or 
descriptors become ready. */ for (;;) { error = selscan(td, ibits, obits, nd); if (error || td->td_retval[0] != 0) break; - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) + if (abt.sec || abt.frac) { + binuptime(&rbt); + if (bintime_cmp(&rbt, &abt, >=)) break; - ttv = atv; - timevalsub(&ttv, &rtv); - timo = ttv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&ttv); + error = seltdwait(td, &abt, 0); } - error = seltdwait(td, timo); + else { + timo = 0; + error = seltdwait(td, NULL, timo); + } if (error) break; error = selrescan(td, ibits, obits); @@ -1254,7 +1256,8 @@ sys_poll(td, uap) { struct pollfd *bits; struct pollfd smallbits[32]; - struct timeval atv, rtv, ttv; + struct bintime abt, rbt; + struct timeval atv; int error, timo; u_int nfds; size_t ni; @@ -1273,33 +1276,33 @@ sys_poll(td, uap) if (uap->timeout != INFTIM) { atv.tv_sec = uap->timeout / 1000; atv.tv_usec = (uap->timeout % 1000) * 1000; - if (itimerfix(&atv)) { + if (atv.tv_sec < 0 || atv.tv_usec < 0 || + atv.tv_usec >= 1000000) { error = EINVAL; goto done; } - getmicrouptime(&rtv); - timevaladd(&atv, &rtv); + binuptime(&rbt); + timeval2bintime(&atv, &abt); + bintime_add(&abt, &rbt); } else { - atv.tv_sec = 0; - atv.tv_usec = 0; + abt.sec = 0; + abt.frac = 0; } - timo = 0; seltdinit(td); /* Iterate until the timeout expires or descriptors become ready. */ for (;;) { error = pollscan(td, bits, nfds); if (error || td->td_retval[0] != 0) break; - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) + if (abt.sec || abt.frac) { + binuptime(&rbt); + if (bintime_cmp(&rbt, &abt, >=)) break; - ttv = atv; - timevalsub(&ttv, &rtv); - timo = ttv.tv_sec > 24 * 60 * 60 ? 
- 24 * 60 * 60 * hz : tvtohz(&ttv); + error = seltdwait(td, &abt, 0); + } else { + timo = 0; + error = seltdwait(td, NULL, timo); } - error = seltdwait(td, timo); if (error) break; error = pollrescan(td); @@ -1518,7 +1521,7 @@ selsocket(struct socket *so, int events, struct ti timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } - error = seltdwait(td, timo); + error = seltdwait(td, NULL, timo); seltdclear(td); if (error) break; @@ -1697,7 +1700,7 @@ out: } static int -seltdwait(struct thread *td, int timo) +seltdwait(struct thread *td, struct bintime *bt, int timo) { struct seltd *stp; int error; @@ -1716,9 +1719,12 @@ static int mtx_unlock(&stp->st_mtx); return (0); } - if (timo > 0) + if (bt == NULL && timo > 0) error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo); - else + else if (bt != NULL) + error = cv_timedwait_bt_sig(&stp->st_wait, &stp->st_mtx, + bt, C_DIRECT_EXEC); + else error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); mtx_unlock(&stp->st_mtx); Index: sys/kern/kern_condvar.c =================================================================== --- sys/kern/kern_condvar.c (.../head) (revision 239166) +++ sys/kern/kern_condvar.c (.../projects/calloutng) (revision 239166) @@ -270,12 +270,12 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *l } /* - * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the - * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout - * expires. + * Wait on a condition variable. Returns 0 if the process was resumed by + * cv_signal or cv_broadcast, EWOULDBLOCK if the timeout expires. 
*/ int -_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo) +_cv_timedwait(struct cv *cvp, struct lock_object *lock, struct bintime *bt, + int timo, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; @@ -311,7 +311,10 @@ int DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); - sleepq_set_timeout(cvp, timo); + if (bt == NULL) + sleepq_set_timeout_flags(cvp, timo, flags); + else + sleepq_set_timeout_bt(cvp, bt, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); @@ -336,13 +339,14 @@ int } /* - * Wait on a condition variable for at most timo/hz seconds, allowing - * interruption by signals. Returns 0 if the thread was resumed by cv_signal - * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if - * a signal was caught. + * Wait on a condition variable allowing interruption by signals. + * Returns 0 if the thread was resumed by cv_signal or cv_broadcast, + * EWOULDBLOCK if the timeout expires, and EINTR + * or ERESTART if a signal was caught.
*/ int -_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo) +_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, + struct bintime *bt, int timo, int flags) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; @@ -379,7 +383,10 @@ int sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); - sleepq_set_timeout(cvp, timo); + if (bt == NULL) + sleepq_set_timeout_flags(cvp, timo, flags); + else + sleepq_set_timeout_bt(cvp, bt, flags); if (lock != &Giant.lock_object) { if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); Index: sys/boot =================================================================== --- sys/boot (.../head) (revision 239166) +++ sys/boot (.../projects/calloutng) (revision 239166) Property changes on: sys/boot ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot:r236314-239165 Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c (.../head) (revision 239166) +++ sys/netinet/tcp_timer.c (.../projects/calloutng) (revision 239166) @@ -696,21 +696,39 @@ tcp_timer_active(struct tcpcb *tp, int timer_type) #define ticks_to_msecs(t) (1000*(t) / hz) +static int +delta_bintime_in_msecs(struct bintime bt, struct bintime now) +{ + bintime_sub(&bt, &now); + return (((uint64_t)1000 * (uint64_t)(bt.frac >> 32)) >> 32) + + (bt.sec * 1000); +} + void -tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer) +tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, + struct xtcp_timer *xtimer) { - bzero(xtimer, sizeof(struct xtcp_timer)); + struct bintime bt, now; + + bzero(xtimer, sizeof(*xtimer)); if (timer == NULL) return; - if (callout_active(&timer->tt_delack)) - xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks); - if (callout_active(&timer->tt_rexmt)) - xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time 
- ticks); - if (callout_active(&timer->tt_persist)) - xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks); - if (callout_active(&timer->tt_keep)) - xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks); - if (callout_active(&timer->tt_2msl)) - xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks); + bintime_clear(&bt); + getbinuptime(&now); + if (callout_active(&timer->tt_delack)) + xtimer->tt_delack = delta_bintime_in_msecs( + timer->tt_delack.c_time, now); + if (callout_active(&timer->tt_rexmt)) + xtimer->tt_rexmt = delta_bintime_in_msecs( + timer->tt_rexmt.c_time, now); + if (callout_active(&timer->tt_persist)) + xtimer->tt_persist = delta_bintime_in_msecs( + timer->tt_persist.c_time, now); + if (callout_active(&timer->tt_keep)) + xtimer->tt_keep = delta_bintime_in_msecs( + timer->tt_keep.c_time, now); + if (callout_active(&timer->tt_2msl)) + xtimer->tt_2msl = delta_bintime_in_msecs( + timer->tt_2msl.c_time, now); xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); } Index: sys/contrib/libfdt =================================================================== --- sys/contrib/libfdt (.../head) (revision 239166) +++ sys/contrib/libfdt (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/libfdt ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/libfdt:r236314-239017 Index: sys/contrib/pf =================================================================== --- sys/contrib/pf (.../head) (revision 239166) +++ sys/contrib/pf (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/pf:r236314-239017 Index: sys/contrib/dev/acpica/include =================================================================== --- sys/contrib/dev/acpica/include (.../head) (revision 239166) +++ sys/contrib/dev/acpica/include 
(.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/include ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/include:r236314-238495 Index: sys/contrib/dev/acpica/components/debugger =================================================================== --- sys/contrib/dev/acpica/components/debugger (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/debugger (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/debugger ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/debugger:r236314-238495 Index: sys/contrib/dev/acpica/components/events =================================================================== --- sys/contrib/dev/acpica/components/events (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/events (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/events ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/events:r236314-238495 Index: sys/contrib/dev/acpica/components/executer =================================================================== --- sys/contrib/dev/acpica/components/executer (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/executer (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/executer ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/executer:r236314-238495 Index: sys/contrib/dev/acpica/components/dispatcher =================================================================== --- sys/contrib/dev/acpica/components/dispatcher (.../head) (revision 239166) +++ 
sys/contrib/dev/acpica/components/dispatcher (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/dispatcher ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/dispatcher:r236314-237808 Index: sys/contrib/dev/acpica/components/resources =================================================================== --- sys/contrib/dev/acpica/components/resources (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/resources (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/resources ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/resources:r236314-238495 Index: sys/contrib/dev/acpica/components/tables =================================================================== --- sys/contrib/dev/acpica/components/tables (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/tables (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/tables ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/tables:r236314-238495 Index: sys/contrib/dev/acpica/components/utilities =================================================================== --- sys/contrib/dev/acpica/components/utilities (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/utilities (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/utilities ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/utilities:r236314-238495 Index: sys/contrib/dev/acpica/components/namespace =================================================================== --- 
sys/contrib/dev/acpica/components/namespace (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/namespace (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/namespace ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/namespace:r236314-238495 Index: sys/contrib/dev/acpica/components/parser =================================================================== --- sys/contrib/dev/acpica/components/parser (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/parser (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/parser ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/parser:r236314-238495 Index: sys/contrib/dev/acpica/components/disassembler =================================================================== --- sys/contrib/dev/acpica/components/disassembler (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/disassembler (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/disassembler ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/disassembler:r236314-237808 Index: sys/contrib/dev/acpica/components/hardware =================================================================== --- sys/contrib/dev/acpica/components/hardware (.../head) (revision 239166) +++ sys/contrib/dev/acpica/components/hardware (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/components/hardware ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/components/hardware:r236314-238495 Index: sys/contrib/dev/acpica/changes.txt 
=================================================================== --- sys/contrib/dev/acpica/changes.txt (.../head) (revision 239166) +++ sys/contrib/dev/acpica/changes.txt (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/changes.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/changes.txt:r236314-238495 Index: sys/contrib/dev/acpica/common =================================================================== --- sys/contrib/dev/acpica/common (.../head) (revision 239166) +++ sys/contrib/dev/acpica/common (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/common ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/common:r236314-237808 Index: sys/contrib/dev/acpica/compiler =================================================================== --- sys/contrib/dev/acpica/compiler (.../head) (revision 239166) +++ sys/contrib/dev/acpica/compiler (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica/compiler ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/compiler:r236314-238495 Index: sys/contrib/dev/acpica =================================================================== --- sys/contrib/dev/acpica (.../head) (revision 239166) +++ sys/contrib/dev/acpica (.../projects/calloutng) (revision 239166) Property changes on: sys/contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica:r236314-238495 Index: sys/cddl/contrib/opensolaris =================================================================== --- sys/cddl/contrib/opensolaris (.../head) (revision 239166) +++ sys/cddl/contrib/opensolaris (.../projects/calloutng) (revision 239166) Property 
changes on: sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/cddl/contrib/opensolaris:r236314-239165 Index: sys/sys/callout.h =================================================================== --- sys/sys/callout.h (.../head) (revision 239166) +++ sys/sys/callout.h (.../projects/calloutng) (revision 239166) @@ -47,7 +47,33 @@ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ #define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ #define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ +#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */ +#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ +#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */ +#define C_P1S 0x0002 /* fields related to precision */ +#define C_P500MS 0x0006 +#define C_P250MS 0x000a +#define C_P125MS 0x000e +#define C_P64MS 0x0012 +#define C_P32MS 0x0016 +#define C_P16MS 0x001a +#define C_P8MS 0x001e +#define C_P4MS 0x0022 +#define C_P2MS 0x0026 +#define C_P1MS 0x002a +#define C_P500US 0x002e +#define C_P250US 0x0032 +#define C_P125US 0x0036 +#define C_P64US 0x003a +#define C_P32US 0x003e +#define C_P16US 0x0042 +#define C_P8US 0x0046 +#define C_P4US 0x004a +#define C_P2US 0x004e +#define PRECISION_BITS 7 +#define PRECISION_RANGE ((1 << PRECISION_BITS) - 1) + struct callout_handle { struct callout *callout; }; @@ -67,7 +93,16 @@ void _callout_init_lock(struct callout *, struct l _callout_init_lock((c), ((rw) != NULL) ? 
&(rw)->lock_object : \ NULL, (flags)) #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) -int callout_reset_on(struct callout *, int, void (*)(void *), void *, int); +int _callout_reset_on(struct callout *, struct bintime *, int, + void (*)(void *), void *, int, int); +#define callout_reset_on(c, to_ticks, fn, arg, cpu) \ + _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu), \ + (0)) +#define callout_reset_flags_on(c, to_ticks, fn, arg, cpu, flags) \ + _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu), \ + (flags)) +#define callout_reset_bt_on(c, bt, fn, arg, cpu, flags) \ + _callout_reset_on((c), (bt), (0), (fn), (arg), (cpu), (flags)) #define callout_reset(c, on_tick, fn, arg) \ callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu) #define callout_reset_curcpu(c, on_tick, fn, arg) \ @@ -78,9 +113,8 @@ int callout_schedule_on(struct callout *, int, int callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) #define callout_stop(c) _callout_stop_safe(c, 0) int _callout_stop_safe(struct callout *, int); -void callout_tick(void); -int callout_tickstofirst(int limit); -extern void (*callout_new_inserted)(int cpu, int ticks); +void callout_process(struct bintime *); +extern void (*callout_new_inserted)(int cpu, struct bintime bt); #endif Index: sys/sys/condvar.h =================================================================== --- sys/sys/condvar.h (.../head) (revision 239166) +++ sys/sys/condvar.h (.../projects/calloutng) (revision 239166) @@ -55,8 +55,10 @@ void cv_destroy(struct cv *cvp); void _cv_wait(struct cv *cvp, struct lock_object *lock); void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock); int _cv_wait_sig(struct cv *cvp, struct lock_object *lock); -int _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo); -int _cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo); +int _cv_timedwait(struct cv *cvp, struct lock_object *lock, + struct bintime *bt, int timo, int flags); +int 
_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, + struct bintime *bt, int timo, int flags); void cv_signal(struct cv *cvp); void cv_broadcastpri(struct cv *cvp, int pri); @@ -68,9 +70,20 @@ void cv_broadcastpri(struct cv *cvp, int pri); #define cv_wait_sig(cvp, lock) \ _cv_wait_sig((cvp), &(lock)->lock_object) #define cv_timedwait(cvp, lock, timo) \ - _cv_timedwait((cvp), &(lock)->lock_object, (timo)) + _cv_timedwait((cvp), &(lock)->lock_object, NULL, (timo), 0) +#define cv_timedwait_bt(cvp, lock, bt, flags) \ + _cv_timedwait((cvp), &(lock)->lock_object, (bt), 0, (flags)) +#define cv_timedwait_bt_sig(cvp, lock, bt, flags) \ + _cv_timedwait_sig((cvp), &(lock)->lock_object, (bt), 0, \ + (flags)) +#define cv_timedwait_flags(cvp, lock, timo, flags) \ + _cv_timedwait((cvp), &(lock)->lock_object, NULL, (timo), \ + (flags)) #define cv_timedwait_sig(cvp, lock, timo) \ - _cv_timedwait_sig((cvp), &(lock)->lock_object, (timo)) + _cv_timedwait_sig((cvp), &(lock)->lock_object, NULL, (timo), 0) +#define cv_timedwait_sig_flags(cvp, lock, timo, flags) \ + _cv_timedwait_sig((cvp), &(lock)->lock_object, NULL, (timo), \ + (flags)) #define cv_broadcast(cvp) cv_broadcastpri(cvp, 0) Index: sys/sys/sx.h =================================================================== --- sys/sys/sx.h (.../head) (revision 239166) +++ sys/sys/sx.h (.../projects/calloutng) (revision 239166) @@ -275,8 +275,9 @@ __sx_sunlock(struct sx *sx, const char *file, int #define sx_unlock(sx) sx_unlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_sleep(chan, sx, pri, wmesg, timo) \ - _sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo)) - + _sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo), \ + NULL, 0) + /* * Options passed to sx_init_flags().
*/ Index: sys/sys/systm.h =================================================================== --- sys/sys/systm.h (.../head) (revision 239166) +++ sys/sys/systm.h (.../projects/calloutng) (revision 239166) @@ -341,14 +341,23 @@ static __inline void splx(intrmask_t ipl __unused * less often. */ int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, - int timo) __nonnull(1); + int timo, struct bintime *bt, int flags) __nonnull(1); #define msleep(chan, mtx, pri, wmesg, timo) \ - _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo)) + _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo), \ + NULL, 0) +#define msleep_flags(chan, mtx, pri, wmesg, timo, flags) \ + _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo), \ + NULL, (flags)) +#define msleep_bt(chan, mtx, pri, wmesg, bt, flags) \ + _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), 0, (bt), \ + (flags)) int msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo) __nonnull(1); int pause(const char *wmesg, int timo); #define tsleep(chan, pri, wmesg, timo) \ - _sleep((chan), NULL, (pri), (wmesg), (timo)) + _sleep((chan), NULL, (pri), (wmesg), (timo), NULL, 0) +#define tsleep_bt(chan, pri, wmesg, bt, flags) \ + _sleep((chan), NULL, (pri), (wmesg), 0, (bt), (flags)) void wakeup(void *chan) __nonnull(1); void wakeup_one(void *chan) __nonnull(1); Index: sys/sys/_callout.h =================================================================== --- sys/sys/_callout.h (.../head) (revision 239166) +++ sys/sys/_callout.h (.../projects/calloutng) (revision 239166) @@ -39,6 +39,7 @@ #define _SYS__CALLOUT_H #include +#include struct lock_object; @@ -50,7 +51,9 @@ struct callout { SLIST_ENTRY(callout) sle; TAILQ_ENTRY(callout) tqe; } c_links; - int c_time; /* ticks to the event */ + TAILQ_ENTRY(callout) c_staiter; + struct bintime c_time; /* time of the event */ + struct bintime c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ void (*c_func)(void 
*); /* function to call */ struct lock_object *c_lock; /* lock to handle */ Index: sys/sys/mutex.h =================================================================== --- sys/sys/mutex.h (.../head) (revision 239166) +++ sys/sys/mutex.h (.../projects/calloutng) (revision 239166) @@ -339,7 +339,8 @@ extern struct mtx_pool *mtxpool_sleep; mtx_assert_((m), (what), __FILE__, __LINE__) #define mtx_sleep(chan, mtx, pri, wmesg, timo) \ - _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo)) + _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo), \ + NULL, 0) #define mtx_initialized(m) lock_initalized(&(m)->lock_object) Index: sys/sys/sleepqueue.h =================================================================== --- sys/sys/sleepqueue.h (.../head) (revision 239166) +++ sys/sys/sleepqueue.h (.../projects/calloutng) (revision 239166) @@ -108,7 +108,14 @@ struct sleepqueue *sleepq_lookup(void *wchan); void sleepq_release(void *wchan); void sleepq_remove(struct thread *td, void *wchan); int sleepq_signal(void *wchan, int flags, int pri, int queue); -void sleepq_set_timeout(void *wchan, int timo); +void _sleepq_set_timeout(void *wchan, struct bintime *bt, int timo, + int flags); +#define sleepq_set_timeout(wchan, timo) \ + _sleepq_set_timeout((wchan), NULL, (timo), 0) +#define sleepq_set_timeout_flags(wchan, timo, flags) \ + _sleepq_set_timeout((wchan), NULL, (timo), (flags)) +#define sleepq_set_timeout_bt(wchan, bt, flags) \ + _sleepq_set_timeout((wchan), (bt), 0, (flags)) u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); Index: sys =================================================================== --- sys (.../head) (revision 239166) +++ sys (.../projects/calloutng) (revision 239166) Property changes on: sys ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r236314-239165 Index: . 
=================================================================== --- . (.../head) (revision 239166) +++ . (.../projects/calloutng) (revision 239166) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /head:r236314-239165