Index: lib/libprocstat/zfs.c
===================================================================
--- lib/libprocstat/zfs.c	(.../head)	(revision 237923)
+++ lib/libprocstat/zfs.c	(.../projects/calloutng)	(revision 237923)
@@ -35,6 +35,7 @@
 #undef lbolt
 #undef lbolt64
+#undef gethrestime
 #undef gethrestime_sec
 #include
 #include
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES	(.../head)	(revision 237923)
+++ sys/conf/NOTES	(.../projects/calloutng)	(revision 237923)
@@ -259,6 +259,8 @@ options 	SX_NOINLINE
 
 # SMP Debugging Options:
 #
+# CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
+# structure used as backend in callout(9).
 # PREEMPTION allows the threads that are in the kernel to be preempted by
 #	higher priority [interrupt] threads.  It helps with interactivity
 #	and allows interrupt threads to run sooner rather than waiting.
@@ -297,6 +299,9 @@ options 	LOCK_PROFILING
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
+# Profiling for the callout(9) backend.
+options 	CALLOUT_PROFILING
+
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
Index: sys/conf/options
===================================================================
--- sys/conf/options	(.../head)	(revision 237923)
+++ sys/conf/options	(.../projects/calloutng)	(revision 237923)
@@ -66,6 +66,7 @@ SYSCTL_DEBUG	opt_sysctl.h
 ADAPTIVE_LOCKMGRS
 ALQ
 AUDIT		opt_global.h
+CALLOUT_PROFILING
 CAPABILITIES	opt_capsicum.h
 CAPABILITY_MODE	opt_capsicum.h
 CODA_COMPAT_5	opt_coda.h
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(.../head)	(revision 237923)
+++ sys/kern/kern_timeout.c	(.../projects/calloutng)	(revision 237923)
@@ -37,6 +37,7 @@
 #include
 __FBSDID("$FreeBSD$");
 
+#include "opt_callout_profiling.h"
 #include "opt_kdtrace.h"
 
 #include
@@ -47,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,6 +70,7 @@ SDT_PROBE_DEFINE(callout_execute, kernel, , callou
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
     "struct callout *");
 
+#ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
     "Average number of items examined per softclock call. Units = 1/1000");
@@ -80,11 +83,12 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTL
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
+#endif
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  */
-int callwheelsize, callwheelbits, callwheelmask;
+int callwheelsize, callwheelmask;
 
 /*
  * The callout cpu migration entity represents informations necessary for
@@ -94,51 +98,38 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFL
  */
 struct cc_mig_ent {
 #ifdef SMP
-	void	(*ce_migration_func)(void *);
-	void	*ce_migration_arg;
-	int	ce_migration_cpu;
-	int	ce_migration_ticks;
+	void		(*ce_migration_func)(void *);
+	void		*ce_migration_arg;
+	int		ce_migration_cpu;
+	struct bintime	ce_migration_time;
#endif
 };
 
 /*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
- * In particular:
- *	cc_ticks is incremented once per tick in callout_cpu().
- *	It tracks the global 'ticks' but in a way that the individual
- *	threads should not worry about races in the order in which
- *	hardclock() and hardclock_cpu() run on the various CPUs.
- *	cc_softclock is advanced in callout_cpu() to point to the
- *	first entry in cc_callwheel that may need handling. In turn,
- *	a softclock() is scheduled so it can serve the various entries i
- *	such that cc_softclock <= i <= cc_ticks .
- *	XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- *	cc_ticks is also used in callout_reset_cpu() to determine
- *	when the callout should be served.
 */
struct callout_cpu {
	struct cc_mig_ent	cc_migrating_entity;
	struct mtx		cc_lock;
	struct callout		*cc_callout;
	struct callout_tailq	*cc_callwheel;
+	struct callout_tailq	cc_expireq;
	struct callout_list	cc_callfree;
	struct callout		*cc_next;
	struct callout		*cc_curr;
+	struct bintime		cc_firstevent;
+	struct bintime		cc_lastscan;
	void			*cc_cookie;
-	int			cc_ticks;
-	int			cc_softticks;
	int			cc_cancel;
	int			cc_waiting;
-	int			cc_firsttick;
};
 
#ifdef SMP
#define	cc_migration_func	cc_migrating_entity.ce_migration_func
#define	cc_migration_arg	cc_migrating_entity.ce_migration_arg
#define	cc_migration_cpu	cc_migrating_entity.ce_migration_cpu
-#define	cc_migration_ticks	cc_migrating_entity.ce_migration_ticks
+#define	cc_migration_time	cc_migrating_entity.ce_migration_time
 
struct callout_cpu cc_cpu[MAXCPU];
#define	CPUBLOCK	MAXCPU
@@ -152,27 +143,37 @@ struct callout_cpu cc_cpu;
#define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
#define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
+#define	C_PRECISION	0x2
 
+#define	FREQ2BT(freq, bt)						\
+{									\
+	(bt)->sec = 0;							\
+	(bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1;	\
+}
+
+#define	TIME_T_MAX							\
+	(sizeof(time_t) == (sizeof(int64_t)) ? INT64_MAX : INT32_MAX)
+
static int timeout_cpu;
-void (*callout_new_inserted)(int cpu, int ticks) = NULL;
+void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL;
 
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
/**
 * Locked by cc_lock:
- *   cc_curr         - If a callout is in progress, it is curr_callout.
- *                     If curr_callout is non-NULL, threads waiting in
+ *   cc_curr         - If a callout is in progress, it is cc_curr.
+ *                     If cc_curr is non-NULL, threads waiting in
 *                     callout_drain() will be woken up as soon as the
 *                     relevant callout completes.
- *   cc_cancel       - Changing to 1 with both callout_lock and c_lock held
+ *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
 *                     guarantees that the current callout will not run.
 *                     The softclock() function sets this to 0 before it
 *                     drops callout_lock to acquire c_lock, and it calls
 *                     the handler only if curr_cancelled is still 0 after
- *                     c_lock is successfully acquired.
+ *                     cc_lock is successfully acquired.
 *   cc_waiting      - If a thread is waiting in callout_drain(), then
 *                     callout_wait is nonzero.  Set only when
- *                     curr_callout is non-NULL.
+ *                     cc_curr is non-NULL.
 */
 
/*
@@ -184,7 +185,8 @@ cc_cme_cleanup(struct callout_cpu *cc)
#ifdef SMP
	cc->cc_migration_cpu = CPUBLOCK;
-	cc->cc_migration_ticks = 0;
+	cc->cc_migration_time.sec = 0;
+	cc->cc_migration_time.frac = 0;
	cc->cc_migration_func = NULL;
	cc->cc_migration_arg = NULL;
#endif
@@ -220,10 +222,9 @@ kern_timeout_callwheel_alloc(caddr_t v)
	/*
	 * Calculate callout wheel size
	 */
-	for (callwheelsize = 1, callwheelbits = 0;
-	     callwheelsize < ncallout;
-	     callwheelsize <<= 1, ++callwheelbits)
-		;
+	callwheelsize = 1;
+	while (callwheelsize < ncallout)
+		callwheelsize <<= 1;
	callwheelmask = callwheelsize - 1;
 
	cc->cc_callout = (struct callout *)v;
@@ -244,6 +245,7 @@ callout_cpu_init(struct callout_cpu *cc)
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&cc->cc_callwheel[i]);
	}
+	TAILQ_INIT(&cc->cc_expireq);
	cc_cme_cleanup(cc);
	if (cc->cc_callout == NULL)
		return;
@@ -332,12 +334,28 @@ start_softclock(void *dummy)
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock,
    NULL);
 
+static inline int
+callout_hash(struct bintime *bt)
+{
+
+	return (int) ((bt->sec<<10)+(bt->frac>>54));
+}
+
+static inline int
+get_bucket(struct bintime *bt)
+{
+
+	return callout_hash(bt) & callwheelmask;
+}
+
 void
-callout_tick(void)
+callout_process(void)
{
+	struct bintime max, min, next, now, tmp_max, tmp_min;
+	struct callout *tmp;
	struct callout_cpu *cc;
-	int need_softclock;
-	int bucket;
+	struct callout_tailq *sc;
+	int cpu, first, future, last, need_softclock;
 
	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
@@ -346,48 +364,112 @@ void
	need_softclock = 0;
	cc = CC_SELF();
	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	cc->cc_firsttick = cc->cc_ticks = ticks;
-	for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
-		bucket = cc->cc_softticks & callwheelmask;
-		if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
-			need_softclock = 1;
+	binuptime(&now);
+	cpu = curcpu;
+	first = callout_hash(&cc->cc_lastscan);
+	last = callout_hash(&now);
+	/*
+	 * Check if we wrapped around the entire wheel from the last scan.
+	 * In that case, we need to scan the entire wheel for pending callouts.
+	 */
+	last = (last - first >= callwheelsize) ? (first - 1) & callwheelmask :
+	    last & callwheelmask;
+	first &= callwheelmask;
+	for (;;) {
+		sc = &cc->cc_callwheel[first];
+		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
+			next = tmp->c_time;
+			bintime_sub(&next, &tmp->c_precision);
+			if (bintime_cmp(&next, &now, <=)) {
+				/*
+				 * Consumer told us the callout may be run
+				 * directly from hardware interrupt context.
+				 */
+				if (tmp->c_flags & CALLOUT_DIRECT) {
+					tmp->c_func(tmp->c_arg);
+					TAILQ_REMOVE(sc, tmp, c_links.tqe);
+					tmp->c_flags &= ~CALLOUT_PENDING;
+				} else {
+					TAILQ_INSERT_TAIL(&cc->cc_expireq,
+					    tmp, c_staiter);
+					TAILQ_REMOVE(sc, tmp, c_links.tqe);
+					tmp->c_flags |= CALLOUT_PROCESSED;
+					need_softclock = 1;
+				}
+			}
+		}
+		if (first == last)
			break;
-		}
+		first = (first + 1) & callwheelmask;
	}
+	future = (last + hz / 4) & callwheelmask;
+	max.sec = min.sec = TIME_T_MAX;
+	max.frac = min.frac = UINT64_MAX;
+	/*
+	 * Look for the first bucket in the future that contains some event,
+	 * up to some point, so that we can look for aggregation.
+	 */
+	for (;;) {
+		sc = &cc->cc_callwheel[last];
+		TAILQ_FOREACH(tmp, sc, c_links.tqe) {
+			tmp_max = tmp_min = tmp->c_time;
+			bintime_add(&tmp_max, &tmp->c_precision);
+			bintime_sub(&tmp_min, &tmp->c_precision);
+			/*
+			 * This is the first event we're going to process or
+			 * the event's maximal time is less than the present
+			 * minimal one.  In both cases, take it.
+			 */
+			if (bintime_cmp(&tmp_max, &min, <)) {
+				max = tmp_max;
+				min = tmp_min;
+				continue;
+			}
+			/*
+			 * Event minimal time is bigger than present maximal
+			 * time, so it cannot be aggregated.
+			 */
+			if (bintime_cmp(&tmp_min, &max, >))
+				continue;
+			/*
+			 * If neither of the two previous happened, just take
+			 * the intersection of events.
+			 */
+			min = (bintime_cmp(&tmp_min, &min, >)) ? tmp_min : min;
+			max = (bintime_cmp(&tmp_max, &max, >)) ? tmp_max : max;
+		}
+		if (last == future || max.sec != TIME_T_MAX)
+			break;
+		last = (last + 1) & callwheelmask;
+	}
+	if (max.sec == TIME_T_MAX) {
+		next.sec = 0;
+		next.frac = (uint64_t)1 << (64 - 2);
+		bintime_add(&next, &now);
+	} else {
+		/*
+		 * Now that we found something to aggregate, schedule an
+		 * interrupt in the middle of the previously calculated range.
+		 */
+		bintime_add(&max, &min);
+		next = max;
+		next.frac >>= 1;
+		if (next.sec & 1)
+			next.frac |= ((uint64_t)1 << 63);
+		next.sec >>= 1;
+	}
+	cc->cc_firstevent = next;
+	if (callout_new_inserted != NULL)
+		(*callout_new_inserted)(cpu, next);
+	cc->cc_lastscan = now;
	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
-	if (need_softclock)
+	if (need_softclock) {
		swi_sched(cc->cc_cookie, 0);
-}
-
-int
-callout_tickstofirst(int limit)
-{
-	struct callout_cpu *cc;
-	struct callout *c;
-	struct callout_tailq *sc;
-	int curticks;
-	int skip = 1;
-
-	cc = CC_SELF();
-	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	curticks = cc->cc_ticks;
-	while( skip < ncallout && skip < limit ) {
-		sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
-		/* search scanning ticks */
-		TAILQ_FOREACH( c, sc, c_links.tqe ){
-			if (c->c_time - curticks <= ncallout)
-				goto out;
-		}
-		skip++;
	}
-out:
-	cc->cc_firsttick = curticks + skip;
-	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	return (skip);
}
 
static struct callout_cpu *
@@ -415,25 +497,67 @@ callout_lock(struct callout *c)
}
 
static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
-    void (*func)(void *), void *arg, int cpu)
+callout_cc_add(struct callout *c, struct callout_cpu *cc,
+    struct bintime to_bintime, void (*func)(void *), void *arg, int cpu,
+    int flags)
{
-
+	struct bintime bt;
+	int bucket, r_shift;
+	uint64_t r_val;
+
	CC_LOCK_ASSERT(cc);
-
-	if (to_ticks <= 0)
-		to_ticks = 1;
+	if (bintime_cmp(&to_bintime, &cc->cc_lastscan, <))
+		to_bintime = cc->cc_lastscan;
	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	if (flags & C_DIRECT_EXEC)
+		c->c_flags |= CALLOUT_DIRECT;
+	c->c_flags &= ~CALLOUT_PROCESSED;
	c->c_func = func;
-	c->c_time = ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	c->c_time = to_bintime;
+	bintime_clear(&c->c_precision);
+	if (flags & C_PRECISION) {
+		r_shift = ((flags >> 2) & PRECISION_RANGE);
+		r_val = (r_shift != 0) ? (uint64_t)1 << (64 - r_shift) : 0;
+		/*
+		 * Round as long as the specified precision is coarse
+		 * (up to 8ms).  In order to play safe, round to half of the
+		 * interval and set half precision.
+		 */
+		if (r_shift < 6) {
+			r_val = (r_shift != 0) ? r_val >> 2 :
+			    ((uint64_t)1 << (64 - 1)) - 1;
+			/*
+			 * Round only if c_time is not a multiple of the
+			 * rounding factor.
+			 */
+			if ((c->c_time.frac & r_val) != r_val) {
+				c->c_time.frac |= r_val - 1;
+				c->c_time.frac += 1;
+				if (c->c_time.frac == 0)
+					c->c_time.sec += 1;
+			}
+		}
+		c->c_precision.frac = r_val;
+		CTR6(KTR_CALLOUT, "rounding %d.%08x%08x to %d.%08x%08x",
+		    to_bintime.sec, (u_int) (to_bintime.frac >> 32),
+		    (u_int) (to_bintime.frac & 0xffffffff), c->c_time.sec,
+		    (u_int) (c->c_time.frac >> 32),
+		    (u_int) (c->c_time.frac & 0xffffffff));
+	}
+	bucket = get_bucket(&c->c_time);
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[bucket & callwheelmask],
	    c, c_links.tqe);
-	if ((c->c_time - cc->cc_firsttick) < 0 &&
-	    callout_new_inserted != NULL) {
-		cc->cc_firsttick = c->c_time;
-		(*callout_new_inserted)(cpu,
-		    to_ticks + (ticks - cc->cc_ticks));
+	/*
+	 * Inform the eventtimers(4) subsystem there's a new callout
+	 * that has been inserted, but only if really required.
+	 */
+	bt = c->c_time;
+	bintime_add(&bt, &c->c_precision);
+	if (callout_new_inserted != NULL &&
+	    (bintime_cmp(&bt, &cc->cc_firstevent, <) ||
+	    (cc->cc_firstevent.sec == 0 && cc->cc_firstevent.frac == 0))) {
+		cc->cc_firstevent = c->c_time;
+		(*callout_new_inserted)(cpu, c->c_time);
	}
}
@@ -442,7 +566,7 @@ callout_cc_del(struct callout *c, struct callout_c
{
 
	if (cc->cc_next == c)
-		cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+		cc->cc_next = TAILQ_NEXT(c, c_staiter);
	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
		c->c_func = NULL;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
@@ -462,7 +586,8 @@ softclock_call_cc(struct callout *c, struct callou
	struct callout_cpu *new_cc;
	void (*new_func)(void *);
	void *new_arg;
-	int new_cpu, new_ticks;
+	int new_cpu;
+	struct bintime new_time;
#endif
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
@@ -471,7 +596,7 @@ softclock_call_cc(struct callout *c, struct callou
	static timeout_t *lastfunc;
#endif
 
-	cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+	cc->cc_next = TAILQ_NEXT(c, c_staiter);
	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
	sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
	c_lock = c->c_lock;
@@ -574,7 +699,7 @@ skip:
	 * migration just perform it now.
	 */
	new_cpu = cc->cc_migration_cpu;
-	new_ticks = cc->cc_migration_ticks;
+	new_time = cc->cc_migration_time;
	new_func = cc->cc_migration_func;
	new_arg = cc->cc_migration_arg;
	cc_cme_cleanup(cc);
@@ -598,8 +723,8 @@ skip:
	 * is not easy.
	 */
	new_cc = callout_cpu_switch(c, cc, new_cpu);
-	callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
-	    new_cpu);
+	callout_cc_add(c, new_cc, new_time, new_func, new_arg,
+	    new_cpu, 0);
	CC_UNLOCK(new_cc);
	CC_LOCK(cc);
#else
@@ -633,10 +758,7 @@ softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
-	struct callout_tailq *bucket;
-	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
-	int depth;
	int mpcalls;
	int lockcalls;
	int gcalls;
@@ -644,49 +766,37 @@ softclock(void *arg)
#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */
-
+ 
	mpcalls = 0;
	lockcalls = 0;
	gcalls = 0;
-	depth = 0;
	steps = 0;
	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
-	while (cc->cc_softticks - 1 != cc->cc_ticks) {
-		/*
-		 * cc_softticks may be modified by hard clock, so cache
-		 * it while we work on a given bucket.
-		 */
-		curticks = cc->cc_softticks;
-		cc->cc_softticks++;
-		bucket = &cc->cc_callwheel[curticks & callwheelmask];
-		c = TAILQ_FIRST(bucket);
-		while (c != NULL) {
-			depth++;
-			if (c->c_time != curticks) {
-				c = TAILQ_NEXT(c, c_links.tqe);
-				++steps;
-				if (steps >= MAX_SOFTCLOCK_STEPS) {
-					cc->cc_next = c;
-					/* Give interrupts a chance. */
-					CC_UNLOCK(cc);
-					;	/* nothing */
-					CC_LOCK(cc);
-					c = cc->cc_next;
-					steps = 0;
-				}
-			} else {
-				TAILQ_REMOVE(bucket, c, c_links.tqe);
-				c = softclock_call_cc(c, cc, &mpcalls,
-				    &lockcalls, &gcalls);
-				steps = 0;
-			}
-		}
-	}
+
+	c = TAILQ_FIRST(&cc->cc_expireq);
+	while (c != NULL) {
+		++steps;
+		if (steps >= MAX_SOFTCLOCK_STEPS) {
+			cc->cc_next = c;
+			/* Give interrupts a chance. */
+			CC_UNLOCK(cc);
+			;	/* nothing */
+			CC_LOCK(cc);
+			c = cc->cc_next;
+			steps = 0;
+		} else {
+			TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter);
+			c = softclock_call_cc(c, cc, &mpcalls,
+			    &lockcalls, &gcalls);
+		}
	}
+#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+#endif
	cc->cc_next = NULL;
	CC_UNLOCK(cc);
}
@@ -776,13 +886,22 @@ callout_handle_init(struct callout_handle *handle)
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
-int
-callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
-    void *arg, int cpu)
+int
+_callout_reset_on(struct callout *c, struct bintime *bt, int to_ticks,
+    void (*ftn)(void *), void *arg, int cpu, int flags)
{
+	struct bintime now, to_bt;
	struct callout_cpu *cc;
	int cancelled = 0;
+	int bucket;
 
+	if (bt == NULL) {
+		FREQ2BT(hz,&to_bt);
+		getbinuptime(&now);
+		bintime_mul(&to_bt,to_ticks);
+		bintime_add(&to_bt,&now);
+	} else
+		to_bt = *bt;
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced.
@@ -811,12 +930,17 @@ callout_handle_init(struct callout_handle *handle)
		}
	}
	if (c->c_flags & CALLOUT_PENDING) {
-		if (cc->cc_next == c) {
-			cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+		if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+			if (cc->cc_next == c)
+				cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+			bucket = get_bucket(&c->c_time);
+			TAILQ_REMOVE(&cc->cc_callwheel[bucket], c,
+			    c_links.tqe);
+		} else {
+			if (cc->cc_next == c)
+				cc->cc_next = TAILQ_NEXT(c, c_staiter);
+			TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter);
		}
-		TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
-		    c_links.tqe);
-
		cancelled = 1;
		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
	}
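The bt == NULL path of _callout_reset_on() above is the compatibility route for callers that still think in ticks: FREQ2BT() turns 1/hz into a bintime and the relative timeout is added to the current uptime. The sketch below re-implements that conversion in userland (the kernel uses bintime_mul(); the loop here only keeps the example short), so it is an illustration of the idea rather than the actual implementation.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    #define FREQ2BT(freq, bt) do {                                  \
        (bt)->sec = 0;                                              \
        (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1;  \
    } while (0)

    static void
    bintime_add(struct bintime *bt, const struct bintime *bt2)
    {
        uint64_t u = bt->frac;

        bt->frac += bt2->frac;
        if (bt->frac < u)       /* carry out of the fraction */
            bt->sec++;
        bt->sec += bt2->sec;
    }

    int
    main(void)
    {
        struct bintime now = { 100, 0 };    /* pretend uptime: 100 s */
        struct bintime period, deadline;
        int hz = 1000, to_ticks = 250, i;

        FREQ2BT(hz, &period);               /* 1/hz as a bintime */
        deadline = now;
        for (i = 0; i < to_ticks; i++)      /* now + to_ticks/hz */
            bintime_add(&deadline, &period);
        printf("deadline = %jd + %.6f s\n", (intmax_t)deadline.sec,
            (double)deadline.frac / 18446744073709551616.0);
        return (0);
    }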
"re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, to_bt, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_bt.sec), + (u_int)(to_bt.frac >> 32)); CC_UNLOCK(cc); return (cancelled); @@ -874,7 +1000,7 @@ _callout_stop_safe(c, safe) { struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; + int use_lock, sq_locked, bucket; /* * Some old subsystems don't hold Giant while running a callout_stop(), @@ -1024,8 +1150,12 @@ again: CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + bucket = get_bucket(&c->c_time); + TAILQ_REMOVE(&cc->cc_callwheel[bucket], c, + c_links.tqe); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_staiter); callout_cc_del(c, cc); CC_UNLOCK(cc); Index: sys/kern/kern_time.c =================================================================== --- sys/kern/kern_time.c (.../head) (revision 237923) +++ sys/kern/kern_time.c (.../projects/calloutng) (revision 237923) @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -352,37 +353,38 @@ static int nanowait; int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) { - struct timespec ts, ts2, ts3; - struct timeval tv; + struct timespec ts; + struct bintime bt, bt2, tmp; int error; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) return (0); - getnanouptime(&ts); - timespecadd(&ts, rqt); - TIMESPEC_TO_TIMEVAL(&tv, rqt); + binuptime(&bt); + timespec2bintime(rqt, &tmp); + bintime_add(&bt,&tmp); for (;;) { - error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp", - tvtohz(&tv)); - getnanouptime(&ts2); + sleepq_lock(&nanowait); + sleepq_add(&nanowait, NULL, "nanslp", PWAIT | PCATCH, 0); + sleepq_set_timeout_bt(&nanowait,bt); + error = sleepq_timedwait_sig(&nanowait, PWAIT | PCATCH); + binuptime(&bt2); if (error != EWOULDBLOCK) { if (error == ERESTART) error = EINTR; if (rmt != NULL) { - timespecsub(&ts, &ts2); + tmp = bt; + bintime_sub(&tmp, &bt2); + bintime2timespec(&tmp, &ts); if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } return (error); } - if (timespeccmp(&ts2, &ts, >=)) + if (bintime_cmp(&bt2, &bt, >=)) return (0); - ts3 = ts; - timespecsub(&ts3, &ts2); - TIMESPEC_TO_TIMEVAL(&tv, &ts3); } } Index: sys/kern/kern_clock.c =================================================================== --- sys/kern/kern_clock.c (.../head) (revision 237923) +++ sys/kern/kern_clock.c (.../projects/calloutng) (revision 237923) @@ -459,7 +459,7 @@ hardclock_cpu(int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); + callout_process(); } /* @@ -549,7 +549,6 @@ hardclock_cnt(int cnt, int usermode) if (td->td_intr_frame != NULL) PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); #endif - callout_tick(); /* We are in charge to handle this tick duty. */ if (newticks > 0) { /* Dangerous and no need to call these things concurrently. 
Index: sys/kern/kern_clock.c
===================================================================
--- sys/kern/kern_clock.c	(.../head)	(revision 237923)
+++ sys/kern/kern_clock.c	(.../projects/calloutng)	(revision 237923)
@@ -459,7 +459,7 @@ hardclock_cpu(int usermode)
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
-	callout_tick();
+	callout_process();
}
 
/*
@@ -549,7 +549,6 @@ hardclock_cnt(int cnt, int usermode)
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
-	callout_tick();
	/* We are in charge to handle this tick duty. */
	if (newticks > 0) {
		/* Dangerous and no need to call these things concurrently. */
Index: sys/kern/kern_clocksource.c
===================================================================
--- sys/kern/kern_clocksource.c	(.../head)	(revision 237923)
+++ sys/kern/kern_clocksource.c	(.../projects/calloutng)	(revision 237923)
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
@@ -71,9 +72,7 @@ static int	round_freq(struct eventtimer *et, int
static void	getnextcpuevent(struct bintime *event, int idle);
static void	getnextevent(struct bintime *event);
static int	handleevents(struct bintime *now, int fake);
-#ifdef SMP
-static void	cpu_new_callout(int cpu, int ticks);
-#endif
+static void	cpu_new_callout(int cpu, struct bintime bt);
 
static struct mtx	et_hw_mtx;
 
@@ -135,6 +134,7 @@ struct pcpu_state {
	struct bintime	nexthard;	/* Next hardlock() event. */
	struct bintime	nextstat;	/* Next statclock() event. */
	struct bintime	nextprof;	/* Next profclock() event. */
+	struct bintime	nextcall;	/* Next callout event. */
#ifdef KDTRACE_HOOKS
	struct bintime	nextcyc;	/* Next OpenSolaris cyclics event. */
#endif
@@ -168,8 +168,8 @@ hardclockintr(void)
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR4(KTR_SPARE2, "ipi at %d:    now  %d.%08x%08x",
-	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
-	    (unsigned int)(now.frac & 0xffffffff));
+	    curcpu, now.sec, (u_int)(now.frac >> 32),
+	    (u_int)(now.frac & 0xffffffff));
	done = handleevents(&now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}
@@ -188,8 +188,8 @@ handleevents(struct bintime *now, int fake)
	int done, runs;
 
	CTR4(KTR_SPARE2, "handle at %d:  now  %d.%08x%08x",
-	    curcpu, now->sec, (unsigned int)(now->frac >> 32),
-	    (unsigned int)(now->frac & 0xffffffff));
+	    curcpu, now->sec, (u_int)(now->frac >> 32),
+	    (u_int)(now->frac & 0xffffffff));
	done = 0;
	if (fake) {
		frame = NULL;
@@ -236,6 +236,11 @@ handleevents(struct bintime *now, int fake)
		}
	} else
		state->nextprof = state->nextstat;
+	if (bintime_cmp(now, &state->nextcall, >=) &&
+	    (state->nextcall.sec != -1)) {
+		state->nextcall.sec = -1;
+		callout_process();
+	}
 
#ifdef KDTRACE_HOOKS
	if (fake == 0 && cyclic_clock_func != NULL &&
@@ -267,24 +272,28 @@ handleevents(struct bintime *now, int fake)
static void
getnextcpuevent(struct bintime *event, int idle)
{
+	struct pcpu_state *state;
	struct bintime tmp;
-	struct pcpu_state *state;
-	int skip;
-
+	int hardfreq;
+
	state = DPCPU_PTR(timerstate);
-	/* Handle hardclock() events. */
+	/* Handle hardclock() events, skipping some if CPU is idle. */
	*event = state->nexthard;
	if (idle || (!activetick && !profiling &&
	    (timer->et_flags & ET_FLAGS_PERCPU) == 0)) {
-		skip = idle ? 4 : (stathz / 2);
-		if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip)
-			skip = tc_min_ticktock_freq;
-		skip = callout_tickstofirst(hz / skip) - 1;
-		CTR2(KTR_SPARE2, "skip   at %d: %d", curcpu, skip);
-		tmp = hardperiod;
-		bintime_mul(&tmp, skip);
-		bintime_add(event, &tmp);
+		hardfreq = idle ? 4 : (stathz / 2);
+		if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > hardfreq)
+			hardfreq = tc_min_ticktock_freq;
+		if (hz > hardfreq) {
+			tmp = hardperiod;
+			bintime_mul(&tmp, hz / hardfreq - 1);
+			bintime_add(event, &tmp);
+		}
	}
+	/* Handle callout events. */
+	if (state->nextcall.sec != -1 &&
+	    bintime_cmp(event, &state->nextcall, >))
+		*event = state->nextcall;
	if (!idle) { /* If CPU is active - handle other types of events. */
		if (bintime_cmp(event, &state->nextstat, >))
			*event = state->nextstat;
@@ -625,10 +634,9 @@ cpu_initclocks_bsp(void)
#ifdef KDTRACE_HOOKS
		state->nextcyc.sec = -1;
#endif
+		state->nextcall.sec = -1;
	}
-#ifdef SMP
	callout_new_inserted = cpu_new_callout;
-#endif
	periodic = want_periodic;
	/* Grab requested timer or the best of present. */
	if (timername[0])
@@ -856,52 +864,48 @@ clocksource_cyc_set(const struct bintime *t)
}
#endif
 
-#ifdef SMP
static void
-cpu_new_callout(int cpu, int ticks)
+cpu_new_callout(int cpu, struct bintime bt)
{
-	struct bintime tmp;
+	struct bintime now;
	struct pcpu_state *state;
 
-	CTR3(KTR_SPARE2, "new co at %d:    on %d in %d",
-	    curcpu, cpu, ticks);
+	CTR5(KTR_SPARE2, "new co at %d:    on %d at %d.%08x%08x",
+	    curcpu, cpu, (int)(bt.sec), (u_int)(bt.frac >> 32),
+	    (u_int)(bt.frac & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);
-	if (state->idle == 0 || busy) {
+
+	/* If there is a callout time already set earlier -- do nothing. */
+	if (state->nextcall.sec != -1 &&
+	    bintime_cmp(&bt, &state->nextcall, >=)) {
		ET_HW_UNLOCK(state);
		return;
	}
-	/*
-	 * If timer is periodic - just update next event time for target CPU.
-	 * If timer is global - there is chance it is already programmed.
-	 */
-	if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) {
-		tmp = hardperiod;
-		bintime_mul(&tmp, ticks - 1);
-		bintime_add(&tmp, &state->nexthard);
-		if (bintime_cmp(&tmp, &state->nextevent, <))
-			state->nextevent = tmp;
-		if (periodic ||
-		    bintime_cmp(&state->nextevent, &nexttick, >=)) {
-			ET_HW_UNLOCK(state);
-			return;
-		}
+	state->nextcall = bt;
+	/* If there is some other event set earlier -- do nothing. */
+	if (bintime_cmp(&state->nextcall, &state->nextevent, >=)) {
+		ET_HW_UNLOCK(state);
+		return;
	}
-	/*
-	 * Otherwise we have to wake that CPU up, as we can't get present
-	 * bintime to reprogram global timer from here. If timer is per-CPU,
-	 * we by definition can't do it from here.
-	 */
+	state->nextevent = state->nextcall;
+	/* If timer is periodic -- there is nothing to reprogram. */
+	if (periodic) {
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/* If timer is global or of the current CPU -- reprogram it. */
+	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
+		binuptime(&now);
+		loadtimer(&now, 0);
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/* Otherwise make the other CPU reprogram it. */
+	state->handle = 1;
	ET_HW_UNLOCK(state);
-	if (timer->et_flags & ET_FLAGS_PERCPU) {
-		state->handle = 1;
-		ipi_cpu(cpu, IPI_HARDCLOCK);
-	} else {
-		if (!cpu_idle_wakeup(cpu))
-			ipi_cpu(cpu, IPI_AST);
-	}
+	ipi_cpu(cpu, IPI_HARDCLOCK);
}
-#endif
 
/*
 * Report or change the active event timers hardware.
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c	(.../head)	(revision 237923)
+++ sys/kern/subr_sleepqueue.c	(.../projects/calloutng)	(revision 237923)
@@ -361,9 +361,10 @@ sleepq_add(void *wchan, struct lock_object *lock,
 * Sets a timeout that will remove the current thread from the specified
 * sleep queue after timo ticks if the thread has not already been awakened.
 */
-void
-sleepq_set_timeout(void *wchan, int timo)
+void
+_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo)
{
+	struct sleepqueue_chain *sc;
	struct thread *td;
 
@@ -373,7 +374,12 @@ sleepq_add(void *wchan, struct lock_object *lock,
	MPASS(TD_ON_SLEEPQ(td));
	MPASS(td->td_sleepqueue == NULL);
	MPASS(wchan != NULL);
-	callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
+	if (bt == NULL)
+		callout_reset_curcpu(&td->td_slpcallout, timo,
+		    sleepq_timeout, td);
+	else
+		callout_reset_bt_on(&td->td_slpcallout, bt,
+		    sleepq_timeout, td, PCPU_GET(cpuid), 0);
}
 
/*
Index: sys/kern/sys_generic.c
===================================================================
--- sys/kern/sys_generic.c	(.../head)	(revision 237923)
+++ sys/kern/sys_generic.c	(.../projects/calloutng)	(revision 237923)
@@ -102,7 +102,7 @@ static int	dofilewrite(struct thread *, int, struc
		    off_t, int);
static void	doselwakeup(struct selinfo *, int);
static void	seltdinit(struct thread *);
-static int	seltdwait(struct thread *, int);
+static int	seltdwait(struct thread *, struct bintime *, int);
static void	seltdclear(struct thread *);
 
/*
@@ -902,7 +902,8 @@ kern_select(struct thread *td, int nd, fd_set *fd_
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
-	struct timeval atv, rtv, ttv;
+	struct bintime abt, rbt;
+	struct timeval atv;
	int error, lf, ndu, timo;
	u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
 
@@ -996,33 +997,34 @@ kern_select(struct thread *td, int nd, fd_set *fd_
 
	if (tvp != NULL) {
		atv = *tvp;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 ||
+		    atv.tv_usec >= 1000000) {
			error = EINVAL;
			goto done;
		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
	}
-	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = selscan(td, ibits, obits, nd);
		if (error || td->td_retval[0] != 0)
			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
		}
-		error = seltdwait(td, timo);
+		else {
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
+		}
		if (error)
			break;
		error = selrescan(td, ibits, obits);
@@ -1254,7 +1256,8 @@ sys_poll(td, uap)
{
	struct pollfd *bits;
	struct pollfd smallbits[32];
-	struct timeval atv, rtv, ttv;
+	struct bintime abt, rbt;
+	struct timeval atv;
	int error, timo;
	u_int nfds;
	size_t ni;
@@ -1273,33 +1276,33 @@ sys_poll(td, uap)
	if (uap->timeout != INFTIM) {
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 ||
+		    atv.tv_usec >= 1000000) {
			error = EINVAL;
			goto done;
		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
	}
-	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = pollscan(td, bits, nfds);
		if (error || td->td_retval[0] != 0)
			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
+		} else {
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
		}
-		error = seltdwait(td, timo);
		if (error)
			break;
		error = pollrescan(td);
@@ -1518,7 +1521,7 @@ selsocket(struct socket *so, int events, struct ti
			timo = ttv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&ttv);
		}
-		error = seltdwait(td, timo);
+		error = seltdwait(td, NULL, timo);
		seltdclear(td);
		if (error)
			break;
@@ -1697,7 +1700,7 @@ out:
}
 
static int
-seltdwait(struct thread *td, int timo)
+seltdwait(struct thread *td, struct bintime *bt, int timo)
{
	struct seltd *stp;
	int error;
@@ -1716,9 +1719,11 @@ static int
		mtx_unlock(&stp->st_mtx);
		return (0);
	}
-	if (timo > 0)
+	if (bt == NULL && timo > 0)
		error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
-	else
+	else if (bt != NULL)
+		error = cv_timedwait_bt_sig(&stp->st_wait, &stp->st_mtx, *bt);
+	else
		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
	mtx_unlock(&stp->st_mtx);
Index: sys/kern/kern_condvar.c
===================================================================
--- sys/kern/kern_condvar.c	(.../head)	(revision 237923)
+++ sys/kern/kern_condvar.c	(.../projects/calloutng)	(revision 237923)
@@ -342,7 +342,8 @@ _cv_timedwait(struct cv *cvp, struct lock_object *
 * a signal was caught.
 */
int
-_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+    struct bintime *bt, int timo)
{
	WITNESS_SAVE_DECL(lock_witness);
	struct lock_class *class;
@@ -379,7 +380,10 @@ int
	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
	    SLEEPQ_INTERRUPTIBLE, 0);
-	sleepq_set_timeout(cvp, timo);
+	if (bt == NULL)
+		sleepq_set_timeout(cvp, timo);
+	else
+		sleepq_set_timeout_bt(cvp, *bt);
	if (lock != &Giant.lock_object) {
		if (class->lc_flags & LC_SLEEPABLE)
			sleepq_release(cvp);
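select() and poll() above now carry an absolute bintime deadline and compare it against the current uptime with bintime_cmp(). The macro below restates that comparison idiom (its shape follows sys/time.h; it is repeated here only so the example is self-contained):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    #define bintime_cmp(a, b, cmp)              \
        (((a)->sec == (b)->sec) ?               \
            ((a)->frac cmp (b)->frac) :         \
            ((a)->sec cmp (b)->sec))

    int
    main(void)
    {
        struct bintime deadline = { 10, (uint64_t)1 << 63 };    /* 10.5 s  */
        struct bintime a = { 10, (uint64_t)1 << 62 };           /* 10.25 s */
        struct bintime b = { 11, 0 };                           /* 11.0 s  */

        printf("a >= deadline: %d\n", bintime_cmp(&a, &deadline, >=));
        printf("b >= deadline: %d\n", bintime_cmp(&b, &deadline, >=));
        return (0);
    }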
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c	(.../head)	(revision 237923)
+++ sys/netinet/tcp_timer.c	(.../projects/calloutng)	(revision 237923)
@@ -667,21 +667,39 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
 
#define	ticks_to_msecs(t)	(1000*(t) / hz)
 
+static int
+delta_bintime_in_msecs(struct bintime bt, struct bintime now)
+{
+
+	bintime_sub(&bt, &now);
+	return (((uint64_t)1000 * (uint64_t)(bt.frac >> 32)) >> 32) +
+	    (bt.sec * 1000);
+}
+
 void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+    struct xtcp_timer *xtimer)
{
-	bzero(xtimer, sizeof(struct xtcp_timer));
+	struct bintime bt, now;
+
+	bzero(xtimer, sizeof(*xtimer));
	if (timer == NULL)
		return;
-	if (callout_active(&timer->tt_delack))
-		xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
-	if (callout_active(&timer->tt_rexmt))
-		xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
-	if (callout_active(&timer->tt_persist))
-		xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
-	if (callout_active(&timer->tt_keep))
-		xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
-	if (callout_active(&timer->tt_2msl))
-		xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+	bintime_clear(&bt);
+	getbinuptime(&now);
+	if (callout_active(&timer->tt_delack))
+		xtimer->tt_delack = delta_bintime_in_msecs(
+		    timer->tt_delack.c_time, now);
+	if (callout_active(&timer->tt_rexmt))
+		xtimer->tt_rexmt = delta_bintime_in_msecs(
+		    timer->tt_rexmt.c_time, now);
+	if (callout_active(&timer->tt_persist))
+		xtimer->tt_persist = delta_bintime_in_msecs(
+		    timer->tt_persist.c_time, now);
+	if (callout_active(&timer->tt_keep))
+		xtimer->tt_keep = delta_bintime_in_msecs(
+		    timer->tt_keep.c_time, now);
+	if (callout_active(&timer->tt_2msl))
+		xtimer->tt_2msl = delta_bintime_in_msecs(
+		    timer->tt_2msl.c_time, now);
	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}
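delta_bintime_in_msecs() above scales the top 32 bits of the fraction by 1000 and adds the whole seconds. The same arithmetic in a standalone form, for illustration only:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct bintime {
        time_t   sec;
        uint64_t frac;
    };

    static int
    bintime_to_msecs(const struct bintime *bt)
    {
        /* 1000 * (upper 32 fraction bits) / 2^32, plus whole seconds. */
        return ((((uint64_t)1000 * (uint32_t)(bt->frac >> 32)) >> 32) +
            bt->sec * 1000);
    }

    int
    main(void)
    {
        struct bintime bt = { 2, (uint64_t)1 << 62 };   /* 2.25 s */

        printf("%d ms\n", bintime_to_msecs(&bt));       /* 2250 */
        return (0);
    }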
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h	(.../head)	(revision 237923)
+++ sys/sys/callout.h	(.../projects/calloutng)	(revision 237923)
@@ -47,7 +47,33 @@
 #define	CALLOUT_RETURNUNLOCKED	0x0010 /* handler returns with mtx unlocked */
 #define	CALLOUT_SHAREDLOCK	0x0020 /* callout lock held in shared mode */
 #define	CALLOUT_DFRMIGRATION	0x0040 /* callout in deferred migration mode */
+#define	CALLOUT_PROCESSED	0x0080 /* callout in wheel or processing list? */
+#define	CALLOUT_DIRECT 		0x1000 /* allow exec from hw int context */
 
+#define	C_DIRECT_EXEC		0x0001 /* direct execution of callout */
+#define	C_P1S			0x0002 /* fields related to precision */
+#define	C_P500MS		0x0006
+#define	C_P250MS		0x000a
+#define	C_P125MS		0x000e
+#define	C_P64MS			0x0012
+#define	C_P32MS			0x0016
+#define	C_P16MS			0x001a
+#define	C_P8MS			0x001e
+#define	C_P4MS			0x0022
+#define	C_P2MS			0x0026
+#define	C_P1MS			0x002a
+#define	C_P500US		0x002e
+#define	C_P250US		0x0032
+#define	C_P125US		0x0036
+#define	C_P64US			0x003a
+#define	C_P32US			0x003e
+#define	C_P16US			0x0042
+#define	C_P8US			0x0046
+#define	C_P4US			0x004a
+#define	C_P2US			0x004e
+#define	PRECISION_BITS		7
+#define	PRECISION_RANGE		((1 << PRECISION_BITS) - 1)
+
 struct callout_handle {
	struct callout *callout;
 };
@@ -67,7 +93,16 @@ void	_callout_init_lock(struct callout *, struct l
	_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object :	\
	   NULL, (flags))
 #define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
-int	callout_reset_on(struct callout *, int, void (*)(void *), void *, int);
+int	_callout_reset_on(struct callout *, struct bintime *, int,
+	    void (*)(void *), void *, int, int);
+#define	callout_reset_on(c, to_ticks, fn, arg, cpu)			\
+    _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu),	\
+        (0))
+#define	callout_reset_flags_on(c, to_ticks, fn, arg, cpu, flags)	\
+    _callout_reset_on((c), (NULL), (to_ticks), (fn), (arg), (cpu),	\
+        (flags))
+#define	callout_reset_bt_on(c, bt, fn, arg, cpu, flags)			\
+    _callout_reset_on((c), (bt), (0), (fn), (arg), (cpu), (flags))
 #define	callout_reset(c, on_tick, fn, arg)				\
     callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu)
 #define	callout_reset_curcpu(c, on_tick, fn, arg)			\
@@ -78,9 +113,8 @@ int	callout_schedule_on(struct callout *, int, int
     callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
 #define	callout_stop(c)		_callout_stop_safe(c, 0)
 int	_callout_stop_safe(struct callout *, int);
-void	callout_tick(void);
-int	callout_tickstofirst(int limit);
-extern void (*callout_new_inserted)(int cpu, int ticks);
+void	callout_process(void);
+extern void (*callout_new_inserted)(int cpu, struct bintime bt);
 
 #endif
Index: sys/sys/condvar.h
===================================================================
--- sys/sys/condvar.h	(.../head)	(revision 237923)
+++ sys/sys/condvar.h	(.../projects/calloutng)	(revision 237923)
@@ -56,7 +56,8 @@ void	_cv_wait(struct cv *cvp, struct lock_object *
 void	_cv_wait_unlock(struct cv *cvp, struct lock_object *lock);
 int	_cv_wait_sig(struct cv *cvp, struct lock_object *lock);
 int	_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo);
-int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo);
+int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+	    struct bintime *bt, int timo);
 
 void	cv_signal(struct cv *cvp);
 void	cv_broadcastpri(struct cv *cvp, int pri);
@@ -70,7 +71,9 @@ void	cv_broadcastpri(struct cv *cvp, int pri);
 #define	cv_timedwait(cvp, lock, timo)					\
	_cv_timedwait((cvp), &(lock)->lock_object, (timo))
 #define	cv_timedwait_sig(cvp, lock, timo)				\
-	_cv_timedwait_sig((cvp), &(lock)->lock_object, (timo))
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, (NULL), (timo))
+#define	cv_timedwait_bt_sig(cvp, lock, bt)				\
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, (&bt), (0))
 
 #define	cv_broadcast(cvp)	cv_broadcastpri(cvp, 0)
Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h	(.../head)	(revision 237923)
+++ sys/sys/_callout.h	(.../projects/calloutng)	(revision 237923)
@@ -39,6 +39,7 @@
 #define _SYS__CALLOUT_H
 
 #include
+#include
 
 struct lock_object;
 
@@ -50,7 +51,9 @@ struct callout {
		SLIST_ENTRY(callout) sle;
		TAILQ_ENTRY(callout) tqe;
	} c_links;
-	int	c_time;				/* ticks to the event */
+	TAILQ_ENTRY(callout) c_staiter;
+	struct bintime c_time;			/* absolute time of the event */
+	struct bintime c_precision;		/* delta allowed wrt opt */
	void	*c_arg;				/* function argument */
	void	(*c_func)(void *);		/* function to call */
	struct lock_object *c_lock;		/* lock to handle */
Index: sys/sys/sleepqueue.h
===================================================================
--- sys/sys/sleepqueue.h	(.../head)	(revision 237923)
+++ sys/sys/sleepqueue.h	(.../projects/calloutng)	(revision 237923)
@@ -108,7 +108,11 @@ struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
-void	sleepq_set_timeout(void *wchan, int timo);
+void	_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo);
+#define	sleepq_set_timeout(wchan, timo)					\
+	_sleepq_set_timeout((wchan), (NULL), (timo))
+#define	sleepq_set_timeout_bt(wchan, bt)				\
+	_sleepq_set_timeout((wchan), (&bt), (0))
 u_int	sleepq_sleepcnt(void *wchan, int queue);
 int	sleepq_timedwait(void *wchan, int pri);
 int	sleepq_timedwait_sig(void *wchan, int pri);
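Finally, a hypothetical consumer of the new interface, to show how the pieces are meant to fit together. This is not part of the patch: the foo_* names are invented and the fragment only compiles inside a kernel with this change applied. It arms a self-rearming callout that may run straight from the hardware interrupt path (C_DIRECT_EXEC) and accepts about a millisecond of slop (C_P1MS) so the scheduler can aggregate it with neighbouring events.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/kernel.h>
    #include <sys/callout.h>
    #include <sys/pcpu.h>

    static struct callout foo_co;
    static int foo_ticks;

    static void
    foo_timer(void *arg)
    {
        /* May be called from hardware interrupt context (C_DIRECT_EXEC). */
        /* ... small amount of work ... */

        /* Re-arm, accepting ~1 ms of precision slop. */
        callout_reset_flags_on(&foo_co, foo_ticks, foo_timer, arg,
            PCPU_GET(cpuid), C_DIRECT_EXEC | C_P1MS);
    }

    static void
    foo_start(void)
    {
        callout_init(&foo_co, CALLOUT_MPSAFE);
        foo_ticks = hz / 100;           /* ~10 ms period */
        callout_reset_flags_on(&foo_co, foo_ticks, foo_timer, NULL,
            PCPU_GET(cpuid), C_DIRECT_EXEC | C_P1MS);
    }

    static void
    foo_stop(void)
    {
        callout_stop(&foo_co);
    }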