Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(revision 216531)
+++ sys/kern/kern_timeout.c	(working copy)
@@ -56,6 +56,10 @@
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
+#ifdef SMP
+#include <machine/cpu.h>
+#endif
+
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
@@ -107,10 +111,14 @@
 	struct callout		*cc_next;
 	struct callout		*cc_curr;
 	void			*cc_cookie;
+	void			(*cc_migration_ftn)(void *);
+	void			*cc_migration_arg;
 	int			cc_ticks;
 	int			cc_softticks;
 	int			cc_cancel;
 	int			cc_waiting;
+	int			cc_migration_cpu;
+	int			cc_migration_ticks;
 };
 
 #ifdef SMP
@@ -122,8 +130,10 @@
 #define	CC_CPU(cpu)	&cc_cpu
 #define	CC_SELF()	&cc_cpu
 #endif
+#define	CPUBLOCK	MAXCPU
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
+#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 
@@ -186,6 +196,7 @@
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&cc->cc_callwheel[i]);
 	}
+	cc->cc_migration_cpu = CPUBLOCK;
 	if (cc->cc_callout == NULL)
 		return;
 	for (i = 0; i < ncallout; i++) {
@@ -287,6 +298,12 @@
 
 	for (;;) {
 		cpu = c->c_cpu;
+#ifdef SMP
+		if (cpu == CPUBLOCK) {
+			cpu_spinwait();
+			continue;
+		}
+#endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		if (cpu == c->c_cpu)
@@ -296,6 +313,23 @@
 	return (cc);
 }
 
+static void
+callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
+    void (*ftn)(void *), void *arg, int cpu)
+{
+
+	CC_LOCK_ASSERT(cc);
+
+	if (to_ticks <= 0)
+		to_ticks = 1;
+	c->c_arg = arg;
+	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	c->c_func = ftn;
+	c->c_time = cc->cc_ticks + to_ticks;
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	    c, c_links.tqe);
+}
+
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
@@ -366,11 +400,14 @@
 					steps = 0;
 				}
 			} else {
+				struct callout_cpu *new_cc;
 				void (*c_func)(void *);
-				void *c_arg;
+				void (*new_ftn)(void *);
+				void *c_arg, *new_arg;
 				struct lock_class *class;
 				struct lock_object *c_lock;
 				int c_flags, sharedlock;
+				int new_cpu, new_ticks;
 
 				cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
 				TAILQ_REMOVE(bucket, c, c_links.tqe);
@@ -472,8 +509,41 @@
 					    c_links.sle);
 				}
 				cc->cc_curr = NULL;
-				if (cc->cc_waiting) {
+
+				/*
+				 * If the callout was scheduled for
+				 * migration just perform it now.
+				 */
+				if (cc->cc_migration_cpu != CPUBLOCK) {
+					/*
+					 * There must not be any waiting
+					 * thread now because the callout
+					 * has a blocked CPU.
+					 * Also, the callout must not be
+					 * freed, but that is not easy to
+					 * assert.
+					 */
+					MPASS(cc->cc_waiting == 0);
+					new_cpu = cc->cc_migration_cpu;
+					new_ticks = cc->cc_migration_ticks;
+					new_ftn = cc->cc_migration_ftn;
+					new_arg = cc->cc_migration_arg;
+					cc->cc_migration_cpu = CPUBLOCK;
+					cc->cc_migration_ticks = 0;
+					cc->cc_migration_ftn = NULL;
+					cc->cc_migration_arg = NULL;
+					CC_UNLOCK(cc);
+					new_cc = CC_CPU(new_cpu);
+					CC_LOCK(new_cc);
+					MPASS(c->c_cpu == CPUBLOCK);
+					c->c_cpu = new_cpu;
+					callout_cc_add(c, new_cc, new_ticks,
+					    new_ftn, new_arg, new_cpu);
+					CC_UNLOCK(new_cc);
+					CC_LOCK(cc);
+				} else if (cc->cc_waiting) {
+
 					/*
 					 * There is someone waiting
 					 * for the callout to complete.
					 */
@@ -593,7 +663,6 @@
 	 */
 	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
 		cpu = c->c_cpu;
-retry:
 	cc = callout_lock(c);
 	if (cc->cc_curr == c) {
 		/*
@@ -625,25 +694,34 @@
 		cancelled = 1;
 		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 	}
+#ifdef SMP
 	/*
-	 * If the lock must migrate we have to check the state again as
-	 * we can't hold both the new and old locks simultaneously.
+	 * If the lock must migrate we have to block the callout locking
+	 * until migration is completed.
+	 * If the callout is currently running, just defer the migration
+	 * to a more appropriate moment.
 	 */
 	if (c->c_cpu != cpu) {
+		c->c_cpu = CPUBLOCK;
+		if (cc->cc_curr == c) {
+			cc->cc_migration_cpu = cpu;
+			cc->cc_migration_ticks = to_ticks;
+			cc->cc_migration_ftn = ftn;
+			cc->cc_migration_arg = arg;
+			CTR5(KTR_CALLOUT,
+		    "migration of %p func %p arg %p in %d to %u deferred",
+			    c, c->c_func, c->c_arg, to_ticks, cpu);
+			CC_UNLOCK(cc);
+			return (cancelled);
+		}
+		CC_UNLOCK(cc);
+		cc = CC_CPU(cpu);
+		CC_LOCK(cc);
 		c->c_cpu = cpu;
-		CC_UNLOCK(cc);
-		goto retry;
 	}
+#endif
 
-	if (to_ticks <= 0)
-		to_ticks = 1;
-
-	c->c_arg = arg;
-	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
-	c->c_func = ftn;
-	c->c_time = cc->cc_ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
-	    c, c_links.tqe);
+	callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
 	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
 	CC_UNLOCK(cc);
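
For reference, the deferral scheme the diff implements can be summarized with the stripped-down userspace model below: c_cpu is parked on the CPUBLOCK sentinel while a reschedule to another CPU is in flight, a reset against a currently running callout only records the target CPU/ticks/function/argument, and the softclock completion path performs the recorded migration once the handler returns.  All names in the model (model_cc, model_callout, model_reset_on, ...) are illustrative stand-ins, not kernel interfaces, and locking and the callwheel are deliberately omitted.

/*
 * Simplified, single-threaded model of the deferred callout migration
 * in the patch above.  Not kernel code; locking and the callwheel are
 * left out on purpose.
 */
#include <stdio.h>

#define	MAXCPU		8
#define	CPUBLOCK	MAXCPU	/* sentinel: owner CPU is in transition */

struct model_cc {			/* stands in for struct callout_cpu */
	int	running;		/* models cc_curr == c */
	int	migration_cpu;		/* CPUBLOCK when nothing is pending */
	int	migration_ticks;
	void	(*migration_ftn)(void *);
	void	*migration_arg;
};

struct model_callout {			/* stands in for struct callout */
	int	cpu;			/* CPUBLOCK while migration is in flight */
	void	(*func)(void *);
	void	*arg;
};

/* Models callout_cc_add(): (re)arm the callout on the given CPU. */
static void
model_cc_add(struct model_callout *c, int to_ticks, void (*ftn)(void *),
    void *arg, int cpu)
{

	if (to_ticks <= 0)
		to_ticks = 1;
	c->func = ftn;
	c->arg = arg;
	c->cpu = cpu;
	printf("armed on cpu %d for %d ticks\n", cpu, to_ticks);
}

/*
 * Models the SMP branch of callout_reset_on(): a reset that would move a
 * running callout only records the request; otherwise it rearms directly.
 */
static void
model_reset_on(struct model_cc *cc, struct model_callout *c, int to_ticks,
    void (*ftn)(void *), void *arg, int cpu)
{

	if (c->cpu != cpu) {
		c->cpu = CPUBLOCK;	/* block lookups of the owner CPU */
		if (cc->running) {
			cc->migration_cpu = cpu;
			cc->migration_ticks = to_ticks;
			cc->migration_ftn = ftn;
			cc->migration_arg = arg;
			printf("migration to cpu %d deferred\n", cpu);
			return;
		}
	}
	model_cc_add(c, to_ticks, ftn, arg, cpu);
}

/* Models the end of handler execution in softclock(). */
static void
model_handler_done(struct model_cc *cc, struct model_callout *c)
{

	cc->running = 0;
	if (cc->migration_cpu != CPUBLOCK) {
		model_cc_add(c, cc->migration_ticks, cc->migration_ftn,
		    cc->migration_arg, cc->migration_cpu);
		cc->migration_cpu = CPUBLOCK;	/* nothing pending anymore */
	}
}

static void model_handler(void *arg) { (void)arg; }

int
main(void)
{
	struct model_cc cc = { .running = 1, .migration_cpu = CPUBLOCK };
	struct model_callout c = { .cpu = 0, .func = model_handler };

	/* Reset to CPU 2 while the handler is "running": gets recorded. */
	model_reset_on(&cc, &c, 5, model_handler, NULL, 2);
	/* Handler returns: the recorded migration is performed now. */
	model_handler_done(&cc, &c);
	return (0);
}

A single-threaded model cannot show the other half of the sentinel's job: in the patch, callout_lock() spins with cpu_spinwait() while c_cpu == CPUBLOCK, so no other CPU can look up and lock the callout's owner until the migration has published the new CPU.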