Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(revision 216531)
+++ sys/kern/kern_timeout.c	(working copy)
@@ -56,6 +56,10 @@
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
+#ifdef SMP
+#include <machine/cpu.h>
+#endif
+
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
@@ -107,10 +111,14 @@
 	struct callout		*cc_next;
 	struct callout		*cc_curr;
 	void			*cc_cookie;
+	void			(*cc_migration_ftn)(void *);
+	void			*cc_migration_arg;
 	int			cc_ticks;
 	int			cc_softticks;
 	int			cc_cancel;
 	int			cc_waiting;
+	int			cc_migration_cpu;
+	int			cc_migration_ticks;
 };
 
 #ifdef SMP
@@ -122,8 +130,10 @@
 #define	CC_CPU(cpu)	&cc_cpu
 #define	CC_SELF()	&cc_cpu
 #endif
+#define	CPUBLOCK	MAXCPU
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
+#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 
@@ -186,6 +196,7 @@
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&cc->cc_callwheel[i]);
 	}
+	cc->cc_migration_cpu = CPUBLOCK;
 	if (cc->cc_callout == NULL)
 		return;
 	for (i = 0; i < ncallout; i++) {
@@ -287,6 +298,12 @@
 
 	for (;;) {
 		cpu = c->c_cpu;
+#ifdef SMP
+		if (cpu == CPUBLOCK) {
+			cpu_spinwait();
+			continue;
+		}
+#endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		if (cpu == c->c_cpu)
@@ -296,6 +313,23 @@
 	return (cc);
 }
 
+static void
+callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
+    void (*ftn)(void *), void *arg, int cpu)
+{
+
+	CC_LOCK_ASSERT(cc);
+
+	if (to_ticks <= 0)
+		to_ticks = 1;
+	c->c_arg = arg;
+	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	c->c_func = ftn;
+	c->c_time = cc->cc_ticks + to_ticks;
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	    c, c_links.tqe);
+}
+
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
@@ -366,11 +400,14 @@
 					steps = 0;
 				}
 			} else {
+				struct callout_cpu *new_cc;
 				void (*c_func)(void *);
-				void *c_arg;
+				void (*new_ftn)(void *);
+				void *c_arg, *new_arg;
 				struct lock_class *class;
 				struct lock_object *c_lock;
 				int c_flags, sharedlock;
+				int new_cpu, new_ticks;
 
 				cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
 				TAILQ_REMOVE(bucket, c, c_links.tqe);
@@ -472,8 +509,41 @@
 					    c_links.sle);
 				}
 				cc->cc_curr = NULL;
-				if (cc->cc_waiting) {
+
+				/*
+				 * If the callout was scheduled for
+				 * migration just perform it now.
+				 */
+				if (cc->cc_migration_cpu != CPUBLOCK) {
+					/*
+					 * There must not be any waiting
+					 * thread now because the callout
+					 * has a blocked CPU.
+					 * Also, the callout must not be
+					 * freed, but that is not easy to
+					 * assert.
+					 */
+					MPASS(cc->cc_waiting == 0);
+					new_cpu = cc->cc_migration_cpu;
+					new_ticks = cc->cc_migration_ticks;
+					new_ftn = cc->cc_migration_ftn;
+					new_arg = cc->cc_migration_arg;
+					cc->cc_migration_cpu = CPUBLOCK;
+					cc->cc_migration_ticks = 0;
+					cc->cc_migration_ftn = NULL;
+					cc->cc_migration_arg = NULL;
+					CC_UNLOCK(cc);
+					new_cc = CC_CPU(new_cpu);
+					CC_LOCK(new_cc);
+					MPASS(c->c_cpu == CPUBLOCK);
+					c->c_cpu = new_cpu;
+					callout_cc_add(c, new_cc, new_ticks,
+					    new_ftn, new_arg, new_cpu);
+					CC_UNLOCK(new_cc);
+					CC_LOCK(cc);
+				} else if (cc->cc_waiting) {
+
 					/*
 					 * There is someone waiting
 					 * for the callout to complete.
					 */
@@ -593,7 +663,6 @@
 	 */
 	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
 		cpu = c->c_cpu;
-retry:
 	cc = callout_lock(c);
 	if (cc->cc_curr == c) {
 		/*
@@ -625,25 +694,34 @@
 		cancelled = 1;
 		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 	}
+#ifdef SMP
 	/*
-	 * If the lock must migrate we have to check the state again as
-	 * we can't hold both the new and old locks simultaneously.
+	 * If the lock must migrate we have to block the callout locking
+	 * until migration is completed.
+	 * If the callout is currently running, just defer the migration
+	 * to a more appropriate moment.
 	 */
 	if (c->c_cpu != cpu) {
+		c->c_cpu = CPUBLOCK;
+		if (cc->cc_curr == c) {
+			cc->cc_migration_cpu = cpu;
+			cc->cc_migration_ticks = to_ticks;
+			cc->cc_migration_ftn = ftn;
+			cc->cc_migration_arg = arg;
+			CTR5(KTR_CALLOUT,
+		    "migration of %p func %p arg %p in %d to %u deferred",
+			    c, c->c_func, c->c_arg, to_ticks, cpu);
+			CC_UNLOCK(cc);
+			return (cancelled);
+		}
+		CC_UNLOCK(cc);
+		cc = CC_CPU(cpu);
+		CC_LOCK(cc);
 		c->c_cpu = cpu;
-		CC_UNLOCK(cc);
-		goto retry;
 	}
+#endif
 
-	if (to_ticks <= 0)
-		to_ticks = 1;
-
-	c->c_arg = arg;
-	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
-	c->c_func = ftn;
-	c->c_time = cc->cc_ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
-	    c, c_links.tqe);
+	callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
 	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
 	CC_UNLOCK(cc);
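
For reference, the deferral scheme the diff implements can be summarized with the stripped-down userspace model below: c_cpu is parked on the CPUBLOCK sentinel while a reschedule to another CPU is in flight, a reset against a currently running callout only records the target CPU/ticks/function/argument, and the softclock completion path performs the recorded migration once the handler returns.  All names in the model (model_cc, model_callout, model_reset_on, ...) are illustrative stand-ins, not kernel interfaces, and locking and the callwheel are deliberately omitted.

/*
 * Simplified, single-threaded model of the deferred callout migration
 * in the patch above.  Not kernel code; locking and the callwheel are
 * left out on purpose.
 */
#include <stdio.h>

#define	MAXCPU		8
#define	CPUBLOCK	MAXCPU	/* sentinel: owner CPU is in transition */

struct model_cc {			/* stands in for struct callout_cpu */
	int	running;		/* models cc_curr == c */
	int	migration_cpu;		/* CPUBLOCK when nothing is pending */
	int	migration_ticks;
	void	(*migration_ftn)(void *);
	void	*migration_arg;
};

struct model_callout {			/* stands in for struct callout */
	int	cpu;			/* CPUBLOCK while migration is in flight */
	void	(*func)(void *);
	void	*arg;
};

/* Models callout_cc_add(): (re)arm the callout on the given CPU. */
static void
model_cc_add(struct model_callout *c, int to_ticks, void (*ftn)(void *),
    void *arg, int cpu)
{

	if (to_ticks <= 0)
		to_ticks = 1;
	c->func = ftn;
	c->arg = arg;
	c->cpu = cpu;
	printf("armed on cpu %d for %d ticks\n", cpu, to_ticks);
}

/*
 * Models the SMP branch of callout_reset_on(): a reset that would move a
 * running callout only records the request; otherwise it rearms directly.
 */
static void
model_reset_on(struct model_cc *cc, struct model_callout *c, int to_ticks,
    void (*ftn)(void *), void *arg, int cpu)
{

	if (c->cpu != cpu) {
		c->cpu = CPUBLOCK;	/* block lookups of the owner CPU */
		if (cc->running) {
			cc->migration_cpu = cpu;
			cc->migration_ticks = to_ticks;
			cc->migration_ftn = ftn;
			cc->migration_arg = arg;
			printf("migration to cpu %d deferred\n", cpu);
			return;
		}
	}
	model_cc_add(c, to_ticks, ftn, arg, cpu);
}

/* Models the end of handler execution in softclock(). */
static void
model_handler_done(struct model_cc *cc, struct model_callout *c)
{

	cc->running = 0;
	if (cc->migration_cpu != CPUBLOCK) {
		model_cc_add(c, cc->migration_ticks, cc->migration_ftn,
		    cc->migration_arg, cc->migration_cpu);
		cc->migration_cpu = CPUBLOCK;	/* nothing pending anymore */
	}
}

static void model_handler(void *arg) { (void)arg; }

int
main(void)
{
	struct model_cc cc = { .running = 1, .migration_cpu = CPUBLOCK };
	struct model_callout c = { .cpu = 0, .func = model_handler };

	/* Reset to CPU 2 while the handler is "running": gets recorded. */
	model_reset_on(&cc, &c, 5, model_handler, NULL, 2);
	/* Handler returns: the recorded migration is performed now. */
	model_handler_done(&cc, &c);
	return (0);
}

A single-threaded model cannot show the other half of the sentinel's job: in the patch, callout_lock() spins with cpu_spinwait() while c_cpu == CPUBLOCK, so no other CPU can look up and lock the callout's owner until the migration has published the new CPU.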