Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c	(revision 220345)
+++ sys/kern/kern_timeout.c	(working copy)
@@ -56,6 +56,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
+#ifdef SMP
+#include <machine/cpu.h>
+#endif
+
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
@@ -83,6 +87,21 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFL
 int callwheelsize, callwheelbits, callwheelmask;
 
 /*
+ * The callout cpu migration entity represents the information necessary
+ * for describing a callout which is being migrated to a new callout cpu.
+ * The cached information is essential for deferring the migration when
+ * the migrating callout is already running.
+ */
+struct cc_mig_ent {
+#ifdef SMP
+	void	(*ce_migration_func)(void *);
+	void	*ce_migration_arg;
+	int	ce_migration_cpu;
+	int	ce_migration_ticks;
+#endif
+};
+
+/*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
  * In particular:
@@ -100,6 +119,7 @@ int callwheelsize, callwheelbits, callwheelmask;
  * when the callout should be served.
  */
 struct callout_cpu {
+	struct cc_mig_ent	cc_migrating_entity;
 	struct mtx		cc_lock;
 	struct callout		*cc_callout;
 	struct callout_tailq	*cc_callwheel;
@@ -115,7 +135,13 @@ struct callout_cpu {
 };
 
 #ifdef SMP
+#define	cc_migration_func	cc_migrating_entity.ce_migration_func
+#define	cc_migration_arg	cc_migrating_entity.ce_migration_arg
+#define	cc_migration_cpu	cc_migrating_entity.ce_migration_cpu
+#define	cc_migration_ticks	cc_migrating_entity.ce_migration_ticks
+
 struct callout_cpu cc_cpu[MAXCPU];
+#define	CPUBLOCK	MAXCPU
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
@@ -125,6 +151,7 @@ struct callout_cpu cc_cpu;
 #endif
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
+#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 void (*callout_new_inserted)(int cpu, int ticks) = NULL;
@@ -149,6 +176,35 @@ MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datas
  */
 
 /*
+ * Resets the migration entity tied to a specific callout cpu.
+ */
+static void
+cc_cme_cleanup(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+	cc->cc_migration_cpu = CPUBLOCK;
+	cc->cc_migration_ticks = 0;
+	cc->cc_migration_func = NULL;
+	cc->cc_migration_arg = NULL;
+#endif
+}
+
+/*
+ * Checks if migration is requested by a specific callout cpu.
+ */
+static int
+cc_cme_migrating(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+	return (cc->cc_migration_cpu != CPUBLOCK);
+#else
+	return (0);
+#endif
+}
+
+/*
  * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
  *
  * This code is called very early in the kernel initialization sequence,
@@ -188,6 +244,7 @@ callout_cpu_init(struct callout_cpu *cc)
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&cc->cc_callwheel[i]);
 	}
+	cc_cme_cleanup(cc);
 	if (cc->cc_callout == NULL)
 		return;
 	for (i = 0; i < ncallout; i++) {
@@ -198,7 +255,30 @@ callout_cpu_init(struct callout_cpu *cc)
 	}
 }
 
+#ifdef SMP
 /*
+ * Switches the cpu tied to a specific callout.
+ * The function expects a locked incoming callout cpu and returns
+ * with the outgoing callout cpu locked.
+ */
+static struct callout_cpu *
+callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
+{
+	struct callout_cpu *new_cc;
+
+	MPASS(c != NULL && cc != NULL);
+	CC_LOCK_ASSERT(cc);
+
+	c->c_cpu = CPUBLOCK;
+	CC_UNLOCK(cc);
+	new_cc = CC_CPU(new_cpu);
+	CC_LOCK(new_cc);
+	c->c_cpu = new_cpu;
+	return (new_cc);
+}
+#endif
+
+/*
  * kern_timeout_callwheel_init() - initialize previously reserved callwheel
  * space.
  *
@@ -311,6 +391,13 @@ callout_lock(struct callout *c)
 
 	for (;;) {
 		cpu = c->c_cpu;
+#ifdef SMP
+		if (cpu == CPUBLOCK) {
+			while (c->c_cpu == CPUBLOCK)
+				cpu_spinwait();
+			continue;
+		}
+#endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		if (cpu == c->c_cpu)
@@ -320,6 +407,29 @@ callout_lock(struct callout *c)
 	return (cc);
 }
 
+static void
+callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
+    void (*func)(void *), void *arg, int cpu)
+{
+
+	CC_LOCK_ASSERT(cc);
+
+	if (to_ticks <= 0)
+		to_ticks = 1;
+	c->c_arg = arg;
+	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	c->c_func = func;
+	c->c_time = ticks + to_ticks;
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	    c, c_links.tqe);
+	if ((c->c_time - cc->cc_firsttick) < 0 &&
+	    callout_new_inserted != NULL) {
+		cc->cc_firsttick = c->c_time;
+		(*callout_new_inserted)(cpu,
+		    to_ticks + (ticks - cc->cc_ticks));
+	}
+}
+
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
@@ -497,14 +607,50 @@ softclock(void *arg)
 					}
 					cc->cc_curr = NULL;
 					if (cc->cc_waiting) {
+
 						/*
-						 * There is someone waiting
-						 * for the callout to complete.
+						 * There is someone waiting for the
+						 * callout to complete.
+						 * If the callout was scheduled for
+						 * migration, just cancel it.
 						 */
+						if (cc_cme_migrating(cc))
+							cc_cme_cleanup(cc);
 						cc->cc_waiting = 0;
 						CC_UNLOCK(cc);
 						wakeup(&cc->cc_waiting);
 						CC_LOCK(cc);
+					} else if (cc_cme_migrating(cc)) {
+#ifdef SMP
+						struct callout_cpu *new_cc;
+						void (*new_func)(void *);
+						void *new_arg;
+						int new_cpu, new_ticks;
+
+						/*
+						 * If the callout was scheduled for
+						 * migration, just perform it now.
+						 */
+						new_cpu = cc->cc_migration_cpu;
+						new_ticks = cc->cc_migration_ticks;
+						new_func = cc->cc_migration_func;
+						new_arg = cc->cc_migration_arg;
+						cc_cme_cleanup(cc);
+
+						/*
+						 * It should be asserted here that
+						 * the callout is not destroyed, but
+						 * that is not easy.
+						 */
+						new_cc = callout_cpu_switch(c, cc,
+						    new_cpu);
+						callout_cc_add(c, new_cc, new_ticks,
+						    new_func, new_arg, new_cpu);
+						CC_UNLOCK(new_cc);
+						CC_LOCK(cc);
+#else
+						panic("migration should not happen");
+#endif
 					}
 					steps = 0;
 					c = cc->cc_next;
@@ -617,7 +763,6 @@ callout_reset_on(struct callout *c, int to_ticks,
 	 */
 	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
 		cpu = c->c_cpu;
-retry:
 	cc = callout_lock(c);
 	if (cc->cc_curr == c) {
 		/*
@@ -649,31 +794,30 @@ callout_reset_on(struct callout *c, int to_ticks,
 		cancelled = 1;
 		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 	}
+
+#ifdef SMP
 	/*
-	 * If the lock must migrate we have to check the state again as
-	 * we can't hold both the new and old locks simultaneously.
+	 * If the callout must migrate, try to perform the migration
+	 * immediately.  If the callout is currently running, just defer
+	 * the migration until the callout finishes running.
 	 */
 	if (c->c_cpu != cpu) {
-		c->c_cpu = cpu;
-		CC_UNLOCK(cc);
-		goto retry;
+		if (cc->cc_curr == c) {
+			cc->cc_migration_cpu = cpu;
+			cc->cc_migration_ticks = to_ticks;
+			cc->cc_migration_func = ftn;
+			cc->cc_migration_arg = arg;
+			CTR5(KTR_CALLOUT,
+			    "migration of %p func %p arg %p in %d to %u deferred",
+			    c, c->c_func, c->c_arg, to_ticks, cpu);
+			CC_UNLOCK(cc);
+			return (cancelled);
+		}
+		cc = callout_cpu_switch(c, cc, cpu);
 	}
+#endif
 
-	if (to_ticks <= 0)
-		to_ticks = 1;
-
-	c->c_arg = arg;
-	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
-	c->c_func = ftn;
-	c->c_time = ticks + to_ticks;
-	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
-	    c, c_links.tqe);
-	if ((c->c_time - cc->cc_firsttick) < 0 &&
-	    callout_new_inserted != NULL) {
-		cc->cc_firsttick = c->c_time;
-		(*callout_new_inserted)(cpu,
-		    to_ticks + (ticks - cc->cc_ticks));
-	}
+	callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
 	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
 	CC_UNLOCK(cc);
@@ -701,7 +845,7 @@ _callout_stop_safe(c, safe)
 	struct callout *c;
 	int safe;
 {
-	struct callout_cpu *cc;
+	struct callout_cpu *cc, *old_cc;
 	struct lock_class *class;
 	int use_lock, sq_locked;
 
@@ -721,9 +865,28 @@ _callout_stop_safe(c, safe)
 		use_lock = 0;
 	sq_locked = 0;
+	old_cc = NULL;
 again:
 	cc = callout_lock(c);
+
+	/*
+	 * If the callout migrated while the callout cpu lock was
+	 * dropped, just drop the sleepqueue lock and check the state
+	 * again.
+	 */
+	if (sq_locked != 0 && cc != old_cc) {
+#ifdef SMP
+		CC_UNLOCK(cc);
+		sleepq_release(&old_cc->cc_waiting);
+		sq_locked = 0;
+		old_cc = NULL;
+		goto again;
+#else
+		panic("migration should not happen");
+#endif
+	}
+
 	/*
 	 * If the callout isn't pending, it's not on the queue, so
 	 * don't attempt to remove it from the queue.  We can try to
 	 * stop it by other means however.
 	 */
@@ -774,8 +937,16 @@ again:
 			CC_UNLOCK(cc);
 			sleepq_lock(&cc->cc_waiting);
 			sq_locked = 1;
+			old_cc = cc;
 			goto again;
 		}
+
+		/*
+		 * Migration could be cancelled here, but as
+		 * it is not entirely clear when that would
+		 * be safe, just let softclock() take care
+		 * of it.
+		 */
 		cc->cc_waiting = 1;
 		DROP_GIANT();
 		CC_UNLOCK(cc);
@@ -784,6 +955,7 @@ again:
 			    SLEEPQ_SLEEP, 0);
 			sleepq_wait(&cc->cc_waiting, 0);
 			sq_locked = 0;
+			old_cc = NULL;
 
 			/* Reacquire locks previously released. */
 			PICKUP_GIANT();
@@ -800,6 +972,8 @@ again:
 		cc->cc_cancel = 1;
 		CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 		    c, c->c_func, c->c_arg);
+		KASSERT(!cc_cme_migrating(cc),
+		    ("callout wrongly scheduled for migration"));
 		CC_UNLOCK(cc);
 		KASSERT(!sq_locked, ("sleepqueue chain locked"));
 		return (1);
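
Note on the locking protocol: while a callout is handed from one callout cpu to
another, there is a window where neither cpu lock is held, so c->c_cpu is parked
at the out-of-range sentinel CPUBLOCK and callout_lock() spins until a valid cpu
is visible again.  The standalone sketch below models that handshake; it is
illustrative only and not part of the patch, using C11 threads and atomics in
place of the kernel's spin mutexes and cpu_spinwait().

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

#define	NCPU		4
#define	CPUBLOCK	NCPU	/* out-of-range sentinel, as in the patch */

static pthread_mutex_t cpu_lock[NCPU] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};
static _Atomic int c_cpu = 0;	/* models c->c_cpu */

/* Models callout_lock(): resolve c_cpu and take the matching lock. */
static pthread_mutex_t *
entry_lock(void)
{
	int cpu;

	for (;;) {
		cpu = atomic_load(&c_cpu);
		if (cpu == CPUBLOCK) {
			/* A switch is in flight; wait for a valid cpu. */
			while (atomic_load(&c_cpu) == CPUBLOCK)
				sched_yield();	/* cpu_spinwait() stand-in */
			continue;
		}
		pthread_mutex_lock(&cpu_lock[cpu]);
		/* The entry may have moved while we slept on the lock. */
		if (cpu == atomic_load(&c_cpu))
			return (&cpu_lock[cpu]);
		pthread_mutex_unlock(&cpu_lock[cpu]);
	}
}

/*
 * Models callout_cpu_switch(): entered with cpu_lock[old_cpu] held and
 * returns with cpu_lock[new_cpu] held.  Parking c_cpu at CPUBLOCK means
 * no reader can observe the window where neither lock protects the entry.
 */
static void
entry_switch(int old_cpu, int new_cpu)
{
	atomic_store(&c_cpu, CPUBLOCK);
	pthread_mutex_unlock(&cpu_lock[old_cpu]);
	pthread_mutex_lock(&cpu_lock[new_cpu]);
	atomic_store(&c_cpu, new_cpu);
}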
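
For reference, a minimal consumer sketch of the path this patch changes
(hypothetical driver code, not part of the patch; the example_softc naming and
the cpu numbers are made up).  Rearming a callout from inside its own handler
with a different target cpu now records the request in the cc_mig_ent instead
of looping on the old retry: label, and softclock() performs the cpu switch
after the handler returns.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct example_softc {
	struct mtx	ex_mtx;
	struct callout	ex_timer;
};

static void
example_timer(void *arg)
{
	struct example_softc *sc = arg;

	/* callout_init_mtx() handlers run with the mutex held. */
	mtx_assert(&sc->ex_mtx, MA_OWNED);

	/*
	 * Rearming from inside the handler with a different target cpu
	 * hits the cc->cc_curr == c branch in callout_reset_on(): the
	 * request is stashed in the migrating entity and softclock()
	 * performs the actual cpu switch once this handler returns.
	 */
	callout_reset_on(&sc->ex_timer, hz, example_timer, sc, 1);
}

static void
example_attach(struct example_softc *sc)
{
	mtx_init(&sc->ex_mtx, "example", NULL, MTX_DEF);
	callout_init_mtx(&sc->ex_timer, &sc->ex_mtx, 0);

	mtx_lock(&sc->ex_mtx);
	/* Initial arming on cpu 0; no migration is involved yet. */
	callout_reset_on(&sc->ex_timer, hz, example_timer, sc, 0);
	mtx_unlock(&sc->ex_mtx);
}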