Index: sys/sys/sched.h
===================================================================
--- sys/sys/sched.h	(revision 242324)
+++ sys/sys/sched.h	(working copy)
@@ -91,7 +91,7 @@ void	sched_nice(struct proc *p, int nice);
  */
 void	sched_exit_thread(struct thread *td, struct thread *child);
 void	sched_fork_thread(struct thread *td, struct thread *child);
-void	sched_lend_prio(struct thread *td, u_char prio);
+void	sched_lend_prio(struct thread *td, u_char prio, int flags);
 void	sched_lend_user_prio(struct thread *td, u_char pri);
 fixpt_t	sched_pctcpu(struct thread *td);
 void	sched_prio(struct thread *td, u_char prio);
@@ -168,6 +168,7 @@ sched_unpin(void)
 #define	SRQ_INTR	0x0004		/* It is probably urgent. */
 #define	SRQ_PREEMPTED	0x0008		/* has been preempted.. be kind */
 #define	SRQ_BORROWING	0x0010		/* Priority updated due to prio_lend */
+#define	SRQ_WILLSWITCH	0x0020		/* curthread will be switched out */
 
 /* Scheduler stats. */
 #ifdef SCHED_STATS
Index: sys/kern/sched_ule.c
===================================================================
--- sys/kern/sched_ule.c	(revision 242324)
+++ sys/kern/sched_ule.c	(working copy)
@@ -295,7 +295,7 @@ static struct tdq	tdq_cpu;
 #define	TDQ_LOCKPTR(t)		(&(t)->tdq_lock)
 
 static void sched_priority(struct thread *);
-static void sched_thread_priority(struct thread *, u_char);
+static void sched_thread_priority(struct thread *, u_char, int flags);
 static int sched_interact_score(struct thread *);
 static void sched_interact_update(struct thread *);
 static void sched_interact_fork(struct thread *);
@@ -1191,7 +1191,7 @@ sched_pickcpu(struct thread *td, int flags)
 	struct td_sched *ts;
 	struct tdq *tdq;
 	cpuset_t mask;
-	int cpu, pri, self;
+	int cpu, pri, self, self_load;
 
 	self = PCPU_GET(cpuid);
 	ts = td->td_sched;
@@ -1267,10 +1267,22 @@ sched_pickcpu(struct thread *td, int flags)
 	KASSERT(cpu != -1, ("sched_pickcpu: Failed to find a cpu."));
 	/*
 	 * Compare the lowest loaded cpu to current cpu.
+	 * If SRQ_WILLSWITCH is set, curthread on the curcpu is going
+	 * to be switched out very soon, so the curcpu should be
+	 * considered less loaded than tdq_load reports.
+	 * In case the lowest priority thread is curthread, tdq_lowpri
+	 * should be recalculated based on the new lowest priority thread.
+	 * Unfortunately this operation is quite expensive and requires the
+	 * self tdq to be locked, which can introduce cyclic LORs with the
+	 * target tdq lock.  If that is the case, it is ok to just fall
+	 * back to the !SRQ_WILLSWITCH decision.
 	 */
+	self_load = TDQ_CPU(self)->tdq_load;
+	if (flags & SRQ_WILLSWITCH)
+		self_load--;
 	if (THREAD_CAN_SCHED(td, self) && TDQ_CPU(self)->tdq_lowpri > pri &&
 	    TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE &&
-	    TDQ_CPU(self)->tdq_load <= TDQ_CPU(cpu)->tdq_load + 1) {
+	    self_load <= TDQ_CPU(cpu)->tdq_load + 1) {
 		SCHED_STAT_INC(pickcpu_local);
 		cpu = self;
 	} else
@@ -1634,7 +1646,7 @@ sched_pctcpu_update(struct td_sched *ts, int run)
  * functions.
  */
 static void
-sched_thread_priority(struct thread *td, u_char prio)
+sched_thread_priority(struct thread *td, u_char prio, int flags)
 {
 	struct td_sched *ts;
 	struct tdq *tdq;
@@ -1664,7 +1676,7 @@ static void
 	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
 		sched_rem(td);
 		td->td_priority = prio;
-		sched_add(td, SRQ_BORROWING);
+		sched_add(td, flags | SRQ_BORROWING);
 		return;
 	}
 	/*
@@ -1689,11 +1701,11 @@ static void
  * priority.
  */
 void
-sched_lend_prio(struct thread *td, u_char prio)
+sched_lend_prio(struct thread *td, u_char prio, int flags)
 {
 
 	td->td_flags |= TDF_BORROWING;
-	sched_thread_priority(td, prio);
+	sched_thread_priority(td, prio, flags);
 }
 
 /*
@@ -1716,9 +1728,9 @@ sched_unlend_prio(struct thread *td, u_char prio)
 	base_pri = td->td_base_pri;
 	if (prio >= base_pri) {
 		td->td_flags &= ~TDF_BORROWING;
-		sched_thread_priority(td, base_pri);
+		sched_thread_priority(td, base_pri, 0);
 	} else
-		sched_lend_prio(td, prio);
+		sched_lend_prio(td, prio, 0);
 }
 
 /*
@@ -1741,7 +1753,7 @@ sched_prio(struct thread *td, u_char prio)
 
 	/* Change the real priority. */
 	oldprio = td->td_priority;
-	sched_thread_priority(td, prio);
+	sched_thread_priority(td, prio, 0);
 
 	/*
 	 * If the thread is on a turnstile, then let the turnstile update
Index: sys/kern/sched_4bsd.c
===================================================================
--- sys/kern/sched_4bsd.c	(revision 242324)
+++ sys/kern/sched_4bsd.c	(working copy)
@@ -859,7 +859,7 @@ sched_priority(struct thread *td, u_char prio)
  * priority.
  */
 void
-sched_lend_prio(struct thread *td, u_char prio)
+sched_lend_prio(struct thread *td, u_char prio, int flags __unused)
 {
 
 	td->td_flags |= TDF_BORROWING;
@@ -888,7 +888,7 @@ sched_unlend_prio(struct thread *td, u_char prio)
 		td->td_flags &= ~TDF_BORROWING;
 		sched_prio(td, base_pri);
 	} else
-		sched_lend_prio(td, prio);
+		sched_lend_prio(td, prio, 0);
 }
 
 void
Index: sys/kern/subr_turnstile.c
===================================================================
--- sys/kern/subr_turnstile.c	(revision 242324)
+++ sys/kern/subr_turnstile.c	(working copy)
@@ -158,7 +158,7 @@ static void	init_turnstile0(void *dummy);
 #ifdef TURNSTILE_PROFILING
 static void	init_turnstile_profiling(void *arg);
 #endif
-static void	propagate_priority(struct thread *td);
+static void	propagate_priority(struct thread *td, int flags);
 static int	turnstile_adjust_thread(struct turnstile *ts,
 		    struct thread *td);
 static struct thread *turnstile_first_waiter(struct turnstile *ts);
@@ -178,9 +178,10 @@ SDT_PROBE_DEFINE2(sched, , , wakeup, wakeup, "stru
  * Walks the chain of turnstiles and their owners to propagate the priority
  * of the thread being blocked to all the threads holding locks that have to
  * release their locks before this thread can run again.
+ * It accepts SRQ_* information as flags.
  */
 static void
-propagate_priority(struct thread *td)
+propagate_priority(struct thread *td, int flags)
 {
 	struct turnstile *ts;
 	int pri;
@@ -240,7 +241,7 @@ static void
 		/*
 		 * Bump this thread's priority.
 		 */
-		sched_lend_prio(td, pri);
+		sched_lend_prio(td, pri, flags);
 
 		/*
 		 * If lock holder is actually running or on the run queue
@@ -445,7 +446,7 @@ turnstile_adjust(struct thread *td, u_char oldpri)
 	    td->td_tsqueue == TS_SHARED_QUEUE);
 	if (td == TAILQ_FIRST(&ts->ts_blocked[td->td_tsqueue]) &&
 	    td->td_priority < oldpri) {
-		propagate_priority(td);
+		propagate_priority(td, 0);
 	}
 }
 
@@ -659,7 +660,7 @@ turnstile_claim(struct turnstile *ts)
 	 */
 	thread_lock(owner);
 	if (td->td_priority < owner->td_priority)
-		sched_lend_prio(owner, td->td_priority);
+		sched_lend_prio(owner, td->td_priority, 0);
 	thread_unlock(owner);
 	tc = TC_LOOKUP(ts->ts_lockobj);
 	mtx_unlock_spin(&ts->ts_lock);
@@ -741,7 +742,7 @@ turnstile_wait(struct turnstile *ts, struct thread
 	td->td_blktick = ticks;
 	TD_SET_LOCK(td);
 	mtx_unlock_spin(&tc->tc_lock);
-	propagate_priority(td);
+	propagate_priority(td, SRQ_WILLSWITCH);
 
 	if (LOCK_LOG_TEST(lock, 0))
 		CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__,
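
For reference, below is a small userland sketch (not part of the patch) of the
"stay on the current CPU" check in sched_pickcpu() with the SRQ_WILLSWITCH
adjustment.  The struct cpu_state and prefer_local_cpu() names are simplified
stand-ins invented for illustration, and the THREAD_CAN_SCHED()/PRI_MIN_IDLE
tests of the real code are omitted.

/*
 * Illustrative sketch only: models the tdq_load comparison that decides
 * whether a waking thread stays on the current CPU, including the
 * SRQ_WILLSWITCH credit for a curthread that is about to block.
 */
#include <stdio.h>

#define	SRQ_WILLSWITCH	0x0020	/* curthread will be switched out */

struct cpu_state {
	int	load;	/* models tdq_load */
	int	lowpri;	/* models tdq_lowpri */
};

/*
 * Return 1 if a thread waking at priority 'pri' should stay on the
 * current CPU instead of migrating to the lowest-loaded remote CPU.
 */
static int
prefer_local_cpu(const struct cpu_state *self, const struct cpu_state *remote,
    int pri, int flags)
{
	int self_load = self->load;

	/*
	 * If the current thread is about to be switched out, the local
	 * CPU is effectively one runnable thread lighter than tdq_load
	 * currently reports.
	 */
	if (flags & SRQ_WILLSWITCH)
		self_load--;

	return (self->lowpri > pri && self_load <= remote->load + 1);
}

int
main(void)
{
	struct cpu_state self = { .load = 4, .lowpri = 140 };
	struct cpu_state remote = { .load = 2, .lowpri = 200 };

	/* Without the hint the remote CPU looks strictly less loaded. */
	printf("plain:          stay local = %d\n",
	    prefer_local_cpu(&self, &remote, 120, 0));
	/* With SRQ_WILLSWITCH the imminent switch-out is credited. */
	printf("SRQ_WILLSWITCH: stay local = %d\n",
	    prefer_local_cpu(&self, &remote, 120, SRQ_WILLSWITCH));
	return (0);
}

With the numbers above the first call migrates the thread while the second
keeps it local, which is the behavior the patch is after when curthread is
about to block in turnstile_wait().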