Index: sched_ule.c =================================================================== --- sched_ule.c (revision 230996) +++ sched_ule.c (working copy) @@ -555,6 +555,7 @@ cpuset_t cs_mask; u_int cs_load; u_int cs_cpu; + u_int cs_prefer; int cs_limit; /* Min priority for low min load for high. */ }; @@ -587,7 +588,9 @@ tdq = TDQ_CPU(cpu); if (match & CPU_SEARCH_LOWEST) if (CPU_ISSET(cpu, &low->cs_mask) && - tdq->tdq_load < low->cs_load && + (tdq->tdq_load < low->cs_load || + (tdq->tdq_load == low->cs_load && + cpu == low->cs_prefer)) && tdq->tdq_lowpri > low->cs_limit) { low->cs_cpu = cpu; low->cs_load = tdq->tdq_load; @@ -655,7 +658,10 @@ } total += load; if (match & CPU_SEARCH_LOWEST) - if (load < lload || low->cs_cpu == -1) { + if (low->cs_cpu == -1 || + load < lload || + (load == lload && low->cs_prefer != -1 && + CPU_ISSET(low->cs_prefer, &child->cg_mask))) { *low = lgroup; lload = load; } @@ -703,11 +709,12 @@ * acceptable. */ static inline int -sched_lowest(struct cpu_group *cg, cpuset_t mask, int pri) +sched_lowest(struct cpu_group *cg, cpuset_t mask, int pri, int prefer) { struct cpu_search low; low.cs_cpu = -1; + low.cs_prefer = prefer; low.cs_load = -1; low.cs_mask = mask; low.cs_limit = pri; @@ -742,6 +749,7 @@ struct cpu_search low; low.cs_cpu = -1; + low.cs_prefer = -1; low.cs_limit = -1; low.cs_load = -1; low.cs_mask = mask; @@ -1169,10 +1177,13 @@ /* * If the thread can run on the last cpu and the affinity has not * expired or it is idle run it there. + * Skip this in HTT case to check for other thread activity. */ pri = td->td_priority; tdq = TDQ_CPU(ts->ts_cpu); - if (THREAD_CAN_SCHED(td, ts->ts_cpu)) { + cg = tdq->tdq_cg; + if (THREAD_CAN_SCHED(td, ts->ts_cpu) && + (cg->cg_flags & CG_FLAG_THREAD) == 0) { if (tdq->tdq_lowpri > PRI_MIN_IDLE) { SCHED_STAT_INC(pickcpu_idle_affinity); return (ts->ts_cpu); @@ -1183,18 +1194,24 @@ } } /* - * Search for the highest level in the tree that still has affinity. + * Search for the CPU where we can run now, respecting affinity. */ - cg = NULL; - for (cg = tdq->tdq_cg; cg != NULL; cg = cg->cg_parent) - if (SCHED_AFFINITY(ts, cg->cg_level)) - break; cpu = -1; mask = td->td_cpuset->cs_mask; - if (cg) - cpu = sched_lowest(cg, mask, pri); + for (; cg != NULL; cg = cg->cg_parent) { + if (!SCHED_AFFINITY(ts, cg->cg_level)) + continue; + if (cg->cg_flags & CG_FLAG_THREAD) + continue; + cpu = sched_lowest(cg, mask, pri, ts->ts_cpu); + if (cpu >= 0) + break; + } + /* + * Search for any CPU with lowest load, respecting affinity if equal. + */ if (cpu == -1) - cpu = sched_lowest(cpu_top, mask, -1); + cpu = sched_lowest(cpu_top, mask, -1, ts->ts_cpu); /* * Compare the lowest loaded cpu to current cpu. */