commit ab9e7303bf106df053c2a19aad6dda1e4bda0360
Author: Ryan Stone
Date:   Tue Sep 28 12:35:50 2021

    Check for preemption after lowering a thread's priority

    When a high-priority thread is waiting for a mutex held by a
    low-priority thread, it temporarily lends its priority to the
    low-priority thread to prevent priority inversion. When the mutex
    is released, the lent priority is revoked and the low-priority
    thread goes back to its original priority.

    When the priority of that thread is lowered (through a call to
    sched_thread_priority()), the scheduler was not checking whether
    there is now a higher-priority thread in the run queue. This can
    cause threads with real-time priority to be starved in the run
    queue while the low-priority thread finishes its quantum.

    Fix this by explicitly checking whether preemption is necessary
    when a thread's priority is lowered.
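To make the failure mode concrete, here is a minimal user-space sketch, not kernel code: should_preempt() and lower_priority() are invented stand-ins for sched_shouldpreempt() and the priority drop in sched_thread_priority(), and lower numeric values mean higher priority, as in the kernel.

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-ins for tdq/thread state; not the kernel structures. */
    struct cpu_model {
            int curthread_prio;     /* priority of the running thread */
            int runq_best_prio;     /* best (lowest) waiting priority */
            bool owepreempt;        /* pending preemption, like td_owepreempt */
    };

    /* Rough stand-in for sched_shouldpreempt(): lower value wins. */
    static bool
    should_preempt(int waiting_prio, int running_prio)
    {
            return (waiting_prio < running_prio);
    }

    /* Stand-in for the priority drop; 'check' selects the behavior
     * without and with this commit's fix. */
    static void
    lower_priority(struct cpu_model *c, int new_prio, bool check)
    {
            c->curthread_prio = new_prio;
            if (check && should_preempt(c->runq_best_prio, c->curthread_prio))
                    c->owepreempt = true;
    }

    int
    main(void)
    {
            /* curthread runs at a lent priority of 10; a real-time thread
             * at priority 20 waits; curthread's own priority is 120. */
            struct cpu_model c = { 10, 20, false };

            lower_priority(&c, 120, false);         /* old behavior */
            printf("without check: owepreempt=%d (real-time thread starves)\n",
                (int)c.owepreempt);

            c.owepreempt = false;
            lower_priority(&c, 120, true);          /* with this commit's check */
            printf("with check:    owepreempt=%d (preemption requested)\n",
                (int)c.owepreempt);
            return (0);
    }

Before this change the scheduler behaved like the 'false' case above: the revoked priority took effect, but nothing re-examined the run queue until the thread's quantum expired.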
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 0f873a6a30b6..13009d5f9a6c 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -327,7 +327,7 @@ static void tdq_add(struct tdq *, struct thread *, int);
 #ifdef SMP
 static struct thread *tdq_move(struct tdq *, struct tdq *);
 static int tdq_idled(struct tdq *);
-static void tdq_notify(struct tdq *, struct thread *);
+static void tdq_notify(struct tdq *, int);
 static struct thread *tdq_steal(struct tdq *, int);
 static struct thread *runq_steal(struct runq *, int);
 static int sched_pickcpu(struct thread *, int);
@@ -950,7 +950,7 @@ sched_balance_pair(struct tdq *high, struct tdq *low)
 		 */
 		cpu = TDQ_ID(low);
 		if (cpu != PCPU_GET(cpuid))
-			tdq_notify(low, td);
+			tdq_notify(low, td->td_priority);
 	}
 	tdq_unlock_pair(high, low);
 	return (td != NULL);
@@ -1106,16 +1106,14 @@ tdq_idled(struct tdq *tdq)
  * Notify a remote cpu of new work.  Sends an IPI if criteria are met.
  */
 static void
-tdq_notify(struct tdq *tdq, struct thread *td)
+tdq_notify(struct tdq *tdq, int pri)
 {
 	struct thread *ctd;
-	int pri;
 	int cpu;
 
 	if (tdq->tdq_owepreempt)
 		return;
-	cpu = td_get_sched(td)->ts_cpu;
-	pri = td->td_priority;
+	cpu = TDQ_ID(tdq);
 	ctd = pcpu_find(cpu)->pc_curthread;
 	if (!sched_shouldpreempt(pri, ctd->td_priority, 1))
 		return;
@@ -1785,6 +1783,22 @@ sched_pctcpu_update(struct td_sched *ts, int run)
 	ts->ts_ltick = t;
 }
 
+static void
+sched_check_preempt(struct tdq *tdq, struct thread *td)
+{
+
+	KASSERT(TD_IS_RUNNING(td), ("thread is not running"));
+	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	KASSERT(tdq == TDQ_CPU(td_get_sched(td)->ts_cpu),
+	    ("tdq does not contain td"));
+
+	if (tdq == TDQ_SELF()) {
+		if (sched_shouldpreempt(tdq->tdq_lowpri, td->td_priority, 0))
+			td->td_owepreempt = 1;
+	} else
+		tdq_notify(tdq, tdq->tdq_lowpri);
+}
+
 /*
  * Adjust the priority of a thread.  Move it to the appropriate run-queue
  * if necessary.  This is the back-end for several priority related
@@ -1836,6 +1850,9 @@ sched_thread_priority(struct thread *td, u_char prio)
 			tdq->tdq_lowpri = prio;
 		else if (tdq->tdq_lowpri == oldpri)
 			tdq_setlowpri(tdq, td);
+
+		if (oldpri < prio)
+			sched_check_preempt(tdq, td);
 		return;
 	}
 	td->td_priority = prio;
@@ -2089,7 +2106,7 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
 	TDQ_UNLOCK(tdq);
 	TDQ_LOCK(tdn);
 	tdq_add(tdn, td, flags);
-	tdq_notify(tdn, td);
+	tdq_notify(tdn, td->td_priority);
 	TDQ_UNLOCK(tdn);
 	TDQ_LOCK(tdq);
 #endif
@@ -2679,7 +2696,7 @@ sched_add(struct thread *td, int flags)
 	tdq = sched_setcpu(td, cpu, flags);
 	tdq_add(tdq, td, flags);
 	if (cpu != PCPU_GET(cpuid))
-		tdq_notify(tdq, td);
+		tdq_notify(tdq, td->td_priority);
 	else if (!(flags & SRQ_YIELDING))
 		sched_setpreempt(td);
 #else
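A note on the shape of the fix: sched_check_preempt() splits on whether the queue belongs to the current CPU. Locally it only needs to set td_owepreempt; for a remote tdq it reuses tdq_notify(), which is why tdq_notify() now takes the priority to advertise (tdq_lowpri, the best priority waiting on that queue) and derives the target CPU from the queue itself via TDQ_ID() rather than from a thread pointer. A rough user-space sketch of that split follows; notify_cpu(), check_preempt(), and struct runq_model are invented for illustration.

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented model of a per-CPU run queue; not the kernel's struct tdq. */
    struct runq_model {
            int cpu;        /* CPU owning this queue, like TDQ_ID(tdq) */
            int lowpri;     /* best waiting priority, like tdq->tdq_lowpri */
    };

    static int this_cpu = 0;        /* stand-in for the current CPU id */
    static bool owepreempt = false; /* stand-in for td->td_owepreempt */

    static bool
    should_preempt(int waiting_prio, int running_prio)
    {
            return (waiting_prio < running_prio);
    }

    /* Stand-in for tdq_notify(): the real code compares 'pri' against the
     * remote curthread's priority and sends an IPI if it should preempt. */
    static void
    notify_cpu(int cpu, int pri)
    {
            printf("IPI cpu %d: waiter at priority %d\n", cpu, pri);
    }

    /* Mirror of sched_check_preempt()'s local/remote split. */
    static void
    check_preempt(const struct runq_model *q, int running_prio)
    {
            if (q->cpu == this_cpu) {
                    /* Local queue: just flag the running thread. */
                    if (should_preempt(q->lowpri, running_prio))
                            owepreempt = true;
            } else {
                    /* Remote queue: let that CPU re-evaluate. */
                    notify_cpu(q->cpu, q->lowpri);
            }
    }

    int
    main(void)
    {
            struct runq_model local = { 0, 20 }, remote = { 1, 20 };

            check_preempt(&local, 120);     /* sets owepreempt */
            check_preempt(&remote, 120);    /* logs the notification */
            printf("owepreempt=%d\n", (int)owepreempt);
            return (0);
    }

Passing the queue's tdq_lowpri rather than the lowered thread's own priority matters here: the thread whose priority just dropped is still the one running, so the relevant question is whether anything waiting on that queue now beats it.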