--- //depot/projects/smpng/sys/kern/kern_resource.c	2008/11/03 21:11:59
+++ //depot/user/jhb/needresched/kern/kern_resource.c	2008/11/03 22:18:06
@@ -465,6 +465,14 @@
 	return (error);
 }
 
+/*
+ * XXX: This is all very, very busted.  Probably this should be private to
+ * the scheduler.  I.e., there should be a sched_set_rtp(kg) (or td) and
+ * sched_get_rtp().  Realtime threads should really be in the kernel range
+ * above tsleep() and below ithreads.  Also, these settings do not take
+ * into account things like nice or kg_estcpu that the scheduler knows
+ * about.
+ */
 int
 rtp_to_pri(struct rtprio *rtp, struct thread *td)
 {
--- //depot/projects/smpng/sys/kern/sched_4bsd.c	2009/01/26 15:26:58
+++ //depot/user/jhb/needresched/kern/sched_4bsd.c	2009/01/26 16:07:27
@@ -269,7 +269,7 @@
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	if (td->td_priority < curthread->td_priority)
+	if (td->td_priority < curthread->td_ksegrp->kg_user_pri)
 		curthread->td_flags |= TDF_NEEDRESCHED;
 }
 
@@ -1433,7 +1433,50 @@
 void
 sched_userret(struct thread *td)
 {
+
+	/*
+	 * If we have a priority boost from tsleep(), limit it to PUSER - 1
+	 * so that other threads in the kernel will be preferred to this
+	 * thread.  Having a priority of PUSER - 1 still lets us be preferred
+	 * over other time-sharing processes that are in userland but keeps
+	 * sched_clock() and schedcpu() from taking away our boost until
+	 * we actually make it out to userland first.
+	 */
+	thread_lock(td);
+	if (td->td_priority < PUSER)
+		td->td_priority = PUSER - 1;
+
+	/*
+	 * The TDF_NEEDRESCHED flag has rotted too much in 5.x to be of real
+	 * use here.  We really need to either be checking the queues or
+	 * some other cached state to see if there is another thread of
+	 * higher priority that we should run next.
+	 */
+	if (td->td_flags & TDF_NEEDRESCHED) {
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(1, 1);
+			thread_lock(td);
+		}
+#endif
+		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(0, 1);
+			thread_lock(td);
+		}
+#endif
+	}
+	/*
+	 * Now that we are returning to userland, drop any priority boost
+	 * we may have received from tsleep() as we only wanted it long
+	 * enough to get us scheduled.  Any scheduling decisions made
+	 * after this point should be based on our real user priority
+	 * rather than the boosted one.
+	 *
 	 * XXX we cheat slightly on the locking here to avoid locking in
 	 * the usual case.  Setting td_priority here is essentially an
 	 * incomplete workaround for not setting it properly elsewhere.
@@ -1445,11 +1488,10 @@
 	KASSERT((td->td_flags & TDF_BORROWING) == 0,
 	    ("thread with borrowed priority returning to userland"));
 	if (td->td_priority != td->td_user_pri) {
-		thread_lock(td);
 		td->td_priority = td->td_user_pri;
 		td->td_base_pri = td->td_user_pri;
-		thread_unlock(td);
 	}
+	thread_unlock(td);
 }
 
 void
--- //depot/projects/smpng/sys/kern/sched_ule.c	2009/05/08 11:53:25
+++ //depot/user/jhb/needresched/kern/sched_ule.c	2009/05/08 14:17:47
@@ -1638,6 +1638,16 @@
 	if (prio >= base_pri) {
 		td->td_flags &= ~TDF_BORROWING;
 		sched_thread_priority(td, base_pri);
+		sched_slice(td->td_kse);
+#if 0
+		/*
+		 * XXX: We can't do this as we are always in a critical
+		 * section here.  Perhaps we should set TDF_NEEDRESCHED
+		 * instead?
+		 */
+		if (td->td_kse->ke_slice == 0)
+			mi_switch(SW_INVOL, NULL);
+#endif
 	} else
 		sched_lend_prio(td, prio);
 }
@@ -1893,6 +1903,7 @@
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		sched_priority(td);
+		/* XXX */
 		sched_prio(td, td->td_base_user_pri);
 		thread_unlock(td);
 	}
@@ -2087,7 +2098,44 @@
 void
 sched_userret(struct thread *td)
 {
+
+	/*
+	 * If we have a priority boost from tsleep(), limit it to PUSER - 1
+	 * so that other threads in the kernel will be preferred to this
+	 * thread.  Having a priority of PUSER - 1 still lets us be preferred
+	 * over other time-sharing processes that are in userland but keeps
+	 * sched_clock() from taking away our boost until we actually make it
+	 * out to userland first.
+	 */
+	thread_lock(td);
+	if (td->td_priority < PUSER)
+		td->td_priority = PUSER - 1;
+	/*
+	 * The TDF_NEEDRESCHED flag has rotted too much in 5.x to be of real
+	 * use here.  We really need to either be checking the queues or
+	 * some other cached state to see if there is another thread of
+	 * higher priority that we should run next.
+	 */
+	if (td->td_flags & TDF_NEEDRESCHED) {
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(1, 1);
+			thread_lock(td);
+		}
+#endif
+		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(0, 1);
+			thread_lock(td);
+		}
+#endif
+	}
+
+	/*
 	 * XXX we cheat slightly on the locking here to avoid locking in
 	 * the usual case.  Setting td_priority here is essentially an
 	 * incomplete workaround for not setting it properly elsewhere.
@@ -2099,12 +2147,11 @@
 	KASSERT((td->td_flags & TDF_BORROWING) == 0,
 	    ("thread with borrowed priority returning to userland"));
 	if (td->td_priority != td->td_user_pri) {
-		thread_lock(td);
 		td->td_priority = td->td_user_pri;
 		td->td_base_pri = td->td_user_pri;
 		tdq_setlowpri(TDQ_SELF(), td);
-		thread_unlock(td);
 	}
+	thread_unlock(td);
 }
 
 /*
--- //depot/projects/smpng/sys/kern/subr_trap.c	2009/01/15 22:41:24
+++ //depot/user/jhb/needresched/kern/subr_trap.c	2009/01/16 13:17:12
@@ -167,7 +167,7 @@
 	thread_lock(td);
 	flags = td->td_flags;
 	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
-	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
+	    TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
 	thread_unlock(td);
 	PCPU_INC(cnt.v_trap);
 
@@ -205,20 +205,13 @@
 	if (flags & TDF_MACPEND)
 		mac_thread_userret(td);
 #endif
-	if (flags & TDF_NEEDRESCHED) {
-#ifdef KTRACE
-		if (KTRPOINT(td, KTR_CSW))
-			ktrcsw(1, 1);
-#endif
-		thread_lock(td);
-		sched_prio(td, td->td_user_pri);
-		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
-		thread_unlock(td);
-#ifdef KTRACE
-		if (KTRPOINT(td, KTR_CSW))
-			ktrcsw(0, 1);
-#endif
-	}
+
+	/*
+	 * XXX: 4.x does this in userret() and only if a reschedule was
+	 * required.  This probably does belong in userret() rather than
+	 * in ast().  I'm not sure if it still needs to be conditional
+	 * on needresched.
+	 */
 	if (flags & TDF_NEEDSIGCHK) {
 		PROC_LOCK(p);
 		mtx_lock(&p->p_sigacts->ps_mtx);
--- //depot/projects/smpng/sys/notes	2009/02/18 22:05:55
+++ //depot/user/jhb/needresched/notes	2009/02/20 16:15:34
@@ -73,3 +73,15 @@
 - jhb_socket - socket hacking
 Space reserved for child branches:
++ Move needresched handling out of ast() and into sched_userret()
+  + 4BSD
+  + ULE
+- Reimplement needresched as a queue check in sched_userret()
+  - 4BSD (ups@ has this)
+  - ULE
+- Move the postsig() loop in ast() into userret(), possibly conditional
+  on needresched?  If so, it goes into sched_userret().
+- Need lots of thought on realtime and idle time priorities and handling
+  tsleep() priority "boosts" for such processes.  Might need to make
+  the priority boost be something passed to sched_sleep() rather than
+  a sched_prio() call.
--- //depot/projects/smpng/sys/sys/priority.h	2007/01/11 21:47:23
+++ //depot/user/jhb/needresched/sys/priority.h	2007/01/11 22:22:47
@@ -75,6 +75,10 @@
  *
  * XXX If/When the specific interrupt thread and top half thread ranges
  * disappear, a larger range can be used for user processes.
+ *
+ * XXX: We should maybe redo this more like Solaris, where the organization
+ * is: ithreads, realtime threads, kernel sleep priorities, time-sharing
+ * user threads, idle threads.
  */
 
 #define	PRI_MIN			(0)		/* Highest priority. */
@@ -120,6 +124,7 @@
 #define	PRI_MIN_IDLE		(224)
 #define	PRI_MAX_IDLE		(PRI_MAX)
 
+/* XXX: This isn't used anymore. */
 struct priority {
 	u_char	pri_class;	/* Scheduling class. */
 	u_char	pri_level;	/* Normal priority level. */
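
To make the intent of the two sched_userret() hunks above concrete, here is a
minimal, self-contained userland sketch of the clamp-then-restore priority
logic they implement.  This is not kernel code: struct fake_thread,
fake_userret(), and the need_resched flag are simplified stand-ins for the
real thread structure, sched_userret(), and TDF_NEEDRESCHED, and the PUSER
value here is only illustrative.  Only the control flow mirrors the patch.

#include <stdio.h>

#define	PUSER	160	/* illustrative stand-in for the kernel's PUSER */

struct fake_thread {
	int	td_priority;	/* current (possibly boosted) priority */
	int	td_user_pri;	/* real user priority */
	int	need_resched;	/* stand-in for TDF_NEEDRESCHED */
};

static void
fake_userret(struct fake_thread *td)
{
	/*
	 * Clamp any sleep boost to PUSER - 1: still preferred over
	 * userland time-sharing threads, but no longer preferred over
	 * other threads running in the kernel.
	 */
	if (td->td_priority < PUSER)
		td->td_priority = PUSER - 1;

	/* The patch calls mi_switch() here if a reschedule is pending. */
	if (td->need_resched) {
		printf("would context switch before returning to userland\n");
		td->need_resched = 0;
	}

	/* Returning to userland: drop the boost to the real user priority. */
	if (td->td_priority != td->td_user_pri)
		td->td_priority = td->td_user_pri;
}

int
main(void)
{
	struct fake_thread td = { 100, 180, 1 };	/* boosted by a sleep */

	fake_userret(&td);
	printf("final priority: %d\n", td.td_priority);
	return (0);
}

The point of the two-step flow is that the boosted priority survives long
enough for the reschedule decision, but never leaks back to userland.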