--- //depot/projects/smpng/sys/kern/kern_resource.c	2008/11/03 21:11:59
+++ //depot/user/jhb/needresched/kern/kern_resource.c	2008/11/03 22:18:06
@@ -465,6 +465,14 @@
 	return (error);
 }
 
+/*
+ * XXX: This is all very, very busted.  Probably this should be private to
+ * the scheduler.  I.e., there should be a sched_set_rtp(kg) (or td) and
+ * sched_get_rtp().  Realtime threads should really be in the kernel range
+ * above tsleep() and below ithreads.  Also, these settings do not take
+ * into account things like nice or kg_estcpu that the scheduler knows
+ * about.
+ */
 int
 rtp_to_pri(struct rtprio *rtp, struct thread *td)
 {
--- //depot/projects/smpng/sys/kern/sched_4bsd.c	2009/01/26 15:26:58
+++ //depot/user/jhb/needresched/kern/sched_4bsd.c	2009/01/26 16:07:27
@@ -269,7 +269,7 @@
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	if (td->td_priority < curthread->td_priority)
+	if (td->td_priority < curthread->td_ksegrp->kg_user_pri)
 		curthread->td_flags |= TDF_NEEDRESCHED;
 }
 
@@ -1433,7 +1433,50 @@
 void
 sched_userret(struct thread *td)
 {
+
+	/*
+	 * If we have a priority boost from tsleep(), limit it to PUSER - 1
+	 * so that other threads in the kernel will be preferred to this
+	 * thread.  Having a priority of PUSER - 1 still lets us be preferred
+	 * over other time-sharing processes that are in userland but keeps
+	 * sched_clock() and schedcpu() from taking away our boost until
+	 * we actually make it out to userland first.
+	 */
+	thread_lock(td);
+	if (td->td_priority < PUSER)
+		td->td_priority = PUSER - 1;
+
+	/*
+	 * The TDF_NEEDRESCHED flag has rotted too much in 5.x to be of real
+	 * use here.  We really need to either be checking the queues or
+	 * some other cached state to see if there is another thread of
+	 * higher priority that we should run next.
+	 */
+	if (td->td_flags & TDF_NEEDRESCHED) {
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(1, 1);
+			thread_lock(td);
+		}
+#endif
+		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(0, 1);
+			thread_lock(td);
+		}
+#endif
+	}
+	/*
+	 * Now that we are returning to userland, drop any priority boost
+	 * we may have received from tsleep() as we only wanted it long
+	 * enough to get us scheduled.  Any scheduling decisions made
+	 * after this point should be based on our real user priority
+	 * rather than the boosted one.
+	 *
 	 * XXX we cheat slightly on the locking here to avoid locking in
 	 * the usual case.  Setting td_priority here is essentially an
 	 * incomplete workaround for not setting it properly elsewhere.
@@ -1445,11 +1488,10 @@
 	KASSERT((td->td_flags & TDF_BORROWING) == 0,
 	    ("thread with borrowed priority returning to userland"));
 	if (td->td_priority != td->td_user_pri) {
-		thread_lock(td);
 		td->td_priority = td->td_user_pri;
 		td->td_base_pri = td->td_user_pri;
-		thread_unlock(td);
 	}
+	thread_unlock(td);
 }
 
 void
--- //depot/projects/smpng/sys/kern/sched_ule.c	2009/05/08 11:53:25
+++ //depot/user/jhb/needresched/kern/sched_ule.c	2009/05/08 14:17:47
@@ -1638,6 +1638,16 @@
 	if (prio >= base_pri) {
 		td->td_flags &= ~TDF_BORROWING;
 		sched_thread_priority(td, base_pri);
+		sched_slice(td->td_kse);
+#if 0
+		/*
+		 * XXX: We can't do this as we are always in a critical
+		 * section here.  Perhaps we should set TDF_NEEDRESCHED
+		 * instead?
+		 */
+		if (td->td_kse->ke_slice == 0)
+			mi_switch(SW_INVOL, NULL);
+#endif
 	} else
 		sched_lend_prio(td, prio);
 }
@@ -1893,6 +1903,7 @@
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		sched_priority(td);
+		/* XXX */
 		sched_prio(td, td->td_base_user_pri);
 		thread_unlock(td);
 	}
@@ -2087,7 +2098,44 @@
 void
 sched_userret(struct thread *td)
 {
+
+	/*
+	 * If we have a priority boost from tsleep(), limit it to PUSER - 1
+	 * so that other threads in the kernel will be preferred to this
+	 * thread.  Having a priority of PUSER - 1 still lets us be preferred
+	 * over other time-sharing processes that are in userland but keeps
+	 * sched_clock() from taking away our boost until we actually make it
+	 * out to userland first.
+	 */
+	thread_lock(td);
+	if (td->td_priority < PUSER)
+		td->td_priority = PUSER - 1;
+	/*
+	 * The TDF_NEEDRESCHED flag has rotted too much in 5.x to be of real
+	 * use here.  We really need to either be checking the queues or
+	 * some other cached state to see if there is another thread of
+	 * higher priority that we should run next.
+	 */
+	if (td->td_flags & TDF_NEEDRESCHED) {
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(1, 1);
+			thread_lock(td);
+		}
+#endif
+		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
+#ifdef KTRACE
+		if (KTRPOINT(td, KTR_CSW)) {
+			thread_unlock(td);
+			ktrcsw(0, 1);
+			thread_lock(td);
+		}
+#endif
+	}
+
+	/*
 	 * XXX we cheat slightly on the locking here to avoid locking in
 	 * the usual case.  Setting td_priority here is essentially an
 	 * incomplete workaround for not setting it properly elsewhere.
@@ -2099,12 +2147,11 @@
 	KASSERT((td->td_flags & TDF_BORROWING) == 0,
 	    ("thread with borrowed priority returning to userland"));
 	if (td->td_priority != td->td_user_pri) {
-		thread_lock(td);
 		td->td_priority = td->td_user_pri;
 		td->td_base_pri = td->td_user_pri;
 		tdq_setlowpri(TDQ_SELF(), td);
-		thread_unlock(td);
 	}
+	thread_unlock(td);
 }
 
 /*
--- //depot/projects/smpng/sys/kern/subr_trap.c	2009/01/15 22:41:24
+++ //depot/user/jhb/needresched/kern/subr_trap.c	2009/01/16 13:17:12
@@ -167,7 +167,7 @@
 	thread_lock(td);
 	flags = td->td_flags;
 	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
-	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
+	    TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
 	thread_unlock(td);
 	PCPU_INC(cnt.v_trap);
 
@@ -205,20 +205,13 @@
 	if (flags & TDF_MACPEND)
 		mac_thread_userret(td);
 #endif
-	if (flags & TDF_NEEDRESCHED) {
-#ifdef KTRACE
-		if (KTRPOINT(td, KTR_CSW))
-			ktrcsw(1, 1);
-#endif
-		thread_lock(td);
-		sched_prio(td, td->td_user_pri);
-		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
-		thread_unlock(td);
-#ifdef KTRACE
-		if (KTRPOINT(td, KTR_CSW))
-			ktrcsw(0, 1);
-#endif
-	}
+
+	/*
+	 * XXX: 4.x does this in userret() and only if a reschedule was
+	 * required.  This probably does belong in userret() rather than
+	 * in ast().  I'm not sure if it still needs to be conditional
+	 * on needresched.
+	 */
 	if (flags & TDF_NEEDSIGCHK) {
 		PROC_LOCK(p);
 		mtx_lock(&p->p_sigacts->ps_mtx);
--- //depot/projects/smpng/sys/notes	2009/02/18 22:05:55
+++ //depot/user/jhb/needresched/notes	2009/02/20 16:15:34
@@ -73,3 +73,15 @@
 - jhb_socket - socket hacking
 Space reserved for child branches:
++ Move needresched handling out of ast() and into sched_userret()
+  + 4BSD
+  + ULE
+- Reimplement needresched as a queue check in sched_userret()
+  - 4BSD (ups@ has this)
+  - ULE
+- Move the postsig() loop in ast() into userret(), possibly conditional
+  on needresched?  If so, it goes into sched_userret().
+- Need lots of thought on realtime and idle time priorities and handling
+  tsleep() priority "boosts" for such processes.  Might need to make
+  the priority boost be something passed to sched_sleep() rather than
+  a sched_prio() call.
--- //depot/projects/smpng/sys/sys/priority.h	2007/01/11 21:47:23
+++ //depot/user/jhb/needresched/sys/priority.h	2007/01/11 22:22:47
@@ -75,6 +75,10 @@
  *
  * XXX If/When the specific interrupt thread and top half thread ranges
  * disappear, a larger range can be used for user processes.
+ *
+ * XXX: We should maybe redo this more like Solaris, where the organization
+ * is: ithreads, realtime threads, kernel sleep priorities, time-sharing
+ * user threads, idle threads.
  */
 
 #define	PRI_MIN			(0)		/* Highest priority. */
@@ -120,6 +124,7 @@
 #define	PRI_MIN_IDLE		(224)
 #define	PRI_MAX_IDLE		(PRI_MAX)
 
+/* XXX: This isn't used anymore. */
 struct priority {
 	u_char	pri_class;	/* Scheduling class. */
 	u_char	pri_level;	/* Normal priority level. */
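
To make the intent of the two sched_userret() hunks above concrete, here is a
minimal, self-contained userland sketch of the clamp-then-restore priority
logic they implement.  This is not kernel code: struct fake_thread,
fake_userret(), and the need_resched flag are simplified stand-ins for the
real thread structure, sched_userret(), and TDF_NEEDRESCHED, and the PUSER
value here is only illustrative.  Only the control flow mirrors the patch.

#include <stdio.h>

#define	PUSER	160	/* illustrative stand-in for the kernel's PUSER */

struct fake_thread {
	int	td_priority;	/* current (possibly boosted) priority */
	int	td_user_pri;	/* real user priority */
	int	need_resched;	/* stand-in for TDF_NEEDRESCHED */
};

static void
fake_userret(struct fake_thread *td)
{
	/*
	 * Clamp any sleep boost to PUSER - 1: still preferred over
	 * userland time-sharing threads, but no longer preferred over
	 * other threads running in the kernel.
	 */
	if (td->td_priority < PUSER)
		td->td_priority = PUSER - 1;

	/* The patch calls mi_switch() here if a reschedule is pending. */
	if (td->need_resched) {
		printf("would context switch before returning to userland\n");
		td->need_resched = 0;
	}

	/* Returning to userland: drop the boost to the real user priority. */
	if (td->td_priority != td->td_user_pri)
		td->td_priority = td->td_user_pri;
}

int
main(void)
{
	struct fake_thread td = { 100, 180, 1 };	/* boosted by a sleep */

	fake_userret(&td);
	printf("final priority: %d\n", td.td_priority);
	return (0);
}

The point of the two-step flow is that the boosted priority survives long
enough for the reschedule decision, but never leaks back to userland.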