Index: amd64/amd64/mp_machdep.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/mp_machdep.c,v retrieving revision 1.288 diff -u -r1.288 mp_machdep.c --- amd64/amd64/mp_machdep.c 2 Mar 2008 07:58:40 -0000 1.288 +++ amd64/amd64/mp_machdep.c 9 Mar 2008 09:07:43 -0000 @@ -950,15 +950,8 @@ ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); - if (ipi_bitmap & (1 << IPI_PREEMPT)) { - struct thread *running_thread = curthread; - thread_lock(running_thread); - if (running_thread->td_critnest > 1) - running_thread->td_owepreempt = 1; - else - mi_switch(SW_INVOL | SW_PREEMPT, NULL); - thread_unlock(running_thread); - } + if (ipi_bitmap & (1 << IPI_PREEMPT)) + sched_preempt(curthread); /* Nothing to do for AST */ } Index: amd64/amd64/trap.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/trap.c,v retrieving revision 1.324 diff -u -r1.324 trap.c --- amd64/amd64/trap.c 7 Dec 2007 08:20:15 -0000 1.324 +++ amd64/amd64/trap.c 9 Mar 2008 09:07:44 -0000 @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -169,7 +170,7 @@ PCPU_INC(cnt.v_trap); type = frame->tf_trapno; - + sched_userenter(td); #ifdef SMP #ifdef STOP_NMI /* Handler for NMI IPIs used for stopping CPUs. 
*/ @@ -776,6 +777,7 @@ } #endif + sched_userenter(td); reg = 0; regcnt = 6; td->td_pticks = 0; Index: kern/kern_condvar.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_condvar.c,v retrieving revision 1.62 diff -u -r1.62 kern_condvar.c --- kern/kern_condvar.c 4 Jun 2007 23:50:56 -0000 1.62 +++ kern/kern_condvar.c 9 Mar 2008 09:07:49 -0000 @@ -125,7 +125,7 @@ cvp->cv_waiters++; DROP_GIANT(); - sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); lock_state = class->lc_unlock(lock); @@ -178,7 +178,7 @@ cvp->cv_waiters++; DROP_GIANT(); - sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); class->lc_unlock(lock); @@ -236,7 +236,7 @@ DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | - SLEEPQ_INTERRUPTIBLE, 0); + SLEEPQ_INTERRUPTIBLE, 0, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); lock_state = class->lc_unlock(lock); @@ -295,7 +295,7 @@ cvp->cv_waiters++; DROP_GIANT(); - sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0, 0); sleepq_set_timeout(cvp, timo); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); @@ -359,7 +359,7 @@ DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | - SLEEPQ_INTERRUPTIBLE, 0); + SLEEPQ_INTERRUPTIBLE, 0, 0); sleepq_set_timeout(cvp, timo); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); Index: kern/kern_sx.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_sx.c,v retrieving revision 1.58 diff -u -r1.58 kern_sx.c --- kern/kern_sx.c 15 Dec 2007 23:13:31 -0000 1.58 +++ kern/kern_sx.c 9 Mar 2008 09:07:49 -0000 @@ 
-554,7 +554,7 @@ GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? - SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); + SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE, 0); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object); else @@ -759,7 +759,7 @@ GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? - SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); + SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE, 0); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object); else Index: kern/kern_synch.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v retrieving revision 1.305 diff -u -r1.305 kern_synch.c --- kern/kern_synch.c 10 Jan 2008 22:11:20 -0000 1.305 +++ kern/kern_synch.c 9 Mar 2008 09:07:49 -0000 @@ -160,6 +160,7 @@ return (0); } catch = priority & PCATCH; + pri = priority & PRIMASK; rval = 0; /* @@ -198,7 +199,7 @@ * stopped, then td will no longer be on a sleep queue upon * return from cursig(). */ - sleepq_add(ident, ident == &lbolt ? NULL : lock, wmesg, flags, 0); + sleepq_add(ident, ident == &lbolt ? NULL : lock, wmesg, flags, 0, pri); if (timo) sleepq_set_timeout(ident, timo); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { @@ -207,17 +208,6 @@ lock_state = class->lc_unlock(lock); sleepq_lock(ident); } - - /* - * Adjust this thread's priority, if necessary. - */ - pri = priority & PRIMASK; - if (pri != 0 && pri != td->td_priority) { - thread_lock(td); - sched_prio(td, pri); - thread_unlock(td); - } - if (timo && catch) rval = sleepq_timedwait_sig(ident); else if (timo) @@ -282,7 +272,7 @@ /* * We put ourselves on the sleep queue and start our timeout. 
*/ - sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); + sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0, 0); if (timo) sleepq_set_timeout(ident, timo); Index: kern/kern_thread.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_thread.c,v retrieving revision 1.266 diff -u -r1.266 kern_thread.c --- kern/kern_thread.c 2 Mar 2008 07:39:22 -0000 1.266 +++ kern/kern_thread.c 9 Mar 2008 09:07:49 -0000 @@ -876,8 +876,8 @@ p->p_suspcount++; PROC_UNLOCK(p); thread_lock(td); - sched_sleep(td); TD_SET_SUSPENDED(td); + sched_sleep(td, 0); PROC_SUNLOCK(p); DROP_GIANT(); mi_switch(SW_VOL, NULL); @@ -896,8 +896,8 @@ THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); p->p_suspcount++; - sched_sleep(td); TD_SET_SUSPENDED(td); + sched_sleep(td, 0); } void Index: kern/kern_timeout.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.109 diff -u -r1.109 kern_timeout.c --- kern/kern_timeout.c 6 Feb 2008 00:04:09 -0000 1.109 +++ kern/kern_timeout.c 9 Mar 2008 09:07:49 -0000 @@ -562,7 +562,7 @@ mtx_unlock_spin(&callout_lock); sleepq_add(&callout_wait, &callout_lock.lock_object, "codrain", - SLEEPQ_SLEEP, 0); + SLEEPQ_SLEEP, 0, 0); sleepq_wait(&callout_wait); sq_locked = 0; Index: kern/sched_4bsd.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v retrieving revision 1.114 diff -u -r1.114 sched_4bsd.c --- kern/sched_4bsd.c 2 Mar 2008 21:34:57 -0000 1.114 +++ kern/sched_4bsd.c 9 Mar 2008 09:07:49 -0000 @@ -799,12 +799,16 @@ } void -sched_sleep(struct thread *td) +sched_sleep(struct thread *td, u_char pri) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_slptick = ticks; td->td_sched->ts_slptime = 0; + if (pri) + sched_prio(td, pri); + if (TD_IS_SUSPENDED(td) || pri >= PSOCK) + td->td_flags |= TDF_CANSWAP; } void @@ -922,6 
+926,7 @@ THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; + td->td_flags &= ~TDF_CANSWAP; if (ts->ts_slptime > 1) { updatepri(td); resetpriority(td); @@ -1244,6 +1249,22 @@ } void +sched_preempt(struct thread *td) +{ + thread_lock(td); + if (td->td_critnest > 1) + td->td_owepreempt = 1; + else + mi_switch(SW_INVOL | SW_PREEMPT, NULL); + thread_unlock(td); +} + +void +sched_userenter(struct thread *td) +{ +} + +void sched_userret(struct thread *td) { /* Index: kern/sched_ule.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v retrieving revision 1.226 diff -u -r1.226 sched_ule.c --- kern/sched_ule.c 2 Mar 2008 08:20:59 -0000 1.226 +++ kern/sched_ule.c 9 Mar 2008 09:07:49 -0000 @@ -186,7 +186,7 @@ #else static int preempt_thresh = 0; #endif -static int lowpri_userret = 1; +static int static_boost = 0; /* * tdq - per processor runqs and statistics. All fields are protected by the @@ -204,6 +204,7 @@ u_char tdq_idx; /* Current insert index. */ u_char tdq_ridx; /* Current removal index. */ u_char tdq_lowpri; /* Lowest priority thread. */ + u_char tdq_ipipending; /* IPI pending. */ int tdq_transferable; /* Transferable thread count. */ char tdq_name[sizeof("sched lock") + 6]; } __aligned(64); @@ -220,10 +221,7 @@ */ static int rebalance = 1; static int balance_interval = 128; /* Default set in sched_initticks(). */ -static int pick_pri = 1; static int affinity; -static int tryself = 1; -static int oldtryself = 0; static int steal_htt = 1; static int steal_idle = 1; static int steal_thresh = 2; @@ -898,16 +896,19 @@ { struct thread *ctd; struct pcpu *pcpu; + struct tdq *tdq; int cpri; int pri; int cpu; cpu = ts->ts_cpu; + tdq = TDQ_CPU(cpu); + if (tdq->tdq_ipipending) + return; pri = ts->ts_thread->td_priority; pcpu = pcpu_find(cpu); ctd = pcpu->pc_curthread; cpri = ctd->td_priority; - /* * If our priority is not better than the current priority there is * nothing to do. 
@@ -923,7 +924,7 @@ * If we're realtime or better and there is timeshare or worse running * send an IPI. */ - if (pri < PRI_MAX_REALTIME && cpri > PRI_MAX_REALTIME) + if (pri <= PRI_MAX_REALTIME && cpri > PRI_MAX_REALTIME) goto sendipi; /* * Otherwise only IPI if we exceed the threshold. @@ -931,7 +932,7 @@ if (pri > preempt_thresh) return; sendipi: - ctd->td_flags |= TDF_NEEDRESCHED; + tdq->tdq_ipipending = 1; ipi_selected(1 << cpu, IPI_PREEMPT); } @@ -1125,16 +1126,10 @@ /* * Compare the lowest loaded cpu to current cpu. */ - if (THREAD_CAN_SCHED(td, self) && - TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) { - if (tryself && TDQ_CPU(self)->tdq_lowpri > pri) - cpu = self; - else if (oldtryself && curthread->td_priority > pri) - cpu = self; - } - if (cpu == -1) { - panic("cpu == -1, mask 0x%X cpu top %p", mask, cpu_top); - } + if (THREAD_CAN_SCHED(td, self) && TDQ_CPU(self)->tdq_lowpri > pri && + TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) + cpu = self; + KASSERT(cpu != -1, ("sched_pickcpu: Failed to find a cpu.")); return (cpu); } #endif @@ -1854,12 +1849,16 @@ * Record the sleep time for the interactivity scorer. */ void -sched_sleep(struct thread *td) +sched_sleep(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_slptick = ticks; + if (TD_IS_SUSPENDED(td) || prio >= PSOCK) + td->td_flags |= TDF_CANSWAP; + if (static_boost && prio) + sched_prio(td, prio); } /* @@ -1874,6 +1873,7 @@ THREAD_LOCK_ASSERT(td, MA_OWNED); ts = td->td_sched; + td->td_flags &= ~TDF_CANSWAP; /* * If we slept for more than a tick update our interactivity and * priority. 
@@ -2027,6 +2027,25 @@ thread_unlock(td); } +void +sched_preempt(struct thread *td) +{ + struct tdq *tdq; + + KASSERT(td->td_critnest == 1, ("Unexpected critnest in sched_preempt")); + thread_lock(td); + tdq = TDQ_SELF(); + tdq->tdq_ipipending = 0; + if (td->td_priority > tdq->tdq_lowpri) + mi_switch(SW_INVOL | SW_PREEMPT, NULL); + thread_unlock(td); +} + +void +sched_userenter(struct thread *td) +{ +} + /* * Fix priorities on return to user-space. Priorities may be elevated due * to static priorities in msleep() or similar. @@ -2049,8 +2068,7 @@ thread_lock(td); td->td_priority = td->td_user_pri; td->td_base_pri = td->td_user_pri; - if (lowpri_userret) - tdq_setlowpri(TDQ_SELF(), td); + tdq_setlowpri(TDQ_SELF(), td); thread_unlock(td); } } @@ -2555,13 +2573,10 @@ SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW, &preempt_thresh, 0,"Min priority for preemption, lower priorities have greater precedence"); #ifdef SMP -SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0, - "Pick the target cpu based on priority rather than load."); SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0, "Number of hz ticks to keep thread affinity for"); -SYSCTL_INT(_kern_sched, OID_AUTO, tryself, CTLFLAG_RW, &tryself, 0, ""); -SYSCTL_INT(_kern_sched, OID_AUTO, userret, CTLFLAG_RW, &lowpri_userret, 0, ""); -SYSCTL_INT(_kern_sched, OID_AUTO, oldtryself, CTLFLAG_RW, &oldtryself, 0, ""); +SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RW, &static_boost, + 0, ""); SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0, "Enables the long-term load balancer"); SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW, Index: kern/subr_sleepqueue.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_sleepqueue.c,v retrieving revision 1.45 diff -u -r1.45 subr_sleepqueue.c --- kern/subr_sleepqueue.c 13 Feb 2008 23:36:56 -0000 1.45 +++ kern/subr_sleepqueue.c 9 Mar 2008 
09:07:49 -0000 @@ -272,7 +272,7 @@ */ void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, - int queue) + int queue, u_char pri) { struct sleepqueue_chain *sc; struct sleepqueue *sq; @@ -338,6 +338,7 @@ td->td_flags |= TDF_SINTR; td->td_flags &= ~TDF_SLEEPABORT; } + sched_sleep(td, pri); thread_unlock(td); } @@ -472,7 +473,6 @@ thread_lock_set(td, &sc->sc_lock); MPASS(td->td_sleepqueue == NULL); - sched_sleep(td); TD_SET_SLEEPING(td); SCHED_STAT_INC(switch_sleepq); mi_switch(SW_VOL, NULL); Index: kern/subr_smp.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_smp.c,v retrieving revision 1.204 diff -u -r1.204 subr_smp.c --- kern/subr_smp.c 2 Mar 2008 07:58:41 -0000 1.204 +++ kern/subr_smp.c 9 Mar 2008 09:07:49 -0000 @@ -405,6 +405,10 @@ /* Dual core with no sharing. */ top = smp_topo_1level(CG_SHARE_NONE, 2, 0); break; + case 2: + /* No topology, all cpus are equal. */ + top = smp_topo_none(); + break; case 3: /* Dual core with shared L2. */ top = smp_topo_1level(CG_SHARE_L2, 2, 0); Index: kern/subr_trap.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_trap.c,v retrieving revision 1.301 diff -u -r1.301 subr_trap.c --- kern/subr_trap.c 7 Dec 2007 08:20:16 -0000 1.301 +++ kern/subr_trap.c 9 Mar 2008 09:07:50 -0000 @@ -135,11 +135,11 @@ addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); } - /* * Let the scheduler adjust our priority etc. 
*/ sched_userret(td); + KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held.", td->td_locks)); } @@ -177,6 +177,7 @@ if ((p->p_flag & P_SA) && (td->td_mailbox == NULL)) thread_user_enter(td); #endif + sched_userenter(td); /* * This updates the td_flag's for the checks below in one Index: sys/proc.h =================================================================== RCS file: /home/ncvs/src/sys/sys/proc.h,v retrieving revision 1.504 diff -u -r1.504 proc.h --- sys/proc.h 2 Mar 2008 07:39:22 -0000 1.504 +++ sys/proc.h 9 Mar 2008 09:07:54 -0000 @@ -335,7 +335,7 @@ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ -#define TDF_UNUSEDx40 0x00000040 /* --available-- */ +#define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_UBORROWING 0x00000200 /* Thread is borrowing user pri. */ @@ -782,7 +782,8 @@ } while (0) /* Check whether a thread is safe to be swapped out. */ -#define thread_safetoswapout(td) (TD_IS_SLEEPING(td) || TD_IS_SUSPENDED(td)) +#define thread_safetoswapout(td) \ + ((td)->td_state == TDS_INHIBITED && ((td)->td_flags & TDF_CANSWAP)) /* Control whether or not it is safe for curthread to sleep. 
*/ #define THREAD_NO_SLEEPING() do { \ Index: sys/sched.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sched.h,v retrieving revision 1.34 diff -u -r1.34 sched.h --- sys/sched.h 2 Mar 2008 07:19:35 -0000 1.34 +++ sys/sched.h 9 Mar 2008 09:07:54 -0000 @@ -100,14 +100,16 @@ void sched_lend_user_prio(struct thread *td, u_char pri); fixpt_t sched_pctcpu(struct thread *td); void sched_prio(struct thread *td, u_char prio); -void sched_sleep(struct thread *td); +void sched_sleep(struct thread *td, u_char prio); void sched_switch(struct thread *td, struct thread *newtd, int flags); void sched_throw(struct thread *td); void sched_unlend_prio(struct thread *td, u_char prio); void sched_unlend_user_prio(struct thread *td, u_char pri); void sched_user_prio(struct thread *td, u_char prio); +void sched_userenter(struct thread *td); void sched_userret(struct thread *td); void sched_wakeup(struct thread *td); +void sched_preempt(struct thread *td); /* * Threads are moved on and off of run queues Index: sys/sleepqueue.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sleepqueue.h,v retrieving revision 1.12 diff -u -r1.12 sleepqueue.h --- sys/sleepqueue.h 31 Mar 2007 23:23:42 -0000 1.12 +++ sys/sleepqueue.h 9 Mar 2008 09:07:54 -0000 @@ -92,7 +92,7 @@ void init_sleepqueues(void); void sleepq_abort(struct thread *td, int intrval); void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, - int flags, int queue); + int flags, int queue, u_char prio); struct sleepqueue *sleepq_alloc(void); void sleepq_broadcast(void *wchan, int flags, int pri, int queue); void sleepq_free(struct sleepqueue *sq); Index: vm/vm_glue.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_glue.c,v retrieving revision 1.226 diff -u -r1.226 vm_glue.c --- vm/vm_glue.c 5 Nov 2007 11:36:16 -0000 1.226 +++ vm/vm_glue.c 9 Mar 2008 09:07:54 
-0000 @@ -917,8 +917,7 @@ * This could be refined to support * swapping out a thread. */ - if ((td->td_priority) < PSOCK || - !thread_safetoswapout(td)) { + if (!thread_safetoswapout(td)) { thread_unlock(td); goto nextproc; } Index: vm/vm_meter.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_meter.c,v retrieving revision 1.96 diff -u -r1.96 vm_meter.c --- vm/vm_meter.c 27 Jul 2007 20:01:21 -0000 1.96 +++ vm/vm_meter.c 9 Mar 2008 09:07:54 -0000 @@ -95,7 +95,6 @@ static int vmtotal(SYSCTL_HANDLER_ARGS) { -/* XXXKSE almost completely broken */ struct proc *p; struct vmtotal total; vm_map_entry_t entry; @@ -139,25 +138,16 @@ break; default: FOREACH_THREAD_IN_PROC(p, td) { - /* Need new statistics XXX */ thread_lock(td); switch (td->td_state) { case TDS_INHIBITED: - /* - * XXX stats no longer synchronized. - */ - if (TD_ON_LOCK(td) || - (td->td_inhibitors == - TDI_SWAPPED)) { + if (TD_IS_SWAPPED(td)) total.t_sw++; - } else if (TD_IS_SLEEPING(td) || - TD_AWAITING_INTR(td) || - TD_IS_SUSPENDED(td)) { - if (td->td_priority <= PZERO) - total.t_dw++; - else - total.t_sl++; - } + else if (TD_IS_SLEEPING(td) && + td->td_priority <= PZERO) + total.t_dw++; + else + total.t_sl++; break; case TDS_CAN_RUN: