Index: DEVEL/share/man/man9/sleepqueue.9 =================================================================== --- DEVEL/share/man/man9/sleepqueue.9 (revision 201878) +++ DEVEL/share/man/man9/sleepqueue.9 (working copy) @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 12, 2009 +.Dd January 8, 2010 .Dt SLEEPQUEUE 9 .Os .Sh NAME @@ -44,6 +44,7 @@ .Nm sleepq_sleepcnt , .Nm sleepq_timedwait , .Nm sleepq_timedwait_sig , +.Nm sleepq_type , .Nm sleepq_wait , .Nm sleepq_wait_sig .Nd manage the queues of sleeping threads @@ -84,6 +85,8 @@ .Fn sleepq_timedwait "void *wchan" .Ft int .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught" +.Ft int +.Fn sleepq_type "void *wchan" .Ft void .Fn sleepq_wait "void *wchan" .Ft int @@ -366,6 +369,12 @@ .Fa wchan . .Pp The +.Fn sleepq_type +function returns the type of the sleepqueue associated with +.Fa wchan , +or \-1 if no sleepqueue is found for it. +.Pp +The .Fn sleepq_abort , .Fn sleepq_broadcast , and Index: DEVEL/UPDATING =================================================================== --- DEVEL/UPDATING (revision 201878) +++ DEVEL/UPDATING (working copy) @@ -22,6 +22,11 @@ machines to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20100108: + Introduce the kernel thread "deadlock resolver" (which can be enabled + via the DEADLKRES option, see NOTES for more details) and the + sleepq_type() function for sleepqueues. + 20091202: The rc.firewall and rc.firewall6 were unified, and rc.firewall6 and rc.d/ip6fw were removed. Index: DEVEL/sys/conf/NOTES =================================================================== --- DEVEL/sys/conf/NOTES (revision 201878) +++ DEVEL/sys/conf/NOTES (working copy) @@ -2531,6 +2531,11 @@ options SW_WATCHDOG # +# Add the software deadlock resolver thread. +# +options DEADLKRES + +# # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn # it back on at run-time. 
Index: DEVEL/sys/conf/options =================================================================== --- DEVEL/sys/conf/options (revision 201878) +++ DEVEL/sys/conf/options (working copy) @@ -72,6 +72,7 @@ COMPAT_FREEBSD7 opt_compat.h COMPILING_LINT opt_global.h CY_PCI_FASTINTR +DEADLKRES opt_watchdog.h DIRECTIO FULL_PREEMPTION opt_sched.h IPI_PREEMPTION opt_sched.h Index: DEVEL/sys/kern/kern_clock.c =================================================================== --- DEVEL/sys/kern/kern_clock.c (revision 201878) +++ DEVEL/sys/kern/kern_clock.c (working copy) @@ -48,14 +48,16 @@ #include #include #include +#include +#include #include -#include #include #include #include #include #include #include +#include #include #include #include @@ -159,6 +161,124 @@ SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); +#ifdef DEADLKRES +static int slptime_threshold = 1800; /* Sleepqueue sleep limit, in seconds. */ +static int blktime_threshold = 900; /* Turnstile block limit, in seconds. */ +static int sleepfreq = 3; /* Seconds between two scans. */ + +static void +deadlkres(void) +{ + struct proc *p; + struct thread *td; + void *wchan; + int blkticks, slpticks, slptype, tryl, tticks; + + tryl = 0; /* Consecutive failed allproc_lock attempts. */ + for (;;) { + blkticks = blktime_threshold * hz; + slpticks = slptime_threshold * hz; + + /* + * Do not block on allproc_lock, in order to avoid a possible + * priority inversion problem leading to starvation. + * Retry with sx_try_slock() and panic once tryl exceeds 100. + */ + if (!sx_try_slock(&allproc_lock)) { + if (tryl > 100) + panic("%s: possible deadlock detected on allproc_lock\n", + __func__); + tryl++; + pause("allproc_lock deadlkres", sleepfreq * hz); + continue; + } + tryl = 0; + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (TD_ON_LOCK(td)) { + + /* + * The thread is blocked on a lock, so + * it must have a turnstile; check how + * long it has been blocked there. 
+ */ + MPASS(td->td_blocked != NULL); + tticks = ticks - td->td_blktick; + thread_unlock(td); + if (tticks > blkticks) { + + /* + * According to the provided + * thresholds, this thread has + * been stuck for too long on a + * turnstile. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else if (TD_IS_SLEEPING(td)) { + + /* + * Only SLEEPQ_SX and SLEEPQ_LK sleeps + * are checked, others are skipped. + * Drop the thread lock first to avoid + * a lock order reversal (LOR) with + * the sleepqueue spinlock. + */ + wchan = td->td_wchan; + tticks = ticks - td->td_slptick; + thread_unlock(td); + slptype = sleepq_type(wchan); + if ((slptype == SLEEPQ_SX || + slptype == SLEEPQ_LK) && + tticks > slpticks) { + + /* + * According to the provided + * thresholds, this thread has + * been stuck for too long on a + * sleepqueue. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else + thread_unlock(td); + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + + /* Sleep for sleepfreq seconds. 
*/ + pause("deadlkres", sleepfreq * hz); + } +} + +static struct kthread_desc deadlkres_kd = { + "deadlkres", + deadlkres, + (struct thread **)NULL +}; + +SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); + +SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, + &slptime_threshold, 0, + "Number of seconds within is valid to sleep on a sleepqueue"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, + &blktime_threshold, 0, + "Number of seconds within is valid to block on a turnstile"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, + "Number of seconds between any deadlock resolver thread run"); +#endif /* DEADLKRES */ + void read_cpu_time(long *cp_time) { Index: DEVEL/sys/kern/subr_turnstile.c =================================================================== --- DEVEL/sys/kern/subr_turnstile.c (revision 201878) +++ DEVEL/sys/kern/subr_turnstile.c (working copy) @@ -733,6 +733,7 @@ td->td_tsqueue = queue; td->td_blocked = ts; td->td_lockname = lock->lo_name; + td->td_blktick = ticks; TD_SET_LOCK(td); mtx_unlock_spin(&tc->tc_lock); propagate_priority(td); @@ -925,6 +926,7 @@ MPASS(TD_CAN_RUN(td)); td->td_blocked = NULL; td->td_lockname = NULL; + td->td_blktick = 0; #ifdef INVARIANTS td->td_tsqueue = 0xff; #endif Index: DEVEL/sys/kern/subr_sleepqueue.c =================================================================== --- DEVEL/sys/kern/subr_sleepqueue.c (revision 201878) +++ DEVEL/sys/kern/subr_sleepqueue.c (working copy) @@ -122,8 +122,8 @@ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ + int sq_type; /* (c) Queue type. */ #ifdef INVARIANTS - int sq_type; /* (c) Queue type. */ struct lock_object *sq_lock; /* (c) Associated lock. 
*/ #endif }; @@ -317,7 +317,6 @@ ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; - sq->sq_type = flags & SLEEPQ_TYPE; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; @@ -330,6 +329,7 @@ sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; + sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); @@ -669,6 +669,28 @@ } /* + * Returns the sleepqueue type for a wait channel, or -1 if none is found. + */ +int +sleepq_type(void *wchan) +{ + struct sleepqueue *sq; + int type; + + MPASS(wchan != NULL); + + sleepq_lock(wchan); + sq = sleepq_lookup(wchan); + if (sq == NULL) { + sleepq_release(wchan); + return (-1); + } + type = sq->sq_type; + sleepq_release(wchan); + return (type); +} + +/* * Removes a thread from a sleep queue and makes it * runnable. */ @@ -1176,8 +1198,8 @@ return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); + db_printf("Queue type: %d\n", sq->sq_type); #ifdef INVARIANTS - db_printf("Queue type: %d\n", sq->sq_type); if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, Index: DEVEL/sys/sys/proc.h =================================================================== --- DEVEL/sys/sys/proc.h (revision 201878) +++ DEVEL/sys/sys/proc.h (working copy) @@ -218,6 +218,7 @@ struct ucred *td_ucred; /* (k) Reference to credentials. */ u_int td_estcpu; /* (t) estimated cpu utilization */ int td_slptick; /* (t) Time at sleep. */ + int td_blktick; /* (t) Tick when blocked. */ struct rusage td_ru; /* (t) rusage information */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. 
*/ Index: DEVEL/sys/sys/sleepqueue.h =================================================================== --- DEVEL/sys/sys/sleepqueue.h (revision 201878) +++ DEVEL/sys/sys/sleepqueue.h (working copy) @@ -112,6 +112,7 @@ u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); +int sleepq_type(void *wchan); void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri);