Stop the scheduler on panic (opt-in via kern.stop_scheduler_on_panic).

When the tunable/sysctl is set, panic() enters a spinlock section instead
of a critical section, hard-stops the other CPUs and sets stop_scheduler.
SCHEDULER_STOPPED() then makes the lockmgr, mutex, rmlock, rwlock, sx and
sleep primitives return immediately instead of blocking, and mi_switch()
becomes a no-op.

Review changes relative to the first version of this patch:
 - kern_reboot(): keep the "%s"/__func__ form of the KASSERT message
   instead of hard-coding a "boot:" prefix.
 - panic(): set stop_scheduler right after the other CPUs are stopped,
   i.e. before the panic message is printed, not after it.
 - kdb_trap(): decide whether to stop the other CPUs with
   SCHEDULER_STOPPED() rather than panicstr, so that entering the
   debugger from a panic still stops them when the tunable is off.
 - systm.h: reword the SCHEDULER_STOPPED() comment; the macro tests a
   global flag, not the identity of the calling thread.

The blob ids on the "index" lines of kern_shutdown.c, subr_kdb.c and
systm.h predate these review changes.

diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index ddd55bb..a007511 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -1231,6 +1231,9 @@ _lockmgr_disown(struct lock *lk, const char *file, int line)
 {
 	uintptr_t tid, x;
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	tid = (uintptr_t)curthread;
 	_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
 
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 041b480..2640a8e 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -238,7 +238,8 @@ _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
-		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+		KASSERT(SCHEDULER_STOPPED() ||
+		    (m->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
@@ -287,6 +288,8 @@ _mtx_trylock(struct mtx *m, int opts, const char *file, int line)
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
+	if (SCHEDULER_STOPPED())
+		return (1);
 
 	if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		m->mtx_recurse++;
@@ -337,6 +340,8 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
 	int64_t sleep_time = 0;
 #endif
 
+	if (SCHEDULER_STOPPED())
+		return;
 	if (mtx_owned(m)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
@@ -507,6 +512,8 @@ _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
 	uint64_t waittime = 0;
 #endif
 
+	if (SCHEDULER_STOPPED())
+		return;
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 
@@ -552,6 +559,11 @@ _thread_lock_flags(struct thread *td, int opts, const char *file, int line)
 	uint64_t spin_cnt = 0;
 #endif
 
+	if (SCHEDULER_STOPPED()) {
+		spinlock_enter();
+		return;
+	}
+
 	i = 0;
 	tid = (uintptr_t)curthread;
 	for (;;) {
@@ -577,6 +589,7 @@ retry:
 				m->mtx_recurse++;
 				break;
 			}
+
 			lock_profile_obtain_lock_failed(&m->lock_object,
 			    &contested, &waittime);
 			/* Give interrupts a chance while we spin. */
@@ -655,6 +668,8 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
 {
 	struct turnstile *ts;
 
+	if (SCHEDULER_STOPPED())
+		return;
 	if (mtx_recursed(m)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 1c7337d..96aef31 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -344,6 +344,9 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 	struct thread *td = curthread;
 	struct pcpu *pc;
 
+	if (SCHEDULER_STOPPED())
+		return (1);
+
 	tracker->rmp_flags = 0;
 	tracker->rmp_thread = td;
 	tracker->rmp_rmlock = rm;
@@ -413,6 +416,9 @@ _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker)
 	struct pcpu *pc;
 	struct thread *td = tracker->rmp_thread;
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	td->td_critnest++;	/* critical_enter(); */
 	pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */
 	rm_tracker_remove(pc, tracker);
@@ -432,6 +438,9 @@ _rm_wlock(struct rmlock *rm)
 	struct turnstile *ts;
 	cpuset_t readcpus;
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
 		sx_xlock(&rm->rm_lock_sx);
 	else
@@ -486,6 +495,9 @@ _rm_wunlock(struct rmlock *rm)
 void _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 
@@ -507,6 +519,9 @@ void
 _rm_wunlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	curthread->td_locks--;
 	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
 		WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
@@ -521,6 +536,10 @@ int
 _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     int trylock, const char *file, int line)
 {
+
+	if (SCHEDULER_STOPPED())
+		return (1);
+
 	if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE))
 		WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER,
 		    file, line, NULL);
@@ -544,6 +563,9 @@ _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
     const char *file, int line)
 {
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	curthread->td_locks--;
 	WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 1037f34..b95e260 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -324,6 +324,9 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
 	    rw->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	for (;;) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
@@ -533,6 +536,9 @@ _rw_runlock(struct rwlock *rw, const char *file, int line)
 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	/* TODO: drop "owner of record" here. */
 
 	for (;;) {
@@ -660,6 +666,9 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
 		return;
 	}
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
@@ -821,6 +830,9 @@ _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
 		return;
 	}
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 	    ("%s: neither of the waiter flags are set", __func__));
 
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 2b5a5ae..90866fd 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -121,6 +121,11 @@ SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
 TUNABLE_INT("kern.sync_on_panic", &sync_on_panic);
 
+static int stop_scheduler_on_panic = 0;
+SYSCTL_INT(_kern, OID_AUTO, stop_scheduler_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
+    &stop_scheduler_on_panic, 0, "stop scheduler upon entering panic");
+TUNABLE_INT("kern.stop_scheduler_on_panic", &stop_scheduler_on_panic);
+
 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
     "Shutdown environment");
 
@@ -138,6 +143,7 @@ SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
  */
 const char *panicstr;
 
+int stop_scheduler;			/* only one thread runs after panic */
 int dumping;				/* system is dumping */
 int rebooting;				/* system is rebooting */
 static struct dumperinfo dumper;	/* our selected dumper */
@@ -294,10 +300,13 @@ kern_reboot(int howto)
 	 * systems don't shutdown properly (i.e., ACPI power off) if we
 	 * run on another processor.
 	 */
-	thread_lock(curthread);
-	sched_bind(curthread, 0);
-	thread_unlock(curthread);
-	KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__));
+	if (!SCHEDULER_STOPPED()) {
+		thread_lock(curthread);
+		sched_bind(curthread, 0);
+		thread_unlock(curthread);
+		KASSERT(PCPU_GET(cpuid) == 0,
+		    ("%s: not running on cpu 0", __func__));
+	}
 #endif
 	/* We're in the process of rebooting. */
 	rebooting = 1;
@@ -547,13 +556,18 @@ panic(const char *fmt, ...)
 {
 #ifdef SMP
 	static volatile u_int panic_cpu = NOCPU;
+	cpuset_t other_cpus;
 #endif
 	struct thread *td = curthread;
 	int bootopt, newpanic;
 	va_list ap;
 	static char buf[256];
 
-	critical_enter();
+	if (stop_scheduler_on_panic)
+		spinlock_enter();
+	else
+		critical_enter();
+
 #ifdef SMP
 	/*
 	 * We don't want multiple CPU's to panic at the same time, so we
@@ -566,11 +580,26 @@ panic(const char *fmt, ...)
 		    PCPU_GET(cpuid)) == 0)
 			while (panic_cpu != NOCPU)
 				; /* nothing */
+	if (stop_scheduler_on_panic) {
+		if (panicstr == NULL && !kdb_active) {
+			other_cpus = all_cpus;
+			CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+			stop_cpus_hard(other_cpus);
+		}
+	}
 #endif
 
+	/*
+	 * Mark the scheduler stopped before anything below can take a
+	 * lock: the printf() calls that report the panic may need locks
+	 * owned by threads that will never run again.
+	 */
+	if (stop_scheduler_on_panic)
+		stop_scheduler = 1;
+
 	bootopt = RB_AUTOBOOT;
 	newpanic = 0;
-	if (panicstr)
+	if (panicstr != NULL)
 		bootopt |= RB_NOSYNC;
 	else {
 		bootopt |= RB_DUMP;
@@ -602,9 +631,13 @@ panic(const char *fmt, ...)
 	/*thread_lock(td); */
 	td->td_flags |= TDF_INPANIC;
 	/* thread_unlock(td); */
+
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
-	critical_exit();
+
+	if (!stop_scheduler_on_panic)
+		critical_exit();
+
 	kern_reboot(bootopt);
 }
 
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 1e4430a..ea4e150 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -260,6 +260,8 @@ _sx_try_slock(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 
+	if (SCHEDULER_STOPPED())
+		return (1);
 	for (;;) {
 		x = sx->sx_lock;
 		KASSERT(x != SX_LOCK_DESTROYED,
@@ -308,6 +310,8 @@ _sx_try_xlock(struct sx *sx, const char *file, int line)
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
 
+	if (SCHEDULER_STOPPED())
+		return (1);
 	if (sx_xlocked(sx) &&
 	    (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		sx->sx_recurse++;
@@ -348,6 +352,8 @@ _sx_xunlock(struct sx *sx, const char *file, int line)
 	MPASS(curthread != NULL);
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
+	if (SCHEDULER_STOPPED())
+		return;
 	_sx_assert(sx, SA_XLOCKED, file, line);
 	curthread->td_locks--;
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
@@ -371,6 +377,8 @@ _sx_try_upgrade(struct sx *sx, const char *file, int line)
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
+	if (SCHEDULER_STOPPED())
+		return (1);
 	_sx_assert(sx, SA_SLOCKED, file, line);
 
 	/*
@@ -401,6 +409,8 @@ _sx_downgrade(struct sx *sx, const char *file, int line)
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
+	if (SCHEDULER_STOPPED())
+		return;
 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (sx_recursed(sx))
@@ -481,6 +491,9 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
 	int64_t sleep_time = 0;
 #endif
 
+	if (SCHEDULER_STOPPED())
+		return (0);
+
 	/* If we already hold an exclusive lock, then recurse. */
 	if (sx_xlocked(sx)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -681,6 +694,9 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
 	uintptr_t x;
 	int queue, wakeup_swapper;
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
 
 	/* If the lock is recursed, then unrecurse one level. */
@@ -691,6 +707,7 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
 		return;
 	}
+
 	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
 	    SX_LOCK_EXCLUSIVE_WAITERS));
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
@@ -753,6 +770,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
 	int64_t sleep_time = 0;
 #endif
 
+	if (SCHEDULER_STOPPED())
+		return (0);
+
 	/*
 	 * As with rwlocks, we don't make any attempt to try to block
 	 * shared locks once there is an exclusive waiter.
@@ -919,6 +939,9 @@ _sx_sunlock_hard(struct sx *sx, const char *file, int line)
 	uintptr_t x;
 	int wakeup_swapper;
 
+	if (SCHEDULER_STOPPED())
+		return;
+
 	for (;;) {
 		x = sx->sx_lock;
 
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index a2c26ae..a2316c1 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -158,7 +158,7 @@ _sleep(void *ident, struct lock_object *lock, int priority,
 	else
 		class = NULL;
 
-	if (cold) {
+	if (cold || SCHEDULER_STOPPED()) {
 		/*
 		 * During autoconfiguration, just return;
 		 * don't run any other threads or panic below,
@@ -260,7 +260,7 @@ msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
 	KASSERT(p != NULL, ("msleep1"));
 	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
 
-	if (cold) {
+	if (cold || SCHEDULER_STOPPED()) {
 		/*
 		 * During autoconfiguration, just return;
 		 * don't run any other threads or panic below,
@@ -411,6 +411,8 @@ mi_switch(int flags, struct thread *newtd)
 	 */
 	if (kdb_active)
 		kdb_switch();
+	if (SCHEDULER_STOPPED())
+		return;
 	if (flags & SW_VOL) {
 		td->td_ru.ru_nvcsw++;
 		td->td_swvoltick = ticks;
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 42e188b..f8a5535 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -226,13 +226,7 @@ kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS)
 void
 kdb_panic(const char *msg)
 {
-#ifdef SMP
-	cpuset_t other_cpus;
 
-	other_cpus = all_cpus;
-	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
-	stop_cpus_hard(other_cpus);
-#endif
 	printf("KDB: panic\n");
 	panic("%s", msg);
 }
@@ -594,6 +588,9 @@ kdb_trap(int type, int code, struct trapframe *tf)
 	struct kdb_dbbe *be;
 	register_t intr;
 	int handled;
+#ifdef SMP
+	int did_stop_cpus;
+#endif
 
 	be = kdb_dbbe;
 	if (be == NULL || be->dbbe_trap == NULL)
@@ -606,9 +603,17 @@ kdb_trap(int type, int code, struct trapframe *tf)
 	intr = intr_disable();
 
 #ifdef SMP
-	other_cpus = all_cpus;
-	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
-	stop_cpus_hard(other_cpus);
+	/*
+	 * Skip stopping only when panic() has already stopped the other
+	 * CPUs; panicstr alone does not imply that.
+	 */
+	if (!SCHEDULER_STOPPED()) {
+		other_cpus = all_cpus;
+		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+		stop_cpus_hard(other_cpus);
+		did_stop_cpus = 1;
+	} else
+		did_stop_cpus = 0;
 #endif
 
 	kdb_active++;
@@ -634,7 +639,8 @@ kdb_trap(int type, int code, struct trapframe *tf)
 	kdb_active--;
 
 #ifdef SMP
-	restart_cpus(stopped_cpus);
+	if (did_stop_cpus)
+		restart_cpus(stopped_cpus);
 #endif
 
 	intr_restore(intr);
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index e7e0b28..0f121e2 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -344,7 +344,8 @@ do {									\
 									\
 	if (mtx_owned(&Giant)) {					\
 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
-		for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++)	\
+		for (_giantcnt = 0; !SCHEDULER_STOPPED() &&		\
+		    mtx_owned(&Giant); _giantcnt++)			\
 			mtx_unlock(&Giant);				\
 	}
 
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 7e537ee..bf8ab3b 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -47,6 +47,7 @@
 
 extern int cold;		/* nonzero if we are doing a cold boot */
 extern int rebooting;		/* kern_reboot() has been called. */
+extern int stop_scheduler;	/* only one thread runs after panic */
 extern const char *panicstr;	/* panic message */
 extern char version[];		/* system version */
 extern char copyright[];	/* system copyright */
@@ -109,6 +110,14 @@ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN };
 	((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg)
 
 /*
+ * Once panic() has stopped the scheduler only the panicking thread
+ * runs, so locking primitives must not block but silently succeed.
+ * Otherwise, the kernel will deadlock since the scheduler isn't
+ * going to run the thread that holds any lock we need.
+ */
+#define	SCHEDULER_STOPPED() __predict_false(stop_scheduler)
+
+/*
  * XXX the hints declarations are even more misplaced than most declarations
  * in this file, since they are needed in one file (per arch) and only used
  * in two files.