Index: sys/kern/kern_mutex.c
===================================================================
--- sys/kern/kern_mutex.c	(revision 202719)
+++ sys/kern/kern_mutex.c	(working copy)
@@ -616,7 +616,6 @@
 {
 	struct mtx *lock;
 
-	spinlock_enter();
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
 	td->td_lock = &blocked_lock;
@@ -631,7 +630,6 @@
 	mtx_assert(new, MA_OWNED);
 	MPASS(td->td_lock == &blocked_lock);
 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
-	spinlock_exit();
 }
 
 void
Index: sys/kern/sched_ule.c
===================================================================
--- sys/kern/sched_ule.c	(revision 202719)
+++ sys/kern/sched_ule.c	(working copy)
@@ -301,8 +301,6 @@
 static void sched_balance(void);
 static int sched_balance_pair(struct tdq *, struct tdq *);
 static inline struct tdq *sched_setcpu(struct thread *, int, int);
-static inline struct mtx *thread_block_switch(struct thread *);
-static inline void thread_unblock_switch(struct thread *, struct mtx *);
 static struct mtx *sched_switch_migrate(struct tdq *, struct thread *, int);
 static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
@@ -1106,9 +1104,11 @@
 	 * The hard case, migration, we need to block the thread first to
 	 * prevent order reversals with other cpus locks.
 	 */
+	spinlock_enter();
 	thread_lock_block(td);
 	TDQ_LOCK(tdq);
 	thread_lock_unblock(td, TDQ_LOCKPTR(tdq));
+	spinlock_exit();
 	return (tdq);
 }
 
@@ -1715,23 +1715,6 @@
 }
 
 /*
- * Block a thread for switching.  Similar to thread_block() but does not
- * bump the spin count.
- */
-static inline struct mtx *
-thread_block_switch(struct thread *td)
-{
-	struct mtx *lock;
-
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	lock = td->td_lock;
-	td->td_lock = &blocked_lock;
-	mtx_unlock_spin(lock);
-
-	return (lock);
-}
-
-/*
  * Handle migration from sched_switch().  This happens only for
  * cpu binding.
  */
@@ -1749,7 +1732,7 @@
 	 * not holding either run-queue lock.
 	 */
 	spinlock_enter();
-	thread_block_switch(td);	/* This releases the lock on tdq. */
+	thread_lock_block(td);	/* This releases the lock on tdq. */
 
 	/*
 	 * Acquire both run-queue locks before placing the thread on the new
@@ -1769,16 +1752,6 @@
 }
 
 /*
- * Release a thread that was blocked with thread_block_switch().
- */
-static inline void
-thread_unblock_switch(struct thread *td, struct mtx *mtx)
-{
-	atomic_store_rel_ptr((volatile uintptr_t *)&td->td_lock,
-	    (uintptr_t)mtx);
-}
-
-/*
  * Switch threads.  This function has to handle threads coming in while
  * blocked for some reason, running, or idle.  It also must deal with
  * migrating a thread from one queue to another as running threads may
@@ -1825,7 +1798,7 @@
 	} else {
 		/* This thread must be going to sleep. */
 		TDQ_LOCK(tdq);
-		mtx = thread_block_switch(td);
+		mtx = thread_lock_block(td);
 		tdq_load_rem(tdq, td);
 	}
 	/*
@@ -1871,7 +1844,7 @@
 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 #endif
 	} else
-		thread_unblock_switch(td, mtx);
+		thread_lock_unblock(td, mtx);
 	/*
 	 * Assert that all went well and return.
 	 */
Index: sys/kern/sched_4bsd.c
===================================================================
--- sys/kern/sched_4bsd.c	(revision 202719)
+++ sys/kern/sched_4bsd.c	(working copy)
@@ -920,9 +920,11 @@
 void
 sched_switch(struct thread *td, struct thread *newtd, int flags)
 {
+	struct mtx *tmtx;
 	struct td_sched *ts;
 	struct proc *p;
 
+	tmtx = NULL;
 	ts = td->td_sched;
 	p = td->td_proc;
 
@@ -931,10 +933,11 @@
 	/*
 	 * Switch to the sched lock to fix things up and pick
 	 * a new thread.
+	 * Block the td_lock in order to avoid breaking the critical path.
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
-		thread_unlock(td);
+		tmtx = thread_lock_block(td);
 	}
 
 	if ((td->td_flags & TDF_NOLOAD) == 0)
@@ -1004,7 +1007,7 @@
 			(*dtrace_vtime_switch_func)(newtd);
 #endif
 
-		cpu_switch(td, newtd, td->td_lock);
+		cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock);
 		lock_profile_obtain_lock_success(&sched_lock.lock_object, 0, 0,
 		    __FILE__, __LINE__);
 		/*
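Summary of the patch: the private thread_block_switch()/thread_unblock_switch() pair in sched_ule.c is folded into the general thread_lock_block()/thread_lock_unblock() helpers, which no longer bump the spin count themselves; the callers that must stay in a critical section while td_lock points at blocked_lock (the migration paths in sched_setcpu() and sched_switch_migrate()) now call spinlock_enter()/spinlock_exit() explicitly, and sched_4bsd's sched_switch() keeps td_lock blocked across the switch by handing the saved lock (tmtx) to cpu_switch() instead of unlocking the thread.

For illustration only, below is a minimal userland model of the blocked-lock handoff, using pthreads and C11 atomics. The names blocked_lock, td_lock and the model_* helpers merely mirror the kernel ones and are not kernel API, and the critical-section part (spinlock_enter/spinlock_exit) is left out of the model.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t old_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t new_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t blocked_lock = PTHREAD_MUTEX_INITIALIZER;	/* sentinel, never locked */

struct thread_model {
	_Atomic(pthread_mutex_t *) td_lock;	/* lock currently protecting the "thread" */
};

/*
 * Point td_lock at the sentinel and drop the old lock, returning it to the
 * caller; mirrors what thread_lock_block() does (minus the spin count).
 */
static pthread_mutex_t *
model_lock_block(struct thread_model *td)
{
	pthread_mutex_t *lock;

	lock = atomic_load(&td->td_lock);
	atomic_store(&td->td_lock, &blocked_lock);
	pthread_mutex_unlock(lock);
	return (lock);
}

/*
 * Publish the new lock with a release store, as thread_lock_unblock() does
 * with atomic_store_rel_ptr(); anyone spinning on the sentinel may proceed.
 */
static void
model_lock_unblock(struct thread_model *td, pthread_mutex_t *new_lock)
{
	atomic_store_explicit(&td->td_lock, new_lock, memory_order_release);
}

int
main(void)
{
	struct thread_model td = { .td_lock = &old_queue_lock };

	pthread_mutex_lock(&old_queue_lock);		/* caller holds the thread's lock */
	model_lock_block(&td);				/* td_lock -> blocked sentinel */
	pthread_mutex_lock(&new_queue_lock);		/* take the destination queue lock */
	model_lock_unblock(&td, &new_queue_lock);	/* hand the thread over */
	printf("handed over: %d\n",
	    atomic_load(&td.td_lock) == &new_queue_lock);
	pthread_mutex_unlock(&new_queue_lock);
	return (0);
}

The release store on unblock is the ordering the kernel relies on: a CPU spinning until td_lock stops pointing at blocked_lock is guaranteed to see the destination lock fully set up once the pointer changes, which is what atomic_store_rel_ptr() provides in thread_lock_unblock().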