Index: lib/libthr/thread/thr_mutex.c =================================================================== --- lib/libthr/thread/thr_mutex.c (revision 264497) +++ lib/libthr/thread/thr_mutex.c (working copy) @@ -248,9 +248,9 @@ _mutex_fork(struct pthread *curthread) */ TAILQ_FOREACH(m, &curthread->mutexq, m_qe) - m->m_lock.m_owner = TID(curthread); + m->m_lock.m_owner = (uintptr_t)curthread; TAILQ_FOREACH(m, &curthread->pp_mutexq, m_qe) - m->m_lock.m_owner = TID(curthread) | UMUTEX_CONTESTED; + m->m_lock.m_owner = (uintptr_t)curthread | UMUTEX_CONTESTED; } int @@ -316,13 +316,13 @@ mutex_trylock_common(pthread_mutex_t *mutex) { struct pthread *curthread = _get_curthread(); struct pthread_mutex *m = *mutex; - uint32_t id; + uintptr_t newowner; int ret; - id = TID(curthread); + newowner = (uintptr_t)curthread; if (m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); - ret = _thr_umutex_trylock(&m->m_lock, id); + ret = _thr_umutex_trylock(&m->m_lock, newowner); if (__predict_true(ret == 0)) { ENQUEUE_MUTEX(curthread, m); } else if (m->m_owner == curthread) { @@ -347,14 +347,16 @@ static int mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m, const struct timespec *abstime) { - uint32_t id, owner; + struct pthread *thread_owner; + enum state_thread *thread_owner_state; + uintptr_t newowner, x; int count; int ret; if (m->m_owner == curthread) return mutex_self_lock(m, abstime); - id = TID(curthread); + newowner = (uintptr_t)curthread; /* * For adaptive mutexes, spin for a bit in the expectation * that if the application requests this mutex type then @@ -369,25 +371,50 @@ mutex_lock_sleep(struct pthread *curthread, struct if (!_thr_is_smp) goto yield_loop; - count = m->m_spinloops; - while (count--) { - owner = m->m_lock.m_owner; - if ((owner & ~UMUTEX_CONTESTED) == 0) { - if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { - ret = 0; - goto done; + if (PMUTEX_TYPE(m->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) { + x = m->m_lock.m_owner; + thread_owner = (struct pthread *)(x & ~UMUTEX_CONTESTED); + if (thread_owner != NULL) + thread_owner_state = thread_owner->sh_chan_state; + if (thread_owner == NULL || thread_owner_state == NULL) { + count = m->m_spinloops; + while (count--) { + x = m->m_lock.m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_ptr( + &m->m_lock.m_owner, x, + newowner | x)) { + ret = 0; + goto done; + } + } + CPU_SPINWAIT; } + } else { + while (thread_owner == + (struct pthread *)(m->m_lock.m_owner & + ~UMUTEX_CONTESTED) && + *thread_owner_state == TDS_RUNNING) + CPU_SPINWAIT; + x = m->m_lock.m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_ptr(&m->m_lock.m_owner, x, + newowner | x)) { + ret = 0; + goto done; + } + } } - CPU_SPINWAIT; } yield_loop: count = m->m_yieldloops; while (count--) { _sched_yield(); - owner = m->m_lock.m_owner; - if ((owner & ~UMUTEX_CONTESTED) == 0) { - if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { + x = m->m_lock.m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_ptr(&m->m_lock.m_owner, x, + newowner | x)) { ret = 0; goto done; } @@ -396,13 +423,13 @@ yield_loop: sleep_in_kernel: if (abstime == NULL) { - ret = __thr_umutex_lock(&m->m_lock, id); + ret = __thr_umutex_lock(&m->m_lock, newowner); } else if (__predict_false( abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)) { ret = EINVAL; } else { - ret = __thr_umutex_timedlock(&m->m_lock, id, abstime); + ret = __thr_umutex_timedlock(&m->m_lock, newowner, abstime); } done: if (ret == 0) 
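The mutex_lock_sleep() rework above is the core of the change: the umutex owner word now holds the owner's userland struct pthread address rather than its 32-bit TID (hence the switch to uintptr_t, with UMUTEX_CONTESTED moving to bit 0, which is always clear in a pointer-aligned address), and through that pointer a waiter can read the owner's sh_chan_state slot, a per-thread word the kernel keeps updated with the scheduler state. For PTHREAD_MUTEX_ADAPTIVE_NP the waiter can therefore spin for as long as the owner is actually running on a CPU instead of for a fixed number of iterations. A minimal sketch of that spin path, using the names introduced by this patch (the helper function itself is hypothetical and only illustrates the control flow):

/*
 * Owner-aware adaptive spin as done in mutex_lock_sleep() above.
 * sh_chan_state, TDS_RUNNING and the pointer-based owner encoding come
 * from this patch; the function name is made up for the example.
 */
static int
spin_while_owner_runs(struct pthread_mutex *m, uintptr_t newowner)
{
	struct pthread *owner;
	enum state_thread *owner_state;
	uintptr_t x;

	x = m->m_lock.m_owner;
	owner = (struct pthread *)(x & ~UMUTEX_CONTESTED);
	if (owner == NULL || (owner_state = owner->sh_chan_state) == NULL)
		return (EBUSY);		/* fall back to the counted spin */

	/* Spin only while the same thread owns the lock and is on a CPU. */
	while (owner == (struct pthread *)(m->m_lock.m_owner &
	    ~UMUTEX_CONTESTED) && *owner_state == TDS_RUNNING)
		CPU_SPINWAIT;

	/* The owner released the lock or went off CPU; try to take it. */
	x = m->m_lock.m_owner;
	if ((x & ~UMUTEX_CONTESTED) == 0 &&
	    atomic_cmpset_acq_ptr(&m->m_lock.m_owner, x, newowner | x))
		return (0);
	return (EBUSY);
}

If the owner does not publish a shared channel (allocation failed or none was requested), the patch keeps the previous behaviour and spins m_spinloops times before falling through to the yield loop and the kernel sleep.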
@@ -420,7 +447,7 @@ mutex_lock_common(struct pthread_mutex *m, if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); - if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) { + if (_thr_umutex_trylock2(&m->m_lock, (uintptr_t)curthread) == 0) { ENQUEUE_MUTEX(curthread, m); ret = 0; } else { @@ -632,7 +659,7 @@ static int mutex_unlock_common(struct pthread_mutex *m, int cv, int *mtx_defer) { struct pthread *curthread = _get_curthread(); - uint32_t id; + uintptr_t curowner; int defered; if (__predict_false(m <= THR_MUTEX_DESTROYED)) { @@ -647,7 +674,7 @@ mutex_unlock_common(struct pthread_mutex *m, int c if (__predict_false(m->m_owner != curthread)) return (EPERM); - id = TID(curthread); + curowner = (uintptr_t)curthread; if (__predict_false( PMUTEX_TYPE(m->m_flags) == PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) { @@ -660,7 +687,7 @@ mutex_unlock_common(struct pthread_mutex *m, int c defered = 0; DEQUEUE_MUTEX(curthread, m); - _thr_umutex_unlock2(&m->m_lock, id, mtx_defer); + _thr_umutex_unlock2(&m->m_lock, curowner, mtx_defer); if (mtx_defer == NULL && defered) { _thr_wake_all(curthread->defer_waiters, Index: lib/libthr/thread/thr_create.c =================================================================== --- lib/libthr/thread/thr_create.c (revision 264497) +++ lib/libthr/thread/thr_create.c (working copy) @@ -55,7 +55,7 @@ _pthread_create(pthread_t * thread, const pthread_ struct thr_param param; struct sched_param sched_param; struct rtprio rtp; - int ret = 0, locked, create_suspended; + int ret = 0, locked, create_suspended, wasthreaded; sigset_t set, oset; cpuset_t *cpusetp = NULL; int cpusetsize = 0; @@ -66,10 +66,22 @@ _pthread_create(pthread_t * thread, const pthread_ /* * Tell libc and others now they need lock to protect their data. */ - if (_thr_isthreaded() == 0 && _thr_setthreaded(1)) + wasthreaded = _thr_isthreaded(); + if (wasthreaded == 0 && _thr_setthreaded(1)) return (EAGAIN); curthread = _get_curthread(); + if (_thread_active_threads == 1) { + if (wasthreaded != 0) { + /* + * If curthread is creating threads again after the + * process has been multithreaded once, the + * sh_chan_state pointer could still reference the old + * shared channel. Clean it up. + */ + curthread->sh_chan_state = NULL; + } else if (curthread->sh_chan_state != NULL) + PANIC("Thread already has a shared channel state"); + } if ((new_thread = _thr_alloc(curthread)) == NULL) return (EAGAIN); @@ -167,7 +179,17 @@ _pthread_create(pthread_t * thread, const pthread_ &sched_param, &rtp); param.rtp = &rtp; } + param.child_chan = &new_thread->sh_chan_state; + /* + * The kernel may decide to destroy and recreate the shared + * channel for curthread too, in case all the other threads + * are exiting. + * If this is the case the curthread shared channel is simply + * replaced, otherwise it is left intact. + */ + param.parent_chan = &curthread->sh_chan_state; + /* Schedule the new thread.
*/ if (create_suspended) { SIGFILLSET(set); Index: lib/libthr/thread/thr_exit.c =================================================================== --- lib/libthr/thread/thr_exit.c (revision 264497) +++ lib/libthr/thread/thr_exit.c (working copy) @@ -269,8 +269,11 @@ exit_thread(void) _thread_cleanupspecific(); } - if (!_thr_isthreaded()) + if (!_thr_isthreaded()) { + if (curthread->sh_chan_state != NULL) + PANIC("Unexpected shared channel state still up"); exit(0); + } if (atomic_fetchadd_int(&_thread_active_threads, -1) == 1) { exit(0); Index: lib/libthr/thread/thr_cond.c =================================================================== --- lib/libthr/thread/thr_cond.c (revision 264497) +++ lib/libthr/thread/thr_cond.c (working copy) @@ -242,7 +242,7 @@ cond_wait_user(struct pthread_cond *cvp, struct pt defered = 0; if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0) (void)_umtx_op_err(&mp->m_lock, UMTX_OP_MUTEX_WAKE2, - mp->m_lock.m_flags, 0, 0); + mp->m_lock.m_flags, 0, 0, 0); } if (curthread->nwaiter_defer > 0) { _thr_wake_all(curthread->defer_waiters, Index: lib/libthr/thread/thr_kern.c =================================================================== --- lib/libthr/thread/thr_kern.c (revision 264497) +++ lib/libthr/thread/thr_kern.c (working copy) @@ -208,5 +208,5 @@ _thr_wake_all(unsigned int *waddrs[], int count) for (i = 0; i < count; ++i) *waddrs[i] = 1; - _umtx_op(waddrs, UMTX_OP_NWAKE_PRIVATE, count, NULL, NULL); + _umtx_op(waddrs, UMTX_OP_NWAKE_PRIVATE, count, NULL, NULL, 0); } Index: lib/libthr/thread/thr_private.h =================================================================== --- lib/libthr/thread/thr_private.h (revision 264497) +++ lib/libthr/thread/thr_private.h (working copy) @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -518,6 +519,9 @@ struct pthread { /* Referenced mutex. */ struct pthread_mutex *mutex_obj; + /* Thread state shared channel. */ + enum state_thread *sh_chan_state; + /* Thread will sleep. 
*/ int will_sleep; @@ -554,27 +558,27 @@ struct pthread { } while (0) #define THR_UMUTEX_TRYLOCK(thrd, lck) \ - _thr_umutex_trylock((lck), TID(thrd)) + _thr_umutex_trylock((lck), (uintptr_t)(thrd)) #define THR_UMUTEX_LOCK(thrd, lck) \ - _thr_umutex_lock((lck), TID(thrd)) + _thr_umutex_lock((lck), (uintptr_t)(thrd)) #define THR_UMUTEX_TIMEDLOCK(thrd, lck, timo) \ - _thr_umutex_timedlock((lck), TID(thrd), (timo)) + _thr_umutex_timedlock((lck), (uintptr_t)(thrd), (timo)) #define THR_UMUTEX_UNLOCK(thrd, lck) \ - _thr_umutex_unlock((lck), TID(thrd)) + _thr_umutex_unlock((lck), (uintptr_t)(thrd)) #define THR_LOCK_ACQUIRE(thrd, lck) \ do { \ (thrd)->locklevel++; \ - _thr_umutex_lock(lck, TID(thrd)); \ + _thr_umutex_lock(lck, (uintptr_t)(thrd)); \ } while (0) #define THR_LOCK_ACQUIRE_SPIN(thrd, lck) \ do { \ (thrd)->locklevel++; \ - _thr_umutex_lock_spin(lck, TID(thrd)); \ + _thr_umutex_lock_spin(lck, (uintptr_t)(thrd)); \ } while (0) #ifdef _PTHREADS_INVARIANTS @@ -590,7 +594,7 @@ do { \ #define THR_LOCK_RELEASE(thrd, lck) \ do { \ THR_ASSERT_LOCKLEVEL(thrd); \ - _thr_umutex_unlock((lck), TID(thrd)); \ + _thr_umutex_unlock((lck), (uintptr_t)(thrd)); \ (thrd)->locklevel--; \ _thr_ast(thrd); \ } while (0) Index: lib/libthr/thread/thr_umtx.c =================================================================== --- lib/libthr/thread/thr_umtx.c (revision 264497) +++ lib/libthr/thread/thr_umtx.c (working copy) @@ -31,9 +31,10 @@ #include "thr_umtx.h" #ifndef HAS__UMTX_OP_ERR -int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2) +int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2, + uintptr_t owner) { - if (_umtx_op(obj, op, val, uaddr, uaddr2) == -1) + if (_umtx_op(obj, op, val, uaddr, uaddr2, owner) == -1) return (errno); return (0); } @@ -55,44 +56,45 @@ _thr_urwlock_init(struct urwlock *rwl) } int -__thr_umutex_lock(struct umutex *mtx, uint32_t id) +__thr_umutex_lock(struct umutex *mtx, uintptr_t newowner) { - uint32_t owner; + uintptr_t x; if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { for (;;) { /* wait in kernel */ - _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); - - owner = mtx->m_owner; - if ((owner & ~UMUTEX_CONTESTED) == 0 && - atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) + _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0, + newowner); + x = mtx->m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0 && + atomic_cmpset_acq_ptr(&mtx->m_owner, x, + newowner | x)) return (0); } } - return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); + return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0, newowner); } #define SPINLOOPS 1000 int -__thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +__thr_umutex_lock_spin(struct umutex *mtx, uintptr_t newowner) { - uint32_t owner; + uintptr_t x; if (!_thr_is_smp) - return __thr_umutex_lock(mtx, id); + return __thr_umutex_lock(mtx, newowner); if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { for (;;) { int count = SPINLOOPS; while (count--) { - owner = mtx->m_owner; - if ((owner & ~UMUTEX_CONTESTED) == 0) { - if (atomic_cmpset_acq_32( + x = mtx->m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_ptr( &mtx->m_owner, - owner, id|owner)) { + x, newowner | x)) { return (0); } } @@ -100,20 +102,21 @@ int } /* wait in kernel */ - _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); + _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0, + newowner); } } - return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); + return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0, 
newowner); } int -__thr_umutex_timedlock(struct umutex *mtx, uint32_t id, +__thr_umutex_timedlock(struct umutex *mtx, uintptr_t newowner, const struct timespec *abstime) { struct _umtx_time *tm_p, timeout; size_t tm_size; - uint32_t owner; + uintptr_t x; int ret; if (abstime == NULL) { @@ -132,16 +135,19 @@ int /* wait in kernel */ ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, - (void *)tm_size, __DECONST(void *, tm_p)); + (void *)tm_size, __DECONST(void *, tm_p), + newowner); /* now try to lock it */ - owner = mtx->m_owner; - if ((owner & ~UMUTEX_CONTESTED) == 0 && - atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) + x = mtx->m_owner; + if ((x & ~UMUTEX_CONTESTED) == 0 && + atomic_cmpset_acq_ptr(&mtx->m_owner, x, + newowner | x)) return (0); } else { ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, - (void *)tm_size, __DECONST(void *, tm_p)); + (void *)tm_size, __DECONST(void *, tm_p), + newowner); if (ret == 0) break; } @@ -152,22 +158,23 @@ int } int -__thr_umutex_unlock(struct umutex *mtx, uint32_t id) +__thr_umutex_unlock(struct umutex *mtx, uintptr_t curowner) { - return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0); + return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0, curowner); } int __thr_umutex_trylock(struct umutex *mtx) { - return _umtx_op_err(mtx, UMTX_OP_MUTEX_TRYLOCK, 0, 0, 0); + return _umtx_op_err(mtx, UMTX_OP_MUTEX_TRYLOCK, 0, 0, 0, 0); } int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) { - return _umtx_op_err(mtx, UMTX_OP_SET_CEILING, ceiling, oldceiling, 0); + return _umtx_op_err(mtx, UMTX_OP_SET_CEILING, ceiling, oldceiling, 0, + 0); } int @@ -177,7 +184,7 @@ _thr_umtx_wait(volatile long *mtx, long id, const timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), UMTX_OP_WAIT, id, 0, - __DECONST(void*, timeout)); + __DECONST(void*, timeout), 0); } int @@ -188,7 +195,7 @@ _thr_umtx_wait_uint(volatile u_int *mtx, u_int id, return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, 0, - __DECONST(void*, timeout)); + __DECONST(void*, timeout), 0); } int @@ -211,14 +218,14 @@ _thr_umtx_timedwait_uint(volatile u_int *mtx, u_in return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, - (void *)tm_size, __DECONST(void *, tm_p)); + (void *)tm_size, __DECONST(void *, tm_p), 0); } int _thr_umtx_wake(volatile void *mtx, int nr_wakeup, int shared) { return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? 
UMTX_OP_WAKE : UMTX_OP_WAKE_PRIVATE, - nr_wakeup, 0, 0); + nr_wakeup, 0, 0, 0); } void @@ -231,14 +238,17 @@ int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int flags) { + uintptr_t curowner; + + curowner = (uintptr_t)_get_curthread(); + if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) { - struct pthread *curthread = _get_curthread(); - _thr_umutex_unlock(m, TID(curthread)); + _thr_umutex_unlock(m, curowner); return (ETIMEDOUT); } - return _umtx_op_err(cv, UMTX_OP_CV_WAIT, flags, - m, __DECONST(void*, timeout)); + return _umtx_op_err(cv, UMTX_OP_CV_WAIT, flags, m, + __DECONST(void*, timeout), curowner); } int @@ -246,7 +256,7 @@ _thr_ucond_signal(struct ucond *cv) { if (!cv->c_has_waiters) return (0); - return _umtx_op_err(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL); + return _umtx_op_err(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL, 0); } int @@ -254,7 +264,7 @@ _thr_ucond_broadcast(struct ucond *cv) { if (!cv->c_has_waiters) return (0); - return _umtx_op_err(cv, UMTX_OP_CV_BROADCAST, 0, NULL, NULL); + return _umtx_op_err(cv, UMTX_OP_CV_BROADCAST, 0, NULL, NULL, 0); } int @@ -274,7 +284,8 @@ __thr_rwlock_rdlock(struct urwlock *rwlock, int fl tm_p = &timeout; tm_size = sizeof(timeout); } - return _umtx_op_err(rwlock, UMTX_OP_RW_RDLOCK, flags, (void *)tm_size, tm_p); + return _umtx_op_err(rwlock, UMTX_OP_RW_RDLOCK, flags, (void *)tm_size, + tm_p, 0); } int @@ -293,13 +304,14 @@ __thr_rwlock_wrlock(struct urwlock *rwlock, const tm_p = &timeout; tm_size = sizeof(timeout); } - return _umtx_op_err(rwlock, UMTX_OP_RW_WRLOCK, 0, (void *)tm_size, tm_p); + return _umtx_op_err(rwlock, UMTX_OP_RW_WRLOCK, 0, (void *)tm_size, + tm_p, 0); } int __thr_rwlock_unlock(struct urwlock *rwlock) { - return _umtx_op_err(rwlock, UMTX_OP_RW_UNLOCK, 0, NULL, NULL); + return _umtx_op_err(rwlock, UMTX_OP_RW_UNLOCK, 0, NULL, NULL, 0); } void Index: lib/libthr/thread/thr_umtx.h =================================================================== --- lib/libthr/thread/thr_umtx.h (revision 264497) +++ lib/libthr/thread/thr_umtx.h (working copy) @@ -35,12 +35,12 @@ #define DEFAULT_UMUTEX {0,0,{0,0},{0,0,0,0}} #define DEFAULT_URWLOCK {0,0,0,0,{0,0,0,0}} -int _umtx_op_err(void *, int op, u_long, void *, void *) __hidden; -int __thr_umutex_lock(struct umutex *mtx, uint32_t id) __hidden; -int __thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) __hidden; -int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, +int _umtx_op_err(void *, int op, u_long, void *, void *, uintptr_t) __hidden; +int __thr_umutex_lock(struct umutex *mtx, uintptr_t newowner) __hidden; +int __thr_umutex_lock_spin(struct umutex *mtx, uintptr_t newowner) __hidden; +int __thr_umutex_timedlock(struct umutex *mtx, uintptr_t newowner, const struct timespec *timeout) __hidden; -int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) __hidden; +int __thr_umutex_unlock(struct umutex *mtx, uintptr_t curowner) __hidden; int __thr_umutex_trylock(struct umutex *mtx) __hidden; int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) __hidden; @@ -73,9 +73,9 @@ void _thr_rwl_wrlock(struct urwlock *rwlock) __hid void _thr_rwl_unlock(struct urwlock *rwlock) __hidden; static inline int -_thr_umutex_trylock(struct umutex *mtx, uint32_t id) +_thr_umutex_trylock(struct umutex *mtx, uintptr_t newowner) { - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) + if (atomic_cmpset_acq_ptr(&mtx->m_owner, UMUTEX_UNOWNED, newowner)) return (0); if ((mtx->m_flags & 
UMUTEX_PRIO_PROTECT) == 0) return (EBUSY); @@ -83,72 +83,74 @@ static inline int } static inline int -_thr_umutex_trylock2(struct umutex *mtx, uint32_t id) +_thr_umutex_trylock2(struct umutex *mtx, uintptr_t newowner) { - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id) != 0) + if (atomic_cmpset_acq_ptr(&mtx->m_owner, UMUTEX_UNOWNED, newowner) != 0) return (0); - if ((uint32_t)mtx->m_owner == UMUTEX_CONTESTED && + if (mtx->m_owner == UMUTEX_CONTESTED && __predict_true((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0)) - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED)) + if (atomic_cmpset_acq_ptr(&mtx->m_owner, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED)) return (0); return (EBUSY); } static inline int -_thr_umutex_lock(struct umutex *mtx, uint32_t id) +_thr_umutex_lock(struct umutex *mtx, uintptr_t newowner) { - if (_thr_umutex_trylock2(mtx, id) == 0) + if (_thr_umutex_trylock2(mtx, newowner) == 0) return (0); - return (__thr_umutex_lock(mtx, id)); + return (__thr_umutex_lock(mtx, newowner)); } static inline int -_thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +_thr_umutex_lock_spin(struct umutex *mtx, uintptr_t newowner) { - if (_thr_umutex_trylock2(mtx, id) == 0) + if (_thr_umutex_trylock2(mtx, newowner) == 0) return (0); - return (__thr_umutex_lock_spin(mtx, id)); + return (__thr_umutex_lock_spin(mtx, newowner)); } static inline int -_thr_umutex_timedlock(struct umutex *mtx, uint32_t id, +_thr_umutex_timedlock(struct umutex *mtx, uintptr_t newowner, const struct timespec *timeout) { - if (_thr_umutex_trylock2(mtx, id) == 0) + if (_thr_umutex_trylock2(mtx, newowner) == 0) return (0); - return (__thr_umutex_timedlock(mtx, id, timeout)); + return (__thr_umutex_timedlock(mtx, newowner, timeout)); } static inline int -_thr_umutex_unlock2(struct umutex *mtx, uint32_t id, int *defer) +_thr_umutex_unlock2(struct umutex *mtx, uintptr_t curowner, int *defer) { + uintptr_t x; uint32_t flags = mtx->m_flags; if ((flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { - uint32_t owner; do { - owner = mtx->m_owner; - if (__predict_false((owner & ~UMUTEX_CONTESTED) != id)) + x = mtx->m_owner; + if (__predict_false((x & ~UMUTEX_CONTESTED) != + curowner)) return (EPERM); - } while (__predict_false(!atomic_cmpset_rel_32(&mtx->m_owner, - owner, UMUTEX_UNOWNED))); - if ((owner & UMUTEX_CONTESTED)) { + } while (__predict_false(!atomic_cmpset_rel_ptr(&mtx->m_owner, + x, UMUTEX_UNOWNED))); + if ((x & UMUTEX_CONTESTED)) { if (defer == NULL) - (void)_umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE2, flags, 0, 0); + (void)_umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE2, flags, 0, 0, 0); else *defer = 1; } return (0); } - if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED)) + if (atomic_cmpset_rel_ptr(&mtx->m_owner, curowner, UMUTEX_UNOWNED)) return (0); - return (__thr_umutex_unlock(mtx, id)); + return (__thr_umutex_unlock(mtx, curowner)); } static inline int -_thr_umutex_unlock(struct umutex *mtx, uint32_t id) +_thr_umutex_unlock(struct umutex *mtx, uintptr_t curowner) { - return _thr_umutex_unlock2(mtx, id, NULL); + return _thr_umutex_unlock2(mtx, curowner, NULL); } static inline int Index: lib/libthr/thread/thr_rtld.c =================================================================== --- lib/libthr/thread/thr_rtld.c (revision 264497) +++ lib/libthr/thread/thr_rtld.c (working copy) @@ -188,7 +188,7 @@ _thr_rtld_init(void) curthread = _get_curthread(); /* force to resolve _umtx_op PLT */ - _umtx_op_err((struct umtx *)&dummy, UMTX_OP_WAKE, 1, 0, 
0); + _umtx_op_err((struct umtx *)&dummy, UMTX_OP_WAKE, 1, 0, 0, 0); /* force to resolve errno() PLT */ __error(); Index: lib/libc/gen/sem_new.c =================================================================== --- lib/libc/gen/sem_new.c (revision 264497) +++ lib/libc/gen/sem_new.c (working copy) @@ -332,7 +332,7 @@ _sem_getvalue(sem_t * __restrict sem, int * __rest static __inline int usem_wake(struct _usem *sem) { - return _umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL); + return _umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL, 0); } static __inline int @@ -352,7 +352,7 @@ usem_wait(struct _usem *sem, const struct timespec tm_size = sizeof(timeout); } return _umtx_op(sem, UMTX_OP_SEM_WAIT, 0, - (void *)tm_size, __DECONST(void*, tm_p)); + (void *)tm_size, __DECONST(void*, tm_p), 0); } int Index: lib/libc/gen/sem.c =================================================================== --- lib/libc/gen/sem.c (revision 264497) +++ lib/libc/gen/sem.c (working copy) @@ -329,14 +329,14 @@ _umtx_wait_uint(volatile unsigned *mtx, unsigned i } return _umtx_op(__DEVOLATILE(void *, mtx), UMTX_OP_WAIT_UINT_PRIVATE, id, - (void *)tm_size, __DECONST(void*, tm_p)); + (void *)tm_size, __DECONST(void*, tm_p), 0); } static int _umtx_wake(volatile void *mtx) { return _umtx_op(__DEVOLATILE(void *, mtx), UMTX_OP_WAKE_PRIVATE, - 1, NULL, NULL); + 1, NULL, NULL, 0); } #define TIMESPEC_SUB(dst, src, val) \ Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c (revision 264497) +++ sys/vm/vm_glue.c (working copy) @@ -777,6 +777,82 @@ kick_proc0(void) wakeup(&proc0); } +int +vm_create_shchan(vm_map_t map, rlim_t lmemlim, vm_offset_t start_uva, + vm_offset_t *kva, vm_offset_t *uva) +{ +#ifdef VM_SHARED_CHANS + vm_offset_t local_kva, local_uva; + vm_page_t m; + + if (ptoa(pmap_wired_count(vm_map_pmap(map)) + 1) > lmemlim) + return (1); + local_kva = kva_alloc(PAGE_SIZE); + if (local_kva == 0) + return (1); + local_uva = start_uva; + if (vm_map_find(map, NULL, 0, &local_uva, PAGE_SIZE, 0, + VMFS_ANY_SPACE, VM_PROT_READ, VM_PROT_READ, 0) != KERN_SUCCESS) { + kva_free(local_kva, PAGE_SIZE); + return (1); + } + + do { + m = vm_page_alloc(NULL, 0, VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + if (m == NULL) + VM_WAIT; + } while (m == NULL); + + pmap_qenter(local_kva, &m, 1); + pmap_enter(vm_map_pmap(map), local_uva, VM_PROT_READ, m, VM_PROT_READ, + TRUE); + + *kva = local_kva; + *uva = local_uva; + return (0); +#else + return (1); +#endif +} + +void +vm_destroy_shchan_nofreeuva(vm_map_t map, vm_offset_t kva, vm_offset_t uva) +{ +#ifdef VM_SHARED_CHANS + vm_page_t m; + + m = PHYS_TO_VM_PAGE(vtophys(kva)); + + pmap_remove(vm_map_pmap(map), uva, uva + PAGE_SIZE); + pmap_qremove(kva, 1); + + vm_page_lock(m); + vm_page_unwire(m, 0); + vm_page_free(m); + vm_page_unlock(m); + + kva_free(kva, PAGE_SIZE); +#endif +} + +void +vm_destroy_shchan_uva(vm_map_t map, vm_offset_t uva) +{ + +#ifdef VM_SHARED_CHANS + if (vm_map_remove(map, uva, uva + PAGE_SIZE) != KERN_SUCCESS) + panic("vm_destroy_shchan: invalid return value"); +#endif +} + +void +vm_destroy_shchan(vm_map_t map, vm_offset_t kva, vm_offset_t uva) +{ + + vm_destroy_shchan_nofreeuva(map, kva, uva); + vm_destroy_shchan_uva(map, uva); +} + #ifndef NO_SWAPPING /* Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h (revision 264497) +++ sys/vm/vm_extern.h (working copy) @@ -72,6 +72,11 @@ void kmeminit(void); void swapout_procs(int); int kernacc(void *, int, int); int 
useracc(void *, int, int); +int vm_create_shchan(vm_map_t, rlim_t, vm_offset_t, vm_offset_t *, + vm_offset_t *); +void vm_destroy_shchan(vm_map_t, vm_offset_t, vm_offset_t); +void vm_destroy_shchan_nofreeuva(vm_map_t, vm_offset_t, vm_offset_t); +void vm_destroy_shchan_uva(vm_map_t, vm_offset_t); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); Index: sys/sys/_umtx.h =================================================================== --- sys/sys/_umtx.h (revision 264497) +++ sys/sys/_umtx.h (working copy) @@ -34,7 +34,7 @@ #include struct umutex { - volatile __lwpid_t m_owner; /* Owner of the mutex */ + volatile __uintptr_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ __uint32_t m_spare[4]; Index: sys/sys/thr.h =================================================================== --- sys/sys/thr.h (revision 264497) +++ sys/sys/thr.h (working copy) @@ -55,6 +55,8 @@ struct thr_param { long *parent_tid; /* parent accesses the new TID here. */ int flags; /* thread flags. */ struct rtprio *rtp; /* Real-time scheduling priority */ + enum state_thread **child_chan; /* Shared chan access. */ + enum state_thread **parent_chan; /* Shared chan access for parent. */ void *spare[3]; /* TODO: cpu affinity mask etc. */ }; Index: sys/sys/umtx.h =================================================================== --- sys/sys/umtx.h (revision 264497) +++ sys/sys/umtx.h (working copy) @@ -35,7 +35,7 @@ #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ #define UMUTEX_UNOWNED 0x0 -#define UMUTEX_CONTESTED 0x80000000U +#define UMUTEX_CONTESTED 0x1 #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ @@ -89,7 +89,8 @@ #ifndef _KERNEL -int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); +int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2, + uintptr_t owner); #else Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h (revision 264497) +++ sys/sys/proc.h (working copy) @@ -176,6 +176,20 @@ struct trapframe; struct turnstile; /* + * Allowed threads states. + * TDS_INVALID should not be used directly. It is used as a marker for + * "invalid state" purposes. + */ +enum state_thread { + TDS_INACTIVE = 0x0, + TDS_INHIBITED, + TDS_CAN_RUN, + TDS_RUNQ, + TDS_RUNNING, + TDS_INVALID +}; + +/* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts @@ -197,6 +211,18 @@ struct rusage_ext { }; /* + * Shared channels buckets. + * Implemented as a contiguous collection of KVA/UVA couplets from which + * shared channels are extracted and allocated. + */ +struct shchan { + SLIST_ENTRY(shchan) sh_iter; + enum state_thread *sh_kern; + enum state_thread *sh_user; + u_int sh_free_slots; +}; + +/* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. @@ -275,6 +301,7 @@ struct thread { u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ + enum state_thread *td_sh_state; /* (t) Shared channel thread state. 
*/ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ @@ -293,13 +320,8 @@ struct thread { * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ - enum { - TDS_INACTIVE = 0x0, - TDS_INHIBITED, - TDS_CAN_RUN, - TDS_RUNQ, - TDS_RUNNING - } td_state; /* (t) thread state */ + struct shchan *td_sh_chan; /* (t) Shared channel bucket. */ + enum state_thread td_state; /* (t) Thread state. */ union { register_t tdu_retval[2]; off_t tdu_off; @@ -457,12 +479,17 @@ do { \ #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ + if ((td)->td_sh_state != NULL) \ + *(td)->td_sh_state = TDS_INHIBITED; \ } while (0) -#define TD_CLR_INHIB(td, inhib) do { \ - if (((td)->td_inhibitors & (inhib)) && \ - (((td)->td_inhibitors &= ~(inhib)) == 0)) \ - (td)->td_state = TDS_CAN_RUN; \ +#define TD_CLR_INHIB(td, inhib) do { \ + if (((td)->td_inhibitors & (inhib)) && \ + (((td)->td_inhibitors &= ~(inhib)) == 0)) { \ + (td)->td_state = TDS_CAN_RUN; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_CAN_RUN; \ + } \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) @@ -478,9 +505,21 @@ do { \ #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) -#define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING -#define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ -#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN +#define TD_SET_RUNNING(td) do { \ + (td)->td_state = TDS_RUNNING; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_RUNNING; \ +} while (0) +#define TD_SET_RUNQ(td) do { \ + (td)->td_state = TDS_RUNQ; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_RUNQ; \ +} while (0) +#define TD_SET_CAN_RUN(td) do { \ + (td)->td_state = TDS_CAN_RUN; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_CAN_RUN; \ +} while (0) /* * Process structure. @@ -488,6 +527,8 @@ do { \ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ + SLIST_HEAD(, shchan) p_shchans; /* (c) All shared channel buckets. */ + SLIST_HEAD(, shchan) p_shcasync; /* (c) Async freed shchans buckets. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. 
*/ @@ -884,6 +925,12 @@ int proc_getargv(struct thread *td, struct proc *p int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); +int proc_alloc_shchan(struct proc *p, struct shchan **retch, + enum state_thread **kent, enum state_thread **uent); +void proc_reap_shchans_uva(struct proc *p); +void proc_reclaim_shchans(struct proc *p); +void _proc_free_shchan(struct proc *p, struct shchan *chan, + enum state_thread *kent, boolean_t sync); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); void proc_reap(struct thread *td, struct proc *p, int *status, int options); @@ -971,6 +1018,22 @@ curthread_pflags_restore(int save) curthread->td_pflags &= save; } +static __inline void +proc_free_shchan(struct proc *p, struct shchan *chan, + enum state_thread *kent) +{ + + _proc_free_shchan(p, chan, kent, TRUE); +} + +static __inline void +proc_free_shchan_async(struct proc *p, struct shchan *chan, + enum state_thread *kent) +{ + + _proc_free_shchan(p, chan, kent, FALSE); +} + #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h (revision 264497) +++ sys/sys/sysproto.h (working copy) @@ -1374,6 +1374,7 @@ struct _umtx_op_args { char val_l_[PADL_(u_long)]; u_long val; char val_r_[PADR_(u_long)]; char uaddr1_l_[PADL_(void *)]; void * uaddr1; char uaddr1_r_[PADR_(void *)]; char uaddr2_l_[PADL_(void *)]; void * uaddr2; char uaddr2_r_[PADR_(void *)]; + char owner_l_[PADL_(uintptr_t)]; uintptr_t owner; char owner_r_[PADR_(uintptr_t)]; }; struct thr_new_args { char param_l_[PADL_(struct thr_param *)]; struct thr_param * param; char param_r_[PADR_(struct thr_param *)]; Index: sys/conf/options =================================================================== --- sys/conf/options (revision 264497) +++ sys/conf/options (working copy) @@ -593,6 +593,7 @@ VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_LEVEL_0_ORDER opt_vm.h +VM_SHARED_CHANS opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c (revision 264497) +++ sys/kern/kern_thread.c (working copy) @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -58,12 +59,17 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include +#define PAGE_NUM_THRSTATE (PAGE_SIZE / sizeof (enum state_thread)) + SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE(proc, , , lwp__exit); +static MALLOC_DEFINE(M_SHCHAN, "shchan", "shared channels"); /* * thread related storage. @@ -242,6 +248,8 @@ void proc_linkup0(struct proc *p, struct thread *td) { TAILQ_INIT(&p->p_threads); /* all threads in proc */ + SLIST_INIT(&p->p_shchans); + SLIST_INIT(&p->p_shcasync); proc_linkup(p, td); } @@ -261,6 +269,203 @@ proc_linkup(struct proc *p, struct thread *td) } /* + * Alloc a shared channel linked to proc p. + * Returns the bucket from which the channel is allocated, the kernel + * address and the userland address related to the shared channel. + * In case of failure, a non-zero error code is returned. 
+ */ +int +proc_alloc_shchan(struct proc *p, struct shchan **retch, + enum state_thread **kent, enum state_thread **uent) +{ + rlim_t lmemlim; + vm_offset_t start_uva; + enum state_thread *newkva, *newuva; + struct shchan *chan, *newchan; + u_int i; + + proc_reap_shchans_uva(p); + + PROC_LOCK(p); + SLIST_FOREACH(chan, &p->p_shchans, sh_iter) + if (chan->sh_free_slots != 0) + break; + if (chan == NULL) { + lmemlim = lim_cur(p, RLIMIT_MEMLOCK); + start_uva = round_page((vm_offset_t)p->p_vmspace->vm_daddr + + lim_max(p, RLIMIT_DATA)); + PROC_UNLOCK(p); + newchan = NULL; + + if (vm_create_shchan(&p->p_vmspace->vm_map, lmemlim, start_uva, + (vm_offset_t *)&newkva, (vm_offset_t *)&newuva)) + return (ENOMEM); + newchan = malloc(sizeof(*newchan), M_SHCHAN, M_WAITOK); + newchan->sh_kern = newkva; + newchan->sh_user = newuva; + newchan->sh_free_slots = PAGE_NUM_THRSTATE; + for (i = 0; i < PAGE_NUM_THRSTATE; i++) + newkva[i] = TDS_INVALID; + + PROC_LOCK(p); + SLIST_FOREACH(chan, &p->p_shchans, sh_iter) + if (chan->sh_free_slots != 0) + break; + if (chan != NULL) { + /* + * New space has been made available while allocating + * the new shared channel page. + * Free the newly created page and reclaim the + * just freed slot. + */ + vm_destroy_shchan_nofreeuva(&p->p_vmspace->vm_map, + (vm_offset_t)newchan->sh_kern, + (vm_offset_t)newchan->sh_user); + newchan->sh_kern = NULL; + SLIST_INSERT_HEAD(&p->p_shcasync, newchan, sh_iter); + } else { + SLIST_INSERT_HEAD(&p->p_shchans, newchan, sh_iter); + chan = newchan; + } + } + KASSERT(chan != NULL && chan->sh_free_slots != 0, + ("proc_alloc_shchan: invalid NULL shared channel")); + + for (i = 0; i < PAGE_NUM_THRSTATE; i++) { + if (chan->sh_kern[i] > TDS_INVALID) + panic("proc_alloc_shchan: invalid page %p content %p", + chan->sh_kern, &chan->sh_kern[i]); + if (chan->sh_kern[i] == TDS_INVALID) + break; + } + if (i == PAGE_NUM_THRSTATE) + panic("proc_alloc_shchan: no valid state found"); + + /* Use the same value as thread_ctor(). */ + chan->sh_kern[i] = TDS_INACTIVE; + chan->sh_free_slots--; + PROC_UNLOCK(p); + + /* The races above may have made an async free necessary. */ + proc_reap_shchans_uva(p); + + *retch = chan; + *kent = chan->sh_kern + i; + *uent = chan->sh_user + i; + return (0); +} + +/* + * Reap all the UVAs asynchronously freed from shared channels related + * to a specific process p. + */ +void +proc_reap_shchans_uva(struct proc *p) +{ + struct shchan *chan; + + PROC_LOCK(p); + while (!SLIST_EMPTY(&p->p_shcasync)) { + chan = SLIST_FIRST(&p->p_shcasync); + SLIST_REMOVE_HEAD(&p->p_shcasync, sh_iter); + PROC_UNLOCK(p); + if (chan->sh_kern != NULL) + panic("proc_reap_shchans_uva: invalid shchan"); + vm_destroy_shchan_uva(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_user); + free(chan, M_SHCHAN); + PROC_LOCK(p); + } + PROC_UNLOCK(p); +} + +/* + * Free all the shared channels related to a specific process p. + * It unlocks PROC_LOCK before returning.
+ */ +void +proc_reclaim_shchans(struct proc *p) +{ + SLIST_HEAD(, shchan) local_chans; + struct shchan *tmpchan; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + SLIST_INIT(&local_chans); + + while (!SLIST_EMPTY(&p->p_shchans)) { + tmpchan = SLIST_FIRST(&p->p_shchans); + SLIST_REMOVE_HEAD(&p->p_shchans, sh_iter); + SLIST_INSERT_HEAD(&local_chans, tmpchan, sh_iter); + } + PROC_UNLOCK(p); + + while (!SLIST_EMPTY(&local_chans)) { + tmpchan = SLIST_FIRST(&local_chans); + SLIST_REMOVE_HEAD(&local_chans, sh_iter); + vm_destroy_shchan(&p->p_vmspace->vm_map, + (vm_offset_t)tmpchan->sh_kern, + (vm_offset_t)tmpchan->sh_user); + free(tmpchan, M_SHCHAN); + } + + /* + * As the proc lock can be dropped here, this is also a good point + * for reaping any UVA not freed yet. + */ + proc_reap_shchans_uva(p); +} + +/* + * Free a shared channel related to a specific process p. + * In case of an asynchronous request, if needed, the UVA of the shared + * channel is not freed immediately but moved to an asynchronous queue; + * it is the responsibility of the caller to properly schedule later reaping. + * The backing page is unwired and the KVA is freed right away in either case. + * It unlocks PROC_LOCK before returning if a synchronous request is + * performed, otherwise the PROC_LOCK is held for the whole duration. + */ +void +_proc_free_shchan(struct proc *p, struct shchan *chan, enum state_thread *kent, + boolean_t sync) +{ + ptrdiff_t i; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + i = kent - chan->sh_kern; + KASSERT(kent == &chan->sh_kern[i] && chan->sh_kern[i] != TDS_INVALID, + ("proc_free_shchan: invalid index retrieval %jd", (intmax_t)i)); + + chan->sh_kern[i] = TDS_INVALID; + chan->sh_free_slots++; + if (chan->sh_free_slots < PAGE_NUM_THRSTATE) { + if (sync == TRUE) + PROC_UNLOCK(p); + return; + } + KASSERT(chan->sh_free_slots == PAGE_NUM_THRSTATE, + ("proc_free_shchan: invalid number of free slots")); + + SLIST_REMOVE(&p->p_shchans, chan, shchan, sh_iter); + + if (sync == FALSE) { + vm_destroy_shchan_nofreeuva(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_kern, + (vm_offset_t)chan->sh_user); + chan->sh_kern = NULL; + SLIST_INSERT_HEAD(&p->p_shcasync, chan, sh_iter); + return; + } + PROC_UNLOCK(p); + + vm_destroy_shchan(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_kern, (vm_offset_t)chan->sh_user); + free(chan, M_SHCHAN); +} + +/* * Initialize global thread allocation resources. */ void @@ -810,6 +1015,12 @@ thread_suspend_check(int return_instead) * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { + /* + * The shared channels should be torn down now. + * However, it is the responsibility of the thread + * requesting single-threading to do so when it is + * actually safe.
+ */ PROC_UNLOCK(p); tidhash_remove(td); PROC_LOCK(p); Index: sys/kern/kern_umtx.c =================================================================== --- sys/kern/kern_umtx.c (revision 264497) +++ sys/kern/kern_umtx.c (working copy) @@ -70,6 +70,25 @@ __FBSDID("$FreeBSD$"); (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) #endif +#ifdef __LP64__ +CTASSERT(sizeof(long) == sizeof(uintptr_t)); +#define umtx_fuword fuword +#define umtx_casuword casuword +#define umtx_suword suword +#else +CTASSERT(sizeof(uint32_t) == sizeof(uintptr_t)); +#define umtx_fuword fuword32 +#define umtx_casuword casuword32 +#define umtx_suword suword32 +#endif + +#define UMTX_OWNER_ULOAD(m) \ + umtx_fuword(__DEVOLATILE(uintptr_t *, &(m)->m_owner)) +#define UMTX_OWNER_USTORE(m, curowner, newowner) \ + umtx_casuword(&(m)->m_owner, (curowner), (newowner)) +#define UMTX_OWNER_UDEFSTORE(m, newowner) \ + umtx_suword(__DEVOLATILE(uintptr_t *, &(m)->m_owner), (newowner)) + /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ @@ -227,7 +246,8 @@ static int umtxq_sleep(struct umtx_q *uq, const ch static int umtxq_count(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); -static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); +static int do_unlock_pp(struct thread *td, struct umutex *m, + uintptr_t curowner, uint32_t flags); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); @@ -902,15 +922,14 @@ kern_umtx_wake(struct thread *td, void *uaddr, int * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int -do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int mode) +do_lock_normal(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int mode) { struct abs_timeout timo; struct umtx_q *uq; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error = 0; - id = td->td_tid; uq = td->td_umtxq; if (timeout != NULL) @@ -921,7 +940,7 @@ static int * can fault on any access. */ for (;;) { - owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (mode == _UMUTEX_WAIT) { if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) return (0); @@ -929,7 +948,7 @@ static int /* * Try the uncontested case. This should be done in userland. */ - owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); + owner = UMTX_OWNER_USTORE(m, UMUTEX_UNOWNED, newowner); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) @@ -941,8 +960,9 @@ static int /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, + UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); @@ -985,10 +1005,11 @@ static int * either some one else has acquired the lock or it has been * released. */ - old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); /* The address was invalid. */ - if (old == -1) { + if (oldowner == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unbusy(&uq->uq_key); @@ -1004,7 +1025,7 @@ static int */ umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); - if (old == owner) + if (oldowner == owner) error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 
NULL : &timo); umtxq_remove(uq); @@ -1022,31 +1043,31 @@ static int * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int -do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_normal(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error; int count; - id = td->td_tid; /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); if ((owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); - if (old == -1) + oldowner = UMTX_OWNER_USTORE(m, owner, UMUTEX_UNOWNED); + if (oldowner == -1) return (EFAULT); - if (old == owner) + if (oldowner == owner) return (0); - owner = old; + owner = oldowner; } /* We should only ever be in here for contested locks */ @@ -1064,16 +1085,16 @@ static int * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - old = casuword32(&m->m_owner, owner, + oldowner = UMTX_OWNER_USTORE(m, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); - if (old == -1) + if (oldowner == -1) return (EFAULT); - if (old != owner) + if (oldowner != owner) return (EINVAL); return (0); } @@ -1086,12 +1107,12 @@ static int do_wake_umutex(struct thread *td, struct umutex *m) { struct umtx_key key; - uint32_t owner; + uintptr_t owner; uint32_t flags; int error; int count; - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); @@ -1111,7 +1132,7 @@ do_wake_umutex(struct thread *td, struct umutex *m umtxq_unlock(&key); if (count <= 1) - owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, UMUTEX_UNOWNED); umtxq_lock(&key); if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) @@ -1129,7 +1150,7 @@ static int do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; - uint32_t owner, old; + uintptr_t owner, oldowner; int type; int error; int count; @@ -1162,29 +1183,29 @@ do_wake2_umutex(struct thread *td, struct umutex * * any memory. */ if (count > 1) { - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); while ((owner & UMUTEX_CONTESTED) ==0) { - old = casuword32(&m->m_owner, owner, - owner|UMUTEX_CONTESTED); - if (old == owner) + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); + if (oldowner == owner) break; - owner = old; - if (old == -1) + owner = oldowner; + if (oldowner == -1) break; error = umtxq_check_susp(td); if (error != 0) break; } } else if (count == 1) { - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); while ((owner & ~UMUTEX_CONTESTED) != 0 && (owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, - owner|UMUTEX_CONTESTED); - if (old == owner) + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); + if (oldowner == owner) break; - owner = old; - if (old == -1) + owner = oldowner; + if (oldowner == -1) break; error = umtxq_check_susp(td); if (error != 0) @@ -1569,16 +1590,15 @@ umtx_pi_insert(struct umtx_pi *pi) * Lock a PI mutex. 
*/ static int -do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int try) +do_lock_pi(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq; struct umtx_pi *pi, *new_pi; - uint32_t id, owner, old; + uintptr_t owner, oldowner; int error; - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), @@ -1619,7 +1639,7 @@ static int /* * Try the uncontested case. This should be done in userland. */ - owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); + owner = UMTX_OWNER_USTORE(m, UMUTEX_UNOWNED, newowner); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) { @@ -1635,8 +1655,8 @@ static int /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { umtxq_lock(&uq->uq_key); @@ -1683,10 +1703,11 @@ static int * either some one else has acquired the lock or it has been * released. */ - old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); /* The address was invalid. */ - if (old == -1) { + if (oldowner == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); @@ -1700,7 +1721,7 @@ static int * and we need to retry or we lost a race to the thread * unlocking the umtx. */ - if (old == owner) + if (oldowner == owner) error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, "umtxpi", timeout == NULL ? NULL : &timo); else { @@ -1725,35 +1746,35 @@ static int * Unlock a PI mutex. */ static int -do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pi(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error; int count; int pri; - id = td->td_tid; /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); - if (old == -1) + oldowner = UMTX_OWNER_USTORE(m, owner, UMUTEX_UNOWNED); + if (oldowner == -1) return (EFAULT); - if (old == owner) + if (oldowner == owner) return (0); - owner = old; + owner = oldowner; } /* We should only ever be in here for contested locks */ @@ -1807,16 +1828,16 @@ static int * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - old = casuword32(&m->m_owner, owner, - count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); - if (old == -1) + if (oldowner == -1) return (EFAULT); - if (old != owner) + if (oldowner != owner) return (EINVAL); return (0); } @@ -1825,17 +1846,16 @@ static int * Lock a PP mutex. 
*/ static int -do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int try) +do_lock_pp(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq, *uq2; struct umtx_pi *pi; + uintptr_t owner; uint32_t ceiling; - uint32_t owner, id; int error, pri, old_inherited_pri, su; - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) @@ -1872,8 +1892,8 @@ static int } mtx_unlock_spin(&umtx_lock); - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { error = 0; @@ -1955,27 +1975,27 @@ out: * Unlock a PP mutex. */ static int -do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pp(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; - uint32_t owner, id; + uintptr_t owner; uint32_t rceiling; int error, pri, new_inherited_pri, su; - id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); @@ -2003,8 +2023,7 @@ static int * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. */ - error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), - UMUTEX_CONTESTED); + error = UMTX_OWNER_UDEFSTORE(m, UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) @@ -2038,12 +2057,12 @@ static int } static int -do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, - uint32_t *old_ceiling) +do_set_ceiling(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; + uintptr_t owner; uint32_t save_ceiling; - uint32_t owner, id; uint32_t flags; int error; @@ -2052,7 +2071,6 @@ static int return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) @@ -2064,13 +2082,12 @@ static int save_ceiling = fuword32(&m->m_ceilings[0]); - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { suword32(&m->m_ceilings[0], ceiling); - suword32(__DEVOLATILE(uint32_t *, &m->m_owner), - UMUTEX_CONTESTED); + UMTX_OWNER_UDEFSTORE(m, UMUTEX_CONTESTED); error = 0; break; } @@ -2081,7 +2098,7 @@ static int break; } - if ((owner & ~UMUTEX_CONTESTED) == id) { + if ((owner & ~UMUTEX_CONTESTED) == newowner) { suword32(&m->m_ceilings[0], ceiling); error = 0; break; @@ -2121,25 +2138,28 @@ static int * Lock a userland POSIX mutex. 
*/ static int -do_lock_umutex(struct thread *td, struct umutex *m, +do_lock_umutex(struct thread *td, struct umutex *m, uintptr_t newowner, struct _umtx_time *timeout, int mode) { uint32_t flags; int error; + if (newowner == UMUTEX_UNOWNED) + return (EINVAL); + flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - error = do_lock_normal(td, m, flags, timeout, mode); + error = do_lock_normal(td, m, newowner, flags, timeout, mode); break; case UMUTEX_PRIO_INHERIT: - error = do_lock_pi(td, m, flags, timeout, mode); + error = do_lock_pi(td, m, newowner, flags, timeout, mode); break; case UMUTEX_PRIO_PROTECT: - error = do_lock_pp(td, m, flags, timeout, mode); + error = do_lock_pp(td, m, newowner, flags, timeout, mode); break; default: return (EINVAL); @@ -2159,21 +2179,24 @@ static int * Unlock a userland POSIX mutex. */ static int -do_unlock_umutex(struct thread *td, struct umutex *m) +do_unlock_umutex(struct thread *td, struct umutex *m, uintptr_t curowner) { uint32_t flags; + if (curowner == UMUTEX_UNOWNED) + return (EINVAL); + flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - return (do_unlock_normal(td, m, flags)); + return (do_unlock_normal(td, m, curowner, flags)); case UMUTEX_PRIO_INHERIT: - return (do_unlock_pi(td, m, flags)); + return (do_unlock_pi(td, m, curowner, flags)); case UMUTEX_PRIO_PROTECT: - return (do_unlock_pp(td, m, flags)); + return (do_unlock_pp(td, m, curowner, flags)); } return (EINVAL); @@ -2181,7 +2204,7 @@ static int static int do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, - struct timespec *timeout, u_long wflags) + uintptr_t curowner, struct timespec *timeout, u_long wflags) { struct abs_timeout timo; struct umtx_q *uq; @@ -2222,7 +2245,7 @@ do_cv_wait(struct thread *td, struct ucond *cv, st umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); - error = do_unlock_umutex(td, m); + error = do_unlock_umutex(td, m, curowner); if (timeout != NULL) abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), @@ -2943,13 +2966,13 @@ __umtx_op_lock_umutex(struct thread *td, struct _u return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, 0); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); + return do_lock_umutex(td, uap->obj, uap->owner, NULL, _UMUTEX_TRY); } static int @@ -2968,7 +2991,7 @@ __umtx_op_wait_umutex(struct thread *td, struct _u return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, _UMUTEX_WAIT); } static int @@ -2980,13 +3003,13 @@ __umtx_op_wake_umutex(struct thread *td, struct _u static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_unlock_umutex(td, uap->obj); + return do_unlock_umutex(td, uap->obj, uap->owner); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { - return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); + return do_set_ceiling(td, uap->obj, uap->owner, uap->val, uap->uaddr1); } static int @@ -3004,7 +3027,8 @@ __umtx_op_cv_wait(struct thread *td, struct _umtx_ return (error); ts = &timeout; } - return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); + return (do_cv_wait(td, uap->obj, 
uap->uaddr1, uap->owner, ts, + uap->val)); } static int @@ -3222,7 +3246,7 @@ __umtx_op_lock_umutex_compat32(struct thread *td, return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, 0); } static int @@ -3241,7 +3265,7 @@ __umtx_op_wait_umutex_compat32(struct thread *td, return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, _UMUTEX_WAIT); } static int @@ -3259,7 +3283,8 @@ __umtx_op_cv_wait_compat32(struct thread *td, stru return (error); ts = &timeout; } - return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); + return (do_cv_wait(td, uap->obj, uap->uaddr1, uap->owner, ts, + uap->val)); } static int Index: sys/kern/kern_thr.c =================================================================== --- sys/kern/kern_thr.c (revision 264497) +++ sys/kern/kern_thr.c (working copy) @@ -92,7 +92,9 @@ static int create_thread(struct thread *td, mconte char *stack_base, size_t stack_size, char *tls_base, long *child_tid, long *parent_tid, - int flags, struct rtprio *rtp); + int flags, struct rtprio *rtp, + enum state_thread **child_chan, + enum state_thread **parent_chan); /* * System call interface. */ @@ -108,7 +110,7 @@ sys_thr_create(struct thread *td, struct thr_creat return (error); error = create_thread(td, &ctx.uc_mcontext, NULL, NULL, - NULL, 0, NULL, uap->id, NULL, uap->flags, NULL); + NULL, 0, NULL, uap->id, NULL, uap->flags, NULL, NULL, NULL); return (error); } @@ -143,7 +145,7 @@ kern_thr_new(struct thread *td, struct thr_param * error = create_thread(td, NULL, param->start_func, param->arg, param->stack_base, param->stack_size, param->tls_base, param->child_tid, param->parent_tid, param->flags, - rtpp); + rtpp, param->child_chan, param->parent_chan); return (error); } @@ -153,12 +155,15 @@ create_thread(struct thread *td, mcontext_t *ctx, char *stack_base, size_t stack_size, char *tls_base, long *child_tid, long *parent_tid, - int flags, struct rtprio *rtp) + int flags, struct rtprio *rtp, enum state_thread **child_chan, + enum state_thread **parent_chan) { stack_t stack; struct thread *newtd; + struct shchan *local_shchan; + enum state_thread *kern_shchanp, *user_shchanp; struct proc *p; - int error; + int error, numthreads, ret_pchan; p = td->td_proc; @@ -250,7 +255,62 @@ create_thread(struct thread *td, mcontext_t *ctx, } } + if (child_chan != NULL && proc_alloc_shchan(p, &local_shchan, + &kern_shchanp, &user_shchanp) == 0) { + /* Lockless; the thread is not yet linked anywhere. */ + newtd->td_sh_state = kern_shchanp; + newtd->td_sh_chan = local_shchan; + if (copyout(&user_shchanp, child_chan, + sizeof(enum state_thread *)) != 0) { + PROC_LOCK(p); + proc_free_shchan(p, local_shchan, kern_shchanp); + newtd->td_sh_state = NULL; + newtd->td_sh_chan = NULL; + } + } + + /* + * If there is only a single thread, no other threads can be added + * in the meantime, because curthread itself is executing this + * thr_new(). There is thus no race in also allocating a shared + * channel for that single curthread. + * The only race to worry about is a multi-threaded process becoming + * single-threaded while PROC_LOCK() is dropped.
+ */ + ret_pchan = ENOMEM; PROC_LOCK(td->td_proc); + do { + numthreads = td->td_proc->p_numthreads; + PROC_UNLOCK(td->td_proc); + + if (parent_chan != NULL && numthreads == 1) { + ret_pchan = proc_alloc_shchan(p, &local_shchan, + &kern_shchanp, &user_shchanp); + if (ret_pchan == 0) { + /* + * Lock for consistency: right now the + * process is still single-threaded and + * its only thread is executing + * sys_thr_new(). + */ + thread_lock(td); + if (td->td_sh_state != NULL || + td->td_sh_chan != NULL) + panic("thr_new: inconsistent state"); + *kern_shchanp = TDS_RUNNING; + td->td_sh_state = kern_shchanp; + td->td_sh_chan = local_shchan; + thread_unlock(td); + } + } + + PROC_LOCK(td->td_proc); + if (numthreads == 1 && td->td_proc->p_numthreads > 1) + panic("sys_thr_new: unexpected threading of curproc"); + } while (numthreads > 1 && td->td_proc->p_numthreads == 1); + td->td_proc->p_flag |= P_HADTHREADS; thread_link(newtd, p); bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); @@ -262,6 +322,19 @@ create_thread(struct thread *td, mcontext_t *ctx, newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; PROC_UNLOCK(p); + if (parent_chan != NULL && ret_pchan == 0 && copyout(&user_shchanp, + parent_chan, sizeof(enum state_thread *)) != 0) { + /* See locking comment above. */ + thread_lock(td); + kern_shchanp = td->td_sh_state; + local_shchan = td->td_sh_chan; + td->td_sh_state = NULL; + td->td_sh_chan = NULL; + thread_unlock(td); + PROC_LOCK(p); + proc_free_shchan(p, local_shchan, kern_shchanp); + } + tidhash_add(newtd); thread_lock(newtd); @@ -304,6 +377,9 @@ sys_thr_exit(struct thread *td, struct thr_exit_ar /* long *state */ { struct proc *p; + struct thread *td2; + struct shchan *tmpchan; + enum state_thread *tmpstate; p = td->td_proc; @@ -326,11 +402,48 @@ sys_thr_exit(struct thread *td, struct thr_exit_ar LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); tdsigcleanup(td); + thread_lock(td); + if (td->td_sh_state != NULL) { + KASSERT(td->td_sh_chan != NULL, + ("sys_thr_exit: invalid td_sh_chan")); + tmpchan = td->td_sh_chan; + tmpstate = td->td_sh_state; + td->td_sh_state = NULL; + td->td_sh_chan = NULL; + thread_unlock(td); + proc_free_shchan_async(p, tmpchan, tmpstate); + } else + thread_unlock(td); + + /* + * If the process is going to become single-threaded after + * this thr_exit(), also free the remaining thread's shared + * channel.
+ */ + if (p->p_numthreads == 2) { + td2 = TAILQ_FIRST(&p->p_threads); + if (td2 == td) + td2 = TAILQ_NEXT(td2, td_plist); + thread_lock(td2); + if (td2->td_sh_state != NULL) { + KASSERT(td2->td_sh_chan != NULL, + ("sys_thr_exit: invalid td_sh_chan")); + tmpchan = td2->td_sh_chan; + tmpstate = td2->td_sh_state; + td2->td_sh_state = NULL; + td2->td_sh_chan = NULL; + thread_unlock(td2); + proc_free_shchan_async(p, tmpchan, tmpstate); + } else + thread_unlock(td2); + } PROC_SLOCK(p); thread_stopped(p); thread_exit(); /* NOTREACHED */ } + KASSERT(SLIST_EMPTY(&p->p_shchans), + ("sys_thr_exit: shared channels present with single thread")); PROC_UNLOCK(p); rw_wunlock(&tidhash_lock); return (0); Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c (revision 264497) +++ sys/kern/kern_proc.c (working copy) @@ -227,6 +227,8 @@ proc_init(void *mem, int size, int flags) cv_init(&p->p_pwait, "ppwait"); cv_init(&p->p_dbgwait, "dbgwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ + SLIST_INIT(&p->p_shchans); + SLIST_INIT(&p->p_shcasync); EVENTHANDLER_INVOKE(process_init, p); p->p_stats = pstats_alloc(); SDT_PROBE(proc, kernel, init, return, p, size, flags, 0, 0); Index: sys/kern/kern_exit.c =================================================================== --- sys/kern/kern_exit.c (revision 264497) +++ sys/kern/kern_exit.c (working copy) @@ -219,7 +219,13 @@ exit1(struct thread *td, int rv) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); p->p_xstat = rv; /* Let event handler change exit status */ - PROC_UNLOCK(p); + + /* + * The process is single-threaded at this point, so reclaim all + * of its shared channels. + */ + proc_reclaim_shchans(p); + /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); @@ -855,6 +861,8 @@ proc_reap(struct thread *td, struct proc *p, int * #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("proc_reap: no residual thread!")); + KASSERT(SLIST_EMPTY(&p->p_shchans), + ("proc_reap: shared channels present when destroying proc")); uma_zfree(proc_zone, p); sx_xlock(&allproc_lock); nprocs--; Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c (revision 264497) +++ sys/kern/kern_exec.c (working copy) @@ -306,11 +306,13 @@ kern_execve(td, args, mac_p) * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ - if (error == 0) + if (error == 0) { thread_single(SINGLE_EXIT); - else + proc_reclaim_shchans(p); + } else { thread_single_end(); - PROC_UNLOCK(p); + PROC_UNLOCK(p); + } } return (error); Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c (revision 264497) +++ sys/kern/kern_fork.c (working copy) @@ -926,6 +926,8 @@ fail: #endif racct_proc_exit(newproc); fail1: + KASSERT(SLIST_EMPTY(&newproc->p_shchans), + ("fork1: shared channels present when destroying proc")); if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc);
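
Reviewer note (not part of the patch): the kern_thr.c hunks above publish a per-thread run-state word to userland through two new thr_new() parameters, child_chan and parent_chan. The sketch below shows how a caller might consume that interface. It is illustrative only: it assumes struct thr_param has grown child_chan/parent_chan members matching the param->child_chan/param->parent_chan reads in kern_thr_new(), and that enum state_thread and TDS_RUNNING are visible to userland headers; those declarations are not part of the hunks shown here, and spawn_with_state_channel() is a made-up helper name.

/* Hypothetical consumer of the extended thr_new() interface. */
#include <sys/types.h>
#include <sys/thr.h>
#include <string.h>

static enum state_thread *child_state;	/* written by the kernel via copyout() */
static enum state_thread *self_state;	/* parent's own state word, if granted */

static int
spawn_with_state_channel(void (*fn)(void *), void *arg, char *stk, size_t stksz)
{
	struct thr_param p;
	static long child_tid;

	memset(&p, 0, sizeof(p));
	p.start_func = fn;
	p.arg = arg;
	p.stack_base = stk;
	p.stack_size = stksz;
	p.child_tid = &child_tid;
	p.child_chan = &child_state;	/* assumed new member added by this patch */
	p.parent_chan = &self_state;	/* only honored while the process is single-threaded */
	return (thr_new(&p, sizeof(p)));
}

/*
 * After a successful call, code that wants to know whether the child is
 * currently on a CPU can test, for example:
 *
 *	if (child_state != NULL && *child_state == TDS_RUNNING)
 *		continue spinning instead of sleeping;
 */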
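A second point that is easy to miss in review: with m_owner now pointer-sized, the value passed in uap->owner is an opaque per-thread cookie rather than a thread ID. The checks added in do_lock_umutex() and do_unlock_umutex(), together with the fact that the lock paths OR UMUTEX_CONTESTED into the stored word and the unlock path strips it before comparing, imply the constraints sketched below. The helper is hypothetical and only restates those constraints; any cookie that is unique per thread and leaves the UMUTEX_CONTESTED bit clear satisfies them.

#include <sys/types.h>
#include <sys/umtx.h>
#include <errno.h>

/* Illustrative only: constraints an owner cookie must satisfy. */
static int
umtx_owner_cookie_ok(uintptr_t cookie)
{
	if (cookie == UMUTEX_UNOWNED)
		return (EINVAL);	/* rejected up front by do_lock_umutex()/do_unlock_umutex() */
	if ((cookie & UMUTEX_CONTESTED) != 0)
		return (EINVAL);	/* bit is reserved; unlock compares owner & ~UMUTEX_CONTESTED */
	return (0);
}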