Patch summary (text before the first Index: header is skipped by patch(1)): adds version-2 userland semaphores. SEM_MAGIC is bumped while the old value stays accepted as SEM_MAGIC_V1, new semaphores set SEM_VER2 in _flags, sleeping waiters are flagged with the SEM_WAITERS bit of _count, and the kernel gains UMTX_OP_SEM_POST. Review fixes in this revision: _trywait() masks SEM_WAITERS before testing the count; sem_post_2() tests the value bits (~SEM_WAITERS) for overflow; the legacy _sem_post() path sets errno and returns -1 on overflow and passes &sem->_kern to _umtx_op(); do_sem_post() uses an unsigned-safe overflow test and releases its key on the EOVERFLOW path.
Index: lib/libc/gen/sem_new.c =================================================================== --- lib/libc/gen/sem_new.c (revision 234336) +++ lib/libc/gen/sem_new.c (working copy) @@ -61,7 +61,8 @@ __weak_reference(_sem_wait, sem_wait); #define SEM_PREFIX "/tmp/SEMD" -#define SEM_MAGIC ((u_int32_t)0x73656d31) +#define SEM_MAGIC_V1 ((u_int32_t)0x73656d31) +#define SEM_MAGIC ((u_int32_t)0x73656d32) struct sem_nameinfo { int open_count; @@ -109,7 +110,7 @@ sem_check_validity(sem_t *sem) { - if (sem->_magic == SEM_MAGIC) + if (sem->_magic == SEM_MAGIC || sem->_magic == SEM_MAGIC_V1) return (0); else { errno = EINVAL; @@ -130,7 +131,7 @@ sem->_magic = SEM_MAGIC; sem->_kern._count = (u_int32_t)value; sem->_kern._has_waiters = 0; - sem->_kern._flags = pshared ? USYNC_PROCESS_SHARED : 0; + sem->_kern._flags = (pshared ? USYNC_PROCESS_SHARED : 0) | SEM_VER2; return (0); } @@ -213,7 +214,7 @@ tmp._magic = SEM_MAGIC; tmp._kern._has_waiters = 0; tmp._kern._count = value; - tmp._kern._flags = USYNC_PROCESS_SHARED | SEM_NAMED; + tmp._kern._flags = USYNC_PROCESS_SHARED | SEM_NAMED | SEM_VER2; if (_write(fd, &tmp, sizeof(tmp)) != sizeof(tmp)) { flock(fd, LOCK_UN); goto error; @@ -331,14 +332,20 @@ if (sem_check_validity(sem) != 0) return (-1); - *sval = (int)sem->_kern._count; + *sval = (int)sem->_kern._count & ~SEM_WAITERS; return (0); } -static __inline int -usem_wake(struct _usem *sem) +static inline int +_trywait(sem_t *sem) { - return _umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL); + int val; + + while (((val = sem->_kern._count) & ~SEM_WAITERS) > 0) { + if (atomic_cmpset_acq_int(&sem->_kern._count, val, val - 1)) + return (0); + } + return (EAGAIN); } static __inline int @@ -364,16 +371,14 @@ int _sem_trywait(sem_t *sem) { - int val; + int status; if (sem_check_validity(sem) != 0) return (-1); - while ((val = sem->_kern._count) > 0) { - if (atomic_cmpset_acq_int(&sem->_kern._count, val, val - 1)) - return (0); - } - errno = EAGAIN; + if ((status = _trywait(sem)) == 0) + return (0); + 
errno = status; return (-1); } @@ -381,19 +386,17 @@ _sem_timedwait(sem_t * __restrict sem, const struct timespec * __restrict abstime) { - int val, retval; + int error; if (sem_check_validity(sem) != 0) return (-1); - retval = 0; + error = 0; for (;;) { - while ((val = sem->_kern._count) > 0) { - if (atomic_cmpset_acq_int(&sem->_kern._count, val, val - 1)) - return (0); - } + if (_trywait(sem) == 0) + return (0); - if (retval) { + if (error) { _pthread_testcancel(); break; } @@ -403,16 +406,17 @@ * be checked if the thread would have blocked. */ if (abstime != NULL) { - if (abstime->tv_nsec >= 1000000000 || abstime->tv_nsec < 0) { + if (abstime->tv_nsec >= 1000000000 || + abstime->tv_nsec < 0) { errno = EINVAL; return (-1); } } _pthread_cancel_enter(1); - retval = usem_wait(&sem->_kern, abstime); + error = usem_wait(&sem->_kern, abstime); _pthread_cancel_leave(0); } - return (retval); + return (error); } int @@ -421,6 +425,59 @@ return _sem_timedwait(sem, NULL); } +static int +sem_post_1(sem_t *sem, unsigned int flags) +{ + unsigned int count, newcount; + + for (;;) { + count = sem->_kern._count; + if (__predict_false((count & ~SEM_WAITERS) == + SEM_VALUE_MAX)) { + errno = EOVERFLOW; + return (-1); + } + newcount = (count + 1) & ~SEM_WAITERS; + if (atomic_cmpset_rel_int(&sem->_kern._count, count, newcount)) + break; + } + + if ((count & SEM_WAITERS) != 0) + return _umtx_op(&sem->_kern, UMTX_OP_SEM_WAKE, flags, + NULL, NULL); + return (0); +} + +static int +sem_post_2(sem_t *sem) +{ + int error = 0; + unsigned int count; + + for (;;) { + count = sem->_kern._count; + if (__predict_false((count & ~SEM_WAITERS) == + SEM_VALUE_MAX)) { + errno = EOVERFLOW; + error = -1; + break; + } + if ((count & SEM_WAITERS) != 0) { + /* + * Atomically increase count and wake thread up, + * semaphore will be in inconsistent state if + * process crashed. 
+ */ + error = _umtx_op(&sem->_kern, UMTX_OP_SEM_POST, 1, + NULL, NULL); + break; + } + if (atomic_cmpset_rel_int(&sem->_kern._count, count, count+1)) + break; + } + return (error); +} + /* * POSIX: * The sem_post() interface is reentrant with respect to signals and may be @@ -430,16 +487,27 @@ int _sem_post(sem_t *sem) { - unsigned int count; + unsigned int count, flags; if (sem_check_validity(sem) != 0) return (-1); - do { - count = sem->_kern._count; - if (count + 1 > SEM_VALUE_MAX) - return (EOVERFLOW); - } while(!atomic_cmpset_rel_int(&sem->_kern._count, count, count+1)); - (void)usem_wake(&sem->_kern); - return (0); + flags = sem->_kern._flags; + if ((flags & SEM_VER2) != 0) { + if ((flags & USYNC_PROCESS_SHARED) == 0) + return sem_post_1(sem, flags); + else + return sem_post_2(sem); + } else { + do { + count = sem->_kern._count; + if (count + 1 > SEM_VALUE_MAX) { + errno = EOVERFLOW; + return (-1); + } + } while(!atomic_cmpset_rel_int(&sem->_kern._count, count, + count+1)); + (void) _umtx_op(&sem->_kern, UMTX_OP_SEM_WAKE, 0, NULL, NULL); + return (0); + } } Index: sys/sys/umtx.h =================================================================== --- sys/sys/umtx.h (revision 233912) +++ sys/sys/umtx.h (working copy) @@ -56,7 +56,10 @@ /* _usem flags */ #define SEM_NAMED 0x0002 +#define SEM_VER2 0x0004 +#define SEM_WAITERS 0x80000000 + /* op code for _umtx_op */ #define UMTX_OP_LOCK 0 #define UMTX_OP_UNLOCK 1 @@ -81,7 +84,8 @@ #define UMTX_OP_SEM_WAKE 20 #define UMTX_OP_NWAKE_PRIVATE 21 #define UMTX_OP_MUTEX_WAKE2 22 -#define UMTX_OP_MAX 23 +#define UMTX_OP_SEM_POST 23 +#define UMTX_OP_MAX 24 /* Flags for UMTX_OP_CV_WAIT */ #define CVWAIT_CHECK_UNPARKING 0x01 Index: sys/kern/kern_umtx.c =================================================================== --- sys/kern/kern_umtx.c (revision 233913) +++ sys/kern/kern_umtx.c (working copy) @@ -2827,6 +2827,9 @@ uint32_t flags, count; int error; + count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); + if ((count & ~SEM_WAITERS) != 0) + 
return (0); uq = td->td_umtxq; flags = fuword32(&sem->_flags); error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); @@ -2840,25 +2843,47 @@ umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); - casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1); - count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); - if (count != 0) { - umtxq_lock(&uq->uq_key); - umtxq_unbusy(&uq->uq_key); - umtxq_remove(uq); - umtxq_unlock(&uq->uq_key); - umtx_key_release(&uq->uq_key); - return (0); + if ((flags & SEM_VER2) != 0) { + count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); + for (;;) { + if ((count & ~SEM_WAITERS) != 0) + break; + if ((count & SEM_WAITERS) == 0) { + int old = casuword32(__DEVOLATILE(uint32_t *, + &sem->_count), count, count | SEM_WAITERS); + if (old == count) + break; + count = old; + } else + break; + } + } else { + casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1); + count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); - - error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); - + if ((count & ~SEM_WAITERS) == 0) + error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); if ((uq->uq_flags & UQF_UMTXQ) == 0) error = 0; else { - umtxq_remove(uq); + umtxq_busy(&uq->uq_key); + if ((uq->uq_flags & UQF_UMTXQ) != 0) { + int oldlen = uq->uq_cur_queue->length; + umtxq_remove(uq); + if ((flags & SEM_VER2) != 0 && (oldlen == 1)) { + for (;;) { + count = fuword32(__DEVOLATILE(uint32_t *, + &sem->_count)); + int old = casuword32(__DEVOLATILE(uint32_t *, + &sem->_count), count, count & ~SEM_WAITERS); + if (old == count) + break; + } + } + } + umtxq_unbusy(&uq->uq_key); if (error == ERESTART) error = EINTR; } @@ -2867,42 +2892,107 @@ return (error); } -/* - * Signal a userland condition variable. 
- */ static int -do_sem_wake(struct thread *td, struct _usem *sem) +do_sem_post(struct thread *td, struct _usem *sem, unsigned int npost) { struct umtx_key key; - int error, cnt; + int error; + unsigned int nwait; uint32_t flags; + int count; flags = fuword32(&sem->_flags); if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); - cnt = umtxq_count(&key); - if (cnt > 0) { - umtxq_signal(&key, 1); - /* - * Check if count is greater than 0, this means the memory is - * still being referenced by user code, so we can safely - * update _has_waiters flag. - */ - if (cnt == 1) { + nwait = umtxq_count(&key); + umtxq_unlock(&key); + count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); + for (;;) { + if (__predict_false(npost > ~SEM_WAITERS - (count & ~SEM_WAITERS))) { + umtxq_lock(&key); + umtxq_unbusy(&key); umtxq_unlock(&key); - error = suword32( - __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0); - umtxq_lock(&key); + umtx_key_release(&key); + return (EOVERFLOW); } + int newval = count + npost; + if (npost >= nwait) + newval &= ~SEM_WAITERS; + int old = casuword32(__DEVOLATILE(uint32_t *, + &sem->_count), count, newval); + if (old == count) + break; + count = old; } + umtxq_lock(&key); + if (nwait > 0) + umtxq_signal(&key, npost); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } +/* + * Wake up threads sleeping on a userland semaphore. + */ +static int +do_sem_wake(struct thread *td, struct _usem *sem, uint32_t flags) +{ + struct umtx_key key; + int error; + int count; + unsigned int nwait, nwake; + + if (flags == 0) + flags = fuword32(&sem->_flags); + if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) + return (error); + nwake = 1; + umtxq_lock(&key); + umtxq_busy(&key); + nwait = umtxq_count(&key); + if (nwait == 0) + goto out; + /* + * if nwait is greater than 0, this means the memory is + * still being referenced by user code, so we can safely + * update waiting flag. 
+ */ + if ((flags & SEM_VER2) != 0) { + for (;;) { + uint32_t old; + + count = fuword32(__DEVOLATILE(uint32_t *, + &sem->_count)); + if ((count & SEM_WAITERS) == 0 && + (count & ~SEM_WAITERS) < nwait) { + old = casuword32(__DEVOLATILE(uint32_t *, + &sem->_count), count, + count | SEM_WAITERS); + if (old == count) + break; + } else + break; + } + nwake = count & ~SEM_WAITERS; + } else if (nwait <= nwake) { + umtxq_unlock(&key); + error = suword32( + __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0); + umtxq_lock(&key); + } + umtxq_signal(&key, nwake); + +out: + umtxq_unbusy(&key); + umtxq_unlock(&key); + umtx_key_release(&key); + return (error); +} + int sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap) /* struct umtx *umtx */ @@ -3226,7 +3316,7 @@ static int __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) { - return do_sem_wake(td, uap->obj); + return do_sem_wake(td, uap->obj, uap->val); } static int @@ -3235,6 +3325,12 @@ return do_wake2_umutex(td, uap->obj, uap->val); } +static int +__umtx_op_sem_post(struct thread *td, struct _umtx_op_args *uap) +{ + return do_sem_post(td, uap->obj, uap->val); +} + typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static _umtx_op_func op_table[] = { @@ -3260,7 +3356,8 @@ __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ - __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ + __umtx_op_wake2_umutex, /* UMTX_OP_UMUTEX_WAKE2 */ + __umtx_op_sem_post /* UMTX_OP_SEM_POST */ }; int @@ -3563,7 +3660,8 @@ __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ - __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ + __umtx_op_wake2_umutex, /* UMTX_OP_UMUTEX_WAKE2 */ + __umtx_op_sem_post /* UMTX_OP_SEM_POST */ }; int