Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c (revision 264627) +++ sys/vm/vm_map.c (working copy) @@ -1121,6 +1121,9 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm boolean_t charge_prev_obj; VM_MAP_ASSERT_LOCKED(map); + KASSERT((cow & (MAP_INHERIT_SHARE | MAP_INHERIT_NONE)) != + (MAP_INHERIT_SHARE | MAP_INHERIT_NONE), + ("vm_map_insert: Conflicting inherit requests provided 0x%x", cow)); /* * Check that the start and end points are not bogus. @@ -1165,6 +1168,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm protoeflags |= MAP_ENTRY_VN_WRITECNT; if (cow & MAP_INHERIT_SHARE) inheritance = VM_INHERIT_SHARE; + else if (cow & MAP_INHERIT_NONE) + inheritance = VM_INHERIT_NONE; else inheritance = VM_INHERIT_DEFAULT; @@ -1904,6 +1909,18 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } + + /* + * In case the requested protection does not include + * VM_PROT_READ fail if there is any user wired region in + * order to avoid the removal of mappings in the pmap layer + * while the region is still marked as wired. + */ + if ((new_prot & VM_PROT_READ) == VM_PROT_NONE && + (current->eflags & MAP_ENTRY_USER_WIRED) != 0) { + vm_map_unlock(map); + return (KERN_PROTECTION_FAILURE); + } current = current->next; } Index: sys/vm/vm_map.h =================================================================== --- sys/vm/vm_map.h (revision 264627) +++ sys/vm/vm_map.h (working copy) @@ -309,19 +309,20 @@ long vmspace_resident_count(struct vmspace *vmspac /* * Copy-on-write flags for vm_map operations */ -#define MAP_INHERIT_SHARE 0x0001 -#define MAP_COPY_ON_WRITE 0x0002 -#define MAP_NOFAULT 0x0004 -#define MAP_PREFAULT 0x0008 -#define MAP_PREFAULT_PARTIAL 0x0010 -#define MAP_DISABLE_SYNCER 0x0020 -#define MAP_DISABLE_COREDUMP 0x0100 -#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */ -#define MAP_VN_WRITECOUNT 0x0400 -#define MAP_STACK_GROWS_DOWN 0x1000 -#define MAP_STACK_GROWS_UP 0x2000 -#define MAP_ACC_CHARGED 0x4000 -#define MAP_ACC_NO_CHARGE 0x8000 +#define MAP_INHERIT_SHARE 0x00001 +#define MAP_COPY_ON_WRITE 0x00002 +#define MAP_NOFAULT 0x00004 +#define MAP_PREFAULT 0x00008 +#define MAP_PREFAULT_PARTIAL 0x00010 +#define MAP_DISABLE_SYNCER 0x00020 +#define MAP_DISABLE_COREDUMP 0x00100 +#define MAP_PREFAULT_MADVISE 0x00200 /* from (user) madvise request */ +#define MAP_VN_WRITECOUNT 0x00400 +#define MAP_STACK_GROWS_DOWN 0x01000 +#define MAP_STACK_GROWS_UP 0x02000 +#define MAP_ACC_CHARGED 0x04000 +#define MAP_ACC_NO_CHARGE 0x08000 +#define MAP_INHERIT_NONE 0x10000 /* * vm_fault option flags Index: sys/vm/vm_glue.c =================================================================== --- sys/vm/vm_glue.c (revision 264627) +++ sys/vm/vm_glue.c (working copy) @@ -777,6 +777,93 @@ kick_proc0(void) wakeup(&proc0); } +int +vm_create_shchan(vm_map_t map, rlim_t lmemlim, vm_offset_t start_uva, + vm_offset_t *kva, vm_offset_t *uva) +{ +#ifdef VM_SHARED_CHANS + vm_paddr_t pa; + vm_offset_t local_kva, local_uva; + vm_page_t m; + + if (ptoa(pmap_wired_count(vm_map_pmap(map)) + 1) > lmemlim) + return (1); + + local_kva = kva_alloc(PAGE_SIZE); + if (local_kva == 0) + return (1); + local_uva = start_uva; + if (vm_map_find(map, NULL, 0, &local_uva, PAGE_SIZE, 0, + VMFS_ANY_SPACE, VM_PROT_READ, VM_PROT_READ, + MAP_INHERIT_NONE) != KERN_SUCCESS) { + kva_free(local_kva, PAGE_SIZE); + return (1); + } + + KASSERT(round_page(local_uva + PAGE_SIZE) == (local_uva + PAGE_SIZE), + 
("vm_create_shchan: unexpected wrong page alignment")); + if (vm_map_wire(map, local_uva, local_uva + PAGE_SIZE, + VM_MAP_WIRE_USER | VM_MAP_WIRE_HOLESOK) != KERN_SUCCESS) { + kva_free(local_kva, PAGE_SIZE); + vm_destroy_shchan_uva(map, local_uva); + return (1); + } + + pa = pmap_extract(vm_map_pmap(map), local_uva); + if (pa == 0) + panic("vm_create_shchan: wired page but no valid mapping"); + m = PHYS_TO_VM_PAGE(pa); + pmap_qenter(local_kva, &m, 1); + + *kva = local_kva; + *uva = local_uva; + return (0); +#else + return (1); +#endif +} + +void +vm_destroy_shchan_nofreeuva(vm_map_t map, vm_offset_t kva, vm_offset_t uva) +{ +#ifdef VM_SHARED_CHANS + vm_page_t m; + + KASSERT(round_page(uva + PAGE_SIZE) == (uva + PAGE_SIZE), + ("vm_destroy_shchan_nofreeuva: unexpected wrong page alignment")); + + m = PHYS_TO_VM_PAGE(vtophys(kva)); + pmap_qremove(kva, 1); + + /* + * The backing page is not directly freed but it can be reclaimed + * as needed. + */ + if (vm_map_unwire(map, uva, uva + PAGE_SIZE, + VM_MAP_WIRE_USER | VM_MAP_WIRE_HOLESOK) != KERN_SUCCESS) + panic("vm_destroy_shchan_nofreeuva: impossible to unwire UVA"); + kva_free(kva, PAGE_SIZE); +#endif +} + +void +vm_destroy_shchan_uva(vm_map_t map, vm_offset_t uva) +{ + +#ifdef VM_SHARED_CHANS + if (vm_map_remove(map, uva, uva + PAGE_SIZE) != KERN_SUCCESS) + panic("vm_destroy_shchan: invalid return value"); +#endif +} + +void +vm_destroy_shchan(vm_map_t map, vm_offset_t kva, vm_offset_t uva) +{ + + vm_destroy_shchan_nofreeuva(map, kva, uva); + vm_destroy_shchan_uva(map, uva); +} + #ifndef NO_SWAPPING /* Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h (revision 264627) +++ sys/vm/vm_extern.h (working copy) @@ -72,6 +72,11 @@ void kmeminit(void); void swapout_procs(int); int kernacc(void *, int, int); int useracc(void *, int, int); +int vm_create_shchan(vm_map_t, rlim_t, vm_offset_t, vm_offset_t *, + vm_offset_t *); +void vm_destroy_shchan(vm_map_t, vm_offset_t, vm_offset_t); +void vm_destroy_shchan_nofreeuva(vm_map_t, vm_offset_t, vm_offset_t); +void vm_destroy_shchan_uva(vm_map_t, vm_offset_t); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); Index: sys/sys/_umtx.h =================================================================== --- sys/sys/_umtx.h (revision 264627) +++ sys/sys/_umtx.h (working copy) @@ -34,7 +34,7 @@ #include struct umutex { - volatile __lwpid_t m_owner; /* Owner of the mutex */ + volatile __uintptr_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ __uint32_t m_spare[4]; Index: sys/sys/thr.h =================================================================== --- sys/sys/thr.h (revision 264627) +++ sys/sys/thr.h (working copy) @@ -55,6 +55,8 @@ struct thr_param { long *parent_tid; /* parent accesses the new TID here. */ int flags; /* thread flags. */ struct rtprio *rtp; /* Real-time scheduling priority */ + enum state_thread **child_chan; /* Shared chan access. */ + enum state_thread **parent_chan; /* Shared chan access for parent. */ void *spare[3]; /* TODO: cpu affinity mask etc. 
*/ }; Index: sys/sys/umtx.h =================================================================== --- sys/sys/umtx.h (revision 264627) +++ sys/sys/umtx.h (working copy) @@ -35,7 +35,7 @@ #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ #define UMUTEX_UNOWNED 0x0 -#define UMUTEX_CONTESTED 0x80000000U +#define UMUTEX_CONTESTED 0x1 #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ @@ -89,7 +89,8 @@ #ifndef _KERNEL -int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); +int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2, + uintptr_t owner); #else Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h (revision 264627) +++ sys/sys/proc.h (working copy) @@ -176,6 +176,20 @@ struct trapframe; struct turnstile; /* + * Allowed threads states. + * TDS_INVALID should not be used directly. It is used as a marker for + * "invalid state" purposes. + */ +enum state_thread { + TDS_INACTIVE = 0x0, + TDS_INHIBITED, + TDS_CAN_RUN, + TDS_RUNQ, + TDS_RUNNING, + TDS_INVALID +}; + +/* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts @@ -197,6 +211,18 @@ struct rusage_ext { }; /* + * Shared channels buckets. + * Implemented as a contiguous collection of KVA/UVA couplets from which + * shared channels are extracted and allocated. + */ +struct shchan { + SLIST_ENTRY(shchan) sh_iter; + enum state_thread *sh_kern; + enum state_thread *sh_user; + u_int sh_free_slots; +}; + +/* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. @@ -275,6 +301,7 @@ struct thread { u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ + enum state_thread *td_sh_state; /* (t) Shared channel thread state. */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ @@ -293,13 +320,8 @@ struct thread { * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ - enum { - TDS_INACTIVE = 0x0, - TDS_INHIBITED, - TDS_CAN_RUN, - TDS_RUNQ, - TDS_RUNNING - } td_state; /* (t) thread state */ + struct shchan *td_sh_chan; /* (t) Shared channel bucket. */ + enum state_thread td_state; /* (t) Thread state. 
*/ union { register_t tdu_retval[2]; off_t tdu_off; @@ -457,12 +479,17 @@ do { \ #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ + if ((td)->td_sh_state != NULL) \ + *(td)->td_sh_state = TDS_INHIBITED; \ } while (0) -#define TD_CLR_INHIB(td, inhib) do { \ - if (((td)->td_inhibitors & (inhib)) && \ - (((td)->td_inhibitors &= ~(inhib)) == 0)) \ - (td)->td_state = TDS_CAN_RUN; \ +#define TD_CLR_INHIB(td, inhib) do { \ + if (((td)->td_inhibitors & (inhib)) && \ + (((td)->td_inhibitors &= ~(inhib)) == 0)) { \ + (td)->td_state = TDS_CAN_RUN; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_CAN_RUN; \ + } \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) @@ -478,9 +505,21 @@ do { \ #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) -#define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING -#define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ -#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN +#define TD_SET_RUNNING(td) do { \ + (td)->td_state = TDS_RUNNING; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_RUNNING; \ +} while (0) +#define TD_SET_RUNQ(td) do { \ + (td)->td_state = TDS_RUNQ; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_RUNQ; \ +} while (0) +#define TD_SET_CAN_RUN(td) do { \ + (td)->td_state = TDS_CAN_RUN; \ + if ((td)->td_sh_state != NULL) \ + *td->td_sh_state = TDS_CAN_RUN; \ +} while (0) /* * Process structure. @@ -488,6 +527,8 @@ do { \ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ + SLIST_HEAD(, shchan) p_shchans; /* (c) All shared channel buckets. */ + SLIST_HEAD(, shchan) p_shcasync; /* (c) Async freed shchans buckets. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. 
*/ @@ -884,6 +925,12 @@ int proc_getargv(struct thread *td, struct proc *p int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); +int proc_alloc_shchan(struct proc *p, struct shchan **retch, + enum state_thread **kent, enum state_thread **uent); +void proc_reap_shchans_uva(struct proc *p); +void proc_reclaim_shchans(struct proc *p); +void _proc_free_shchan(struct proc *p, struct shchan *chan, + enum state_thread *kent, boolean_t sync); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); void proc_reap(struct thread *td, struct proc *p, int *status, int options); @@ -971,6 +1018,22 @@ curthread_pflags_restore(int save) curthread->td_pflags &= save; } +static __inline void +proc_free_shchan(struct proc *p, struct shchan *chan, + enum state_thread *kent) +{ + + _proc_free_shchan(p, chan, kent, TRUE); +} + +static __inline void +proc_free_shchan_async(struct proc *p, struct shchan *chan, + enum state_thread *kent) +{ + + _proc_free_shchan(p, chan, kent, FALSE); +} + #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h (revision 264627) +++ sys/sys/sysproto.h (working copy) @@ -1374,6 +1374,7 @@ struct _umtx_op_args { char val_l_[PADL_(u_long)]; u_long val; char val_r_[PADR_(u_long)]; char uaddr1_l_[PADL_(void *)]; void * uaddr1; char uaddr1_r_[PADR_(void *)]; char uaddr2_l_[PADL_(void *)]; void * uaddr2; char uaddr2_r_[PADR_(void *)]; + char owner_l_[PADL_(uintptr_t)]; uintptr_t owner; char owner_r_[PADR_(uintptr_t)]; }; struct thr_new_args { char param_l_[PADL_(struct thr_param *)]; struct thr_param * param; char param_r_[PADR_(struct thr_param *)]; Index: sys/conf/options =================================================================== --- sys/conf/options (revision 264627) +++ sys/conf/options (working copy) @@ -593,6 +593,7 @@ VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_LEVEL_0_ORDER opt_vm.h +VM_SHARED_CHANS opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c (revision 264627) +++ sys/kern/kern_thread.c (working copy) @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -58,12 +59,17 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include +#define PAGE_NUM_THRSTATE (PAGE_SIZE / sizeof (enum state_thread)) + SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE(proc, , , lwp__exit); +static MALLOC_DEFINE(M_SHCHAN, "shchan", "shared channels"); /* * thread related storage. @@ -242,6 +248,8 @@ void proc_linkup0(struct proc *p, struct thread *td) { TAILQ_INIT(&p->p_threads); /* all threads in proc */ + SLIST_INIT(&p->p_shchans); + SLIST_INIT(&p->p_shcasync); proc_linkup(p, td); } @@ -261,6 +269,203 @@ proc_linkup(struct proc *p, struct thread *td) } /* + * Alloc a shared channel linked to proc p. + * Returns the bucket from which the channel is allocated, the kernel + * address and the userland address related to the shared channel. + * In case of failure, a non-zero error code is returned. 
+ */ +int +proc_alloc_shchan(struct proc *p, struct shchan **retch, + enum state_thread **kent, enum state_thread **uent) +{ + rlim_t lmemlim; + vm_offset_t start_uva; + enum state_thread *newkva, *newuva; + struct shchan *chan, *newchan; + u_int i; + + proc_reap_shchans_uva(p); + + PROC_LOCK(p); + SLIST_FOREACH(chan, &p->p_shchans, sh_iter) + if (chan->sh_free_slots != 0) + break; + if (chan == NULL) { + lmemlim = lim_cur(p, RLIMIT_MEMLOCK); + start_uva = round_page((vm_offset_t)p->p_vmspace->vm_daddr + + lim_max(p, RLIMIT_DATA)); + PROC_UNLOCK(p); + newchan = NULL; + + if (vm_create_shchan(&p->p_vmspace->vm_map, lmemlim, start_uva, + (vm_offset_t *)&newkva, (vm_offset_t *)&newuva)) + return (ENOMEM); + newchan = malloc(sizeof(*newchan), M_SHCHAN, M_WAITOK); + newchan->sh_kern = newkva; + newchan->sh_user = newuva; + newchan->sh_free_slots = PAGE_NUM_THRSTATE; + for (i = 0; i < PAGE_NUM_THRSTATE; i++) + newkva[i] = TDS_INVALID; + + PROC_LOCK(p); + SLIST_FOREACH(chan, &p->p_shchans, sh_iter) + if (chan->sh_free_slots != 0) + break; + if (chan != NULL) { + /* + * New space has been made available while allocating + * the new shared channel page. + * Free the newly created page and reclaim the + * just freed slot. + */ + vm_destroy_shchan_nofreeuva(&p->p_vmspace->vm_map, + (vm_offset_t)newchan->sh_kern, + (vm_offset_t)newchan->sh_user); + newchan->sh_kern = NULL; + SLIST_INSERT_HEAD(&p->p_shcasync, newchan, sh_iter); + } else { + SLIST_INSERT_HEAD(&p->p_shchans, newchan, sh_iter); + chan = newchan; + } + } + KASSERT(chan != NULL && chan->sh_free_slots != 0, + ("proc_alloc_shchan: invalid NULL shared channel")); + + for (i = 0; i < PAGE_NUM_THRSTATE; i++) { + if (chan->sh_kern[i] > TDS_INVALID) + panic("proc_alloc_shchan: invalid page %p content %p", + chan->sh_kern, &chan->sh_kern[i]); + if (chan->sh_kern[i] == TDS_INVALID) + break; + } + if (i == PAGE_NUM_THRSTATE) + panic("proc_alloc_shchan: no valid state found"); + + /* Use the same value as thread_ctor(). */ + chan->sh_kern[i] = TDS_INACTIVE; + chan->sh_free_slots--; + PROC_UNLOCK(p); + + /* There could have been the need for an async free due to races. */ + proc_reap_shchans_uva(p); + + *retch = chan; + *kent = chan->sh_kern + i; + *uent = chan->sh_user + i; + return (0); +} + +/* + * Reap all the UVA asynchronously freed from shared channels, related + * to a specific process p. + */ +void +proc_reap_shchans_uva(struct proc *p) +{ + struct shchan *chan; + + PROC_LOCK(p); + while (!SLIST_EMPTY(&p->p_shcasync)) { + chan = SLIST_FIRST(&p->p_shcasync); + SLIST_REMOVE_HEAD(&p->p_shcasync, sh_iter); + PROC_UNLOCK(p); + if (chan->sh_kern != NULL) + panic("proc_reap_shchans_uva: invalid shchan"); + vm_destroy_shchan_uva(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_user); + free(chan, M_SHCHAN); + PROC_LOCK(p); + } + PROC_UNLOCK(p); +} + +/* + * Free all the shared channels related to a specific process p. + * It unlocks PROC_LOCK before to return. 
+ */ +void +proc_reclaim_shchans(struct proc *p) +{ + SLIST_HEAD(, shchan) local_chans; + struct shchan *tmpchan; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + SLIST_INIT(&local_chans); + + while (!SLIST_EMPTY(&p->p_shchans)) { + tmpchan = SLIST_FIRST(&p->p_shchans); + SLIST_REMOVE_HEAD(&p->p_shchans, sh_iter); + SLIST_INSERT_HEAD(&local_chans, tmpchan, sh_iter); + } + PROC_UNLOCK(p); + + while (!SLIST_EMPTY(&local_chans)) { + tmpchan = SLIST_FIRST(&local_chans); + SLIST_REMOVE_HEAD(&local_chans, sh_iter); + vm_destroy_shchan(&p->p_vmspace->vm_map, + (vm_offset_t)tmpchan->sh_kern, + (vm_offset_t)tmpchan->sh_user); + free(tmpchan, M_SHCHAN); + } + + /* + * As the proc lock can be dropped this seems also a good point + * for reaping UVA not freed yet. + */ + proc_reap_shchans_uva(p); +} + +/* + * Free a shared channel, related to a specific process p. + * In case of an asynchronous request, if needed, the UVA of the shared + * channel will not be immediately freed but moved to an asynchronous queue. + * It is responsibility of the caller to properly schedule later reaping. + * However, the backing page will be unwired right away as well as the + * KVA will be freed right away. + * It unlocks PROC_LOCK before to return if a synchronous request is + * performed, otherwise the PROC_LOCK is held for the whole duration. + */ +void +_proc_free_shchan(struct proc *p, struct shchan *chan, enum state_thread *kent, + boolean_t sync) +{ + ptrdiff_t i; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + i = kent - chan->sh_kern; + KASSERT(kent == &chan->sh_kern[i] && chan->sh_kern[i] != TDS_INVALID, + ("proc_free_shchan: invalid index retrieval %jd", (intmax_t)i)); + + chan->sh_kern[i] = TDS_INVALID; + chan->sh_free_slots++; + if (chan->sh_free_slots < PAGE_NUM_THRSTATE) { + if (sync == TRUE) + PROC_UNLOCK(p); + return; + } + KASSERT(chan->sh_free_slots == PAGE_NUM_THRSTATE, + ("proc_free_shchan: invalid number of free slots")); + + SLIST_REMOVE(&p->p_shchans, chan, shchan, sh_iter); + + if (sync == FALSE) { + vm_destroy_shchan_nofreeuva(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_kern, + (vm_offset_t)chan->sh_user); + chan->sh_kern = NULL; + SLIST_INSERT_HEAD(&p->p_shcasync, chan, sh_iter); + return; + } + PROC_UNLOCK(p); + + vm_destroy_shchan(&p->p_vmspace->vm_map, + (vm_offset_t)chan->sh_kern, (vm_offset_t)chan->sh_user); + free(chan, M_SHCHAN); +} + +/* * Initialize global thread allocation resources. */ void @@ -810,6 +1015,12 @@ thread_suspend_check(int return_instead) * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE. */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { + /* + * The shared channels should be teared down now. + * However it is responsibility of the thread + * requesting single-threading to do so when it is + * actually safe. 
+ */ PROC_UNLOCK(p); tidhash_remove(td); PROC_LOCK(p); Index: sys/kern/kern_umtx.c =================================================================== --- sys/kern/kern_umtx.c (revision 264627) +++ sys/kern/kern_umtx.c (working copy) @@ -70,6 +70,25 @@ __FBSDID("$FreeBSD$"); (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) #endif +#ifdef __LP64__ +CTASSERT(sizeof(long) == sizeof(uintptr_t)); +#define umtx_fuword fuword +#define umtx_casuword casuword +#define umtx_suword suword +#else +CTASSERT(sizeof(uint32_t) == sizeof(uintptr_t)); +#define umtx_fuword fuword32 +#define umtx_casuword casuword32 +#define umtx_suword suword32 +#endif + +#define UMTX_OWNER_ULOAD(m) \ + umtx_fuword(__DEVOLATILE(uintptr_t *, &(m)->m_owner)) +#define UMTX_OWNER_USTORE(m, curowner, newowner) \ + umtx_casuword(&(m)->m_owner, (curowner), (newowner)) +#define UMTX_OWNER_UDEFSTORE(m, newowner) \ + umtx_suword(__DEVOLATILE(uintptr_t *, &(m)->m_owner), (newowner)) + /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ @@ -227,7 +246,8 @@ static int umtxq_sleep(struct umtx_q *uq, const ch static int umtxq_count(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); -static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); +static int do_unlock_pp(struct thread *td, struct umutex *m, + uintptr_t curowner, uint32_t flags); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); @@ -902,15 +922,14 @@ kern_umtx_wake(struct thread *td, void *uaddr, int * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int -do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int mode) +do_lock_normal(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int mode) { struct abs_timeout timo; struct umtx_q *uq; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error = 0; - id = td->td_tid; uq = td->td_umtxq; if (timeout != NULL) @@ -921,7 +940,7 @@ static int * can fault on any access. */ for (;;) { - owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (mode == _UMUTEX_WAIT) { if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) return (0); @@ -929,7 +948,7 @@ static int /* * Try the uncontested case. This should be done in userland. */ - owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); + owner = UMTX_OWNER_USTORE(m, UMUTEX_UNOWNED, newowner); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) @@ -941,8 +960,9 @@ static int /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, + UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); @@ -985,10 +1005,11 @@ static int * either some one else has acquired the lock or it has been * released. */ - old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); /* The address was invalid. */ - if (old == -1) { + if (oldowner == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unbusy(&uq->uq_key); @@ -1004,7 +1025,7 @@ static int */ umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); - if (old == owner) + if (oldowner == owner) error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 
NULL : &timo); umtxq_remove(uq); @@ -1022,31 +1043,31 @@ static int * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int -do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_normal(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error; int count; - id = td->td_tid; /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); if ((owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); - if (old == -1) + oldowner = UMTX_OWNER_USTORE(m, owner, UMUTEX_UNOWNED); + if (oldowner == -1) return (EFAULT); - if (old == owner) + if (oldowner == owner) return (0); - owner = old; + owner = oldowner; } /* We should only ever be in here for contested locks */ @@ -1064,16 +1085,16 @@ static int * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - old = casuword32(&m->m_owner, owner, + oldowner = UMTX_OWNER_USTORE(m, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); - if (old == -1) + if (oldowner == -1) return (EFAULT); - if (old != owner) + if (oldowner != owner) return (EINVAL); return (0); } @@ -1086,12 +1107,12 @@ static int do_wake_umutex(struct thread *td, struct umutex *m) { struct umtx_key key; - uint32_t owner; + uintptr_t owner; uint32_t flags; int error; int count; - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); @@ -1111,7 +1132,7 @@ do_wake_umutex(struct thread *td, struct umutex *m umtxq_unlock(&key); if (count <= 1) - owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, UMUTEX_UNOWNED); umtxq_lock(&key); if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) @@ -1129,7 +1150,7 @@ static int do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; - uint32_t owner, old; + uintptr_t owner, oldowner; int type; int error; int count; @@ -1162,29 +1183,29 @@ do_wake2_umutex(struct thread *td, struct umutex * * any memory. */ if (count > 1) { - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); while ((owner & UMUTEX_CONTESTED) ==0) { - old = casuword32(&m->m_owner, owner, - owner|UMUTEX_CONTESTED); - if (old == owner) + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); + if (oldowner == owner) break; - owner = old; - if (old == -1) + owner = oldowner; + if (oldowner == -1) break; error = umtxq_check_susp(td); if (error != 0) break; } } else if (count == 1) { - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); while ((owner & ~UMUTEX_CONTESTED) != 0 && (owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, - owner|UMUTEX_CONTESTED); - if (old == owner) + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); + if (oldowner == owner) break; - owner = old; - if (old == -1) + owner = oldowner; + if (oldowner == -1) break; error = umtxq_check_susp(td); if (error != 0) @@ -1569,16 +1590,15 @@ umtx_pi_insert(struct umtx_pi *pi) * Lock a PI mutex. 
*/ static int -do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int try) +do_lock_pi(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq; struct umtx_pi *pi, *new_pi; - uint32_t id, owner, old; + uintptr_t owner, oldowner; int error; - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), @@ -1619,7 +1639,7 @@ static int /* * Try the uncontested case. This should be done in userland. */ - owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); + owner = UMTX_OWNER_USTORE(m, UMUTEX_UNOWNED, newowner); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) { @@ -1635,8 +1655,8 @@ static int /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { umtxq_lock(&uq->uq_key); @@ -1683,10 +1703,11 @@ static int * either some one else has acquired the lock or it has been * released. */ - old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + owner | UMUTEX_CONTESTED); /* The address was invalid. */ - if (old == -1) { + if (oldowner == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); @@ -1700,7 +1721,7 @@ static int * and we need to retry or we lost a race to the thread * unlocking the umtx. */ - if (old == owner) + if (oldowner == owner) error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, "umtxpi", timeout == NULL ? NULL : &timo); else { @@ -1725,35 +1746,35 @@ static int * Unlock a PI mutex. */ static int -do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pi(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; - uint32_t owner, old, id; + uintptr_t owner, oldowner; int error; int count; int pri; - id = td->td_tid; /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { - old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); - if (old == -1) + oldowner = UMTX_OWNER_USTORE(m, owner, UMUTEX_UNOWNED); + if (oldowner == -1) return (EFAULT); - if (old == owner) + if (oldowner == owner) return (0); - owner = old; + owner = oldowner; } /* We should only ever be in here for contested locks */ @@ -1807,16 +1828,16 @@ static int * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ - old = casuword32(&m->m_owner, owner, - count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); + oldowner = UMTX_OWNER_USTORE(m, owner, + count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); - if (old == -1) + if (oldowner == -1) return (EFAULT); - if (old != owner) + if (oldowner != owner) return (EINVAL); return (0); } @@ -1825,17 +1846,16 @@ static int * Lock a PP mutex. 
*/ static int -do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, - struct _umtx_time *timeout, int try) +do_lock_pp(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t flags, struct _umtx_time *timeout, int try) { struct abs_timeout timo; struct umtx_q *uq, *uq2; struct umtx_pi *pi; + uintptr_t owner; uint32_t ceiling; - uint32_t owner, id; int error, pri, old_inherited_pri, su; - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) @@ -1872,8 +1892,8 @@ static int } mtx_unlock_spin(&umtx_lock); - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { error = 0; @@ -1955,27 +1975,27 @@ out: * Unlock a PP mutex. */ static int -do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) +do_unlock_pp(struct thread *td, struct umutex *m, uintptr_t curowner, + uint32_t flags) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; - uint32_t owner, id; + uintptr_t owner; uint32_t rceiling; int error, pri, new_inherited_pri, su; - id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. */ - owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + owner = UMTX_OWNER_ULOAD(m); if (owner == -1) return (EFAULT); - if ((owner & ~UMUTEX_CONTESTED) != id) + if ((owner & ~UMUTEX_CONTESTED) != curowner) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); @@ -2003,8 +2023,7 @@ static int * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. */ - error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), - UMUTEX_CONTESTED); + error = UMTX_OWNER_UDEFSTORE(m, UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) @@ -2038,12 +2057,12 @@ static int } static int -do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, - uint32_t *old_ceiling) +do_set_ceiling(struct thread *td, struct umutex *m, uintptr_t newowner, + uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; + uintptr_t owner; uint32_t save_ceiling; - uint32_t owner, id; uint32_t flags; int error; @@ -2052,7 +2071,6 @@ static int return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); - id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) @@ -2064,13 +2082,12 @@ static int save_ceiling = fuword32(&m->m_ceilings[0]); - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = UMTX_OWNER_USTORE(m, UMUTEX_CONTESTED, + newowner | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { suword32(&m->m_ceilings[0], ceiling); - suword32(__DEVOLATILE(uint32_t *, &m->m_owner), - UMUTEX_CONTESTED); + UMTX_OWNER_UDEFSTORE(m, UMUTEX_CONTESTED); error = 0; break; } @@ -2081,7 +2098,7 @@ static int break; } - if ((owner & ~UMUTEX_CONTESTED) == id) { + if ((owner & ~UMUTEX_CONTESTED) == newowner) { suword32(&m->m_ceilings[0], ceiling); error = 0; break; @@ -2121,25 +2138,28 @@ static int * Lock a userland POSIX mutex. 
*/ static int -do_lock_umutex(struct thread *td, struct umutex *m, +do_lock_umutex(struct thread *td, struct umutex *m, uintptr_t newowner, struct _umtx_time *timeout, int mode) { uint32_t flags; int error; + if (newowner == UMUTEX_UNOWNED) + return (EINVAL); + flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - error = do_lock_normal(td, m, flags, timeout, mode); + error = do_lock_normal(td, m, newowner, flags, timeout, mode); break; case UMUTEX_PRIO_INHERIT: - error = do_lock_pi(td, m, flags, timeout, mode); + error = do_lock_pi(td, m, newowner, flags, timeout, mode); break; case UMUTEX_PRIO_PROTECT: - error = do_lock_pp(td, m, flags, timeout, mode); + error = do_lock_pp(td, m, newowner, flags, timeout, mode); break; default: return (EINVAL); @@ -2159,21 +2179,24 @@ static int * Unlock a userland POSIX mutex. */ static int -do_unlock_umutex(struct thread *td, struct umutex *m) +do_unlock_umutex(struct thread *td, struct umutex *m, uintptr_t curowner) { uint32_t flags; + if (curowner == UMUTEX_UNOWNED) + return (EINVAL); + flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - return (do_unlock_normal(td, m, flags)); + return (do_unlock_normal(td, m, curowner, flags)); case UMUTEX_PRIO_INHERIT: - return (do_unlock_pi(td, m, flags)); + return (do_unlock_pi(td, m, curowner, flags)); case UMUTEX_PRIO_PROTECT: - return (do_unlock_pp(td, m, flags)); + return (do_unlock_pp(td, m, curowner, flags)); } return (EINVAL); @@ -2181,7 +2204,7 @@ static int static int do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, - struct timespec *timeout, u_long wflags) + uintptr_t curowner, struct timespec *timeout, u_long wflags) { struct abs_timeout timo; struct umtx_q *uq; @@ -2222,7 +2245,7 @@ do_cv_wait(struct thread *td, struct ucond *cv, st umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); - error = do_unlock_umutex(td, m); + error = do_unlock_umutex(td, m, curowner); if (timeout != NULL) abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), @@ -2943,13 +2966,13 @@ __umtx_op_lock_umutex(struct thread *td, struct _u return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, 0); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); + return do_lock_umutex(td, uap->obj, uap->owner, NULL, _UMUTEX_TRY); } static int @@ -2968,7 +2991,7 @@ __umtx_op_wait_umutex(struct thread *td, struct _u return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, _UMUTEX_WAIT); } static int @@ -2980,13 +3003,13 @@ __umtx_op_wake_umutex(struct thread *td, struct _u static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_unlock_umutex(td, uap->obj); + return do_unlock_umutex(td, uap->obj, uap->owner); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { - return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); + return do_set_ceiling(td, uap->obj, uap->owner, uap->val, uap->uaddr1); } static int @@ -3004,7 +3027,8 @@ __umtx_op_cv_wait(struct thread *td, struct _umtx_ return (error); ts = &timeout; } - return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); + return (do_cv_wait(td, uap->obj, 
uap->uaddr1, uap->owner, ts, + uap->val)); } static int @@ -3222,7 +3246,7 @@ __umtx_op_lock_umutex_compat32(struct thread *td, return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, 0); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, 0); } static int @@ -3241,7 +3265,7 @@ __umtx_op_wait_umutex_compat32(struct thread *td, return (error); tm_p = &timeout; } - return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); + return do_lock_umutex(td, uap->obj, uap->owner, tm_p, _UMUTEX_WAIT); } static int @@ -3259,7 +3283,8 @@ __umtx_op_cv_wait_compat32(struct thread *td, stru return (error); ts = &timeout; } - return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); + return (do_cv_wait(td, uap->obj, uap->uaddr1, uap->owner, ts, + uap->val)); } static int Index: sys/kern/kern_thr.c =================================================================== --- sys/kern/kern_thr.c (revision 264627) +++ sys/kern/kern_thr.c (working copy) @@ -92,7 +92,9 @@ static int create_thread(struct thread *td, mconte char *stack_base, size_t stack_size, char *tls_base, long *child_tid, long *parent_tid, - int flags, struct rtprio *rtp); + int flags, struct rtprio *rtp, + enum state_thread **child_chan, + enum state_thread **parent_chan); /* * System call interface. @@ -108,7 +110,7 @@ sys_thr_create(struct thread *td, struct thr_creat return (error); error = create_thread(td, &ctx.uc_mcontext, NULL, NULL, - NULL, 0, NULL, uap->id, NULL, uap->flags, NULL); + NULL, 0, NULL, uap->id, NULL, uap->flags, NULL, NULL, NULL); return (error); } @@ -143,7 +145,7 @@ kern_thr_new(struct thread *td, struct thr_param * error = create_thread(td, NULL, param->start_func, param->arg, param->stack_base, param->stack_size, param->tls_base, param->child_tid, param->parent_tid, param->flags, - rtpp); + rtpp, param->child_chan, param->parent_chan); return (error); } @@ -153,12 +155,15 @@ create_thread(struct thread *td, mcontext_t *ctx, char *stack_base, size_t stack_size, char *tls_base, long *child_tid, long *parent_tid, - int flags, struct rtprio *rtp) + int flags, struct rtprio *rtp, enum state_thread **child_chan, + enum state_thread **parent_chan) { stack_t stack; struct thread *newtd; + struct shchan *local_shchan; + enum state_thread *kern_shchanp, *user_shchanp; struct proc *p; - int error; + int error, numthreads, ret_pchan; p = td->td_proc; @@ -250,7 +255,62 @@ create_thread(struct thread *td, mcontext_t *ctx, } } + if (child_chan != NULL && proc_alloc_shchan(p, &local_shchan, + &kern_shchanp, &user_shchanp) == 0) { + /* Lockless, the thread is not linked anywhere. */ + newtd->td_sh_state = kern_shchanp; + newtd->td_sh_chan = local_shchan; + if (copyout(child_chan, &user_shchanp, + sizeof(enum state_thread *)) != 0) { + PROC_LOCK(p); + proc_free_shchan(p, local_shchan, kern_shchanp); + newtd->td_sh_state = NULL; + newtd->td_sh_chan = NULL; + } + } + + /* + * If there is just one single thread it means that no other + * threads can be added in the meanwhile, as curthread is dealing + * with current thr_new(). + * There is no race, then about allocating also a shared channel + * for the single curthread. + * It is only important to care about the race where a + * multi-threaded process is made single-thread while PROC_LOCK() + * is dropped. 
+ */ + ret_pchan = ENOMEM; PROC_LOCK(td->td_proc); + do { + numthreads = td->td_proc->p_numthreads; + PROC_UNLOCK(td->td_proc); + + if (parent_chan != NULL && numthreads == 1) { + ret_pchan = proc_alloc_shchan(p, &local_shchan, + &kern_shchanp, &user_shchanp); + if (ret_pchan == 0) { + /* + * Lock for consistency as, right now, + * the process is still single-threaded + * and the only thread is executing + * sys_thr_new(). + */ + thread_lock(td); + if (td->td_sh_state != NULL || + td->td_sh_chan != NULL) + panic("thr_new: inconsistent state"); + *kern_shchanp = TDS_RUNNING; + td->td_sh_state = kern_shchanp; + td->td_sh_chan = local_shchan; + thread_unlock(td); + } + } + + PROC_LOCK(td->td_proc); + if (numthreads == 1 && td->td_proc->p_numthreads > 1) + panic("sys_thr_new: unexpected threading of curproc"); + } while (numthreads > 1 && td->td_proc->p_numthreads == 1); + td->td_proc->p_flag |= P_HADTHREADS; thread_link(newtd, p); bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); @@ -262,6 +322,19 @@ create_thread(struct thread *td, mcontext_t *ctx, newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; PROC_UNLOCK(p); + if (parent_chan != NULL && ret_pchan == 0 && copyout(parent_chan, + &user_shchanp, sizeof(enum state_thread *)) != 0) { + /* See locking comment above. */ + thread_lock(td); + kern_shchanp = td->td_sh_state; + local_shchan = td->td_sh_chan; + td->td_sh_state = NULL; + td->td_sh_chan = NULL; + thread_unlock(td); + PROC_LOCK(p); + proc_free_shchan(p, local_shchan, kern_shchanp); + } + tidhash_add(newtd); thread_lock(newtd); @@ -304,6 +377,9 @@ sys_thr_exit(struct thread *td, struct thr_exit_ar /* long *state */ { struct proc *p; + struct thread *td2; + struct shchan *tmpchan; + enum state_thread *tmpstate; p = td->td_proc; @@ -326,11 +402,48 @@ sys_thr_exit(struct thread *td, struct thr_exit_ar LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); tdsigcleanup(td); + thread_lock(td); + if (td->td_sh_state != NULL) { + KASSERT(td->td_sh_chan != NULL, + ("sys_thr_exit: invalid td_sh_chan")); + tmpchan = td->td_sh_chan; + tmpstate = td->td_sh_state; + td->td_sh_state = NULL; + td->td_sh_chan = NULL; + thread_unlock(td); + proc_free_shchan_async(p, tmpchan, tmpstate); + } else + thread_unlock(td); + + /* + * In case the process is going to be single-threaded after + * this thr_exit(), free also the remaining thread shared + * channel. 
+ */ + if (p->p_numthreads == 2) { + td2 = TAILQ_FIRST(&p->p_threads); + if (td2 == td) + td2 = TAILQ_NEXT(td2, td_plist); + thread_lock(td2); + if (td2->td_sh_state != NULL) { + KASSERT(td2->td_sh_chan != NULL, + ("sys_thr_exit: invalid td_sh_chan")); + tmpchan = td2->td_sh_chan; + tmpstate = td2->td_sh_state; + td2->td_sh_state = NULL; + td2->td_sh_chan = NULL; + thread_unlock(td2); + proc_free_shchan_async(p, tmpchan, tmpstate); + } else + thread_unlock(td2); + } PROC_SLOCK(p); thread_stopped(p); thread_exit(); /* NOTREACHED */ } + KASSERT(SLIST_EMPTY(&p->p_shchans), + ("sys_thr_exit: shared channels present with single thread")); PROC_UNLOCK(p); rw_wunlock(&tidhash_lock); return (0); Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c (revision 264627) +++ sys/kern/kern_proc.c (working copy) @@ -227,6 +227,8 @@ proc_init(void *mem, int size, int flags) cv_init(&p->p_pwait, "ppwait"); cv_init(&p->p_dbgwait, "dbgwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ + SLIST_INIT(&p->p_shchans); + SLIST_INIT(&p->p_shcasync); EVENTHANDLER_INVOKE(process_init, p); p->p_stats = pstats_alloc(); SDT_PROBE(proc, kernel, init, return, p, size, flags, 0, 0); Index: sys/kern/kern_exit.c =================================================================== --- sys/kern/kern_exit.c (revision 264627) +++ sys/kern/kern_exit.c (working copy) @@ -219,7 +219,13 @@ exit1(struct thread *td, int rv) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); p->p_xstat = rv; /* Let event handler change exit status */ - PROC_UNLOCK(p); + + /* + * As long as the process is single-threaded now, reclaim all the + * shared channels. + */ + proc_reclaim_shchans(p); + /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); @@ -855,6 +861,8 @@ proc_reap(struct thread *td, struct proc *p, int * #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("proc_reap: no residual thread!")); + KASSERT(SLIST_EMPTY(&p->p_shchans), + ("proc_reap: shared channels present when destroying proc")); uma_zfree(proc_zone, p); sx_xlock(&allproc_lock); nprocs--; Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c (revision 264627) +++ sys/kern/kern_exec.c (working copy) @@ -306,11 +306,13 @@ kern_execve(td, args, mac_p) * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ - if (error == 0) + if (error == 0) { thread_single(SINGLE_EXIT); - else + proc_reclaim_shchans(p); + } else { thread_single_end(); - PROC_UNLOCK(p); + PROC_UNLOCK(p); + } } return (error); Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c (revision 264627) +++ sys/kern/kern_fork.c (working copy) @@ -926,6 +926,8 @@ fail: #endif racct_proc_exit(newproc); fail1: + KASSERT(SLIST_EMPTY(&newproc->p_shchans), + ("fork1: shared channels present when destroying proc")); if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc);
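
Not part of the diff: a hedged userland sketch of how the new interfaces might be consumed. The patch widens umutex.m_owner to a pointer-sized word, moves UMUTEX_CONTESTED down to bit 0, threads an explicit "owner" argument through _umtx_op(), and lets thr_new() hand each thread a shared kernel/user channel mirroring its scheduling state. One plausible use (my reading of the intent, not something the diff states) is an adaptive mutex in libthr: store the address of the owner's shared slot in m_owner, spin while the kernel still reports the owner as TDS_RUNNING, and only then fall back to the kernel. The helper names (adaptive_lock, SPIN_LIMIT, self_chan) and the userland copy of enum state_thread are illustrative assumptions, not part of the patch.

/*
 * Hypothetical userland sketch, assuming the kernel changes above plus a
 * libc _umtx_op() stub carrying the extra "owner" argument.  self_chan is
 * the slot returned through thr_param.child_chan/parent_chan at thread
 * creation; its address is at least 4-byte aligned, so bit 0 stays free
 * for UMUTEX_CONTESTED.
 */
#include <sys/types.h>
#include <sys/umtx.h>
#include <machine/atomic.h>
#include <stdint.h>

/* Mirrors the kernel enum from sys/proc.h; assumes it gets exported. */
enum state_thread {
	TDS_INACTIVE = 0x0,
	TDS_INHIBITED,
	TDS_CAN_RUN,
	TDS_RUNQ,
	TDS_RUNNING,
	TDS_INVALID
};

#define	SPIN_LIMIT	2000			/* arbitrary spin budget */

static enum state_thread *self_chan;		/* set up after thr_new() */

static int
adaptive_lock(struct umutex *m)
{
	uintptr_t owner, self;
	int spins;

	self = (uintptr_t)self_chan;

	/* Uncontested fast path, entirely in userland. */
	if (atomic_cmpset_acq_ptr(&m->m_owner, (uintptr_t)UMUTEX_UNOWNED,
	    self))
		return (0);

	for (spins = 0; spins < SPIN_LIMIT; spins++) {
		owner = m->m_owner;
		if (owner == UMUTEX_UNOWNED &&
		    atomic_cmpset_acq_ptr(&m->m_owner,
		    (uintptr_t)UMUTEX_UNOWNED, self))
			return (0);
		/*
		 * m_owner holds the address of the owner's shared channel
		 * slot; keep spinning only while the kernel still reports
		 * that thread as on-CPU.
		 */
		if ((owner & ~(uintptr_t)UMUTEX_CONTESTED) == 0 ||
		    *(volatile enum state_thread *)
		    (owner & ~(uintptr_t)UMUTEX_CONTESTED) != TDS_RUNNING)
			break;
	}

	/* Owner is off-CPU or the spin budget ran out: block in the kernel. */
	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL, self));
}

Under this reading it also becomes clear why do_lock_umutex() now rejects an owner of UMUTEX_UNOWNED and why the ownership checks compare against the caller-supplied owner word instead of td_tid: the kernel no longer assumes the owner identifier stored in m_owner is a thread ID.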