Index: sys/kern/kern_exit.c
===================================================================
--- sys/kern/kern_exit.c	(revision 272596)
+++ sys/kern/kern_exit.c	(working copy)
@@ -160,7 +160,8 @@
 void
 exit1(struct thread *td, int rv)
 {
-	struct proc *p, *nq, *q, *t;
+	struct proc *p, *nq, *q, *t, *reproc;
+	struct sysreaper *reap;
 	struct thread *tdt;
 	struct vnode *ttyvp = NULL;
 
@@ -443,15 +444,30 @@
 
 	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);
 
+	sx_xlock(&proctree_lock);
 	/*
-	 * Reparent all children processes:
-	 *  - traced ones to the original parent (or init if we are that parent)
-	 *  - the rest to init
+	 * Release the reaper we control, if any.  reaper_exit() returns the
+	 * reaper that applies to this process itself; that reference is
+	 * dropped once the reparenting below is finished.
 	 */
-	sx_xlock(&proctree_lock);
+	reap = reaper_exit(p);
+
+	/*
+	 * Reparent all of this process's children to the designated reaper,
+	 * falling back to init when no live reaper can be found.
+	 * proctree_lock is held exclusively across the reparenting.
+	 */
+	reproc = NULL;
 	q = LIST_FIRST(&p->p_children);
-	if (q != NULL)		/* only need this if any child is S_ZOMB */
-		wakeup(initproc);
+	if (q != NULL) {	/* only need this if any child is S_ZOMB */
+		reproc = reaper_get(reap);
+		if (reproc == NULL) {
+			/* reaper_get() found nothing; use init. */
+			reproc = initproc;
+			PHOLD(reproc);
+		}
+		wakeup(reproc);
+	}
 	for (; q != NULL; q = nq) {
 		nq = LIST_NEXT(q, p_sibling);
 		PROC_LOCK(q);
@@ -458,7 +474,7 @@
 		q->p_sigparent = SIGCHLD;
 
 		if (!(q->p_flag & P_TRACED)) {
-			proc_reparent(q, initproc);
+			proc_reparent(q, reproc);
 		} else {
 			/*
 			 * Traced processes are killed since their existence
@@ -466,7 +482,7 @@
 			 */
 			t = proc_realparent(q);
 			if (t == p) {
-				proc_reparent(q, initproc);
+				proc_reparent(q, reproc);
 			} else {
 				PROC_LOCK(t);
 				proc_reparent(q, t);
@@ -543,8 +559,10 @@
 		/*
 		 * Notify parent that we're gone.  If parent has the
 		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
-		 * notify process 1 instead (and hope it will handle this
-		 * situation).
+		 * notify the reaper process instead (it will handle
+		 * this situation).
+		 *
+		 * NOTE: The reaper can still be the parent process.
 		 */
 		PROC_LOCK(p->p_pptr);
 		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
@@ -555,7 +573,14 @@
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 			pp = p->p_pptr;
 			PROC_UNLOCK(pp);
-			proc_reparent(p, initproc);
+			if (reproc == NULL)
+				reproc = reaper_get(reap);
+			if (reproc == NULL) {
+				/* reaper_get() found nothing; use init. */
+				reproc = initproc;
+				PHOLD(reproc);
+			}
+			proc_reparent(p, reproc);
 			p->p_sigparent = SIGCHLD;
 			PROC_LOCK(p->p_pptr);
 
@@ -568,6 +593,10 @@
 		} else
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 
+		/*
+		 * Signal (possibly new) parent.
+		 * XXX I MAY MISS SOMETHING HERE
+		 */
 		if (p->p_pptr == initproc)
 			kern_psignal(p->p_pptr, SIGCHLD);
 		else if (p->p_sigparent != 0) {
@@ -578,6 +607,20 @@
 		}
 	} else
 		PROC_LOCK(p->p_pptr);
+	if (reproc != NULL) {
+		/*
+		 * PRELE() takes the process lock itself, and p->p_pptr
+		 * is locked here; when the reaper is our parent, drop the
+		 * hold under the lock we already own.
+		 */
+		if (reproc == p->p_pptr)
+			_PRELE(reproc);
+		else
+			PRELE(reproc);
+	}
+	if (reap)
+		reaper_drop(reap);
+
 	sx_xunlock(&proctree_lock);
 
 	/*
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c	(revision 272596)
+++ sys/kern/kern_fork.c	(working copy)
@@ -382,6 +382,14 @@
 
 	p2->p_state = PRS_NEW;		/* protect against others */
 	p2->p_pid = trypid;
+
+	/*
+	 * NOTE: Process 0 will not have a reaper, but process 1 (init) and
+	 *	 all other processes always will.
+	 */
+	if ((p2->p_reaper = p1->p_reaper) != NULL)
+		reaper_hold(p2->p_reaper);
+
 	AUDIT_ARG_PID(p2->p_pid);
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
@@ -1038,3 +1046,176 @@
 	ktrsysret(SYS_fork, 0, 0);
 #endif
 }
+
+/*
+ * Bump ref on reaper, preventing destruction.  The count must already
+ * be positive (asserted).
+ */
+void
+reaper_hold(struct sysreaper *reap)
+{
+	KASSERT(reap->refs > 0, ("Empty refs count in reaper_hold"));
+	refcount_acquire(&reap->refs);
+}
+
+/*
+ * Drop ref on reaper, destroy the structure on the 1->0
+ * transition and loop on the parent.
+ */
+void
+reaper_drop(struct sysreaper *next)
+{
+	struct sysreaper *reap;
+
+	while ((reap = next) != NULL) {
+		next = NULL;
+		if (refcount_release(&reap->refs)) {
+			next = reap->parent;
+			KASSERT(reap->p == NULL,
+			    ("reaper_drop: owner still set"));
+			reap->parent = NULL;
+			/* Pairs with rw_init() in reaper_init(). */
+			rw_destroy(&reap->lock);
+			free(reap, M_REAPER);
+		}
+	}
+}
+
+/*
+ * Initialize a static or newly allocated reaper structure
+ */
+void
+reaper_init(struct proc *p, struct sysreaper *reap)
+{
+	reap->parent = p->p_reaper;
+	reap->p = p;
+	reap->flags = REAPER_STAT_OWNED;
+	reap->refs = 1;
+	if (p == initproc) {
+		reap->flags |= REAPER_STAT_REALINIT;
+		reap->refs = 2;
+	}
+	rw_init(&reap->lock, "subrp");
+	p->p_reaper = reap;
+}
+
+/*
+ * Called from exit1() with proctree_lock exclusively held.
+ *
+ * This is a bit simpler than the PREAP_CLEAR path in reaper_set() because
+ * there are no threads remaining to race.  We only release if we own the
+ * reaper, the exit code will handle the final p_reaper release.
+ */
+struct sysreaper *
+reaper_exit(struct proc *p)
+{
+	struct sysreaper *reap;
+
+	/*
+	 * Release acquired reaper
+	 */
+	if ((reap = p->p_reaper) != NULL && reap->p == p) {
+
+		rw_wlock(&reap->lock);
+		p->p_reaper = reap->parent;
+		if (p->p_reaper)
+			reaper_hold(p->p_reaper);
+		reap->p = NULL;
+		rw_wunlock(&reap->lock);
+		reaper_drop(reap);
+	}
+
+	/*
+	 * Return and clear reaper (reap->p does not equal p).
+	 * Caller must drop it.
+	 */
+	if ((reap = p->p_reaper) != NULL) {
+		p->p_reaper = NULL;
+	}
+	return (reap);
+}
+
+/*
+ * Return a held (PHOLD) process representing the reaper for the
+ * topology node (reap).  NULL is returned when reap is NULL or no
+ * live reaper is found; callers must handle it.  Caller should PRELE()
+ * the returned reaper process when finished.
+ *
+ * Remove dead internal nodes while we are at it.
+ *
+ * The returned process is held, not locked.
+ */
+struct proc *
+reaper_get(struct sysreaper *reap)
+{
+	struct sysreaper *next;
+	struct proc *reproc;
+	int wlocked;
+
+	if (reap == NULL)
+		return (NULL);
+
+	/*
+	 * Extra hold for loop
+	 */
+	reaper_hold(reap);
+
+	while (reap) {
+		rw_rlock(&reap->lock);
+		if (reap->p) {
+			/*
+			 * Live reaper: return it held.
+			 */
+			reproc = reap->p;
+			PHOLD(reproc);
+			rw_runlock(&reap->lock);
+			reaper_drop(reap);
+			return (reproc);
+		}
+
+		/*
+		 * Traverse upwards in the reaper topology, destroy
+		 * dead internal nodes when possible.
+		 *
+		 * NOTE: Our ref on next means that a dead node should
+		 *	 have 2 (ours and reap->parent's).
+		 */
+		wlocked = 0;
+		next = reap->parent;
+		while (next) {
+			reaper_hold(next);
+			if (next->refs == 2 && next->p == NULL) {
+				/*
+				 * Upgrade to the write lock only once; on
+				 * the chain path below we already own it.
+				 */
+				if (!wlocked) {
+					rw_runlock(&reap->lock);
+					rw_wlock(&reap->lock);
+					wlocked = 1;
+				}
+				if (next->refs == 2 &&
+				    reap->parent == next &&
+				    next->p == NULL) {
+					/*
+					 * reap->parent inherits ref from next.
+					 */
+					reap->parent = next->parent;
+					next->parent = NULL;
+					reaper_drop(next);	/* ours */
+					reaper_drop(next);	/* old parent */
+					next = reap->parent;
+					continue;	/* possible chain */
+				}
+			}
+			break;
+		}
+		if (wlocked)
+			rw_wunlock(&reap->lock);
+		else
+			rw_runlock(&reap->lock);
+		reaper_drop(reap);
+		reap = next;
+	}
+	return (NULL);
+}
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c	(revision 272596)
+++ sys/kern/sys_process.c	(working copy)
@@ -1316,6 +1316,76 @@
 	return (0);
 }
 
+/*
+ * PROC_REAPCTL backend: make (p) a reaper (PREAP_SET), give up reaper
+ * status (PREAP_CLEAR) or query it (PREAP_GET).  Returns 0 or EINVAL.
+ *
+ * NOTE(review): this routine takes PROC_LOCK(p) itself; confirm that the
+ * caller does not already hold it.
+ */
+static int
+reaper_set(struct thread *td, struct proc *p, int flags)
+{
+	int error;
+	struct sysreaper *reap;
+
+	error = EINVAL;
+
+	switch (PREAP_OP(flags)) {
+	case PREAP_SET:
+		/*
+		 * Allocate before taking the process lock: an M_WAITOK
+		 * allocation may sleep, which is not allowed with a
+		 * mutex held.
+		 */
+		reap = malloc(sizeof(*reap), M_REAPER, M_WAITOK | M_ZERO);
+		PROC_LOCK(p);
+		if (p->p_reaper == NULL || p->p_reaper->p != p) {
+			reaper_init(p, reap);
+			reap = NULL;
+			error = 0;
+		}
+		PROC_UNLOCK(p);
+		/* Already a reaper: discard the unused structure. */
+		free(reap, M_REAPER);
+		break;
+	case PREAP_CLEAR:
+		PROC_LOCK(p);
+release_again:
+		reap = p->p_reaper;
+		/* Processes without a reaper have nothing to clear. */
+		if (reap != NULL && reap->p == p) {
+			reaper_hold(reap);	/* in case of thread race */
+			rw_wlock(&reap->lock);
+			if (reap->p != p) {
+				rw_wunlock(&reap->lock);
+				reaper_drop(reap);
+				goto release_again;
+			}
+			reap->p = NULL;
+			p->p_reaper = reap->parent;
+			if (p->p_reaper)
+				reaper_hold(p->p_reaper);
+			rw_wunlock(&reap->lock);
+			reaper_drop(reap);	/* our ref */
+			reaper_drop(reap);	/* old p_reaper ref */
+			error = 0;
+		}
+		PROC_UNLOCK(p);
+		break;
+	case PREAP_GET:
+		/*
+		 * NOTE(review): succeeds when (p) is NOT itself a reaper
+		 * and fails with EINVAL when it is; confirm the polarity.
+		 */
+		if (p->p_reaper == NULL || p->p_reaper->p != p)
+			error = 0;
+		break;
+	}
+
+	return (error);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct procctl_args {
 	idtype_t idtype;
@@ -1333,6 +1403,7 @@
 
 	switch (uap->com) {
 	case PROC_SPROTECT:
+	case PROC_REAPCTL:
 		error = copyin(uap->data, &flags, sizeof(flags));
 		if (error)
 			return (error);
@@ -1353,6 +1424,8 @@
 	switch (com) {
 	case PROC_SPROTECT:
 		return (protect_set(td, p, *(int *)data));
+	case PROC_REAPCTL:
+		return (reaper_set(td, p, *(int *)data));
 	default:
 		return (EINVAL);
 	}
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h	(revision 272596)
+++ sys/sys/proc.h	(working copy)
@@ -45,7 +45,8 @@
 #include
 #endif
 #include
-#include
+#include
+#include
 #include
 #include
 #include
@@ -170,6 +171,7 @@
 struct racct;
 struct sbuf;
 struct sleepqueue;
+struct sysreaper;
 struct td_sched;
 struct thread;
 struct trapframe;
@@ -588,6 +590,7 @@
 	uint64_t	p_prev_runtime;	/* (c) Resource usage accounting. */
 	struct racct	*p_racct;	/* (b) Resource accounting. */
 	u_char		p_throttled;	/* (c) Flag for racct pcpu throttling */
+	struct sysreaper *p_reaper;	/* reaper control */
 	/*
 	 * An orphan is the child that has beed re-parented to the
 	 * debugger as a result of attaching to it.  Need to keep
@@ -598,6 +601,17 @@
 	LIST_HEAD(, proc) p_orphans;	/* (e) Pointer to list of orphans. */
 };
 
+struct sysreaper {
+	struct rwlock	lock;		/* thread or topo access */
+	struct sysreaper *parent;	/* upward topology only */
+	struct proc	*p;		/* who the reaper is */
+	uint32_t	flags;		/* control flags */
+	u_int		refs;		/* shared structure refs */
+};
+
+#define	REAPER_STAT_OWNED	0x00000001
+#define	REAPER_STAT_REALINIT	0x00000002
+
 #define	p_session	p_pgrp->pg_session
 #define	p_pgid		p_pgrp->pg_id
 
@@ -700,6 +714,7 @@
 MALLOC_DECLARE(M_PGRP);
 MALLOC_DECLARE(M_SESSION);
 MALLOC_DECLARE(M_SUBPROC);
+MALLOC_DECLARE(M_REAPER);
 #endif
 
 #define	FOREACH_PROC_IN_SYSTEM(p)					\
@@ -958,6 +973,11 @@
 int	thread_unsuspend_one(struct thread *td);
 void	thread_wait(struct proc *p);
 struct thread	*thread_find(struct proc *p, lwpid_t tid);
+void	reaper_hold(struct sysreaper *reap);
+void	reaper_drop(struct sysreaper *reap);
+struct sysreaper	*reaper_exit(struct proc *p);
+void	reaper_init(struct proc *p, struct sysreaper *reap);
+struct proc	*reaper_get(struct sysreaper *reap);
 
 static __inline int
 curthread_pflags_set(int flags)
Index: sys/sys/procctl.h
===================================================================
--- sys/sys/procctl.h	(revision 272596)
+++ sys/sys/procctl.h	(working copy)
@@ -31,6 +31,7 @@
 #define	_SYS_PROCCTL_H_
 
 #define	PROC_SPROTECT	1	/* set protected state */
+#define	PROC_REAPCTL	2	/* set the reaper process */
 
 /* Operations for PROC_SPROTECT (passed in integer arg). */
 #define	PPROT_OP(x)	((x) & 0xf)
@@ -42,6 +43,12 @@
 #define	PPROT_DESCEND	0x10
 #define	PPROT_INHERIT	0x20
 
+/* Operations for PROC_REAPCTL (passed in integer arg). */
+#define	PREAP_OP(x)	((x) & 0xf)
+#define	PREAP_SET	1
+#define	PREAP_CLEAR	2
+#define	PREAP_GET	4
+
 #ifndef _KERNEL
 #include
 #include