--- //depot/vendor/freebsd_6/src/sys/fs/pseudofs/pseudofs_vncache.c 2005/07/07 01:32:03 +++ //depot/yahoo/ybsd_6/src/sys/fs/pseudofs/pseudofs_vncache.c 2006/10/31 15:17:15 @@ -110,27 +111,28 @@ { struct pfs_vdata *pvd; int error; + struct vnode *vp; /* * See if the vnode is in the cache. * XXX linear search is not very efficient. */ +retry: mtx_lock(&pfs_vncache_mutex); for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) { if (pvd->pvd_pn == pn && pvd->pvd_pid == pid && pvd->pvd_vnode->v_mount == mp) { - if (vget(pvd->pvd_vnode, 0, curthread) == 0) { + vp = pvd->pvd_vnode; + VI_LOCK(vp); + mtx_unlock(&pfs_vncache_mutex); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) { ++pfs_vncache_hits; - *vpp = pvd->pvd_vnode; - mtx_unlock(&pfs_vncache_mutex); + *vpp = vp; /* XXX see comment at top of pfs_lookup() */ - cache_purge(*vpp); - vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE, - curthread); + cache_purge(vp); return (0); } - /* XXX if this can happen, we're in trouble */ - break; + goto retry; } } mtx_unlock(&pfs_vncache_mutex); --- //depot/vendor/freebsd_6/src/sys/kern/kern_descrip.c 2006/09/29 12:29:26 +++ //depot/yahoo/ybsd_6/src/sys/kern/kern_descrip.c 2006/12/29 06:39:47 @@ -2152,6 +2152,18 @@ } /* We have the last ref so we can proceed without the file lock. 
*/ FILE_UNLOCK(fp); + + /* + * Avoid race condition with unp_gc + * by shortly acquiring the filelist_lock + * Releasing FWAIT is not protected by a mutex + * so we may do some unnecessary locks/unlocks + */ + if (fp->f_gcflag & FWAIT) { + sx_xlock(&filelist_lock); + sx_xunlock(&filelist_lock); + } + if (fp->f_count < 0) panic("fdrop: count < 0"); if (fp->f_ops != &badfileops) --- //depot/vendor/freebsd_6/src/sys/kern/uipc_usrreq.c 2006/07/13 07:28:19 +++ //depot/yahoo/ybsd_6/src/sys/kern/uipc_usrreq.c 2006/12/29 06:50:25 @@ -68,6 +69,13 @@ #include +struct unpcb_wrapper { + struct unpcb unpw_unpcb; + u_int unpw_refcount; +}; + +#define UNP_REFCOUNT(unp) (((struct unpcb_wrapper *)(unp))->unpw_refcount) + static uma_zone_t unp_zone; static unp_gen_t unp_gencnt; static u_int unp_count; @@ -769,6 +777,7 @@ unp->unp_socket = so; so->so_pcb = unp; + UNP_REFCOUNT(unp) = 1; UNP_LOCK(); unp->unp_gencnt = ++unp_gencnt; unp_count++; @@ -782,13 +791,18 @@ static void unp_detach(struct unpcb *unp) { + struct sockaddr_un *saved_unp_addr; struct vnode *vp; int local_unp_rights; + int freeunp; UNP_LOCK_ASSERT(); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; + UNP_REFCOUNT(unp)--; + freeunp = (UNP_REFCOUNT(unp) == 0); + --unp_count; if ((vp = unp->unp_vnode) != NULL) { /* @@ -807,10 +821,12 @@ soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = NULL; local_unp_rights = unp_rights; + saved_unp_addr = unp->unp_addr; UNP_UNLOCK(); - if (unp->unp_addr != NULL) - FREE(unp->unp_addr, M_SONAME); - uma_zfree(unp_zone, unp); + if (saved_unp_addr != NULL) + FREE(saved_unp_addr, M_SONAME); + if (freeunp) + uma_zfree(unp_zone, unp); if (vp) { int vfslocked; @@ -1126,6 +1142,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; + int freeunp; struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen *xug; @@ -1177,6 +1194,7 @@ unp->unp_socket->so_cred)) continue; unp_list[i++] = unp; + UNP_REFCOUNT(unp)++; } } UNP_UNLOCK(); @@ -1186,7 +1204,9 @@ xu = 
malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; - if (unp->unp_gencnt <= gencnt) { + UNP_LOCK(); + UNP_REFCOUNT(unp)--; + if (UNP_REFCOUNT(unp) != 0 && unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = unp; /* @@ -1203,7 +1223,13 @@ unp->unp_conn->unp_addr->sun_len); bcopy(unp, &xu->xu_unp, sizeof *unp); sotoxsocket(unp->unp_socket, &xu->xu_socket); + UNP_UNLOCK(); error = SYSCTL_OUT(req, xu, sizeof *xu); + } else { + freeunp = (UNP_REFCOUNT(unp) == 0); + UNP_UNLOCK(); + if (freeunp) + uma_zfree(unp_zone, unp); } } free(xu, M_TEMP); @@ -1401,8 +1427,8 @@ void unp_init(void) { - unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb_wrapper), NULL, + NULL,NULL, NULL, UMA_ALIGN_PTR, 0); if (unp_zone == NULL) panic("unp_init"); uma_zone_set_max(unp_zone, maxsockets); @@ -1622,6 +1648,12 @@ static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, ""); +/* Store f_gcflag through a volatile lvalue; a cast is not an lvalue in ISO C. */ +#define UNP_GC_SET_FLAGS(fp, flags) \ + (*(volatile short *)&(fp)->f_gcflag = (fp)->f_gcflag | (flags)) +#define UNP_GC_CLEAR_FLAGS(fp, flags) \ + (*(volatile short *)&(fp)->f_gcflag = (fp)->f_gcflag & ~(flags)) + static void unp_gc(__unused void *arg, int pending) { @@ -1640,7 +1672,7 @@ */ sx_slock(&filelist_lock); LIST_FOREACH(fp, &filehead, f_list) - fp->f_gcflag &= ~(FMARK|FDEFER); + UNP_GC_CLEAR_FLAGS(fp, FMARK|FDEFER); do { LIST_FOREACH(fp, &filehead, f_list) { FILE_LOCK(fp); @@ -1654,7 +1686,7 @@ if (fp->f_count == 0) { if (fp->f_gcflag & FDEFER) unp_defer--; - fp->f_gcflag &= ~(FMARK|FDEFER); + UNP_GC_CLEAR_FLAGS(fp, FMARK|FDEFER); FILE_UNLOCK(fp); continue; } @@ -1664,7 +1696,7 @@ * and un-mark it */ if (fp->f_gcflag & FDEFER) { - fp->f_gcflag &= ~FDEFER; + UNP_GC_CLEAR_FLAGS(fp, FDEFER); unp_defer--; } else { /* @@ -1688,7 +1720,7 @@ * If it got this far then it must be * externally accessible.
*/ - fp->f_gcflag |= FMARK; + UNP_GC_SET_FLAGS(fp, FMARK); } /* * either it was defered, or it is externally @@ -1702,8 +1734,14 @@ } - FILE_UNLOCK(fp); if (so->so_proto->pr_domain != &localdomain || - (so->so_proto->pr_flags&PR_RIGHTS) == 0) + (so->so_proto->pr_flags & PR_RIGHTS) == 0) { + FILE_UNLOCK(fp); continue; + } + + /* Set FWAIT while fp is still locked. */ + UNP_GC_SET_FLAGS(fp, FWAIT); + FILE_UNLOCK(fp); + /* * So, Ok, it's one of our sockets and it IS externally * accessible (or was defered). Now we look @@ -1713,7 +1751,9 @@ */ SOCKBUF_LOCK(&so->so_rcv); unp_scan(so->so_rcv.sb_mb, unp_mark); + UNP_GC_CLEAR_FLAGS(fp, FWAIT); SOCKBUF_UNLOCK(&so->so_rcv); + } } while (unp_defer); sx_sunlock(&filelist_lock); @@ -1897,7 +1937,7 @@ if (fp->f_gcflag & FMARK) return; unp_defer++; - fp->f_gcflag |= (FMARK|FDEFER); + UNP_GC_SET_FLAGS(fp, FMARK|FDEFER); } static void --- //depot/vendor/freebsd_6/src/sys/netinet/in_pcb.c 2006/09/15 03:28:40 +++ //depot/yahoo/ybsd_6/src/sys/netinet/in_pcb.c 2006/12/29 06:39:47 @@ -167,19 +168,20 @@ /* * Allocate a PCB and associate it with the socket. + * On success return with the PCB locked.
*/ int -in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type) +in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) { struct inpcb *inp; int error; INP_INFO_WLOCK_ASSERT(pcbinfo); error = 0; - inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); + inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); if (inp == NULL) return (ENOBUFS); - inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + bzero(inp, inp_zero_size); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #ifdef MAC @@ -209,11 +211,13 @@ LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; - INP_LOCK_INIT(inp, "inp", type); #ifdef INET6 if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif + INP_LOCK(inp); + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) out: if (error != 0) @@ -710,10 +714,11 @@ (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; - INP_LOCK_DESTROY(inp); + #ifdef MAC mac_destroy_inpcb(inp); #endif + INP_UNLOCK(inp); uma_zfree(ipi->ipi_zone, inp); } --- //depot/vendor/freebsd_6/src/sys/netinet/in_pcb.h 2006/08/20 13:28:22 +++ //depot/yahoo/ybsd_6/src/sys/netinet/in_pcb.h 2006/10/31 15:17:15 @@ -164,6 +165,7 @@ } inp_depend6; LIST_ENTRY(inpcb) inp_portlist; struct inpcbport *inp_phd; /* head of this list */ +#define inp_zero_size offsetof(struct inpcb, inp_gencnt) inp_gen_t inp_gencnt; /* generation count of this instance */ struct mtx inp_mtx; @@ -340,7 +342,7 @@ extern struct callout ipport_tick_callout; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); -int in_pcballoc(struct socket *, struct inpcbinfo *, const char *); +int in_pcballoc(struct socket *, struct inpcbinfo *); int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, struct ucred *); --- //depot/vendor/freebsd_6/src/sys/netinet/ip_divert.c 2006/05/16 01:28:18 +++ 
//depot/yahoo/ybsd_6/src/sys/netinet/ip_divert.c 2006/12/29 06:39:47 @@ -123,6 +124,23 @@ uma_zone_set_max(divcbinfo.ipi_zone, maxsockets); } +static int +div_inpcb_init(void *mem, int size, int flags) +{ + struct inpcb *inp = mem; + + INP_LOCK_INIT(inp, "inp", "divinp"); + return (0); +} + +static void +div_inpcb_fini(void *mem, int size) +{ + struct inpcb *inp = mem; + + INP_LOCK_DESTROY(inp); +} + void div_init(void) { @@ -137,7 +155,8 @@ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, + UMA_ZONE_NOFREE); uma_zone_set_max(divcbinfo.ipi_zone, maxsockets); EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change, NULL, EVENTHANDLER_PRI_ANY); @@ -418,13 +437,12 @@ INP_INFO_WUNLOCK(&divcbinfo); return error; } - error = in_pcballoc(so, &divcbinfo, "divinp"); + error = in_pcballoc(so, &divcbinfo); if (error) { INP_INFO_WUNLOCK(&divcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; - INP_LOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); inp->inp_ip_p = proto; inp->inp_vflag |= INP_IPV4; @@ -589,6 +607,7 @@ error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; + INP_LOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); @@ -597,8 +616,10 @@ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); + INP_UNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); - } + } else + INP_UNLOCK(inp); } if (!error) { /* --- //depot/vendor/freebsd_6/src/sys/netinet/raw_ip.c 2006/10/06 14:28:23 +++ //depot/yahoo/ybsd_6/src/sys/netinet/raw_ip.c 2006/12/29 06:39:47 @@ -123,6 +124,15 @@ uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); } +static int +rip_inpcb_init(void *mem, int size, int flags) +{ + struct inpcb *inp = mem; + + INP_LOCK_INIT(inp, 
"inp", "rawinp"); + return (0); +} + void rip_init() { @@ -137,7 +147,7 @@ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); @@ -613,13 +623,12 @@ INP_INFO_WUNLOCK(&ripcbinfo); return error; } - error = in_pcballoc(so, &ripcbinfo, "rawinp"); + error = in_pcballoc(so, &ripcbinfo); if (error) { INP_INFO_WUNLOCK(&ripcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; - INP_LOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; @@ -861,6 +870,7 @@ error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; + INP_LOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); @@ -869,8 +879,10 @@ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); + INP_UNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); - } + } else + INP_UNLOCK(inp); } if (!error) { /* --- //depot/vendor/freebsd_6/src/sys/netinet/tcp_usrreq.c 2006/11/28 14:28:31 +++ //depot/yahoo/ybsd_6/src/sys/netinet/tcp_usrreq.c 2006/11/30 11:15:25 @@ -1191,7 +1192,7 @@ if (error) return (error); } - error = in_pcballoc(so, &tcbinfo, "tcpinp"); + error = in_pcballoc(so, &tcbinfo); if (error) return (error); inp = sotoinpcb(so); @@ -1209,7 +1210,6 @@ so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ - INP_LOCK(inp); #ifdef INET6 if (isipv6) in6_pcbdetach(inp); @@ -1220,6 +1220,7 @@ return (ENOBUFS); } tp->t_state = TCPS_CLOSED; + INP_UNLOCK(inp); return (0); } --- //depot/vendor/freebsd_6/src/sys/netinet/udp_usrreq.c 2006/10/06 14:28:23 +++ //depot/yahoo/ybsd_6/src/sys/netinet/udp_usrreq.c 
2006/12/29 06:39:47 @@ -136,6 +143,15 @@ uma_zone_set_max(udbinfo.ipi_zone, maxsockets); } +static int +udp_inpcb_init(void *mem, int size, int flags) +{ + struct inpcb *inp = mem; + + INP_LOCK_INIT(inp, "inp", "udpinp"); + return (0); +} + void udp_init() { @@ -146,7 +162,7 @@ udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, - NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(udbinfo.ipi_zone, maxsockets); EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL, EVENTHANDLER_PRI_ANY); @@ -632,6 +660,7 @@ error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; + INP_LOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; bzero(&xi, sizeof(xi)); @@ -641,8 +670,10 @@ if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; + INP_UNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); - } + } else + INP_UNLOCK(inp); } if (!error) { /* @@ -973,14 +1004,13 @@ INP_INFO_WUNLOCK(&udbinfo); return error; } - error = in_pcballoc(so, &udbinfo, "udpinp"); + error = in_pcballoc(so, &udbinfo); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } inp = (struct inpcb *)so->so_pcb; - INP_LOCK(inp); INP_INFO_WUNLOCK(&udbinfo); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; --- //depot/vendor/freebsd_6/src/sys/netinet6/in6_pcb.c 2006/09/04 04:28:26 +++ //depot/yahoo/ybsd_6/src/sys/netinet6/in6_pcb.c 2006/10/31 15:17:15 @@ -457,7 +458,7 @@ (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; - INP_LOCK_DESTROY(inp); + INP_UNLOCK(inp); uma_zfree(ipi->ipi_zone, inp); } --- //depot/vendor/freebsd_6/src/sys/netinet6/raw_ip6.c 2005/12/25 17:31:32 +++ //depot/yahoo/ybsd_6/src/sys/netinet6/raw_ip6.c 2006/10/31 15:17:15 @@ -571,7 +572,7 @@ return ENOMEM; } s = splnet(); - error = in_pcballoc(so, &ripcbinfo, 
"raw6inp"); + error = in_pcballoc(so, &ripcbinfo); splx(s); if (error) { INP_INFO_WUNLOCK(&ripcbinfo); @@ -579,7 +580,6 @@ return error; } inp = (struct inpcb *)so->so_pcb; - INP_LOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->in6p_ip6_nxt = (long)proto; --- //depot/vendor/freebsd_6/src/sys/netinet6/udp6_usrreq.c 2006/02/08 19:30:52 +++ //depot/yahoo/ybsd_6/src/sys/netinet6/udp6_usrreq.c 2006/10/31 15:17:15 @@ -539,14 +540,13 @@ } } s = splnet(); - error = in_pcballoc(so, &udbinfo, "udp6inp"); + error = in_pcballoc(so, &udbinfo); splx(s); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } inp = (struct inpcb *)so->so_pcb; - INP_LOCK(inp); INP_INFO_WUNLOCK(&udbinfo); inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) --- //depot/vendor/freebsd_6/src/sys/sys/file.h 2006/05/29 20:28:17 +++ //depot/yahoo/ybsd_6/src/sys/sys/file.h 2006/12/29 06:39:47 @@ -127,6 +127,7 @@ short f_gcflag; /* used by thread doing fd garbage collection */ #define FMARK 0x1 /* mark during gc() */ #define FDEFER 0x2 /* defer for next gc pass */ +#define FWAIT 0x4 /* Used to avoid gc / fdrop race condition */ int f_msgcount; /* (f) references from message queue */ /* DTYPE_VNODE specific fields */