commit 24124e12020a2ee6fc49c1151649757380331e87 Author: Mikolaj Golub Date: Sun Feb 19 10:28:21 2012 +0200 Introduce VOP_UNP_BIND(), VOP_UNP_CONNECT(), and VOP_UNP_DETACH() operations for setting and accessing vnode's v_socket field. The operations are necessary to implement proper unix socket handling on layered file systems like nullfs(5). This change fixes the long standing issue with nullfs(5) being in that unix sockets did not work between lower and upper layers: if we bound to a socket on the lower layer we could connect only to the lower path; if we bound to the upper layer we could connect only to the upper path. The new behavior is one can connect to both the lower and the upper paths regardless what layer path one binds to. Suggested by: kib Reviewed by: arch MFC after: XXX diff --git a/UPDATING b/UPDATING index 9da1e29..89099ff 100644 --- a/UPDATING +++ b/UPDATING @@ -22,6 +22,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: machines to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20120225: + Now unix domain sockets behave "as expected" on nullfs(5). Previously + nullfs(5) did not pass through all behaviours to the underlying layer, + as a result if we bound to a socket on the lower layer we could connect + only to the lower path; if we bound to the upper layer we could connect + only to the upper path. The new behavior is one can connect to both the + lower and the upper paths regardless what layer path one binds to. + 20120211: The getifaddrs upgrade path broken with 20111215 has been restored. If you have upgraded in between 20111215 and 20120209 you need to diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 8f329fd..72cc483 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -542,7 +542,7 @@ restart: UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); - vp->v_socket = unp->unp_socket; + VOP_UNP_BIND(vp, unp->unp_socket); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; @@ -638,7 +638,7 @@ uipc_detach(struct socket *so) * XXXRW: Should assert vp->v_socket == so. */ if ((vp = unp->unp_vnode) != NULL) { - unp->unp_vnode->v_socket = NULL; + VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } unp2 = unp->unp_conn; @@ -1308,7 +1308,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) * and to protect simultaneous locking of multiple pcbs. */ UNP_LINK_WLOCK(); - so2 = vp->v_socket; + VOP_UNP_CONNECT(vp, &so2); if (so2 == NULL) { error = ECONNREFUSED; goto bad2; @@ -2318,17 +2318,15 @@ vfs_unp_reclaim(struct vnode *vp) active = 0; UNP_LINK_WLOCK(); - so = vp->v_socket; + VOP_UNP_CONNECT(vp, &so); if (so == NULL) goto done; unp = sotounpcb(so); if (unp == NULL) goto done; UNP_PCB_LOCK(unp); - if (unp->unp_vnode != NULL) { - KASSERT(unp->unp_vnode == vp, - ("vfs_unp_reclaim: vp != unp->unp_vnode")); - vp->v_socket = NULL; + if (unp->unp_vnode == vp) { + VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; active = 1; } diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index e47498e..25278c8 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -123,6 +123,9 @@ struct vop_vector default_vnodeops = { .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, + .vop_unp_bind = vop_stdunp_bind, + .vop_unp_connect = vop_stdunp_connect, + .vop_unp_detach = vop_stdunp_detach, }; /* @@ -1037,6 +1040,30 @@ vop_stdadvise(struct vop_advise_args *ap) return (error); } +int +vop_stdunp_bind(struct vop_unp_bind_args *ap) +{ + + ap->a_vp->v_socket = ap->a_socket; + return (0); +} + +int +vop_stdunp_connect(struct vop_unp_connect_args *ap) +{ + + *ap->a_socket = ap->a_vp->v_socket; + return (0); +} + +int +vop_stdunp_detach(struct vop_unp_detach_args *ap) +{ + + ap->a_vp->v_socket = NULL; + return (0); +} + /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 5da4c1c..5e3fa11 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -640,6 +640,26 @@ vop_advise { IN int advice; }; +%% unp_bind vp E E E + +vop_unp_bind { + IN struct vnode *vp; + IN struct socket *socket; +}; + +%% unp_connect vp L L L + +vop_unp_connect { + IN struct vnode *vp; + OUT struct socket **socket; +}; + +%% unp_detach vp = = = + +vop_unp_detach { + IN struct vnode *vp; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index a3ba1b7..a2f0086 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -703,6 +703,9 @@ int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); int vop_stdvptofh(struct vop_vptofh_args *ap); +int vop_stdunp_bind(struct vop_unp_bind_args *ap); +int vop_stdunp_connect(struct vop_unp_connect_args *ap); +int vop_stdunp_detach(struct vop_unp_detach_args *ap); int vop_eopnotsupp(struct vop_generic_args *ap); int vop_ebadf(struct vop_generic_args *ap); int vop_einval(struct vop_generic_args *ap);