Index: . =================================================================== --- . (revision 244002) +++ . (working copy) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r240283-240285,241025,241225,241548,241554,242476,242560,243340 Index: compat/linux/linux_misc.c =================================================================== --- compat/linux/linux_misc.c (revision 244002) +++ compat/linux/linux_misc.c (working copy) @@ -232,8 +232,7 @@ linux_uselib(struct thread *td, struct linux_useli unsigned long bss_size; char *library; ssize_t aresid; - int error; - int locked, vfslocked; + int error, locked, vfslocked, writecount; LCONVPATHEXIST(td, args->library, &library); @@ -265,7 +264,10 @@ linux_uselib(struct thread *td, struct linux_useli locked = 1; /* Writable? */ - if (vp->v_writecount) { + error = VOP_GET_WRITECOUNT(vp, &writecount); + if (error != 0) + goto cleanup; + if (writecount != 0) { error = ETXTBSY; goto cleanup; } @@ -372,7 +374,7 @@ linux_uselib(struct thread *td, struct linux_useli * XXX: Note that if any of the VM operations fail below we don't * clear this flag. */ - vp->v_vflag |= VV_TEXT; + VOP_SET_TEXT(vp); /* * Lock no longer needed Index: fs =================================================================== --- fs (revision 244002) +++ fs (working copy) Property changes on: fs ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/fs:r240285,241025,241548,241554,242476,243340 Index: fs/coda/coda_subr.c =================================================================== --- fs/coda/coda_subr.c (revision 244002) +++ fs/coda/coda_subr.c (working copy) @@ -486,7 +486,7 @@ handleDownCall(struct coda_mntinfo *mnt, int opcod cache_purge(CTOV(cp)); cp->c_flags &= ~(C_VATTR | C_ACCCACHE); ASSERT_VOP_LOCKED(CTOV(cp), "coda HandleDownCall"); - if (CTOV(cp)->v_vflag & VV_TEXT) + if (VOP_IS_TEXT(CTOV(cp))) error = coda_vmflush(cp); CODADEBUG(CODA_ZAPFILE, myprintf(("zapfile: fid = %s, refcnt = %d, error = " @@ -532,7 +532,7 @@ handleDownCall(struct coda_mntinfo *mnt, int opcod cp->c_flags &= ~(C_VATTR | C_ACCCACHE); ASSERT_VOP_LOCKED(CTOV(cp), "coda HandleDownCall"); if (!(IS_DIR(out->coda_purgefid.Fid)) - && (CTOV(cp)->v_vflag & VV_TEXT)) + && VOP_IS_TEXT(CTOV(cp))) error = coda_vmflush(cp); CODADEBUG(CODA_PURGEFID, myprintf(("purgefid: fid " "= %s, refcnt = %d, error = %d\n", Index: fs/nfsserver/nfs_nfsdport.c =================================================================== --- fs/nfsserver/nfs_nfsdport.c (revision 244002) +++ fs/nfsserver/nfs_nfsdport.c (working copy) @@ -253,7 +253,7 @@ nfsvno_accchk(struct vnode *vp, accmode_t accmode, * the inode, try to free it up once. If * we fail, we can't allow writing. */ - if ((vp->v_vflag & VV_TEXT) != 0 && error == 0) + if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { Index: fs/nullfs/null.h =================================================================== --- fs/nullfs/null.h (revision 244002) +++ fs/nullfs/null.h (working copy) @@ -56,6 +56,7 @@ struct null_node { int nullfs_init(struct vfsconf *vfsp); int nullfs_uninit(struct vfsconf *vfsp); int null_nodeget(struct mount *mp, struct vnode *target, struct vnode **vpp); +struct vnode *null_hashget(struct mount *mp, struct vnode *lowervp); void null_hashrem(struct null_node *xp); int null_bypass(struct vop_generic_args *ap); Index: fs/nullfs/null_subr.c =================================================================== --- fs/nullfs/null_subr.c (revision 244002) +++ fs/nullfs/null_subr.c (working copy) @@ -67,7 +67,6 @@ struct mtx null_hashmtx; static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table"); MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part"); -static struct vnode * null_hashget(struct mount *, struct vnode *); static struct vnode * null_hashins(struct mount *, struct null_node *); /* @@ -98,7 +97,7 @@ nullfs_uninit(vfsp) * Return a VREF'ed alias for lower vnode if already exists, else 0. * Lower vnode should be locked on entry and will be left locked on exit. */ -static struct vnode * +struct vnode * null_hashget(mp, lowervp) struct mount *mp; struct vnode *lowervp; @@ -209,14 +208,10 @@ null_nodeget(mp, lowervp, vpp) struct vnode *vp; int error; - /* - * The insmntque1() call below requires the exclusive lock on - * the nullfs vnode. - */ - ASSERT_VOP_ELOCKED(lowervp, "lowervp"); - KASSERT(lowervp->v_usecount >= 1, ("Unreferenced vnode %p\n", lowervp)); + ASSERT_VOP_LOCKED(lowervp, "lowervp"); + KASSERT(lowervp->v_usecount >= 1, ("Unreferenced vnode %p", lowervp)); - /* Lookup the hash firstly */ + /* Lookup the hash firstly. */ *vpp = null_hashget(mp, lowervp); if (*vpp != NULL) { vrele(lowervp); @@ -224,6 +219,19 @@ null_nodeget(mp, lowervp, vpp) } /* + * The insmntque1() call below requires the exclusive lock on + * the nullfs vnode. Upgrade the lock now if hash failed to + * provide ready to use vnode. + */ + if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) { + vn_lock(lowervp, LK_UPGRADE | LK_RETRY); + if ((lowervp->v_iflag & VI_DOOMED) != 0) { + vput(lowervp); + return (ENOENT); + } + } + + /* * We do not serialize vnode creation, instead we will check for * duplicates later, when adding new vnode to hash. * Note that duplicate can only appear in hash if the lowervp is @@ -233,8 +241,7 @@ null_nodeget(mp, lowervp, vpp) * might cause a bogus v_data pointer to get dereferenced * elsewhere if MALLOC should block. */ - xp = malloc(sizeof(struct null_node), - M_NULLFSNODE, M_WAITOK); + xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK); error = getnewvnode("null", mp, &null_vnodeops, &vp); if (error) { @@ -248,8 +255,6 @@ null_nodeget(mp, lowervp, vpp) vp->v_type = lowervp->v_type; vp->v_data = xp; vp->v_vnlock = lowervp->v_vnlock; - if (vp->v_vnlock == NULL) - panic("null_nodeget: Passed a NULL vnlock.\n"); error = insmntque1(vp, mp, null_insmntque_dtr, xp); if (error != 0) return (error); Index: fs/nullfs/null_vfsops.c =================================================================== --- fs/nullfs/null_vfsops.c (revision 244002) +++ fs/nullfs/null_vfsops.c (working copy) @@ -65,6 +65,7 @@ static vfs_statfs_t nullfs_statfs; static vfs_unmount_t nullfs_unmount; static vfs_vget_t nullfs_vget; static vfs_extattrctl_t nullfs_extattrctl; +static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp; /* * Mount null layer @@ -121,8 +122,10 @@ nullfs_mount(struct mount *mp) */ NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread); error = namei(ndp); + /* * Re-lock vnode. + * XXXKIB This is deadlock-prone as well. */ if (isvnunlocked) vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY); @@ -146,7 +149,7 @@ nullfs_mount(struct mount *mp) } xmp = (struct null_mount *) malloc(sizeof(struct null_mount), - M_NULLFSMNT, M_WAITOK); /* XXX */ + M_NULLFSMNT, M_WAITOK); /* * Save reference to underlying FS @@ -186,10 +189,15 @@ nullfs_mount(struct mount *mp) } MNT_ILOCK(mp); mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & - (MNTK_MPSAFE | MNTK_SHARED_WRITES); + (MNTK_MPSAFE | MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED | + MNTK_EXTENDED_SHARED); + mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT; MNT_IUNLOCK(mp); mp->mnt_data = xmp; vfs_getnewfsid(mp); + MNT_ILOCK(xmp->nullm_vfs); + TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp, mnt_upper_link); + MNT_IUNLOCK(xmp->nullm_vfs); vfs_mountedfrom(mp, target); @@ -206,14 +214,16 @@ nullfs_unmount(mp, mntflags) struct mount *mp; int mntflags; { - void *mntdata; - int error; - int flags = 0; + struct null_mount *mntdata; + struct mount *ump; + int error, flags; NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp); if (mntflags & MNT_FORCE) - flags |= FORCECLOSE; + flags = FORCECLOSE; + else + flags = 0; /* There is 1 extra root vnode reference (nullm_rootvp). */ error = vflush(mp, 1, flags, curthread); @@ -224,9 +234,17 @@ nullfs_unmount(mp, mntflags) * Finally, throw away the null_mount structure */ mntdata = mp->mnt_data; - mp->mnt_data = 0; + ump = mntdata->nullm_vfs; + MNT_ILOCK(ump); + while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) { + ump->mnt_kern_flag |= MNTK_VGONE_WAITER; + msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0); + } + TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link); + MNT_IUNLOCK(ump); + mp->mnt_data = NULL; free(mntdata, M_NULLFSMNT); - return 0; + return (0); } static int @@ -316,13 +334,10 @@ nullfs_vget(mp, ino, flags, vpp) KASSERT((flags & LK_TYPE_MASK) != 0, ("nullfs_vget: no lock requested")); - flags &= ~LK_TYPE_MASK; - flags |= LK_EXCLUSIVE; error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp); - if (error) + if (error != 0) return (error); - return (null_nodeget(mp, *vpp, vpp)); } @@ -334,11 +349,11 @@ nullfs_fhtovp(mp, fidp, flags, vpp) struct vnode **vpp; { int error; - error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, LK_EXCLUSIVE, + + error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags, vpp); - if (error) + if (error != 0) return (error); - return (null_nodeget(mp, *vpp, vpp)); } @@ -350,11 +365,23 @@ nullfs_extattrctl(mp, cmd, filename_vp, namespace, int namespace; const char *attrname; { - return VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, filename_vp, - namespace, attrname); + + return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, + filename_vp, namespace, attrname)); } +static void +nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp) +{ + struct vnode *vp; + vp = null_hashget(mp, lowervp); + if (vp == NULL) + return; + vgone(vp); + vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); +} + static struct vfsops null_vfsops = { .vfs_extattrctl = nullfs_extattrctl, .vfs_fhtovp = nullfs_fhtovp, @@ -367,6 +394,7 @@ static struct vfsops null_vfsops = { .vfs_uninit = nullfs_uninit, .vfs_unmount = nullfs_unmount, .vfs_vget = nullfs_vget, + .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, }; VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL); Index: fs/nullfs/null_vnops.c =================================================================== --- fs/nullfs/null_vnops.c (revision 244002) +++ fs/nullfs/null_vnops.c (working copy) @@ -329,6 +329,26 @@ null_bypass(struct vop_generic_args *ap) return (error); } +static int +null_add_writecount(struct vop_add_writecount_args *ap) +{ + struct vnode *lvp, *vp; + int error; + + vp = ap->a_vp; + lvp = NULLVPTOLOWERVP(vp); + KASSERT(vp->v_writecount + ap->a_inc >= 0, ("wrong writecount inc")); + if (vp->v_writecount > 0 && vp->v_writecount + ap->a_inc == 0) + error = VOP_ADD_WRITECOUNT(lvp, -1); + else if (vp->v_writecount == 0 && vp->v_writecount + ap->a_inc > 0) + error = VOP_ADD_WRITECOUNT(lvp, 1); + else + error = 0; + if (error == 0) + vp->v_writecount += ap->a_inc; + return (error); +} + /* * We have to carry on the locking protocol on the null layer vnodes * as we progress through the tree. We also have to enforce read-only @@ -665,34 +685,20 @@ null_unlock(struct vop_unlock_args *ap) } /* - * There is no way to tell that someone issued remove/rmdir operation - * on the underlying filesystem. For now we just have to release lowervp - * as soon as possible. - * - * Note, we can't release any resources nor remove vnode from hash before - * appropriate VXLOCK stuff is done because other process can find this - * vnode in hash during inactivation and may be sitting in vget() and waiting - * for null_inactive to unlock vnode. Thus we will do all those in VOP_RECLAIM. + * Do not allow the VOP_INACTIVE to be passed to the lower layer, + * since the reference count on the lower vnode is not related to + * ours. */ static int -null_inactive(struct vop_inactive_args *ap) +null_inactive(struct vop_inactive_args *ap __unused) { - struct vnode *vp = ap->a_vp; - struct thread *td = ap->a_td; - vp->v_object = NULL; - - /* - * If this is the last reference, then free up the vnode - * so as not to tie up the lower vnodes. - */ - vrecycle(vp, td); - return (0); } /* - * Now, the VXLOCK is in force and we're free to destroy the null vnode. + * Now, the nullfs vnode and, due to the sharing lock, the lower + * vnode, are exclusively locked, and we shall destroy the null vnode. */ static int null_reclaim(struct vop_reclaim_args *ap) @@ -840,4 +846,5 @@ struct vop_vector null_vnodeops = { .vop_unlock = null_unlock, .vop_vptocnp = null_vptocnp, .vop_vptofh = null_vptofh, + .vop_add_writecount = null_add_writecount, }; Index: fs/unionfs/union_subr.c =================================================================== --- fs/unionfs/union_subr.c (revision 244002) +++ fs/unionfs/union_subr.c (working copy) @@ -954,7 +954,7 @@ unionfs_vn_create_on_upper(struct vnode **vpp, str vput(vp); goto unionfs_vn_create_on_upper_free_out1; } - vp->v_writecount++; + VOP_ADD_WRITECOUNT(vp, 1); *vpp = vp; unionfs_vn_create_on_upper_free_out1: @@ -1089,7 +1089,7 @@ unionfs_copyfile(struct unionfs_node *unp, int doc } } VOP_CLOSE(uvp, FWRITE, cred, td); - uvp->v_writecount--; + VOP_ADD_WRITECOUNT(uvp, -1); vn_finished_write(mp); Index: i386/ibcs2/imgact_coff.c =================================================================== --- i386/ibcs2/imgact_coff.c (revision 244002) +++ i386/ibcs2/imgact_coff.c (working copy) @@ -168,7 +168,7 @@ coff_load_file(struct thread *td, char *name) unsigned long text_offset = 0, text_address = 0, text_size = 0; unsigned long data_offset = 0, data_address = 0, data_size = 0; unsigned long bss_size = 0; - int i; + int i, writecount; NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, name, td); @@ -181,7 +181,10 @@ coff_load_file(struct thread *td, char *name) if (vp == NULL) return ENOEXEC; - if (vp->v_writecount) { + error = VOP_GET_WRITECOUNT(vp, &writecount); + if (error != 0) + goto fail; + if (writecount != 0) { error = ETXTBSY; goto fail; } Index: kern/imgact_elf.c =================================================================== --- kern/imgact_elf.c (revision 244002) +++ kern/imgact_elf.c (working copy) @@ -646,7 +646,7 @@ __elfN(load_file)(struct proc *p, const char *file * Also make certain that the interpreter stays the same, so set * its VV_TEXT flag, too. */ - nd->ni_vp->v_vflag |= VV_TEXT; + VOP_SET_TEXT(nd->ni_vp); imgp->object = nd->ni_vp->v_object; Index: kern/kern_exec.c =================================================================== --- kern/kern_exec.c (revision 244002) +++ kern/kern_exec.c (working copy) @@ -473,9 +473,8 @@ interpret: * Remember if this was set before and unset it in case this is not * actually an executable image. */ - textset = imgp->vp->v_vflag & VV_TEXT; - ASSERT_VOP_ELOCKED(imgp->vp, "vv_text"); - imgp->vp->v_vflag |= VV_TEXT; + textset = VOP_IS_TEXT(imgp->vp); + VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) @@ -506,10 +505,8 @@ interpret: if (error) { if (error == -1) { - if (textset == 0) { - ASSERT_VOP_ELOCKED(imgp->vp, "vv_text"); - imgp->vp->v_vflag &= ~VV_TEXT; - } + if (textset == 0) + VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; @@ -527,7 +524,7 @@ interpret: * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ - imgp->vp->v_vflag &= ~VV_TEXT; + VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1396,7 +1393,7 @@ exec_check_permissions(imgp) struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; - int error; + int error, writecount; td = curthread; @@ -1441,7 +1438,10 @@ exec_check_permissions(imgp) * Check number of open-for-writes on the file and deny execution * if there are any. */ - if (vp->v_writecount) + error = VOP_GET_WRITECOUNT(vp, &writecount); + if (error != 0) + return (error); + if (writecount != 0) return (ETXTBSY); /* Index: kern/vfs_default.c =================================================================== --- kern/vfs_default.c (revision 244002) +++ kern/vfs_default.c (working copy) @@ -78,6 +78,12 @@ static int dirent_exists(struct vnode *vp, const c #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) +static int vop_stdis_text(struct vop_is_text_args *ap); +static int vop_stdset_text(struct vop_set_text_args *ap); +static int vop_stdunset_text(struct vop_unset_text_args *ap); +static int vop_stdget_writecount(struct vop_get_writecount_args *ap); +static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); + /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. @@ -126,6 +132,11 @@ struct vop_vector default_vnodeops = { .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, + .vop_is_text = vop_stdis_text, + .vop_set_text = vop_stdset_text, + .vop_unset_text = vop_stdunset_text, + .vop_get_writecount = vop_stdget_writecount, + .vop_add_writecount = vop_stdadd_writecount, }; /* @@ -1073,6 +1084,45 @@ vop_stdunp_detach(struct vop_unp_detach_args *ap) return (0); } +static int +vop_stdis_text(struct vop_is_text_args *ap) +{ + + return ((ap->a_vp->v_vflag & VV_TEXT) != 0); +} + +static int +vop_stdset_text(struct vop_set_text_args *ap) +{ + + ap->a_vp->v_vflag |= VV_TEXT; + return (0); +} + +static int +vop_stdunset_text(struct vop_unset_text_args *ap) +{ + + ap->a_vp->v_vflag &= ~VV_TEXT; + return (0); +} + +static int +vop_stdget_writecount(struct vop_get_writecount_args *ap) +{ + + *ap->a_writecount = ap->a_vp->v_writecount; + return (0); +} + +static int +vop_stdadd_writecount(struct vop_add_writecount_args *ap) +{ + + ap->a_vp->v_writecount += ap->a_inc; + return (0); +} + /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. Index: kern/vfs_lookup.c =================================================================== --- kern/vfs_lookup.c (revision 244002) +++ kern/vfs_lookup.c (working copy) @@ -396,11 +396,13 @@ namei(struct nameidata *ndp) } static int -compute_cn_lkflags(struct mount *mp, int lkflags) +compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) { - if (mp == NULL || - ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { + if (mp == NULL || ((lkflags & LK_SHARED) && + (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || + ((cnflags & ISDOTDOT) && + (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { lkflags &= ~LK_SHARED; lkflags |= LK_EXCLUSIVE; } @@ -529,7 +531,8 @@ lookup(struct nameidata *ndp) dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, - compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); + compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, + cnp->cn_flags)); dirloop: /* @@ -686,7 +689,7 @@ dirloop: VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | - LK_RETRY)); + LK_RETRY, ISDOTDOT)); } } @@ -724,7 +727,8 @@ unionlookup: vprint("lookup in", dp); #endif lkflags_save = cnp->cn_lkflags; - cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); + cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, + cnp->cn_flags); if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { cnp->cn_lkflags = lkflags_save; KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); @@ -743,7 +747,7 @@ unionlookup: VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | - LK_RETRY)); + LK_RETRY, cnp->cn_flags)); goto unionlookup; } @@ -815,8 +819,8 @@ unionlookup: dvfslocked = 0; vref(vp_crossmp); ndp->ni_dvp = vp_crossmp; - error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), - &tdp); + error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags, + cnp->cn_flags), &tdp); vfs_unbusy(mp); if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) panic("vp_crossmp exclusively locked or reclaimed"); Index: kern/vfs_mount.c =================================================================== --- kern/vfs_mount.c (revision 244002) +++ kern/vfs_mount.c (working copy) @@ -479,6 +479,7 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf * mac_mount_create(cred, mp); #endif arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); + TAILQ_INIT(&mp->mnt_uppers); return (mp); } @@ -512,6 +513,7 @@ vfs_mount_destroy(struct mount *mp) vprint("", vp); panic("unmount: dangling vnode"); } + KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers")); if (mp->mnt_nvnodelistsize != 0) panic("vfs_mount_destroy: nonzero nvnodelistsize"); if (mp->mnt_activevnodelistsize != 0) @@ -1260,7 +1262,8 @@ dounmount(mp, flags, td) } MNT_ILOCK(mp); - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { + if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 || + !TAILQ_EMPTY(&mp->mnt_uppers)) { MNT_IUNLOCK(mp); if (coveredvp) VOP_UNLOCK(coveredvp, 0); Index: kern/vfs_subr.c =================================================================== --- kern/vfs_subr.c (revision 244002) +++ kern/vfs_subr.c (working copy) @@ -2740,7 +2740,59 @@ vgone(struct vnode *vp) VI_UNLOCK(vp); } +static void +vgonel_reclaim_lowervp_vfs(struct mount *mp __unused, + struct vnode *lowervp __unused) +{ +} + /* + * Notify upper mounts about reclaimed vnode. + */ +static void +vgonel_reclaim_lowervp(struct vnode *vp) +{ + static struct vfsops vgonel_vfsops = { + .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs + }; + struct mount *mp, *ump, *mmp; + + mp = vp->v_mount; + if (mp == NULL) + return; + + MNT_ILOCK(mp); + if (TAILQ_EMPTY(&mp->mnt_uppers)) + goto unlock; + MNT_IUNLOCK(mp); + mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); + mmp->mnt_op = &vgonel_vfsops; + mmp->mnt_kern_flag |= MNTK_MARKER; + MNT_ILOCK(mp); + mp->mnt_kern_flag |= MNTK_VGONE_UPPER; + for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { + if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { + ump = TAILQ_NEXT(ump, mnt_upper_link); + continue; + } + TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); + MNT_IUNLOCK(mp); + VFS_RECLAIM_LOWERVP(ump, vp); + MNT_ILOCK(mp); + ump = TAILQ_NEXT(mmp, mnt_upper_link); + TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); + } + free(mmp, M_TEMP); + mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; + if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { + mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; + wakeup(&mp->mnt_uppers); + } +unlock: + MNT_IUNLOCK(mp); +} + +/* * vgone, with the vp interlock held. */ void @@ -2764,6 +2816,7 @@ vgonel(struct vnode *vp) if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; + /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). @@ -2771,6 +2824,8 @@ vgonel(struct vnode *vp) active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); + vgonel_reclaim_lowervp(vp); + /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. @@ -3084,6 +3139,11 @@ DB_SHOW_COMMAND(mount, db_show_mount) MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); + MNT_KERN_FLAG(MNTK_NO_IOPF); + MNT_KERN_FLAG(MNTK_VGONE_UPPER); + MNT_KERN_FLAG(MNTK_VGONE_WAITER); + MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); + MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); Index: kern/vfs_vnops.c =================================================================== --- kern/vfs_vnops.c (revision 244002) +++ kern/vfs_vnops.c (working copy) @@ -252,7 +252,7 @@ restart: goto bad; if (fmode & FWRITE) - vp->v_writecount++; + VOP_ADD_WRITECOUNT(vp, 1); *flagp = fmode; ASSERT_VOP_LOCKED(vp, "vn_open_cred"); if (!mpsafe) @@ -282,7 +282,7 @@ vn_writechk(vp) * the vnode, try to free it up once. If * we fail, we can't allow writing. */ - if (vp->v_vflag & VV_TEXT) + if (VOP_IS_TEXT(vp)) return (ETXTBSY); return (0); @@ -314,7 +314,7 @@ vn_close(vp, flags, file_cred, td) if (flags & FWRITE) { VNASSERT(vp->v_writecount > 0, vp, ("vn_close: negative writecount")); - vp->v_writecount--; + VOP_ADD_WRITECOUNT(vp, -1); } error = VOP_CLOSE(vp, flags, file_cred, td); vput(vp); Index: kern/vnode_if.src =================================================================== --- kern/vnode_if.src (revision 244002) +++ kern/vnode_if.src (working copy) @@ -658,6 +658,38 @@ vop_unp_detach { IN struct vnode *vp; }; +%% is_text vp L L L + +vop_is_text { + IN struct vnode *vp; +}; + +%% set_text vp E E E + +vop_set_text { + IN struct vnode *vp; +}; + +%% vop_unset_text vp E E E + +vop_unset_text { + IN struct vnode *vp; +}; + +%% get_writecount vp L L L + +vop_get_writecount { + IN struct vnode *vp; + OUT int *writecount; +}; + +%% add_writecount vp E E E + +vop_add_writecount { + IN struct vnode *vp; + IN int inc; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, Index: nfsserver/nfs_serv.c =================================================================== --- nfsserver/nfs_serv.c (revision 244002) +++ nfsserver/nfs_serv.c (working copy) @@ -3891,7 +3891,7 @@ nfsrv_access(struct vnode *vp, accmode_t accmode, * If there's shared text associated with * the inode, we can't allow writing. */ - if (vp->v_vflag & VV_TEXT) + if (VOP_IS_TEXT(vp)) return (ETXTBSY); } Index: sys/mount.h =================================================================== --- sys/mount.h (revision 244002) +++ sys/mount.h (working copy) @@ -189,6 +189,8 @@ struct mount { #define mnt_endzero mnt_gjprovider char *mnt_gjprovider; /* gjournal provider name */ struct lock mnt_explock; /* vfs_export walkers lock */ + TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */ + TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/ }; /* @@ -374,6 +376,10 @@ void __mnt_vnode_markerfree(struct vnode #define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads and writes. Filesystem shall properly handle i/o state on EFAULT. */ +#define MNTK_VGONE_UPPER 0x00000200 +#define MNTK_VGONE_WAITER 0x00000400 +#define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 +#define MNTK_MARKER 0x00001000 #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ @@ -629,6 +635,7 @@ typedef int vfs_mount_t(struct mount *mp); typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); +typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp); struct vfsops { vfs_mount_t *vfs_mount; @@ -646,6 +653,7 @@ struct vfsops { vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; + vfs_reclaim_lowervp_t *vfs_reclaim_lowervp; }; vfs_statfs_t __vfs_statfs; @@ -671,6 +679,9 @@ vfs_statfs_t __vfs_statfs; #define VFS_SUSP_CLEAN(MP) \ ({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \ (*(MP)->mnt_op->vfs_susp_clean)(MP); }) +#define VFS_RECLAIM_LOWERVP(MP, VP) \ + ({if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) \ + (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); }) #define VFS_NEEDSGIANT_(MP) \ ((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0) Index: ufs/ufs/ufs_extattr.c =================================================================== --- ufs/ufs/ufs_extattr.c (revision 244002) +++ ufs/ufs/ufs_extattr.c (working copy) @@ -334,7 +334,7 @@ ufs_extattr_enable_with_open(struct ufsmount *ump, return (error); } - vp->v_writecount++; + VOP_ADD_WRITECOUNT(vp, 1); vref(vp); Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (revision 244002) +++ vm/vm_object.c (working copy) @@ -455,7 +455,7 @@ vm_object_vndeallocate(vm_object_t object) VOP_UNLOCK(vp, 0); } else { if (object->ref_count == 0) - vp->v_vflag &= ~VV_TEXT; + VOP_UNSET_TEXT(vp); VM_OBJECT_UNLOCK(object); vput(vp); } Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (revision 244002) +++ vm/vnode_pager.c (working copy) @@ -272,10 +272,10 @@ vnode_pager_dealloc(object) ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc"); if (object->un_pager.vnp.writemappings > 0) { object->un_pager.vnp.writemappings = 0; - vp->v_writecount--; + VOP_ADD_WRITECOUNT(vp, -1); } vp->v_object = NULL; - vp->v_vflag &= ~VV_TEXT; + VOP_UNSET_TEXT(vp); VM_OBJECT_UNLOCK(object); while (refs-- > 0) vunref(vp); @@ -1216,10 +1216,10 @@ vnode_pager_update_writecount(vm_object_t object, vp = object->handle; if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) { ASSERT_VOP_ELOCKED(vp, "v_writecount inc"); - vp->v_writecount++; + VOP_ADD_WRITECOUNT(vp, 1); } else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) { ASSERT_VOP_ELOCKED(vp, "v_writecount dec"); - vp->v_writecount--; + VOP_ADD_WRITECOUNT(vp, -1); } VM_OBJECT_UNLOCK(object); }