diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index fb6a13449df7..f93b05669014 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -71,8 +71,7 @@ static struct vnode *	null_hashins(struct mount *, struct null_node *);
  * Initialise cache headers
  */
 int
-nullfs_init(vfsp)
-	struct vfsconf *vfsp;
+nullfs_init(struct vfsconf *vfsp)
 {
 
 	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
@@ -82,8 +81,7 @@ nullfs_init(vfsp)
 }
 
 int
-nullfs_uninit(vfsp)
-	struct vfsconf *vfsp;
+nullfs_uninit(struct vfsconf *vfsp)
 {
 
 	rw_destroy(&null_hash_lock);
@@ -95,16 +93,15 @@ nullfs_uninit(vfsp)
  * Return a VREF'ed alias for lower vnode if already exists, else 0.
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
-struct vnode *
-null_hashget(mp, lowervp)
-	struct mount *mp;
-	struct vnode *lowervp;
+static struct vnode *
+null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 {
 	struct null_node_hashhead *hd;
 	struct null_node *a;
 	struct vnode *vp;
 
 	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	rw_assert(&null_hash_lock, RA_LOCKED);
 
 	/*
 	 * Find hash base, and then search the (two-way) linked
@@ -113,9 +110,6 @@ null_hashget(mp, lowervp)
 	 * reference count (but NOT the lower vnode's VREF counter).
 	 */
 	hd = NULL_NHASH(lowervp);
-	if (LIST_EMPTY(hd))
-		return (NULLVP);
-	rw_rlock(&null_hash_lock);
 	LIST_FOREACH(a, hd, null_hash) {
 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			/*
@@ -126,29 +120,43 @@ null_hashget(mp, lowervp)
 			 */
 			vp = NULLTOV(a);
 			vref(vp);
-			rw_runlock(&null_hash_lock);
 			return (vp);
 		}
 	}
-	rw_runlock(&null_hash_lock);
 	return (NULLVP);
 }
 
+struct vnode *
+null_hashget(struct mount *mp, struct vnode *lowervp)
+{
+	struct null_node_hashhead *hd;
+	struct vnode *vp;
+
+	hd = NULL_NHASH(lowervp);
+	if (LIST_EMPTY(hd))
+		return (NULLVP);
+
+	rw_rlock(&null_hash_lock);
+	vp = null_hashget_locked(mp, lowervp);
+	rw_runlock(&null_hash_lock);
+
+	return (vp);
+}
+
 /*
  * Act like null_hashget, but add passed null_node to hash if no existing
  * node found.
  */
 static struct vnode *
-null_hashins(mp, xp)
-	struct mount *mp;
-	struct null_node *xp;
+null_hashins(struct mount *mp, struct null_node *xp)
 {
 	struct null_node_hashhead *hd;
 	struct null_node *oxp;
 	struct vnode *ovp;
 
+	rw_assert(&null_hash_lock, RA_WLOCKED);
+
 	hd = NULL_NHASH(xp->null_lowervp);
-	rw_wlock(&null_hash_lock);
 	LIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
@@ -158,12 +166,10 @@ null_hashins(mp, xp)
 			 */
 			ovp = NULLTOV(oxp);
 			vref(ovp);
-			rw_wunlock(&null_hash_lock);
 			return (ovp);
 		}
 	}
 	LIST_INSERT_HEAD(hd, xp, null_hash);
-	rw_wunlock(&null_hash_lock);
 	return (NULLVP);
 }
 
@@ -183,10 +189,10 @@ null_destroy_proto(struct vnode *vp, void *xp)
 }
 
 static void
-null_insmntque_dtr(struct vnode *vp, void *xp)
+null_insmntque_dtr(struct vnode *vp, struct null_node *xp)
 {
 
-	vput(((struct null_node *)xp)->null_lowervp);
+	vput(xp->null_lowervp);
 	null_destroy_proto(vp, xp);
 }
 
@@ -199,10 +205,7 @@ null_insmntque_dtr(struct vnode *vp, void *xp)
  * the caller's "spare" reference to created nullfs vnode.
  */
 int
-null_nodeget(mp, lowervp, vpp)
-	struct mount *mp;
-	struct vnode *lowervp;
-	struct vnode **vpp;
+null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 {
 	struct null_node *xp;
 	struct vnode *vp;
@@ -218,19 +221,6 @@ null_nodeget(mp, lowervp, vpp)
 		return (0);
 	}
 
-	/*
-	 * The insmntque1() call below requires the exclusive lock on
-	 * the nullfs vnode.  Upgrade the lock now if hash failed to
-	 * provide ready to use vnode.
-	 */
-	if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) {
-		vn_lock(lowervp, LK_UPGRADE | LK_RETRY);
-		if (VN_IS_DOOMED(lowervp)) {
-			vput(lowervp);
-			return (ENOENT);
-		}
-	}
-
 	/*
 	 * We do not serialize vnode creation, instead we will check for
 	 * duplicates later, when adding new vnode to hash.
@@ -246,15 +236,28 @@ null_nodeget(mp, lowervp, vpp)
 		return (error);
 	}
 
+	rw_wlock(&null_hash_lock);
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
 	xp->null_flags = 0;
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
 	vp->v_vnlock = lowervp->v_vnlock;
-	error = insmntque1(vp, mp, null_insmntque_dtr, xp);
-	if (error != 0)
+	*vpp = null_hashget_locked(mp, lowervp);
+	if (*vpp != NULL) {
+		printf("%s: lost race vp %p\n", __func__, vp);
+		rw_wunlock(&null_hash_lock);
+		null_insmntque_dtr(vp, xp);
+		return (0);
+	}
+
+	error = insmntque1(vp, mp, NULL, NULL);
+	if (error != 0) {
+		printf("%s: lost race 2 vp %p\n", __func__, vp);
+		rw_wunlock(&null_hash_lock);
+		null_insmntque_dtr(vp, xp);
 		return (error);
+	}
 
 	if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
 		vp->v_vflag |= VV_ROOT;
@@ -276,17 +279,9 @@ null_nodeget(mp, lowervp, vpp)
 		}
 	}
 
-	/*
-	 * Atomically insert our new node into the hash or vget existing
-	 * if someone else has beaten us to it.
-	 */
 	*vpp = null_hashins(mp, xp);
-	if (*vpp != NULL) {
-		vrele(lowervp);
-		vp->v_object = NULL;	/* in case VIRF_PGREAD set it */
-		null_destroy_proto(vp, xp);
-		return (0);
-	}
+	MPASS(*vpp == NULL);
+	rw_wunlock(&null_hash_lock);
 
 	*vpp = vp;
 	return (0);
@@ -296,8 +291,7 @@ null_nodeget(mp, lowervp, vpp)
  * Remove node from hash.
  */
 void
-null_hashrem(xp)
-	struct null_node *xp;
+null_hashrem(struct null_node *xp)
 {
 
 	rw_wlock(&null_hash_lock);
@@ -308,10 +302,7 @@
 #ifdef DIAGNOSTIC
 
 struct vnode *
-null_checkvp(vp, fil, lno)
-	struct vnode *vp;
-	char *fil;
-	int lno;
+null_checkvp(struct vnode *vp, char *fil, int lno)
 {
 	struct null_node *a = VTONULL(vp);
 
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 73301c9275d2..98abe9ea1602 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -207,7 +207,7 @@ nullfs_mount(struct mount *mp)
 		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
 		    MNTK_EXTENDED_SHARED);
 	}
-	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT | MNTK_NOMSYNC;
+	mp->mnt_kern_flag |= MNTK_NOMSYNC;
 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
 	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
 	MNT_IUNLOCK(mp);
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 28d7ada0f434..b3ada4e581f5 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -757,9 +757,7 @@ compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
 {
 
 	if (mp == NULL || ((lkflags & LK_SHARED) &&
-	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
-	    ((cnflags & ISDOTDOT) &&
-	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
+	    !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
 		lkflags &= ~LK_SHARED;
 		lkflags |= LK_EXCLUSIVE;
 	}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 33a556fbfa2b..903d5ed40d86 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1955,7 +1955,7 @@ insmntque1(struct vnode *vp, struct mount *mp,
 	KASSERT(vp->v_mount == NULL,
 		("insmntque: vnode already on per mount vnode list"));
 	VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)"));
-	ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp");
+//	ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp");
 
 	/*
 	 * We acquire the vnode interlock early to ensure that the
@@ -4345,7 +4345,6 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 		MNT_KERN_FLAG(MNTK_NO_IOPF);
 		MNT_KERN_FLAG(MNTK_RECURSE);
 		MNT_KERN_FLAG(MNTK_UPPER_WAITER);
-		MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
 		MNT_KERN_FLAG(MNTK_USES_BCACHE);
 		MNT_KERN_FLAG(MNTK_FPLOOKUP);
 		MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 8368595b685b..d9514d88123b 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -486,7 +486,7 @@ struct mntoptnames {
 					   handle i/o state on EFAULT. */
#define	MNTK_RECURSE	0x00000200	/* pending recursive unmount */
 #define	MNTK_UPPER_WAITER 0x00000400	/* waiting to drain MNTK_UPPER_PENDING */
-#define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
+/* UNUSED		0x00000800 */
 #define	MNTK_UNMAPPED_BUFS	0x00002000
 #define	MNTK_USES_BCACHE	0x00004000 /* FS uses the buffer cache. */
 #define	MNTK_TEXT_REFS	0x00008000	/* Keep use ref for text */
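
A note on the locking protocol the null_subr.c hunks introduce: null_hashget() keeps its read-locked fast path, while null_nodeget() now takes null_hash_lock exclusively around the whole re-check/insmntque1()/null_hashins() sequence, so insertion into the hash no longer appears to depend on the nullfs vnode being exclusively locked (hence the dropped LK_UPGRADE block, the relaxed assertion in insmntque1(), and the removal of MNTK_LOOKUP_EXCL_DOTDOT so dotdot lookups can stay shared). The sketch below is a minimal, self-contained userland analogue of that lookup/allocate/re-check ordering, not code from the patch: a pthread rwlock stands in for null_hash_lock and a hypothetical struct node stands in for struct null_node.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical node type standing in for struct null_node. */
struct node {
	int		key;		/* stands in for the lower vnode */
	struct node	*next;		/* singly-linked hash chain */
};

static pthread_rwlock_t hash_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct node *hash_head;		/* a single bucket is enough here */

/* Lookup with hash_lock already held in either mode. */
static struct node *
lookup_locked(int key)
{
	struct node *n;

	for (n = hash_head; n != NULL; n = n->next)
		if (n->key == key)
			return (n);
	return (NULL);
}

/* Fast path: a shared lock is sufficient for a pure lookup. */
static struct node *
lookup(int key)
{
	struct node *n;

	pthread_rwlock_rdlock(&hash_lock);
	n = lookup_locked(key);
	pthread_rwlock_unlock(&hash_lock);
	return (n);
}

/*
 * Get-or-create: allocate outside the lock, then re-check under the
 * exclusive lock before inserting, discarding the allocation if another
 * thread won the race.  This mirrors the ordering null_nodeget() uses
 * after the change above.
 */
static struct node *
nodeget(int key)
{
	struct node *n, *new;

	if ((n = lookup(key)) != NULL)
		return (n);

	new = malloc(sizeof(*new));
	if (new == NULL)
		return (NULL);
	new->key = key;

	pthread_rwlock_wrlock(&hash_lock);
	n = lookup_locked(key);
	if (n != NULL) {
		/* Lost the race; undo the speculative allocation. */
		pthread_rwlock_unlock(&hash_lock);
		free(new);
		return (n);
	}
	/* Still write-locked: the insertion cannot create a duplicate. */
	new->next = hash_head;
	hash_head = new;
	pthread_rwlock_unlock(&hash_lock);
	return (new);
}

int
main(void)
{
	printf("node %d at %p\n", 42, (void *)nodeget(42));
	printf("node %d at %p\n", 42, (void *)nodeget(42));	/* same pointer */
	return (0);
}

The point of the ordering is that allocation stays outside the lock, so the exclusive section covers only the duplicate re-check and the list insertion; a loser of the race frees its speculative allocation and returns the winner's node, which is what the "lost race" paths in null_nodeget() do via null_insmntque_dtr().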