diff --git a/sys/fs/devfs/devfs.h b/sys/fs/devfs/devfs.h
index 5f64a267279..88e32ea99a5 100644
--- a/sys/fs/devfs/devfs.h
+++ b/sys/fs/devfs/devfs.h
@@ -153,6 +153,7 @@ struct devfs_dirent {
 	struct timespec	de_ctime;
 	struct vnode	*de_vnode;
 	char		*de_symlink;
+	int		de_usecount;
 };
 
 struct devfs_mount {
@@ -202,6 +203,9 @@ struct devfs_dirent *devfs_vmkdir(struct devfs_mount *, char *, int,
 struct devfs_dirent *devfs_find(struct devfs_dirent *, const char *, int,
     int);
 
+void devfs_ctty_ref(struct vnode *);
+void devfs_ctty_unref(struct vnode *);
+
 #endif /* _KERNEL */
 
 #endif /* !_FS_DEVFS_DEVFS_H_ */
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index c21c1128543..051052cb28c 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -222,6 +222,88 @@ devfs_clear_cdevpriv(void)
 	devfs_fpdrop(fp);
 }
 
+static void
+devfs_usecount_add(struct vnode *vp)
+{
+	struct devfs_dirent *de;
+	struct cdev *dev;
+
+	VI_LOCK(vp);
+	if (VN_IS_DOOMED(vp)) {
+		VI_UNLOCK(vp);
+		return;
+	}
+
+	de = vp->v_data;
+	dev = vp->v_rdev;
+	MPASS(de != NULL);
+	MPASS(dev != NULL);
+	dev_lock();
+	dev->si_usecount++;
+	de->de_usecount++;
+	dev_unlock();
+	VI_UNLOCK(vp);
+}
+
+static void
+devfs_usecount_sub_locked(struct vnode *vp)
+{
+	struct devfs_dirent *de;
+	struct cdev *dev;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+
+	de = vp->v_data;
+	dev = vp->v_rdev;
+	if (de == NULL)
+		return;
+	if (dev == NULL) {
+		MPASS(de->de_usecount == 0);
+		return;
+	}
+	dev_lock();
+	if (dev->si_usecount < de->de_usecount)
+		panic("%s: si_usecount underflow for dev %p "
+		    "(has %ld, dirent has %d)\n",
+		    __func__, dev, dev->si_usecount, de->de_usecount);
+	if (VN_IS_DOOMED(vp)) {
+		dev->si_usecount -= de->de_usecount;
+		de->de_usecount = 0;
+	} else {
+		if (de->de_usecount == 0)
+			panic("%s: de_usecount underflow for dev %p\n",
+			    __func__, dev);
+		dev->si_usecount--;
+		de->de_usecount--;
+	}
+	dev_unlock();
+}
+
+static void
+devfs_usecount_sub(struct vnode *vp)
+{
+
+	VI_LOCK(vp);
+	devfs_usecount_sub_locked(vp);
+	VI_UNLOCK(vp);
+}
+
+void
+devfs_ctty_ref(struct vnode *vp)
+{
+
+	vrefact(vp);
+	devfs_usecount_add(vp);
+}
+
+void
+devfs_ctty_unref(struct vnode *vp)
+{
+
+	devfs_usecount_sub(vp);
+	vrele(vp);
+}
+
 /*
  * On success devfs_populate_vp() returns with dmp->dm_lock held.
  */
@@ -480,7 +562,6 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
 	/* XXX: v_rdev should be protect by vnode lock */
 	vp->v_rdev = dev;
 	VNPASS(vp->v_usecount == 1, vp);
-	dev->si_usecount++;
 	/* Special casing of ttys for deadfs.  Probably redundant. */
 	dsw = dev->si_devsw;
 	if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
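The helpers added above replace the old vnode-based accounting: si_usecount now aggregates the de_usecount of every devfs_dirent alias referring to the cdev, bumped on open and drained wholesale once the vnode is doomed. A minimal sketch of the invariant the pair maintains; the assertion helper below is hypothetical and not part of the patch:

    /* Hypothetical assertion, for illustration only. */
    static void
    devfs_usecount_assert(struct cdev *dev, struct devfs_dirent *de)
    {
    	dev_lock();
    	/* The cdev-wide count can never be smaller than one alias' share. */
    	MPASS(dev->si_usecount >= de->de_usecount);
    	dev_unlock();
    }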
@@ -562,6 +643,7 @@ devfs_close(struct vop_close_args *ap)
 	struct proc *p;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
+	struct devfs_dirent *de = vp->v_data;
 	int dflags, error, ref, vp_locked;
 
 	/*
@@ -580,7 +662,7 @@
 	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
 	 */
-	if (vp->v_usecount == 2 && td != NULL) {
+	if (de->de_usecount == 2 && td != NULL) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (vp == p->p_session->s_ttyvp) {
@@ -590,7 +672,7 @@
 			if (vp == p->p_session->s_ttyvp) {
 				SESS_LOCK(p->p_session);
 				VI_LOCK(vp);
-				if (vp->v_usecount == 2 && vcount(vp) == 1 &&
+				if (de->de_usecount == 2 && vcount(vp) == 2 &&
 				    !VN_IS_DOOMED(vp)) {
 					p->p_session->s_ttyvp = NULL;
 					p->p_session->s_ttydp = NULL;
@@ -601,7 +683,7 @@
 			}
 			sx_xunlock(&proctree_lock);
 			if (oldvp != NULL)
-				vrele(oldvp);
+				devfs_ctty_unref(oldvp);
 		} else
 			PROC_UNLOCK(p);
 	}
@@ -619,8 +701,9 @@ devfs_close(struct vop_close_args *ap)
 		return (ENXIO);
 	dflags = 0;
 	VI_LOCK(vp);
-	if (vp->v_usecount == 1 && vcount(vp) == 1)
+	if (de->de_usecount == 1 && vcount(vp) == 1)
 		dflags |= FLASTCLOSE;
+	devfs_usecount_sub_locked(vp);
 	if (VN_IS_DOOMED(vp)) {
 		/* Forced close. */
 		dflags |= FREVOKE | FNONBLOCK;
@@ -843,7 +926,7 @@ devfs_ioctl(struct vop_ioctl_args *ap)
 		return (0);
 	}
 
-	vrefact(vp);
+	devfs_ctty_ref(vp);
 	SESS_LOCK(sess);
 	vpold = sess->s_ttyvp;
 	sess->s_ttyvp = vp;
@@ -1152,6 +1235,8 @@ devfs_open(struct vop_open_args *ap)
 		return (ENXIO);
 	}
 
+	devfs_usecount_add(vp);
+
 	vlocked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
@@ -1171,6 +1256,9 @@ devfs_open(struct vop_open_args *ap)
 	td->td_fpop = fpop;
 	vn_lock(vp, vlocked | LK_RETRY);
+	if (error != 0)
+		devfs_usecount_sub(vp);
+
 	dev_relthread(dev, ref);
 	if (error != 0) {
 		if (error == ERESTART)
@@ -1406,6 +1494,7 @@ devfs_reclaim(struct vop_reclaim_args *ap)
 	struct devfs_dirent *de;
 
 	vp = ap->a_vp;
+	devfs_usecount_sub(vp);
 	mtx_lock(&devfs_de_interlock);
 	de = vp->v_data;
 	if (de != NULL) {
@@ -1431,8 +1520,6 @@ devfs_reclaim_vchr(struct vop_reclaim_args *ap)
 	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
-	if (dev != NULL)
-		dev->si_usecount -= (vp->v_usecount > 0);
 	dev_unlock();
 	VI_UNLOCK(vp);
 	if (dev != NULL)
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index ce9554b6dfa..50449938c5c 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -88,6 +88,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
+#include <fs/devfs/devfs.h>
+
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_util.h>
@@ -858,7 +860,7 @@ killjobc(void)
 			VOP_REVOKE(ttyvp, REVOKEALL);
 			VOP_UNLOCK(ttyvp);
 		}
-		vrele(ttyvp);
+		devfs_ctty_unref(ttyvp);
 		sx_xlock(&proctree_lock);
 	}
 }
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 4c11ff56000..a6ed98f8629 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/ucred.h>
 #include <sys/vnode.h>
 
+#include <fs/devfs/devfs.h>
+
 #include <machine/stdarg.h>
 
 static MALLOC_DEFINE(M_TTY, "tty", "tty device");
@@ -1256,7 +1258,7 @@ tty_drop_ctty(struct tty *tp, struct proc *p)
 	 * is either changed or released.
 	 */
 	if (vp != NULL)
-		vrele(vp);
+		devfs_ctty_unref(vp);
 
 	return (0);
 }
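kern_proc.c and tty.c now release controlling-terminal vnodes through devfs_ctty_unref(), matching the devfs_ctty_ref() taken when the terminal is installed in the devfs_ioctl() hunk above, so the cdev usecount moves together with the vnode usecount. A sketch of the intended pairing, modeled on that hunk; the helper name is hypothetical and the error handling is abbreviated:

    /* Hypothetical helper showing the intended ref/unref pairing. */
    static void
    session_set_ctty(struct session *sess, struct vnode *vp)
    {
    	struct vnode *vpold;

    	devfs_ctty_ref(vp);		/* vnode ref + cdev usecount for the session */
    	SESS_LOCK(sess);
    	vpold = sess->s_ttyvp;
    	sess->s_ttyvp = vp;
    	SESS_UNLOCK(sess);
    	if (vpold != NULL)
    		devfs_ctty_unref(vpold);	/* drop the superseded reference */
    }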
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 16c493a1e39..5e679b12742 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -807,6 +807,15 @@ cache_negative_remove(struct namecache *ncp)
 	} else {
 		list_locked = true;
 		mtx_lock(&neglist->nl_lock);
+		/*
+		 * We may be racing against promotion in lockless lookup.
+		 */
+		if ((negstate->neg_flag & NEG_HOT) != 0) {
+			mtx_unlock(&neglist->nl_lock);
+			hot_locked = true;
+			mtx_lock(&ncneg_hot.nl_lock);
+			mtx_lock(&neglist->nl_lock);
+		}
 	}
 	if ((negstate->neg_flag & NEG_HOT) != 0) {
 		mtx_assert(&ncneg_hot.nl_lock, MA_OWNED);
@@ -3002,7 +3011,8 @@ cache_fpl_handled_impl(struct cache_fpl *fpl, int error, int line)
 #define cache_fpl_handled(x, e)	cache_fpl_handled_impl((x), (e), __LINE__)
 
 #define CACHE_FPL_SUPPORTED_CN_FLAGS \
-	(LOCKLEAF | FOLLOW | LOCKSHARED | SAVENAME | ISOPEN | AUDITVNODE1)
+	(LOCKLEAF | LOCKPARENT | WANTPARENT | FOLLOW | LOCKSHARED | SAVENAME | \
+	 ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2)
 
 static bool
 cache_can_fplookup(struct cache_fpl *fpl)
@@ -3059,6 +3069,103 @@ cache_fplookup_vnode_supported(struct vnode *vp)
 	return (vp->v_type != VLNK);
 }
 
+/*
+ * Move a negative entry to the hot list.
+ *
+ * We have to take locks, but they may be contended and in the worst
+ * case we may need to go off CPU. We don't want to spin within the
+ * smr section and we can't block with it. Instead we are going to
+ * look it up again.
+ */
+static int __noinline
+cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
+    uint32_t hash)
+{
+	struct componentname *cnp;
+	struct namecache *ncp;
+	struct neglist *neglist;
+	struct negstate *negstate;
+	struct vnode *dvp;
+	u_char nc_flag;
+
+	cnp = fpl->cnp;
+	dvp = fpl->dvp;
+
+	if (!vhold_smr(dvp))
+		return (cache_fpl_aborted(fpl));
+
+	neglist = NCP2NEGLIST(oncp);
+	cache_fpl_smr_exit(fpl);
+
+	mtx_lock(&ncneg_hot.nl_lock);
+	mtx_lock(&neglist->nl_lock);
+	/*
+	 * For hash iteration.
+	 */
+	cache_fpl_smr_enter(fpl);
+
+	/*
+	 * Avoid all surprises by only succeeding if we got the same entry and
+	 * bailing completely otherwise.
+	 *
+	 * In particular at this point there can be a new ncp which matches the
+	 * search but hashes to a different neglist.
+	 */
+	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+		if (ncp == oncp)
+			break;
+	}
+
+	/*
+	 * No match to begin with.
+	 */
+	if (__predict_false(ncp == NULL)) {
+		goto out_abort;
+	}
+
+	/*
+	 * The newly found entry may be something different...
+	 */
+	if (!(ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
+	    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))) {
+		goto out_abort;
+	}
+
+	/*
+	 * ... and not even negative.
+	 */
+	nc_flag = atomic_load_char(&ncp->nc_flag);
+	if ((nc_flag & NCF_NEGATIVE) == 0) {
+		goto out_abort;
+	}
+
+	if (__predict_false(cache_ncp_invalid(ncp))) {
+		goto out_abort;
+	}
+
+	negstate = NCP2NEGSTATE(ncp);
+	if ((negstate->neg_flag & NEG_HOT) == 0) {
+		numhotneg++;
+		TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
+		TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
+		negstate->neg_flag |= NEG_HOT;
+	}
+
+	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, ncp->nc_name);
+	counter_u64_add(numneghits, 1);
+	cache_fpl_smr_exit(fpl);
+	mtx_unlock(&neglist->nl_lock);
+	mtx_unlock(&ncneg_hot.nl_lock);
+	vdrop(dvp);
+	return (cache_fpl_handled(fpl, ENOENT));
+out_abort:
+	cache_fpl_smr_exit(fpl);
+	mtx_unlock(&neglist->nl_lock);
+	mtx_unlock(&ncneg_hot.nl_lock);
+	vdrop(dvp);
+	return (cache_fpl_aborted(fpl));
+}
+
 /*
  * The target vnode is not supported, prepare for the slow path to take over.
  */
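cache_fplookup_negative_promote() above follows a standard pattern for leaving an SMR section in order to sleep on locks: pin the directory vnode, exit SMR, take the mutexes in the assumed order (hot list before per-bucket list, mirroring the cache_negative_remove() dance), then re-enter SMR and re-validate the entry before acting on it. In outline, restating the steps already shown above rather than adding new logic:

    if (!vhold_smr(dvp))			/* pin dvp before leaving SMR */
    	return (cache_fpl_aborted(fpl));
    cache_fpl_smr_exit(fpl);		/* now safe to sleep on mutexes */
    mtx_lock(&ncneg_hot.nl_lock);		/* hot lock first (assumed lock order) */
    mtx_lock(&neglist->nl_lock);
    cache_fpl_smr_enter(fpl);		/* re-enter solely for the hash walk */
    /* re-find the entry and re-check dvp, name and NCF_NEGATIVE */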
@@ -3101,10 +3208,42 @@ cache_fplookup_partial_setup(struct cache_fpl *fpl)
 }
 
 static int
-cache_fplookup_final(struct cache_fpl *fpl)
+cache_fplookup_final_child(struct cache_fpl *fpl, enum vgetstate tvs)
 {
 	struct componentname *cnp;
-	enum vgetstate tvs;
+	struct vnode *tvp;
+	seqc_t tvp_seqc;
+	int error;
+
+	cnp = fpl->cnp;
+	tvp = fpl->tvp;
+	tvp_seqc = fpl->tvp_seqc;
+
+	if ((cnp->cn_flags & LOCKLEAF) != 0) {
+		error = vget_finish(tvp, cnp->cn_lkflags, tvs);
+		if (error != 0) {
+			return (cache_fpl_aborted(fpl));
+		}
+	} else {
+		vget_finish_ref(tvp, tvs);
+	}
+
+	if (!vn_seqc_consistent(tvp, tvp_seqc)) {
+		if ((cnp->cn_flags & LOCKLEAF) != 0)
+			vput(tvp);
+		else
+			vrele(tvp);
+		return (cache_fpl_aborted(fpl));
+	}
+
+	return (cache_fpl_handled(fpl, 0));
+}
+
+static int __noinline
+cache_fplookup_final_withparent(struct cache_fpl *fpl)
+{
+	enum vgetstate dvs, tvs;
+	struct componentname *cnp;
 	struct vnode *dvp, *tvp;
 	seqc_t dvp_seqc, tvp_seqc;
 	int error;
@@ -3115,39 +3254,90 @@ cache_fplookup_final(struct cache_fpl *fpl)
 	tvp = fpl->tvp;
 	tvp_seqc = fpl->tvp_seqc;
 
-	VNPASS(cache_fplookup_vnode_supported(dvp), dvp);
+	MPASS((cnp->cn_flags & (LOCKPARENT|WANTPARENT)) != 0);
 
+	/*
+	 * This is less efficient than it can be for simplicity.
+	 */
+	dvs = vget_prep_smr(dvp);
+	if (dvs == VGET_NONE) {
+		return (cache_fpl_aborted(fpl));
+	}
 	tvs = vget_prep_smr(tvp);
 	if (tvs == VGET_NONE) {
-		return (cache_fpl_partial(fpl));
-	}
-
-	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		cache_fpl_smr_exit(fpl);
-		vget_abort(tvp, tvs);
+		vget_abort(dvp, dvs);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	cache_fpl_smr_exit(fpl);
 
-	if ((cnp->cn_flags & LOCKLEAF) != 0) {
-		error = vget_finish(tvp, cnp->cn_lkflags, tvs);
+	if ((cnp->cn_flags & LOCKPARENT) != 0) {
+		error = vget_finish(dvp, LK_EXCLUSIVE, dvs);
 		if (error != 0) {
+			vget_abort(tvp, tvs);
 			return (cache_fpl_aborted(fpl));
 		}
 	} else {
-		vget_finish_ref(tvp, tvs);
+		vget_finish_ref(dvp, dvs);
 	}
 
-	if (!vn_seqc_consistent(tvp, tvp_seqc)) {
-		if ((cnp->cn_flags & LOCKLEAF) != 0)
-			vput(tvp);
+	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
+		vget_abort(tvp, tvs);
+		if ((cnp->cn_flags & LOCKPARENT) != 0)
+			vput(dvp);
 		else
-			vrele(tvp);
+			vrele(dvp);
+		return (cache_fpl_aborted(fpl));
+	}
+
+	error = cache_fplookup_final_child(fpl, tvs);
+	if (error != 0) {
+		MPASS(fpl->status == CACHE_FPL_STATUS_ABORTED);
+		if ((cnp->cn_flags & LOCKPARENT) != 0)
+			vput(dvp);
+		else
+			vrele(dvp);
+		return (error);
+	}
+
+	MPASS(fpl->status == CACHE_FPL_STATUS_HANDLED);
+	return (0);
+}
+
+static int
+cache_fplookup_final(struct cache_fpl *fpl)
+{
+	struct componentname *cnp;
+	enum vgetstate tvs;
+	struct vnode *dvp, *tvp;
+	seqc_t dvp_seqc, tvp_seqc;
+
+	cnp = fpl->cnp;
+	dvp = fpl->dvp;
+	dvp_seqc = fpl->dvp_seqc;
+	tvp = fpl->tvp;
+	tvp_seqc = fpl->tvp_seqc;
+
+	VNPASS(cache_fplookup_vnode_supported(dvp), dvp);
+
+	if ((cnp->cn_flags & (LOCKPARENT|WANTPARENT)) != 0)
+		return (cache_fplookup_final_withparent(fpl));
+
+	tvs = vget_prep_smr(tvp);
+	if (tvs == VGET_NONE) {
+		return (cache_fpl_partial(fpl));
+	}
+
+	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
+		cache_fpl_smr_exit(fpl);
+		vget_abort(tvp, tvs);
 		return (cache_fpl_aborted(fpl));
 	}
 
-	return (cache_fpl_handled(fpl, 0));
+	cache_fpl_smr_exit(fpl);
+	return (cache_fplookup_final_child(fpl, tvs));
 }
 
 static int
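cache_fplookup_final_child() isolates the reference/validation dance shared by both final paths: a speculative reference is prepared inside the SMR section, materialized after leaving it, and the vnode's sequence counter is then rechecked to detect concurrent modification. In outline, a sketch of the code above with error handling abbreviated:

    tvs = vget_prep_smr(tvp);		/* speculative ref, only valid in SMR */
    if (tvs == VGET_NONE)
    	return (cache_fpl_partial(fpl));	/* vnode is being recycled */
    cache_fpl_smr_exit(fpl);
    vget_finish_ref(tvp, tvs);		/* materialize the use reference */
    if (!vn_seqc_consistent(tvp, tvp_seqc)) {
    	vrele(tvp);			/* raced with a modification: roll back */
    	return (cache_fpl_aborted(fpl));
    }
    return (cache_fpl_handled(fpl, 0));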
@@ -3203,12 +3393,7 @@ cache_fplookup_next(struct cache_fpl *fpl)
 			return (cache_fpl_partial(fpl));
 		}
 		if (!neg_hot) {
-			/*
-			 * TODO
-			 * Promoting to hot negative requires locks, thus is
-			 * left not yet supported for simplicity.
-			 */
-			return (cache_fpl_partial(fpl));
+			return (cache_fplookup_negative_promote(fpl, ncp, hash));
 		}
 		SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 		    ncp->nc_name);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2eec4677432..07599cbb0f9 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -108,8 +108,6 @@ static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_init_counters(struct vnode *);
-static void	v_incr_devcount(struct vnode *);
-static void	v_decr_devcount(struct vnode *);
 static void	vgonel(struct vnode *);
 static void	vfs_knllock(void *arg);
 static void	vfs_knlunlock(void *arg);
@@ -2813,59 +2811,6 @@ v_init_counters(struct vnode *vp)
 	refcount_init(&vp->v_usecount, 1);
 }
 
-/*
- * Increment si_usecount of the associated device, if any.
- */
-static void
-v_incr_devcount(struct vnode *vp)
-{
-
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		vp->v_rdev->si_usecount++;
-		dev_unlock();
-	}
-}
-
-/*
- * Decrement si_usecount of the associated device, if any.
- *
- * The caller is required to hold the interlock when transitioning a VCHR use
- * count to zero. This prevents a race with devfs_reclaim_vchr() that would
- * leak a si_usecount reference. The vnode lock will also prevent this race
- * if it is held while dropping the last ref.
- *
- * The race is:
- *
- * CPU1				CPU2
- *				devfs_reclaim_vchr
- * make v_usecount == 0
- *				  VI_LOCK
- *				  sees v_usecount == 0, no updates
- *				  vp->v_rdev = NULL;
- *				  ...
- *				  VI_UNLOCK
- * VI_LOCK
- * v_decr_devcount
- *   sees v_rdev == NULL, no updates
- *
- * In this scenario si_devcount decrement is not performed.
- */
-static void
-v_decr_devcount(struct vnode *vp)
-{
-
-	ASSERT_VOP_LOCKED(vp, __func__);
-	ASSERT_VI_LOCKED(vp, __FUNCTION__);
-	if (vp->v_type == VCHR && vp->v_rdev != NULL) {
-		dev_lock();
-		VNPASS(vp->v_rdev->si_usecount > 0, vp);
-		vp->v_rdev->si_usecount--;
-		dev_unlock();
-	}
-}
-
 /*
  * Grab a particular vnode from the free list, increment its
  * reference count and lock it.  VIRF_DOOMED is set if the vnode
@@ -2941,41 +2886,6 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	return (vget_finish(vp, flags, vs));
 }
 
-static void __noinline
-vget_finish_vchr(struct vnode *vp)
-{
-
-	VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)"));
-
-	/*
-	 * See the comment in vget_finish before usecount bump.
-	 */
-	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
-#ifdef INVARIANTS
-		int old = atomic_fetchadd_int(&vp->v_holdcnt, -1);
-		VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old));
-#else
-		refcount_release(&vp->v_holdcnt);
-#endif
-		return;
-	}
-
-	VI_LOCK(vp);
-	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
-#ifdef INVARIANTS
-		int old = atomic_fetchadd_int(&vp->v_holdcnt, -1);
-		VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old));
-#else
-		refcount_release(&vp->v_holdcnt);
-#endif
-		VI_UNLOCK(vp);
-		return;
-	}
-	v_incr_devcount(vp);
-	refcount_acquire(&vp->v_usecount);
-	VI_UNLOCK(vp);
-}
-
 int
 vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
 {
@@ -3013,11 +2923,6 @@ vget_finish_ref(struct vnode *vp, enum vgetstate vs)
 	if (vs == VGET_USECOUNT)
 		return;
 
-	if (__predict_false(vp->v_type == VCHR)) {
-		vget_finish_vchr(vp);
-		return;
-	}
-
 	/*
 	 * We hold the vnode. If the usecount is 0 it will be utilized to keep
	 * the vnode around. Otherwise someone else lended their hold count and
@@ -3039,61 +2944,12 @@ vget_finish_ref(struct vnode *vp, enum vgetstate vs)
  * Increase the reference (use) and hold count of a vnode.
  * This will also remove the vnode from the free list if it is presently free.
  */
-static void __noinline
-vref_vchr(struct vnode *vp, bool interlock)
-{
-
-	/*
-	 * See the comment in vget_finish before usecount bump.
-	 */
-	if (!interlock) {
-		if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
-			VNODE_REFCOUNT_FENCE_ACQ();
-			VNASSERT(vp->v_holdcnt > 0, vp,
-			    ("%s: active vnode not held", __func__));
-			return;
-		}
-		VI_LOCK(vp);
-		/*
-		 * By the time we get here the vnode might have been doomed, at
-		 * which point the 0->1 use count transition is no longer
-		 * protected by the interlock. Since it can't bounce back to
-		 * VCHR and requires vref semantics, punt it back
-		 */
-		if (__predict_false(vp->v_type == VBAD)) {
-			VI_UNLOCK(vp);
-			vref(vp);
-			return;
-		}
-	}
-	VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)"));
-	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
-		VNODE_REFCOUNT_FENCE_ACQ();
-		VNASSERT(vp->v_holdcnt > 0, vp,
-		    ("%s: active vnode not held", __func__));
-		if (!interlock)
-			VI_UNLOCK(vp);
-		return;
-	}
-	vhold(vp);
-	v_incr_devcount(vp);
-	refcount_acquire(&vp->v_usecount);
-	if (!interlock)
-		VI_UNLOCK(vp);
-	return;
-}
-
 void
 vref(struct vnode *vp)
 {
 	int old;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (__predict_false(vp->v_type == VCHR)) {
-		vref_vchr(vp, false);
-		return;
-	}
-
 	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
 		VNODE_REFCOUNT_FENCE_ACQ();
 		VNASSERT(vp->v_holdcnt > 0, vp,
@@ -3122,10 +2978,6 @@ vrefl(struct vnode *vp)
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (__predict_false(vp->v_type == VCHR)) {
-		vref_vchr(vp, true);
-		return;
-	}
 	vref(vp);
 }
@@ -3266,9 +3118,6 @@ enum vput_op { VRELE, VPUT, VUNREF };
  * By releasing the last usecount we take ownership of the hold count which
  * provides liveness of the vnode, meaning we have to vdrop.
  *
- * If the vnode is of type VCHR we may need to decrement si_usecount, see
- * v_decr_devcount for details.
- *
 * For all vnodes we may need to perform inactive processing. It requires an
 * exclusive lock on the vnode, while it is legal to call here with only a
 * shared lock (or no locks). If locking the vnode in an expected manner fails,
@@ -3289,8 +3138,6 @@ vput_final(struct vnode *vp, enum vput_op func)
 	VNPASS(vp->v_holdcnt > 0, vp);
 
 	VI_LOCK(vp);
-	if (__predict_false(vp->v_type == VCHR && func != VRELE))
-		v_decr_devcount(vp);
 
 	/*
 	 * By the time we got here someone else might have transitioned
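With the VCHR special cases removed, every use-count release funnels through the same vput_final() logic. The one invariant this relies on, stated as an assertion (illustrative only, not a line from the patch): each use reference is backed by a hold reference, so whoever releases the last use count still owns a hold count to vdrop later.

    /* Illustrative: usecount implies holdcnt, hence vdrop() after vput_final(). */
    MPASS(vp->v_holdcnt >= vp->v_usecount);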
@@ -3378,28 +3225,9 @@ vput_final(struct vnode *vp, enum vput_op func)
 * Releasing the last use count requires additional processing, see vput_final
 * above for details.
 *
- * Note that releasing use count without the vnode lock requires special casing
- * for VCHR, see v_decr_devcount for details.
- *
 * Comment above each variant denotes lock state on entry and exit.
 */
-static void __noinline
-vrele_vchr(struct vnode *vp)
-{
-
-	if (refcount_release_if_not_last(&vp->v_usecount))
-		return;
-	VI_LOCK(vp);
-	if (!refcount_release(&vp->v_usecount)) {
-		VI_UNLOCK(vp);
-		return;
-	}
-	v_decr_devcount(vp);
-	VI_UNLOCK(vp);
-	vput_final(vp, VRELE);
-}
-
 /*
 * in: any
 * out: same as passed in
@@ -3409,10 +3237,6 @@ vrele(struct vnode *vp)
 {
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
-	if (__predict_false(vp->v_type == VCHR)) {
-		vrele_vchr(vp);
-		return;
-	}
 	if (!refcount_release(&vp->v_usecount))
 		return;
 	vput_final(vp, VRELE);
@@ -5597,21 +5421,21 @@ vop_rename_pre(void *ap)
 
 #ifdef DEBUG_VFS_LOCKS
 void
-vop_fplookup_vexec_pre(void *ap __unused)
+vop_fplookup_vexec_debugpre(void *ap __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 void
-vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
+vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 void
-vop_strategy_pre(void *ap)
+vop_strategy_debugpre(void *ap)
 {
 	struct vop_strategy_args *a;
 	struct buf *bp;
@@ -5635,7 +5459,7 @@
 }
 
 void
-vop_lock_pre(void *ap)
+vop_lock_debugpre(void *ap)
 {
 	struct vop_lock1_args *a = ap;
 
@@ -5646,7 +5470,7 @@
 }
 
 void
-vop_lock_post(void *ap, int rc)
+vop_lock_debugpost(void *ap, int rc)
 {
 	struct vop_lock1_args *a = ap;
 
@@ -5656,7 +5480,7 @@
 }
 
 void
-vop_unlock_pre(void *ap)
+vop_unlock_debugpre(void *ap)
 {
 	struct vop_unlock_args *a = ap;
 
@@ -5664,7 +5488,7 @@
 }
 
 void
-vop_need_inactive_pre(void *ap)
+vop_need_inactive_debugpre(void *ap)
 {
 	struct vop_need_inactive_args *a = ap;
 
@@ -5672,7 +5496,7 @@
 }
 
 void
-vop_need_inactive_post(void *ap, int rc)
+vop_need_inactive_debugpost(void *ap, int rc)
 {
 	struct vop_need_inactive_args *a = ap;
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 2caf09f3412..7af735f6f2b 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -4197,7 +4197,7 @@ sys_revoke(struct thread *td, struct revoke_args *uap)
 		if (error != 0)
 			goto out;
 	}
-	if (vp->v_usecount > 1 || vcount(vp) > 1)
+	if (vcount(vp) > 0)
 		VOP_REVOKE(vp, REVOKEALL);
 out:
 	vput(vp);
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 5c0649fdada..e5a7b389fb3 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -147,8 +147,8 @@ vop_close {
 
 %% fplookup_vexec	vp	-	-	-
 
-%! fplookup_vexec	pre	vop_fplookup_vexec_pre
-%! fplookup_vexec	post	vop_fplookup_vexec_post
+%! fplookup_vexec	debugpre	vop_fplookup_vexec_debugpre
+%! fplookup_vexec	debugpost	vop_fplookup_vexec_debugpost
 
 vop_fplookup_vexec {
 	IN struct vnode *vp;
@@ -379,8 +379,8 @@ vop_inactive {
 	IN struct thread *td;
 };
 
-%! need_inactive	pre	vop_need_inactive_pre
-%! need_inactive	post	vop_need_inactive_post
+%! need_inactive	debugpre	vop_need_inactive_debugpre
+%! need_inactive	debugpost	vop_need_inactive_debugpost
 
 vop_need_inactive {
 	IN struct vnode *vp;
@@ -395,8 +395,8 @@ vop_reclaim {
 };
 
 
-%! lock1	pre	vop_lock_pre
-%! lock1	post	vop_lock_post
+%! lock1	debugpre	vop_lock_debugpre
+%! lock1	debugpost	vop_lock_debugpost
 
 vop_lock1 {
 	IN struct vnode *vp;
@@ -406,7 +406,7 @@ vop_lock1 {
 };
 
 
-%! unlock	pre	vop_unlock_pre
+%! unlock	debugpre	vop_unlock_debugpre
 
 vop_unlock {
 	IN struct vnode *vp;
@@ -426,7 +426,7 @@ vop_bmap {
 
 %% strategy	vp	L	L	L
 
-%! strategy	pre	vop_strategy_pre
+%! strategy	debugpre	vop_strategy_debugpre
 
 vop_strategy {
 	IN struct vnode *vp;
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 8273842a91f..3a83ea5af6e 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -869,23 +869,23 @@ void	vop_symlink_post(void *a, int rc);
 int	vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
 
 #ifdef DEBUG_VFS_LOCKS
-void	vop_fplookup_vexec_pre(void *a);
-void	vop_fplookup_vexec_post(void *a, int rc);
-void	vop_strategy_pre(void *a);
-void	vop_lock_pre(void *a);
-void	vop_lock_post(void *a, int rc);
-void	vop_unlock_pre(void *a);
-void	vop_need_inactive_pre(void *a);
-void	vop_need_inactive_post(void *a, int rc);
+void	vop_fplookup_vexec_debugpre(void *a);
+void	vop_fplookup_vexec_debugpost(void *a, int rc);
+void	vop_strategy_debugpre(void *a);
+void	vop_lock_debugpre(void *a);
+void	vop_lock_debugpost(void *a, int rc);
+void	vop_unlock_debugpre(void *a);
+void	vop_need_inactive_debugpre(void *a);
+void	vop_need_inactive_debugpost(void *a, int rc);
 #else
-#define	vop_fplookup_vexec_pre(x)	do { } while (0)
-#define	vop_fplookup_vexec_post(x, y)	do { } while (0)
-#define	vop_strategy_pre(x)	do { } while (0)
-#define	vop_lock_pre(x)	do { } while (0)
-#define	vop_lock_post(x, y)	do { } while (0)
-#define	vop_unlock_pre(x)	do { } while (0)
-#define	vop_need_inactive_pre(x)	do { } while (0)
-#define	vop_need_inactive_post(x, y)	do { } while (0)
+#define	vop_fplookup_vexec_debugpre(x)	do { } while (0)
+#define	vop_fplookup_vexec_debugpost(x, y)	do { } while (0)
+#define	vop_strategy_debugpre(x)	do { } while (0)
+#define	vop_lock_debugpre(x)	do { } while (0)
+#define	vop_lock_debugpost(x, y)	do { } while (0)
+#define	vop_unlock_debugpre(x)	do { } while (0)
+#define	vop_need_inactive_debugpre(x)	do { } while (0)
+#define	vop_need_inactive_debugpost(x, y)	do { } while (0)
 #endif
 
 void	vop_rename_fail(struct vop_rename_args *ap);
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
index cd138bef75d..486f0e6b2ce 100644
--- a/sys/tools/vnode_if.awk
+++ b/sys/tools/vnode_if.awk
@@ -87,6 +87,24 @@ function add_debug_code(name, arg, pos, ind)
 	}
 }
 
+function add_debugpre(name)
+{
+	if (lockdata[name, "debugpre"]) {
+		printc("#ifdef DEBUG_VFS_LOCKS");
+		printc("\t"lockdata[name, "debugpre"]"(a);");
+		printc("#endif");
+	}
+}
+
+function add_debugpost(name)
+{
+	if (lockdata[name, "debugpost"]) {
+		printc("#ifdef DEBUG_VFS_LOCKS");
+		printc("\t"lockdata[name, "debugpost"]"(a, rc);");
+		printc("#endif");
+	}
+}
+
 function add_pre(name)
 {
 	if (lockdata[name, "pre"]) {
@@ -101,6 +119,15 @@ function add_post(name)
 	}
 }
 
+function can_inline(name)
+{
+	if (lockdata[name, "pre"])
+		return 0;
+	if (lockdata[name, "post"])
+		return 0;
+	return 1;
+}
+
 function find_arg_with_type (type)
 {
 	for (jj = 0; jj < numargs; jj++) {
@@ -213,7 +240,8 @@ while ((getline < srcfile) > 0) {
 
 	if ($1 ~ /^%!/) {
 		if (NF != 4 ||
-		    ($3 != "pre" && $3 != "post")) {
+		    ($3 != "pre" && $3 != "post" &&
+		     $3 != "debugpre" && $3 != "debugpost")) {
 			die("Invalid %s construction", "%!");
 			continue;
 		}
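can_inline() gates the fast dispatch emitted by the following hunk: only interfaces with no pre/post hooks qualify, while the renamed debug hooks do not count since they compile away outside DEBUG_VFS_LOCKS. For a qualifying VOP the generated header wrapper would look roughly as follows — a hand-expanded illustration of the awk output, assuming vop_bmap qualifies; it is not text from the patch:

    static __inline int
    VOP_BMAP(struct vnode *vp, daddr_t bn, struct bufobj **bop, daddr_t *bnp,
        int *runp, int *runb)
    {
    	struct vop_bmap_args a;

    	a.a_gen.a_desc = &vop_bmap_desc;
    	a.a_vp = vp;
    	a.a_bn = bn;
    	a.a_bop = bop;
    	a.a_bnp = bnp;
    	a.a_runp = runp;
    	a.a_runb = runb;
    #if !defined(DEBUG_VFS_LOCKS) && !defined(INVARIANTS) && !defined(KTR)
    	if (!SDT_PROBES_ENABLED())
    		return (vp->v_op->vop_bmap(&a));	/* direct call, no wrapper */
    	else
    		return (VOP_BMAP_APV(vp->v_op, &a));
    #else
    	return (VOP_BMAP_APV(vp->v_op, &a));
    #endif
    }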
uname "_APV("args[0]"->v_op, &a));"); + if (can_inline(name)) + printh("#endif"); + printh("}"); printh(""); @@ -364,6 +403,7 @@ while ((getline < srcfile) > 0) { printc("\t (\"Wrong a_desc in " name "(%p, %p)\", a->a_" args[0]", a));"); printc("\tVNASSERT(vop != NULL, a->a_" args[0]", (\"No "name"(%p, %p)\", a->a_" args[0]", a));") printc("\tKTR_START" ctrstr); + add_debugpre(name); add_pre(name); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Entry", "\t"); @@ -382,6 +422,7 @@ while ((getline < srcfile) > 0) { add_debug_code(name, args[i], "Error", "\t\t"); printc("\t}"); add_post(name); + add_debugpost(name); printc("\tKTR_STOP" ctrstr); printc("\treturn (rc);"); printc("}\n");