diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 3f50a91..c6f492c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -856,6 +856,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, } } + getnewvnode_reserve(1); ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); @@ -1042,6 +1043,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, KASSERT(err == 0, ("insmntque() failed: error %d", err)); } ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); + getnewvnode_drop_reserve(); } /* @@ -1152,12 +1154,14 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) *zpp = NULL; + getnewvnode_reserve(1); again: ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + getnewvnode_drop_reserve(); return (err); } @@ -1168,6 +1172,7 @@ again: doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + getnewvnode_drop_reserve(); return (EINVAL); } @@ -1231,6 +1236,7 @@ again: sa_buf_rele(db, NULL); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + getnewvnode_drop_reserve(); return (err); } @@ -1266,6 +1272,7 @@ again: } } ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + getnewvnode_drop_reserve(); return (err); } diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c index a7ec8b9..4854119 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c @@ -67,12 +67,10 @@ int traverse(vnode_t **cvpp, int lktype) { vnode_t *cvp; - vnode_t *tvp; vfs_t *vfsp; int error; cvp = *cvpp; - tvp = NULL; /* * If this vnode is mounted on, then we transparently indirect @@ -88,22 +86,19 @@ 
traverse(vnode_t **cvpp, int lktype) vfsp = vn_mountedvfs(cvp); if (vfsp == NULL) break; - /* - * tvp is NULL for *cvpp vnode, which we can't unlock. - */ - if (tvp != NULL) - vput(cvp); - else - vrele(cvp); + error = vfs_busy(vfsp, 0); + vput(cvp); + if (error) + return (error); /* * The read lock must be held across the call to VFS_ROOT() to * prevent a concurrent unmount from destroying the vfs. */ - error = VFS_ROOT(vfsp, lktype, &tvp); + error = VFS_ROOT(vfsp, lktype, &cvp); + vfs_unbusy(vfsp); if (error != 0) return (error); - cvp = tvp; } *cvpp = cvp; diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c index a266eca..d04b3f3 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -134,13 +134,13 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, return (ENODEV); vp = *vpp; + ASSERT_VOP_ELOCKED(vp, __func__); if (vp->v_type != VDIR) return (ENOTDIR); /* * We need vnode lock to protect v_mountedhere and vnode interlock * to protect v_iflag. 
*/ - vn_lock(vp, LK_SHARED | LK_RETRY); VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { VI_UNLOCK(vp); @@ -191,6 +191,9 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, td->td_ucred = cr; if (error != 0) { + VI_LOCK(vp); + vp->v_iflag &= ~VI_MOUNT; + VI_UNLOCK(vp); vrele(vp); vfs_unbusy(mp); vfs_mount_destroy(mp); @@ -225,7 +228,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); - vput(vp); + VOP_UNLOCK(vp, 0); vfs_unbusy(mp); *vpp = mvp; return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c index 436918b..85cb672 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c @@ -433,8 +433,11 @@ gfs_readdir_fini(gfs_readdir_state_t *st, int error, int *eofp, int eof) * Performs a basic check for "." and ".." directory entries. */ int -gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) +gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm, + int flags) { + int err; + if (*nm == '\0' || strcmp(nm, ".") == 0) { VN_HOLD(dvp); *vpp = dvp; @@ -444,12 +447,16 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) ASSERT(dvp->v_flag & VROOT); VN_HOLD(dvp); *vpp = dvp; + return (0); } else { VN_HOLD(pvp); - *vpp = pvp; + err = vn_lock(pvp, flags); + if (err != 0) + VN_RELE(pvp); + else + *vpp = pvp; + return (err); } - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - return (0); } return (-1); @@ -900,13 +907,16 @@ gfs_dir_lookup_static(int (*compare)(const char *, const char *), * a callback function we try a dynamic lookup via gfs_dir_lookup_dynamic(). * * This function returns 0 on success, non-zero on error. + * + * In FreeBSD the vnode is returned referenced and locked. 
+ * The 'flags' argument is repurposed for the lock flags. */ int gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr, int flags, int *direntflags, pathname_t *realpnp) { gfs_dir_t *dp = dvp->v_data; - boolean_t casecheck; + boolean_t casecheck = B_FALSE;; vnode_t *dynvp = NULL; vnode_t *vp = NULL; int (*compare)(const char *, const char *); @@ -914,16 +924,19 @@ gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr, ASSERT(dvp->v_type == VDIR); - if (gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm) == 0) - return (0); - + error = gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm, flags); + if (error != -1) + return (error); +#ifdef sun casecheck = (flags & FIGNORECASE) != 0 && direntflags != NULL; if (vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) || (flags & FIGNORECASE)) compare = strcasecmp; else +#endif compare = strcmp; +retry_doomed: gfs_dir_lock(dp); error = gfs_dir_lookup_static(compare, dp, nm, dvp, &idx, &vp, realpnp); @@ -944,7 +957,7 @@ gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr, if ((error || casecheck) && dp->gfsd_lookup) error = gfs_dir_lookup_dynamic(dp->gfsd_lookup, dp, nm, dvp, - &dynvp, cr, flags, direntflags, vp ? NULL : realpnp); + &dynvp, cr, 0, direntflags, vp ? 
NULL : realpnp); if (vp && dynvp) { /* static and dynamic entries are case-insensitive conflict */ @@ -963,6 +976,14 @@ gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr, out: gfs_dir_unlock(dp); + if (error == 0 && vp != NULL) + error = vn_lock(vp, flags); + if (error != 0) { + VN_RELE(vp); + vp = NULL; + } + if (error == ENOENT && (flags & LK_NOWAIT) == 0) + goto retry_doomed; *vpp = vp; return (error); } @@ -1220,7 +1241,7 @@ gfs_vop_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, /* ARGSUSED */ int gfs_vop_inactive(ap) - struct vop_inactive_args /* { + struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; @@ -1233,6 +1254,8 @@ gfs_vop_inactive(ap) else gfs_file_inactive(vp); + vnode_destroy_vobject(vp); + VI_LOCK(vp); vp->v_data = NULL; VI_UNLOCK(vp); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h index d3955d7..270a50f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h @@ -207,8 +207,6 @@ typedef struct znode { list_node_t z_link_node; /* all znodes in fs link */ sa_handle_t *z_sa_hdl; /* handle to sa data */ boolean_t z_is_sa; /* are we native sa? */ - /* FreeBSD-specific field. */ - struct task z_task; } znode_t; @@ -255,7 +253,6 @@ VTOZ(vnode_t *vp) /* * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation. - * ZFS_ENTER_NOERROR() is called when we can't return EIO. * ZFS_EXIT() must be called before exitting the vop. * ZFS_VERIFY_ZP() verifies the znode is valid. 
*/ @@ -268,9 +265,6 @@ VTOZ(vnode_t *vp) } \ } -#define ZFS_ENTER_NOERROR(zfsvfs) \ - rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG) - #define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG) #define ZFS_VERIFY_ZP(zp) \ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 65fc902..575196a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -509,6 +509,9 @@ zfsctl_root_getattr(ap) /* * Special case the handling of "..". + * + * On FreeBSD the vnode is returned referenced and locked according + * to 'flags' repurposed for passing LK_ flags. */ /* ARGSUSED */ int @@ -528,9 +531,7 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, ZFS_ENTER(zfsvfs); if (strcmp(nm, "..") == 0) { - err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp); - if (err == 0) - VOP_UNLOCK(*vpp, 0); + err = VFS_ROOT(dvp->v_vfsp, flags, vpp); } else { err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, ct, direntflags, realpnp); @@ -592,6 +593,7 @@ zfsctl_freebsd_root_lookup(ap) vnode_t **vpp = ap->a_vpp; cred_t *cr = ap->a_cnp->cn_cred; int flags = ap->a_cnp->cn_flags; + int lkflags = ap->a_cnp->cn_lkflags; int nameiop = ap->a_cnp->cn_nameiop; char nm[NAME_MAX + 1]; int err; @@ -602,9 +604,9 @@ zfsctl_freebsd_root_lookup(ap) ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); - err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); - if (err == 0 && (nm[0] != '.' 
|| nm[1] != '\0')) - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + err = zfsctl_root_lookup(dvp, nm, vpp, NULL, lkflags, NULL, cr, + NULL, NULL, NULL); + return (err); } @@ -617,8 +619,8 @@ static struct vop_vector zfsctl_ops_root = { .vop_access = zfsctl_common_access, .vop_readdir = gfs_vop_readdir, .vop_lookup = zfsctl_freebsd_root_lookup, - .vop_inactive = gfs_vop_inactive, - .vop_reclaim = zfsctl_common_reclaim, + .vop_inactive = VOP_NULL, + .vop_reclaim = gfs_vop_inactive, #ifdef TODO .vop_pathconf = zfsctl_pathconf, #endif @@ -937,6 +939,7 @@ zfsctl_snapdir_lookup(ap) size_t mountpoint_len; avl_index_t where; zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; + int lkflags = cnp->cn_lkflags; int err; int flags = 0; @@ -963,9 +966,10 @@ zfsctl_snapdir_lookup(ap) ZFS_ENTER(zfsvfs); - if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + err = gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm, lkflags); + if (err != -1) { ZFS_EXIT(zfsvfs); - return (0); + return (err); } if (flags & FIGNORECASE) { @@ -987,21 +991,29 @@ zfsctl_snapdir_lookup(ap) *direntflags = ED_CASE_CONFLICT; #endif } - +retry: mutex_enter(&sdp->sd_lock); search.se_name = (char *)nm; if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { *vpp = sep->se_root; - VN_HOLD(*vpp); - err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY); + err = vget(*vpp, LK_EXCLUSIVE, curthread); + if (err == ENOENT && (lkflags & LK_NOWAIT) == 0) { + /* + * The vnode is doomed, the entry is being removed. + */ + mutex_exit(&sdp->sd_lock); + goto retry; + } + if (err == 0) + err = traverse(vpp, lkflags); if (err) { - VN_RELE(*vpp); *vpp = NULL; } else if (*vpp == sep->se_root) { /* * The snapshot was unmounted behind our backs, * try to remount it. */ + VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0); goto domount; } else { /* @@ -1009,7 +1021,7 @@ zfsctl_snapdir_lookup(ap) * to clear it since we're pretending to be part * of our parent's vfs. 
*/ - (*vpp)->v_flag &= ~VROOT; + (*vpp)->v_flag &= ~VROOT; /* XXX */ } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); @@ -1100,9 +1112,10 @@ zfsctl_shares_lookup(ap) ASSERT(cnp->cn_namelen < sizeof(nm)); strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); - if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + error = gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm, cnp->cn_lkflags); + if (error != -1) { ZFS_EXIT(zfsvfs); - return (0); + return (error); } if (zfsvfs->z_shares_dir == 0) { @@ -1313,6 +1326,8 @@ zfsctl_snapdir_inactive(ap) zfsctl_snapdir_t *sdp = vp->v_data; zfs_snapentry_t *sep; + vnode_destroy_vobject(vp); + /* * On forced unmount we have to free snapshots from here. */ @@ -1329,6 +1344,9 @@ zfsctl_snapdir_inactive(ap) avl_destroy(&sdp->sd_snaps); kmem_free(sdp, sizeof (zfsctl_snapdir_t)); + VI_LOCK(vp); + vp->v_data = NULL; + VI_UNLOCK(vp); return (0); } @@ -1374,8 +1392,8 @@ static struct vop_vector zfsctl_ops_snapdir = { .vop_mkdir = zfsctl_freebsd_snapdir_mkdir, .vop_readdir = gfs_vop_readdir, .vop_lookup = zfsctl_snapdir_lookup, - .vop_inactive = zfsctl_snapdir_inactive, - .vop_reclaim = zfsctl_common_reclaim, + .vop_inactive = VOP_NULL, + .vop_reclaim = zfsctl_snapdir_inactive, .vop_fid = zfsctl_common_fid, }; @@ -1388,8 +1406,8 @@ static struct vop_vector zfsctl_ops_shares = { .vop_access = zfsctl_common_access, .vop_readdir = zfsctl_shares_readdir, .vop_lookup = zfsctl_shares_lookup, - .vop_inactive = gfs_vop_inactive, - .vop_reclaim = zfsctl_common_reclaim, + .vop_inactive = VOP_NULL, + .vop_reclaim = gfs_vop_inactive, .vop_fid = zfsctl_shares_fid, }; #endif /* !sun */ @@ -1412,7 +1430,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) VN_HOLD(vp); zcp = vp->v_data; zcp->zc_id = objset; - VOP_UNLOCK(vp, 0); return (vp); } @@ -1426,18 +1443,17 @@ zfsctl_snapshot_inactive(ap) { vnode_t *vp = ap->a_vp; cred_t *cr = ap->a_td->td_ucred; - struct vop_inactive_args iap; + struct vop_reclaim_args iap; zfsctl_snapdir_t *sdp; 
zfs_snapentry_t *sep, *next; int locked; vnode_t *dvp; - if (vp->v_count > 0) + /* if snapdir is already reclaimed, just go to the end */ + if (gfs_dir_lookup(vp, "..", &dvp, cr, LK_EXCLUSIVE, NULL, NULL) != 0) goto end; - VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); sdp = dvp->v_data; - VOP_UNLOCK(dvp, 0); if (!(locked = MUTEX_HELD(&sdp->sd_lock))) mutex_enter(&sdp->sd_lock); @@ -1460,6 +1476,7 @@ zfsctl_snapshot_inactive(ap) if (!locked) mutex_exit(&sdp->sd_lock); + VOP_UNLOCK(dvp, 0); VN_RELE(dvp); end: @@ -1471,14 +1488,19 @@ end: * creating a new vnode. */ iap.a_vp = vp; - return (gfs_vop_inactive(&iap)); + gfs_vop_inactive(&iap); + return (0); } static int zfsctl_traverse_begin(vnode_t **vpp, int lktype) { + int locked; - VN_HOLD(*vpp); + locked = VOP_ISLOCKED(*vpp); + ASSERT(locked == LK_EXCLUSIVE || locked == LK_SHARED); + /* traverse unlocks starting vnode, but we need to keep it locked */ + VERIFY(vget(*vpp, locked | LK_CANRECURSE, curthread) == 0); /* Snapshot should be already mounted, but just in case. 
*/ if (vn_mountedvfs(*vpp) == NULL) return (ENOENT); @@ -1489,10 +1511,7 @@ static void zfsctl_traverse_end(vnode_t *vp, int err) { - if (err == 0) - vput(vp); - else - VN_RELE(vp); + vput(vp); } static int @@ -1506,7 +1525,7 @@ zfsctl_snapshot_getattr(ap) vnode_t *vp = ap->a_vp; int err; - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); + err = zfsctl_traverse_begin(&vp, LK_SHARED); if (err == 0) err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred); zfsctl_traverse_end(vp, err); @@ -1523,7 +1542,7 @@ zfsctl_snapshot_fid(ap) vnode_t *vp = ap->a_vp; int err; - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); + err = zfsctl_traverse_begin(&vp, LK_SHARED); if (err == 0) err = VOP_VPTOFH(vp, (void *)ap->a_fid); zfsctl_traverse_end(vp, err); @@ -1543,6 +1562,7 @@ zfsctl_snapshot_lookup(ap) struct componentname *cnp = ap->a_cnp; cred_t *cr = ap->a_cnp->cn_cred; zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; + int lkflags = ap->a_cnp->cn_lkflags; int error; if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' 
|| @@ -1554,9 +1574,7 @@ zfsctl_snapshot_lookup(ap) ASSERT(zfsvfs->z_ctldir != NULL); error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp, - NULL, 0, NULL, cr, NULL, NULL, NULL); - if (error == 0) - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + NULL, lkflags, NULL, cr, NULL, NULL, NULL); return (error); } @@ -1571,7 +1589,7 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) ASSERT(zfsvfs->z_ctldir != NULL); error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, kcred, NULL, NULL, NULL); + NULL, LK_EXCLUSIVE, NULL, kcred, NULL, NULL, NULL); if (error != 0) return (error); sdp = dvp->v_data; @@ -1597,6 +1615,7 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) vref(dvp); *ap->a_vpp = dvp; } + VOP_UNLOCK(dvp, 0); VN_RELE(dvp); return (error); @@ -1608,9 +1627,9 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) */ static struct vop_vector zfsctl_ops_snapshot = { .vop_default = &default_vnodeops, - .vop_inactive = zfsctl_snapshot_inactive, + .vop_inactive = VOP_NULL, .vop_lookup = zfsctl_snapshot_lookup, - .vop_reclaim = zfsctl_common_reclaim, + .vop_reclaim = zfsctl_snapshot_inactive, .vop_getattr = zfsctl_snapshot_getattr, .vop_fid = zfsctl_snapshot_fid, .vop_vptocnp = zfsctl_snapshot_vptocnp, @@ -1628,7 +1647,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) ASSERT(zfsvfs->z_ctldir != NULL); error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, kcred, NULL, NULL, NULL); + NULL, LK_EXCLUSIVE, NULL, kcred, NULL, NULL, NULL); if (error != 0) return (error); sdp = dvp->v_data; @@ -1652,7 +1671,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) * and returns the ZFS vnode mounted on top of the GFS node. * This ZFS vnode is the root of the vfs for objset 'objsetid'. 
*/ - error = traverse(&vp, LK_SHARED | LK_RETRY); + error = traverse(&vp, LK_SHARED); if (error == 0) { if (vp == sep->se_root) error = EINVAL; @@ -1669,6 +1688,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) mutex_exit(&sdp->sd_lock); } + VOP_UNLOCK(dvp, 0); VN_RELE(dvp); return (error); @@ -1690,7 +1710,7 @@ zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) ASSERT(zfsvfs->z_ctldir != NULL); error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, cr, NULL, NULL, NULL); + NULL, LK_EXCLUSIVE, NULL, cr, NULL, NULL, NULL); if (error != 0) return (error); sdp = dvp->v_data; @@ -1727,6 +1747,7 @@ zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) } mutex_exit(&sdp->sd_lock); + VOP_UNLOCK(dvp, 0); VN_RELE(dvp); return (error); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index 90a6f7a..0c8eb4f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -1135,6 +1135,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfsp->mnt_kern_flag |= MNTK_MPSAFE; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; + vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; /* * The fsid is 64 bits, composed of an 8-bit fs type, which @@ -1756,15 +1757,7 @@ zfs_vnode_lock(vnode_t *vp, int flags) ASSERT(vp != NULL); - /* - * Check if the file system wasn't forcibly unmounted in the meantime. 
- */ error = vn_lock(vp, flags); - if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) { - VOP_UNLOCK(vp, 0); - error = ENOENT; - } - return (error); } @@ -1775,7 +1768,7 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) znode_t *rootzp; int error; - ZFS_ENTER_NOERROR(zfsvfs); + ZFS_ENTER(zfsvfs); error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (error == 0) @@ -1867,18 +1860,6 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zfsvfs->z_unmounted = B_TRUE; rrw_exit(&zfsvfs->z_teardown_lock, FTAG); rw_exit(&zfsvfs->z_teardown_inactive_lock); - -#ifdef __FreeBSD__ - /* - * Some znodes might not be fully reclaimed, wait for them. - */ - mutex_enter(&zfsvfs->z_znodes_lock); - while (list_head(&zfsvfs->z_all_znodes) != NULL) { - msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, - "zteardown", 0); - } - mutex_exit(&zfsvfs->z_znodes_lock); -#endif } /* @@ -1913,6 +1894,8 @@ zfs_umount(vfs_t *vfsp, int fflag) zfsvfs_t *zfsvfs = vfsp->vfs_data; objset_t *os; cred_t *cr = td->td_ucred; + vnode_t *rootvp; + int rootrefs; int ret; ret = secpolicy_fs_unmount(cr, vfsp); @@ -1948,30 +1931,24 @@ zfs_umount(vfs_t *vfsp, int fflag) ASSERT(zfsvfs->z_ctldir == NULL); } - if (fflag & MS_FORCE) { - /* - * Mark file system as unmounted before calling - * vflush(FORCECLOSE). This way we ensure no future vnops - * will be called and risk operating on DOOMED vnodes. - */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); - zfsvfs->z_unmounted = B_TRUE; - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); - } - /* - * Flush all the files. + * Try to flush all the files, gracefully at first. */ - ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? 
FORCECLOSE : 0, td); - if (ret != 0) { - if (!zfsvfs->z_issnap) { - zfsctl_create(zfsvfs); - ASSERT(zfsvfs->z_ctldir != NULL); - } - return (ret); - } + VERIFY(zfs_root(vfsp, LK_EXCLUSIVE, &rootvp) == 0); + vput(rootvp); /* we have a reference elsewhere */ + rootrefs = 1; + ret = vflush(vfsp, rootrefs, 0, td); + if (ret == 0) + rootrefs = 0; if (!(fflag & MS_FORCE)) { + if (ret != 0) { + if (!zfsvfs->z_issnap) { + zfsctl_create(zfsvfs); + ASSERT(zfsvfs->z_ctldir != NULL); + } + return (ret); + } /* * Check the number of active vnodes in the file system. * Our count is maintained in the vfs structure, but the @@ -1990,16 +1967,26 @@ zfs_umount(vfs_t *vfsp, int fflag) zfsvfs->z_ctldir->v_count > 1) return (EBUSY); } - } else { - MNT_ILOCK(vfsp); - vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; - MNT_IUNLOCK(vfsp); } VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; /* + * We could have got some new free/inactive vnodes after + * zfsvfs_teardown->zil_close->zil_commit->zfs_get_data->zfs_zget + * so we need to flush them. We also need to flush all the + * remaining vnodes if this is forceful unmount. + * + * VFS_ROOT would fail after zfsvfs_teardown (because of z_unmounted), + * so we pass rootrefs value of zero to vflush and then + * vrele the rootvp. + */ + VERIFY(vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td) == 0); + for (; rootrefs > 0; rootrefs--) + vrele(rootvp); + + /* * z_os will be NULL if there was an error in * attempting to reopen zfsvfs. 
*/ @@ -2140,16 +2127,25 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { *vpp = zfsvfs->z_ctldir; ASSERT(*vpp != NULL); + VN_HOLD(*vpp); + ZFS_EXIT(zfsvfs); if (object == ZFSCTL_INO_SNAPDIR) { - VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, - 0, NULL, NULL, NULL, NULL, NULL) == 0); + vnode_t *dvp = *vpp; + + err = vn_lock(dvp, LK_SHARED); + if (err == 0) { + err = zfsctl_root_lookup(dvp, "snapshot", vpp, + NULL, flags, NULL, NULL, NULL, NULL, NULL); + VOP_UNLOCK(dvp, 0); + } + VN_RELE(dvp); } else { - VN_HOLD(*vpp); + err = zfs_vnode_lock(*vpp, flags); + if (err != 0) { + VN_RELE(*vpp); + *vpp = NULL; + } } - ZFS_EXIT(zfsvfs); - err = zfs_vnode_lock(*vpp, flags | LK_RETRY); - if (err != 0) - *vpp = NULL; return (err); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index d48d00d..270191b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -1749,7 +1749,9 @@ out: * vp - ctime (if nlink > 0) */ +#ifndef __FreeBSD__ uint64_t null_xattr = 0; +#endif /*ARGSUSED*/ static int @@ -1761,13 +1763,17 @@ zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, vnode_t *vp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; - uint64_t acl_obj, xattr_obj; - uint64_t xattr_obj_unlinked = 0; + uint64_t xattr_obj; uint64_t obj = 0; zfs_dirlock_t *dl; dmu_tx_t *tx; +#ifndef __FreeBSD__ + uint64_t xattr_obj_unlinked = 0; + uint64_t acl_obj; boolean_t may_delete_now, delete_now = FALSE; - boolean_t unlinked, toobig = FALSE; + boolean_t toobig = FALSE; +#endif + boolean_t unlinked; uint64_t txtype; pathname_t *realnmp = NULL; pathname_t realnm; @@ -1818,11 +1824,11 @@ top: dnlc_remove(dvp, realnmp->pn_buf); else dnlc_remove(dvp, name); - +#ifndef __FreeBSD__ VI_LOCK(vp); may_delete_now = vp->v_count == 1 && 
!vn_has_cached_data(vp); VI_UNLOCK(vp); - +#endif /* * We may delete the znode now, or we may put it in the unlinked set; * it depends on whether we're the last link, and on whether there are @@ -1835,6 +1841,7 @@ top: dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); +#ifndef __FreeBSD__ if (may_delete_now) { toobig = zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; @@ -1842,7 +1849,7 @@ top: dmu_tx_hold_free(tx, zp->z_id, 0, (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); } - +#endif /* are there any extended attributes? */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); @@ -1853,10 +1860,12 @@ top: dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } +#ifndef __FreeBSD__ mutex_enter(&zp->z_lock); if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); mutex_exit(&zp->z_lock); +#endif /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); @@ -1889,6 +1898,7 @@ top: goto out; } +#ifndef __FreeBSD__ if (unlinked) { /* @@ -1897,9 +1907,9 @@ top: * zfs_sa_upgrade(). 
*/ mutex_enter(&zp->z_lock); - VI_LOCK(vp); (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); + VI_LOCK(vp); delete_now = may_delete_now && !toobig && vp->v_count == 1 && !vn_has_cached_data(vp) && xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == @@ -1938,6 +1948,12 @@ top: mutex_exit(&zp->z_lock); zfs_unlinked_add(zp, tx); } +#else + if (unlinked) { + vp->v_vflag |= VV_NOSYNC; + zfs_unlinked_add(zp, tx); + } +#endif txtype = TX_REMOVE; if (flags & FIGNORECASE) @@ -1951,7 +1967,9 @@ out: zfs_dirent_unlock(dl); +#ifndef __FreeBSD__ if (!delete_now) +#endif VN_RELE(vp); if (xzp) VN_RELE(ZTOV(xzp)); @@ -4560,12 +4578,8 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) * The fs has been unmounted, or we did a * suspend/resume and this file no longer exists. */ - VI_LOCK(vp); - ASSERT(vp->v_count <= 1); - vp->v_count = 0; - VI_UNLOCK(vp); - vrecycle(vp); rw_exit(&zfsvfs->z_teardown_inactive_lock); + vrecycle(vp); return; } @@ -4586,8 +4600,6 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) dmu_tx_commit(tx); } } - - zfs_zinactive(zp); rw_exit(&zfsvfs->z_teardown_inactive_lock); } @@ -6103,28 +6115,6 @@ zfs_freebsd_inactive(ap) return (0); } -static void -zfs_reclaim_complete(void *arg, int pending) -{ - znode_t *zp = arg; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); - if (zp->z_sa_hdl != NULL) { - ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); - zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); - } - zfs_znode_free(zp); - rw_exit(&zfsvfs->z_teardown_inactive_lock); - /* - * If the file system is being unmounted, there is a process waiting - * for us, wake it up. 
- */ - if (zfsvfs->z_unmounted) - wakeup_one(zfsvfs); -} - static int zfs_freebsd_reclaim(ap) struct vop_reclaim_args /* { @@ -6135,53 +6125,26 @@ zfs_freebsd_reclaim(ap) vnode_t *vp = ap->a_vp; znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - boolean_t rlocked; - - rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + int refcnt; ASSERT(zp != NULL); - /* - * Destroy the vm object and flush associated pages. - */ + /* Destroy the vm object and flush associated pages. */ vnode_destroy_vobject(vp); - mutex_enter(&zp->z_lock); - zp->z_vnode = NULL; - mutex_exit(&zp->z_lock); - - if (zp->z_unlinked) { - ; /* Do nothing. */ - } else if (!rlocked) { - TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); - taskqueue_enqueue(taskqueue_thread, &zp->z_task); - } else if (zp->z_sa_hdl == NULL) { + /* + * z_teardown_inactive_lock protects from a race with + * zfs_znode_dmu_fini in zfsvfs_teardown during + * force unmount. + */ + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + if (zp->z_sa_hdl == NULL) zfs_znode_free(zp); - } else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ { - int locked; + else + zfs_zinactive(zp); + rw_exit(&zfsvfs->z_teardown_inactive_lock); - locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : - ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); - if (locked == 0) { - /* - * Lock can't be obtained due to deadlock possibility, - * so defer znode destruction. 
- */ - TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp); - taskqueue_enqueue(taskqueue_thread, &zp->z_task); - } else { - zfs_znode_dmu_fini(zp); - if (locked == 1) - ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); - zfs_znode_free(zp); - } - } - VI_LOCK(vp); vp->v_data = NULL; - ASSERT(vp->v_holdcnt >= 1); - VI_UNLOCK(vp); - if (rlocked) - rw_exit(&zfsvfs->z_teardown_inactive_lock); return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 8b64d34..32da403 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -633,12 +633,11 @@ static void zfs_vnode_forget(vnode_t *vp) { - VOP_UNLOCK(vp, 0); - VI_LOCK(vp); - vp->v_usecount--; - vp->v_iflag |= VI_DOOMED; + /* copied from insmntque_stddtr */ vp->v_data = NULL; - vdropl(vp); + vp->v_op = &dead_vnodeops; + vgone(vp); + vput(vp); } /* @@ -1148,14 +1147,16 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; - int err; + vnode_t *vp; sa_handle_t *hdl; - int first = 1; - - *zpp = NULL; + struct thread *td; + int locked; + int err; + td = curthread; getnewvnode_reserve(1); again: + *zpp = NULL; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); @@ -1180,7 +1181,6 @@ again: if (hdl != NULL) { zp = sa_get_userdata(hdl); - /* * Since "SA" does immediate eviction we * should never find a sa handle that doesn't @@ -1194,48 +1194,35 @@ again: if (zp->z_unlinked) { err = ENOENT; } else { - vnode_t *vp; - int dying = 0; - vp = ZTOV(zp); - if (vp == NULL) - dying = 1; - else { - VN_HOLD(vp); - if ((vp->v_iflag & VI_DOOMED) != 0) { - dying = 1; - /* - * Don't VN_RELE() vnode here, because - * it can call vn_lock() which creates - * LOR between vnode lock and znode - * lock. We will VN_RELE() the vnode - * after droping znode lock. 
- */ - } - } - if (dying) { - if (first) { - ZFS_LOG(1, "dying znode detected (zp=%p)", zp); - first = 0; - } - /* - * znode is dying so we can't reuse it, we must - * wait until destruction is completed. - */ - sa_buf_rele(db, NULL); - mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - if (vp != NULL) - VN_RELE(vp); - tsleep(zp, 0, "zcollide", 1); - goto again; - } *zpp = zp; err = 0; } sa_buf_rele(db, NULL); + + /* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */ + if (err == 0) + VN_HOLD(vp); + mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + + if (err == 0) { + locked = VOP_ISLOCKED(vp); + VI_LOCK(vp); + if ((vp->v_iflag & VI_DOOMED) != 0 && + locked != LK_EXCLUSIVE && locked != LK_SHARED) { + /* + * The vnode is doomed and this thread doesn't + * hold any lock on it, so the vnode must be + * being actively reclaimed. + */ + VI_UNLOCK(vp); + VN_RELE(vp); + goto again; + } + VI_UNLOCK(vp); + } getnewvnode_drop_reserve(); return (err); } @@ -1261,7 +1248,22 @@ again: if (err == 0) { vnode_t *vp = ZTOV(zp); + /* + * VV_FORCEINSMQ is only needed for zfs_zget called from + * zfs_unmount -> zfsvfs_teardown -> zil_close -> + * -> zil_commit -> zfs_get_data -> zfs_zget. + * It is not needed in any other case, so this flag should + * probably be controlled by an explicit flag to zfs_zget. + * Without VV_FORCEINSMQ we would fail here and thus fail + * to process a ZIL record. + * The vnode and znode created in that case should be gone + * as soon as zfs_get_data is done with them and should not + * linger on the mount queue. 
+ */ + vp->v_vflag |= VV_FORCEINSMQ; err = insmntque(vp, zfsvfs->z_vfs); + vp->v_vflag &= ~VV_FORCEINSMQ; + ASSERT3S(err, ==, 0); if (err == 0) VOP_UNLOCK(vp, 0); else { @@ -1401,7 +1403,6 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) void zfs_zinactive(znode_t *zp) { - vnode_t *vp = ZTOV(zp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; @@ -1413,19 +1414,6 @@ zfs_zinactive(znode_t *zp) ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); mutex_enter(&zp->z_lock); - VI_LOCK(vp); - if (vp->v_count > 0) { - /* - * If the hold count is greater than zero, somebody has - * obtained a new reference on this znode while we were - * processing it here, so we are done. - */ - VI_UNLOCK(vp); - mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); - return; - } - VI_UNLOCK(vp); /* * If this was the last reference to a file with no links, @@ -1434,14 +1422,14 @@ zfs_zinactive(znode_t *zp) if (zp->z_unlinked) { mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); - ASSERT(vp->v_count == 0); - vrecycle(vp); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); + zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); + zfs_znode_free(zp); } void @@ -1449,7 +1437,6 @@ zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; - ASSERT(ZTOV(zp) == NULL); ASSERT(zp->z_sa_hdl == NULL); mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/gfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/gfs.h index f3fc634..77b74c2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/gfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/gfs.h @@ -146,10 +146,10 @@ extern int gfs_get_parent_ino(vnode_t *, cred_t *, caller_context_t *, */ #define GFS_STATIC_ENTRY_OFFSET ((offset_t)2) -extern int gfs_lookup_dot(vnode_t **, vnode_t *, vnode_t *, const char *); +extern int gfs_lookup_dot(vnode_t **, vnode_t *, vnode_t *, const char *, int); extern int gfs_vop_readdir(struct vop_readdir_args *); -extern int 
gfs_vop_inactive(struct vop_inactive_args *); +extern int gfs_vop_inactive(struct vop_reclaim_args *); #ifdef __cplusplus