# HG changeset patch # Parent 27f8c64bc5520dadad9fc3a9ea63418cd05e04cf Rather than using a vnode lock to prevent updates that may cause updates to the segments while doing grooming, instead suspend writes to the mount points. This fixes an LoR and possible random memory access after free. diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs.h --- a/sys/fs/nandfs/nandfs.h +++ b/sys/fs/nandfs/nandfs.h @@ -186,7 +186,6 @@ struct nandfs_device { struct cv nd_sync_cv; struct mtx nd_clean_mtx; struct cv nd_clean_cv; - struct lock nd_seg_const; struct nandfs_seginfo *nd_seginfo; @@ -233,33 +232,6 @@ extern SLIST_HEAD(_nandfs_devices, nandf #define SYNCER_FSYNC 0x4 #define SYNCER_ROUPD 0x5 -static __inline int -nandfs_writelockflags(struct nandfs_device *fsdev, int flags) -{ - int error = 0; - - if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) - error = lockmgr(&fsdev->nd_seg_const, flags | LK_SHARED, NULL); - - return (error); -} - -static __inline void -nandfs_writeunlock(struct nandfs_device *fsdev) -{ - - if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) - lockmgr(&(fsdev)->nd_seg_const, LK_RELEASE, NULL); -} - -#define NANDFS_WRITELOCKFLAGS(fsdev, flags) nandfs_writelockflags(fsdev, flags) - -#define NANDFS_WRITELOCK(fsdev) NANDFS_WRITELOCKFLAGS(fsdev, 0) - -#define NANDFS_WRITEUNLOCK(fsdev) nandfs_writeunlock(fsdev) - -#define NANDFS_WRITEASSERT(fsdev) lockmgr_assert(&(fsdev)->nd_seg_const, KA_LOCKED) - /* Specific mountpoint; head or a checkpoint/snapshot */ struct nandfsmount { STAILQ_ENTRY(nandfsmount) nm_next_mount; diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_cleaner.c --- a/sys/fs/nandfs/nandfs_cleaner.c +++ b/sys/fs/nandfs/nandfs_cleaner.c @@ -336,6 +336,7 @@ nandfs_cleaner_body(struct nandfs_device struct nandfs_bdesc *bdesc, *bdp, *bdpi; struct nandfs_cpstat cpstat; struct nandfs_cpinfo *cpinfo = NULL; + struct nandfsmount *nmp; uint64_t *segnums, *segp; int select, selected; int error = 0; @@ -403,11 +404,17 @@ nandfs_cleaner_body(struct nandfs_device cpinfo, cpstat.ncp_nss, NULL); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); - goto out_locked; + goto out; } } - NANDFS_WRITELOCK(fsdev); + mtx_lock(&fsdev->nd_mutex); + STAILQ_FOREACH(nmp, &fsdev->nd_mounts, nm_next_mount) { + error = vfs_write_suspend(nmp->nm_vfs_mountp, VS_SKIP_UNMOUNT); + /* XXX check error? */ + } + mtx_unlock(&fsdev->nd_mutex); + DPRINTF(CLEAN, ("%s: got lock\n", __func__)); error = nandfs_get_dat_vinfo(fsdev, vinfo, vip - vinfo); @@ -446,7 +453,11 @@ nandfs_cleaner_body(struct nandfs_device nandfs_error("%s:%d\n", __FILE__, __LINE__); out_locked: - NANDFS_WRITEUNLOCK(fsdev); + mtx_lock(&fsdev->nd_mutex); + STAILQ_FOREACH(nmp, &fsdev->nd_mounts, nm_next_mount) { + vfs_write_resume(nmp->nm_vfs_mountp, 0); + } + mtx_unlock(&fsdev->nd_mutex); out: free(cpinfo, M_NANDFSTEMP); free(segnums, M_NANDFSTEMP); diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_segment.c --- a/sys/fs/nandfs/nandfs_segment.c +++ b/sys/fs/nandfs/nandfs_segment.c @@ -990,12 +990,7 @@ nandfs_sync_file(struct vnode *vp) cp = fsdev->nd_cp_node; ifile = nmp->nm_ifile_node; - NANDFS_WRITEASSERT(fsdev); - if (lockmgr(&fsdev->nd_seg_const, LK_UPGRADE, NULL) != 0) { - DPRINTF(SYNC, ("%s: lost shared lock\n", __func__)); - if (lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL) != 0) - panic("couldn't lock exclusive"); - } + /* XXX -- write block */ DPRINTF(SYNC, ("%s: got lock\n", __func__)); VOP_LOCK(NTOV(su), LK_EXCLUSIVE); @@ -1015,7 +1010,6 @@ nandfs_sync_file(struct vnode *vp) clean_seginfo(seginfo, 0); delete_seginfo(seginfo); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); nandfs_error("%s: err:%d iterating dirty bufs vp:%p", __func__, error, vp); return (error); @@ -1031,7 +1025,6 @@ nandfs_sync_file(struct vnode *vp) delete_seginfo(seginfo); VOP_UNLOCK(NTOV(ifile), 0); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); nandfs_error("%s: err:%d updating vp:%p", __func__, error, vp); return (error); @@ -1050,7 +1043,6 @@ nandfs_sync_file(struct vnode *vp) delete_seginfo(seginfo); VOP_UNLOCK(NTOV(cp), 0); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); nandfs_error("%s: err:%d getting cp:%jx", __func__, error, fsdev->nd_last_cno + 1); return (error); @@ -1067,7 +1059,6 @@ nandfs_sync_file(struct vnode *vp) delete_seginfo(seginfo); VOP_UNLOCK(NTOV(cp), 0); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); nandfs_error("%s: err:%d setting cp:%jx", __func__, error, fsdev->nd_last_cno + 1); return (error); @@ -1085,7 +1076,6 @@ nandfs_sync_file(struct vnode *vp) delete_seginfo(seginfo); VOP_UNLOCK(NTOV(dat), 0); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); nandfs_error("%s: err:%d updating seg", __func__, error); return (error); @@ -1096,7 +1086,6 @@ nandfs_sync_file(struct vnode *vp) VOP_UNLOCK(NTOV(su), 0); delete_seginfo(seginfo); - lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); if (cno_changed && !error) { if (nandfs_cps_between_sblocks != 0 && @@ -1121,7 +1110,6 @@ nandfs_segment_constructor(struct nandfs DPRINTF(SYNC, ("%s: START\n", __func__)); fsdev = nmp->nm_nandfsdev; - lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL); DPRINTF(SYNC, ("%s: git lock\n", __func__)); again: create_seginfo(fsdev, &seginfo); @@ -1232,8 +1220,6 @@ reiterate: MPASS(fsdev->nd_free_base == NULL); - lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); - if (cno_changed) { if ((nandfs_cps_between_sblocks != 0 && fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) || @@ -1250,7 +1236,6 @@ error_locks: VOP_UNLOCK(NTOV(gc), 0); VOP_UNLOCK(NTOV(ifile), 0); VOP_UNLOCK(NTOV(su), 0); - lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); return (error); } diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_subr.c --- a/sys/fs/nandfs/nandfs_subr.c +++ b/sys/fs/nandfs/nandfs_subr.c @@ -254,9 +254,6 @@ nandfs_bdestroy(struct nandfs_node *node { int error; - if (!NANDFS_SYS_NODE(node->nn_ino)) - NANDFS_WRITEASSERT(node->nn_nandfsdev); - error = nandfs_vblock_end(node->nn_nandfsdev, vblk); if (error) { nandfs_error("%s: ending vblk: %jx failed\n", @@ -275,8 +272,6 @@ nandfs_bcreate(struct nandfs_node *node, int error; ASSERT_VOP_LOCKED(NTOV(node), __func__); - if (!NANDFS_SYS_NODE(node->nn_ino)) - NANDFS_WRITEASSERT(node->nn_nandfsdev); DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), blocknr)); @@ -314,7 +309,6 @@ nandfs_bcreate_meta(struct nandfs_node * int error; ASSERT_VOP_LOCKED(NTOV(node), __func__); - NANDFS_WRITEASSERT(node->nn_nandfsdev); DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), blocknr)); @@ -649,8 +643,6 @@ nandfs_get_node_raw(struct nandfs_device return (error); } - if (mp) - NANDFS_WRITELOCK(nandfsdev); DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n", __func__, (uintmax_t)ino, nvp)); diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_sufile.c --- a/sys/fs/nandfs/nandfs_sufile.c +++ b/sys/fs/nandfs/nandfs_sufile.c @@ -439,13 +439,12 @@ nandfs_get_seg_stat(struct nandfs_device su_node = nandfsdev->nd_su_node; - NANDFS_WRITELOCK(nandfsdev); + /* XXX block write */ VOP_LOCK(NTOV(su_node), LK_SHARED); err = nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp); if (err) { brelse(bp); VOP_UNLOCK(NTOV(su_node), 0); - NANDFS_WRITEUNLOCK(nandfsdev); return (-1); } @@ -460,8 +459,6 @@ nandfs_get_seg_stat(struct nandfs_device brelse(bp); VOP_UNLOCK(NTOV(su_node), 0); - NANDFS_WRITEUNLOCK(nandfsdev); - return (0); } @@ -512,7 +509,7 @@ nandfs_get_segment_info_filter(struct na curr = ~(0); - lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL); + /* XXX block write */ su_node = fsdev->nd_su_node; VOP_LOCK(NTOV(su_node), LK_SHARED); @@ -563,7 +560,6 @@ out: if (bp != NULL) brelse(bp); VOP_UNLOCK(NTOV(su_node), 0); - lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); return (err); } diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_vfsops.c --- a/sys/fs/nandfs/nandfs_vfsops.c +++ b/sys/fs/nandfs/nandfs_vfsops.c @@ -791,7 +791,6 @@ nandfs_unmount_device(struct nandfs_devi cv_destroy(&nandfsdev->nd_clean_cv); mtx_destroy(&nandfsdev->nd_clean_mtx); mtx_destroy(&nandfsdev->nd_mutex); - lockdestroy(&nandfsdev->nd_seg_const); free(nandfsdev, M_NANDFSMNT); } @@ -895,8 +894,6 @@ nandfs_mount_device(struct vnode *devvp, cv_init(&nandfsdev->nd_clean_cv, "nandfsclean"); mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF); mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF); - lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT, - LK_CANRECURSE); STAILQ_INIT(&nandfsdev->nd_mounts); nandfsdev->nd_devsize = pp->mediasize; diff -r 27f8c64bc552 -r ee8cbc8ad23a sys/fs/nandfs/nandfs_vnops.c --- a/sys/fs/nandfs/nandfs_vnops.c +++ b/sys/fs/nandfs/nandfs_vnops.c @@ -102,7 +102,6 @@ nandfs_reclaim(struct vop_reclaim_args * { struct vnode *vp = ap->a_vp; struct nandfs_node *nandfs_node = VTON(vp); - struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev; uint64_t ino = nandfs_node->nn_ino; DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node)); @@ -114,15 +113,12 @@ nandfs_reclaim(struct vop_reclaim_args * vnode_destroy_vobject(vp); /* Remove from vfs hash if not system vnode */ - if (!NANDFS_SYS_NODE(nandfs_node->nn_ino)) + if (!NANDFS_SYS_NODE(ino)) vfs_hash_remove(vp); /* Dispose all node knowledge */ nandfs_dispose_node(&nandfs_node); - if (!NANDFS_SYS_NODE(ino)) - NANDFS_WRITEUNLOCK(fsdev); - return (0); } @@ -2279,51 +2275,6 @@ nandfs_pathconf(struct vop_pathconf_args return (error); } -static int -nandfs_vnlock1(struct vop_lock1_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct nandfs_node *node = VTON(vp); - int error, vi_locked; - - /* - * XXX can vnode go away while we are sleeping? - */ - vi_locked = mtx_owned(&vp->v_interlock); - if (vi_locked) - VI_UNLOCK(vp); - error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev, - ap->a_flags & LK_NOWAIT); - if (vi_locked && !error) - VI_LOCK(vp); - if (error) - return (error); - - error = vop_stdlock(ap); - if (error) { - NANDFS_WRITEUNLOCK(node->nn_nandfsdev); - return (error); - } - - return (0); -} - -static int -nandfs_vnunlock(struct vop_unlock_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct nandfs_node *node = VTON(vp); - int error; - - error = vop_stdunlock(ap); - if (error) - return (error); - - NANDFS_WRITEUNLOCK(node->nn_nandfsdev); - - return (0); -} - /* * Global vfs data structures */ @@ -2358,8 +2309,6 @@ struct vop_vector nandfs_vnodeops = { .vop_setattr = nandfs_setattr, .vop_strategy = nandfs_strategy, .vop_symlink = nandfs_symlink, - .vop_lock1 = nandfs_vnlock1, - .vop_unlock = nandfs_vnunlock, }; struct vop_vector nandfs_system_vnodeops = { @@ -2423,8 +2372,6 @@ struct vop_vector nandfs_fifoops = { .vop_reclaim = nandfs_reclaim, .vop_setattr = nandfs_setattr, .vop_write = VOP_PANIC, - .vop_lock1 = nandfs_vnlock1, - .vop_unlock = nandfs_vnunlock, }; int