Index: geom/geom_vfs.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/geom/geom_vfs.c,v retrieving revision 1.10 diff -u -r1.10 geom_vfs.c --- geom/geom_vfs.c 2 Mar 2006 05:37:44 -0000 1.10 +++ geom/geom_vfs.c 28 Dec 2006 21:33:45 -0000 @@ -50,6 +50,7 @@ .bop_write = bufwrite, .bop_strategy = g_vfs_strategy, .bop_sync = bufsync, + .bop_bdflush = bufbdflush }; struct buf_ops *g_vfs_bufops = &__g_vfs_bufops; Index: kern/vfs_bio.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.515 diff -u -r1.515 vfs_bio.c --- kern/vfs_bio.c 20 Dec 2006 09:22:31 -0000 1.515 +++ kern/vfs_bio.c 28 Dec 2006 21:33:45 -0000 @@ -80,6 +80,7 @@ .bop_write = bufwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, + .bop_bdflush = bufbdflush, }; /* @@ -146,10 +147,13 @@ static int hirunningspace; SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0, "Maximum amount of space to use for in-progress I/O"); -static int dirtybufferflushes; +int dirtybufferflushes; SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes, 0, "Number of bdwrite to bawrite conversions to limit dirty buffers"); -static int altbufferflushes; +int bdwriteskip; +SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip, + 0, "Number of buffers supplied to bdwrite with snapshot deadlock risk"); +int altbufferflushes; SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, 0, "Number of fsync flushes to limit dirty buffers"); static int recursiveflushes; @@ -164,7 +168,7 @@ static int hidirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, "When the number of dirty buffers is considered severe"); -static int dirtybufthresh; +int dirtybufthresh; SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, 0, "Number of bdwrite to bawrite conversions to 
clear dirty buffers"); static int numfreebuffers; @@ -886,6 +890,47 @@ return (0); } +void +bufbdflush(struct bufobj *bo, struct buf *bp) +{ + struct buf *nbp; + + if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) { + (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread); + altbufferflushes++; + } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) { + BO_LOCK(bo); + /* + * Try to find a buffer to flush. + */ + TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { + if ((nbp->b_vflags & BV_BKGRDINPROG) || + BUF_LOCK(nbp, + LK_EXCLUSIVE | LK_NOWAIT, NULL)) + continue; + if (bp == nbp) + panic("bdwrite: found ourselves"); + BO_UNLOCK(bo); + /* Don't countdeps with the bo lock held. */ + if (buf_countdeps(nbp, 0)) { + BO_LOCK(bo); + BUF_UNLOCK(nbp); + continue; + } + if (nbp->b_flags & B_CLUSTEROK) { + vfs_bio_awrite(nbp); + } else { + bremfree(nbp); + bawrite(nbp); + } + dirtybufferflushes++; + break; + } + if (nbp == NULL) + BO_UNLOCK(bo); + } +} + /* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. @@ -900,7 +945,6 @@ { struct thread *td = curthread; struct vnode *vp; - struct buf *nbp; struct bufobj *bo; CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); @@ -921,44 +965,9 @@ */ vp = bp->b_vp; bo = bp->b_bufobj; - if ((td->td_pflags & TDP_COWINPROGRESS) == 0) { - BO_LOCK(bo); - if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) { - BO_UNLOCK(bo); - (void) VOP_FSYNC(vp, MNT_NOWAIT, td); - altbufferflushes++; - } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) { - /* - * Try to find a buffer to flush. - */ - TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { - if ((nbp->b_vflags & BV_BKGRDINPROG) || - BUF_LOCK(nbp, - LK_EXCLUSIVE | LK_NOWAIT, NULL)) - continue; - if (bp == nbp) - panic("bdwrite: found ourselves"); - BO_UNLOCK(bo); - /* Don't countdeps with the bo lock held. 
*/ - if (buf_countdeps(nbp, 0)) { - BO_LOCK(bo); - BUF_UNLOCK(nbp); - continue; - } - if (nbp->b_flags & B_CLUSTEROK) { - vfs_bio_awrite(nbp); - } else { - bremfree(nbp); - bawrite(nbp); - } - dirtybufferflushes++; - break; - } - if (nbp == NULL) - BO_UNLOCK(bo); - } else - BO_UNLOCK(bo); - } else + if ((td->td_pflags & TDP_COWINPROGRESS) == 0) + BO_BDFLUSH(bo, bp); + else recursiveflushes++; bdirty(bp); Index: sys/bufobj.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/sys/bufobj.h,v retrieving revision 1.17 diff -u -r1.17 bufobj.h --- sys/bufobj.h 24 Jul 2006 00:33:24 -0000 1.17 +++ sys/bufobj.h 28 Dec 2006 21:33:45 -0000 @@ -70,17 +70,20 @@ typedef void b_strategy_t(struct bufobj *, struct buf *); typedef int b_write_t(struct buf *); typedef int b_sync_t(struct bufobj *, int waitfor, struct thread *td); +typedef void b_bdflush_t(struct bufobj *, struct buf *); struct buf_ops { char *bop_name; b_write_t *bop_write; b_strategy_t *bop_strategy; b_sync_t *bop_sync; + b_bdflush_t *bop_bdflush; }; #define BO_STRATEGY(bo, bp) ((bo)->bo_ops->bop_strategy((bo), (bp))) #define BO_SYNC(bo, w, td) ((bo)->bo_ops->bop_sync((bo), (w), (td))) #define BO_WRITE(bo, bp) ((bo)->bo_ops->bop_write((bp))) +#define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp))) struct bufobj { struct mtx *bo_mtx; /* Mutex which protects "i" things */ @@ -130,6 +133,7 @@ int bufobj_invalbuf(struct bufobj *bo, int flags, struct thread *td, int slpflag, int slptimeo); int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo); int bufsync(struct bufobj *bo, int waitfor, struct thread *td); +void bufbdflush(struct bufobj *bo, struct buf *bp); #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */ #endif /* _SYS_BUFOBJ_H_ */ Index: sys/buf.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/sys/buf.h,v retrieving revision 1.194 diff -u -r1.194 buf.h --- sys/buf.h 2 Oct 2006 
02:06:27 -0000 1.194 +++ sys/buf.h 28 Dec 2006 21:33:45 -0000 @@ -483,6 +483,10 @@ extern int maxbcache; /* Max KVA for buffer cache */ extern int runningbufspace; extern int hibufspace; +extern int dirtybufthresh; +extern int bdwriteskip; +extern int dirtybufferflushes; +extern int altbufferflushes; extern int buf_maxio; /* nominal maximum I/O for buffer */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ Index: ufs/ffs/ffs_extern.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_extern.h,v retrieving revision 1.72 diff -u -r1.72 ffs_extern.h --- ufs/ffs/ffs_extern.h 31 Oct 2006 21:48:53 -0000 1.72 +++ ufs/ffs/ffs_extern.h 28 Dec 2006 21:33:45 -0000 @@ -61,6 +61,7 @@ ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); Index: ufs/ffs/ffs_snapshot.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_snapshot.c,v retrieving revision 1.131 diff -u -r1.131 ffs_snapshot.c --- ufs/ffs/ffs_snapshot.c 10 Oct 2006 09:20:54 -0000 1.131 +++ ufs/ffs/ffs_snapshot.c 28 Dec 2006 21:33:45 -0000 @@ -163,6 +163,7 @@ static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t); static void process_deferred_inactive(struct mount *); static void try_free_snapdata(struct vnode *devvp, struct thread *td); +static int ffs_bp_snapblk(struct vnode *, struct buf *); /* * To ensure the consistency of snapshots across crashes, we must @@ -2077,6 +2078,119 @@ } /* + * Check whether the buffer block is one of the device buffers that shall be + * locked after 
snaplk. devvp shall be locked on entry, and will be + * left locked upon exit. + */ +static int +ffs_bp_snapblk(devvp, bp) + struct vnode *devvp; + struct buf *bp; +{ + struct snapdata *sn; + struct fs *fs; + ufs2_daddr_t lbn, *snapblklist; + int lower, upper, mid; + + ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk"); + KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp)); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL) + return (0); + fs = TAILQ_FIRST(&sn->sn_head)->i_fs; + lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno)); + snapblklist = sn->sn_blklist; + upper = sn->sn_listsize - 1; + lower = 1; + while (lower <= upper) { + mid = (lower + upper) / 2; + if (snapblklist[mid] == lbn) + break; + if (snapblklist[mid] < lbn) + lower = mid + 1; + else + upper = mid - 1; + } + if (lower <= upper) + return (1); + return (0); +} + +void +ffs_bdflush(bo, bp) + struct bufobj *bo; + struct buf *bp; +{ + struct thread *td; + struct vnode *vp, *devvp; + struct buf *nbp; + int bp_bdskip; + + if (bo->bo_dirty.bv_cnt <= dirtybufthresh) + return; + + td = curthread; + vp = bp->b_vp; + devvp = bo->__bo_vnode; + KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp)); + + VI_LOCK(devvp); + bp_bdskip = ffs_bp_snapblk(devvp, bp); + if (bp_bdskip) + bdwriteskip++; + VI_UNLOCK(devvp); + if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) { + (void) VOP_FSYNC(vp, MNT_NOWAIT, td); + altbufferflushes++; + } else { + BO_LOCK(bo); + /* + * Try to find a buffer to flush. + */ + TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { + if ((nbp->b_vflags & BV_BKGRDINPROG) || + BUF_LOCK(nbp, + LK_EXCLUSIVE | LK_NOWAIT, NULL)) + continue; + if (bp == nbp) + panic("bdwrite: found ourselves"); + BO_UNLOCK(bo); + /* + * Don't countdeps with the bo lock + * held. 
+ */ + if (buf_countdeps(nbp, 0)) { + BO_LOCK(bo); + BUF_UNLOCK(nbp); + continue; + } + if (bp_bdskip) { + VI_LOCK(devvp); + if (!ffs_bp_snapblk(vp, nbp)) { + if (BO_MTX(bo) != VI_MTX(vp)) { + VI_UNLOCK(devvp); + BO_LOCK(bo); + } + BUF_UNLOCK(nbp); + continue; + } + VI_UNLOCK(devvp); + } + if (nbp->b_flags & B_CLUSTEROK) { + vfs_bio_awrite(nbp); + } else { + bremfree(nbp); + bawrite(nbp); + } + dirtybufferflushes++; + break; + } + if (nbp == NULL) + BO_UNLOCK(bo); + } +} + +/* * Check for need to copy block that is about to be written, * copying the block if necessary. */ Index: ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.323 diff -u -r1.323 ffs_vfsops.c --- ufs/ffs/ffs_vfsops.c 6 Nov 2006 13:42:09 -0000 1.323 +++ ufs/ffs/ffs_vfsops.c 28 Dec 2006 21:33:45 -0000 @@ -117,6 +117,11 @@ .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, +#ifdef NO_FFS_SNAPSHOT + .bop_bdflush = bufbdflush, +#else + .bop_bdflush = ffs_bdflush, +#endif }; static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr", Index: gnu/fs/xfs/FreeBSD/xfs_mountops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/gnu/fs/xfs/FreeBSD/xfs_mountops.c,v retrieving revision 1.6 diff -u -r1.6 xfs_mountops.c --- gnu/fs/xfs/FreeBSD/xfs_mountops.c 10 Jun 2006 19:04:21 -0000 1.6 +++ gnu/fs/xfs/FreeBSD/xfs_mountops.c 28 Dec 2006 21:33:45 -0000 @@ -497,9 +497,16 @@ return bufsync(bo,waitfor,td); } +static void +xfs_geom_bufbdflush(struct bufobj *bo, struct buf *bp) +{ + bufbdflush(bo, bp); +} + struct buf_ops xfs_bo_ops = { .bop_name = "XFS", .bop_write = xfs_geom_bufwrite, .bop_strategy = xfs_geom_strategy, .bop_sync = xfs_geom_bufsync, + .bop_bdflush = xfs_geom_bufbdflush, }; Index: nfs4client/nfs4_vnops.c =================================================================== RCS file: 
/usr/local/arch/ncvs/src/sys/nfs4client/nfs4_vnops.c,v retrieving revision 1.34 diff -u -r1.34 nfs4_vnops.c --- nfs4client/nfs4_vnops.c 13 Sep 2006 18:39:08 -0000 1.34 +++ nfs4client/nfs4_vnops.c 28 Dec 2006 21:33:45 -0000 @@ -2874,4 +2874,5 @@ .bop_write = nfs4_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, + .bop_bdflush = bufbdflush, }; Index: nfsclient/nfs_vnops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/nfsclient/nfs_vnops.c,v retrieving revision 1.271 diff -u -r1.271 nfs_vnops.c --- nfsclient/nfs_vnops.c 16 Nov 2006 23:02:37 -0000 1.271 +++ nfsclient/nfs_vnops.c 28 Dec 2006 21:33:45 -0000 @@ -3275,4 +3275,5 @@ .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, + .bop_bdflush = bufbdflush, };