diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
index 0ebad7d..0210842 100644
--- a/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
+++ b/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
@@ -81,7 +81,7 @@ xfs_buf_get_empty(size_t size, xfs_buftarg_t *target)
 {
 	struct buf *bp;
 
-	bp = geteblk(0);
+	bp = geteblk(0, 0);
 	if (bp != NULL) {
 		bp->b_bufsize = size;
 		bp->b_bcount = size;
@@ -100,7 +100,7 @@ xfs_buf_get_noaddr(size_t len, xfs_buftarg_t *target)
 	if (len >= MAXPHYS)
 		return (NULL);
 
-	bp = geteblk(len);
+	bp = geteblk(len, 0);
 	if (bp != NULL) {
 		BUF_ASSERT_HELD(bp);
 
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 8b1ef0f..aaf966e 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -105,7 +105,8 @@ static void vfs_setdirty_locked_object(struct buf *bp);
 static void vfs_vmio_release(struct buf *bp);
 static int vfs_bio_clcheck(struct vnode *vp, int size,
 		daddr_t lblkno, daddr_t blkno);
-static int flushbufqueues(int, int);
+static int buf_do_flush(struct vnode *vp);
+static int flushbufqueues(struct vnode *, int, int);
 static void buf_daemon(void);
 static void bremfreel(struct buf *bp);
 
@@ -247,6 +248,7 @@ static struct mtx nblock;
 #define QUEUE_DIRTY_GIANT 3	/* B_DELWRI buffers that need giant */
 #define QUEUE_EMPTYKVA	4	/* empty buffer headers w/KVA assignment */
 #define QUEUE_EMPTY	5	/* empty buffer headers */
+#define QUEUE_SENTINEL	1024	/* not an queue index, but mark for sentinel */
 
 /* Queues for free buffers with various properties */
 static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
@@ -1353,7 +1355,7 @@ brelse(struct buf *bp)
 		if ((bp->b_flags & (B_DELWRI|B_NEEDSGIANT)) ==
 		    (B_DELWRI|B_NEEDSGIANT))
 			bp->b_qindex = QUEUE_DIRTY_GIANT;
-		if (bp->b_flags & B_DELWRI)
+		else if (bp->b_flags & B_DELWRI)
 			bp->b_qindex = QUEUE_DIRTY;
 		else
 			bp->b_qindex = QUEUE_CLEAN;
@@ -1673,21 +1675,23 @@ vfs_bio_awrite(struct buf *bp)
  */
 
 static struct buf *
-getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
+getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize,
+    int gbflags)
 {
+	struct thread *td;
 	struct buf *bp;
 	struct buf *nbp;
 	int defrag = 0;
 	int nqindex;
 	static int flushingbufs;
 
+	td = curthread;
 	/*
 	 * We can't afford to block since we might be holding a vnode lock,
 	 * which may prevent system daemons from running.  We deal with
 	 * low-memory situations by proactively returning memory and running
 	 * async I/O rather then sync I/O.
 	 */
-
 	atomic_add_int(&getnewbufcalls, 1);
 	atomic_subtract_int(&getnewbufrestarts, 1);
 restart:
@@ -1919,8 +1923,9 @@
 	 */
 
 	if (bp == NULL) {
-		int flags;
+		int flags, norunbuf;
 		char *waitmsg;
+		int fl;
 
 		if (defrag) {
 			flags = VFS_BIO_NEED_BUFSPACE;
@@ -1938,9 +1943,35 @@
 		mtx_unlock(&bqlock);
 
 		bd_speedup();	/* heeeelp */
+		if (gbflags & GB_NOWAIT_BD)
+			return (NULL);
 
 		mtx_lock(&nblock);
 		while (needsbuffer & flags) {
+			if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) {
+				mtx_unlock(&nblock);
+				/*
+				 * getblk() is called with a vnode
+				 * locked, and some majority of the
+				 * dirty buffers may as well belong to
+				 * the vnode. Flushing the buffers
+				 * there would make a progress that
+				 * cannot be achieved by the
+				 * buf_daemon, that cannot lock the
+				 * vnode.
+				 */
+				norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+				    (td->td_pflags & TDP_NORUNNINGBUF);
+				/* play bufdaemon */
+				td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+				fl = buf_do_flush(vp);
+				td->td_pflags &= norunbuf;
+				mtx_lock(&nblock);
+				if (fl != 0)
+					continue;
+				if ((needsbuffer & flags) == 0)
+					break;
+			}
 			if (msleep(&needsbuffer, &nblock,
 			    (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) {
 				mtx_unlock(&nblock);
@@ -2009,6 +2040,35 @@ static struct kproc_desc buf_kp = {
 };
 SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
 
+static int
+buf_do_flush(struct vnode *vp)
+{
+	int flushed;
+
+	flushed = flushbufqueues(vp, QUEUE_DIRTY, 0);
+	/* The list empty check here is slightly racy */
+	if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
+		mtx_lock(&Giant);
+		flushed += flushbufqueues(vp, QUEUE_DIRTY_GIANT, 0);
+		mtx_unlock(&Giant);
+	}
+	if (flushed == 0) {
+		/*
+		 * Could not find any buffers without rollback
+		 * dependencies, so just write the first one
+		 * in the hopes of eventually making progress.
+		 */
+		flushbufqueues(vp, QUEUE_DIRTY, 1);
+		if (!TAILQ_EMPTY(
+		    &bufqueues[QUEUE_DIRTY_GIANT])) {
+			mtx_lock(&Giant);
+			flushbufqueues(vp, QUEUE_DIRTY_GIANT, 1);
+			mtx_unlock(&Giant);
+		}
+	}
+	return (flushed);
+}
+
 static void
 buf_daemon()
 {
@@ -2022,7 +2082,7 @@ buf_daemon()
 	/*
 	 * This process is allowed to take the buffer cache to the limit
 	 */
-	curthread->td_pflags |= TDP_NORUNNINGBUF;
+	curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
 	mtx_lock(&bdlock);
 	for (;;) {
 		bd_request = 0;
@@ -2037,30 +2097,8 @@
 		 * normally would so they can run in parallel with our drain.
 		 */
 		while (numdirtybuffers > lodirtybuffers) {
-			int flushed;
-
-			flushed = flushbufqueues(QUEUE_DIRTY, 0);
-			/* The list empty check here is slightly racy */
-			if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
-				mtx_lock(&Giant);
-				flushed += flushbufqueues(QUEUE_DIRTY_GIANT, 0);
-				mtx_unlock(&Giant);
-			}
-			if (flushed == 0) {
-				/*
-				 * Could not find any buffers without rollback
-				 * dependencies, so just write the first one
-				 * in the hopes of eventually making progress.
-				 */
-				flushbufqueues(QUEUE_DIRTY, 1);
-				if (!TAILQ_EMPTY(
-				    &bufqueues[QUEUE_DIRTY_GIANT])) {
-					mtx_lock(&Giant);
-					flushbufqueues(QUEUE_DIRTY_GIANT, 1);
-					mtx_unlock(&Giant);
-				}
+			if (buf_do_flush(NULL) == 0)
 				break;
-			}
 			uio_yield();
 		}
 
@@ -2106,7 +2144,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, 0,
     "Number of buffers flushed with dependecies that require rollbacks");
 
 static int
-flushbufqueues(int queue, int flushdeps)
+flushbufqueues(struct vnode *lvp, int queue, int flushdeps)
 {
 	struct buf sentinel;
 	struct vnode *vp;
@@ -2121,15 +2159,29 @@ flushbufqueues(int queue, int flushdeps)
 		target /= 2;
 	flushed = 0;
 	bp = NULL;
+	sentinel.b_qindex = QUEUE_SENTINEL;
 	mtx_lock(&bqlock);
-	TAILQ_INSERT_TAIL(&bufqueues[queue], &sentinel, b_freelist);
+	TAILQ_INSERT_HEAD(&bufqueues[queue], &sentinel, b_freelist);
 	while (flushed != target) {
-		bp = TAILQ_FIRST(&bufqueues[queue]);
-		if (bp == &sentinel)
+		bp = TAILQ_NEXT(&sentinel, b_freelist);
+		if (bp != NULL) {
+			TAILQ_REMOVE(&bufqueues[queue], &sentinel, b_freelist);
+			TAILQ_INSERT_AFTER(&bufqueues[queue], bp, &sentinel,
+			    b_freelist);
+		} else
 			break;
-		TAILQ_REMOVE(&bufqueues[queue], bp, b_freelist);
-		TAILQ_INSERT_TAIL(&bufqueues[queue], bp, b_freelist);
-
+		/*
+		 * Skip sentinels inserted by other invocations of the
+		 * flushbufqueues(), taking care to not reorder them.
+		 */
+		if (bp->b_qindex == QUEUE_SENTINEL)
+			continue;
+		/*
+		 * Only flush the buffers that belong to the
+		 * vnode locked by the curthread.
+		 */
+		if (lvp != NULL && bp->b_vp != lvp)
+			continue;
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
 			continue;
 		if (bp->b_pin_count > 0) {
@@ -2177,16 +2229,26 @@ flushbufqueues(int queue, int flushdeps)
 			BUF_UNLOCK(bp);
 			continue;
 		}
-		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_CANRECURSE) == 0) {
 			mtx_unlock(&bqlock);
 			CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X",
 			    bp, bp->b_vp, bp->b_flags);
-			vfs_bio_awrite(bp);
+			if (curproc == bufdaemonproc)
+				vfs_bio_awrite(bp);
+			else {
+				bremfree(bp);
+				bwrite(bp);
+			}
 			vn_finished_write(mp);
 			VOP_UNLOCK(vp, 0);
 			flushwithdeps += hasdeps;
 			flushed++;
-			waitrunningbufspace();
+			/*
+			 * Sleeping on runningbufspace while holding
+			 * vnode lock leads to deadlock.
+			 */
+			if (curproc == bufdaemonproc)
+				waitrunningbufspace();
 			numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
 			mtx_lock(&bqlock);
 			continue;
@@ -2568,7 +2630,7 @@ loop:
 		maxsize = vmio ? size + (offset & PAGE_MASK) : size;
 		maxsize = imax(maxsize, bsize);
 
-		bp = getnewbuf(slpflag, slptimeo, size, maxsize);
+		bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags);
 		if (bp == NULL) {
 			if (slpflag || slptimeo)
 				return NULL;
@@ -2643,14 +2705,17 @@
  * set to B_INVAL.
 */
 struct buf *
-geteblk(int size)
+geteblk(int size, int flags)
 {
 	struct buf *bp;
 	int maxsize;
 
 	maxsize = (size + BKVAMASK) & ~BKVAMASK;
-	while ((bp = getnewbuf(0, 0, size, maxsize)) == 0)
-		continue;
+	while ((bp = getnewbuf(NULL, 0, 0, size, maxsize, flags)) == NULL) {
+		if ((flags & GB_NOWAIT_BD) &&
+		    (curthread->td_pflags & TDP_BUFNEED) != 0)
+			return (NULL);
+	}
 	allocbuf(bp, size);
 	bp->b_flags |= B_INVAL;	/* b_dep cleared by getnewbuf() */
 	BUF_ASSERT_HELD(bp);
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index e05b20c..09e5c03 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -443,6 +443,7 @@ buf_countdeps(struct buf *bp, int i)
 */
 #define	GB_LOCK_NOWAIT	0x0001		/* Fail if we block on a buf lock. */
 #define	GB_NOCREAT	0x0002		/* Don't create a buf if not found. */
+#define	GB_NOWAIT_BD	0x0004		/* Do not wait for bufdaemon */
 
 #ifdef _KERNEL
 extern int	nbuf;			/* The number of buffer headers */
@@ -487,7 +488,7 @@ struct buf *	getpbuf(int *);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
-struct buf *geteblk(int);
+struct buf *geteblk(int, int);
 int	bufwait(struct buf *);
 int	bufwrite(struct buf *);
 void	bufdone(struct buf *);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 3779591..c44784d 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -339,7 +339,7 @@ do { \
 #define	TDP_OLDMASK	0x00000001 /* Need to restore mask after suspend. */
 #define	TDP_INKTR	0x00000002 /* Thread is currently in KTR code. */
 #define	TDP_INKTRACE	0x00000004 /* Thread is currently in KTRACE code. */
-#define	TDP_UNUSED8	0x00000008 /* available */
+#define	TDP_BUFNEED	0x00000008 /* Do not recurse into the buf flush */
 #define	TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
 #define	TDP_ALTSTACK	0x00000020 /* Have alternate signal stack. */
 #define	TDP_DEADLKTREAT	0x00000040 /* Lock aquisition - deadlock treatment. */
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 5ee123a..2f28e7e 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1746,7 +1746,9 @@ ffs_bufwrite(struct buf *bp)
 	    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
 
 	/* get a new block */
-	newbp = geteblk(bp->b_bufsize);
+	newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
+	if (newbp == NULL)
+		goto normal_write;
 
 	/*
 	 * set it to be identical to the old block.  We have to
@@ -1786,6 +1788,7 @@
 	}
 
 	/* Let the normal bufwrite do the rest for us */
+normal_write:
 	return (bufwrite(bp));
 }
 
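Not part of the patch above: a minimal, userspace-only sketch of the movable-sentinel scan that the patched flushbufqueues() performs on bufqueues[]. A sentinel element records the scan position, is advanced past each visited element with TAILQ_REMOVE/TAILQ_INSERT_AFTER, and sentinels planted by other concurrent scans are skipped without being reordered; that is what lets several invocations walk the same queue while the scanner drops and retakes its lock around the actual flush. The struct item type, ITEM_SENTINEL value, and main() driver are invented for the example; only the <sys/queue.h> macros are real.

#include <sys/queue.h>
#include <stdio.h>

#define	ITEM_SENTINEL	1024		/* mirrors QUEUE_SENTINEL */

struct item {
	TAILQ_ENTRY(item) link;
	int	kind;			/* ITEM_SENTINEL or 0 */
	int	value;
};

static TAILQ_HEAD(itemq, item) queue = TAILQ_HEAD_INITIALIZER(queue);

/*
 * Walk the queue the way the patched flushbufqueues() does: keep a private
 * sentinel in the list, always look at the element right after it, and move
 * the sentinel past that element before processing it.  The scan position
 * therefore survives insertions and removals made by others in the meantime.
 */
static void
scan(void)
{
	struct item sentinel, *it;

	sentinel.kind = ITEM_SENTINEL;
	TAILQ_INSERT_HEAD(&queue, &sentinel, link);
	for (;;) {
		it = TAILQ_NEXT(&sentinel, link);
		if (it == NULL)
			break;
		TAILQ_REMOVE(&queue, &sentinel, link);
		TAILQ_INSERT_AFTER(&queue, it, &sentinel, link);
		/* Skip sentinels owned by other scans; do not reorder them. */
		if (it->kind == ITEM_SENTINEL)
			continue;
		/* A real caller would drop its lock and flush the item here. */
		printf("visiting %d\n", it->value);
	}
	TAILQ_REMOVE(&queue, &sentinel, link);
}

int
main(void)
{
	static struct item items[5];	/* kind stays 0 for ordinary items */
	int i;

	for (i = 0; i < 5; i++) {
		items[i].value = i;
		TAILQ_INSERT_TAIL(&queue, &items[i], link);
	}
	scan();
	return (0);
}

Re-inserting the sentinel after each visited element, instead of rotating the visited element to the tail as the old flushbufqueues() did, keeps the queue order stable and makes a partial scan restartable without returning to the head.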