Index: sys/nfsclient/nfs.h =========================================================================== --- sys/nfsclient/nfs.h 2006/05/15 19:00:49 #1 +++ sys/nfsclient/nfs.h 2006/05/15 19:00:49 @@ -131,6 +131,7 @@ extern struct callout nfs_callout; extern struct nfsstats nfsstats; +extern struct mtx nfs_iod_mtx; extern int nfs_numasync; extern unsigned int nfs_iodmax; @@ -178,6 +179,7 @@ int r_rtt; /* RTT for rpc */ int r_lastmsg; /* last tprintf */ struct thread *r_td; /* Proc that did I/O system call */ + struct mtx r_mtx; /* Protects nfsreq fields */ }; /* @@ -310,8 +312,6 @@ void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset)); void nfs_restore_sigmask __P((struct thread *td, sigset_t *set)); -int nfs_tsleep __P((struct thread *td, void *ident, int priority, char *wmesg, - int timo)); int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)); Index: sys/nfsclient/nfs_bio.c =========================================================================== --- sys/nfsclient/nfs_bio.c 2006/05/15 19:00:49 #3 +++ sys/nfsclient/nfs_bio.c 2006/05/15 19:00:49 @@ -33,7 +33,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_bio.c,v 1.155 2006/04/06 01:20:30 mohans Exp $"); +__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_bio.c,v 1.154 2005/11/21 19:23:46 ps Exp $"); #include #include @@ -71,6 +71,7 @@ extern int nfs_directio_enable; extern int nfs_directio_allow_mmap; + /* * Vnode op for VM getpages. */ @@ -90,8 +91,6 @@ vm_page_t *pages; struct nfsnode *np; - GIANT_REQUIRED; - vp = ap->a_vp; np = VTONFS(vp); td = curthread; /* XXX */ @@ -101,22 +100,28 @@ count = ap->a_count; if ((object = vp->v_object) == NULL) { - printf("nfs_getpages: called with non-merged cache vnode??\n"); + nfs_printf("nfs_getpages: called with non-merged cache vnode??\n"); return VM_PAGER_ERROR; } - if (nfs_directio_enable && !nfs_directio_allow_mmap && - (np->n_flag & NNONCACHE) && - (vp->v_type == VREG)) { - printf("nfs_getpages: called on non-cacheable vnode??\n"); - return VM_PAGER_ERROR; + if (nfs_directio_enable && !nfs_directio_allow_mmap) { + mtx_lock(&np->n_mtx); + if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) { + mtx_unlock(&np->n_mtx); + nfs_printf("nfs_getpages: called on non-cacheable vnode??\n"); + return VM_PAGER_ERROR; + } else + mtx_unlock(&np->n_mtx); } + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&nmp->nm_mtx); /* We'll never get here for v4, because we always have fsinfo */ (void)nfs_fsinfo(nmp, vp, cred, td); - } + } else + mtx_unlock(&nmp->nm_mtx); npages = btoc(count); @@ -173,7 +178,7 @@ relpbuf(bp, &nfs_pbuf_freecnt); if (error && (uio.uio_resid == count)) { - printf("nfs_getpages: error %d\n", error); + nfs_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); vm_page_lock_queues(); for (i = 0; i < npages; ++i) { @@ -270,8 +275,6 @@ struct nfsnode *np; vm_page_t *pages; - GIANT_REQUIRED; - vp = ap->a_vp; np = VTONFS(vp); td = curthread; /* XXX */ @@ -282,15 +285,22 @@ rtvals = ap->a_rtvals; npages = btoc(count); offset = IDX_TO_OFF(pages[0]->pindex); - + + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&nmp->nm_mtx); (void)nfs_fsinfo(nmp, vp, cred, td); - } + } else + mtx_unlock(&nmp->nm_mtx); + mtx_lock(&np->n_mtx); if (nfs_directio_enable && !nfs_directio_allow_mmap && - (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) 
- printf("nfs_putpages: called on noncache-able vnode??\n"); + (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) { + mtx_unlock(&np->n_mtx); + nfs_printf("nfs_putpages: called on noncache-able vnode??\n"); + mtx_lock(&np->n_mtx); + } for (i = 0; i < npages; i++) rtvals[i] = VM_PAGER_AGAIN; @@ -298,12 +308,12 @@ /* * When putting pages, do not extend file past EOF. */ - if (offset + count > np->n_size) { count = np->n_size - offset; if (count < 0) count = 0; } + mtx_unlock(&np->n_mtx); /* * We use only the kva address for the buffer, but this is extremely @@ -350,6 +360,81 @@ } /* + * For nfs, cache consistency can only be maintained approximately. + * Although RFC1094 does not specify the criteria, the following is + * believed to be compatible with the reference port. + * For nfs: + * If the file's modify time on the server has changed since the + * last read rpc or you have written to the file, + * you may have lost data cache consistency with the + * server, so flush all of the file's data out of the cache. + * Then force a getattr rpc to ensure that you have up to date + * attributes. + * NB: This implies that cache data can be read when up to + * NFS_ATTRTIMEO seconds out of date. If you find that you need current + * attributes this could be forced by setting n_attrstamp to 0 before + * the VOP_GETATTR() call. + */ +static inline int +nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred) +{ + int error = 0; + struct vattr vattr; + struct nfsnode *np = VTONFS(vp); + int old_lock; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + + /* + * Grab the exclusive lock before checking whether the cache is + * consistent. + * XXX - We can make this cheaper later (by acquiring cheaper locks). + * But for now, this suffices. + */ + old_lock = nfs_upgrade_vnlock(vp, td); + mtx_lock(&np->n_mtx); + if (np->n_flag & NMODIFIED) { + mtx_unlock(&np->n_mtx); + if (vp->v_type != VREG) { + if (vp->v_type != VDIR) + panic("nfs: bioread, not dir"); + (nmp->nm_rpcops->nr_invaldir)(vp); + error = nfs_vinvalbuf(vp, V_SAVE, td, 1); + if (error) + goto out; + } + np->n_attrstamp = 0; + error = VOP_GETATTR(vp, &vattr, cred, td); + if (error) + goto out; + mtx_lock(&np->n_mtx); + np->n_mtime = vattr.va_mtime; + mtx_unlock(&np->n_mtx); + } else { + mtx_unlock(&np->n_mtx); + error = VOP_GETATTR(vp, &vattr, cred, td); + if (error) + return (error); + mtx_lock(&np->n_mtx); + if ((np->n_flag & NSIZECHANGED) + || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) { + mtx_unlock(&np->n_mtx); + if (vp->v_type == VDIR) + (nmp->nm_rpcops->nr_invaldir)(vp); + error = nfs_vinvalbuf(vp, V_SAVE, td, 1); + if (error) + goto out; + mtx_lock(&np->n_mtx); + np->n_mtime = vattr.va_mtime; + np->n_flag &= ~NSIZECHANGED; + } + mtx_unlock(&np->n_mtx); + } +out: + nfs_downgrade_vnlock(vp, td, old_lock); + return error; +} + +/* * Vnode op for read using bio */ int @@ -358,7 +443,6 @@ struct nfsnode *np = VTONFS(vp); int biosize, i; struct buf *bp, *rabp; - struct vattr vattr; struct thread *td; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; @@ -376,9 +460,14 @@ return (EINVAL); td = uio->uio_td; + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&nmp->nm_mtx); (void)nfs_fsinfo(nmp, vp, cred, td); + } else + mtx_unlock(&nmp->nm_mtx); + if (vp->v_type != VDIR && (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) return (EFBIG); @@ -389,52 +478,18 @@ biosize = 
vp->v_mount->mnt_stat.f_iosize; seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE); - /* - * For nfs, cache consistency can only be maintained approximately. - * Although RFC1094 does not specify the criteria, the following is - * believed to be compatible with the reference port. - * For nfs: - * If the file's modify time on the server has changed since the - * last read rpc or you have written to the file, - * you may have lost data cache consistency with the - * server, so flush all of the file's data out of the cache. - * Then force a getattr rpc to ensure that you have up to date - * attributes. - * NB: This implies that cache data can be read when up to - * NFS_ATTRTIMEO seconds out of date. If you find that you need current - * attributes this could be forced by setting n_attrstamp to 0 before - * the VOP_GETATTR() call. - */ - if (np->n_flag & NMODIFIED) { - if (vp->v_type != VREG) { - if (vp->v_type != VDIR) - panic("nfs: bioread, not dir"); - (nmp->nm_rpcops->nr_invaldir)(vp); - error = nfs_vinvalbuf(vp, V_SAVE, td, 1); - if (error) - return (error); - } - np->n_attrstamp = 0; - error = VOP_GETATTR(vp, &vattr, cred, td); - if (error) - return (error); - np->n_mtime = vattr.va_mtime; - } else { - error = VOP_GETATTR(vp, &vattr, cred, td); - if (error) - return (error); - if ((np->n_flag & NSIZECHANGED) - || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) { - if (vp->v_type == VDIR) - (nmp->nm_rpcops->nr_invaldir)(vp); - error = nfs_vinvalbuf(vp, V_SAVE, td, 1); - if (error) - return (error); - np->n_mtime = vattr.va_mtime; - np->n_flag &= ~NSIZECHANGED; - } - } + + error = nfs_bioread_check_cons(vp, td, cred); + if (error) + return error; + do { + u_quad_t nsize; + + mtx_lock(&np->n_mtx); + nsize = np->n_size; + mtx_unlock(&np->n_mtx); + switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; @@ -443,12 +498,10 @@ /* * Start the read ahead(s), as required. - * The readahead is kicked off only if sequential access - * is detected, based on the readahead hint (ra_expect_lbn). */ - if (nmp->nm_readahead > 0 && np->ra_expect_lbn == lbn) { + if (nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && nra < seqcount && - (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { + (off_t)(lbn + 1 + nra) * biosize < nsize; nra++) { rabn = lbn + 1 + nra; if (incore(&vp->v_bufobj, rabn) == NULL) { rabp = nfs_getcacheblk(vp, rabn, biosize, td); @@ -472,15 +525,14 @@ } } } - np->ra_expect_lbn = lbn + 1; } /* Note that bcount is *not* DEV_BSIZE aligned. 
*/ bcount = biosize; - if ((off_t)lbn * biosize >= np->n_size) { + if ((off_t)lbn * biosize >= nsize) { bcount = 0; - } else if ((off_t)(lbn + 1) * biosize > np->n_size) { - bcount = np->n_size - (off_t)lbn * biosize; + } else if ((off_t)(lbn + 1) * biosize > nsize) { + bcount = nsize - (off_t)lbn * biosize; } bp = nfs_getcacheblk(vp, lbn, bcount, td); @@ -652,7 +704,7 @@ n = np->n_direofoffset - uio->uio_offset; break; default: - printf(" nfs_bioread: type %x unexpected\n", vp->v_type); + nfs_printf(" nfs_bioread: type %x unexpected\n", vp->v_type); bp = NULL; break; }; @@ -690,14 +742,18 @@ struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct thread *td = uiop->uio_td; int size; - + int wsize; + + mtx_lock(&nmp->nm_mtx); + wsize = nmp->nm_wsize; + mtx_unlock(&nmp->nm_mtx); if (ioflag & IO_SYNC) { int iomode, must_commit; struct uio uio; struct iovec iov; do_sync: while (uiop->uio_resid > 0) { - size = min(uiop->uio_resid, nmp->nm_wsize); + size = min(uiop->uio_resid, wsize); size = min(uiop->uio_iov->iov_len, size); iov.iov_base = uiop->uio_iov->iov_base; iov.iov_len = size; @@ -746,7 +802,7 @@ * in NFS directio access. */ while (uiop->uio_resid > 0) { - size = min(uiop->uio_resid, nmp->nm_wsize); + size = min(uiop->uio_resid, wsize); size = min(uiop->uio_iov->iov_len, size); bp = getpbuf(&nfs_pbuf_freecnt); t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK); @@ -819,8 +875,6 @@ int n, on, error = 0; struct proc *p = td?td->td_proc:NULL; - GIANT_REQUIRED; - #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); @@ -829,20 +883,29 @@ #endif if (vp->v_type != VREG) return (EIO); + mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; + mtx_unlock(&np->n_mtx); return (np->n_error); - } + } else + mtx_unlock(&np->n_mtx); + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&nmp->nm_mtx); (void)nfs_fsinfo(nmp, vp, cred, td); + } else + mtx_unlock(&nmp->nm_mtx); /* * Synchronously flush pending buffers if we are in synchronous * mode or if we are appending. */ if (ioflag & (IO_APPEND | IO_SYNC)) { + mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { + mtx_unlock(&np->n_mtx); #ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */ /* * Require non-blocking, synchronous writes to @@ -857,7 +920,8 @@ error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) return (error); - } + } else + mtx_unlock(&np->n_mtx); } /* @@ -869,7 +933,9 @@ error = VOP_GETATTR(vp, &vattr, cred, td); if (error) return (error); + mtx_lock(&np->n_mtx); uio->uio_offset = np->n_size; + mtx_unlock(&np->n_mtx); } if (uio->uio_offset < 0) @@ -907,6 +973,11 @@ * no point optimizing for something that really won't ever happen. */ if (!(ioflag & IO_SYNC)) { + int nflag; + + mtx_lock(&np->n_mtx); + nflag = np->n_flag; + mtx_unlock(&np->n_mtx); int needrestart = 0; if (nmp->nm_wcommitsize < uio->uio_resid) { /* @@ -918,9 +989,9 @@ if (ioflag & IO_NDELAY) return (EAGAIN); ioflag |= IO_SYNC; - if (np->n_flag & NMODIFIED) + if (nflag & NMODIFIED) needrestart = 1; - } else if (np->n_flag & NMODIFIED) { + } else if (nflag & NMODIFIED) { int wouldcommit = 0; BO_LOCK(&vp->v_bufobj); if (vp->v_bufobj.bo_dirty.bv_cnt != 0) { @@ -961,8 +1032,9 @@ * Handle direct append and file extension cases, calculate * unaligned buffer size. 
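A note on the pattern above: nsize (and wsize in the direct-I/O write path) is a point-in-time snapshot of a field that n_mtx/nm_mtx protects, taken once under the lock and then used for all subsequent arithmetic. A minimal sketch of the idiom, with illustrative names:

    /*
     * Snapshot a mutex-protected field, then compute against the
     * stable local copy.  Re-reading np->n_size unlocked could see
     * the file being extended concurrently, making the block-count
     * math below internally inconsistent.
     */
    u_quad_t nsize;

    mtx_lock(&np->n_mtx);
    nsize = np->n_size;
    mtx_unlock(&np->n_mtx);

    if ((off_t)lbn * biosize >= nsize)
        bcount = 0;                 /* block starts at or past EOF */
    else if ((off_t)(lbn + 1) * biosize > nsize)
        bcount = nsize - (off_t)lbn * biosize;

The snapshot may be stale by the time it is used, but it is self-consistent, which is all the buffer sizing here requires.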
*/ - + mtx_lock(&np->n_mtx); if (uio->uio_offset == np->n_size && n) { + mtx_unlock(&np->n_mtx); /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). Resize the @@ -975,9 +1047,11 @@ if (bp != NULL) { long save; + mtx_lock(&np->n_mtx); np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, np->n_size); + mtx_unlock(&np->n_mtx); save = bp->b_flags & B_CACHE; bcount += n; @@ -996,12 +1070,15 @@ else bcount = np->n_size - (off_t)lbn * biosize; } + mtx_unlock(&np->n_mtx); bp = nfs_getcacheblk(vp, lbn, bcount, td); + mtx_lock(&np->n_mtx); if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, np->n_size); } + mtx_unlock(&np->n_mtx); } if (!bp) { @@ -1047,7 +1124,9 @@ } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); + mtx_lock(&np->n_mtx); np->n_flag |= NMODIFIED; + mtx_unlock(&np->n_mtx); /* * If dirtyend exceeds file size, chop it down. This should @@ -1059,7 +1138,7 @@ */ if (bp->b_dirtyend > bcount) { - printf("NFS append race @%lx:%d\n", + nfs_printf("NFS append race @%lx:%d\n", (long)bp->b_blkno * DEV_BSIZE, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; @@ -1139,7 +1218,7 @@ break; } else if ((n + on) == biosize) { bp->b_flags |= B_ASYNC; - (void) (nmp->nm_rpcops->nr_writebp)(bp, 0, 0); + (void) (nmp->nm_rpcops->nr_writebp)(bp, 0, NULL); } else { bdwrite(bp); } @@ -1229,15 +1308,7 @@ slptimeo = 0; } - if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) { - if (old_lock == LK_SHARED) { - /* Upgrade to exclusive lock, this might block */ - vn_lock(vp, LK_UPGRADE | LK_RETRY, td); - } else { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - } - } - + old_lock = nfs_upgrade_vnlock(vp, td); /* * Now, flush as required. */ @@ -1247,17 +1318,12 @@ goto out; error = vinvalbuf(vp, flags, td, 0, slptimeo); } + mtx_lock(&np->n_mtx); if (np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; + mtx_unlock(&np->n_mtx); out: - if (old_lock != LK_EXCLUSIVE) { - if (old_lock == LK_SHARED) { - /* Downgrade from exclusive lock, this might block */ - vn_lock(vp, LK_DOWNGRADE, td); - } else { - VOP_UNLOCK(vp, 0, td); - } - } + nfs_downgrade_vnlock(vp, td, old_lock); return error; } @@ -1283,11 +1349,12 @@ * leave the async daemons for more important rpc's (such as reads * and writes). 
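The iod hand-off in nfs_asyncio() below is the reason for the new global nfs_iod_mtx: the buffer queue, queue length, and the nfs_iodwant[] idle table are only consistent as a unit. A condensed sketch of the enqueue side (simplified; the retry loop and the selection of an idle daemon index i are assumed done elsewhere):

    mtx_lock(&nfs_iod_mtx);
    TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
    nmp->nm_bufqlen++;
    wakeup(&nfs_iodwant[i]);    /* rouse the idle daemon picked above */
    mtx_unlock(&nfs_iod_mtx);

On the consumer side each daemon now sleeps with msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, ...), so checking the queue and going to sleep are atomic with respect to this enqueue.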
*/ + mtx_lock(&nfs_iod_mtx); if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) && (nmp->nm_bufqiods > nfs_numasync / 2)) { + mtx_unlock(&nfs_iod_mtx); return(EIO); } - again: if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; @@ -1350,12 +1417,15 @@ NFS_DPF(ASYNCIO, ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; - error = nfs_tsleep(td, &nmp->nm_bufq, slpflag | PRIBIO, + error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx, + slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { error2 = nfs_sigintr(nmp, NULL, td); - if (error2) + if (error2) { + mtx_unlock(&nfs_iod_mtx); return (error2); + } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; @@ -1385,11 +1455,17 @@ BUF_KERNPROC(bp); TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; - if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) + if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) { + mtx_lock(&(VTONFS(bp->b_vp))->n_mtx); VTONFS(bp->b_vp)->n_directio_asyncwr++; + mtx_unlock(&(VTONFS(bp->b_vp))->n_mtx); + } + mtx_unlock(&nfs_iod_mtx); return (0); } + mtx_unlock(&nfs_iod_mtx); + /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. @@ -1415,12 +1491,13 @@ free(uiop, M_NFSDIRECTIO); if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) { struct nfsnode *np = VTONFS(bp->b_vp); - + mtx_lock(&np->n_mtx); np->n_directio_asyncwr--; if ((np->n_flag & NFSYNCWAIT) && np->n_directio_asyncwr == 0) { np->n_flag &= ~NFSYNCWAIT; wakeup((caddr_t)&np->n_directio_asyncwr); } + mtx_unlock(&np->n_mtx); } vdrop(bp->b_vp); bp->b_vp = NULL; @@ -1441,7 +1518,8 @@ struct uio uio; struct iovec io; struct proc *p = td ? td->td_proc : NULL; - + uint8_t iocmd; + np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; @@ -1459,8 +1537,8 @@ bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp)); - - if (bp->b_iocmd == BIO_READ) { + iocmd = bp->b_iocmd; + if (iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; @@ -1490,11 +1568,15 @@ } } /* ASSERT_VOP_LOCKED(vp, "nfs_doio"); */ - if (p && (vp->v_vflag & VV_TEXT) && - (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime))) { - PROC_LOCK(p); - killproc(p, "text file modification"); - PROC_UNLOCK(p); + if (p && (vp->v_vflag & VV_TEXT)) { + mtx_lock(&np->n_mtx); + if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime)) { + mtx_unlock(&np->n_mtx); + PROC_LOCK(p); + killproc(p, "text file modification"); + PROC_UNLOCK(p); + } else + mtx_unlock(&np->n_mtx); } break; case VLNK: @@ -1524,7 +1606,7 @@ bp->b_flags |= B_INVAL; break; default: - printf("nfs_doio: type %x unexpected\n", vp->v_type); + nfs_printf("nfs_doio: type %x unexpected\n", vp->v_type); break; }; if (error) { @@ -1558,9 +1640,10 @@ /* * Setup for actual write */ - + mtx_lock(&np->n_mtx); if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size) bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE; + mtx_unlock(&np->n_mtx); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend @@ -1635,7 +1718,9 @@ if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = np->n_error = error; + mtx_lock(&np->n_mtx); np->n_flag |= NWRITEERR; + mtx_unlock(&np->n_mtx); } bp->b_dirtyoff = bp->b_dirtyend = 0; } @@ -1664,13 +1749,16 @@ nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize) { struct nfsnode *np = VTONFS(vp); - 
u_quad_t tsize = np->n_size; + u_quad_t tsize; int biosize = vp->v_mount->mnt_stat.f_iosize; int error = 0; + mtx_lock(&np->n_mtx); + tsize = np->n_size; np->n_size = nsize; + mtx_unlock(&np->n_mtx); - if (np->n_size < tsize) { + if (nsize < tsize) { struct buf *bp; daddr_t lbn; int bufsize; Index: sys/nfsclient/nfs_nfsiod.c =========================================================================== --- sys/nfsclient/nfs_nfsiod.c 2006/05/15 19:00:49 #2 +++ sys/nfsclient/nfs_nfsiod.c 2006/05/15 19:00:49 @@ -102,17 +102,22 @@ error = sysctl_handle_int(oidp, &newmin, 0, req); if (error || (req->newptr == NULL)) return (error); - if (newmin > nfs_iodmax) - return (EINVAL); + mtx_lock(&nfs_iod_mtx); + if (newmin > nfs_iodmax) { + error = EINVAL; + goto out; + } nfs_iodmin = newmin; if (nfs_numasync >= nfs_iodmin) - return (0); + goto out; /* * If the current number of nfsiod is lower * than the new minimum, create some more. */ for (i = nfs_iodmin - nfs_numasync; i > 0; i--) nfs_nfsiodnew(); +out: + mtx_unlock(&nfs_iod_mtx); return (0); } SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0, @@ -131,9 +136,10 @@ return (error); if (newmax > NFS_MAXASYNCDAEMON) return (EINVAL); + mtx_lock(&nfs_iod_mtx); nfs_iodmax = newmax; if (nfs_numasync <= nfs_iodmax) - return (0); + goto out; /* * If there are some asleep nfsiods that should * exit, wakeup() them so that they check nfs_iodmax @@ -146,6 +152,8 @@ wakeup(&nfs_iodwant[iod]); iod--; } +out: + mtx_unlock(&nfs_iod_mtx); return (0); } SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0, @@ -168,8 +176,10 @@ } if (newiod == -1) return (-1); + mtx_unlock(&nfs_iod_mtx); error = kthread_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, RFHIGHPID, 0, "nfsiod %d", newiod); + mtx_lock(&nfs_iod_mtx); if (error) return (-1); nfs_numasync++; @@ -183,6 +193,7 @@ int error; TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); + mtx_lock(&nfs_iod_mtx); /* Silently limit the start number of nfsiod's */ if (nfs_iodmin > NFS_MAXASYNCDAEMON) nfs_iodmin = NFS_MAXASYNCDAEMON; @@ -192,6 +203,7 @@ if (error == -1) panic("nfsiod_setup: nfs_nfsiodnew failed"); } + mtx_unlock(&nfs_iod_mtx); } SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); @@ -211,7 +223,7 @@ int myiod, timo; int error = 0; - mtx_lock(&Giant); + mtx_lock(&nfs_iod_mtx); myiod = (int *)instance - nfs_asyncdaemon; /* * Main loop @@ -230,7 +242,7 @@ * Always keep at least nfs_iodmin kthreads. */ timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz; - error = tsleep(&nfs_iodwant[myiod], PWAIT | PCATCH, + error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH, "-", timo); } if (error) @@ -243,6 +255,7 @@ nmp->nm_bufqwant = 0; wakeup(&nmp->nm_bufq); } + mtx_unlock(&nfs_iod_mtx); if (bp->b_flags & B_DIRECT) { KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set")); (void)nfs_doio_directwrite(bp); @@ -252,7 +265,7 @@ else (void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL); } - + mtx_lock(&nfs_iod_mtx); /* * If there are more than one iod on this mount, then defect * so that the iods can be shared out fairly between the mounts @@ -276,7 +289,7 @@ /* Someone may be waiting for the last nfsiod to terminate. 
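The comment above is one half of a two-sided exit handshake that nfs_iod_mtx makes race-free. Condensed from the hunk below and from the nfs_uninit() hunk later in the patch:

    /* Exiting nfsiod, nfs_iod_mtx held: */
    if (--nfs_numasync == 0)
        wakeup(&nfs_numasync);
    mtx_unlock(&nfs_iod_mtx);
    kthread_exit(0);

    /* Module teardown, waiting for the daemons: */
    mtx_lock(&nfs_iod_mtx);
    nfs_iodmax = 0;                 /* ask every daemon to exit */
    while (nfs_numasync)
        msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
    mtx_unlock(&nfs_iod_mtx);

Because the waiter tests nfs_numasync with the mutex held and msleep() drops it atomically, the final wakeup() cannot slip in between the test and the sleep, which was the hazard under the old tsleep().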
*/ if (--nfs_numasync == 0) wakeup(&nfs_numasync); - mtx_unlock(&Giant); + mtx_unlock(&nfs_iod_mtx); if ((error == 0) || (error == EWOULDBLOCK)) kthread_exit(0); /* Abnormal termination */ Index: sys/nfsclient/nfs_node.c =========================================================================== --- sys/nfsclient/nfs_node.c 2006/05/15 19:00:49 #3 +++ sys/nfsclient/nfs_node.c 2006/05/15 19:00:49 @@ -164,6 +164,7 @@ np->n_fhp = &np->n_fh; bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize); np->n_fhsize = fhsize; + mtx_init(&np->n_mtx, "NFSnode lock", NULL, MTX_DEF); *npp = np; return (0); @@ -234,7 +235,7 @@ if (np->n_fhsize > NFS_SMALLFH) { FREE((caddr_t)np->n_fhp, M_NFSBIGFH); } - + mtx_destroy(&np->n_mtx); uma_zfree(nfsnode_zone, vp->v_data); vp->v_data = NULL; return (0); Index: sys/nfsclient/nfs_socket.c =========================================================================== --- sys/nfsclient/nfs_socket.c 2006/05/15 19:00:49 #12 +++ sys/nfsclient/nfs_socket.c 2006/05/15 19:00:49 @@ -115,7 +115,7 @@ static int nfs_realign_count; static int nfs_bufpackets = 4; static int nfs_reconnects; -static int nfs3_jukebox_delay = 10; +static int nfs3_jukebox_delay = 10; SYSCTL_DECL(_vfs_nfs); @@ -125,9 +125,8 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "number of times the nfs client has had to reconnect"); SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, - "number of seconds to delay a retry after receiving EJUKEBOX"); + "number of seconds to delay a retry after receiving EJUKEBOX"); - /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: @@ -154,10 +153,8 @@ static int nfs_reconnect(struct nfsreq *rep); static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag); static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag); -static void wakeup_nfsreq(struct nfsreq *req); extern struct mtx nfs_reqq_mtx; -extern struct mtx nfs_reply_mtx; /* * Initialize sockets and congestion for a new NFS connection. @@ -172,13 +169,13 @@ struct sockaddr *saddr; struct thread *td = &thread0; /* only used for socreate and sobind */ - NET_ASSERT_GIANT(); + NET_LOCK_GIANT(); if (nmp->nm_sotype == SOCK_STREAM) { - mtx_lock(&nmp->nm_nfstcpstate.mtx); + mtx_lock(&nmp->nm_mtx); nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER; nmp->nm_nfstcpstate.rpcresid = 0; - mtx_unlock(&nmp->nm_nfstcpstate.mtx); + mtx_unlock(&nmp->nm_mtx); } nmp->nm_so = NULL; saddr = nmp->nm_nam; @@ -243,12 +240,16 @@ * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. 
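One idiom worth calling out before the hunks that follow: nm_mtx is dropped around every call that can sleep (soconnect(), sosetopt(), soreserve()), then reacquired. The shape, as used around sosetopt() just below (sketch only):

    mtx_lock(&nmp->nm_mtx);
    /* ... compute reserve sizes from nm_wsize/nm_rsize ... */
    mtx_unlock(&nmp->nm_mtx);       /* sosetopt() may sleep */
    sosetopt(so, &sopt);
    mtx_lock(&nmp->nm_mtx);         /* reacquire before touching fields */

The cost is that anything read before the drop is advisory after the relock; here the worst case is a slightly stale buffer-size calculation, which is harmless.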
*/ + mtx_lock(&nmp->nm_mtx); if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; + mtx_unlock(&nmp->nm_mtx); goto bad; - } + } else + mtx_unlock(&nmp->nm_mtx); } else { + mtx_unlock(&nmp->nm_mtx); error = soconnect(so, nmp->nm_nam, td); if (error) goto bad; @@ -290,7 +291,7 @@ pktscale = 2; if (pktscale > 64) pktscale = 64; - + mtx_lock(&nmp->nm_mtx); if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + @@ -313,7 +314,9 @@ sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; + mtx_unlock(&nmp->nm_mtx); sosetopt(so, &sopt); + mtx_lock(&nmp->nm_mtx); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { struct sockopt sopt; @@ -326,13 +329,16 @@ sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; + mtx_unlock(&nmp->nm_mtx); sosetopt(so, &sopt); + mtx_lock(&nmp->nm_mtx); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; } + mtx_unlock(&nmp->nm_mtx); error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad; @@ -349,6 +355,7 @@ so->so_snd.sb_flags |= SB_NOINTR; SOCKBUF_UNLOCK(&so->so_snd); + mtx_lock(&nmp->nm_mtx); /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = (NFS_TIMEO << 3); @@ -357,10 +364,13 @@ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; nmp->nm_timeouts = 0; + mtx_unlock(&nmp->nm_mtx); + NET_UNLOCK_GIANT(); return (0); bad: nfs_disconnect(nmp); + NET_UNLOCK_GIANT(); return (error); } @@ -387,7 +397,9 @@ error = EINTR; if (error == EIO || error == EINTR) return (error); + mtx_lock(&Giant); (void) tsleep(&lbolt, PSOCK, "nfscon", 0); + mtx_unlock(&Giant); } /* @@ -399,9 +411,10 @@ * until the connection is established successfully, and * then re-transmit the request. 
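The "fix up all requests" walk referred to above now takes two locks: the global nfs_reqq_mtx stabilizes the list, and the new per-request r_mtx protects r_flags. Condensed from nfs_reconnect() and nfs_mark_for_reconnect() below:

    mtx_lock(&nfs_reqq_mtx);
    TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
        if (rp->r_nmp != nmp)
            continue;
        mtx_lock(&rp->r_mtx);   /* documented order: reqq, then r_mtx */
        rp->r_flags |= R_MUSTRESEND;
        wakeup(rp);             /* nfs_mark_for_reconnect() only */
        mtx_unlock(&rp->r_mtx);
    }
    mtx_unlock(&nfs_reqq_mtx);

The sleeper in nfs_reply() waits on rp with r_mtx held, so the flag update and the wakeup are atomic with respect to its msleep().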
*/ - mtx_lock(&nmp->nm_nfstcpstate.mtx); + mtx_lock(&nmp->nm_mtx); nmp->nm_nfstcpstate.flags &= ~NFS_TCP_FORCE_RECONNECT; - mtx_unlock(&nmp->nm_nfstcpstate.mtx); + nmp->nm_nfstcpstate.rpcresid = 0; + mtx_unlock(&nmp->nm_mtx); /* * Loop through outstanding request list and fix up all requests @@ -409,8 +422,11 @@ */ mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { - if (rp->r_nmp == nmp) + if (rp->r_nmp == nmp) { + mtx_lock(&rp->r_mtx); rp->r_flags |= R_MUSTRESEND; + mtx_unlock(&rp->r_mtx); + } } mtx_unlock(&nfs_reqq_mtx); return (0); @@ -426,9 +442,11 @@ NET_ASSERT_GIANT(); + mtx_lock(&nmp->nm_mtx); if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = NULL; + mtx_unlock(&nmp->nm_mtx); SOCKBUF_LOCK(&so->so_rcv); so->so_upcallarg = NULL; so->so_upcall = NULL; @@ -436,7 +454,8 @@ SOCKBUF_UNLOCK(&so->so_rcv); soshutdown(so, SHUT_WR); soclose(so); - } + } else + mtx_unlock(&nmp->nm_mtx); } void @@ -463,22 +482,29 @@ struct sockaddr *sendnam; int error, error2, soflags, flags; - NET_ASSERT_GIANT(); + NET_LOCK_GIANT(); KASSERT(rep, ("nfs_send: called with rep == NULL")); error = nfs_sigintr(rep->r_nmp, rep, rep->r_td); if (error) { m_freem(top); - return (error); + goto out; } + mtx_lock(&rep->r_nmp->nm_mtx); + mtx_lock(&rep->r_mtx); if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); m_freem(top); - return (0); + error = 0; + goto out; } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = NULL; @@ -493,7 +519,9 @@ flags, curthread /*XXX*/); if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { error = 0; + mtx_lock(&rep->r_mtx); rep->r_flags |= R_MUSTRESEND; + mtx_unlock(&rep->r_mtx); } if (error) { @@ -513,8 +541,11 @@ error2 = NFS_SIGREP(rep); if (error2) error = error2; - else + else { + mtx_lock(&rep->r_mtx); rep->r_flags |= R_MUSTRESEND; + mtx_unlock(&rep->r_mtx); + } /* * Handle any recoverable (soft) socket errors here. (?) 
@@ -523,6 +554,8 @@ error != EWOULDBLOCK && error != EPIPE) error = 0; } +out: + NET_UNLOCK_GIANT(); return (error); } @@ -533,7 +566,7 @@ register struct mbuf *m; int error = 0, sotype, slpflag; - NET_ASSERT_GIANT(); + NET_LOCK_GIANT(); sotype = rep->r_nmp->nm_sotype; /* @@ -543,30 +576,39 @@ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(rep); if (error) - return (error); + goto out; tryagain: + mtx_lock(&rep->r_nmp->nm_mtx); + mtx_lock(&rep->r_mtx); if (rep->r_mrep) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); nfs_sndunlock(rep); - return (0); + error = 0; + goto out; } if (rep->r_flags & R_SOFTTERM) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); nfs_sndunlock(rep); - return (EINTR); + error = EINTR; + goto out; } so = rep->r_nmp->nm_so; - mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx); if (!so || (rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT)) { - mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); error = nfs_reconnect(rep); if (error) { nfs_sndunlock(rep); - return (error); + goto out; } goto tryagain; - } else - mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); + } while (rep->r_flags & R_MUSTRESEND) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); @@ -574,41 +616,59 @@ if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep)) != 0) { nfs_sndunlock(rep); - return (error); + goto out; } goto tryagain; } + mtx_lock(&rep->r_nmp->nm_mtx); + mtx_lock(&rep->r_mtx); } + mtx_unlock(&rep->r_nmp->nm_mtx); + mtx_unlock(&rep->r_mtx); nfs_sndunlock(rep); } slpflag = 0; + mtx_lock(&rep->r_nmp->nm_mtx); if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; - mtx_lock(&nfs_reply_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); + mtx_lock(&rep->r_mtx); while ((rep->r_mrep == NULL) && (error == 0) && ((rep->r_flags & R_SOFTTERM) == 0) && ((sotype == SOCK_DGRAM) || ((rep->r_flags & R_MUSTRESEND) == 0))) - error = msleep((caddr_t)rep, &nfs_reply_mtx, + error = msleep((caddr_t)rep, &rep->r_mtx, slpflag | (PZERO - 1), "nfsreq", 0); - mtx_unlock(&nfs_reply_mtx); - if (error == EINTR || error == ERESTART) + if (error == EINTR || error == ERESTART) { /* NFS operations aren't restartable. Map ERESTART to EINTR */ - return (EINTR); - if (rep->r_flags & R_SOFTTERM) + error = EINTR; + mtx_unlock(&rep->r_mtx); + goto out; + } + if (rep->r_flags & R_SOFTTERM) { /* Request was terminated because we exceeded the retries (soft mount) */ - return (ETIMEDOUT); + error = ETIMEDOUT; + mtx_unlock(&rep->r_mtx); + goto out; + } + mtx_unlock(&rep->r_mtx); if (sotype == SOCK_STREAM) { - mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx); + mtx_lock(&rep->r_nmp->nm_mtx); + mtx_lock(&rep->r_mtx); if (((rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) || (rep->r_flags & R_MUSTRESEND))) { - mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); error = nfs_sndlock(rep); if (error) - return (error); + goto out; goto tryagain; - } else - mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); + } else { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&rep->r_nmp->nm_mtx); + } } +out: + NET_UNLOCK_GIANT(); return (error); } @@ -660,6 +720,8 @@ * Iff no match, just drop the datagram */ TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { + mtx_lock(&nmp->nm_mtx); + mtx_lock(&rep->r_mtx); if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. 
*/ rep->r_mrep = mrep; @@ -703,8 +765,13 @@ NFS_SDRTT(rep) += t1; } nmp->nm_timeouts = 0; + wakeup((caddr_t)rep); + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); break; } + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); } /* * If not matched to a request, drop it. @@ -713,31 +780,18 @@ if (rep == 0) { nfsstats.rpcunexpected++; m_freem(mrep); - } else - wakeup_nfsreq(rep); + } mtx_unlock(&nfs_reqq_mtx); } -/* - * The wakeup of the requestor should be done under the mutex - * to avoid potential missed wakeups. - */ -static void -wakeup_nfsreq(struct nfsreq *req) -{ - mtx_lock(&nfs_reply_mtx); - wakeup((caddr_t)req); - mtx_unlock(&nfs_reply_mtx); -} - static void nfs_mark_for_reconnect(struct nfsmount *nmp) { struct nfsreq *rp; - mtx_lock(&nmp->nm_nfstcpstate.mtx); + mtx_lock(&nmp->nm_mtx); nmp->nm_nfstcpstate.flags |= NFS_TCP_FORCE_RECONNECT; - mtx_unlock(&nmp->nm_nfstcpstate.mtx); + mtx_unlock(&nmp->nm_mtx); /* * Wakeup all processes that are waiting for replies * on this mount point. One of them does the reconnect. @@ -745,8 +799,10 @@ mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { if (rp->r_nmp == nmp) { + mtx_lock(&rp->r_mtx); rp->r_flags |= R_MUSTRESEND; - wakeup_nfsreq(rp); + wakeup((caddr_t)rp); + mtx_unlock(&rp->r_mtx); } } mtx_unlock(&nfs_reqq_mtx); @@ -795,19 +851,21 @@ * Don't pick any more data from the socket if we've marked the * mountpoint for reconnect. */ - mtx_lock(&nmp->nm_nfstcpstate.mtx); + mtx_lock(&nmp->nm_mtx); if (nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) { - mtx_unlock(&nmp->nm_nfstcpstate.mtx); + mtx_unlock(&nmp->nm_mtx); return; } else - mtx_unlock(&nmp->nm_nfstcpstate.mtx); + mtx_unlock(&nmp->nm_mtx); auio.uio_td = curthread; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; for ( ; ; ) { + mtx_lock(&nmp->nm_mtx); if (nmp->nm_nfstcpstate.flags & NFS_TCP_EXPECT_RPCMARKER) { int resid; + mtx_unlock(&nmp->nm_mtx); if (!nfstcp_marker_readable(so)) { /* Marker is not readable */ return; @@ -864,14 +922,20 @@ nmp->nm_mountp->mnt_stat.f_mntfromname); goto mark_reconnect; } + mtx_lock(&nmp->nm_mtx); nmp->nm_nfstcpstate.rpcresid = len; nmp->nm_nfstcpstate.flags &= ~(NFS_TCP_EXPECT_RPCMARKER); - } + mtx_unlock(&nmp->nm_mtx); + } else + mtx_unlock(&nmp->nm_mtx); + /* * Processed RPC marker or no RPC marker to process. * Pull in and process data. */ + mtx_lock(&nmp->nm_mtx); if (nmp->nm_nfstcpstate.rpcresid > 0) { + mtx_unlock(&nmp->nm_mtx); if (!nfstcp_readable(so, nmp->nm_nfstcpstate.rpcresid)) { /* All data not readable */ return; @@ -894,11 +958,14 @@ } if (mp == NULL) panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n"); + mtx_lock(&nmp->nm_mtx); nmp->nm_nfstcpstate.rpcresid = 0; nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER; + mtx_unlock(&nmp->nm_mtx); /* We got the entire RPC reply. 
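For readers following the rpcresid/NFS_TCP_EXPECT_RPCMARKER state machine above: it tracks RFC 1831 record marking on the TCP stream. Each fragment is preceded by a 4-byte marker whose high bit flags the final fragment and whose low 31 bits give the fragment length. A sketch of decoding it (illustrative, not the patch's code; rawmark is the 4 bytes peeled off the stream):

    u_int32_t marker, len;
    int last;

    marker = ntohl(rawmark);
    last = (marker & 0x80000000) != 0;  /* final fragment of the reply? */
    len  = marker & 0x7fffffff;         /* bytes that follow the marker */

So a marker of 0x800000a4 means "final fragment, 164 bytes follow"; rpcresid holds how much of that length is still unread, and the EXPECT flag records whether the next bytes on the socket are a marker or payload.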
Match XIDs and wake up requestor */ nfs_clnt_match_xid(so, nmp, mp); - } + } else + mtx_unlock(&nmp->nm_mtx); } mark_reconnect: @@ -953,7 +1020,7 @@ struct mbuf *m, *md, *mheadend; time_t waituntil; caddr_t dpos; - int s, error = 0, mrest_len, auth_len, auth_type; + int error = 0, mrest_len, auth_len, auth_type; struct timeval now; u_int32_t *xidp; @@ -966,11 +1033,12 @@ if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); - rep->r_mrep = rep->r_md = NULL; + bzero(rep, sizeof(struct nfsreq)); rep->r_nmp = nmp; rep->r_vp = vp; rep->r_td = td; rep->r_procnum = procnum; + mtx_init(&rep->r_mtx, "NFSrep lock", NULL, MTX_DEF); getmicrouptime(&now); rep->r_lastmsg = now.tv_sec - @@ -1019,7 +1087,6 @@ * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ - s = splsoftclock(); mtx_lock(&nfs_reqq_mtx); if (TAILQ_EMPTY(&nfs_reqq)) callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); @@ -1031,10 +1098,11 @@ * send this one now but let timer do it. If not timing a request, * do it now. */ + mtx_lock(&nmp->nm_mtx); if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { - splx(s); + mtx_unlock(&nmp->nm_mtx); error = nfs_sndlock(rep); if (!error) { m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT); @@ -1047,12 +1115,14 @@ * blocking on nfs_send() too long, so check for R_SENT here. */ if (!error && (rep->r_flags & (R_SENT | R_MUSTRESEND)) == 0) { + mtx_lock(&nmp->nm_mtx); nmp->nm_sent += NFS_CWNDSCALE; + mtx_unlock(&nmp->nm_mtx); rep->r_flags |= R_SENT; } mtx_unlock(&nfs_reqq_mtx); } else { - splx(s); + mtx_unlock(&nmp->nm_mtx); rep->r_rtt = -1; } @@ -1065,7 +1135,6 @@ /* * RPC done, unlink the request. */ - s = splsoftclock(); mtx_lock(&nfs_reqq_mtx); /* * nfs_timer() may be in the process of re-transmitting this request. @@ -1086,10 +1155,11 @@ */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ + mtx_lock(&nmp->nm_mtx); nmp->nm_sent -= NFS_CWNDSCALE; + mtx_unlock(&nmp->nm_mtx); } mtx_unlock(&nfs_reqq_mtx); - splx(s); /* * If there was a successful reply and a tprintf msg. 
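This is the heart of the change from the global nfs_reply_mtx to per-request locking: the waiter and the reply poster now synchronize on the same r_mtx, so the dedicated mutex and the wakeup_nfsreq() wrapper (removed above) are no longer needed. The two sides, condensed:

    /* nfs_reply(), waiting: */
    mtx_lock(&rep->r_mtx);
    while (rep->r_mrep == NULL && error == 0 &&
        (rep->r_flags & R_SOFTTERM) == 0)
        error = msleep(rep, &rep->r_mtx,
            slpflag | (PZERO - 1), "nfsreq", 0);
    mtx_unlock(&rep->r_mtx);

    /* soupcall, posting the matched reply: */
    mtx_lock(&rep->r_mtx);
    rep->r_mrep = mrep;
    wakeup(rep);
    mtx_unlock(&rep->r_mtx);

msleep() releases r_mtx only once the waiter is on the sleep queue, so posting r_mrep and issuing the wakeup() can never fall into the window between the r_mrep == NULL test and the sleep.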
@@ -1113,6 +1183,7 @@ if (rep->r_mrep != NULL) m_freem(rep->r_mrep); m_freem(rep->r_mreq); + mtx_destroy(&rep->r_mtx); free((caddr_t)rep, M_NFSREQ); return (error); } @@ -1131,6 +1202,7 @@ error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); + mtx_destroy(&rep->r_mtx); free((caddr_t)rep, M_NFSREQ); return (error); } @@ -1153,12 +1225,16 @@ m_freem(mrep); error = 0; waituntil = time_second + nfs3_jukebox_delay; - while (time_second < waituntil) - (void) tsleep(&lbolt, - PSOCK, "nqnfstry", 0); + while (time_second < waituntil) { + mtx_lock(&Giant); + (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0); + mtx_unlock(&Giant); + } + mtx_lock(&nfs_reqq_mtx); if (++nfs_xid == 0) nfs_xid++; rep->r_xid = *xidp = txdr_unsigned(nfs_xid); + mtx_unlock(&nfs_reqq_mtx); goto tryagain; } @@ -1176,6 +1252,7 @@ } else m_freem(mrep); m_freem(rep->r_mreq); + mtx_destroy(&rep->r_mtx); free((caddr_t)rep, M_NFSREQ); return (error); } @@ -1184,6 +1261,7 @@ *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); + mtx_destroy(&rep->r_mtx); FREE((caddr_t)rep, M_NFSREQ); return (0); } @@ -1191,6 +1269,7 @@ error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); + mtx_destroy(&rep->r_mtx); free((caddr_t)rep, M_NFSREQ); return (error); } @@ -1215,34 +1294,34 @@ struct socket *so; struct nfsmount *nmp; int timeo; - int s, error; + int error; struct timeval now; getmicrouptime(&now); - s = splnet(); mtx_lock(&Giant); /* nfs_down -> tprintf */ mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { nmp = rep->r_nmp; - if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) + mtx_lock(&rep->r_mtx); + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { + mtx_unlock(&rep->r_mtx); continue; + } else + mtx_unlock(&rep->r_mtx); if (nfs_sigintr(nmp, rep, rep->r_td)) continue; + mtx_lock(&nmp->nm_mtx); + mtx_lock(&rep->r_mtx); if (nmp->nm_tprintf_initial_delay != 0 && (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) && rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); rep->r_lastmsg = now.tv_sec; nfs_down(rep, nmp, rep->r_td, "not responding", - 0, NFSSTA_TIMEO); -#if 0 - if (!(nmp->nm_state & NFSSTA_MOUNTED)) { - /* we're not yet completely mounted and */ - /* we can't complete an RPC, so we fail */ - nfsstats.rpctimeouts++; - nfs_softterm(rep); - continue; - } -#endif + 0, NFSSTA_TIMEO); + mtx_lock(&nmp->nm_mtx); + mtx_lock(&rep->r_mtx); } if (rep->r_rtt >= 0) { rep->r_rtt++; @@ -1252,14 +1331,19 @@ timeo = NFS_RTO(nmp, proct[rep->r_procnum]); if (nmp->nm_timeouts > 0) timeo *= nfs_backoff[nmp->nm_timeouts - 1]; - if (rep->r_rtt <= timeo) + if (rep->r_rtt <= timeo) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); continue; + } if (nmp->nm_timeouts < NFS_NBACKOFF) nmp->nm_timeouts++; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ nfsstats.rpctimeouts++; nfs_softterm(rep); + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); continue; } if (nmp->nm_sotype != SOCK_DGRAM) { @@ -1272,12 +1356,17 @@ * if necessary. */ rep->r_flags |= R_MUSTRESEND; - wakeup_nfsreq(rep); + wakeup((caddr_t)rep); rep->r_rtt = 0; + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); continue; } - if ((so = nmp->nm_so) == NULL) + if ((so = nmp->nm_so) == NULL) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); continue; + } /* * If there is enough space and the window allows.. 
* Resend it @@ -1285,57 +1374,69 @@ */ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && - ((nmp->nm_flag & NFSMNT_DUMBTIMR) || - (rep->r_flags & R_SENT) || - nmp->nm_sent < nmp->nm_cwnd) && - (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) { - /* - * Mark the request to indicate that a XMIT is in progress - * to prevent the req structure being removed in nfs_request(). - */ - rep->r_flags |= R_REXMIT_INPROG; - mtx_unlock(&nfs_reqq_mtx); - NET_LOCK_GIANT(); - if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) - error = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, NULL, NULL, curthread); - else - error = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, nmp->nm_nam, NULL, curthread); - NET_UNLOCK_GIANT(); - mtx_lock(&nfs_reqq_mtx); - rep->r_flags &= ~R_REXMIT_INPROG; - wakeup((caddr_t)&rep->r_flags); - if (error) { - if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) - so->so_error = 0; - rep->r_flags |= R_RESENDERR; - } else { + ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || + nmp->nm_sent < nmp->nm_cwnd)) { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); + if ((m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) { /* - * Iff first send, start timing - * else turn timing off, backoff timer - * and divide congestion window by 2. + * Mark the request to indicate that a XMIT is in + * progress to prevent the req structure being + * removed in nfs_request(). */ - rep->r_flags &= ~R_RESENDERR; - if (rep->r_flags & R_SENT) { - rep->r_flags &= ~R_TIMING; - if (++rep->r_rexmit > NFS_MAXREXMIT) - rep->r_rexmit = NFS_MAXREXMIT; - nmp->nm_cwnd >>= 1; - if (nmp->nm_cwnd < NFS_CWNDSCALE) - nmp->nm_cwnd = NFS_CWNDSCALE; - nfsstats.rpcretries++; + mtx_lock(&rep->r_mtx); + rep->r_flags |= R_REXMIT_INPROG; + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nfs_reqq_mtx); + NET_LOCK_GIANT(); + if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, NULL, NULL, curthread); + else + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, nmp->nm_nam, NULL, + curthread); + NET_UNLOCK_GIANT(); + mtx_lock(&nfs_reqq_mtx); + mtx_lock(&nmp->nm_mtx); + mtx_lock(&rep->r_mtx); + rep->r_flags &= ~R_REXMIT_INPROG; + wakeup((caddr_t)&rep->r_flags); + if (error) { + if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) + so->so_error = 0; + rep->r_flags |= R_RESENDERR; } else { - rep->r_flags |= R_SENT; - nmp->nm_sent += NFS_CWNDSCALE; + /* + * Iff first send, start timing + * else turn timing off, backoff timer + * and divide congestion window by 2. 
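A worked example of the window arithmetic here, assuming the stock constants (NFS_CWNDSCALE of 256, with the initial window set to NFS_MAXCWND / 2 in nfs_connect()): the window is accounted at 256 units per outstanding request, so an initial nm_cwnd of 4096 admits 16 in-flight RPCs. On a timed-out retransmit of an already-sent request:

    rep->r_flags &= ~R_TIMING;        /* Karn: don't time a retransmit */
    if (++rep->r_rexmit > NFS_MAXREXMIT)
        rep->r_rexmit = NFS_MAXREXMIT;
    nmp->nm_cwnd >>= 1;               /* e.g. 4096 -> 2048: 8 requests */
    if (nmp->nm_cwnd < NFS_CWNDSCALE)
        nmp->nm_cwnd = NFS_CWNDSCALE; /* floor: one request */

Successful replies grow the window again in nfs_clnt_match_xid(), giving the TCP-like additive-increase/multiplicative-decrease behaviour the congestion-window comment earlier in this file describes.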
+ */ + rep->r_flags &= ~R_RESENDERR; + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_TIMING; + if (++rep->r_rexmit > NFS_MAXREXMIT) + rep->r_rexmit = NFS_MAXREXMIT; + nmp->nm_cwnd >>= 1; + if (nmp->nm_cwnd < NFS_CWNDSCALE) + nmp->nm_cwnd = NFS_CWNDSCALE; + nfsstats.rpcretries++; + } else { + rep->r_flags |= R_SENT; + nmp->nm_sent += NFS_CWNDSCALE; + } + rep->r_rtt = 0; } - rep->r_rtt = 0; + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); } + } else { + mtx_unlock(&rep->r_mtx); + mtx_unlock(&nmp->nm_mtx); } } mtx_unlock(&nfs_reqq_mtx); mtx_unlock(&Giant); /* nfs_down -> tprintf */ - splx(s); callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); } @@ -1349,31 +1450,33 @@ struct nfsmount *nmp; { struct nfsreq *req; - int i, s; + int i; - s = splnet(); mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { + mtx_lock(&req->r_mtx); if (nmp != req->r_nmp || req->r_mrep != NULL || - (req->r_flags & R_SOFTTERM)) + (req->r_flags & R_SOFTTERM)) { + mtx_unlock(&req->r_mtx); continue; + } nfs_softterm(req); + mtx_unlock(&req->r_mtx); } mtx_unlock(&nfs_reqq_mtx); - splx(s); for (i = 0; i < 30; i++) { - s = splnet(); mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (nmp == req->r_nmp) break; } mtx_unlock(&nfs_reqq_mtx); - splx(s); if (req == NULL) return (0); + mtx_lock(&Giant); tsleep(&lbolt, PSOCK, "nfscancel", 0); + mtx_unlock(&Giant); } return (EBUSY); } @@ -1387,7 +1490,7 @@ static void nfs_softterm(struct nfsreq *rep) { - + KASSERT(mtx_owned(&rep->r_mtx), ("NFS req lock not owned !")); rep->r_flags |= R_SOFTTERM; if (rep->r_flags & R_SENT) { rep->r_nmp->nm_sent -= NFS_CWNDSCALE; @@ -1397,7 +1500,7 @@ * Request terminated, wakeup the blocked process, so that we * can return EINTR back. */ - wakeup_nfsreq(rep); + wakeup((caddr_t)rep); } /* @@ -1494,28 +1597,6 @@ } /* - * NFS wrapper to tsleep(), that shoves a new p_sigmask and restores the - * old one after tsleep() returns. - */ -int -nfs_tsleep(struct thread *td, void *ident, int priority, char *wmesg, int timo) -{ - sigset_t oldset; - int error; - struct proc *p; - - if ((priority & PCATCH) == 0) - return tsleep(ident, priority, wmesg, timo); - if (td == NULL) - td = curthread; /* XXX */ - nfs_set_sigmask(td, &oldset); - error = tsleep(ident, priority, wmesg, timo); - nfs_restore_sigmask(td, &oldset); - p = td->td_proc; - return (error); -} - -/* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ @@ -1524,19 +1605,28 @@ { struct proc *p; sigset_t tmpset; - + int error = 0; + if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_sigintr(nmp, rep, td); - if (rep && (rep->r_flags & R_SOFTTERM)) - return (EIO); + if (rep) { + mtx_lock(&rep->r_mtx); + if (rep->r_flags & R_SOFTTERM) { + mtx_unlock(&rep->r_mtx); + error = EIO; + goto out; + } else + mtx_unlock(&rep->r_mtx); + } /* Terminate all requests while attempting a forced unmount. 
 */
-	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
-		return (EIO);
+	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) {
+		error = EIO;
+		goto out;
+	}
 	if (!(nmp->nm_flag & NFSMNT_INT))
-		return (0);
+		goto out;
 	if (td == NULL)
 		return (0);
-
 	p = td->td_proc;
 	PROC_LOCK(p);
 	tmpset = p->p_siglist;
@@ -1551,6 +1641,8 @@
 	PROC_UNLOCK(p);
 
 	return (0);
+out:
+	return (error);
 }
 
 /*
@@ -1567,21 +1659,25 @@
 	int error, slpflag = 0, slptimeo = 0;
 
 	td = rep->r_td;
+	mtx_lock(&rep->r_nmp->nm_mtx);
 	if (rep->r_nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	while (*statep & NFSSTA_SNDLOCK) {
 		error = nfs_sigintr(rep->r_nmp, rep, td);
-		if (error)
+		if (error) {
+			mtx_unlock(&rep->r_nmp->nm_mtx);
 			return (error);
+		}
 		*statep |= NFSSTA_WANTSND;
-		(void) tsleep(statep, slpflag | (PZERO - 1),
-			"nfsndlck", slptimeo);
+		(void) msleep(statep, &rep->r_nmp->nm_mtx,
+			slpflag | (PZERO - 1), "nfsndlck", slptimeo);
 		if (slpflag == PCATCH) {
 			slpflag = 0;
 			slptimeo = 2 * hz;
 		}
 	}
 	*statep |= NFSSTA_SNDLOCK;
+	mtx_unlock(&rep->r_nmp->nm_mtx);
 	return (0);
 }
 
@@ -1593,6 +1689,7 @@
 {
 	int *statep = &rep->r_nmp->nm_state;
 
+	mtx_lock(&rep->r_nmp->nm_mtx);
 	if ((*statep & NFSSTA_SNDLOCK) == 0)
 		panic("nfs sndunlock");
 	*statep &= ~NFSSTA_SNDLOCK;
@@ -1600,6 +1697,7 @@
 		*statep &= ~NFSSTA_WANTSND;
 		wakeup(statep);
 	}
+	mtx_unlock(&rep->r_nmp->nm_mtx);
 }
 
 /*
@@ -1703,8 +1801,11 @@
 		nmp->nm_state |= NFSSTA_LOCKTIMEO;
 	}
 #endif
-	if (rep)
+	if (rep) {
+		mtx_lock(&rep->r_mtx);
 		rep->r_flags |= R_TPRINTFMSG;
+		mtx_unlock(&rep->r_mtx);
+	}
 	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
 }
 
@@ -1721,8 +1821,12 @@
 	if (nmp == NULL)
 		return;
+	if (rep != NULL)
+		mtx_lock(&rep->r_mtx);
 	if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0)
 		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+	if (rep != NULL)
+		mtx_unlock(&rep->r_mtx);
 	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
 		nmp->nm_state &= ~NFSSTA_TIMEO;
 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
@@ -1736,4 +1838,3 @@
 	}
 #endif
 }
-
Index: sys/nfsclient/nfs_subs.c
===========================================================================
--- sys/nfsclient/nfs_subs.c 2006/05/15 19:00:49 #2
+++ sys/nfsclient/nfs_subs.c 2006/05/15 19:00:49
@@ -76,6 +76,12 @@
 #include
 
 /*
+ * Note that stdarg.h and the ANSI style va_start macro are used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+/*
  * Data items converted to xdr at startup, since they are constant
  * This is kinda hokey, but may save a little time doing byte swaps
  */
@@ -95,7 +101,6 @@
 
 struct nfs_reqq nfs_reqq;
 struct mtx nfs_reqq_mtx;
-struct mtx nfs_reply_mtx;
 struct nfs_bufq nfs_bufq;
 
 /*
@@ -182,6 +187,7 @@
 	 */
 	tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED);
 
+	mtx_lock(&nfs_reqq_mtx);
 	/* Get a pretty random xid to start with */
 	if (!nfs_xid)
 		nfs_xid = random();
@@ -193,6 +199,7 @@
 
 	*xidpp = tl;
 	*tl++ = txdr_unsigned(nfs_xid);
+	mtx_unlock(&nfs_reqq_mtx);
 	*tl++ = rpc_call;
 	*tl++ = rpc_vers;
 	*tl++ = txdr_unsigned(NFS_PROG);
@@ -416,7 +423,7 @@
 	TAILQ_INIT(&nfs_reqq);
 	callout_init(&nfs_callout, CALLOUT_MPSAFE);
 	mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF);
-	mtx_init(&nfs_reply_mtx, "Synch NFS reply posting", NULL, MTX_DEF);
+	mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
 
 	nfs_pbuf_freecnt = nswbuf / 2 + 1;
 
@@ -437,19 +444,80 @@
 	 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
 	 * any sleeping nfsiods so they check nfs_iodmax and exit.
 	 */
+	mtx_lock(&nfs_iod_mtx);
 	nfs_iodmax = 0;
 	for (i = 0; i < nfs_numasync; i++)
 		if (nfs_iodwant[i])
 			wakeup(&nfs_iodwant[i]);
 	/* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
 	while (nfs_numasync)
-		tsleep(&nfs_numasync, PWAIT, "ioddie", 0);
-
+		msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
+	mtx_unlock(&nfs_iod_mtx);
 	nfs_nhuninit();
 	uma_zdestroy(nfsmount_zone);
 	return (0);
 }
 
+void
+nfs_dircookie_lock(struct nfsnode *np)
+{
+	mtx_lock(&np->n_mtx);
+	while (np->n_flag & NDIRCOOKIELK)
+		(void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
+	np->n_flag |= NDIRCOOKIELK;
+	mtx_unlock(&np->n_mtx);
+}
+
+void
+nfs_dircookie_unlock(struct nfsnode *np)
+{
+	mtx_lock(&np->n_mtx);
+	np->n_flag &= ~NDIRCOOKIELK;
+	wakeup(&np->n_flag);
+	mtx_unlock(&np->n_mtx);
+}
+
+int
+nfs_upgrade_vnlock(struct vnode *vp, struct thread *td)
+{
+	int old_lock;
+
+	if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Upgrade to exclusive lock, this might block */
+			vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+		}
+	}
+	return old_lock;
+}
+
+void
+nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock)
+{
+	if (old_lock != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Downgrade from exclusive lock, this might block */
+			vn_lock(vp, LK_DOWNGRADE, td);
+		} else {
+			VOP_UNLOCK(vp, 0, td);
+		}
+	}
+}
+
+void
+nfs_printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	mtx_lock(&Giant);
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+	mtx_unlock(&Giant);
+}
+
 /*
  * Attribute cache routines.
  * nfs_loadattrcache() - loads or updates the cache contents from attributes
@@ -466,7 +534,7 @@
  */
 int
 nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
-		  struct vattr *vaper, int dontshrink)
+    struct vattr *vaper, int dontshrink)
 {
 	struct vnode *vp = *vpp;
 	struct vattr *vap;
@@ -535,6 +603,7 @@
 	 * information.
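On nfs_printf() above: the variadic arguments must be forwarded with vprintf(); handing the va_list to plain printf() would format the va_list object itself as the first argument. Giant is taken because console output was not yet MPSAFE at this point in the tree. The wrapper is used exactly like printf:

    nfs_printf("nfs_getpages: error %d\n", error);

and the general forwarding shape, for reference (illustrative name, not from the patch):

    void
    my_log(const char *fmt, ...)
    {
        va_list ap;

        va_start(ap, fmt);
        vprintf(fmt, ap);   /* forward the whole argument pack */
        va_end(ap);
    }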
*/ np = VTONFS(vp); + mtx_lock(&np->n_mtx); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) @@ -617,6 +686,7 @@ vaper->va_mtime = np->n_mtim; } } + mtx_unlock(&np->n_mtx); return (0); } @@ -639,16 +709,20 @@ struct vattr *vap; struct nfsmount *nmp; int timeo; - + np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); +#ifdef NFS_ACDEBUG + mtx_lock(&Giant); /* nfs_printf() */ +#endif + mtx_lock(&np->n_mtx); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime.tv_sec) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) - printf("nfs_getattrcache: initial timeo = %d\n", timeo); + nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { @@ -665,18 +739,19 @@ #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) - printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", - nmp->nm_acregmin, nmp->nm_acregmax, - nmp->nm_acdirmin, nmp->nm_acdirmax); + nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", + nmp->nm_acregmin, nmp->nm_acregmax, + nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) - printf("nfs_getattrcache: age = %d; final timeo = %d\n", - (time_second - np->n_attrstamp), timeo); + nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n", + (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; - return (ENOENT); + mtx_unlock(&np->n_mtx); + return( ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { @@ -701,6 +776,10 @@ if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } + mtx_unlock(&np->n_mtx); +#ifdef NFS_ACDEBUG + mtx_unlock(&Giant); /* nfs_printf() */ +#endif return (0); } @@ -714,7 +793,8 @@ { struct nfsdmap *dp, *dp2; int pos; - + nfsuint64 *retval = NULL; + pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC @@ -732,14 +812,14 @@ dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else - return (NULL); + goto out; } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list)) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && - pos >= dp->ndm_eocookie) - return (NULL); + pos >= dp->ndm_eocookie) + goto out; dp = LIST_NEXT(dp, ndm_list); } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), @@ -748,15 +828,17 @@ LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else - return (NULL); + goto out; } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else - return (NULL); + goto out; } - return (&dp->ndm_cookies[pos]); + retval = &dp->ndm_cookies[pos]; +out: + return (retval); } /* @@ -773,11 +855,13 @@ if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif + nfs_dircookie_lock(np); np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (LIST_FIRST(&np->n_cookies)) LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; + nfs_dircookie_unlock(np); } /* @@ -797,8 +881,6 @@ struct buf *bp, *nbp; int s; - GIANT_REQUIRED; - s = splbio(); MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, nvp) { @@ -896,7 +978,7 @@ int nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, - caddr_t *dpos) + caddr_t *dpos) { int t1; @@ -910,7 +992,7 @@ int nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md, - caddr_t *dpos) + caddr_t *dpos) { u_int32_t *tl; int t1; @@ -945,9 +1027,11 @@ tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; + mtx_lock(&(VTONFS(*v))->n_mtx); if (*f) ttretf = 
(VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); + mtx_unlock(&(VTONFS(*v))->n_mtx); } t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos); if (t1) Index: sys/nfsclient/nfs_vfsops.c =========================================================================== --- sys/nfsclient/nfs_vfsops.c 2006/05/15 19:00:49 #4 +++ sys/nfsclient/nfs_vfsops.c 2006/05/15 19:00:49 @@ -33,7 +33,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vfsops.c,v 1.180 2006/04/01 01:15:23 jeff Exp $"); +__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vfsops.c,v 1.179 2006/01/09 20:42:18 tegge Exp $"); #include "opt_bootp.h" #include "opt_nfsroot.h" @@ -84,6 +84,7 @@ uma_zone_t nfsmount_zone; struct nfsstats nfsstats; + SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, "S,nfsstats"); @@ -183,7 +184,8 @@ * space. */ iosize = max(nmp->nm_rsize, nmp->nm_wsize); - if (iosize < PAGE_SIZE) iosize = PAGE_SIZE; + if (iosize < PAGE_SIZE) + iosize = PAGE_SIZE; return iosize; } @@ -257,8 +259,12 @@ return (error); } vp = NFSTOV(np); - if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + mtx_lock(&nmp->nm_mtx); + if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&nmp->nm_mtx); (void)nfs_fsinfo(nmp, vp, td->td_ucred, td); + } else + mtx_unlock(&nmp->nm_mtx); nfsstats.rpccnt[NFSPROC_FSSTAT]++; mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); mb = mreq; @@ -273,7 +279,9 @@ goto nfsmout; } sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3)); + mtx_lock(&nmp->nm_mtx); sbp->f_iosize = nfs_iosize(nmp); + mtx_unlock(&nmp->nm_mtx); if (v3) { sbp->f_bsize = NFS_FABLKSIZE; tquad = fxdr_hyper(&sfp->sf_tbytes); @@ -314,7 +322,7 @@ int error = 0, retattr; struct mbuf *mreq, *mrep, *md, *mb; u_int64_t maxfsize; - + nfsstats.rpccnt[NFSPROC_FSINFO]++; mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); mb = mreq; @@ -323,6 +331,7 @@ nfsm_request(vp, NFSPROC_FSINFO, td, cred); nfsm_postop_attr(vp, retattr); if (!error) { + mtx_lock(&nmp->nm_mtx); fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) @@ -358,6 +367,7 @@ nmp->nm_maxfilesize = maxfsize; nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; + mtx_unlock(&nmp->nm_mtx); } m_freem(mrep); nfsmout: @@ -664,8 +674,7 @@ if (nmp->nm_sotype == SOCK_DGRAM) while (nfs_connect(nmp, NULL)) { printf("nfs_args: retrying connect\n"); - (void) tsleep((caddr_t)&lbolt, - PSOCK, "nfscon", 0); + (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } } } @@ -693,24 +702,31 @@ size_t len; u_char nfh[NFSX_V3FHMAX]; - if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) - return (EINVAL); + if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { + error = EINVAL; + goto out; + } - if (mp->mnt_flag & MNT_ROOTFS) - return (nfs_mountroot(mp, td)); + if (mp->mnt_flag & MNT_ROOTFS) { + error = nfs_mountroot(mp, td); + goto out; + } error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args); if (error) - return (error); + goto out; if (args.version != NFS_ARGSVERSION) { - return (EPROGMISMATCH); + error = EPROGMISMATCH; + goto out; } if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); - if (nmp == NULL) - return (EIO); + if (nmp == NULL) { + error = EIO; + goto out; + } /* * When doing an update, we can't change from or to * v3, switch lockd strategies or change 
cookie translation @@ -720,7 +736,7 @@ (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args); - return (0); + goto out; } /* @@ -734,21 +750,25 @@ */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; - if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) - return (EINVAL); + if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { + error = EINVAL; + goto out; + } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error) - return (error); + goto out; error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); if (error) - return (error); + goto out; bzero(&hst[len], MNAMELEN - len); /* sockargs() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error) - return (error); + goto out; args.fh = nfh; error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred); + mp->mnt_kern_flag |= MNTK_MPSAFE; +out: return (error); } @@ -771,12 +791,11 @@ error = copyin(data, &args, sizeof (struct nfs_args)); if (error) - return (error); + return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); - return (error); } @@ -805,6 +824,7 @@ } vfs_getnewfsid(mp); nmp->nm_mountp = mp; + mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too @@ -851,10 +871,6 @@ nfs_decode_args(mp, nmp, argp); - if (nmp->nm_sotype == SOCK_STREAM) - mtx_init(&nmp->nm_nfstcpstate.mtx, "NFS/TCP state lock", - NULL, MTX_DEF); - /* * For Connection based sockets (TCP,...) defer the connect until * the first request, in case the server is not responding. @@ -869,7 +885,9 @@ * stuck on a dead server and we are holding a lock on the mount * point. */ + mtx_lock(&nmp->nm_mtx); mp->mnt_stat.f_iosize = nfs_iosize(nmp); + mtx_unlock(&nmp->nm_mtx); /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward @@ -900,8 +918,7 @@ return (0); bad: - if (nmp->nm_sotype == SOCK_STREAM) - mtx_destroy(&nmp->nm_nfstcpstate.mtx); + mtx_destroy(&nmp->nm_mtx); nfs_disconnect(nmp); uma_zfree(nfsmount_zone, nmp); FREE(nam, M_SONAME); @@ -930,12 +947,12 @@ if (flags & FORCECLOSE) { error = nfs_nmcancelreqs(nmp); if (error) - return (error); + goto out; } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ error = vflush(mp, 1, flags, td); if (error) - return (error); + goto out; /* * We are now committed to the unmount. @@ -943,11 +960,10 @@ nfs_disconnect(nmp); FREE(nmp->nm_nam, M_SONAME); - if (nmp->nm_sotype == SOCK_STREAM) - mtx_destroy(&nmp->nm_nfstcpstate.mtx); - + mtx_destroy(&nmp->nm_mtx); uma_zfree(nfsmount_zone, nmp); - return (0); +out: + return (error); } /* @@ -964,15 +980,18 @@ nmp = VFSTONFS(mp); error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) - return (error); + return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. 
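Two things happen in the mountnfs()/unmount hunks above: error returns are funneled through a single out: label, and nm_mtx is now created unconditionally and destroyed on both the bad: path and in unmount (the old per-TCP-state mutex is gone). A compact sketch of that lifecycle, under the same assumptions as before (illustrative names, pthreads standing in for mtx(9)):

#include <pthread.h>
#include <stdlib.h>
#include <errno.h>

typedef struct { pthread_mutex_t mtx; /* ... other fields ... */ } nfsmnt_t;

static int connect_server(nfsmnt_t *nmp) { (void)nmp; return (0); } /* stub */

static int
mount_fs(nfsmnt_t **out)
{
	nfsmnt_t *nmp;
	int error;

	if ((nmp = malloc(sizeof(*nmp))) == NULL)
		return (ENOMEM);
	pthread_mutex_init(&nmp->mtx, NULL);	/* unconditional, like nm_mtx */
	error = connect_server(nmp);
	if (error)
		goto bad;
	*out = nmp;
	return (0);
bad:
	pthread_mutex_destroy(&nmp->mtx);	/* paired with the init above */
	free(nmp);
	return (error);
}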
*/ + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 && (nmp->nm_flag & NFSMNT_NFSV3)) { + mtx_unlock(&nmp->nm_mtx); nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread); - } + } else + mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; @@ -1051,8 +1070,10 @@ break; #endif case VFS_CTL_QUERY: + mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; + mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) Index: sys/nfsclient/nfs_vnops.c =========================================================================== --- sys/nfsclient/nfs_vnops.c 2006/05/15 19:00:49 #8 +++ sys/nfsclient/nfs_vnops.c 2006/05/15 19:00:49 @@ -33,7 +33,7 @@ */ #include -__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vnops.c,v 1.265 2006/04/06 01:20:30 mohans Exp $"); +__FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vnops.c,v 1.264 2006/03/08 01:43:01 cel Exp $"); /* * vnode op calls for Sun NFS version 2 and 3 @@ -192,6 +192,7 @@ /* * Global variables */ +struct mtx nfs_iod_mtx; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; @@ -241,6 +242,23 @@ #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) + +/* + * SMP Locking Note : + * The list of locks after the description of the lock is the ordering + * of other locks acquired with the lock held. + * np->n_mtx : Protects the fields in the nfsnode. + VM Object Lock + VI_MTX (acquired indirectly) + * nmp->nm_mtx : Protects the fields in the nfsmount. + rep->r_mtx + * nfs_iod_mtx : Global lock, protects shared nfsiod state. + * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. + nmp->nm_mtx + rep->r_mtx + * rep->r_mtx : Protects the fields in an nfsreq. + */ + static int nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred) @@ -266,9 +284,11 @@ if (!error) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); + mtx_lock(&np->n_mtx); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; + mtx_unlock(&np->n_mtx); } m_freem(mrep); nfsmout: @@ -343,6 +363,7 @@ * Does our cached result allow us to give a definite yes to * this request? */ + mtx_lock(&np->n_mtx); if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && (ap->a_cred->cr_uid == np->n_modeuid) && ((np->n_mode & mode) == mode)) { @@ -352,18 +373,21 @@ * Either a no, or a don't know. Go to the wire. */ nfsstats.accesscache_misses++; + mtx_unlock(&np->n_mtx); error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred); + mtx_lock(&np->n_mtx); if (!error) { if ((np->n_mode & mode) != mode) { error = EACCES; } } } + mtx_unlock(&np->n_mtx); return (error); } else { - if ((error = nfsspec_access(ap)) != 0) + if ((error = nfsspec_access(ap)) != 0) { return (error); - + } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file @@ -371,12 +395,14 @@ * After calling nfsspec_access, we should have the correct * file size cached. 
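The SMP locking note added above is the contract that keeps these changes deadlock-free: whenever two of the listed locks are held together, they must be taken in the documented order. A two-lock userland sketch of why (reqq_mtx/nm_mtx here mirror nfs_reqq_mtx and nmp->nm_mtx, nothing more):

#include <pthread.h>

static pthread_mutex_t reqq_mtx = PTHREAD_MUTEX_INITIALIZER;	/* outer */
static pthread_mutex_t nm_mtx   = PTHREAD_MUTEX_INITIALIZER;	/* inner */

static void
ok_path(void)
{
	pthread_mutex_lock(&reqq_mtx);	/* documented order: outer first */
	pthread_mutex_lock(&nm_mtx);
	/* ... work on the request queue and the mount ... */
	pthread_mutex_unlock(&nm_mtx);
	pthread_mutex_unlock(&reqq_mtx);
}

static void
wrong_path(void)
{
	pthread_mutex_lock(&nm_mtx);
	/*
	 * Taking reqq_mtx here would invert the order and can deadlock
	 * against ok_path(); drop nm_mtx and retry outer-first instead.
	 */
	pthread_mutex_unlock(&nm_mtx);
	ok_path();
}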
*/ + mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; + mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; @@ -400,7 +426,8 @@ error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; - } + } else + mtx_unlock(&np->n_mtx); return (error); } } @@ -428,7 +455,9 @@ /* * Get a valid lease. If cached data is stale, flush it. */ + mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { + mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) return (error); @@ -438,20 +467,28 @@ error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td); if (error) return (error); + mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; + mtx_unlock(&np->n_mtx); } else { np->n_attrstamp = 0; + mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td); if (error) return (error); + mtx_lock(&np->n_mtx); if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; + mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); - if (error == EINTR || error == EIO) + if (error == EINTR || error == EIO) { return (error); + } + mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; } + mtx_unlock(&np->n_mtx); } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. @@ -461,11 +498,12 @@ error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) return (error); + mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; + mtx_unlock(&np->n_mtx); } np->n_directio_opens++; } - np->ra_expect_lbn = 0; vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } @@ -519,7 +557,9 @@ vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } + mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { + mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We @@ -539,6 +579,7 @@ /* np->n_flag &= ~NMODIFIED; */ } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. @@ -551,13 +592,16 @@ np->n_flag &= ~NWRITEERR; error = np->n_error; } + mtx_unlock(&np->n_mtx); } if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { + mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), - ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); + ("nfs_close: unexpected value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; + mtx_unlock(&np->n_mtx); } return (error); } @@ -578,21 +622,21 @@ /* * Update local times for special files. */ + mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; + mtx_unlock(&np->n_mtx); /* * First look in the cache.
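The nfs_access() hunk above shows the access-cache protocol: answer from the cache under n_mtx when possible, otherwise drop the lock across the over-the-wire check and re-test the refreshed cache afterwards. A self-contained sketch, assuming a node_t with an embedded mutex (access_otw() is a stand-in for nfs3_access_otw(), not its real body):

#include <pthread.h>
#include <time.h>
#include <errno.h>

typedef struct {
	pthread_mutex_t mtx;
	unsigned	mode;	/* cached access bits */
	time_t		stamp;	/* when they were cached */
} node_t;

static int
access_otw(node_t *np, unsigned wmode)	/* RPC; may sleep, lock not held */
{
	pthread_mutex_lock(&np->mtx);
	np->mode = wmode;		/* pretend the server granted all */
	np->stamp = time(NULL);
	pthread_mutex_unlock(&np->mtx);
	return (0);
}

static int
check_access(node_t *np, unsigned mode, int timeout)
{
	int error = 0;

	pthread_mutex_lock(&np->mtx);
	if (time(NULL) < np->stamp + timeout && (np->mode & mode) == mode) {
		/* definite yes from the cache */
	} else {
		pthread_mutex_unlock(&np->mtx);	/* go to the wire unlocked */
		error = access_otw(np, mode);	/* refreshes mode/stamp */
		pthread_mutex_lock(&np->mtx);
		if (error == 0 && (np->mode & mode) != mode)
			error = EACCES;
	}
	pthread_mutex_unlock(&np->mtx);
	return (error);
}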
*/ if (nfs_getattrcache(vp, ap->a_vap) == 0) - return (0); - + goto nfsmout; if (v3 && nfsaccess_cache_timeout > 0) { nfsstats.accesscache_misses++; nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred); if (nfs_getattrcache(vp, ap->a_vap) == 0) - return (0); + goto nfsmout; } - nfsstats.rpccnt[NFSPROC_GETATTR]++; mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); mb = mreq; @@ -635,8 +679,10 @@ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && - (vp->v_mount->mnt_flag & MNT_RDONLY)) - return (EROFS); + (vp->v_mount->mnt_flag & MNT_RDONLY)) { + error = EROFS; + goto out; + } if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: @@ -650,7 +696,7 @@ vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) - return (0); + return (0); vap->va_size = VNOVAL; break; default: @@ -660,47 +706,60 @@ */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); - /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ - + mtx_lock(&np->n_mtx); tsize = np->n_size; + mtx_unlock(&np->n_mtx); error = nfs_meta_setsize(vp, ap->a_cred, - ap->a_td, vap->va_size); - + ap->a_td, vap->va_size); + mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { + tsize = np->n_size; + mtx_unlock(&np->n_mtx); if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_td, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) { - vnode_pager_setsize(vp, np->n_size); - return (error); + vnode_pager_setsize(vp, tsize); + goto out; } - } + } else + mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in nfs_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * nfs_meta_setsize() and could modify np->n_size. 
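In the truncation path above, the pre-truncation size is snapshotted into tsize under n_mtx so that a failed flush can roll the pager back to that snapshot rather than to np->n_size, which may have moved in the meantime. Sketch of the capture/attempt/rollback shape (the one-line stubs stand in for the buffer flush and the pager call):

#include <pthread.h>
#include <stdint.h>

typedef struct {
	pthread_mutex_t mtx;
	uint64_t	size;
} node_t;

static int  flush_buffers(node_t *np) { (void)np; return (0); }	/* may sleep/fail */
static void pager_setsize(node_t *np, uint64_t sz) { (void)np; (void)sz; }

static int
set_size(node_t *np, uint64_t newsize)
{
	uint64_t tsize;
	int error;

	pthread_mutex_lock(&np->mtx);
	tsize = np->size;		/* snapshot before dropping the lock */
	np->size = newsize;
	pthread_mutex_unlock(&np->mtx);

	error = flush_buffers(np);	/* blocking; lock not held */
	if (error) {
		pager_setsize(np, tsize);	/* roll back to the snapshot */
		pthread_mutex_lock(&np->mtx);
		np->size = tsize;
		pthread_mutex_unlock(&np->mtx);
	}
	return (error);
}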
*/ + mtx_lock(&np->n_mtx); np->n_vattr.va_size = np->n_size = vap->va_size; + mtx_unlock(&np->n_mtx); }; - } else if ((vap->va_mtime.tv_sec != VNOVAL || - vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && - vp->v_type == VREG && - (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 && - (error == EINTR || error == EIO)) - return (error); + } else { + mtx_lock(&np->n_mtx); + if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && + (np->n_flag & NMODIFIED) && vp->v_type == VREG) { + mtx_unlock(&np->n_mtx); + if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 && + (error == EINTR || error == EIO)) + return error; + } else + mtx_unlock(&np->n_mtx); + } error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td); if (error && vap->va_size != VNOVAL) { + mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.va_size = tsize; - vnode_pager_setsize(vp, np->n_size); + vnode_pager_setsize(vp, tsize); + mtx_unlock(&np->n_mtx); } +out: return (error); } @@ -779,7 +838,7 @@ int error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct thread *td = cnp->cn_thread; - + *vpp = NULLVP; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) @@ -964,8 +1023,10 @@ nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); + mtx_lock(&np->n_mtx); if (np->n_size && np->n_size < NFS_MAXPATHLEN) len = np->n_size; + mtx_unlock(&np->n_mtx); } nfsm_mtouio(uiop, len); } @@ -987,17 +1048,23 @@ struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); + int rsize; #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; - if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + mtx_lock(&nmp->nm_mtx); + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { + mtx_unlock(&nmp->nm_mtx); return (EFBIG); + } + rsize = nmp->nm_rsize; + mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; - len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; + len = (tsiz > rsize) ? rsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); mb = mreq; bpos = mtod(mb, caddr_t); @@ -1020,9 +1087,10 @@ } tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); - } else + } else { nfsm_loadattr(vp, NULL); - nfsm_strsiz(retlen, nmp->nm_rsize); + } + nfsm_strsiz(retlen, rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; @@ -1043,7 +1111,7 @@ */ int nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, - int *iomode, int *must_commit) + int *iomode, int *must_commit) { u_int32_t *tl; int32_t backup; @@ -1052,18 +1120,24 @@ struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; - + int wsize; + #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; - if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) + mtx_lock(&nmp->nm_mtx); + if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { + mtx_unlock(&nmp->nm_mtx); return (EFBIG); + } + wsize = nmp->nm_wsize; + mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; - len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz; + len = (tsiz > wsize) ? 
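nfs_readrpc() above now copies nm_rsize into a local under nm_mtx and clamps every chunk against that local, so a concurrent change to the mount parameters cannot change the transfer size mid-loop. The same idea in isolation (illustrative names):

#include <pthread.h>
#include <stddef.h>

typedef struct {
	pthread_mutex_t mtx;
	size_t		rsize;	/* negotiated max read size */
} mount_t;

static size_t
read_all(mount_t *nmp, size_t resid)
{
	size_t rsize, len, done = 0;

	pthread_mutex_lock(&nmp->mtx);
	rsize = nmp->rsize;		/* stable local copy for the loop */
	pthread_mutex_unlock(&nmp->mtx);

	while (resid > 0) {
		len = (resid > rsize) ? rsize : resid;	/* same clamp as the patch */
		/* ... issue one READ RPC of len bytes here ... */
		done += len;
		resid -= len;
	}
	return (done);
}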
wsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; @@ -1122,6 +1196,7 @@ else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; + mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); @@ -1132,11 +1207,16 @@ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } + mtx_unlock(&nmp->nm_mtx); } - } else - nfsm_loadattr(vp, NULL); - if (wccflag) - VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; + } else { + nfsm_loadattr(vp, NULL); + } + if (wccflag) { + mtx_lock(&(VTONFS(vp))->n_mtx); + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; + mtx_unlock(&(VTONFS(vp))->n_mtx); + } m_freem(mrep); if (error) break; @@ -1232,9 +1312,11 @@ cache_enter(dvp, newvp, cnp); *vpp = newvp; } + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; + mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } @@ -1246,7 +1328,6 @@ static int nfs_mknod(struct vop_mknod_args *ap) { - return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } @@ -1359,9 +1440,11 @@ cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; + mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } @@ -1434,7 +1517,6 @@ int nfs_removeit(struct sillyrename *sp) { - /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. @@ -1469,9 +1551,11 @@ nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; + mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } @@ -1502,7 +1586,7 @@ } if (fvp == tvp) { - printf("nfs_rename: fvp == tvp (can't happen)\n"); + nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } @@ -1609,8 +1693,12 @@ } m_freem(mrep); nfsmout: + mtx_lock(&(VTONFS(fdvp))->n_mtx); VTONFS(fdvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(fdvp))->n_mtx); + mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) @@ -1659,7 +1747,9 @@ } m_freem(mrep); nfsmout: + mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) @@ -1758,7 +1848,9 @@ } else { *ap->a_vpp = newvp; } + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); @@ -1813,7 +1905,9 @@ nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* @@ -1869,7 +1963,9 @@ nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: + mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; + mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); @@ -1891,20 +1987,25 @@ struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; - int tresid, error; + int tresid, error = 0; struct vattr vattr; + + if (vp->v_type != VDIR) + return(EPERM); - if (vp->v_type != VDIR) - return (EPERM); /* * First, check for hit on the EOF 
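The WRITE-reply hunk above handles the NFSv3 write verifier entirely under nm_mtx: store it the first time it is seen, and on any later mismatch update it and make the caller re-commit unstable writes. Condensed sketch (HAS_VERF and note_write_verf() are illustrative names):

#include <pthread.h>
#include <string.h>

#define VERF_LEN	8
#define HAS_VERF	0x01

typedef struct {
	pthread_mutex_t mtx;
	int		state;
	unsigned char	verf[VERF_LEN];
} mount_t;

static void
note_write_verf(mount_t *nmp, const unsigned char *verf, int *must_commit)
{
	pthread_mutex_lock(&nmp->mtx);
	if ((nmp->state & HAS_VERF) == 0) {
		memcpy(nmp->verf, verf, VERF_LEN);	/* first verifier seen */
		nmp->state |= HAS_VERF;
	} else if (memcmp(verf, nmp->verf, VERF_LEN) != 0) {
		*must_commit = 1;	/* server rebooted: recommit unstable data */
		memcpy(nmp->verf, verf, VERF_LEN);
	}
	pthread_mutex_unlock(&nmp->mtx);
}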
offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { - if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 && - !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { - nfsstats.direofcache_hits++; - return (0); + if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) { + mtx_lock(&np->n_mtx); + if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { + mtx_unlock(&np->n_mtx); + nfsstats.direofcache_hits++; + goto out; + } else + mtx_unlock(&np->n_mtx); } } @@ -1914,8 +2015,10 @@ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); - if (!error && uio->uio_resid == tresid) + if (!error && uio->uio_resid == tresid) { nfsstats.direofcache_misses++; + } +out: return (error); } @@ -1950,11 +2053,16 @@ /* * If there is no cookie, assume directory was stale. */ + nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) + if (cookiep) { cookie = *cookiep; - else + nfs_dircookie_unlock(dnp); + } else { + nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); + } + /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. @@ -1971,8 +2079,10 @@ tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; + mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; + mtx_unlock(&dnp->n_mtx); } else { tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; @@ -1984,8 +2094,10 @@ if (!error) { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); + mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; + mtx_unlock(&dnp->n_mtx); } else { m_freem(mrep); goto nfsmout; @@ -2100,9 +2212,11 @@ dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) - printf("EEK! readdirrpc resid > 0\n"); + nfs_printf("EEK! readdirrpc resid > 0\n"); + nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; + nfs_dircookie_unlock(dnp); } nfsmout: return (error); @@ -2146,11 +2260,15 @@ /* * If there is no cookie, assume directory was stale. */ + nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) + if (cookiep) { cookie = *cookiep; - else + nfs_dircookie_unlock(dnp); + } else { + nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); + } /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. 
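Both readdir RPCs now bracket nfs_getcookie() with nfs_dircookie_lock()/nfs_dircookie_unlock(), copying the cookie out before releasing. Per the header change further below, the lock is the NDIRCOOKIELK bit slept on under n_mtx; a userland analogue of such a flag-based sleep lock, using a condition variable for the sleep:

#include <pthread.h>

#define DIRCOOKIELK 0x8000

typedef struct {
	pthread_mutex_t mtx;
	pthread_cond_t	cv;
	int		flag;
} node_t;

static void
dircookie_lock(node_t *np)
{
	pthread_mutex_lock(&np->mtx);
	while (np->flag & DIRCOOKIELK)		/* someone else holds it */
		pthread_cond_wait(&np->cv, &np->mtx);
	np->flag |= DIRCOOKIELK;
	pthread_mutex_unlock(&np->mtx);
}

static void
dircookie_unlock(node_t *np)
{
	pthread_mutex_lock(&np->mtx);
	np->flag &= ~DIRCOOKIELK;
	pthread_cond_broadcast(&np->cv);	/* wake any waiters */
	pthread_mutex_unlock(&np->mtx);
}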
@@ -2166,8 +2284,10 @@ tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; + mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; + mtx_unlock(&dnp->n_mtx); *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); @@ -2177,8 +2297,10 @@ goto nfsmout; } tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); + mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; + mtx_unlock(&dnp->n_mtx); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ @@ -2313,9 +2435,9 @@ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i) { - tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); - fhsize = fxdr_unsigned(int, *tl); - nfsm_adv(nfsm_rndup(fhsize)); + tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); + fhsize = fxdr_unsigned(int, *tl); + nfsm_adv(nfsm_rndup(fhsize)); } } if (newvp != NULLVP) { @@ -2359,9 +2481,11 @@ dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) - printf("EEK! readdirplusrpc resid > 0\n"); + nfs_printf("EEK! readdirplusrpc resid > 0\n"); + nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; + nfs_dircookie_unlock(dnp); } nfsmout: if (newvp != NULLVP) { @@ -2521,7 +2645,7 @@ */ int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, - struct thread *td) + struct thread *td) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vp->v_mount); @@ -2529,8 +2653,12 @@ int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; - if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { + mtx_unlock(&nmp->nm_mtx); return (0); + } + mtx_unlock(&nmp->nm_mtx); nfsstats.rpccnt[NFSPROC_COMMIT]++; mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); mb = mreq; @@ -2600,7 +2728,6 @@ static int nfs_fsync(struct vop_fsync_args *ap) { - return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1)); } @@ -2821,8 +2948,10 @@ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp), "nfsfsync", slpflag, slptimeo); splx(s); - if (error == 0) - panic("nfs_fsync: inconsistent lock"); + if (error == 0) { + BUF_UNLOCK(bp); + goto loop; + } if (error == ENOLCK) goto loop; if (nfs_sigintr(nmp, NULL, td)) { @@ -2880,23 +3009,28 @@ VI_UNLOCK(vp); goto loop; } - /* - * Wait for all the async IO requests to drain + /* + * Wait for all the async IO requests to drain */ + VI_UNLOCK(vp); + mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; - error = nfs_tsleep(td, (caddr_t)&np->n_directio_asyncwr, - slpflag | (PRIBIO + 1), "nfsfsync", 0); + error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, + &np->n_mtx, slpflag | (PRIBIO + 1), + "nfsfsync", 0); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) { - error = EINTR; + mtx_unlock(&np->n_mtx); + error = EINTR; goto done; } } } - - } - VI_UNLOCK(vp); + mtx_unlock(&np->n_mtx); + } else + VI_UNLOCK(vp); + mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; @@ -2904,6 +3038,7 @@ if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 && vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; + mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); @@ -2916,13 +3051,19 @@ 
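The fsync hunk above replaces the old tsleep() on n_directio_asyncwr with a msleep()-style wait that gives up n_mtx atomically, closing the window where the last async write could complete between the counter test and the sleep. In pthread terms this is the standard condition-variable drain loop:

#include <pthread.h>

typedef struct {
	pthread_mutex_t mtx;
	pthread_cond_t	cv;
	int		asyncwr;	/* outstanding direct-I/O writes */
} node_t;

static void
drain_async_writes(node_t *np)
{
	pthread_mutex_lock(&np->mtx);
	while (np->asyncwr > 0)			/* recheck after every wakeup */
		pthread_cond_wait(&np->cv, &np->mtx);	/* unlock+sleep atomically */
	pthread_mutex_unlock(&np->mtx);
}

static void
async_write_done(node_t *np)
{
	pthread_mutex_lock(&np->mtx);
	if (--np->asyncwr == 0)
		pthread_cond_broadcast(&np->cv);	/* wake fsync waiters */
	pthread_mutex_unlock(&np->mtx);
}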
static int nfs_advlock(struct vop_advlock_args *ap) { - + int error; + + mtx_lock(&Giant); if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { struct nfsnode *np = VTONFS(ap->a_vp); - return (lf_advlock(ap, &(np->n_lockf), np->n_size)); + error = lf_advlock(ap, &(np->n_lockf), np->n_size); + goto out; } - return (nfs_dolock(ap)); + error = nfs_dolock(ap); +out: + mtx_unlock(&Giant); + return (error); } /* @@ -2934,7 +3075,7 @@ struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); - printf("\tfileid %ld fsid 0x%x", + nfs_printf("\tfileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); @@ -2998,7 +3139,6 @@ reassignbuf(bp); splx(s); } - brelse(bp); return (rtval); } @@ -3039,9 +3179,11 @@ vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_td); if (error) - return (error); - return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, - mode, cred, NULL)); + goto out; + error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, + mode, cred, NULL); +out: + return error; } /* @@ -3051,13 +3193,17 @@ nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); + int error; /* * Set access flag. */ + mtx_lock(&np->n_mtx); np->n_flag |= NACC; getnanotime(&np->n_atim); - return (fifo_specops.vop_read(ap)); + mtx_unlock(&np->n_mtx); + error = fifo_specops.vop_read(ap); + return error; } /* @@ -3071,9 +3217,11 @@ /* * Set update flag. */ + mtx_lock(&np->n_mtx); np->n_flag |= NUPD; getnanotime(&np->n_mtim); - return (fifo_specops.vop_write(ap)); + mtx_unlock(&np->n_mtx); + return(fifo_specops.vop_write(ap)); } /* @@ -3089,6 +3237,7 @@ struct vattr vattr; struct timespec ts; + mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) @@ -3103,9 +3252,13 @@ vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; + mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td); + goto out; } } + mtx_unlock(&np->n_mtx); +out: return (fifo_specops.vop_close(ap)); } Index: sys/nfsclient/nfsmount.h =========================================================================== --- sys/nfsclient/nfsmount.h 2006/05/15 19:00:49 #1 +++ sys/nfsclient/nfsmount.h 2006/05/15 19:00:49 @@ -41,7 +41,6 @@ #define NFS_TCP_EXPECT_RPCMARKER 0x0001 /* Expect to see a RPC/TCP marker next */ #define NFS_TCP_FORCE_RECONNECT 0x0002 /* Force a TCP reconnect */ int flags; - struct mtx mtx; }; /* @@ -50,6 +49,7 @@ * Holds NFS specific information for mount. */ struct nfsmount { + struct mtx nm_mtx; int nm_flag; /* Flags for soft/hard... */ int nm_state; /* Internal state flags */ struct mount *nm_mountp; /* Vfs structure for this filesystem */ Index: sys/nfsclient/nfsnode.h =========================================================================== --- sys/nfsclient/nfsnode.h 2006/05/15 19:00:49 #2 +++ sys/nfsclient/nfsnode.h 2006/05/15 19:00:49 @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * @(#)nfsnode.h 8.9 (Berkeley) 5/14/95 - * $FreeBSD: src/sys/nfsclient/nfsnode.h,v 1.57 2006/04/06 01:20:30 mohans Exp $ + * $FreeBSD: src/sys/nfsclient/nfsnode.h,v 1.56 2005/07/21 22:46:56 ps Exp $ */ #ifndef _NFSCLIENT_NFSNODE_H_ @@ -88,6 +88,7 @@ * be well aligned and, therefore, tightly packed. 
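The nfsmount.h change above retires the special-purpose TCP-state mutex in favor of one nm_mtx embedded at the head of struct nfsmount (nfsnode.h does the same with n_mtx below), so every field in the structure sits under a single documented lock. Minimal shape of that design, with an accessor that keeps the lock scope obvious at call sites:

#include <pthread.h>

struct nfsmount_sketch {
	pthread_mutex_t nm_mtx;	/* protects every field below */
	int		nm_flag;
	int		nm_state;
	/* ... */
};

static int
get_state(struct nfsmount_sketch *nmp)
{
	int s;

	pthread_mutex_lock(&nmp->nm_mtx);
	s = nmp->nm_state;
	pthread_mutex_unlock(&nmp->nm_mtx);
	return (s);
}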
*/ struct nfsnode { + struct mtx n_mtx; /* Protects all of these members */ u_quad_t n_size; /* Current size of file */ u_quad_t n_brev; /* Modify rev when cached */ u_quad_t n_lrev; /* Modify rev for lease */ @@ -124,9 +125,8 @@ struct nfs4_fctx n_wfc; u_char *n_name; /* leaf name, for v4 OPEN op */ uint32_t n_namelen; - daddr_t ra_expect_lbn; int n_directio_opens; - int n_directio_asyncwr; + int n_directio_asyncwr; }; #define n_atim n_un1.nf_atim @@ -140,6 +140,8 @@ /* * Flags for n_flag */ +#define NFSYNCWAIT 0x0002 /* fsync waiting for all directio async writes + to drain */ #define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x0008 /* Flag write errors so close will know */ /* 0x20, 0x40, 0x80 free */ @@ -150,8 +152,7 @@ #define NTRUNCATE 0x1000 /* Opened by nfs_setattr() */ #define NSIZECHANGED 0x2000 /* File size has changed: need cache inval */ #define NNONCACHE 0x4000 /* Node marked as noncacheable */ -#define NFSYNCWAIT 0x8000 /* fsync waiting for all directio async writes - to drain */ +#define NDIRCOOKIELK 0x8000 /* Lock to serialize access to directory cookies */ /* * Convert between nfsnode pointers and vnode pointers @@ -193,6 +194,12 @@ uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int); void nfs_invaldir(struct vnode *); void nfs4_invaldir(struct vnode *); +int nfs_upgrade_vnlock(struct vnode *vp, struct thread *td); +void nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock); +void nfs_printf(const char *fmt, ...); + +void nfs_dircookie_lock(struct nfsnode *np); +void nfs_dircookie_unlock(struct nfsnode *np); #endif /* _KERNEL */
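nfs_printf() is declared above but its body is not part of this diff; a plausible definition -- and this is an assumption, not the patch's code -- is a thin vprintf wrapper that gives the client one choke point for console output (kernel code would use vprintf(9); the userland form is shown):

#include <stdarg.h>
#include <stdio.h>

void
nfs_printf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vprintf(fmt, ap);	/* single funnel for all NFS client messages */
	va_end(ap);
}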