diff -r f47e9457a7b8 src/sys/fs/fdescfs/fdesc.h --- a/src/sys/fs/fdescfs/fdesc.h Sun Apr 27 22:16:23 2008 +0200 +++ b/src/sys/fs/fdescfs/fdesc.h Fri May 23 18:16:34 2008 +0200 @@ -35,8 +35,11 @@ */ #ifdef _KERNEL +/* Private mount flags for fdescfs. */ +#define FMNT_UNMOUNTF 0x01 struct fdescmount { struct vnode *f_root; /* Root node */ + int flags; }; #define FD_ROOT 1 @@ -55,10 +58,12 @@ int fd_ix; /* filesystem index */ }; +extern struct mtx fdesc_hashmtx; #define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data)) #define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) extern vfs_init_t fdesc_init; -extern int fdesc_allocvp(fdntype, int, struct mount *, struct vnode **, - struct thread *); +extern vfs_uninit_t fdesc_uninit; +extern int fdesc_allocvp(fdntype, unsigned, int, struct mount *, + struct vnode **, struct thread *); #endif /* _KERNEL */ diff -r f47e9457a7b8 src/sys/fs/fdescfs/fdesc_vfsops.c --- a/src/sys/fs/fdescfs/fdesc_vfsops.c Sun Apr 27 22:16:23 2008 +0200 +++ b/src/sys/fs/fdescfs/fdesc_vfsops.c Fri May 23 18:16:34 2008 +0200 @@ -85,18 +85,30 @@ if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS)) return (EOPNOTSUPP); - error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp, td); - if (error) - return (error); - MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount), M_FDESCMNT, M_WAITOK); /* XXX */ + + /* + * We need to initialize a few bits of our local mount point struct to + * avoid confusion in allocvp. + */ + mp->mnt_data = (qaddr_t) fmp; + fmp->flags = 0; + error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp, td); + if (error) { + free(fmp, M_FDESCMNT); + mp->mnt_data = 0; + return (error); + } rvp->v_type = VDIR; rvp->v_vflag |= VV_ROOT; fmp->f_root = rvp; + VOP_UNLOCK(rvp, 0); /* XXX -- don't mark as local to work around fts() problems */ /*mp->mnt_flag |= MNT_LOCAL;*/ - mp->mnt_data = fmp; + MNT_ILOCK(mp); + mp->mnt_kern_flag |= MNTK_MPSAFE; + MNT_IUNLOCK(mp); vfs_getnewfsid(mp); vfs_mountedfrom(mp, "fdescfs"); @@ -109,11 +121,19 @@ int mntflags; struct thread *td; { + struct fdescmount *fmp; + caddr_t data; int error; int flags = 0; - if (mntflags & MNT_FORCE) + fmp = (struct fdescmount *)mp->mnt_data; + if (mntflags & MNT_FORCE) { + /* The hash mutex protects the private mount flags. */ + mtx_lock(&fdesc_hashmtx); + fmp->flags |= FMNT_UNMOUNTF; + mtx_unlock(&fdesc_hashmtx); flags |= FORCECLOSE; + } /* * Clear out buffer cache. I don't think we @@ -127,10 +147,14 @@ return (error); /* - * Finally, throw away the fdescmount structure + * Finally, throw away the fdescmount structure. Hold the hashmtx to + * protect the fdescmount structure. */ - free(mp->mnt_data, M_FDESCMNT); /* XXX */ + mtx_lock(&fdesc_hashmtx); + data = mp->mnt_data; mp->mnt_data = 0; + mtx_unlock(&fdesc_hashmtx); + free(data, M_FDESCMNT); /* XXX */ return (0); } @@ -148,8 +172,7 @@ * Return locked reference to root. */ vp = VFSTOFDESC(mp)->f_root; - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vget(vp, LK_EXCLUSIVE | LK_RETRY, td); *vpp = vp; return (0); } @@ -208,6 +231,7 @@ .vfs_mount = fdesc_mount, .vfs_root = fdesc_root, .vfs_statfs = fdesc_statfs, + .vfs_uninit = fdesc_uninit, .vfs_unmount = fdesc_unmount, }; diff -r f47e9457a7b8 src/sys/fs/fdescfs/fdesc_vnops.c --- a/src/sys/fs/fdescfs/fdesc_vnops.c Sun Apr 27 22:16:23 2008 +0200 +++ b/src/sys/fs/fdescfs/fdesc_vnops.c Fri May 23 18:16:34 2008 +0200 @@ -56,18 +56,15 @@ #include -#define FDL_WANT 0x01 -#define FDL_LOCKED 0x02 -static int fdcache_lock; - #define NFDCACHE 4 #define FD_NHASH(ix) \ (&fdhashtbl[(ix) & fdhash]) static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; static u_long fdhash; +struct mtx fdesc_hashmtx; + static vop_getattr_t fdesc_getattr; -static vop_inactive_t fdesc_inactive; static vop_lookup_t fdesc_lookup; static vop_open_t fdesc_open; static vop_readdir_t fdesc_readdir; @@ -79,7 +76,6 @@ .vop_access = VOP_NULL, .vop_getattr = fdesc_getattr, - .vop_inactive = fdesc_inactive, .vop_lookup = fdesc_lookup, .vop_open = fdesc_open, .vop_pathconf = vop_stdpathconf, @@ -87,6 +83,9 @@ .vop_reclaim = fdesc_reclaim, .vop_setattr = fdesc_setattr, }; + +static void fdesc_insmntque_dtr(struct vnode *, void *); +static void fdesc_remove_entry(struct fdescnode *); /* * Initialise cache headers @@ -96,81 +95,154 @@ struct vfsconf *vfsp; { + mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF); fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); return (0); } +/* + * Uninit ready for unload. + */ int -fdesc_allocvp(ftype, ix, mp, vpp, td) +fdesc_uninit(vfsp) + struct vfsconf *vfsp; +{ + + hashdestroy(fdhashtbl, M_CACHE, fdhash); + mtx_destroy(&fdesc_hashmtx); + return (0); +} + +/* + * If allocating vnode fails, call this. + */ +static void +fdesc_insmntque_dtr(struct vnode *vp, void *arg) +{ + + vgone(vp); + vput(vp); +} + +/* + * Remove an entry from the hash if it exists. + */ +static void +fdesc_remove_entry(struct fdescnode *fd) +{ + struct fdhashhead *fc; + struct fdescnode *fd2; + + fc = FD_NHASH(fd->fd_ix); + mtx_lock(&fdesc_hashmtx); + LIST_FOREACH(fd2, fc, fd_hash) { + if (fd == fd2) { + LIST_REMOVE(fd, fd_hash); + break; + } + } + mtx_unlock(&fdesc_hashmtx); +} + +int +fdesc_allocvp(ftype, fd_fd, ix, mp, vpp, td) fdntype ftype; + unsigned fd_fd; int ix; struct mount *mp; struct vnode **vpp; struct thread *td; { + struct fdescmount *fmp; struct fdhashhead *fc; - struct fdescnode *fd; + struct fdescnode *fd, *fd2; + struct vnode *vp, *vp2; int error = 0; fc = FD_NHASH(ix); loop: + mtx_lock(&fdesc_hashmtx); + /* + * If a forced unmount is progressing, we need to stop. The flags are + * protected by the hashmtx. + */ + fmp = (struct fdescmount *)mp->mnt_data; + if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { + mtx_unlock(&fdesc_hashmtx); + return (-1); + } + LIST_FOREACH(fd, fc, fd_hash) { if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { - if (vget(fd->fd_vnode, LK_EXCLUSIVE | LK_CANRECURSE, - td)) + /* Get reference to vnode in case it's being free'd */ + vp = fd->fd_vnode; + VI_LOCK(vp); + mtx_unlock(&fdesc_hashmtx); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; - *vpp = fd->fd_vnode; - VOP_UNLOCK(*vpp, 0); + *vpp = vp; + return (0); + } + } + mtx_unlock(&fdesc_hashmtx); + + MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK); + + error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp); + if (error) { + FREE(fd, M_TEMP); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vp->v_data = fd; + fd->fd_vnode = vp; + fd->fd_type = ftype; + fd->fd_fd = fd_fd; + fd->fd_ix = ix; + error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL); + if (error != 0) { + *vpp = NULLVP; + return (error); + } + + /* Make sure that someone didn't beat us when inserting the vnode. */ + mtx_lock(&fdesc_hashmtx); + /* + * If a forced unmount is progressing, we need to drop it. The flags are + * protected by the hashmtx. + */ + fmp = (struct fdescmount *)mp->mnt_data; + if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { + mtx_unlock(&fdesc_hashmtx); + vgone(vp); + vput(vp); + *vpp = NULLVP; + return (-1); + } + + LIST_FOREACH(fd2, fc, fd_hash) { + if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) { + /* Get reference to vnode in case it's being free'd */ + vp2 = fd2->fd_vnode; + VI_LOCK(vp2); + mtx_unlock(&fdesc_hashmtx); + error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td); + /* Someone beat us, dec use count and wait for reclaim */ + vgone(vp); + vput(vp); + /* If we didn't get it, return no vnode. */ + if (error) + vp2 = NULLVP; + *vpp = vp2; return (error); } } - /* - * otherwise lock the array while we call getnewvnode - * since that can block. - */ - if (fdcache_lock & FDL_LOCKED) { - fdcache_lock |= FDL_WANT; - (void) tsleep( &fdcache_lock, PINOD, "fdalvp", 0); - goto loop; - } - fdcache_lock |= FDL_LOCKED; - - /* - * Do the MALLOC before the getnewvnode since doing so afterward - * might cause a bogus v_data pointer to get dereferenced - * elsewhere if MALLOC should block. - */ - MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK); - - error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, vpp); - if (error) { - FREE(fd, M_TEMP); - goto out; - } - (*vpp)->v_data = fd; - fd->fd_vnode = *vpp; - fd->fd_type = ftype; - fd->fd_fd = -1; - fd->fd_ix = ix; - /* XXX: vnode should be locked here */ - error = insmntque(*vpp, mp); /* XXX: Too early for mpsafe fs */ - if (error != 0) { - free(fd, M_TEMP); - *vpp = NULLVP; - goto out; - } + /* If we came here, we can insert it safely. */ LIST_INSERT_HEAD(fc, fd, fd_hash); - -out: - fdcache_lock &= ~FDL_LOCKED; - - if (fdcache_lock & FDL_WANT) { - fdcache_lock &= ~FDL_WANT; - wakeup( &fdcache_lock); - } - - return (error); + mtx_unlock(&fdesc_hashmtx); + *vpp = vp; + return (0); } /* @@ -230,13 +302,43 @@ if ((error = fget(td, fd, &fp)) != 0) goto bad; - error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, td); - fdrop(fp, td); + /* Check if we're looking up ourselves. */ + if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) { + /* + * In case we're holding the last reference to the file, the dvp + * will be re-acquired. + */ + vhold(dvp); + VOP_UNLOCK(dvp, 0); + fdrop(fp, td); + + /* Re-aquire the lock afterwards. */ + vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); + vdrop(dvp); + fvp = dvp; + } else { + /* + * Unlock our root node (dvp) when doing this, since we might + * deadlock since the vnode might be locked by another thread + * and the root vnode lock will be obtained afterwards (in case + * we're looking up the fd of the root vnode), which will be the + * opposite lock order. Vhold the root vnode first so we don't + * loose it. + */ + vhold(dvp); + VOP_UNLOCK(dvp, 0); + error = fdesc_allocvp(Fdesc, fd, FD_DESC + fd, dvp->v_mount, + &fvp, td); + fdrop(fp, td); + /* + * The root vnode must be locked last to prevent deadlock condition. + */ + vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); + vdrop(dvp); + } + if (error) goto bad; - VTOFDESC(fvp)->fd_fd = fd; - if (fvp != dvp) - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); *vpp = fvp; return (0); @@ -505,34 +607,18 @@ } static int -fdesc_inactive(ap) - struct vop_inactive_args /* { - struct vnode *a_vp; - struct thread *a_td; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - /* - * Clear out the v_type field to avoid - * nasty things happening in vgone(). - */ - vp->v_type = VNON; - return (0); -} - -static int fdesc_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { - struct vnode *vp = ap->a_vp; - struct fdescnode *fd = VTOFDESC(vp); + struct vnode *vp; + struct fdescnode *fd; - LIST_REMOVE(fd, fd_hash); + vp = ap->a_vp; + fd = VTOFDESC(vp); + fdesc_remove_entry(fd); FREE(vp->v_data, M_TEMP); - vp->v_data = 0; - + vp->v_data = NULL; return (0); }