/*
 * Review summary.  This is leading text only; every hunk below is unchanged.
 *
 * CVS unified diff (diff -u) touching six files:
 *
 *   sys/conf.h
 *     Forward-declares struct vnode and adds the devvn_refthread() prototype.
 *
 *   kern/kern_conf.c
 *     Adds devvn_refthread(vp, devp).  It asserts devmtx is not owned, then
 *     between dev_lock() and dev_unlock() stores vp->v_rdev into *devp and,
 *     when both that cdev and its si_devsw are non-NULL, increments
 *     si_threadcount.  It returns si_devsw, or NULL.
 *
 *   fs/devfs/devfs.h
 *     Adds the DE_DOOMED flag (0x8), de_holdcnt, dm_holdcnt and the
 *     DEVFS_DE_HOLD/DROP and DEVFS_DMP_HOLD/DROP macros; each DROP macro
 *     pre-decrements the counter and evaluates true when it reaches zero.
 *     Removes the devfs_itod() and devfs_itode() prototypes and adds
 *     prototypes for devfs_dirent_free() and devfs_unmount_final().
 *
 *   fs/devfs/devfs_devs.c
 *     New dirents get de_holdcnt = 1.  devfs_delete() asserts the dirent is
 *     not already doomed, sets DE_DOOMED, and frees the dirent through
 *     devfs_dirent_free() only when DEVFS_DE_DROP() reports zero.
 *
 *   fs/devfs/devfs_vfsops.c
 *     The mount structure gets dm_holdcnt = 1.  devfs_unmount() clears
 *     dm_mount and decrements dm_holdcnt while dm_lock is held, and calls
 *     devfs_unmount_final() only when the count reached zero.  dm_lock is
 *     now taken exclusively before the devfs_allocv() call on dm_rootdir.
 *
 *   fs/devfs/devfs_vnops.c
 *     Adds the devfs_de_interlock mutex, taken around the de_vnode / v_data
 *     reads and writes shown in these hunks.  devfs_fp_check() obtains the
 *     cdev through devvn_refthread().  Per the comment added by this patch,
 *     devfs_allocv() "shall be entered with dmp->dm_lock held, and it drops
 *     it on return"; it holds the dirent and the mount while dm_lock is
 *     dropped around vget() / vn_lock() and returns ENOENT when
 *     devfs_allocv_drop_refs() reports a non-zero result.  Call sites in
 *     this file no longer unlock dm_lock after devfs_allocv().  The
 *     dev_relthread() calls after d_read and d_write are moved below the
 *     timestamp updates.  devfs_revoke() now takes a hold with vholdl()
 *     before vgone() and releases it with vdrop().
 *
 * NOTE(review): the line structure of this patch has been collapsed; the
 *   lines of each hunk are joined by single spaces.  Newlines and the
 *   original indentation presumably need to be restored before patch(1)
 *   can apply it -- confirm against the original posting.
 * NOTE(review): the first devfs_vnops.c hunk shows the context
 *   "#include #include" with no header names; they look like they were
 *   stripped in transit.  Restore them from devfs_vnops.c rev 1.114.2.7.
 */
Index: sys/conf.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/sys/conf.h,v retrieving revision 1.222.2.3 diff -u -r1.222.2.3 conf.h --- sys/conf.h 26 Sep 2005 14:36:54 -0000 1.222.2.3 +++ sys/conf.h 20 Oct 2006 08:45:28 -0000 @@ -105,6 +105,7 @@ struct uio; struct knote; struct clonedevs; +struct vnode; /* * Note: d_thread_t is provided as a transition aid for those drivers @@ -245,6 +246,7 @@ int count_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev); struct cdevsw *dev_refthread(struct cdev *_dev); +struct cdevsw *devvn_refthread(struct vnode *vp, struct cdev **devp); void dev_relthread(struct cdev *_dev); void dev_depends(struct cdev *_pdev, struct cdev *_cdev); void dev_ref(struct cdev *dev); Index: kern/kern_conf.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/kern/kern_conf.c,v retrieving revision 1.186.2.6 diff -u -r1.186.2.6 kern_conf.c --- kern/kern_conf.c 20 Jun 2006 19:56:26 -0000 1.186.2.6 +++ kern/kern_conf.c 20 Oct 2006 08:45:28 -0000 @@ -125,6 +125,24 @@ return (csw); } +struct cdevsw * +devvn_refthread(struct vnode *vp, struct cdev **devp) +{ + struct cdevsw *csw; + + mtx_assert(&devmtx, MA_NOTOWNED); + csw = NULL; + dev_lock(); + *devp = vp->v_rdev; + if (*devp != NULL) { + csw = (*devp)->si_devsw; + if (csw != NULL) + (*devp)->si_threadcount++; + } + dev_unlock(); + return (csw); +} + void dev_relthread(struct cdev *dev) { Index: fs/devfs/devfs.h =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/fs/devfs/devfs.h,v retrieving revision 1.22.2.2 diff -u -r1.22.2.2 devfs.h --- fs/devfs/devfs.h 26 Sep 2005 14:36:52 -0000 1.22.2.2 +++ fs/devfs/devfs.h 20 Oct 2006 08:45:28 -0000 @@ -129,6 +129,8 @@ #define DE_WHITEOUT 0x1 #define DE_DOT 0x2 #define DE_DOTDOT 0x4 +#define DE_DOOMED 0x8 + int de_holdcnt; struct dirent *de_dirent; TAILQ_ENTRY(devfs_dirent) de_list; 
TAILQ_HEAD(, devfs_dirent) de_dlist; @@ -150,6 +152,7 @@ struct mount *dm_mount; struct devfs_dirent *dm_rootdir; unsigned dm_generation; + int dm_holdcnt; struct sx dm_lock; devfs_rsnum dm_ruleset; }; @@ -160,15 +163,21 @@ #define VFSTODEVFS(mp) ((struct devfs_mount *)((mp)->mnt_data)) +#define DEVFS_DE_HOLD(de) ((de)->de_holdcnt++) +#define DEVFS_DE_DROP(de) (--(de)->de_holdcnt == 0) + +#define DEVFS_DMP_HOLD(dmp) ((dmp)->dm_holdcnt++) +#define DEVFS_DMP_DROP(dmp) (--(dmp)->dm_holdcnt == 0) + void devfs_rules_apply(struct devfs_mount *dm, struct devfs_dirent *de); void devfs_rules_cleanup (struct devfs_mount *dm); int devfs_rules_ioctl(struct devfs_mount *dm, u_long cmd, caddr_t data, struct thread *td); int devfs_allocv (struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td); -struct cdev **devfs_itod (int inode); -struct devfs_dirent **devfs_itode (struct devfs_mount *dm, int inode); void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de); +void devfs_dirent_free(struct devfs_dirent *de); void devfs_populate (struct devfs_mount *dm); void devfs_cleanup (struct devfs_mount *dm); +void devfs_unmount_final(struct devfs_mount *mp); struct devfs_dirent *devfs_newdirent (char *name, int namelen); struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode); struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); Index: fs/devfs/devfs_devs.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/fs/devfs/devfs_devs.c,v retrieving revision 1.36.2.4 diff -u -r1.36.2.4 devfs_devs.c --- fs/devfs/devfs_devs.c 13 Mar 2006 03:05:06 -0000 1.36.2.4 +++ fs/devfs/devfs_devs.c 20 Oct 2006 08:45:28 -0000 @@ -180,6 +180,7 @@ vfs_timestamp(&de->de_ctime); de->de_mtime = de->de_atime = de->de_ctime; de->de_links = 1; + de->de_holdcnt = 1; #ifdef MAC mac_init_devfsdirent(de); #endif @@ -231,9 +232,18 
@@ } void +devfs_dirent_free(struct devfs_dirent *de) +{ + free(de, M_DEVFS3); +} + +void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de) { + KASSERT((de->de_flags & DE_DOOMED) == 0, + ("devfs_delete doomed dirent")); + de->de_flags |= DE_DOOMED; if (de->de_symlink) { free(de->de_symlink, M_DEVFS); de->de_symlink = NULL; @@ -252,7 +262,8 @@ free_unr(devfs_inos, de->de_inode); de->de_inode = 0; } - free(de, M_DEVFS3); + if (DEVFS_DE_DROP(de)) + devfs_dirent_free(de); } /* Index: fs/devfs/devfs_vfsops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/fs/devfs/devfs_vfsops.c,v retrieving revision 1.44.2.4 diff -u -r1.44.2.4 devfs_vfsops.c --- fs/devfs/devfs_vfsops.c 9 Oct 2006 19:47:13 -0000 1.44.2.4 +++ fs/devfs/devfs_vfsops.c 20 Oct 2006 08:45:28 -0000 @@ -81,6 +81,7 @@ fmp = malloc(sizeof *fmp, M_DEVFS, M_WAITOK | M_ZERO); fmp->dm_idx = alloc_unr(devfs_unr); sx_init(&fmp->dm_lock, "devfsmount"); + fmp->dm_holdcnt = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; @@ -110,14 +111,25 @@ return (0); } +void +devfs_unmount_final(struct devfs_mount *fmp) +{ + sx_destroy(&fmp->dm_lock); + free(fmp, M_DEVFS); +} + static int devfs_unmount(struct mount *mp, int mntflags, struct thread *td) { int error; int flags = 0; struct devfs_mount *fmp; + int hold; + u_int idx; fmp = VFSTODEVFS(mp); + KASSERT(fmp->dm_mount != NULL, + ("devfs_unmount unmounted devfs_mount")); /* There is 1 extra root vnode reference from devfs_mount(). 
*/ error = vflush(mp, 1, flags, td); if (error) @@ -125,11 +137,14 @@ sx_xlock(&fmp->dm_lock); devfs_cleanup(fmp); devfs_rules_cleanup(fmp); - sx_xunlock(&fmp->dm_lock); + fmp->dm_mount = NULL; + hold = --fmp->dm_holdcnt; mp->mnt_data = NULL; - sx_destroy(&fmp->dm_lock); - free_unr(devfs_unr, fmp->dm_idx); - free(fmp, M_DEVFS); + idx = fmp->dm_idx; + sx_xunlock(&fmp->dm_lock); + free_unr(devfs_unr, idx); + if (hold == 0) + devfs_unmount_final(fmp); return 0; } @@ -143,6 +158,7 @@ struct devfs_mount *dmp; dmp = VFSTODEVFS(mp); + sx_xlock(&dmp->dm_lock); error = devfs_allocv(dmp->dm_rootdir, mp, &vp, td); if (error) return (error); Index: fs/devfs/devfs_vnops.c =================================================================== RCS file: /usr/local/arch/ncvs/src/sys/fs/devfs/devfs_vnops.c,v retrieving revision 1.114.2.7 diff -u -r1.114.2.7 devfs_vnops.c --- fs/devfs/devfs_vnops.c 12 Nov 2005 21:21:27 -0000 1.114.2.7 +++ fs/devfs/devfs_vnops.c 20 Oct 2006 08:45:28 -0000 @@ -72,16 +72,21 @@ #include #include +static struct mtx devfs_de_interlock; +MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF); + static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) { - *devp = fp->f_vnode->v_rdev; - if (*devp != fp->f_data) + *dswp = devvn_refthread(fp->f_vnode, devp); + if (*devp != fp->f_data) { + if (*dswp != NULL) + dev_relthread(*devp); return (ENXIO); + } KASSERT((*devp)->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); - *dswp = dev_refthread(*devp); if (*dswp == NULL) return (ENXIO); return (0); @@ -123,31 +128,84 @@ return (buf + i); } +static int +devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp, + struct devfs_dirent *de) +{ + int not_found; + + not_found = 0; + if (de->de_flags & DE_DOOMED) + not_found = 1; + if (DEVFS_DE_DROP(de)) { + KASSERT(not_found == 1, ("DEVFS de dropped but not doomed")); + devfs_dirent_free(de); + } + if (DEVFS_DMP_DROP(dmp)) { + 
KASSERT(not_found == 1, + ("DEVFS mount struct freed before dirent")); + not_found = 2; + sx_xunlock(&dmp->dm_lock); + devfs_unmount_final(dmp); + } + if (not_found == 1 || (drop_dm_lock && not_found != 2)) + sx_unlock(&dmp->dm_lock); + return (not_found); +} + +/* + * devfs_allocv shall be entered with dmp->dm_lock held, and it drops + * it on return. + */ int devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) { int error; struct vnode *vp; struct cdev *dev; + struct devfs_mount *dmp; KASSERT(td == curthread, ("devfs_allocv: td != curthread")); -loop: + dmp = VFSTODEVFS(mp); + if (de->de_flags & DE_DOOMED) { + sx_xunlock(&dmp->dm_lock); + return (ENOENT); + } + loop: + DEVFS_DE_HOLD(de); + DEVFS_DMP_HOLD(dmp); + mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { - if (vget(vp, LK_EXCLUSIVE, td)) + VI_LOCK(vp); + mtx_unlock(&devfs_de_interlock); + sx_xunlock(&dmp->dm_lock); + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + sx_xlock(&dmp->dm_lock); + if (devfs_allocv_drop_refs(0, dmp, de)) { + if (error == 0) + vput(vp); + return (ENOENT); + } + else if (error) goto loop; + sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } + mtx_unlock(&devfs_de_interlock); if (de->de_dirent->d_type == DT_CHR) { - if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) + if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { + devfs_allocv_drop_refs(1, dmp, de); return (ENOENT); + } dev = &de->de_cdp->cdp_c; } else { dev = NULL; } error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { + devfs_allocv_drop_refs(1, dmp, de); printf("devfs_allocv: failed to allocate new vnode\n"); return (error); } @@ -171,12 +229,21 @@ } else { vp->v_type = VBAD; } + mtx_lock(&devfs_de_interlock); vp->v_data = de; de->de_vnode = vp; + mtx_unlock(&devfs_de_interlock); + sx_xunlock(&dmp->dm_lock); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + sx_xlock(&dmp->dm_lock); + if (devfs_allocv_drop_refs(0, dmp, de)) { + vput(vp); + return (ENOENT); + 
} #ifdef MAC mac_associate_vnode_devfs(mp, de, vp); #endif + sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } @@ -453,7 +520,7 @@ } static int -devfs_lookupx(struct vop_lookup_args *ap) +devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) { struct componentname *cnp; struct vnode *dvp, **vpp; @@ -505,6 +572,7 @@ de = TAILQ_NEXT(de, de_list); /* ".." */ de = de->de_dir; error = devfs_allocv(de, dvp->v_mount, vpp, td); + *dm_unlock = 0; vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); return (error); } @@ -562,6 +630,7 @@ } } error = devfs_allocv(de, dvp->v_mount, vpp, td); + *dm_unlock = 0; return (error); } @@ -570,11 +639,14 @@ { int j; struct devfs_mount *dmp; + int dm_unlock; dmp = VFSTODEVFS(ap->a_dvp->v_mount); + dm_unlock = 1; sx_xlock(&dmp->dm_lock); - j = devfs_lookupx(ap); - sx_xunlock(&dmp->dm_lock); + j = devfs_lookupx(ap, &dm_unlock); + if (dm_unlock == 1) + sx_xunlock(&dmp->dm_lock); return (j); } @@ -596,7 +668,6 @@ return (EOPNOTSUPP); dvp = ap->a_dvp; dmp = VFSTODEVFS(dvp->v_mount); - sx_xlock(&dmp->dm_lock); cnp = ap->a_cnp; vpp = ap->a_vpp; @@ -604,6 +675,7 @@ dd = dvp->v_data; error = ENOENT; + sx_xlock(&dmp->dm_lock); TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (cnp->cn_namelen != de->de_dirent->d_namlen) continue; @@ -618,6 +690,7 @@ goto notfound; de->de_flags &= ~DE_WHITEOUT; error = devfs_allocv(de, dvp->v_mount, vpp, td); + return (error); notfound: sx_xunlock(&dmp->dm_lock); return (error); @@ -777,9 +850,9 @@ uio->uio_offset = fp->f_offset; error = dsw->d_read(dev, uio, ioflag); - dev_relthread(dev); if (uio->uio_resid != resid || (error == 0 && resid != 0)) vfs_timestamp(&dev->si_atime); + dev_relthread(dev); if ((flags & FOF_OFFSET) == 0) fp->f_offset = uio->uio_offset; @@ -875,20 +948,26 @@ struct vnode *vp = ap->a_vp; struct devfs_dirent *de; struct cdev *dev; - + + mtx_lock(&devfs_de_interlock); de = vp->v_data; - if (de != NULL) + if (de != NULL) { de->de_vnode = NULL; - vp->v_data = NULL; + vp->v_data = NULL; + } + 
mtx_unlock(&devfs_de_interlock); + vnode_destroy_vobject(vp); + dev_lock(); dev = vp->v_rdev; vp->v_rdev = NULL; - if (dev == NULL) + if (dev == NULL) { + dev_unlock(); return (0); + } - dev_lock(); dev->si_usecount -= vp->v_usecount; dev_unlock(); dev_rel(dev); @@ -921,10 +1000,6 @@ * is orphaned by setting v_op to deadfs so we need to let go of it * as well so that we create a new one next time around. * - * XXX: locking :-( - * XXX: We mess around with other mountpoints without holding their sxlock. - * XXX: We hold the devlock() when we zero their vnode pointer, but is that - * XXX: enough ? */ static int devfs_revoke(struct vop_revoke_args *ap) @@ -940,22 +1015,32 @@ dev = vp->v_rdev; cdp = dev->si_priv; for (;;) { + mtx_lock(&devfs_de_interlock); dev_lock(); vp2 = NULL; for (i = 0; i <= cdp->cdp_maxdirent; i++) { de = cdp->cdp_dirents[i]; if (de == NULL) continue; - vp2 = de->de_vnode; - de->de_vnode = NULL; - if (vp2 != NULL) + + vp2 = de->de_vnode; + if (vp2 != NULL) { + de->de_vnode = NULL; + dev_unlock(); + VI_LOCK(vp2); + mtx_unlock(&devfs_de_interlock); + vholdl(vp2); + VI_UNLOCK(vp2); + vgone(vp2); + vdrop(vp2); break; + } } - dev_unlock(); if (vp2 != NULL) { - vgone(vp2); continue; } + dev_unlock(); + mtx_unlock(&devfs_de_interlock); break; } return (0); @@ -1123,9 +1208,7 @@ mac_create_devfs_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); #endif TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); - devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td); - sx_xunlock(&dmp->dm_lock); - return (0); + return (devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td)); } /* ARGSUSED */ @@ -1149,11 +1232,11 @@ resid = uio->uio_resid; error = dsw->d_write(dev, uio, ioflag); - dev_relthread(dev); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { vfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; } + dev_relthread(dev); if ((flags & FOF_OFFSET) == 0) fp->f_offset = uio->uio_offset;