diff --git a/sys/fs/devfs/devfs.h b/sys/fs/devfs/devfs.h index f4d961b..63362b7 100644 --- a/sys/fs/devfs/devfs.h +++ b/sys/fs/devfs/devfs.h @@ -130,6 +130,7 @@ struct devfs_dirent { #define DE_DOT 0x2 #define DE_DOTDOT 0x4 #define DE_DOOMED 0x8 +#define DE_FAKE 0x10 int de_holdcnt; struct dirent *de_dirent; TAILQ_ENTRY(devfs_dirent) de_list; @@ -178,8 +179,9 @@ void devfs_dirent_free(struct devfs_dirent *de); void devfs_populate (struct devfs_mount *dm); void devfs_cleanup (struct devfs_mount *dm); void devfs_unmount_final(struct devfs_mount *mp); -struct devfs_dirent *devfs_newdirent (char *name, int namelen); -struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode); +struct devfs_dirent *devfs_newdirent (const char *name, int namelen); +struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, const char *name, + int namelen, struct devfs_dirent *dotdot, u_int inode); struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); #endif /* _KERNEL */ diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c index 187c08b..6cf9645 100644 --- a/sys/fs/devfs/devfs_devs.c +++ b/sys/fs/devfs/devfs_devs.c @@ -163,7 +163,7 @@ devfs_find(struct devfs_dirent *dd, const char *name, int namelen) } struct devfs_dirent * -devfs_newdirent(char *name, int namelen) +devfs_newdirent(const char *name, int namelen) { int i; struct devfs_dirent *de; @@ -188,7 +188,8 @@ devfs_newdirent(char *name, int namelen) } struct devfs_dirent * -devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) +devfs_vmkdir(struct devfs_mount *dmp, const char *name, int namelen, + struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; @@ -226,7 +227,8 @@ devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dire } #ifdef MAC - mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd); + 
mac_devfs_create_directory(dmp->dm_mount, __DECONST(char *, name), + namelen, dd); #endif return (dd); } @@ -402,6 +404,8 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup) if (cleanup) continue; KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); + if (cdp->cdp_flags & CDP_WHTOUT) + continue; if (dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { @@ -534,6 +538,8 @@ devfs_devs_init(void *junk __unused) { devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); + fdclone_units = new_unrhdr(1, 0xffffff, NULL); + mtx_init(&fdclone_mtx, "fdclone", NULL, MTX_DEF); } SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); diff --git a/sys/fs/devfs/devfs_int.h b/sys/fs/devfs/devfs_int.h index 51c3625..65d19c1 100644 --- a/sys/fs/devfs/devfs_int.h +++ b/sys/fs/devfs/devfs_int.h @@ -38,6 +38,7 @@ #ifdef _KERNEL struct devfs_dirent; +struct mount; struct cdev_priv { struct cdev cdp_c; @@ -48,6 +49,7 @@ struct cdev_priv { u_int cdp_flags; #define CDP_ACTIVE (1 << 0) #define CDP_SCHED_DTR (1 << 1) +#define CDP_WHTOUT (1 << 2) u_int cdp_inuse; u_int cdp_maxdirent; @@ -69,6 +71,8 @@ extern struct mtx devmtx; extern struct mtx devfs_de_interlock; extern struct sx clone_drain_lock; extern TAILQ_HEAD(cdev_priv_list, cdev_priv) cdevp_list; +extern struct unrhdr *fdclone_units; +extern struct mtx fdclone_mtx; #endif /* _KERNEL */ diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index d9565ee..129ef0d 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -78,6 +78,9 @@ MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF) struct sx clone_drain_lock; SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock"); +static int devfs_newvnode(struct devfs_dirent *de, struct mount *mp, + struct cdev *dev, struct vnode **vpp, struct thread *td); + static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) { @@ 
-170,56 +173,16 @@ devfs_insmntque_dtr(struct vnode *vp, void *arg) vput(vp); } -/* - * devfs_allocv shall be entered with dmp->dm_lock held, and it drops - * it on return. - */ -int -devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) +static int +devfs_newvnode(struct devfs_dirent *de, struct mount *mp, struct cdev *dev, + struct vnode **vpp, struct thread *td) { - int error; - struct vnode *vp; - struct cdev *dev; struct devfs_mount *dmp; + struct vnode *vp; + int error; - KASSERT(td == curthread, ("devfs_allocv: td != curthread")); dmp = VFSTODEVFS(mp); - if (de->de_flags & DE_DOOMED) { - sx_xunlock(&dmp->dm_lock); - return (ENOENT); - } - loop: - DEVFS_DE_HOLD(de); - DEVFS_DMP_HOLD(dmp); - mtx_lock(&devfs_de_interlock); - vp = de->de_vnode; - if (vp != NULL) { - VI_LOCK(vp); - mtx_unlock(&devfs_de_interlock); - sx_xunlock(&dmp->dm_lock); - error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); - sx_xlock(&dmp->dm_lock); - if (devfs_allocv_drop_refs(0, dmp, de)) { - if (error == 0) - vput(vp); - return (ENOENT); - } - else if (error) - goto loop; - sx_xunlock(&dmp->dm_lock); - *vpp = vp; - return (0); - } - mtx_unlock(&devfs_de_interlock); - if (de->de_dirent->d_type == DT_CHR) { - if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { - devfs_allocv_drop_refs(1, dmp, de); - return (ENOENT); - } - dev = &de->de_cdp->cdp_c; - } else { - dev = NULL; - } + sx_assert(&dmp->dm_lock, SA_XLOCKED); error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { devfs_allocv_drop_refs(1, dmp, de); @@ -269,6 +232,59 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, stru return (0); } +/* + * Return in *vpp the vnode for de, reusing de->de_vnode when one + * exists and otherwise creating it through devfs_newvnode(). + * Returns ENOENT when de is DE_DOOMED or its cdev is not CDP_ACTIVE. + * devfs_allocv shall be entered with dmp->dm_lock held, and it drops + * it on return. 
+ */ +int +devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) +{ + int error; + struct vnode *vp; + struct cdev *dev; + struct devfs_mount *dmp; + + KASSERT(td == curthread, ("devfs_allocv: td != curthread")); + dmp = VFSTODEVFS(mp); + if (de->de_flags & DE_DOOMED) { + sx_xunlock(&dmp->dm_lock); + return (ENOENT); + } + loop: + DEVFS_DE_HOLD(de); + DEVFS_DMP_HOLD(dmp); + mtx_lock(&devfs_de_interlock); + vp = de->de_vnode; + if (vp != NULL) { + VI_LOCK(vp); + mtx_unlock(&devfs_de_interlock); + sx_xunlock(&dmp->dm_lock); + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + sx_xlock(&dmp->dm_lock); + if (devfs_allocv_drop_refs(0, dmp, de)) { + if (error == 0) + vput(vp); + return (ENOENT); + } + else if (error) + goto loop; + sx_xunlock(&dmp->dm_lock); + *vpp = vp; + return (0); + } + mtx_unlock(&devfs_de_interlock); + if (de->de_dirent->d_type == DT_CHR) { + if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { + devfs_allocv_drop_refs(1, dmp, de); + return (ENOENT); + } + dev = &de->de_cdp->cdp_c; + } else { + dev = NULL; + } + return (devfs_newvnode(de, mp, dev, vpp, td)); +} + static int devfs_access(struct vop_access_args *ap) { @@ -310,7 +326,9 @@ devfs_close(struct vop_close_args *ap) struct thread *td = ap->a_td; struct cdev *dev = vp->v_rdev; struct cdevsw *dsw; - int vp_locked, error; + struct devfs_mount *dmp; + struct devfs_dirent *de; + int vp_locked, error, dmp_clean, unit; /* * Hack: a tty device that is a controlling terminal @@ -374,6 +392,37 @@ devfs_close(struct vop_close_args *ap) dev_relthread(dev); vn_lock(vp, vp_locked | LK_RETRY); vdrop(vp); + /* + * Recycle the dirent, vnode and cdev for fdcloned devices on + * the last close. + */ + if (dev->si_flags & SI_FDCLONE) { + /* + * Destroy the device before deleting the directory + * entry. This way, vgonel call to VOP_CLOSE() gets + * ENXIO above and does not recurse into the d_close + * method. 
+ */ + unit = dev2unit(dev); + destroy_dev(dev); + free_unr(fdclone_units, unit); + /* + * Now, destroy the dirent (and vnode). A hold on dmp is + * taken for the duration; if DEVFS_DMP_DROP() returns + * non-zero, devfs_unmount_final() is called after dm_lock + * is released. + */ + dmp = VFSTODEVFS(vp->v_mount); + sx_xlock(&dmp->dm_lock); + mtx_lock(&devfs_de_interlock); + de = vp->v_data; + mtx_unlock(&devfs_de_interlock); + dmp_clean = 0; + DEVFS_DMP_HOLD(dmp); + TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); + devfs_delete(dmp, de, 1); + dmp_clean = DEVFS_DMP_DROP(dmp); + sx_xunlock(&dmp->dm_lock); + if (dmp_clean) + devfs_unmount_final(dmp); + } return (error); } @@ -747,10 +796,13 @@ devfs_open(struct vop_open_args *ap) { struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; + struct vnode *rvp; struct cdev *dev = vp->v_rdev; struct file *fp = ap->a_fp; int error; struct cdevsw *dsw; + void *xdev; + int fdcloned; if (vp->v_type == VBLK) return (ENXIO); @@ -771,7 +823,12 @@ devfs_open(struct vop_open_args *ap) vp->v_vflag |= VV_ISTTY; VOP_UNLOCK(vp, 0); - + if (dsw->d_fdopen != NULL) { + mtx_lock(&fdclone_mtx); + fp->f_data = dev; + fp->f_vnode = vp; + mtx_unlock(&fdclone_mtx); + } if(!(dsw->d_flags & D_NEEDGIANT)) { DROP_GIANT(); if (dsw->d_fdopen != NULL) @@ -785,7 +842,17 @@ devfs_open(struct vop_open_args *ap) else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); } - + fdcloned = 0; + if (error == 0 && fp != NULL) { + mtx_lock(&fdclone_mtx); + rvp = fp->f_vnode; + if (rvp != NULL && rvp != vp) { + vp = rvp; + ap->a_vp = rvp; + fdcloned = 1; + } + mtx_unlock(&fdclone_mtx); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); dev_relthread(dev); @@ -800,9 +867,13 @@ devfs_open(struct vop_open_args *ap) if(fp == NULL) return (error); #endif - KASSERT(fp->f_ops == &badfileops, - ("Could not vnode bypass device on fdops %p", fp->f_ops)); - finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); + if (!fdcloned) { + KASSERT(fp->f_ops == &badfileops, + ("Could not vnode bypass 
device on fdops %p", fp->f_ops)); + xdev = dev; + } else + xdev = fp->f_data; + finit(fp, fp->f_flag, DTYPE_VNODE, xdev, &devfs_ops_f); 
return (error); } @@ -1329,6 +1400,124 @@ dev2udev(struct cdev *x) return (x->si_priv->cdp_inode); } +struct unrhdr *fdclone_units; +struct mtx fdclone_mtx; + +int +fdclone(struct cdevsw *csw, struct file *fp, int fmode, + struct cdev **clone, void *si_drv1, struct thread *td) +{ + struct cdev *master, *rclone; + struct vnode *vp, *rvp; + struct devfs_dirent *de, *clones_dd; + struct mount *mp; + struct devfs_mount *dmp; + struct ucred *cr; + int unit; + int error; + static const char clones_dn[] = "clones"; + + /* + * fdclone shall be called from the d_fdopen() method with a + * non-NULL fp, and we do not support drivers that track the + * close (D_TRACKCLOSE); EOPNOTSUPP is returned otherwise. + * NOTE(review): the clone argument is never written in this + * function -- confirm whether *clone should be set to rclone. + */ + if (fp == NULL || (csw->d_flags & D_TRACKCLOSE) != 0) + return (EOPNOTSUPP); + mtx_lock(&fdclone_mtx); + KASSERT(fp->f_ops == &badfileops, ("not badfileops in fdclone")); + vp = fp->f_vnode; + master = (struct cdev *)fp->f_data; + mtx_unlock(&fdclone_mtx); + /* + * fp holds ref on the vp. Do not try to proceed if the devfs + * mountpoint is being unmounted. + */ + error = vn_lock(vp, LK_EXCLUSIVE); + if (error) + return (error); + mp = vp->v_mount; + MNT_ILOCK(mp); + if (mp->mnt_kern_flag & MNTK_UNMOUNT) { + MNT_IUNLOCK(mp); + VOP_UNLOCK(vp, 0); + return (EBUSY); + } + MNT_IUNLOCK(mp); + /* + * Create the cloned cdev. + */ + cr = td->td_ucred; + unit = alloc_unr(fdclone_units); + if (unit == -1) { + VOP_UNLOCK(vp, 0); + return (ENOSPC); + } + rclone = make_dev_credf(MAKEDEV_WHTOUT | MAKEDEV_FDCLONE, csw, + unit2minor(unit), cr, + cr->cr_uid, cr->cr_gid, 0600, "clones/_fdclone"); + if (rclone == NULL) { + VOP_UNLOCK(vp, 0); + free_unr(fdclone_units, unit); + return (ENOMEM); + } + /* + * Create the fake devfs_dirent for the cloned cdev. 
+ */ + de = devfs_newdirent("_fdclone", 8); + de->de_flags |= DE_FAKE | DE_WHITEOUT; + de->de_uid = rclone->si_uid; + de->de_gid = rclone->si_gid; + de->de_mode = rclone->si_mode; + de->de_dirent->d_type = DT_CHR; + dmp = VFSTODEVFS(mp); + sx_xlock(&dmp->dm_lock); + DEVFS_DE_HOLD(de); + DEVFS_DMP_HOLD(dmp); + clones_dd = devfs_find(dmp->dm_rootdir, clones_dn, sizeof(clones_dn) - 1); + if (clones_dd == NULL) { + clones_dd = devfs_vmkdir(dmp, clones_dn, sizeof(clones_dn) - 1, + dmp->dm_rootdir, 0); + clones_dd->de_flags |= DE_WHITEOUT; + } + de->de_dir = clones_dd; + TAILQ_INSERT_TAIL(&clones_dd->de_dlist, de, de_list); + /* + * Create the vnode that replaces the master cdev's vnode. + * NOTE(review): on failure the dirent linked into clones_dd + * above is not unlinked here -- confirm that the error path + * of devfs_newvnode() cleans it up. + */ + error = devfs_newvnode(de, vp->v_mount, rclone, &rvp, td); + if (error) { + VOP_UNLOCK(vp, 0); + destroy_dev(rclone); + free_unr(fdclone_units, unit); + return (error); + } + /* + * Flip the master and cloned vnode on fp. + */ + mtx_lock(&fdclone_mtx); + fp->f_vnode = rvp; + fp->f_data = rclone; + mtx_unlock(&fdclone_mtx); + /* + * Give the cloned device notification on open(). 
+ */ + rclone->si_drv1 = si_drv1; + VOP_UNLOCK(vp, 0); + if ((error = VOP_OPEN(rvp, fmode, cr, td, fp)) != 0) { + mtx_lock(&fdclone_mtx); + fp->f_vnode = NULL; + fp->f_data = master; + mtx_unlock(&fdclone_mtx); + vput(rvp); + destroy_dev(rclone); + free_unr(fdclone_units, unit); + return (error); + } + VOP_UNLOCK(rvp, 0); + vrele(vp); + return (0); +} + static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index e911913..71015c4 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -672,6 +672,10 @@ make_dev_credv(int flags, struct cdevsw *devsw, int minornr, dev = newdev(devsw, minornr, dev); if (flags & MAKEDEV_REF) dev_refl(dev); + if (flags & MAKEDEV_WHTOUT) + dev->si_priv->cdp_flags |= CDP_WHTOUT; + if (flags & MAKEDEV_FDCLONE) + dev->si_flags |= SI_FDCLONE; if (dev->si_flags & SI_CHEAPCLONE && dev->si_flags & SI_NAMED) { /* diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index a28117a..a64e0b4 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -64,6 +64,8 @@ __FBSDID("$FreeBSD: src/sys/kern/vfs_vnops.c,v 1.261 2008/03/31 11:57:18 kib Exp #include +#include + static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; static fo_truncate_t vn_truncate; @@ -236,6 +238,14 @@ restart: if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0) goto bad; + if (fp != NULL && fp->f_vnode != NULL) { + mtx_lock(&fdclone_mtx); + if (vp != fp->f_vnode) { + /* fdclone() replaced fp->f_vnode; reference to vp was dropped by fdclone() */ + ndp->ni_vp = vp = fp->f_vnode; + } + mtx_unlock(&fdclone_mtx); + } if (fmode & FWRITE) vp->v_writecount++; *flagp = fmode; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 22d7abc..6dfb0e1 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -64,6 +64,7 @@ struct cdev { #define SI_DUMPDEV 0x0080 /* is kernel dumpdev */ #define SI_CANDELETE 0x0100 /* can do BIO_DELETE */ #define SI_CLONELIST 0x0200 /* on a clone list */ +#define 
SI_FDCLONE 0x0400 /* made with MAKEDEV_FDCLONE (fdclone) */ 
struct timespec si_atime; struct timespec si_ctime; struct timespec si_mtime; @@ -210,7 +211,7 @@ struct cdevsw { const char *d_kind; /* These fields should not be messed with by drivers */ - LIST_ENTRY(cdevsw) d_list; + LIST_ENTRY(cdevsw) d_postfree_list; LIST_HEAD(, cdev) d_devs; int d_spare3; union { @@ -247,6 +248,8 @@ void clone_cleanup(struct clonedevs **); #define CLONE_UNITMASK 0xfffff #define CLONE_FLAG0 (CLONE_UNITMASK + 1) int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra); +int fdclone(struct cdevsw *_csw, struct file *_fp, int _fmode, + struct cdev **_clone, void *si_drv1, struct thread *td); int count_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev); @@ -267,8 +270,9 @@ struct cdev *make_dev(struct cdevsw *_devsw, int _minor, uid_t _uid, gid_t _gid, struct cdev *make_dev_cred(struct cdevsw *_devsw, int _minor, struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(7, 8); -#define MAKEDEV_REF 0x1 -#define MAKEDEV_WHTOUT 0x2 +#define MAKEDEV_REF 0x1 +#define MAKEDEV_WHTOUT 0x2 +#define MAKEDEV_FDCLONE 0x4 struct cdev *make_dev_credf(int _flags, struct cdevsw *_devsw, int _minornr, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,