diff --git a/sys/fs/devfs/devfs.h b/sys/fs/devfs/devfs.h index f4d961b..63362b7 100644 --- a/sys/fs/devfs/devfs.h +++ b/sys/fs/devfs/devfs.h @@ -130,6 +130,7 @@ struct devfs_dirent { #define DE_DOT 0x2 #define DE_DOTDOT 0x4 #define DE_DOOMED 0x8 +#define DE_FAKE 0x10 int de_holdcnt; struct dirent *de_dirent; TAILQ_ENTRY(devfs_dirent) de_list; @@ -178,8 +179,9 @@ void devfs_dirent_free(struct devfs_dirent *de); void devfs_populate (struct devfs_mount *dm); void devfs_cleanup (struct devfs_mount *dm); void devfs_unmount_final(struct devfs_mount *mp); -struct devfs_dirent *devfs_newdirent (char *name, int namelen); -struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode); +struct devfs_dirent *devfs_newdirent (const char *name, int namelen); +struct devfs_dirent *devfs_vmkdir (struct devfs_mount *, const char *name, + int namelen, struct devfs_dirent *dotdot, u_int inode); struct devfs_dirent *devfs_find (struct devfs_dirent *dd, const char *name, int namelen); #endif /* _KERNEL */ diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c index 11b3097..2754d57 100644 --- a/sys/fs/devfs/devfs_devs.c +++ b/sys/fs/devfs/devfs_devs.c @@ -163,7 +163,7 @@ devfs_find(struct devfs_dirent *dd, const char *name, int namelen) } struct devfs_dirent * -devfs_newdirent(char *name, int namelen) +devfs_newdirent(const char *name, int namelen) { int i; struct devfs_dirent *de; @@ -188,7 +188,8 @@ devfs_newdirent(char *name, int namelen) } struct devfs_dirent * -devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) +devfs_vmkdir(struct devfs_mount *dmp, const char *name, int namelen, + struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; @@ -226,7 +227,8 @@ devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dire } #ifdef MAC - mac_create_devfs_directory(dmp->dm_mount, name, namelen, dd); + 
mac_create_devfs_directory(dmp->dm_mount, __DECONST(char *, name), + namelen, dd); #endif return (dd); } @@ -404,6 +406,8 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup) if (cleanup) continue; KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); + if (cdp->cdp_flags & CDP_WHTOUT) + continue; if (dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { @@ -536,6 +540,7 @@ devfs_devs_init(void *junk __unused) { devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); + fdclone_units = new_unrhdr(1, 0xffffff, NULL); } SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); diff --git a/sys/fs/devfs/devfs_int.h b/sys/fs/devfs/devfs_int.h index 51c3625..4fa4812 100644 --- a/sys/fs/devfs/devfs_int.h +++ b/sys/fs/devfs/devfs_int.h @@ -38,6 +38,7 @@ #ifdef _KERNEL struct devfs_dirent; +struct mount; struct cdev_priv { struct cdev cdp_c; @@ -48,6 +49,7 @@ struct cdev_priv { u_int cdp_flags; #define CDP_ACTIVE (1 << 0) #define CDP_SCHED_DTR (1 << 1) +#define CDP_WHTOUT (1 << 2) u_int cdp_inuse; u_int cdp_maxdirent; @@ -63,12 +65,15 @@ struct cdev *devfs_alloc(void); void devfs_free(struct cdev *); void devfs_create(struct cdev *dev); void devfs_destroy(struct cdev *dev); +int devfs_newvnode(struct devfs_dirent *de, struct mount *mp, struct cdev *dev, + struct vnode **vpp, struct thread *td); extern struct unrhdr *devfs_inos; extern struct mtx devmtx; extern struct mtx devfs_de_interlock; extern struct sx clone_drain_lock; extern TAILQ_HEAD(cdev_priv_list, cdev_priv) cdevp_list; +extern struct unrhdr *fdclone_units; #endif /* _KERNEL */ diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 625a6f4..b1c620f 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -170,56 +170,15 @@ devfs_insmntque_dtr(struct vnode *vp, void *arg) vput(vp); } -/* - * devfs_allocv shall be entered with dmp->dm_lock held, and it drops - * it on return. 
- */ int -devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) +devfs_newvnode(struct devfs_dirent *de, struct mount *mp, struct cdev *dev, + struct vnode **vpp, struct thread *td) { - int error; - struct vnode *vp; - struct cdev *dev; struct devfs_mount *dmp; + struct vnode *vp; + int error; - KASSERT(td == curthread, ("devfs_allocv: td != curthread")); dmp = VFSTODEVFS(mp); - if (de->de_flags & DE_DOOMED) { - sx_xunlock(&dmp->dm_lock); - return (ENOENT); - } - loop: - DEVFS_DE_HOLD(de); - DEVFS_DMP_HOLD(dmp); - mtx_lock(&devfs_de_interlock); - vp = de->de_vnode; - if (vp != NULL) { - VI_LOCK(vp); - mtx_unlock(&devfs_de_interlock); - sx_xunlock(&dmp->dm_lock); - error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); - sx_xlock(&dmp->dm_lock); - if (devfs_allocv_drop_refs(0, dmp, de)) { - if (error == 0) - vput(vp); - return (ENOENT); - } - else if (error) - goto loop; - sx_xunlock(&dmp->dm_lock); - *vpp = vp; - return (0); - } - mtx_unlock(&devfs_de_interlock); - if (de->de_dirent->d_type == DT_CHR) { - if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { - devfs_allocv_drop_refs(1, dmp, de); - return (ENOENT); - } - dev = &de->de_cdp->cdp_c; - } else { - dev = NULL; - } error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { devfs_allocv_drop_refs(1, dmp, de); @@ -269,6 +228,59 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, stru return (0); } +/* + * devfs_allocv shall be entered with dmp->dm_lock held, and it drops + * it on return. 
+ */ +int +devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) +{ + int error; + struct vnode *vp; + struct cdev *dev; + struct devfs_mount *dmp; + + KASSERT(td == curthread, ("devfs_allocv: td != curthread")); + dmp = VFSTODEVFS(mp); + if (de->de_flags & DE_DOOMED) { + sx_xunlock(&dmp->dm_lock); + return (ENOENT); + } + loop: + DEVFS_DE_HOLD(de); + DEVFS_DMP_HOLD(dmp); + mtx_lock(&devfs_de_interlock); + vp = de->de_vnode; + if (vp != NULL) { + VI_LOCK(vp); + mtx_unlock(&devfs_de_interlock); + sx_xunlock(&dmp->dm_lock); + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); + sx_xlock(&dmp->dm_lock); + if (devfs_allocv_drop_refs(0, dmp, de)) { + if (error == 0) + vput(vp); + return (ENOENT); + } + else if (error) + goto loop; + sx_xunlock(&dmp->dm_lock); + *vpp = vp; + return (0); + } + mtx_unlock(&devfs_de_interlock); + if (de->de_dirent->d_type == DT_CHR) { + if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { + devfs_allocv_drop_refs(1, dmp, de); + return (ENOENT); + } + dev = &de->de_cdp->cdp_c; + } else { + dev = NULL; + } + return (devfs_newvnode(de, mp, dev, vpp, td)); +} + static int devfs_access(struct vop_access_args *ap) { @@ -310,7 +322,9 @@ devfs_close(struct vop_close_args *ap) struct thread *td = ap->a_td; struct cdev *dev = vp->v_rdev; struct cdevsw *dsw; - int vp_locked, error; + struct devfs_mount *dmp; + struct devfs_dirent *de; + int vp_locked, error, dmp_clean; /* * Hack: a tty device that is a controlling terminal @@ -374,6 +388,28 @@ devfs_close(struct vop_close_args *ap) dev_relthread(dev); vn_lock(vp, vp_locked | LK_RETRY, td); vdrop(vp); + /* + * fdclone() can be called from d_fdopen only. Attempt to + * recycle the dirent (and vnode) for fdcloned devices on the + * last close. 
+ */ + if (dsw->d_fdopen != NULL) { + dmp = VFSTODEVFS(vp->v_mount); + sx_xlock(&dmp->dm_lock); + mtx_lock(&devfs_de_interlock); + de = vp->v_data; + mtx_unlock(&devfs_de_interlock); + dmp_clean = 0; + if (de->de_flags & DE_FAKE) { + DEVFS_DMP_HOLD(dmp); + TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); + devfs_delete(dmp, de, 1); + dmp_clean = DEVFS_DMP_DROP(dmp); + } + sx_xunlock(&dmp->dm_lock); + if (dmp_clean) + devfs_unmount_final(dmp); + } return (error); } @@ -747,10 +783,12 @@ devfs_open(struct vop_open_args *ap) { struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; + struct vnode *rvp; struct cdev *dev = vp->v_rdev; struct file *fp = ap->a_fp; int error; struct cdevsw *dsw; + int fdcloned; if (vp->v_type == VBLK) return (ENXIO); @@ -771,7 +809,12 @@ devfs_open(struct vop_open_args *ap) vp->v_vflag |= VV_ISTTY; VOP_UNLOCK(vp, 0, td); - + if (dsw->d_fdopen != NULL && fp != NULL) { + FILE_LOCK(fp); + fp->f_data = dev; + fp->f_vnode = vp; + FILE_UNLOCK(fp); + } if(!(dsw->d_flags & D_NEEDGIANT)) { DROP_GIANT(); if (dsw->d_fdopen != NULL) @@ -785,7 +828,16 @@ devfs_open(struct vop_open_args *ap) else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); } - + fdcloned = 0; + if (fp != NULL) { + FILE_LOCK(fp); + rvp = fp->f_vnode; + if (rvp != NULL && rvp != vp) { + vp = rvp; + fdcloned = 1; + } + FILE_UNLOCK(fp); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); dev_relthread(dev); @@ -801,9 +853,11 @@ devfs_open(struct vop_open_args *ap) return (error); #endif FILE_LOCK(fp); - KASSERT(fp->f_ops == &badfileops, - ("Could not vnode bypass device on fdops %p", fp->f_ops)); - fp->f_data = dev; + if (!fdcloned) { + KASSERT(fp->f_ops == &badfileops, + ("Could not vnode bypass device on fdops %p", fp->f_ops)); + fp->f_data = dev; + } fp->f_ops = &devfs_ops_f; FILE_UNLOCK(fp); return (error); @@ -1322,6 +1376,154 @@ dev2udev(struct cdev *x) return (x->si_priv->cdp_inode); } +struct unrhdr *fdclone_units; + +/* + * Give the open file fp a private clone of the device being opened. + * A whiteout cdev, a fake dirent under clones/ and a new vnode are + * created, fp is switched from the master vnode to the clone's vnode, + * and the clone is opened with si_drv1 set to the caller's value. + * Returns 0 and stores the clone in *clone (if clone is not NULL) on + * success, or an errno value on failure. + */ +int +fdclone(struct cdevsw *csw, struct file *fp, int fmode, + struct cdev
**clone, void *si_drv1, struct thread *td) +{ + struct cdev *master, *rclone; + struct vnode *vp, *rvp; + struct devfs_dirent *de, *clones_dd; + struct devfs_mount *dmp; + struct ucred *cr; + int unit; + int error; + static const char clones_dn[] = "clones"; + + /* + * fdclone shall be called from the fdopen(), and we do not + * support tracking the close. + */ + if (fp == NULL || (csw->d_flags & D_TRACKCLOSE) != 0) + return (EOPNOTSUPP); + FILE_LOCK(fp); + KASSERT(fp->f_ops == &badfileops, ("not badfileops in fdclone")); + vp = fp->f_vnode; + master = (struct cdev *)fp->f_data; + FILE_UNLOCK(fp); + /* + * fp holds ref on the vp + */ + error = vn_lock(vp, LK_EXCLUSIVE, td); + if (error) + return (error); + /* + * Create the cloned cdev. + */ + cr = td->td_ucred; + unit = alloc_unr(fdclone_units); + if (unit == -1) { + /* No free unit numbers left. */ + VOP_UNLOCK(vp, 0, td); + return (ENOSPC); + } + rclone = make_dev_credf(MAKEDEV_WHTOUT, csw, unit2minor(unit), cr, + cr->cr_uid, cr->cr_gid, 0600, "clones/_fdclone"); + if (rclone == NULL) { + VOP_UNLOCK(vp, 0, td); + free_unr(fdclone_units, unit); + return (ENOMEM); + } + /* + * Create the fake devfs_dirent for the cloned cdev. + */ + de = devfs_newdirent("fdclone", 7); + de->de_flags |= DE_FAKE | DE_WHITEOUT; + de->de_uid = rclone->si_uid; + de->de_gid = rclone->si_gid; + de->de_mode = rclone->si_mode; + de->de_dirent->d_type = DT_CHR; + dmp = VFSTODEVFS(vp->v_mount); + sx_xlock(&dmp->dm_lock); + DEVFS_DE_HOLD(de); + DEVFS_DMP_HOLD(dmp); + clones_dd = devfs_find(dmp->dm_rootdir, clones_dn, sizeof(clones_dn) - 1); + if (clones_dd == NULL) { + clones_dd = devfs_vmkdir(dmp, clones_dn, sizeof(clones_dn) - 1, + dmp->dm_rootdir, 0); + clones_dd->de_flags |= DE_WHITEOUT; + } + de->de_dir = clones_dd; + TAILQ_INSERT_TAIL(&clones_dd->de_dlist, de, de_list); + /* + * Create the vnode for replacement of master cdev's vnode.
+ */ + error = devfs_newvnode(de, vp->v_mount, rclone, &rvp, td); + if (error) { + VOP_UNLOCK(vp, 0, td); + destroy_dev(rclone); + free_unr(fdclone_units, unit); + return (error); + } + /* + * Flip the master and cloned vnode on fp. + */ + FILE_LOCK(fp); + fp->f_vnode = rvp; + fp->f_data = rclone; + FILE_UNLOCK(fp); + /* + * Give the cloned device notification on open(). + */ + rclone->si_drv1 = si_drv1; + if ((error = VOP_OPEN(rvp, fmode, cr, td, fp)) != 0) { + FILE_LOCK(fp); + fp->f_vnode = NULL; + fp->f_data = master; + FILE_UNLOCK(fp); + vput(rvp); + /* Drop the master vnode lock taken above. */ + VOP_UNLOCK(vp, 0, td); + destroy_dev(rclone); + free_unr(fdclone_units, unit); + return (error); + } else { + if (fmode & FWRITE) { + vp->v_writecount--; + rvp->v_writecount++; + } + vput(vp); + } + VOP_UNLOCK(rvp, 0, td); + if (clone != NULL) + *clone = rclone; + return (0); +} + +/* + * Callback handed to destroy_dev_sched_cb(): free the clone's unit number. + */ +static void +fdclone_destroy_cb(void *arg) +{ + int unit; + + unit = (int)(intptr_t)arg; + free_unr(fdclone_units, unit); +} + +/* + * Schedule destruction of a cdev created by fdclone(); its unit number + * is released by fdclone_destroy_cb(). + */ +void +fdclone_destroy_dev(struct cdev *dev) +{ + int unit; + + unit = dev2unit(dev); + destroy_dev_sched_cb(dev, fdclone_destroy_cb, (void *)(intptr_t)unit); +} + static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index b10c555..10cd5f5 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -587,6 +587,8 @@ make_dev_credv(int flags, struct cdevsw *devsw, int minornr, dev = newdev(devsw, minornr, dev); if (flags & MAKEDEV_REF) dev_refl(dev); + if (flags & MAKEDEV_WHTOUT) + dev->si_priv->cdp_flags |= CDP_WHTOUT; if (dev->si_flags & SI_CHEAPCLONE && dev->si_flags & SI_NAMED) { /* diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 2f12557..6bdb20a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1065,10 +1065,13 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, td->td_dupfd = 0; vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); - vp = nd.ni_vp; FILE_LOCK(fp); - fp->f_vnode = vp; + if (fp->f_vnode == NULL) { + vp =
nd.ni_vp; + fp->f_vnode = vp; + } else + vp = fp->f_vnode; if (fp->f_data == NULL) fp->f_data = vp; fp->f_flag = flags & FMASK; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index ee8a79b..074b943 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -242,6 +242,9 @@ void clone_cleanup(struct clonedevs **); #define CLONE_UNITMASK 0xfffff #define CLONE_FLAG0 (CLONE_UNITMASK + 1) int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra); +int fdclone(struct cdevsw *_csw, struct file *_fp, int _fmode, + struct cdev **_clone, void *si_drv1, struct thread *td); +void fdclone_destroy_dev(struct cdev *_dev); int count_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev);