--- sys/fs/fdescfs/fdesc_vfsops.c.orig +++ sys/fs/fdescfs/fdesc_vfsops.c @@ -208,7 +208,7 @@ last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL) + if (fdp->fd_ofiles[i].fde_file == NULL) freefd++; /* --- sys/fs/fdescfs/fdesc_vnops.c.orig +++ sys/fs/fdescfs/fdesc_vnops.c @@ -534,7 +534,7 @@ dp->d_type = DT_DIR; break; default: - if (fdp->fd_ofiles[fcnt] == NULL) + if (fdp->fd_ofiles[fcnt].fde_file == NULL) break; dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; --- sys/fs/nfsserver/nfs_nfsdport.c.orig +++ sys/fs/nfsserver/nfs_nfsdport.c @@ -2786,8 +2786,8 @@ int error = 0; fdp = p->td_proc->p_fd; - if (fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) { + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } --- sys/kern/kern_descrip.c.orig +++ sys/kern/kern_descrip.c @@ -147,15 +147,10 @@ #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) /* - * Storage required per open file descriptor. - */ -#define OFILESIZE (sizeof(struct file *) + sizeof(char)) - -/* * Storage to hold unused ofiles that need to be reclaimed. */ struct freetable { - struct file **ft_table; + struct filedescent *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -173,8 +168,7 @@ * These arrays are used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. 
*/ - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfiles[NDFILE]; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -280,7 +274,8 @@ FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd)); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) @@ -430,36 +425,14 @@ return (error); } -static inline int -fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp) -{ - - FILEDESC_LOCK_ASSERT(fdp); - - *fpp = fget_locked(fdp, fd); - if (*fpp == NULL) - return (EBADF); - -#ifdef CAPABILITIES - if ((*fpp)->f_type == DTYPE_CAPABILITY) { - int err = cap_funwrap(*fpp, rights, fpp); - if (err != 0) { - *fpp = NULL; - return (err); - } - } -#endif /* CAPABILITIES */ - return (0); -} - int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; - struct file *fp; + struct file *fp, *fp2; + struct filedescent *fde; struct proc *p; - char *pop; struct vnode *vp; int error, flg, tmp; int vfslocked; @@ -490,8 +463,9 @@ error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; + fde = &fdp->fd_ofiles[fd]; + td->td_retval[0] = + (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_SUNLOCK(fdp); break; @@ -502,32 +476,24 @@ error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - *pop = (*pop &~ UF_EXCLOSE) | + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, &fp); + if (error != 0) break; - } td->td_retval[0] = OFLAGS(fp->f_flag); - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); break; case F_SETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, &fp); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -535,7 +501,7 @@ } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error) { + if (error != 0) { fdrop(fp, td); break; } @@ -552,14 +518,9 @@ break; case F_GETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, &fp); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -567,14 +528,9 @@ break; case F_SETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, &fp); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -593,34 +549,27 @@ case F_SETLK: do_setlk: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, &fp); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } + flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { - if (fp->f_offset < 0 
|| + if (fp->f_offset < 0 || /* XXXPJD: Access to f_offset is not synchronized in any way. */ (flp->l_start > 0 && - fp->f_offset > OFF_MAX - flp->l_start)) { - FILEDESC_SUNLOCK(fdp); + fp->f_offset > OFF_MAX - flp->l_start)) { /* XXXPJD: Access to f_offset is not synchronized in any way. */ error = EOVERFLOW; + fdrop(fp, td); break; } - flp->l_start += fp->f_offset; + flp->l_start += fp->f_offset; /* XXXPJD: Access to f_offset is not synchronized in any way. */ } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); switch (flp->l_type) { @@ -690,9 +639,12 @@ * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ - FILEDESC_SLOCK(fdp); - if (fget_locked(fdp, fd) != fp) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, 0, &fp2); + if (error != 0) { + fdrop(fp, td); + break; + } + if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; @@ -702,46 +654,38 @@ F_UNLCK, flp, F_POSIX); VFS_UNLOCK_GIANT(vfslocked); vfslocked = 0; - } else - FILEDESC_SUNLOCK(fdp); + } fdrop(fp, td); + fdrop(fp2, td); break; case F_GETLK: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, &fp); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_SUNLOCK(fdp); error = EINVAL; + fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { if ((flp->l_start > 0 && - fp->f_offset > OFF_MAX - flp->l_start) || + fp->f_offset > OFF_MAX - flp->l_start) || /* XXXPJD: Access to f_offset is not synchronized in any way. 
*/ (flp->l_start < 0 && - fp->f_offset < OFF_MIN - flp->l_start)) { - FILEDESC_SUNLOCK(fdp); + fp->f_offset < OFF_MIN - flp->l_start)) { /* XXXPJD: Access to f_offset is not synchronized in any way. */ error = EOVERFLOW; + fdrop(fp, td); break; } - flp->l_start += fp->f_offset; + flp->l_start += fp->f_offset; /* XXXPJD: Access to f_offset is not synchronized in any way. */ } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, @@ -755,19 +699,14 @@ arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: - FILEDESC_SLOCK(fdp); - if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_SUNLOCK(fdp); - error = EBADF; + error = fget_unlocked(fdp, fd, 0, &fp); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); error = EBADF; break; } - fhold(fp); - FILEDESC_SUNLOCK(fdp); if (arg != 0) { vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); @@ -842,7 +781,7 @@ FILEDESC_XUNLOCK(fdp); return (0); } - fp = fdp->fd_ofiles[old]; + fp = fdp->fd_ofiles[old].fde_file; fhold(fp); /* @@ -874,7 +813,7 @@ #endif fdgrowtable(fdp, new + 1); } - if (fdp->fd_ofiles[new] == NULL) + if (fdp->fd_ofiles[new].fde_file == NULL) fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { @@ -884,15 +823,18 @@ } } - KASSERT(fp == fdp->fd_ofiles[old], ("old fd has been modified")); + KASSERT(fp == fdp->fd_ofiles[old].fde_file, + ("old fd has been modified")); KASSERT(old != new, ("new fd is same as old")); - delfp = fdp->fd_ofiles[new]; + delfp = fdp->fd_ofiles[new].fde_file; + /* * Duplicate the source descriptor. 
*/ - fdp->fd_ofiles[new] = fp; - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; + bcopy(&fdp->fd_ofiles[old], &fdp->fd_ofiles[new], + sizeof(fdp->fd_ofiles[new])); + fdp->fd_ofiles[new].fde_flags &= ~UF_EXCLOSE; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; @@ -1133,7 +1075,6 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { - struct file *fp_object; int error; FILEDESC_XLOCK_ASSERT(fdp); @@ -1159,12 +1100,10 @@ knote_fdclose(td, fd); /* - * When we're closing an fd with a capability, we need to notify - * mqueue if the underlying object is of type mqueue. + * We need to notify mqueue if the object is of type mqueue. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_MQUEUE) - mq_fdclose(td, fd, fp_object); + if (fp->f_type == DTYPE_MQUEUE) + mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); @@ -1216,8 +1155,7 @@ FILEDESC_XUNLOCK(fdp); return (EBADF); } - fdp->fd_ofiles[fd] = NULL; - fdp->fd_ofileflags[fd] = 0; + bzero(&fdp->fd_ofiles[fd], sizeof(fdp->fd_ofiles[fd])); fdunused(fdp, fd); /* closefp() drops the FILEDESC lock for us. 
*/ @@ -1250,7 +1188,7 @@ uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { - if (fdp->fd_ofiles[fd] != NULL) { + if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); @@ -1413,9 +1351,8 @@ { struct filedesc0 *fdp0; struct freetable *fo; - struct file **ntable; - struct file **otable; - char *nfileflags; + struct filedescent *ntable; + struct filedescent *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap; @@ -1432,19 +1369,16 @@ return; /* allocate a new table and (if required) new bitmaps */ - ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable), + ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); - nfileflags = (char *)&ntable[nnfiles]; if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); else nmap = NULL; - bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable)); - bcopy(fdp->fd_ofileflags, nfileflags, onfiles); + bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(ntable[0])); otable = fdp->fd_ofiles; - fdp->fd_ofileflags = nfileflags; fdp->fd_ofiles = ntable; /* * We must preserve ofiles until the process exits because we can't @@ -1458,7 +1392,7 @@ SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next); } if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { - bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap)); + bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(nmap[0])); if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) free(fdp->fd_map, M_FILEDESC); fdp->fd_map = nmap; @@ -1519,8 +1453,9 @@ ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free")); - KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set")); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("file descriptor isn't free")); + KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, 
("file flags are set")); fdused(fdp, fd); *result = fd; return (0); @@ -1551,7 +1486,7 @@ return (1); last = min(fdp->fd_nfiles, lim); for (i = fdp->fd_freefile; i < last; i++) { - if (fdp->fd_ofiles[i] == NULL && --n <= 0) + if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) return (1); } return (0); @@ -1574,7 +1509,7 @@ if (error) return (error); /* no reference held on error */ - error = finstall(td, fp, &fd, flags); + error = finstall(td, fp, &fd, flags, CAP_ALL); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); @@ -1628,13 +1563,17 @@ * Install a file in a file descriptor table. */ int -finstall(struct thread *td, struct file *fp, int *fd, int flags) +finstall(struct thread *td, struct file *fp, int *fd, int flags, + cap_rights_t rights) { struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; int error; KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); + KASSERT((rights | CAP_MASK_VALID) == CAP_MASK_VALID, + ("%s: invalid rights", __func__)); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { @@ -1642,9 +1581,11 @@ return (error); } fhold(fp); - fdp->fd_ofiles[*fd] = fp; + fde = &fdp->fd_ofiles[*fd]; + fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) - fdp->fd_ofileflags[*fd] |= UF_EXCLOSE; + fde->fde_flags |= UF_EXCLOSE; + fde->fde_caprights = rights; FILEDESC_XUNLOCK(fdp); return (0); } @@ -1679,7 +1620,6 @@ newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; @@ -1780,11 +1720,11 @@ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { if (fdisused(fdp, i) && - (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) && - fdp->fd_ofiles[i]->f_ops != &badfileops) { - newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; - newfdp->fd_ofileflags[i] = 
fdp->fd_ofileflags[i]; - fhold(newfdp->fd_ofiles[i]); + (fdp->fd_ofiles[i].fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && + fdp->fd_ofiles[i].fde_file->f_ops != &badfileops) { + bcopy(&fdp->fd_ofiles[i], &newfdp->fd_ofiles[i], + sizeof(newfdp->fd_ofiles[i])); + fhold(newfdp->fd_ofiles[i].fde_file); newfdp->fd_lastfile = i; } else { if (newfdp->fd_freefile == -1) @@ -1794,9 +1734,10 @@ newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); FILEDESC_XLOCK(newfdp); - for (i = 0; i <= newfdp->fd_lastfile; ++i) - if (newfdp->fd_ofiles[i] != NULL) + for (i = 0; i <= newfdp->fd_lastfile; ++i) { + if (newfdp->fd_ofiles[i].fde_file != NULL) fdused(newfdp, i); + } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; FILEDESC_XUNLOCK(newfdp); @@ -1832,12 +1773,12 @@ if (fdtol != NULL) { FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, - ("filedesc_to_refcount botch: fdl_refcount=%d", - fdtol->fdl_refcount)); + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); @@ -1899,10 +1840,10 @@ return; for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL) { FILEDESC_XLOCK(fdp); - fdp->fd_ofiles[i] = NULL; + bzero(&fdp->fd_ofiles[i], sizeof(fdp->fd_ofiles[i])); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } @@ -1929,17 +1870,17 @@ fdp->fd_jdir = NULL; FILEDESC_XUNLOCK(fdp); - if (cdir) { + if (cdir != NULL) { locked = VFS_LOCK_GIANT(cdir->v_mount); vrele(cdir); VFS_UNLOCK_GIANT(locked); } - if (rdir) { + if (rdir != NULL) { locked = VFS_LOCK_GIANT(rdir->v_mount); vrele(rdir); VFS_UNLOCK_GIANT(locked); } - if (jdir) { + if (jdir != NULL) { locked = VFS_LOCK_GIANT(jdir->v_mount); vrele(jdir); VFS_UNLOCK_GIANT(locked); @@ -1976,6 +1917,7 @@ 
setugidsafety(struct thread *td) { struct filedesc *fdp; + struct file *fp; int i; /* Certain daemons might not have file descriptors. */ @@ -1991,17 +1933,14 @@ for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { - struct file *fp; - + fp = fdp->fd_ofiles[i].fde_file; + if (fp != NULL && is_unsafe(fp)) { knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. */ - fp = fdp->fd_ofiles[i]; - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; + bzero(&fdp->fd_ofiles[i], sizeof(fdp->fd_ofiles[i])); fdunused(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); @@ -2023,8 +1962,8 @@ { FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[idx] == fp) { - fdp->fd_ofiles[idx] = NULL; + if (fdp->fd_ofiles[idx].fde_file == fp) { + bzero(&fdp->fd_ofiles[idx], sizeof(fdp->fd_ofiles[idx])); fdunused(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); @@ -2039,6 +1978,7 @@ fdcloseexec(struct thread *td) { struct filedesc *fdp; + struct filedescent *fde; struct file *fp; int i; @@ -2053,11 +1993,11 @@ */ FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fde = &fdp->fd_ofiles[i]; + fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || - (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; + (fde->fde_flags & UF_EXCLOSE))) { + bzero(fde, sizeof(*fde)); fdunused(fdp, i); (void) closefp(fdp, i, fp, td, 0); /* closefp() drops the FILEDESC lock. 
*/ @@ -2088,7 +2028,7 @@ devnull = -1; error = 0; for (i = 0; i < 3; i++) { - if (fdp->fd_ofiles[i] != NULL) + if (fdp->fd_ofiles[i].fde_file != NULL) continue; if (devnull < 0) { save = td->td_retval[0]; @@ -2123,7 +2063,6 @@ struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; - struct file *fp_object; /* * POSIX record locking dictates that any close releases ALL @@ -2136,15 +2075,11 @@ * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. - * - * If this is a capability, we do lock processing under the underlying - * node, not the capability itself. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_VNODE && td != NULL) { + if (fp->f_type == DTYPE_VNODE && td != NULL) { int vfslocked; - vp = fp_object->f_vnode; + vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; @@ -2174,7 +2109,7 @@ lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = fp_object->f_vnode; + vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); @@ -2209,14 +2144,19 @@ atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } -struct file * -fget_unlocked(struct filedesc *fdp, int fd) +int +fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + struct file **fpp) { struct file *fp; u_int count; +#ifdef CAPABILITIES + cap_rights_t haverights; + int error; +#endif if (fd < 0 || fd >= fdp->fd_nfiles) - return (NULL); + return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have @@ -2226,9 +2166,17 @@ * due to preemption. 
*/ for (;;) { - fp = fdp->fd_ofiles[fd]; + fp = fdp->fd_ofiles[fd].fde_file; if (fp == NULL) - break; + return (EBADF); +#ifdef CAPABILITIES + if (needrights != 0) { + haverights = fdp->fd_ofiles[fd].fde_caprights; + error = cap_check(haverights, needrights); + if (error != 0) + return (error); + } +#endif count = fp->f_count; if (count == 0) continue; @@ -2238,12 +2186,12 @@ */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; - if (fp == fdp->fd_ofiles[fd]) + if (fp == fdp->fd_ofiles[fd].fde_file) break; fdrop(fp, curthread); } - - return (fp); + *fpp = fp; + return (0); } /* @@ -2253,84 +2201,51 @@ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * - * If the FGET_GETCAP flag is set, the capability itself will be returned. - * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL. - * Otherwise, if the file is a capability, its rights will be checked against - * the capability rights mask, and if successful, the object will be unwrapped. + * File's rights will be checked against the capability rights mask. * * If an error occured the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). 
*/ -#define FGET_GETCAP 0x00000001 static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, - cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp, - int fget_flags) + cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp) { struct filedesc *fdp; struct file *fp; + int error; #ifdef CAPABILITIES - struct file *fp_fromcap; - int error; + cap_rights_t haverights; #endif *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + if (maxprotp != NULL) + needrights |= CAP_MMAP; + error = fget_unlocked(fdp, fd, needrights, &fp); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); } #ifdef CAPABILITIES + haverights = cap_rights(fdp, fd); + /* - * If this is a capability, what rights does it have? + * What capability rights does it have? */ - if (haverightsp != NULL) { - if (fp->f_type == DTYPE_CAPABILITY) - *haverightsp = cap_rights(fp); - else - *haverightsp = CAP_MASK_VALID; - } + if (haverightsp != NULL) + *haverightsp = haverights; /* - * If a capability has been requested, return the capability directly. - * Otherwise, check capability rights, extract the underlying object, - * and check its access flags. + * If requested, convert capability rights to access flags. */ - if (fget_flags & FGET_GETCAP) { - if (fp->f_type != DTYPE_CAPABILITY) { - fdrop(fp, td); - return (EINVAL); - } - } else { - if (maxprotp == NULL) - error = cap_funwrap(fp, needrights, &fp_fromcap); - else - error = cap_funwrap_mmap(fp, needrights, maxprotp, - &fp_fromcap); - if (error) { - fdrop(fp, td); - return (error); - } - - /* - * If we've unwrapped a file, drop the original capability - * and hold the new descriptor. fp after this point refers to - * the actual (unwrapped) object, not the capability. 
- */ - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, td); - fp = fp_fromcap; - } - } + if (maxprotp != NULL) + *maxprotp = cap_rights_to_vmprot(haverights); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("%s: saw capability", __func__)); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #endif /* CAPABILITIES */ @@ -2353,7 +2268,7 @@ fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, 0, rights, NULL, NULL)); } int @@ -2361,37 +2276,24 @@ struct file **fpp) { - return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0)); + return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp)); } int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0)); + return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL)); } /* - * Unlike the other fget() calls, which accept and check capability rights - * but never return capabilities, fgetcap() returns the capability but doesn't - * check capability rights. - */ -int -fgetcap(struct thread *td, int fd, struct file **fpp) -{ - - return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP)); -} - - -/* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. 
@@ -2406,8 +2308,8 @@ int error; *vpp = NULL; - if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp, - NULL, 0)) != 0) + error = _fget(td, fd, &fp, flags, needrights, haverightsp, NULL); + if (error) return (error); if (fp->f_vnode == NULL) { error = EINVAL; @@ -2472,7 +2374,7 @@ *spp = NULL; if (fflagp != NULL) *fflagp = 0; - if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0) + if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL)) != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { error = ENOTSOCK; @@ -2508,9 +2410,6 @@ /* * Handle the last reference to a file being closed. - * - * No special capability handling here, as the capability's fo_close will run - * instead of the object here, and perform any necessary drop on the object. */ int _fdrop(struct file *fp, struct thread *td) @@ -2590,7 +2489,8 @@ * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) +dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, + int openerror, int *indxp) { struct file *fp; int error, indx; @@ -2634,18 +2534,17 @@ FILEDESC_XUNLOCK(fdp); return (EACCES); } - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(fp); + bcopy(&fdp->fd_ofiles[dfd], &fdp->fd_ofiles[indx], + sizeof(fdp->fd_ofiles[indx])); break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. 
*/ - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofiles[dfd] = NULL; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fdp->fd_ofileflags[dfd] = 0; + bcopy(&fdp->fd_ofiles[dfd], &fdp->fd_ofiles[indx], + sizeof(fdp->fd_ofiles[indx])); + bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); fdunused(fdp, dfd); break; } @@ -2801,7 +2700,7 @@ continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; @@ -2810,7 +2709,7 @@ xf.xf_type = fp->f_type; xf.xf_count = fp->f_count; xf.xf_msgcount = 0; - xf.xf_offset = fp->f_offset; + xf.xf_offset = fp->f_offset; /* XXXPJD: Access to f_offset is not synchronized in any way. */ xf.xf_flag = fp->f_flag; error = SYSCTL_OUT(req, &xf, sizeof(xf)); if (error) @@ -2917,7 +2816,7 @@ export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, fdp, req); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); @@ -2927,21 +2826,6 @@ shmfd = NULL; kif->kf_fd = i; -#ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability. With - * ofiledesc, we don't have a field to export the cap_rights_t, - * but we do with the new filedesc. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - kif->kf_flags |= KF_FLAG_CAPABILITY; - (void)cap_funwrap(fp, 0, &fp); - } -#else - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_ofiledesc: saw capability")); -#endif switch (fp->f_type) { case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; @@ -3015,7 +2899,7 @@ kif->kf_flags |= KF_FLAG_DIRECT; if (fp->f_flag & FHASLOCK) kif->kf_flags |= KF_FLAG_HASLOCK; - kif->kf_offset = fp->f_offset; + kif->kf_offset = fp->f_offset; /* XXXPJD: Access to f_offset is not synchronized in any way. */ if (vp != NULL) { vref(vp); switch (vp->v_type) { @@ -3112,8 +2996,8 @@ static int export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, - int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights, - struct kinfo_file *kif, struct sysctl_req *req) + int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, + struct sysctl_req *req) { struct { int fflag; @@ -3177,10 +3061,7 @@ for (i = 0; i < NFFLAGS; i++) if (fflags & fflags_table[i].fflag) kif->kf_flags |= fflags_table[i].kf_fflag; - if (fd_is_cap) - kif->kf_flags |= KF_FLAG_CAPABILITY; - if (fd_is_cap) - kif->kf_cap_rights = fd_cap_rights; + kif->kf_cap_rights = fd_cap_rights; kif->kf_fd = fd; kif->kf_type = type; kif->kf_ref_count = refcnt; @@ -3208,7 +3089,7 @@ int64_t offset; void *data; int error, i, *name; - int fd_is_cap, type, refcnt, fflags; + int type, refcnt, fflags; cap_rights_t fd_cap_rights; name = (int *)arg1; @@ -3238,13 +3119,13 @@ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); if (tracevp != NULL) export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (textvp != NULL) export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); if (cttyvp != NULL) export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 
0, kif, req); if (fdp == NULL) goto fail; FILEDESC_SLOCK(fdp); @@ -3254,7 +3135,7 @@ data = fdp->fd_cdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* root directory */ @@ -3263,7 +3144,7 @@ data = fdp->fd_rdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* jail directory */ @@ -3272,30 +3153,17 @@ data = fdp->fd_jdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; data = NULL; - fd_is_cap = 0; - fd_cap_rights = 0; - #ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability and export - * the capability rights mask. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - fd_is_cap = 1; - fd_cap_rights = cap_rights(fp); - (void)cap_funwrap(fp, 0, &fp); - } + fd_cap_rights = cap_rights(fdp, i); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_filedesc: saw capability")); + fd_cap_rights = 0; #endif switch (fp->f_type) { case DTYPE_VNODE: @@ -3359,7 +3227,7 @@ } refcnt = fp->f_count; fflags = fp->f_flag; - offset = fp->f_offset; + offset = fp->f_offset; /* XXXPJD: Access to f_offset is not synchronized in any way. */ /* * Create sysctl entry. 
@@ -3371,7 +3239,7 @@ if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SUNLOCK(fdp); error = export_fd_for_sysctl(data, type, i, fflags, refcnt, - offset, fd_is_cap, fd_cap_rights, kif, req); + offset, fd_cap_rights, kif, req); if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SLOCK(fdp); if (error) { @@ -3632,7 +3500,7 @@ if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { - if (fp == fdp->fd_ofiles[n]) + if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } @@ -3682,7 +3550,7 @@ if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; --- sys/kern/sys_capability.c.orig +++ sys/kern/sys_capability.c @@ -113,7 +113,7 @@ { u_int i; - i = (IN_CAPABILITY_MODE(td)) ? 1 : 0; + i = IN_CAPABILITY_MODE(td) ? 1 : 0; return (copyout(&i, uap->modep, sizeof(i))); } @@ -139,90 +139,47 @@ FEATURE(security_capabilities, "Capsicum Capabilities"); -/* - * struct capability describes a capability, and is hung off of its struct - * file f_data field. cap_file and cap_rightss are static once hooked up, as - * neither the object it references nor the rights it encapsulates are - * permitted to change. - */ -struct capability { - struct file *cap_object; /* Underlying object's file. */ - struct file *cap_file; /* Back-pointer to cap's file. */ - cap_rights_t cap_rights; /* Mask of rights on object. */ -}; +static __inline int +_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) +{ + + if ((have | need) != have) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(type, need, have); +#endif + return (ENOTCAPABLE); + } + return (0); +} /* - * Capabilities have a fileops vector, but in practice none should ever be - * called except for fo_close, as the capability will normally not be - * returned during a file descriptor lookup in the system call code. 
+ * Test whether a capability grants the requested rights. */ -static fo_rdwr_t capability_read; -static fo_rdwr_t capability_write; -static fo_truncate_t capability_truncate; -static fo_ioctl_t capability_ioctl; -static fo_poll_t capability_poll; -static fo_kqfilter_t capability_kqfilter; -static fo_stat_t capability_stat; -static fo_close_t capability_close; -static fo_chmod_t capability_chmod; -static fo_chown_t capability_chown; - -static struct fileops capability_ops = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = DFLAG_PASSABLE, -}; - -static struct fileops capability_ops_unpassable = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = 0, -}; - -static uma_zone_t capability_zone; - -static void -capability_init(void *dummy __unused) +int +cap_check(cap_rights_t have, cap_rights_t need) { - capability_zone = uma_zcreate("capability", sizeof(struct capability), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - if (capability_zone == NULL) - panic("capability_init: capability_zone not initialized"); + return (_cap_check(have, need, CAPFAIL_NOTCAPABLE)); } -SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL); /* - * Test whether a capability grants the requested rights. + * Convert capability rights into VM access flags. 
*/ -static int -cap_check(struct capability *c, cap_rights_t rights) +u_char +cap_rights_to_vmprot(cap_rights_t have) { + u_char maxprot; + + maxprot = 0; + if (have & CAP_READ) + maxprot |= VM_PROT_READ; + if (have & CAP_WRITE) + maxprot |= VM_PROT_WRITE; + if (have & CAP_MAPEXEC) + maxprot |= VM_PROT_EXECUTE; - if ((c->cap_rights | rights) != c->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights); -#endif - return (ENOTCAPABLE); - } - return (0); + return (maxprot); } /* @@ -231,15 +188,10 @@ * this one file. */ cap_rights_t -cap_rights(struct file *fp_cap) +cap_rights(struct filedesc *fdp, int fd) { - struct capability *c; - KASSERT(fp_cap->f_type == DTYPE_CAPABILITY, - ("cap_rights: !capability")); - - c = fp_cap->f_data; - return (c->cap_rights); + return (fdp->fd_ofiles[fd].fde_caprights); } /* @@ -247,27 +199,28 @@ * file object or an an existing capability. */ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - int error, capfd; - int fd = uap->fd; - struct file *fp; - cap_rights_t rights = uap->rights; + struct filedesc *fdp; + cap_rights_t rights; + int error, fd; + + fd = uap->fd; + rights = uap->rights; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(rights); - error = fget(td, fd, rights, &fp); - if (error) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &capfd); - /* - * Release our reference to the file (kern_capwrap has held a reference - * for the filedesc array). 
- */ - fdrop(fp, td); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); if (error == 0) - td->td_retval[0] = capfd; + fdp->fd_ofiles[fd].fde_caprights = rights; + FILEDESC_XUNLOCK(fdp); return (error); } @@ -275,291 +228,101 @@ * System call to query the rights mask associated with a capability. */ int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - struct capability *cp; - struct file *fp; - int error; + struct filedesc *fdp; + cap_rights_t rights; + int fd; - AUDIT_ARG_FD(uap->fd); - error = fgetcap(td, uap->fd, &fp); - if (error) - return (error); - cp = fp->f_data; - error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp)); - fdrop(fp, td); - return (error); -} + fd = uap->fd; -/* - * Create a capability to wrap around an existing file. - */ -int -kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfdp) -{ - struct capability *cp, *cp_old; - struct file *fp_object, *fcapp; - int error; + AUDIT_ARG_FD(fd); - if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); - - /* - * If a new capability is being derived from an existing capability, - * then the new capability rights must be a subset of the existing - * rights. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - cp_old = fp->f_data; - if ((cp_old->cap_rights | rights) != cp_old->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_INCREASE, - rights, cp_old->cap_rights); -#endif - return (ENOTCAPABLE); - } + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); } - - /* - * Allocate a new file descriptor to hang the capability off of. 
- */ - error = falloc(td, &fcapp, capfdp, fp->f_flag); - if (error) - return (error); - - /* - * Rather than nesting capabilities, directly reference the object an - * existing capability references. There's nothing else interesting - * to preserve for future use, as we've incorporated the previous - * rights mask into the new one. This prevents us from having to - * deal with capability chains. - */ - if (fp->f_type == DTYPE_CAPABILITY) - fp_object = ((struct capability *)fp->f_data)->cap_object; - else - fp_object = fp; - fhold(fp_object); - cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - cp->cap_rights = rights; - cp->cap_object = fp_object; - cp->cap_file = fcapp; - if (fp->f_flag & DFLAG_PASSABLE) - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops); - else - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops_unpassable); - - /* - * Release our private reference (the proc filedesc still has one). - */ - fdrop(fcapp, td); - return (0); + rights = cap_rights(fdp, fd); + FILEDESC_SUNLOCK(fdp); + return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp))); } -/* - * Given a file descriptor, test it against a capability rights mask and then - * return the file descriptor on which to actually perform the requested - * operation. As long as the reference to fp_cap remains valid, the returned - * pointer in *fp will remain valid, so no extra reference management is - * required, and the caller should fdrop() fp_cap as normal when done with - * both. 
- */ int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - struct capability *c; - int error; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - return (0); - } - c = fp_cap->f_data; - error = cap_check(c, rights); - if (error) - return (error); - *fpp = c->cap_object; - return (0); + return (ENOSYS); } -/* - * Slightly different routine for memory mapping file descriptors: unwrap the - * capability and check CAP_MMAP, but also return a bitmask representing the - * maximum mapping rights the capability allows on the object. - */ int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - struct capability *c; - u_char maxprot; - int error; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); - } - c = fp_cap->f_data; - error = cap_check(c, rights | CAP_MMAP); - if (error) - return (error); - *fpp = c->cap_object; - maxprot = 0; - if (c->cap_rights & CAP_READ) - maxprot |= VM_PROT_READ; - if (c->cap_rights & CAP_WRITE) - maxprot |= VM_PROT_WRITE; - if (c->cap_rights & CAP_MAPEXEC) - maxprot |= VM_PROT_EXECUTE; - *maxprotp = maxprot; - return (0); + return (ENOSYS); } -/* - * When a capability is closed, simply drop the reference on the underlying - * object and free the capability. fdrop() will handle the case where the - * underlying object also needs to close, and the caller will have already - * performed any object-specific lock or mqueue handling. 
- */ -static int -capability_close(struct file *fp, struct thread *td) +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { - struct capability *c; - struct file *fp_object; - KASSERT(fp->f_type == DTYPE_CAPABILITY, - ("capability_close: !capability")); - - c = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fp_object = c->cap_object; - uma_zfree(capability_zone, c); - return (fdrop(fp_object, td)); + return (ENOSYS); } -/* - * In general, file descriptor operations should never make it to the - * capability, only the underlying file descriptor operation vector, so panic - * if we do turn up here. - */ -static int -capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { - panic("capability_read"); + return (ENOSYS); } -static int -capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ +#else /* !CAPABILITIES */ - panic("capability_write"); -} - -static int -capability_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ - - panic("capability_truncate"); -} - -static int -capability_ioctl(struct file *fp, u_long com, void *data, - struct ucred *active_cred, struct thread *td) -{ - - panic("capability_ioctl"); -} - -static int -capability_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ - - panic("capability_poll"); -} - -static int -capability_kqfilter(struct file *fp, struct knote *kn) -{ - - panic("capability_kqfilter"); -} - -static int -capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, - struct thread *td) -{ - - panic("capability_stat"); -} - +/* + * Stub Capability functions for when options CAPABILITIES isn't compiled + * into the kernel. 
+ */ int -capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, - struct thread *td) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - panic("capability_chmod"); + return (ENOSYS); } int -capability_chown(struct file *fp, uid_t uid, gid_t gid, - struct ucred *active_cred, struct thread *td) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - panic("capability_chown"); + return (ENOSYS); } -#else /* !CAPABILITIES */ - -/* - * Stub Capability functions for when options CAPABILITIES isn't compiled - * into the kernel. - */ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { return (ENOSYS); } int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { return (ENOSYS); } int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap: saw capability")); - - *fpp = fp_cap; - return (0); + return (ENOSYS); } int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap_mmap: saw capability")); - - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + return (ENOSYS); } #endif /* CAPABILITIES */ --- sys/kern/sys_generic.c.orig +++ sys/kern/sys_generic.c @@ -718,12 +718,12 @@ switch (com) { case FIONCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); goto out; case FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); goto out; 
case FIONBIO: @@ -1129,32 +1129,8 @@ static __inline int getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) { - struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; - int error; -#endif - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor references by the capability. - */ - error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); - if (error) { - fdrop(fp, curthread); - return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ - *fpp = fp; - return (0); + return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, fpp)); } /* @@ -1335,6 +1311,9 @@ struct filedesc *fdp; struct file *fp; struct pollfd *fd; +#ifdef CAPABILITIES + cap_rights_t haverights; +#endif int n; n = 0; @@ -1348,13 +1327,15 @@ /* If the selinfo wasn't cleared the event didn't fire. */ if (si != NULL) continue; - fp = fdp->fd_ofiles[fd->fd]; + fp = fdp->fd_ofiles[fd->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + haverights = fdp->fd_ofiles[fd->fd].fde_caprights; + if (fp == NULL || + cap_check(haverights, CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fd->revents = POLLNVAL; n++; continue; @@ -1407,9 +1388,11 @@ u_int nfd; { struct filedesc *fdp = td->td_proc->p_fd; - int i; struct file *fp; - int n = 0; +#ifdef CAPABILITIES + cap_rights_t haverights; +#endif + int i, n = 0; FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { @@ -1419,13 +1402,15 @@ } else if (fds->fd < 0) { fds->revents = 0; } else { - fp = fdp->fd_ofiles[fds->fd]; + fp = fdp->fd_ofiles[fds->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + haverights = fdp->fd_ofiles[fds->fd].fde_caprights; + if (fp == NULL || + cap_check(haverights, CAP_POLL_EVENT) != 0) #else - 
if (fp == NULL) { + if (fp == NULL) #endif + { fds->revents = POLLNVAL; n++; } else { --- sys/kern/tty.c.orig +++ sys/kern/tty.c @@ -1840,23 +1840,15 @@ int error, ref; /* Validate the file descriptor. */ - if ((fdp = p->p_fd) == NULL) - return (EBADF); - - fp = fget_unlocked(fdp, fd); - if (fp == NULL) - return (EBADF); + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, CAP_TTYHOOK, &fp); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } -#ifdef CAPABILITIES - error = cap_funwrap(fp, CAP_TTYHOOK, &fp); - if (error) - goto done1; -#endif - /* * Make sure the vnode is bound to a character device. * Unlocked check for the vnode type is ok there, because we --- sys/kern/uipc_mqueue.c.orig +++ sys/kern/uipc_mqueue.c @@ -45,6 +45,7 @@ #include __FBSDID("$FreeBSD: src/sys/kern/uipc_mqueue.c,v 1.58 2012/04/23 14:10:34 trasz Exp $"); +#include "opt_capsicum.h" #include "opt_compat.h" #include @@ -2033,8 +2034,8 @@ &mqueueops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); @@ -2276,11 +2277,13 @@ error = EBADF; goto out; } - error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2); +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT); if (error) { FILEDESC_SUNLOCK(fdp); goto out; } +#endif if (fp2 != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; --- sys/kern/uipc_sem.c.orig +++ sys/kern/uipc_sem.c @@ -579,8 +579,8 @@ finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); fdrop(fp, td); --- sys/kern/uipc_shm.c.orig +++ sys/kern/uipc_shm.c @@ -629,8 +629,8 @@ finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); --- sys/kern/uipc_syscalls.c.orig +++ sys/kern/uipc_syscalls.c @@ -129,29 +129,11 @@ struct file **fpp, u_int *fflagp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor referenced by the capability. - */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, &fp); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (ENOTSOCK); --- sys/kern/uipc_usrreq.c.orig +++ sys/kern/uipc_usrreq.c @@ -279,7 +279,7 @@ static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_discard(struct file *); -static void unp_freerights(struct file **, int); +static void unp_freerights(struct filedescent *, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); @@ -1659,14 +1659,14 @@ } static void -unp_freerights(struct file **rp, int fdcount) +unp_freerights(struct filedescent *fde, int fdcount) { + struct file *fp; int i; - struct file *fp; - for (i = 0; i < fdcount; i++) { - fp = *rp; - *rp++ = NULL; + for (i = 0; i < fdcount; i++, fde++) { + fp = fde->fde_file; + bzero(fde, sizeof(*fde)); unp_discard(fp); } } @@ -1678,8 +1678,8 @@ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; - struct file **rp; - struct file 
*fp; + struct filedesc *fdesc = td->td_proc->p_fd; + struct filedescent *fde, *fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; @@ -1700,20 +1700,20 @@ datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - newfds = datalen / sizeof(struct file *); - rp = data; + newfds = datalen / sizeof(*fdep); + fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } - FILEDESC_XLOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(fdesc); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } @@ -1727,23 +1727,24 @@ *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = E2BIG; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < newfds; i++) { + for (i = 0; i < newfds; i++, fdep++, fdp++) { if (fdalloc(td, 0, &f)) panic("unp_externalize fdalloc failed"); - fp = *rp++; - td->td_proc->p_fd->fd_ofiles[f] = fp; - unp_externalize_fp(fp); - *fdp++ = f; + fde = &fdesc->fd_ofiles[f]; + fde->fde_file = fdep->fde_file; + fde->fde_caprights = fdep->fde_caprights; + unp_externalize_fp(fde->fde_file); + *fdp = f; } - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); } else { /* We can just copy anything else across. 
*/ if (error || controlp == NULL) @@ -1813,10 +1814,10 @@ { struct mbuf *control = *controlp; struct proc *p = td->td_proc; - struct filedesc *fdescp = p->p_fd; + struct filedesc *fdesc = p->p_fd; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; - struct file **rp; + struct filedescent *fde, *fdep; struct file *fp; struct timeval *tv; int i, fd, *fdp; @@ -1869,18 +1870,18 @@ * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_SLOCK(fdescp); + FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++) { fd = *fdp++; - if (fd < 0 || fd >= fdescp->fd_nfiles || - fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_SUNLOCK(fdescp); + if (fd < 0 || fd >= fdesc->fd_nfiles || + fdesc->fd_ofiles[fd].fde_file == NULL) { + FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } - fp = fdescp->fd_ofiles[fd]; + fp = fdesc->fd_ofiles[fd].fde_file; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } @@ -1889,25 +1890,26 @@ /* * Now replace the integer FDs with pointers to the - * associated global file table entry.. + * file structure and capability rights. 
*/ - newlen = oldfds * sizeof(struct file *); + newlen = oldfds * sizeof(*fdep); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; - rp = (struct file **) + fdep = (struct filedescent *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < oldfds; i++) { - fp = fdescp->fd_ofiles[*fdp++]; - *rp++ = fp; - unp_internalize_fp(fp); + for (i = 0; i < oldfds; i++, fdep++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep->fde_file = fde->fde_file; + fdep->fde_caprights = fde->fde_caprights; + unp_internalize_fp(fdep->fde_file); } - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: @@ -2255,7 +2257,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) { struct mbuf *m; - struct file **rp; + struct filedescent *fdep; struct cmsghdr *cm; void *data; int i; @@ -2280,10 +2282,10 @@ if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - qfds = datalen / sizeof (struct file *); - rp = data; - for (i = 0; i < qfds; i++) - (*op)(*rp++); + qfds = datalen / sizeof(*fdep); + fdep = data; + for (i = 0; i < qfds; i++, fdep++) + (*op)(fdep->fde_file); } if (CMSG_SPACE(datalen) < clen) { --- sys/kern/vfs_lookup.c.orig +++ sys/kern/vfs_lookup.c @@ -233,7 +233,7 @@ AUDIT_ARG_ATFD2(ndp->ni_dirfd); error = fgetvp_rights(td, ndp->ni_dirfd, ndp->ni_rightsneeded | CAP_LOOKUP, - &(ndp->ni_baserights), &dp); + &ndp->ni_baserights, &dp); #ifdef CAPABILITIES /* * Lookups relative to a capability must also be --- sys/kern/vfs_syscalls.c.orig +++ sys/kern/vfs_syscalls.c @@ -1195,8 +1195,8 @@ type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) + error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); + if (error) goto bad; atomic_set_int(&fp->f_flag, FHASLOCK); } @@ -1211,19 +1211,15 @@ * If we haven't already installed the FD 
(for dupfdopen), do so now. */ if (indx == -1) { + cap_rights_t rights = CAP_ALL; + #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) { - /* - * We are doing a strict relative lookup; wrap the - * result in a capability. - */ - if ((error = kern_capwrap(td, fp, nd.ni_baserights, - &indx)) != 0) - goto bad_unlocked; - } else + if (nd.ni_strictrelative == 1) + rights = nd.ni_baserights; #endif - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad_unlocked; + error = finstall(td, fp, &indx, flags, rights); + if (error != 0) + goto bad_unlocked; } @@ -4319,33 +4315,14 @@ * entry is held upon returning. */ int -getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, - struct file **fpp) +getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use the - * file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, &fp); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ /* * The file could be not of the vnode type, or it may be not @@ -4545,8 +4522,8 @@ type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) + error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); + if (error != 0) goto bad; atomic_set_int(&fp->f_flag, FHASLOCK); } @@ -4556,7 +4533,7 @@ goto bad; } - error = finstall(td, fp, &indx, fmode); + error = finstall(td, fp, &indx, fmode, CAP_ALL); bad: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); --- sys/netsmb/smb_dev.c.orig +++ sys/netsmb/smb_dev.c @@ -376,7 +376,7 @@ FILEDESC_SLOCK(fdp); if (fd < 0 || fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || + (fp = fdp->fd_ofiles[fd].fde_file) == NULL || (fp->f_flag & flag) == 0) { FILEDESC_SUNLOCK(fdp); return (NULL); --- sys/ofed/include/linux/file.h.orig +++ sys/ofed/include/linux/file.h @@ -47,7 +47,8 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, &file) != 0) + return (NULL); return (struct linux_file *)file->f_data; } @@ -69,8 +70,7 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); - if (file == NULL) + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, &file) != 0) return; fdclose(curthread->td_proc->p_fd, file, fd, curthread); } @@ -80,7 +80,8 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, &file) != 0) + file = NULL; filp->_file = file; finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); } --- sys/security/audit/audit_bsm.c.orig +++ sys/security/audit/audit_bsm.c @@ -1589,6 +1589,7 @@ 
} break; +#if 0 /* XXXPJD */ case AUE_CAP_NEW: /* * XXXRW/XXXJA: Would be nice to audit socket/etc information. @@ -1606,6 +1607,7 @@ kau_write(rec, tok); } break; +#endif case AUE_CAP_ENTER: case AUE_CAP_GETMODE: --- sys/sys/capability.h.orig +++ sys/sys/capability.h @@ -62,7 +62,7 @@ #define CAP_FEXECVE 0x0000000000000010ULL #define CAP_FSYNC 0x0000000000000020ULL #define CAP_FTRUNCATE 0x0000000000000040ULL -#define CAP_SEEK 0x0000000000000080ULL +#define CAP_SEEK 0x0000000000000080ULL /* lseek/pread/pwrite */ /* VFS methods. */ #define CAP_FCHFLAGS 0x0000000000000100ULL @@ -136,37 +136,39 @@ #define CAP_PDWAIT 0x0020000000000000ULL #define CAP_PDKILL 0x0040000000000000ULL -/* The mask of all valid method rights. */ +/* + * The mask of all valid method rights. + * Note that only 63 bits can be used, not 64, because one bit + * is reserved for the UF_EXCLOSE flag. Check the filedescent + * structure defined in sys/sys/filedesc.h for more details. + */ #define CAP_MASK_VALID 0x007fffffffffffffULL +#define CAP_ALL CAP_MASK_VALID #ifdef _KERNEL -#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) +#include <sys/systm.h> + +CTASSERT((CAP_MASK_VALID & 0x8000000000000000ULL) == 0); + +#define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) + +struct filedesc; /* - * Create a capability to wrap a file object. + * Test whether a capability grants the requested rights. */ -int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfd); - +int cap_check(cap_rights_t have, cap_rights_t need); /* - * Unwrap a capability if its rights mask is a superset of 'rights'. - * - * Unwrapping a non-capability is effectively a no-op; the value of fp_cap - * is simply copied into fpp. + * Convert capability rights into VM access flags.
*/ -int cap_funwrap(struct file *fp_cap, cap_rights_t rights, - struct file **fpp); -int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, - u_char *maxprotp, struct file **fpp); +u_char cap_rights_to_vmprot(cap_rights_t have); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to - * extract the rights from a capability. However, this should not be used by - * kernel code generally, instead cap_funwrap() should be used in order to - * keep all access control in one place. + * extract the rights from a capability. */ -cap_rights_t cap_rights(struct file *fp_cap); +cap_rights_t cap_rights(struct filedesc *fdp, int fd); #else /* !_KERNEL */ @@ -188,19 +190,20 @@ /* * cap_getmode(): Are we in capability mode? */ -int cap_getmode(u_int* modep); +int cap_getmode(u_int *modep); -/* - * cap_new(): Create a new capability derived from an existing file - * descriptor with the specified rights. If the existing file descriptor is - * a capability, then the new rights must be a subset of the existing rights. - */ -int cap_new(int fd, cap_rights_t rights); - -/* - * cap_getrights(): Query the rights on a capability. - */ -int cap_getrights(int fd, cap_rights_t *rightsp); +/* XXXPJD: Description. */ +int cap_rights_limit(int fd, cap_rights_t rights); +/* XXXPJD: Description. */ +int cap_rights_get(int fd, cap_rights_t *rightsp); +/* XXXPJD: Description. */ +int cap_ioctls_limit(int fd, const unsigned long *cmds, size_t ncmds); +/* XXXPJD: Description. */ +ssize_t cap_ioctls_get(int fd, unsigned long *cmds, size_t maxcmds); +/* XXXPJD: Description. */ +int cap_fcntls_limit(int fd, const int *cmds, size_t ncmds); +/* XXXPJD: Description. 
*/ +ssize_t cap_fcntls_get(int fd, int *cmds, size_t maxcmds); __END_DECLS --- sys/sys/file.h.orig +++ sys/sys/file.h @@ -64,8 +64,7 @@ #define DTYPE_SEM 9 /* posix semaphore */ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ -#define DTYPE_CAPABILITY 12 /* capability */ -#define DTYPE_PROCDESC 13 /* process descriptor */ +#define DTYPE_PROCDESC 12 /* process descriptor */ #ifdef _KERNEL @@ -199,7 +198,6 @@ struct file **fpp); int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); -int fgetcap(struct thread *td, int fd, struct file **fpp); int _fdrop(struct file *fp, struct thread *td); /* --- sys/sys/filedesc.h.orig +++ sys/sys/filedesc.h @@ -47,9 +47,14 @@ */ #define NDSLOTTYPE u_long +struct filedescent { + struct file *fde_file; /* file structure for open file */ + cap_rights_t fde_caprights:63; /* per-descriptor capability rights */ + uint64_t fde_flags:1; /* per-process open file flags */ +}; + struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ - char *fd_ofileflags; /* per-process open file flags */ + struct filedescent *fd_ofiles; /* open files */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ @@ -88,10 +93,14 @@ /* * Per-process open flags. */ -#define UF_EXCLOSE 0x01 /* auto-close on exec */ +#define UF_EXCLOSE 0x01 /* auto-close on exec */ #ifdef _KERNEL +#include <sys/systm.h> /* CTASSERT() */ + +CTASSERT(sizeof(cap_rights_t) == sizeof(uint64_t)); + /* Lock a file descriptor table.
*/ #define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") #define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) @@ -115,7 +124,8 @@ int falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags); int falloc_noinstall(struct thread *td, struct file **resultfp); -int finstall(struct thread *td, struct file *fp, int *resultfp, int flags); +int finstall(struct thread *td, struct file *fp, int *resultfp, int flags, + cap_rights_t rights); int fdalloc(struct thread *td, int minfd, int *result); int fdavail(struct thread *td, int n); int fdcheckstd(struct thread *td); @@ -135,7 +145,8 @@ void setugidsafety(struct thread *td); /* Return a referenced file from an unlocked descriptor. */ -struct file *fget_unlocked(struct filedesc *fdp, int fd); +int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + struct file **fpp); /* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */ static __inline struct file * @@ -147,7 +158,7 @@ if (fd < 0 || fd >= fdp->fd_nfiles) return (NULL); - return (fdp->fd_ofiles[fd]); + return (fdp->fd_ofiles[fd].fde_file); } #endif /* _KERNEL */