--- contrib/openbsm/etc/audit_event.orig +++ contrib/openbsm/etc/audit_event @@ -548,7 +548,7 @@ 43184:AUE_OPENAT:openat(2) - attr only:fa 43185:AUE_POSIX_OPENPT:posix_openpt(2):ip 43186:AUE_CAP_NEW:cap_new(2):fm -43187:AUE_CAP_GETRIGHTS:cap_getrights(2):fm +43187:AUE_CAP_RIGHTS_GET:cap_rights_get(2):fm 43188:AUE_CAP_ENTER:cap_enter(2):pc 43189:AUE_CAP_GETMODE:cap_getmode(2):pc 43190:AUE_POSIX_SPAWN:posix_spawn(2):pc @@ -563,6 +563,11 @@ 43199:AUE_PDGETPID:pdgetpid(2):pc 43200:AUE_PDWAIT:pdwait(2):pc 43201:AUE_WAIT6:wait6(2):pc +43202:AUE_CAP_RIGHTS_LIMIT:cap_rights_limit(2):fm +43203:AUE_CAP_IOCTLS_LIMIT:cap_ioctls_limit(2):fm +43204:AUE_CAP_IOCTLS_GET:cap_ioctls_get(2):fm +43205:AUE_CAP_FCNTLS_LIMIT:cap_fcntls_limit(2):fm +43206:AUE_CAP_FCNTLS_GET:cap_fcntls_get(2):fm # # Solaris userspace events. # --- lib/libc/gen/Makefile.inc.orig +++ lib/libc/gen/Makefile.inc @@ -16,6 +16,7 @@ assert.c \ auxv.c \ basename.c \ + cap_sandboxed.c \ check_utility_compat.c \ clock.c \ clock_getcpuclockid.c \ @@ -168,6 +169,7 @@ MAN+= alarm.3 \ arc4random.3 \ basename.3 \ + cap_sandboxed.3 \ check_utility_compat.3 \ clock.3 \ clock_getcpuclockid.3 \ --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ lib/libc/gen/cap_sandboxed.3 2013-02-26 00:15:34.138527628 +0100 @@ -0,0 +1,70 @@ +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 18, 2012 +.Dt CAP_SANDBOXED 3 +.Os +.Sh NAME +.Nm cap_sandboxed +.Nd Check if in a capability mode sandbox +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.In stdbool.h +.Ft bool +.Fn cap_sandboxed "void" +.Sh DESCRIPTION +.Fn cap_sandboxed +returns +.Va true +if the process is in a capability mode sandbox or +.Va false +if it is not. +This function is a more handy alternative to the +.Xr cap_getmode 2 +system call as it always succeeds, so there is no need for error checking. +If the support for capability mode is not compiled into the kernel, +.Fn cap_sandboxed +will always return +.Va false . +.Sh RETURN VALUES +Function +.Fn cap_sandboxed +is always successful and will return either +.Va true +or +.Va false . +.Sh SEE ALSO +.Xr cap_enter 2 , +.Xr capsicum 4 +.Sh AUTHORS +This function was implemented and manual page was written by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ lib/libc/gen/cap_sandboxed.c 2013-02-26 00:15:35.188869994 +0100 @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. 
+ * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/capability.h> + +#include <assert.h> +#include <errno.h> +#include <stdbool.h> + +bool +cap_sandboxed(void) +{ + u_int mode; + + if (cap_getmode(&mode) == -1) { + assert(errno == ENOSYS); + return (false); + } + assert(mode == 0 || mode == 1); + return (mode == 1); +} --- lib/libc/include/compat.h.orig +++ lib/libc/include/compat.h @@ -42,6 +42,8 @@ __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0); __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0); +__sym_compat(cap_getrights, cap_rights_get, FBSD_1.2); + #undef __sym_compat #endif /* __LIBC_COMPAT_H__ */ --- lib/libc/sys/Makefile.inc.orig +++ lib/libc/sys/Makefile.inc @@ -93,7 +93,9 @@ bind.2 \ brk.2 \ cap_enter.2 \ - cap_new.2 \ + cap_fcntls_limit.2 \ + cap_ioctls_limit.2 \ + cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ @@ -270,7 +272,9 @@ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 -MLINKS+=cap_new.2 cap_getrights.2 +MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 +MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 +MLINKS+=cap_rights_limit.2 cap_rights_get.2 MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 fchflags.2 \ chflags.2 lchflags.2 --- lib/libc/sys/Symbol.map.orig +++ lib/libc/sys/Symbol.map @@ -364,7 +364,6 @@ cap_enter; cap_getmode; cap_new; - cap_getrights; getloginclass; pdfork; pdgetpid; @@ -379,6 +378,13 @@ }; FBSD_1.3 { + cap_fcntls_get; + cap_fcntls_limit; + cap_ioctls_get; + cap_ioctls_limit; + cap_rights_get; + cap_rights_limit; + cap_sandboxed; clock_getcpuclockid2; ffclock_getcounter; ffclock_getestimate; --- lib/libc/sys/cap_enter.2.orig +++ lib/libc/sys/cap_enter.2 @@ -58,8 +58,10 @@ .Xr pdfork 2 will be placed in capability mode from inception. .Pp -When combined with capabilities created with -.Xr cap_new 2 , +When combined with +.Xr cap_rights_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_fcntls_limit 2 , .Fn cap_enter may be used to create kernel-enforced sandboxes in which appropriately-crafted applications or application components may be run. 
@@ -71,11 +73,6 @@ Creating effective process sandboxes is a tricky process that involves identifying the least possible rights required by the process and then passing those rights into the process in a safe manner. -See the CAVEAT -section of -.Xr cap_new 2 -for why this is particularly tricky with UNIX file descriptors as the -canonical representation of a right. Consumers of .Fn cap_enter should also be aware of other inherited rights, such as access to VM @@ -87,8 +84,33 @@ acquired rights as possible. .Sh RETURN VALUES .Rv -std cap_enter cap_getmode +.Sh ERRORS +The +.Fn cap_enter +and +.Fn cap_getmode +system calls +will fail if: +.Bl -tag -width Er +.It Bq Er ENOSYS +The kernel is compiled without: +.Pp +.Cd "options CAPABILITY_MODE" +.El +.Pp +The +.Fn cap_getmode +system call may also return the following error: +.Bl -tag -width Er +.It Bq Er EFAULT +Pointer +.Fa modep +points outside the process's allocated address space. +.El .Sh SEE ALSO -.Xr cap_new 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , .Xr fexecve 2 , .Xr capsicum 4 .Sh HISTORY --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ lib/libc/sys/cap_fcntls_limit.2 2013-02-26 00:15:37.160546326 +0100 @@ -0,0 +1,127 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_FCNTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_fcntls_limit , +.Nm cap_fcntls_get +.Nd manage allowed fcntl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_fcntls_limit "int fd" "uint32_t fcntlrights" +.Ft int +.Fn cap_fcntls_get "int fd" "uint32_t *fcntlrightsp" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_FCNTL +capability right, the list of allowed +.Xr fcntl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_fcntls_limit +system call. +.Pp +A bitmask of allowed fcntls commands for a given file descriptor can be obtained +with the +.Fn cap_fcntls_get +system call. +.Sh FLAGS +The following flags may be specified in the +.Fa fcntlrights +argument or returned in the +.Fa fcntlrightsp +argument: +.Bl -tag -width CAP_FCNTL_GETOWN +.It Dv CAP_FCNTL_GETFL +Permit +.Dv F_GETFL +command. +.It Dv CAP_FCNTL_SETFL +Permit +.Dv F_SETFL +command. +.It Dv CAP_FCNTL_GETOWN +Permit +.Dv F_GETOWN +command. +.It Dv CAP_FCNTL_SETOWN +Permit +.Dv F_SETOWN +command. 
+.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Fn cap_fcntls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EINVAL +An invalid flag has been passed in +.Fa fcntlrights . +.It Bq Er ENOTCAPABLE +.Fa fcntlrights +would expand the list of allowed +.Xr fcntl 2 +commands. +.El +.Pp +.Fn cap_fcntls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa fcntlrightsp +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr fcntl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ lib/libc/sys/cap_ioctls_limit.2 2013-02-26 00:15:39.108527627 +0100 @@ -0,0 +1,158 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_IOCTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_ioctls_limit , +.Nm cap_ioctls_get +.Nd manage allowed ioctl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_ioctls_limit "int fd" "const unsigned long *cmds" "size_t ncmds" +.Ft ssize_t +.Fn cap_ioctls_get "int fd" "unsigned long *cmds" "size_t maxcmds" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_IOCTL +capability right, the list of allowed +.Xr ioctl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_ioctls_limit +system call. +The +.Fa cmds +argument is an array of +.Xr ioctl 2 +commands and the +.Fa ncmds +argument specifies the number of elements in the array. +There might be up to +.Va 256 +elements in the array. +.Pp +The list of allowed ioctl commands for a given file descriptor can be obtained +with the +.Fn cap_ioctls_get +system call. +The +.Fa cmds +argument points at memory that can hold up to +.Fa maxcmds +values. +The function populates the provided buffer with up to +.Fa maxcmds +elements, but always returns the total number of ioctl commands allowed for the +given file descriptor. 
+The total number of ioctl commands for the given file descriptor can be +obtained by passing +.Dv NULL No as the +.Fa cmds +argument and +.Va 0 +as the +.Fa maxcmds +argument. +If all ioctl commands are allowed +.Dv ( CAP_IOCTL +capability right is assigned to the file descriptor and the +.Fn cap_ioctls_limit +system call was never called for this file descriptor), the +.Fn cap_ioctls_get +system call will return +.Dv CAP_IOCTLS_ALL +and won't modify the buffer pointed to by the +.Fa cmds +argument. +.Sh RETURN VALUES +.Rv -std cap_ioctls_limit +.Pp +The +.Fn cap_ioctls_get +function, if successful, returns the total number of allowed ioctl commands or +the value +.Dv CAP_IOCTLS_ALL +if all ioctl commands are allowed. +On failure the value +.Va -1 +is returned and the global variable errno is set to indicate the error. +.Sh ERRORS +.Fn cap_ioctls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at an invalid address. +.It Bq Er EINVAL +The +.Fa ncmds +argument is greater than +.Va 256 . +.It Bq Er ENOTCAPABLE +.Fa cmds +would expand the list of allowed +.Xr ioctl 2 +commands. +.El +.Pp +.Fn cap_ioctls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr cap_fcntls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr ioctl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. --- lib/libc/sys/cap_new.2 2013-02-26 00:15:41.068866438 +0100 +++ /dev/null 2013-02-26 00:11:00.000000000 +0100 @@ -1,475 +0,0 @@ -.\" -.\" Copyright (c) 2008-2010 Robert N. M. Watson -.\" All rights reserved. 
-.\" -.\" This software was developed at the University of Cambridge Computer -.\" Laboratory with support from a grant from Google, Inc. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. 
-.\" -.\" $FreeBSD: head/lib/libc/sys/cap_new.2 233648 2012-03-29 05:02:12Z eadler $ -.\" -.Dd July 20, 2011 -.Dt CAP_NEW 2 -.Os -.Sh NAME -.Nm cap_new , -.Nm cap_getrights -.Nd System calls to manipulate capabilities -.Sh LIBRARY -.Lb libc -.Sh SYNOPSIS -.In sys/capability.h -.Ft int -.Fn cap_new "int fd" "cap_rights_t rights" -.Ft int -.Fn cap_getrights "int fd" "cap_rights_t *rightsp" -.Sh DESCRIPTION -Capabilities are special file descriptors derived from an existing file -descriptor, such as one returned by -.Xr fhopen 2 , -.Xr kqueue 2 , -.Xr mq_open 2 , -.Xr open 2 , -.Xr pipe 2 , -.Xr shm_open 2 , -.Xr socket 2 , -or -.Xr socketpair 2 , -but with a restricted set of permitted operations determined by a rights -mask set when the capability is created. -These restricted rights cannot be changed after the capability is created, -although further capabilities with yet more restricted rights may be created -from an existing capability. -In every other sense, a capability behaves in the same way as the file -descriptor it was created from. -.Pp -.Fn cap_new -creates a new capability for the existing file descriptor -.Fa fd , -and returns a file descriptor for it. -Operations on the capability will be limited to those permitted by -.Fa rights , -which is static for the lifetime of the capability. -If -.Fa fd -refers to an existing capability, then -.Fa rights -must be equal to or a subset of the rights on that capability. -As with -.Xr dup 2 -and -.Xr dup2 2 , -many properties are shared between the new capability and the existing file -descriptor, including open file flags, blocking disposition, and file offset. -Many applications will prefer to use the -.Xr cap_limitfd 3 -library call, part of -.Xr libcapsicum 3 , -as it offers a more convenient interface. -.Pp -.Fn cap_getrights -queries the rights associated with the capability referred to by file -descriptor -.Fa fd . 
-.Pp -These system calls, when combined with -.Xr cap_enter 2 , -may be used to construct process sandboxes with highly granular rights -assignment. -.Sh RIGHTS -The following rights may be specified in a new capability rights mask: -.Bl -tag -width CAP_EXTATTR_DELETE -.It Dv CAP_ACCEPT -Permit -.Xr accept 2 . -.It Dv CAP_ACL_CHECK -Permit checking of an ACL on a file descriptor; there is no cross-reference -for this system call. -.It Dv CAP_ACL_DELETE -Permit -.Xr acl_delete_fd_np 3 . -.It Dv CAP_ACL_GET -Permit -.Xr acl_get_fd 3 -and -.Xr acl_get_fd_np 3 . -.It Dv CAP_ACL_SET -Permit -.Xr acl_set_fd 3 -and -.Xr acl_set_fd_np 3 . -.It Dv CAP_BIND -Permit -.Xr bind 2 . -Note that sockets can also become bound implicitly as a result of -.Xr connect 2 -or -.Xr send 2 , -and that socket options set with -.Xr setsockopt 2 -may also affect binding behavior. -.It Dv CAP_CONNECT -Permit -.Xr connect 2 ; -also required for -.Xr sendto 2 -with a non-NULL destination address. -.It Dv CAP_EVENT -Permit -.Xr select 2 , -.Xr poll 2 , -and -.Xr kevent 2 -to be used in monitoring the file descriptor for events. -.It Dv CAP_FEXECVE -Permit -.Xr fexecve 2 ; -.Dv CAP_READ -will also be required. -.It Dv CAP_EXTATTR_DELETE -Permit -.Xr extattr_delete_fd 2 . -.It Dv CAP_EXTATTR_GET -Permit -.Xr extattr_get_fd 2 . -.It Dv CAP_EXTATTR_LIST -Permit -.Xr extattr_list_fd 2 . -.It Dv CAP_EXTATTR_SET -Permit -.Xr extattr_set_fd 2 . -.It Dv CAP_FCHDIR -Permit -.Xr fchdir 2 . -.It Dv CAP_FCHFLAGS -Permit -.Xr fchflags 2 . -.It Dv CAP_FCHMOD -Permit -.Xr fchmod 2 . -.It Dv CAP_FCHOWN -Permit -.Xr fchown 2 . -.It Dv CAP_FCNTL -Permit -.Xr fcntl 2 ; -be aware that this call provides indirect access to other operations, such as -.Xr flock 2 . -.It Dv CAP_FLOCK -Permit -.Xr flock 2 -and related calls. -.It Dv CAP_FPATHCONF -Permit -.Xr fpathconf 2 . -.It Dv CAP_FSCK -Permit UFS background-fsck operations on the descriptor. -.It Dv CAP_FSTAT -Permit -.Xr fstat 2 . 
-.It Dv CAP_FSTATFS -Permit -.Xr fstatfs 2 . -.It Dv CAP_FSYNC -Permit -.Xr aio_fsync 2 -and -.Xr fsync 2 . -.Pp -.It Dv CAP_FTRUNCATE -Permit -.Xr ftruncate 2 . -.It Dv CAP_FUTIMES -Permit -.Xr futimes 2 . -.It Dv CAP_GETPEERNAME -Permit -.Xr getpeername 2 . -.It Dv CAP_GETSOCKNAME -Permit -.Xr getsockname 2 . -.It Dv CAP_GETSOCKOPT -Permit -.Xr getsockopt 2 . -.It Dv CAP_IOCTL -Permit -.Xr ioctl 2 . -Be aware that this system call has enormous scope, including potentially -global scope for some objects. -.It Dv CAP_KEVENT -Permit -.Xr kevent 2 ; -.Dv CAP_EVENT -is also required on file descriptors that will be monitored using -.Xr kevent 2 . -.It Dv CAP_LISTEN -Permit -.Xr listen 2 ; -not much use (generally) without -.Dv CAP_BIND . -.It Dv CAP_LOOKUP -Permit the file descriptor to be used as a starting directory for calls such -as -.Xr linkat 2 , -.Xr openat 2 , -and -.Xr unlinkat 2 . -Note that these calls are not available in capability mode as they manipulate -a global name space; see -.Xr cap_enter 2 -for details. -.It Dv CAP_MAC_GET -Permit -.Xr mac_get_fd 3 . -.It Dv CAP_MAC_SET -Permit -.Xr mac_set_fd 3 . -.It Dv CAP_MMAP -Permit -.Xr mmap 2 ; -specific invocations may also require -.Dv CAP_READ -or -.Dv CAP_WRITE . -.Pp -.It Dv CAP_PDGETPID -Permit -.Xr pdgetpid 2 . -.It Dv CAP_PDKILL -Permit -.Xr pdkill 2 . -.It Dv CAP_PDWAIT -Permit -.Xr pdwait4 2 . -.It Dv CAP_PEELOFF -Permit -.Xr sctp_peeloff 2 . -.It Dv CAP_READ -Allow -.Xr aio_read 2 , -.Xr pread 2 , -.Xr read 2 , -.Xr recv 2 , -.Xr recvfrom 2 , -.Xr recvmsg 2 , -and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. -.It Dv CAP_REVOKE -Permit -.Xr frevoke 2 -in certain ABI compatibility modes that support this system call. -.It Dv CAP_SEEK -Permit operations that seek on the file descriptor, such as -.Xr lseek 2 , -but also required for I/O system calls that modify the file offset, such as -.Xr read 2 -and -.Xr write 2 . 
-.It Dv CAP_SEM_GETVALUE -Permit -.Xr sem_getvalue 3 . -.It Dv CAP_SEM_POST -Permit -.Xr sem_post 3 . -.It Dv CAP_SEM_WAIT -Permit -.Xr sem_wait 3 -and -.Xr sem_trywait 3 . -.It Dv CAP_SETSOCKOPT -Permit -.Xr setsockopt 2 ; -this controls various aspects of socket behavior and may affect binding, -connecting, and other behaviors with global scope. -.It Dv CAP_SHUTDOWN -Permit explicit -.Xr shutdown 2 ; -closing the socket will also generally shut down any connections on it. -.It Dv CAP_TTYHOOK -Allow configuration of TTY hooks, such as -.Xr snp 4 , -on the file descriptor. -.It Dv CAP_WRITE -Allow -.Xr aio_write 2 , -.Xr pwrite 2 , -.Xr send 2 , -.Xr sendmsg 2 , -.Xr sendto 2 , -.Xr write 2 , -and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. -.Pp -For -.Xr sendto 2 -with a non-NULL connection address, -.Dv CAP_CONNECT -is also required. -.El -.Sh CAVEAT -The -.Fn cap_new -system call and the capabilities it creates may be used to assign -fine-grained rights to sandboxed processes running in capability mode. -However, the semantics of objects accessed via file descriptors are complex, -so caution should be exercised in passing object capabilities into sandboxes. -.Sh RETURN VALUES -If successful, -.Fn cap_new -returns a non-negative integer, termed a file descriptor. -It returns -1 on failure, and sets -.Va errno -to indicate the error. -.Pp -.Rv -std cap_getrights -.Sh ERRORS -.Fn cap_new -may return the following errors: -.Bl -tag -width Er -.It Bq Er EBADF -The -.Fa fd -argument is not a valid active descriptor. -.It Bq Er EINVAL -An invalid right has been requested in -.Fa rights . -.It Bq Er EMFILE -The process has already reached its limit for open file descriptors. -.It Bq Er ENFILE -The system file table is full. -.It Bq Er EPERM -.Fa rights -contains requested rights not present in the current rights mask associated -with the capability referenced by -.Fa fd , -if any. 
-.El -.Pp -.Fn cap_getrights -may return the following errors: -.Bl -tag -width Er -.It Bq Er EBADF -The -.Fa fd -argument is not a valid active descriptor. -.It Bq Er EINVAL -The -.Fa fd -argument is not a capability. -.El -.Sh SEE ALSO -.Xr accept 2 , -.Xr aio_fsync 2 , -.Xr aio_read 2 , -.Xr aio_write 2 , -.Xr bind 2 , -.Xr cap_enter 2 , -.Xr connect 2 , -.Xr dup 2 , -.Xr dup2 2 , -.Xr extattr_delete_fd 2 , -.Xr extattr_get_fd 2 , -.Xr extattr_list_fd 2 , -.Xr extattr_set_fd 2 , -.Xr fchflags 2 , -.Xr fchown 2 , -.Xr fcntl 2 , -.Xr fexecve 2 , -.Xr fhopen 2 , -.Xr flock 2 , -.Xr fpathconf 2 , -.Xr fstat 2 , -.Xr fstatfs 2 , -.Xr fsync 2 , -.Xr ftruncate 2 , -.Xr futimes 2 , -.Xr getpeername 2 , -.Xr getsockname 2 , -.Xr getsockopt 2 , -.Xr ioctl 2 , -.Xr kevent 2 , -.Xr kqueue 2 , -.Xr linkat 2 , -.Xr listen 2 , -.Xr mmap 2 , -.Xr mq_open 2 , -.Xr open 2 , -.Xr openat 2 , -.Xr pdgetpid 2 , -.Xr pdkill 2 , -.Xr pdwait4 2 , -.Xr pipe 2 , -.Xr poll 2 , -.Xr pread 2 , -.Xr pwrite 2 , -.Xr read 2 , -.Xr recv 2 , -.Xr recvfrom 2 , -.Xr recvmsg 2 , -.Xr sctp_peeloff 2 , -.Xr select 2 , -.Xr send 2 , -.Xr sendmsg 2 , -.Xr sendto 2 , -.Xr setsockopt 2 , -.Xr shm_open 2 , -.Xr shutdown 2 , -.Xr socket 2 , -.Xr socketpair 2 , -.Xr unlinkat 2 , -.Xr write 2 , -.Xr acl_delete_fd_np 3 , -.Xr acl_get_fd 3 , -.Xr acl_get_fd_np 3 , -.Xr acl_set_fd_np 3 , -.Xr cap_limitfd 3 , -.Xr libcapsicum 3 , -.Xr mac_get_fd 3 , -.Xr mac_set_fd 3 , -.Xr sem_getvalue 3 , -.Xr sem_post 3 , -.Xr sem_trywait 3 , -.Xr sem_wait 3 , -.Xr capsicum 4 , -.Xr snp 4 -.Sh HISTORY -Support for capabilities and capabilities mode was developed as part of the -.Tn TrustedBSD -Project. -.Sh AUTHORS -These functions and the capability facility were created by -.An "Robert N. M. Watson" -at the University of Cambridge Computer Laboratory with support from a grant -from Google, Inc. -.Sh BUGS -This man page should list the set of permitted system calls more specifically -for each capability right. 
-.Pp -Capability rights sometimes have unclear indirect impacts, which should be -documented, or at least hinted at. --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ lib/libc/sys/cap_rights_limit.2 2013-02-26 00:15:43.283527846 +0100 @@ -0,0 +1,603 @@ +.\" +.\" Copyright (c) 2008-2010 Robert N. M. Watson +.\" Copyright (c) 2012-2013 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This software was developed at the University of Cambridge Computer +.\" Laboratory with support from a grant from Google, Inc. +.\" +.\" Portions of this documentation were written by Pawel Jakub Dawidek +.\" under sponsorship from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd February 23, 2013 +.Dt CAP_RIGHTS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_rights_limit , +.Nm cap_rights_get +.Nd manage capability rights +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_rights_limit "int fd" "cap_rights_t rights" +.Ft int +.Fn cap_rights_get "int fd" "cap_rights_t *rightsp" +.Sh DESCRIPTION +When a file descriptor is created by a function such as +.Xr fhopen 2 , +.Xr kqueue 2 , +.Xr mq_open 2 , +.Xr open 2 , +.Xr openat 2 , +.Xr pdfork 2 , +.Xr pipe 2 , +.Xr shm_open 2 , +.Xr socket 2 , +or +.Xr socketpair 2 , +it is assigned all capability rights. +Those rights can be reduced (but never expanded) by using the +.Fn cap_rights_limit +system call. +Once capability rights are reduced, operations on the file descriptor will be +limited to those permitted by +.Fa rights . +.Pp +A bitmask of capability rights assigned to a file descriptor can be obtained with +the +.Fn cap_rights_get +system call. +.Sh RIGHTS +The following rights may be specified in a rights mask: +.Bl -tag -width CAP_EXTATTR_DELETE +.It Dv CAP_ACCEPT +Permit +.Xr accept 2 . +.It Dv CAP_ACL_CHECK +Permit checking of an ACL on a file descriptor; there is no cross-reference +for this system call. +.It Dv CAP_ACL_DELETE +Permit +.Xr acl_delete_fd_np 3 . +.It Dv CAP_ACL_GET +Permit +.Xr acl_get_fd 3 +and +.Xr acl_get_fd_np 3 . +.It Dv CAP_ACL_SET +Permit +.Xr acl_set_fd 3 +and +.Xr acl_set_fd_np 3 . +.It Dv CAP_BIND +Permit +.Xr bind 2 . +Note that sockets can also become bound implicitly as a result of +.Xr connect 2 +or +.Xr send 2 , +and that socket options set with +.Xr setsockopt 2 +may also affect binding behavior. +.It Dv CAP_CONNECT +Permit +.Xr connect 2 ; +also required for +.Xr sendto 2 +with a non-NULL destination address. +.It Dv CAP_CREATE +Permit +.Xr openat 2 +with the +.Dv O_CREAT +flag. +.\" XXXPJD: Doesn't exist anymore. 
+.It Dv CAP_EVENT +Permit +.Xr select 2 , +.Xr poll 2 , +and +.Xr kevent 2 +to be used in monitoring the file descriptor for events. +.It Dv CAP_FEXECVE +Permit +.Xr fexecve 2 +and +.Xr openat 2 +with the +.Dv O_EXEC +flag; +.Dv CAP_READ +will also be required. +.It Dv CAP_EXTATTR_DELETE +Permit +.Xr extattr_delete_fd 2 . +.It Dv CAP_EXTATTR_GET +Permit +.Xr extattr_get_fd 2 . +.It Dv CAP_EXTATTR_LIST +Permit +.Xr extattr_list_fd 2 . +.It Dv CAP_EXTATTR_SET +Permit +.Xr extattr_set_fd 2 . +.It Dv CAP_FCHDIR +Permit +.Xr fchdir 2 . +.It Dv CAP_FCHFLAGS +Permit +.Xr fchflags 2 . +.It Dv CAP_FCHMOD +Permit +.Xr fchmod 2 +and +.Xr fchmodat 2 . +.It Dv CAP_FCHMODAT +An alias to +.Dv CAP_FCHMOD . +.It Dv CAP_FCHOWN +Permit +.Xr fchown 2 +and +.Xr fchownat 2 . +.It Dv CAP_FCHOWNAT +An alias to +.Dv CAP_FCHOWN . +.It Dv CAP_FCNTL +Permit +.Xr fcntl 2 . +Note that only the +.Dv F_GETFL , +.Dv F_SETFL , +.Dv F_GETOWN +and +.Dv F_SETOWN +commands require this capability right. +Also note that the list of permitted commands can be further limited with the +.Xr cap_fcntls_limit 2 +system call. +.It Dv CAP_FLOCK +Permit +.Xr flock 2 , +.Xr fcntl 2 +(with +.Dv F_GETLK , +.Dv F_SETLK +or +.Dv F_SETLKW +flag) and +.Xr openat 2 +(with +.Dv O_EXLOCK +or +.Dv O_SHLOCK +flag). +.It Dv CAP_FPATHCONF +Permit +.Xr fpathconf 2 . +.It Dv CAP_FSCK +Permit UFS background-fsck operations on the descriptor. +.It Dv CAP_FSTAT +Permit +.Xr fstat 2 +and +.Xr fstatat 2 . +.It Dv CAP_FSTATAT +An alias to +.Dv CAP_FSTAT . +.It Dv CAP_FSTATFS +Permit +.Xr fstatfs 2 . +.It Dv CAP_FSYNC +Permit +.Xr aio_fsync 2 , +.Xr fsync 2 +and +.Xr openat 2 +with +.Dv O_FSYNC +or +.Dv O_SYNC +flag. +.It Dv CAP_FTRUNCATE +Permit +.Xr ftruncate 2 +and +.Xr openat 2 +with the +.Dv O_TRUNC +flag. +.It Dv CAP_FUTIMES +Permit +.Xr futimes 2 +and +.Xr futimesat 2 . +.It Dv CAP_FUTIMESAT +An alias to +.Dv CAP_FUTIMES . +.It Dv CAP_GETPEERNAME +Permit +.Xr getpeername 2 . +.It Dv CAP_GETSOCKNAME +Permit +.Xr getsockname 2 . 
+.It Dv CAP_GETSOCKOPT +Permit +.Xr getsockopt 2 . +.It Dv CAP_IOCTL +Permit +.Xr ioctl 2 . +Be aware that this system call has enormous scope, including potentially +global scope for some objects. +The list of permitted ioctl commands can be further limited with the +.Xr cap_ioctls_limit 2 +system call. +.\" XXXPJD: Doesn't exist anymore. +.It Dv CAP_KEVENT +Permit +.Xr kevent 2 ; +.Dv CAP_EVENT +is also required on file descriptors that will be monitored using +.Xr kevent 2 . +.It Dv CAP_LINKAT +Permit +.Xr linkat 2 +and +.Xr renameat 2 . +This right is required for the destination directory descriptor. +.It Dv CAP_LISTEN +Permit +.Xr listen 2 ; +not much use (generally) without +.Dv CAP_BIND . +.It Dv CAP_LOOKUP +Permit the file descriptor to be used as a starting directory for calls such as +.Xr linkat 2 , +.Xr openat 2 , +and +.Xr unlinkat 2 . +.It Dv CAP_MAC_GET +Permit +.Xr mac_get_fd 3 . +.It Dv CAP_MAC_SET +Permit +.Xr mac_set_fd 3 . +.It Dv CAP_MKDIRAT +Permit +.Xr mkdirat 2 . +.It Dv CAP_MKFIFOAT +Permit +.Xr mkfifoat 2 . +.It Dv CAP_MKNODAT +Permit +.Xr mknodat 2 . +.It Dv CAP_MMAP +Permit +.Xr mmap 2 +with the +.Dv PROT_NONE +protection. +.It Dv CAP_MMAP_R +Permit +.Xr mmap 2 +with the +.Dv PROT_READ +protection. +This also implies +.Dv CAP_READ +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_W +Permit +.Xr mmap 2 +with the +.Dv PROT_WRITE +protection. +This also implies +.Dv CAP_WRITE +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_X +Permit +.Xr mmap 2 +with the +.Dv PROT_EXEC +protection. +This also implies +.Dv CAP_SEEK +right. +.It Dv CAP_MMAP_RW +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_W . +.It Dv CAP_MMAP_RX +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_WX +Implies +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_RWX +Implies +.Dv CAP_MMAP_R , +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . +.It Dv CAP_PDGETPID +Permit +.Xr pdgetpid 2 . +.It Dv CAP_PDKILL +Permit +.Xr pdkill 2 . +.It Dv CAP_PDWAIT +Permit +.Xr pdwait4 2 . 
+.It Dv CAP_PEELOFF +Permit +.Xr sctp_peeloff 2 . +.\" XXXPJD: Not documented. +.It Dv CAP_POLL_EVENT +.\" XXXPJD: Not documented. +.It Dv CAP_POST_EVENT +.It Dv CAP_PREAD +Implies +.Dv CAP_SEEK +and +.Dv CAP_READ . +.It Dv CAP_PWRITE +Implies +.Dv CAP_SEEK +and +.Dv CAP_WRITE . +.It Dv CAP_READ +Allow +.Xr aio_read 2 , +.Xr openat 2 +with the +.Dv O_RDONLY flag, +.Xr read 2 , +.Xr recv 2 , +.Xr recvfrom 2 , +.Xr recvmsg 2 +and related system calls. +.It Dv CAP_RECV +An alias to +.Dv CAP_READ . +.It Dv CAP_RENAMEAT +Permit +.Xr renameat 2 . +This right is required for the source directory descriptor. +.It Dv CAP_SEEK +Permit operations that seek on the file descriptor, such as +.Xr lseek 2 , +but also required for I/O system calls that can read or write at any position +in the file, such as +.Xr pread 2 +and +.Xr pwrite 2 . +.It Dv CAP_SEM_GETVALUE +Permit +.Xr sem_getvalue 3 . +.It Dv CAP_SEM_POST +Permit +.Xr sem_post 3 . +.It Dv CAP_SEM_WAIT +Permit +.Xr sem_wait 3 +and +.Xr sem_trywait 3 . +.It Dv CAP_SEND +An alias to +.Dv CAP_WRITE . +.It Dv CAP_SETSOCKOPT +Permit +.Xr setsockopt 2 ; +this controls various aspects of socket behavior and may affect binding, +connecting, and other behaviors with global scope. +.It Dv CAP_SHUTDOWN +Permit explicit +.Xr shutdown 2 ; +closing the socket will also generally shut down any connections on it. +.It Dv CAP_SYMLINKAT +Permit +.Xr symlinkat 2 . +.It Dv CAP_TTYHOOK +Allow configuration of TTY hooks, such as +.Xr snp 4 , +on the file descriptor. +.It Dv CAP_UNLINKAT +Permit +.Xr unlinkat 2 +and +.Xr renameat 2 . +This right is only required for +.Xr renameat 2 +on the destination directory descriptor if the destination object already +exists and will be removed by the rename. +.It Dv CAP_WRITE +Allow +.Xr aio_write 2 , +.Xr openat 2 +with +.Dv O_WRONLY +and +.Dv O_APPEND +flags, +.Xr send 2 , +.Xr sendmsg 2 , +.Xr sendto 2 , +.Xr write 2 , +and related system calls.
+For +.Xr sendto 2 +with a non-NULL connection address, +.Dv CAP_CONNECT +is also required. +For +.Xr openat 2 +with the +.Dv O_WRONLY +flag, but without the +.Dv O_APPEND +flag, +.Dv CAP_SEEK +is also required. +.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Fn cap_rights_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid active descriptor. +.It Bq Er EINVAL +An invalid right has been requested in +.Fa rights . +.It Bq Er ENOTCAPABLE +.Fa rights +contains requested rights not present in the current rights mask associated +with the given file descriptor. +.El +.Pp +.Fn cap_rights_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid active descriptor. +.It Bq Er EFAULT +The +.Fa rightsp +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr accept 2 , +.Xr aio_fsync 2 , +.Xr aio_read 2 , +.Xr aio_write 2 , +.Xr bind 2 , +.Xr cap_enter 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr connect 2 , +.Xr dup 2 , +.Xr dup2 2 , +.Xr extattr_delete_fd 2 , +.Xr extattr_get_fd 2 , +.Xr extattr_list_fd 2 , +.Xr extattr_set_fd 2 , +.Xr fchflags 2 , +.Xr fchown 2 , +.Xr fcntl 2 , +.Xr fexecve 2 , +.Xr fhopen 2 , +.Xr flock 2 , +.Xr fpathconf 2 , +.Xr fstat 2 , +.Xr fstatfs 2 , +.Xr fsync 2 , +.Xr ftruncate 2 , +.Xr futimes 2 , +.Xr getpeername 2 , +.Xr getsockname 2 , +.Xr getsockopt 2 , +.Xr ioctl 2 , +.Xr kevent 2 , +.Xr kqueue 2 , +.Xr linkat 2 , +.Xr listen 2 , +.Xr mmap 2 , +.Xr mq_open 2 , +.Xr open 2 , +.Xr openat 2 , +.Xr pdfork 2 , +.Xr pdgetpid 2 , +.Xr pdkill 2 , +.Xr pdwait4 2 , +.Xr pipe 2 , +.Xr poll 2 , +.Xr pread 2 , +.Xr pwrite 2 , +.Xr read 2 , +.Xr recv 2 , +.Xr recvfrom 2 , +.Xr recvmsg 2 , +.Xr renameat 2 , +.Xr sctp_peeloff 2 , +.Xr select 2 , +.Xr send 2 , +.Xr sendmsg 2 , +.Xr sendto 2 , +.Xr setsockopt 2 , +.Xr shm_open 2 , +.Xr shutdown 2 , +.Xr socket 2 , +.Xr socketpair 2 , +.Xr symlinkat 2 , +.Xr unlinkat 2 , +.Xr 
write 2 , +.Xr acl_delete_fd_np 3 , +.Xr acl_get_fd 3 , +.Xr acl_get_fd_np 3 , +.Xr acl_set_fd_np 3 , +.Xr cap_limitfd 3 , +.Xr libcapsicum 3 , +.Xr mac_get_fd 3 , +.Xr mac_set_fd 3 , +.Xr sem_getvalue 3 , +.Xr sem_post 3 , +.Xr sem_trywait 3 , +.Xr sem_wait 3 , +.Xr capsicum 4 , +.Xr snp 4 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. +.Sh BUGS +This man page should list the set of permitted system calls more specifically +for each capability right. +.Pp +Capability rights sometimes have unclear indirect impacts, which should be +documented, or at least hinted at. --- lib/libc/sys/dup.2.orig +++ lib/libc/sys/dup.2 @@ -115,11 +115,6 @@ is a valid descriptor, then .Fn dup2 is successful, and does nothing. -.Pp -The related -.Xr cap_new 2 -system call allows file descriptors to be duplicated with restrictions on -their use. .Sh RETURN VALUES The value -1 is returned if an error occurs in either call. 
The external variable @@ -152,7 +147,6 @@ .El .Sh SEE ALSO .Xr accept 2 , -.Xr cap_new 2 , .Xr close 2 , .Xr fcntl 2 , .Xr getdtablesize 2 , --- lib/libprocstat/libprocstat.c.orig +++ lib/libprocstat/libprocstat.c @@ -600,7 +600,6 @@ } kfflags2fst[] = { { KF_FLAG_APPEND, PS_FST_FFLAG_APPEND }, { KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC }, - { KF_FLAG_CAPABILITY, PS_FST_FFLAG_CAPABILITY }, { KF_FLAG_CREAT, PS_FST_FFLAG_CREAT }, { KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT }, { KF_FLAG_EXCL, PS_FST_FFLAG_EXCL }, --- lib/libprocstat/libprocstat.h.orig +++ lib/libprocstat/libprocstat.h @@ -88,7 +88,6 @@ #define PS_FST_FFLAG_DIRECT 0x1000 #define PS_FST_FFLAG_EXEC 0x2000 #define PS_FST_FFLAG_HASLOCK 0x4000 -#define PS_FST_FFLAG_CAPABILITY 0x8000 struct procstat; struct filestat { --- sys/bsm/audit_kevents.h.orig +++ sys/bsm/audit_kevents.h @@ -588,7 +588,7 @@ #define AUE_OPENAT 43184 /* FreeBSD. */ #define AUE_POSIX_OPENPT 43185 /* FreeBSD. */ #define AUE_CAP_NEW 43186 /* TrustedBSD. */ -#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */ +#define AUE_CAP_RIGHTS_GET 43187 /* TrustedBSD. */ #define AUE_CAP_ENTER 43188 /* TrustedBSD. */ #define AUE_CAP_GETMODE 43189 /* TrustedBSD. */ #define AUE_POSIX_SPAWN 43190 /* Darwin. */ @@ -603,6 +603,11 @@ #define AUE_PDGETPID 43199 /* FreeBSD. */ #define AUE_PDWAIT 43200 /* FreeBSD. */ #define AUE_WAIT6 43201 /* FreeBSD. */ +#define AUE_CAP_RIGHTS_LIMIT 43202 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_LIMIT 43203 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_GET 43204 /* TrustedBSD. */ +#define AUE_CAP_FCNTLS_LIMIT 43205 /* TrustedBSD. */ +#define AUE_CAP_FCNTLS_GET 43206 /* TrustedBSD. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the --- sys/cddl/compat/opensolaris/sys/file.h.orig +++ sys/cddl/compat/opensolaris/sys/file.h @@ -39,15 +39,11 @@ #include static __inline file_t * -getf(int fd) +getf(int fd, cap_rights_t rights) { struct file *fp; - /* - * We wouldn't need all of these rights on every invocation - * if we had more information about intent. - */ - if (fget(curthread, fd, CAP_READ | CAP_WRITE | CAP_SEEK, &fp) == 0) + if (fget(curthread, fd, rights, &fp) == 0) return (fp); return (NULL); } --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c.orig +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3822,7 +3822,7 @@ return (error); fd = zc->zc_cookie; - fp = getf(fd); + fp = getf(fd, CAP_PREAD); if (fp == NULL) { nvlist_free(props); return (EBADF); @@ -4079,7 +4079,7 @@ error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, &zc->zc_objset_type); } else { - file_t *fp = getf(zc->zc_cookie); + file_t *fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dsl_dataset_rele(ds, FTAG); if (dsfrom) @@ -4675,7 +4675,7 @@ return (error); } - fp = getf(zc->zc_cookie); + fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c.orig +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c @@ -124,7 +124,7 @@ void *data; int error; - fp = getf(fd); + fp = getf(fd, CAP_NONE); if (fp == NULL) return (EBADF); --- sys/compat/freebsd32/freebsd32_syscall.h.orig +++ sys/compat/freebsd32/freebsd32_syscall.h @@ -416,7 +416,7 @@ #define FREEBSD32_SYS_freebsd32_shmctl 512 #define FREEBSD32_SYS_lpathconf 513 #define FREEBSD32_SYS_cap_new 514 -#define FREEBSD32_SYS_cap_getrights 515 +#define FREEBSD32_SYS_cap_rights_get 515 #define FREEBSD32_SYS_cap_enter 516 #define FREEBSD32_SYS_cap_getmode 517 #define FREEBSD32_SYS_freebsd32_pselect 522 @@ -430,4 +430,9 @@ #define 
FREEBSD32_SYS_freebsd32_posix_fallocate 530 #define FREEBSD32_SYS_freebsd32_posix_fadvise 531 #define FREEBSD32_SYS_freebsd32_wait6 532 -#define FREEBSD32_SYS_MAXSYSCALL 533 +#define FREEBSD32_SYS_cap_rights_limit 533 +#define FREEBSD32_SYS_cap_ioctls_limit 534 +#define FREEBSD32_SYS_cap_ioctls_get 535 +#define FREEBSD32_SYS_cap_fcntls_limit 536 +#define FREEBSD32_SYS_cap_fcntls_get 537 +#define FREEBSD32_SYS_MAXSYSCALL 538 --- sys/compat/freebsd32/freebsd32_syscalls.c.orig +++ sys/compat/freebsd32/freebsd32_syscalls.c @@ -538,7 +538,7 @@ "freebsd32_shmctl", /* 512 = freebsd32_shmctl */ "lpathconf", /* 513 = lpathconf */ "cap_new", /* 514 = cap_new */ - "cap_getrights", /* 515 = cap_getrights */ + "cap_rights_get", /* 515 = cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "#518", /* 518 = pdfork */ @@ -556,4 +556,9 @@ "freebsd32_posix_fallocate", /* 530 = freebsd32_posix_fallocate */ "freebsd32_posix_fadvise", /* 531 = freebsd32_posix_fadvise */ "freebsd32_wait6", /* 532 = freebsd32_wait6 */ + "cap_rights_limit", /* 533 = cap_rights_limit */ + "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ + "cap_ioctls_get", /* 535 = cap_ioctls_get */ + "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ + "cap_fcntls_get", /* 537 = cap_fcntls_get */ }; --- sys/compat/freebsd32/freebsd32_sysent.c.orig +++ sys/compat/freebsd32/freebsd32_sysent.c @@ -575,7 +575,7 @@ { AS(freebsd32_shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = freebsd32_shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { AS(cap_new_args), (sy_call_t *)sys_cap_new, AUE_CAP_NEW, NULL, 0, 0, 0, SY_THR_STATIC }, /* 514 = cap_new */ - { AS(cap_getrights_args), (sy_call_t *)sys_cap_getrights, AUE_CAP_GETRIGHTS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 515 = cap_getrights */ + { AS(cap_rights_get_args), (sy_call_t *)sys_cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, 
0, SY_THR_STATIC }, /* 515 = cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 517 = cap_getmode */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 518 = pdfork */ @@ -593,4 +593,9 @@ { AS(freebsd32_posix_fallocate_args), (sy_call_t *)freebsd32_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = freebsd32_posix_fallocate */ { AS(freebsd32_posix_fadvise_args), (sy_call_t *)freebsd32_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = freebsd32_posix_fadvise */ { AS(freebsd32_wait6_args), (sy_call_t *)freebsd32_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = freebsd32_wait6 */ + { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 533 = cap_rights_limit */ + { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ + { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 535 = cap_ioctls_get */ + { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ + { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ }; --- sys/compat/freebsd32/freebsd32_systrace_args.c.orig +++ sys/compat/freebsd32/freebsd32_systrace_args.c @@ -2956,9 +2956,9 @@ *n_args = 2; break; } - /* cap_getrights */ + /* cap_rights_get */ case 515: { - struct cap_getrights_args *p = params; + struct cap_rights_get_args *p = params; iarg[0] = p->fd; /* int */ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */ *n_args = 2; @@ -3088,6 +3088,48 @@ *n_args = 6; break; } + /* 
cap_rights_limit */ + case 533: { + struct cap_rights_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->rights; /* uint64_t */ + *n_args = 2; + break; + } + /* cap_ioctls_limit */ + case 534: { + struct cap_ioctls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* const u_long * */ + uarg[2] = p->ncmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_ioctls_get */ + case 535: { + struct cap_ioctls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* u_long * */ + uarg[2] = p->maxcmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_fcntls_limit */ + case 536: { + struct cap_fcntls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->fcntlrights; /* uint32_t */ + *n_args = 2; + break; + } + /* cap_fcntls_get */ + case 537: { + struct cap_fcntls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->fcntlrightsp; /* uint32_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -8002,7 +8044,7 @@ break; }; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: switch(ndx) { case 0: @@ -8243,6 +8285,77 @@ break; }; break; + /* cap_rights_limit */ + case 533: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint64_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_limit */ + case 534: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "const u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_get */ + case 535: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_limit */ + case 536: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_get */ + case 537: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t *"; + 
break; + default: + break; + }; + break; default: break; }; @@ -9938,7 +10051,7 @@ if (ndx == 0 || ndx == 1) p = "int"; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: if (ndx == 0 || ndx == 1) p = "int"; @@ -10005,6 +10118,31 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* cap_rights_limit */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_limit */ + case 534: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_get */ + case 535: + if (ndx == 0 || ndx == 1) + p = "ssize_t"; + break; + /* cap_fcntls_limit */ + case 536: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_fcntls_get */ + case 537: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; --- sys/compat/freebsd32/syscalls.master.orig +++ sys/compat/freebsd32/syscalls.master @@ -963,7 +963,7 @@ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW NOPROTO { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS NOPROTO { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET NOPROTO { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } @@ -1005,3 +1005,13 @@ struct wrusage32 *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT NOPROTO { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT NOPROTO { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET NOPROTO { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } --- sys/compat/linux/linux_file.c.orig +++ sys/compat/linux/linux_file.c @@ -154,6 +154,7 @@ SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); 
sx_unlock(&proctree_lock); + /* XXXPJD: Verify if TIOCSCTTY is allowed. */ if (fp->f_type == DTYPE_VNODE) (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); @@ -1039,7 +1040,7 @@ if (error == 0) { /* This seems to violate POSIX but linux does it */ - if ((error = fgetvp(td, uap->fd, CAP_READ, &vp)) != 0) + if ((error = fgetvp(td, uap->fd, CAP_PREAD, &vp)) != 0) return (error); if (vp->v_type == VDIR) { vrele(vp); --- sys/compat/svr4/svr4_fcntl.c.orig +++ sys/compat/svr4/svr4_fcntl.c @@ -265,14 +265,14 @@ /* * If we ever want to support Capsicum on SVR4 processes (unlikely) * or FreeBSD grows a native frevoke() (more likely), we will need a - * CAP_REVOKE here. + * CAP_FREVOKE here. * - * In the meantime, use CAP_MASK_VALID: if a SVR4 process wants to + * In the meantime, use CAP_ALL: if a SVR4 process wants to * do an frevoke(), it needs to do it on either a regular file * descriptor or a fully-privileged capability (which is effectively * the same as a non-capability-restricted file descriptor). 
*/ - if ((error = fgetvp(td, fd, CAP_MASK_VALID, &vp)) != 0) + if ((error = fgetvp(td, fd, CAP_ALL, &vp)) != 0) return (error); if (vp->v_type != VCHR && vp->v_type != VBLK) { --- sys/compat/svr4/svr4_misc.c.orig +++ sys/compat/svr4/svr4_misc.c @@ -247,10 +247,8 @@ DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", uap->fd, uap->nbytes)); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); - } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -426,8 +424,7 @@ if (uap->nbytes < 0) return (EINVAL); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { --- sys/compat/svr4/svr4_stream.c.orig +++ sys/compat/svr4/svr4_stream.c @@ -1449,7 +1449,7 @@ struct file *fp; int error; - if ((error = fget(td, uap->fd, CAP_WRITE, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_SEND, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("putmsg: bad fp\n"); #endif @@ -1621,7 +1621,7 @@ struct file *fp; int error; - if ((error = fget(td, uap->fd, CAP_READ, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_RECV, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("getmsg: bad fp\n"); #endif --- sys/dev/iscsi/initiator/iscsi.c.orig +++ sys/dev/iscsi/initiator/iscsi.c @@ -387,11 +387,11 @@ if(sp->soc != NULL) isc_stop_receiver(sp); - error = fget(td, fd, CAP_SOCK_ALL, &sp->fp); + error = fget(td, fd, CAP_SOCK_CLIENT, &sp->fp); if(error) return error; - if((error = fgetsock(td, fd, CAP_SOCK_ALL, &sp->soc, 0)) == 0) { + if((error = fgetsock(td, fd, CAP_SOCK_CLIENT, &sp->soc, 0)) == 0) { sp->td = td; isc_start_receiver(sp); } --- sys/fs/fdescfs/fdesc_vfsops.c.orig +++ sys/fs/fdescfs/fdesc_vfsops.c @@ -205,7 +205,7 @@ last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL) + 
if (fdp->fd_ofiles[i].fde_file == NULL) freefd++; /* --- sys/fs/fdescfs/fdesc_vnops.c.orig +++ sys/fs/fdescfs/fdesc_vnops.c @@ -534,7 +534,7 @@ dp->d_type = DT_DIR; break; default: - if (fdp->fd_ofiles[fcnt] == NULL) + if (fdp->fd_ofiles[fcnt].fde_file == NULL) break; dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; --- sys/fs/nfs/nfsdport.h.orig +++ sys/fs/nfs/nfsdport.h @@ -94,8 +94,6 @@ #define NFSFPCRED(f) ((f)->f_cred) #define NFSFPFLAG(f) ((f)->f_flag) -int fp_getfvp(NFSPROC_T *, int, struct file **, struct vnode **); - #define NFSNAMEICNDSET(n, c, o, f) do { \ (n)->cn_cred = (c); \ (n)->cn_nameiop = (o); \ --- sys/fs/nfsclient/nfs_clport.c.orig +++ sys/fs/nfsclient/nfs_clport.c @@ -1215,7 +1215,7 @@ * pretend that we need them all. It is better to be too * careful than too reckless. */ - if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp)) + if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_CLIENT, &fp)) != 0) { return (error); } --- sys/fs/nfsserver/nfs_nfsdport.c.orig +++ sys/fs/nfsserver/nfs_nfsdport.c @@ -2767,7 +2767,7 @@ /* * glue for fp. */ -int +static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; @@ -2775,8 +2775,8 @@ int error = 0; fdp = p->td_proc->p_fd; - if (fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) { + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } @@ -3041,7 +3041,7 @@ * pretend that we need them all. It is better to be too * careful than too reckless. 
*/ - if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + if ((error = fget(td, sockarg.sock, CAP_SOCK_SERVER, &fp)) != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); --- sys/i386/ibcs2/ibcs2_misc.c.orig +++ sys/i386/ibcs2/ibcs2_misc.c @@ -337,8 +337,7 @@ #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -491,8 +490,8 @@ u_long *cookies = NULL, *cookiep; int ncookies; - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, + &fp)) != 0) { if (error == EINVAL) return sys_read(td, (struct read_args *)uap); else --- sys/kern/capabilities.conf.orig +++ sys/kern/capabilities.conf @@ -110,9 +110,14 @@ ## Allow capability mode and capability system calls. ## cap_enter +cap_fcntls_get +cap_fcntls_limit cap_getmode -cap_getrights +cap_ioctls_get +cap_ioctls_limit cap_new +cap_rights_get +cap_rights_limit ## ## Allow read-only clock operations. @@ -239,7 +244,7 @@ ## Allow directory I/O on a file descriptor, subject to capability rights. ## Originally we had separate capabilities for directory-specific read ## operations, but on BSD we allow reading the raw directory data, so we just -## rely on CAP_READ and CAP_SEEK now. +## rely on CAP_READ now. ## getdents getdirentries @@ -317,13 +322,10 @@ getuid ## -## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global -## scope, but this is a tricky one as it is also required for tty control. -## We do have a capability right for this operation. +## Allow ioctl(2), which hopefully will be limited by applications only to +## required commands with cap_ioctls_limit(2) syscall. ## -## XXXRW: This needs to be revisited. 
-## -#ioctl +ioctl ## ## Allow querying current process credential state. --- sys/kern/init_sysent.c.orig +++ sys/kern/init_sysent.c @@ -549,7 +549,7 @@ { AS(shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { AS(cap_new_args), (sy_call_t *)sys_cap_new, AUE_CAP_NEW, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 514 = cap_new */ - { AS(cap_getrights_args), (sy_call_t *)sys_cap_getrights, AUE_CAP_GETRIGHTS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_getrights */ + { AS(cap_rights_get_args), (sy_call_t *)sys_cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 517 = cap_getmode */ { AS(pdfork_args), (sy_call_t *)sys_pdfork, AUE_PDFORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 518 = pdfork */ @@ -567,4 +567,9 @@ { AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = posix_fallocate */ { AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */ { AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */ + { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 533 = cap_rights_limit */ + { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ + { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, SYF_CAPENABLED, 
SY_THR_STATIC }, /* 535 = cap_ioctls_get */ + { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ + { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ }; --- sys/kern/kern_descrip.c.orig +++ sys/kern/kern_descrip.c @@ -110,15 +110,8 @@ static uma_zone_t file_zone; -/* Flags for do_dup() */ -#define DUP_FIXED 0x1 /* Force fixed allocation */ -#define DUP_FCNTL 0x2 /* fcntl()-style errors */ -#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ - static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); -static int do_dup(struct thread *td, int flags, int old, int new, - register_t *retval); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); @@ -166,7 +159,7 @@ * the process exits. */ struct freetable { - struct file **ft_table; + struct filedescent *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -177,8 +170,7 @@ struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfiles[NDFILE]; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -284,7 +276,8 @@ FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd)); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) @@ -294,6 +287,20 @@ } /* + * Free a file descriptor. 
+ */ +static inline void +fdfree(struct filedesc *fdp, int fd) +{ + struct filedescent *fde; + + fde = &fdp->fd_ofiles[fd]; + filecaps_free(&fde->fde_caps); + bzero(fde, sizeof(*fde)); + fdunused(fdp, fd); +} + +/* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ @@ -434,36 +441,14 @@ return (error); } -static inline int -fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp) -{ - - FILEDESC_LOCK_ASSERT(fdp); - - *fpp = fget_locked(fdp, fd); - if (*fpp == NULL) - return (EBADF); - -#ifdef CAPABILITIES - if ((*fpp)->f_type == DTYPE_CAPABILITY) { - int err = cap_funwrap(*fpp, rights, fpp); - if (err != 0) { - *fpp = NULL; - return (err); - } - } -#endif /* CAPABILITIES */ - return (0); -} - int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; - struct file *fp; + struct file *fp, *fp2; + struct filedescent *fde; struct proc *p; - char *pop; struct vnode *vp; int error, flg, tmp; u_int old, new; @@ -505,8 +490,9 @@ error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; + fde = &fdp->fd_ofiles[fd]; + td->td_retval[0] = + (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_SUNLOCK(fdp); break; @@ -517,32 +503,24 @@ error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - *pop = (*pop &~ UF_EXCLOSE) | + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ?
UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL); + if (error != 0) break; - } td->td_retval[0] = OFLAGS(fp->f_flag); - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); break; case F_SETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -550,7 +528,7 @@ } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error) { + if (error != 0) { fdrop(fp, td); break; } @@ -567,14 +545,9 @@ break; case F_GETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -582,14 +555,9 @@ break; case F_SETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -608,17 +576,15 @@ case F_SETLK: do_setlk: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } + flp = 
(struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); @@ -627,16 +593,11 @@ foffset > OFF_MAX - flp->l_start)) { - FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: @@ -703,37 +665,37 @@ * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ - FILEDESC_SLOCK(fdp); - if (fget_locked(fdp, fd) != fp) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL); + if (error != 0) { + fdrop(fp, td); + break; + } + if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); - } else - FILEDESC_SUNLOCK(fdp); + } fdrop(fp, td); + fdrop(fp2, td); break; case F_GETLK: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_SUNLOCK(fdp); error = EINVAL; + fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { @@ -744,15 +706,10 @@ foffset < OFF_MIN - flp->l_start)) { - FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); @@ -763,19 +721,14 @@ arg = arg ?
128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: - FILEDESC_SLOCK(fdp); - if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_SUNLOCK(fdp); - error = EBADF; + error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); error = EBADF; break; } - fhold(fp); - FILEDESC_SUNLOCK(fdp); if (arg >= 0) { vp = fp->f_vnode; error = vn_lock(vp, LK_SHARED); @@ -809,11 +762,12 @@ /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ -static int +int do_dup(struct thread *td, int flags, int old, int new, register_t *retval) { struct filedesc *fdp; + struct filedescent *oldfde, *newfde; struct proc *p; struct file *fp; struct file *delfp; @@ -842,14 +796,15 @@ FILEDESC_XUNLOCK(fdp); return (EBADF); } + oldfde = &fdp->fd_ofiles[old]; if (flags & DUP_FIXED && old == new) { *retval = new; if (flags & DUP_CLOEXEC) - fdp->fd_ofileflags[new] |= UF_EXCLOSE; + fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return (0); } - fp = fdp->fd_ofiles[old]; + fp = oldfde->fde_file; fhold(fp); /* @@ -880,8 +835,10 @@ } #endif fdgrowtable(fdp, new + 1); + oldfde = &fdp->fd_ofiles[old]; } - if (fdp->fd_ofiles[new] == NULL) + newfde = &fdp->fd_ofiles[new]; + if (newfde->fde_file == NULL) fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { @@ -889,20 +846,23 @@ fdrop(fp, td); return (error); } + newfde = &fdp->fd_ofiles[new]; } - KASSERT(fp == fdp->fd_ofiles[old], ("old fd has been modified")); + KASSERT(fp == oldfde->fde_file, ("old fd has been modified")); KASSERT(old != new, ("new fd is same as old")); - delfp = fdp->fd_ofiles[new]; + delfp = newfde->fde_file; + /* * Duplicate the source descriptor. 
*/ - fdp->fd_ofiles[new] = fp; + *newfde = *oldfde; + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; @@ -1143,7 +1103,6 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { - struct file *fp_object; int error; FILEDESC_XLOCK_ASSERT(fdp); @@ -1169,12 +1128,10 @@ knote_fdclose(td, fd); /* - * When we're closing an fd with a capability, we need to notify - * mqueue if the underlying object is of type mqueue. + * We need to notify mqueue if the object is of type mqueue. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_MQUEUE) - mq_fdclose(td, fd, fp_object); + if (fp->f_type == DTYPE_MQUEUE) + mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); @@ -1226,9 +1183,7 @@ FILEDESC_XUNLOCK(fdp); return (EBADF); } - fdp->fd_ofiles[fd] = NULL; - fdp->fd_ofileflags[fd] = 0; - fdunused(fdp, fd); + fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. */ return (closefp(fdp, fd, fp, td, 1)); @@ -1260,7 +1215,7 @@ uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { - if (fdp->fd_ofiles[fd] != NULL) { + if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); @@ -1412,6 +1367,91 @@ } /* + * Initialize filecaps structure. + */ +void +filecaps_init(struct filecaps *fcaps) +{ + + bzero(fcaps, sizeof(*fcaps)); + fcaps->fc_nioctls = -1; +} + +/* + * Copy filecaps structure allocating memory for ioctls array if needed.
+ */ +void +filecaps_copy(const struct filecaps *src, struct filecaps *dst) +{ + size_t size; + + *dst = *src; + if (src->fc_ioctls != NULL) { + KASSERT(src->fc_nioctls > 0, + ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); + + size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; + dst->fc_ioctls = malloc(size, M_TEMP, M_WAITOK); + bcopy(src->fc_ioctls, dst->fc_ioctls, size); + } +} + +/* + * Move filecaps structure to the new place and clear the old place. + */ +static void +filecaps_move(struct filecaps *src, struct filecaps *dst) +{ + + *dst = *src; + bzero(src, sizeof(*src)); +} + +/* + * Fill the given filecaps structure with full rights. + */ +static void +filecaps_fill(struct filecaps *fcaps) +{ + + fcaps->fc_rights = CAP_ALL; + fcaps->fc_ioctls = NULL; + fcaps->fc_nioctls = -1; + fcaps->fc_fcntls = CAP_FCNTL_ALL; +} + +/* + * Free memory allocated within filecaps structure. + */ +void +filecaps_free(struct filecaps *fcaps) +{ + + free(fcaps->fc_ioctls, M_TEMP); + bzero(fcaps, sizeof(*fcaps)); +} + +/* + * Validate the given filecaps structure. + */ +static void +filecaps_validate(const struct filecaps *fcaps, const char *func) +{ + + KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0, + ("%s: invalid rights", func)); + KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, + ("%s: invalid fcntls", func)); + KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0, + ("%s: fcntls without CAP_FCNTL", func)); + KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : + (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), + ("%s: invalid ioctls", func)); + KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0, + ("%s: ioctls without CAP_IOCTL", func)); +} + +/* * Grow the file table to accomodate (at least) nfd descriptors. 
*/ static void @@ -1419,9 +1459,8 @@ { struct filedesc0 *fdp0; struct freetable *ft; - struct file **ntable; - struct file **otable; - char *nfileflags, *ofileflags; + struct filedescent *ntable; + struct filedescent *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; @@ -1433,7 +1472,6 @@ /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_ofiles; - ofileflags = fdp->fd_ofileflags; omap = fdp->fd_map; /* compute the size of the new table */ @@ -1442,30 +1480,20 @@ /* the table is already large enough */ return; - /* - * Allocate a new table and map. We need enough space for a) the - * file entries themselves, b) the file flags, and c) the struct - * freetable we will use when we decommission the table and place - * it on the freelist. We place the struct freetable in the - * middle so we don't have to worry about padding. - */ - ntable = malloc(nnfiles * sizeof(*ntable) + - sizeof(struct freetable) + - nnfiles * sizeof(*nfileflags), + /* allocate a new table and (if required) new bitmaps */ + ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); - nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable); nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data over and point at the new tables */ memcpy(ntable, otable, onfiles * sizeof(*otable)); - memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags)); memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); /* update the pointers and counters */ fdp->fd_nfiles = nnfiles; + memcpy(ntable, otable, onfiles * sizeof(ntable[0])); fdp->fd_ofiles = ntable; - fdp->fd_ofileflags = nfileflags; fdp->fd_map = nmap; /* @@ -1541,8 +1569,9 @@ ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free")); - KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set")); + 
KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("file descriptor isn't free")); + KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); fdused(fdp, fd); *result = fd; return (0); @@ -1573,7 +1602,7 @@ return (1); last = min(fdp->fd_nfiles, lim); for (i = fdp->fd_freefile; i < last; i++) { - if (fdp->fd_ofiles[i] == NULL && --n <= 0) + if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) return (1); } return (0); @@ -1596,7 +1625,7 @@ if (error) return (error); /* no reference held on error */ - error = finstall(td, fp, &fd, flags); + error = finstall(td, fp, &fd, flags, NULL); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); @@ -1650,13 +1679,17 @@ * Install a file in a file descriptor table. */ int -finstall(struct thread *td, struct file *fp, int *fd, int flags) +finstall(struct thread *td, struct file *fp, int *fd, int flags, + struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; int error; KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); + if (fcaps != NULL) + filecaps_validate(fcaps, __func__); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { @@ -1664,9 +1697,14 @@ return (error); } fhold(fp); - fdp->fd_ofiles[*fd] = fp; + fde = &fdp->fd_ofiles[*fd]; + fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) - fdp->fd_ofileflags[*fd] |= UF_EXCLOSE; + fde->fde_flags |= UF_EXCLOSE; + if (fcaps != NULL) + filecaps_move(fcaps, &fde->fde_caps); + else + filecaps_fill(&fde->fde_caps); FILEDESC_XUNLOCK(fdp); return (0); } @@ -1701,7 +1739,6 @@ newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; @@ -1783,6 +1820,7 @@ fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; + struct filedescent *nfde, *ofde; int i; /* Certain 
daemons might not have file descriptors. */ @@ -1801,12 +1839,14 @@ /* copy all passable descriptors (i.e. not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { + ofde = &fdp->fd_ofiles[i]; if (fdisused(fdp, i) && - (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) && - fdp->fd_ofiles[i]->f_ops != &badfileops) { - newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; - newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; - fhold(newfdp->fd_ofiles[i]); + (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && + ofde->fde_file->f_ops != &badfileops) { + nfde = &newfdp->fd_ofiles[i]; + *nfde = *ofde; + filecaps_copy(&ofde->fde_caps, &nfde->fde_caps); + fhold(nfde->fde_file); newfdp->fd_lastfile = i; } else { if (newfdp->fd_freefile == -1) @@ -1816,9 +1856,10 @@ newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); FILEDESC_XLOCK(newfdp); - for (i = 0; i <= newfdp->fd_lastfile; ++i) - if (newfdp->fd_ofiles[i] != NULL) + for (i = 0; i <= newfdp->fd_lastfile; ++i) { + if (newfdp->fd_ofiles[i].fde_file != NULL) fdused(newfdp, i); + } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; FILEDESC_XUNLOCK(newfdp); @@ -1854,12 +1895,12 @@ if (fdtol != NULL) { FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, - ("filedesc_to_refcount botch: fdl_refcount=%d", - fdtol->fdl_refcount)); + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); @@ -1919,10 +1960,10 @@ return; for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL) { FILEDESC_XLOCK(fdp); - fdp->fd_ofiles[i] = NULL; + bzero(&fdp->fd_ofiles[i], sizeof(fdp->fd_ofiles[i])); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } @@ -1987,6 +2028,7 @@ setugidsafety(struct 
thread *td) { struct filedesc *fdp; + struct file *fp; int i; /* Certain daemons might not have file descriptors. */ @@ -2002,18 +2044,14 @@ for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { - struct file *fp; - + fp = fdp->fd_ofiles[i].fde_file; + if (fp != NULL && is_unsafe(fp)) { knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. */ - fp = fdp->fd_ofiles[i]; - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); FILEDESC_XLOCK(fdp); @@ -2034,9 +2072,8 @@ { FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[idx] == fp) { - fdp->fd_ofiles[idx] = NULL; - fdunused(fdp, idx); + if (fdp->fd_ofiles[idx].fde_file == fp) { + fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else @@ -2050,6 +2087,7 @@ fdcloseexec(struct thread *td) { struct filedesc *fdp; + struct filedescent *fde; struct file *fp; int i; @@ -2064,12 +2102,11 @@ */ FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fde = &fdp->fd_ofiles[i]; + fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || - (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + (fde->fde_flags & UF_EXCLOSE))) { + fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); /* closefp() drops the FILEDESC lock. 
*/ FILEDESC_XLOCK(fdp); @@ -2099,7 +2136,7 @@ devnull = -1; error = 0; for (i = 0; i < 3; i++) { - if (fdp->fd_ofiles[i] != NULL) + if (fdp->fd_ofiles[i].fde_file != NULL) continue; if (devnull < 0) { save = td->td_retval[0]; @@ -2134,7 +2171,6 @@ struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; - struct file *fp_object; /* * POSIX record locking dictates that any close releases ALL @@ -2147,13 +2183,9 @@ * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. - * - * If this is a capability, we do lock processing under the underlying - * node, not the capability itself. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_VNODE && td != NULL) { - vp = fp_object->f_vnode; + if (fp->f_type == DTYPE_VNODE && td != NULL) { + vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -2182,7 +2214,7 @@ lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = fp_object->f_vnode; + vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); @@ -2216,14 +2248,19 @@ atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } -struct file * -fget_unlocked(struct filedesc *fdp, int fd) +int +fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { struct file *fp; u_int count; +#ifdef CAPABILITIES + cap_rights_t haverights; + int error; +#endif if (fd < 0 || fd >= fdp->fd_nfiles) - return (NULL); + return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have @@ -2233,9 +2270,20 @@ * due to preemption. 
*/ for (;;) { - fp = fdp->fd_ofiles[fd]; + fp = fdp->fd_ofiles[fd].fde_file; if (fp == NULL) - break; + return (EBADF); +#ifdef CAPABILITIES + haverights = cap_rights(fdp, fd); + error = cap_check(haverights, needrights); + if (error != 0) + return (error); + if ((needrights & CAP_FCNTL) != 0) { + error = cap_fcntl_check(fdp, fd, needfcntl); + if (error != 0) + return (error); + } +#endif count = fp->f_count; if (count == 0) continue; @@ -2245,12 +2293,19 @@ */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; - if (fp == fdp->fd_ofiles[fd]) + if (fp == fdp->fd_ofiles[fd].fde_file) break; fdrop(fp, curthread); } - - return (fp); + *fpp = fp; + if (haverightsp != NULL) { +#ifdef CAPABILITIES + *haverightsp = haverights; +#else + *haverightsp = CAP_ALL; +#endif + } + return (0); } /* @@ -2260,33 +2315,29 @@ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * - * If the FGET_GETCAP flag is set, the capability itself will be returned. - * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL. - * Otherwise, if the file is a capability, its rights will be checked against - * the capability rights mask, and if successful, the object will be unwrapped. + * File's rights will be checked against the capability rights mask. * * If an error occured the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). 
*/ -#define FGET_GETCAP 0x00000001 static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, - cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp, - int fget_flags) + cap_rights_t needrights, u_char *maxprotp) { struct filedesc *fdp; struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; -#endif + cap_rights_t haverights; int error; *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + if (maxprotp != NULL) + needrights |= CAP_MMAP; + error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); @@ -2294,50 +2345,11 @@ #ifdef CAPABILITIES /* - * If this is a capability, what rights does it have? + * If requested, convert capability rights to access flags. */ - if (haverightsp != NULL) { - if (fp->f_type == DTYPE_CAPABILITY) - *haverightsp = cap_rights(fp); - else - *haverightsp = CAP_MASK_VALID; - } - - /* - * If a capability has been requested, return the capability directly. - * Otherwise, check capability rights, extract the underlying object, - * and check its access flags. - */ - if (fget_flags & FGET_GETCAP) { - if (fp->f_type != DTYPE_CAPABILITY) { - fdrop(fp, td); - return (EINVAL); - } - } else { - if (maxprotp == NULL) - error = cap_funwrap(fp, needrights, &fp_fromcap); - else - error = cap_funwrap_mmap(fp, needrights, maxprotp, - &fp_fromcap); - if (error != 0) { - fdrop(fp, td); - return (error); - } - - /* - * If we've unwrapped a file, drop the original capability - * and hold the new descriptor. fp after this point refers to - * the actual (unwrapped) object, not the capability. 
- */ - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, td); - fp = fp_fromcap; - } - } + if (maxprotp != NULL) + *maxprotp = cap_rights_to_vmprot(haverights); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("%s: saw capability", __func__)); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #endif /* CAPABILITIES */ @@ -2376,7 +2388,7 @@ fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, 0, rights, NULL)); } int @@ -2384,37 +2396,24 @@ struct file **fpp) { - return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0)); + return (_fget(td, fd, fpp, 0, rights, maxprotp)); } int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, FREAD, rights, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0)); + return (_fget(td, fd, fpp, FWRITE, rights, NULL)); } /* - * Unlike the other fget() calls, which accept and check capability rights - * but never return capabilities, fgetcap() returns the capability but doesn't - * check capability rights. - */ -int -fgetcap(struct thread *td, int fd, struct file **fpp) -{ - - return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP)); -} - - -/* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. 
@@ -2423,14 +2422,14 @@ */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights, - cap_rights_t *haverightsp, struct vnode **vpp) + struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; - if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp, - NULL, 0)) != 0) + error = _fget(td, fd, &fp, flags, needrights, NULL); + if (error) return (error); if (fp->f_vnode == NULL) { error = EINVAL; @@ -2447,28 +2446,50 @@ fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, rights, NULL, vpp)); + return (_fgetvp(td, fd, 0, rights, vpp)); } int -fgetvp_rights(struct thread *td, int fd, cap_rights_t need, cap_rights_t *have, - struct vnode **vpp) +fgetvp_rights(struct thread *td, int fd, cap_rights_t need, + struct filecaps *havecaps, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, need, have, vpp)); + struct filedesc *fdp; + struct file *fp; + int error; + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (EBADF); + + fp = fget_locked(fdp, fd); + if (fp == NULL || fp->f_ops == &badfileops) + return (EBADF); + + error = cap_check(cap_rights(fdp, fd), need); + if (error != 0) + return (error); + + if (fp->f_vnode == NULL) + return (EINVAL); + + *vpp = fp->f_vnode; + vref(*vpp); + filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps); + + return (0); } int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FREAD, rights, NULL, vpp)); + return (_fgetvp(td, fd, FREAD, rights, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp)); + return (_fgetvp(td, fd, FEXEC, rights, vpp)); } #ifdef notyet @@ -2477,7 +2498,7 @@ struct vnode **vpp) { - return (_fgetvp(td, fd, FWRITE, rights, NULL, vpp)); + return (_fgetvp(td, fd, FWRITE, rights, vpp)); } #endif @@ -2502,7 +2523,7 @@ *spp = NULL; if (fflagp != NULL) *fflagp = 0; 
- if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0) + if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { error = ENOTSOCK; @@ -2538,9 +2559,6 @@ /* * Handle the last reference to a file being closed. - * - * No special capability handling here, as the capability's fo_close will run - * instead of the object here, and perform any necessary drop on the object. */ int _fdrop(struct file *fp, struct thread *td) @@ -2617,7 +2635,8 @@ * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) +dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, + int openerror, int *indxp) { struct file *fp; int error, indx; @@ -2661,18 +2680,17 @@ FILEDESC_XUNLOCK(fdp); return (EACCES); } - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(fp); + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, + &fdp->fd_ofiles[indx].fde_caps); break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. 
*/ - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofiles[dfd] = NULL; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fdp->fd_ofileflags[dfd] = 0; + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); fdunused(fdp, dfd); break; } @@ -2828,7 +2846,7 @@ continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; @@ -2940,7 +2958,7 @@ export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, fdp, req); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); @@ -2950,21 +2968,6 @@ shmfd = NULL; kif->kf_fd = i; -#ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability. With - * ofiledesc, we don't have a field to export the cap_rights_t, - * but we do with the new filedesc. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - kif->kf_flags |= KF_FLAG_CAPABILITY; - (void)cap_funwrap(fp, 0, &fp); - } -#else - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_ofiledesc: saw capability")); -#endif switch (fp->f_type) { case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; @@ -3133,8 +3136,8 @@ static int export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, - int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights, - struct kinfo_file *kif, struct sysctl_req *req) + int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, + struct sysctl_req *req) { struct { int fflag; @@ -3196,10 +3199,7 @@ for (i = 0; i < NFFLAGS; i++) if (fflags & fflags_table[i].fflag) kif->kf_flags |= fflags_table[i].kf_fflag; - if (fd_is_cap) - kif->kf_flags |= KF_FLAG_CAPABILITY; - if (fd_is_cap) - kif->kf_cap_rights = fd_cap_rights; + kif->kf_cap_rights = fd_cap_rights; kif->kf_fd = fd; kif->kf_type = type; kif->kf_ref_count = refcnt; @@ -3227,7 +3227,7 @@ int64_t offset; void *data; int error, i, *name; - int fd_is_cap, type, refcnt, fflags; + int type, refcnt, fflags; cap_rights_t fd_cap_rights; name = (int *)arg1; @@ -3257,13 +3257,13 @@ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); if (tracevp != NULL) export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (textvp != NULL) export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); if (cttyvp != NULL) export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (fdp == NULL) goto fail; FILEDESC_SLOCK(fdp); @@ -3273,7 +3273,7 @@ data = fdp->fd_cdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* root directory */ 
@@ -3282,7 +3282,7 @@ data = fdp->fd_rdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* jail directory */ @@ -3291,30 +3291,17 @@ data = fdp->fd_jdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; data = NULL; - fd_is_cap = 0; - fd_cap_rights = 0; - #ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability and export - * the capability rights mask. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - fd_is_cap = 1; - fd_cap_rights = cap_rights(fp); - (void)cap_funwrap(fp, 0, &fp); - } + fd_cap_rights = cap_rights(fdp, i); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_filedesc: saw capability")); + fd_cap_rights = 0; #endif switch (fp->f_type) { case DTYPE_VNODE: @@ -3390,7 +3377,7 @@ if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SUNLOCK(fdp); error = export_fd_for_sysctl(data, type, i, fflags, refcnt, - offset, fd_is_cap, fd_cap_rights, kif, req); + offset, fd_cap_rights, kif, req); if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SLOCK(fdp); if (error) { @@ -3649,7 +3636,7 @@ if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { - if (fp == fdp->fd_ofiles[n]) + if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } @@ -3699,7 +3686,7 @@ if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; --- sys/kern/kern_exec.c.orig +++ sys/kern/kern_exec.c @@ -438,9 +438,6 @@ } else { 
AUDIT_ARG_FD(args->fd); /* - * Some might argue that CAP_READ and/or CAP_MMAP should also - * be required here; such arguments will be entertained. - * * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp); --- sys/kern/sys_capability.c.orig +++ sys/kern/sys_capability.c @@ -1,11 +1,15 @@ /*- * Copyright (c) 2008-2011 Robert N. M. Watson * Copyright (c) 2010-2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -62,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -139,90 +144,48 @@ FEATURE(security_capabilities, "Capsicum Capabilities"); -/* - * struct capability describes a capability, and is hung off of its struct - * file f_data field. cap_file and cap_rightss are static once hooked up, as - * neither the object it references nor the rights it encapsulates are - * permitted to change. - */ -struct capability { - struct file *cap_object; /* Underlying object's file. */ - struct file *cap_file; /* Back-pointer to cap's file. */ - cap_rights_t cap_rights; /* Mask of rights on object. 
*/ -}; +static inline int +_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) +{ + + + if ((need & ~have) != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(type, need, have); +#endif + return (ENOTCAPABLE); + } + return (0); +} /* - * Capabilities have a fileops vector, but in practice none should ever be - * called except for fo_close, as the capability will normally not be - * returned during a file descriptor lookup in the system call code. + * Test whether a capability grants the requested rights. */ -static fo_rdwr_t capability_read; -static fo_rdwr_t capability_write; -static fo_truncate_t capability_truncate; -static fo_ioctl_t capability_ioctl; -static fo_poll_t capability_poll; -static fo_kqfilter_t capability_kqfilter; -static fo_stat_t capability_stat; -static fo_close_t capability_close; -static fo_chmod_t capability_chmod; -static fo_chown_t capability_chown; - -static struct fileops capability_ops = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = DFLAG_PASSABLE, -}; - -static struct fileops capability_ops_unpassable = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = 0, -}; - -static uma_zone_t capability_zone; - -static void -capability_init(void *dummy __unused) +int +cap_check(cap_rights_t have, cap_rights_t need) { - capability_zone = uma_zcreate("capability", sizeof(struct capability), - NULL, NULL, NULL, NULL, 
UMA_ALIGN_PTR, 0); - if (capability_zone == NULL) - panic("capability_init: capability_zone not initialized"); + return (_cap_check(have, need, CAPFAIL_NOTCAPABLE)); } -SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL); /* - * Test whether a capability grants the requested rights. + * Convert capability rights into VM access flags. */ -static int -cap_check(struct capability *c, cap_rights_t rights) +u_char +cap_rights_to_vmprot(cap_rights_t have) { + u_char maxprot; + + maxprot = VM_PROT_NONE; + if (have & CAP_MMAP_R) + maxprot |= VM_PROT_READ; + if (have & CAP_MMAP_W) + maxprot |= VM_PROT_WRITE; + if (have & CAP_MMAP_X) + maxprot |= VM_PROT_EXECUTE; - if ((c->cap_rights | rights) != c->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights); -#endif - return (ENOTCAPABLE); - } - return (0); + return (maxprot); } /* @@ -231,43 +194,49 @@ * this one file. */ cap_rights_t -cap_rights(struct file *fp_cap) +cap_rights(struct filedesc *fdp, int fd) { - struct capability *c; - KASSERT(fp_cap->f_type == DTYPE_CAPABILITY, - ("cap_rights: !capability")); - - c = fp_cap->f_data; - return (c->cap_rights); + return (fdp->fd_ofiles[fd].fde_rights); } /* - * System call to create a new capability reference to either an existing - * file object or an an existing capability. + * System call to limit rights of the given capability. 
*/ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - int error, capfd; - int fd = uap->fd; - struct file *fp; - cap_rights_t rights = uap->rights; + struct filedesc *fdp; + cap_rights_t rights; + int error, fd; + + fd = uap->fd; + rights = uap->rights; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(rights); - error = fget(td, fd, rights, &fp); - if (error) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &capfd); - /* - * Release our reference to the file (kern_capwrap has held a reference - * for the filedesc array). - */ - fdrop(fp, td); - if (error == 0) - td->td_retval[0] = capfd; + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + if (error == 0) { + fdp->fd_ofiles[fd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[fd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[fd].fde_ioctls = NULL; + fdp->fd_ofiles[fd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[fd].fde_fcntls = 0; + } + FILEDESC_XUNLOCK(fdp); return (error); } @@ -275,247 +244,321 @@ * System call to query the rights mask associated with a capability. 
*/ int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - struct capability *cp; - struct file *fp; - int error; + struct filedesc *fdp; + cap_rights_t rights; + int fd; + + fd = uap->fd; + + AUDIT_ARG_FD(fd); - AUDIT_ARG_FD(uap->fd); - error = fgetcap(td, uap->fd, &fp); - if (error) - return (error); - cp = fp->f_data; - error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp)); - fdrop(fp, td); - return (error); + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = cap_rights(fdp, fd); + FILEDESC_SUNLOCK(fdp); + return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp))); } /* - * Create a capability to wrap around an existing file. + * Test whether a capability grants the given ioctl command. + * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and + * ENOTCAPABLE will be returned. */ int -kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfdp) +cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd) { - struct capability *cp, *cp_old; - struct file *fp_object, *fcapp; - int error; + u_long *cmds; + ssize_t ncmds; + int i; + + FILEDESC_LOCK_ASSERT(fdp); + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); + ncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (ncmds == -1) + return (0); - /* - * If a new capability is being derived from an existing capability, - * then the new capability rights must be a subset of the existing - * rights. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - cp_old = fp->f_data; - if ((cp_old->cap_rights | rights) != cp_old->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_INCREASE, - rights, cp_old->cap_rights); -#endif - return (ENOTCAPABLE); - } + cmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + if (cmds[i] == cmd) + return (0); } - /* - * Allocate a new file descriptor to hang the capability off of. - */ - error = falloc(td, &fcapp, capfdp, fp->f_flag); - if (error) - return (error); - - /* - * Rather than nesting capabilities, directly reference the object an - * existing capability references. There's nothing else interesting - * to preserve for future use, as we've incorporated the previous - * rights mask into the new one. This prevents us from having to - * deal with capability chains. - */ - if (fp->f_type == DTYPE_CAPABILITY) - fp_object = ((struct capability *)fp->f_data)->cap_object; - else - fp_object = fp; - fhold(fp_object); - cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - cp->cap_rights = rights; - cp->cap_object = fp_object; - cp->cap_file = fcapp; - if (fp->f_flag & DFLAG_PASSABLE) - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops); - else - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops_unpassable); - - /* - * Release our private reference (the proc filedesc still has one). - */ - fdrop(fcapp, td); - return (0); + return (ENOTCAPABLE); } /* - * Given a file descriptor, test it against a capability rights mask and then - * return the file descriptor on which to actually perform the requested - * operation. As long as the reference to fp_cap remains valid, the returned - * pointer in *fp will remain valid, so no extra reference management is - * required, and the caller should fdrop() fp_cap as normal when done with - * both. + * Check if the current ioctls list can be replaced by the new one. 
*/ -int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +static int +cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds, + size_t ncmds) { - struct capability *c; - int error; + u_long *ocmds; + ssize_t oncmds; + u_int i; + int j; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; + oncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (oncmds == -1) return (0); + if (oncmds < (ssize_t)ncmds) + return (ENOTCAPABLE); + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + for (j = 0; j < oncmds; j++) { + if (cmds[i] == ocmds[j]) + break; + } + if (j == oncmds) + return (ENOTCAPABLE); } - c = fp_cap->f_data; - error = cap_check(c, rights); - if (error) - return (error); - *fpp = c->cap_object; + return (0); } -/* - * Slightly different routine for memory mapping file descriptors: unwrap the - * capability and check CAP_MMAP, but also return a bitmask representing the - * maximum mapping rights the capability allows on the object. - */ int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - struct capability *c; - u_char maxprot; - int error; + struct filedesc *fdp; + u_long *cmds, *ocmds; + size_t ncmds; + int error, fd; + + fd = uap->fd; + ncmds = uap->ncmds; + + AUDIT_ARG_FD(fd); + + if (ncmds > 256) /* XXX: Is 256 sane? 
*/ + return (EINVAL); + + if (ncmds == 0) { + cmds = NULL; + } else { + cmds = malloc(sizeof(cmds[0]) * ncmds, M_TEMP, M_WAITOK); + error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds); + if (error != 0) { + free(cmds, M_TEMP); + return (error); + } + } + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; } - c = fp_cap->f_data; - error = cap_check(c, rights | CAP_MMAP); - if (error) - return (error); - *fpp = c->cap_object; - maxprot = 0; - if (c->cap_rights & CAP_READ) - maxprot |= VM_PROT_READ; - if (c->cap_rights & CAP_WRITE) - maxprot |= VM_PROT_WRITE; - if (c->cap_rights & CAP_MAPEXEC) - maxprot |= VM_PROT_EXECUTE; - *maxprotp = maxprot; - return (0); + + error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds); + if (error != 0) + goto out; + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + fdp->fd_ofiles[fd].fde_ioctls = cmds; + fdp->fd_ofiles[fd].fde_nioctls = ncmds; + + cmds = ocmds; + error = 0; +out: + FILEDESC_XUNLOCK(fdp); + free(cmds, M_TEMP); + return (error); } -/* - * When a capability is closed, simply drop the reference on the underlying - * object and free the capability. fdrop() will handle the case where the - * underlying object also needs to close, and the caller will have already - * performed any object-specific lock or mqueue handling. 
- */ -static int -capability_close(struct file *fp, struct thread *td) +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - struct capability *c; - struct file *fp_object; + struct filedesc *fdp; + struct filedescent *fdep; + u_long *cmds; + size_t maxcmds; + int error, fd; + + fd = uap->fd; + cmds = uap->cmds; + maxcmds = uap->maxcmds; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + /* + * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL) + * the only sane thing we can do is to not populate the given array and + * return CAP_IOCTLS_ALL. + */ - KASSERT(fp->f_type == DTYPE_CAPABILITY, - ("capability_close: !capability")); + fdep = &fdp->fd_ofiles[fd]; + if (cmds != NULL && fdep->fde_ioctls != NULL) { + error = copyout(fdep->fde_ioctls, cmds, + sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds)); + if (error != 0) + goto out; + } + if (fdep->fde_nioctls == -1) + td->td_retval[0] = CAP_IOCTLS_ALL; + else + td->td_retval[0] = fdep->fde_nioctls; - c = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fp_object = c->cap_object; - uma_zfree(capability_zone, c); - return (fdrop(fp_object, td)); + error = 0; +out: + FILEDESC_SUNLOCK(fdp); + return (error); } /* - * In general, file descriptor operations should never make it to the - * capability, only the underlying file descriptor operation vector, so panic - * if we do turn up here. + * Test whether a capability grants the given fcntl command. 
*/ -static int -capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) { + uint32_t fcntlcap; + + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); + + fcntlcap = (1 << cmd); + KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, + ("Unsupported fcntl=%d.", cmd)); - panic("capability_read"); + if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + return (0); + + return (ENOTCAPABLE); } -static int -capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { + struct filedesc *fdp; + uint32_t fcntlrights; + int fd; + + fd = uap->fd; + fcntlrights = uap->fcntlrights; - panic("capability_write"); -} + AUDIT_ARG_FD(fd); + AUDIT_ARG_FCNTL_RIGHTS(fcntlrights); -static int -capability_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ + if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) + return (EINVAL); - panic("capability_truncate"); -} + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); -static int -capability_ioctl(struct file *fp, u_long com, void *data, - struct ucred *active_cred, struct thread *td) -{ + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } - panic("capability_ioctl"); -} + if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) { + FILEDESC_XUNLOCK(fdp); + return (ENOTCAPABLE); + } -static int -capability_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ + fdp->fd_ofiles[fd].fde_fcntls = fcntlrights; + FILEDESC_XUNLOCK(fdp); - panic("capability_poll"); + return (0); } -static int -capability_kqfilter(struct file *fp, struct knote *kn) +int +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { + struct filedesc *fdp; + uint32_t rights; + int fd; - 
panic("capability_kqfilter"); -} + fd = uap->fd; + + AUDIT_ARG_FD(fd); -static int -capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, - struct thread *td) -{ + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = fdp->fd_ofiles[fd].fde_fcntls; + FILEDESC_SUNLOCK(fdp); - panic("capability_stat"); + return (copyout(&rights, uap->fcntlrightsp, sizeof(rights))); } +/* + * For backward compatibility. + */ int -capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, - struct thread *td) +sys_cap_new(struct thread *td, struct cap_new_args *uap) { + struct filedesc *fdp; + cap_rights_t rights; + register_t newfd; + int error, fd; + + fd = uap->fd; + rights = uap->rights; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_RIGHTS(rights); + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + FILEDESC_SUNLOCK(fdp); + if (error != 0) + return (error); + + error = do_dup(td, 0, fd, 0, &newfd); + if (error != 0) + return (error); - panic("capability_chmod"); -} + FILEDESC_XLOCK(fdp); + /* + * We don't really care about the race between checking capability + * rights for the source descriptor and now. If capability rights + * were ok at that earlier point, the process had this descriptor + * with those rights, so we don't increase them in security sense, + * the process might have done the cap_new(2) a bit earlier to get + * the same effect. 
+ */ + fdp->fd_ofiles[newfd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[newfd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[newfd].fde_ioctls = NULL; + fdp->fd_ofiles[newfd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[newfd].fde_fcntls = 0; + FILEDESC_XUNLOCK(fdp); -int -capability_chown(struct file *fp, uid_t uid, gid_t gid, - struct ucred *active_cred, struct thread *td) -{ + td->td_retval[0] = newfd; - panic("capability_chown"); + return (0); } #else /* !CAPABILITIES */ @@ -524,42 +567,54 @@ * Stub Capability functions for when options CAPABILITIES isn't compiled * into the kernel. */ + +int +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) +{ + + return (ENOSYS); +} + int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { return (ENOSYS); } int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { return (ENOSYS); } int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap: saw capability")); + return (ENOSYS); +} + +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) +{ - *fpp = fp_cap; - return (0); + return (ENOSYS); } int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap_mmap: saw capability")); + return (ENOSYS); +} + +int +sys_cap_new(struct thread *td, struct cap_new_args *uap) +{ - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + return (ENOSYS); } #endif /* CAPABILITIES */ --- sys/kern/sys_generic.c.orig +++ sys/kern/sys_generic.c 
@@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -244,7 +245,7 @@ struct file *fp; int error; - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_READ, &fp); if (error) return (error); error = dofileread(td, fd, fp, auio, (off_t)-1, 0); @@ -287,7 +288,7 @@ struct file *fp; int error; - error = fget_read(td, fd, CAP_READ, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -453,7 +454,7 @@ struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_WRITE, &fp); if (error) return (error); error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); @@ -496,7 +497,7 @@ struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -704,28 +705,60 @@ { struct file *fp; struct filedesc *fdp; - int error; - int tmp; + int error, tmp, locked; AUDIT_ARG_FD(fd); AUDIT_ARG_CMD(com); - if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) - return (error); + + fdp = td->td_proc->p_fd; + + switch (com) { + case FIONCLEX: + case FIOCLEX: + FILEDESC_XLOCK(fdp); + locked = LA_XLOCKED; + break; + default: +#ifdef CAPABILITIES + FILEDESC_SLOCK(fdp); + locked = LA_SLOCKED; +#else + locked = LA_UNLOCKED; +#endif + break; + } + +#ifdef CAPABILITIES + if ((fp = fget_locked(fdp, fd)) == NULL) { + error = EBADF; + goto out; + } + if ((error = cap_ioctl_check(fdp, fd, com)) != 0) { + fp = NULL; /* fhold() was not called yet */ + goto out; + } + fhold(fp); + if (locked == LA_SLOCKED) { + FILEDESC_SUNLOCK(fdp); + locked = LA_UNLOCKED; + } +#else + if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) { + fp = NULL; + goto out; + } +#endif if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - fdrop(fp, td); - return (EBADF); + error = EBADF; + goto out; } - fdp = td->td_proc->p_fd; + 
switch (com) { case FIONCLEX: - FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; goto out; case FIOCLEX: - FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; goto out; case FIONBIO: if ((tmp = *(int *)data)) @@ -745,7 +778,21 @@ error = fo_ioctl(fp, com, data, td->td_ucred, td); out: - fdrop(fp, td); + switch (locked) { + case LA_XLOCKED: + FILEDESC_XUNLOCK(fdp); + break; +#ifdef CAPABILITIES + case LA_SLOCKED: + FILEDESC_SUNLOCK(fdp); + break; +#endif + default: + FILEDESC_UNLOCK_ASSERT(fdp); + break; + } + if (fp != NULL) + fdrop(fp, td); return (error); } @@ -1130,32 +1177,8 @@ static __inline int getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) { - struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; - int error; -#endif - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor references by the capability. - */ - error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); - if (error) { - fdrop(fp, curthread); - return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ - *fpp = fp; - return (0); + return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, 0, fpp, NULL)); } /* @@ -1349,13 +1372,14 @@ /* If the selinfo wasn't cleared the event didn't fire. 
*/ if (si != NULL) continue; - fp = fdp->fd_ofiles[fd->fd]; + fp = fdp->fd_ofiles[fd->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fd->fd), CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fd->revents = POLLNVAL; n++; continue; @@ -1408,9 +1432,8 @@ u_int nfd; { struct filedesc *fdp = td->td_proc->p_fd; - int i; struct file *fp; - int n = 0; + int i, n = 0; FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { @@ -1420,13 +1443,15 @@ } else if (fds->fd < 0) { fds->revents = 0; } else { - fp = fdp->fd_ofiles[fds->fd]; + fp = fdp->fd_ofiles[fds->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fds->fd), + CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fds->revents = POLLNVAL; n++; } else { --- sys/kern/syscalls.c.orig +++ sys/kern/syscalls.c @@ -522,7 +522,7 @@ "shmctl", /* 512 = shmctl */ "lpathconf", /* 513 = lpathconf */ "cap_new", /* 514 = cap_new */ - "cap_getrights", /* 515 = cap_getrights */ + "cap_rights_get", /* 515 = cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "pdfork", /* 518 = pdfork */ @@ -540,4 +540,9 @@ "posix_fallocate", /* 530 = posix_fallocate */ "posix_fadvise", /* 531 = posix_fadvise */ "wait6", /* 532 = wait6 */ + "cap_rights_limit", /* 533 = cap_rights_limit */ + "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ + "cap_ioctls_get", /* 535 = cap_ioctls_get */ + "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ + "cap_fcntls_get", /* 537 = cap_fcntls_get */ }; --- sys/kern/syscalls.master.orig +++ sys/kern/syscalls.master @@ -917,7 +917,7 @@ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS STD { int 
cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET STD { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); } @@ -955,5 +955,15 @@ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master --- sys/kern/systrace_args.c.orig +++ sys/kern/systrace_args.c @@ -3134,9 +3134,9 @@ *n_args = 2; break; } - /* cap_getrights */ + /* cap_rights_get */ case 515: { - struct cap_getrights_args *p = params; + struct cap_rights_get_args *p = params; iarg[0] = p->fd; /* int */ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */ *n_args = 2; @@ -3286,6 +3286,48 @@ *n_args = 6; break; } + /* cap_rights_limit */ + case 533: { + struct cap_rights_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->rights; /* uint64_t */ + *n_args = 2; + break; + } + /* cap_ioctls_limit */ + case 534: { + struct cap_ioctls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* const u_long * */ + uarg[2] = p->ncmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_ioctls_get */ + case 535: { + struct cap_ioctls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* u_long * */ + uarg[2] = p->maxcmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_fcntls_limit */ + case 536: { + struct cap_fcntls_limit_args *p = params; + iarg[0] = p->fd; 
/* int */ + uarg[1] = p->fcntlrights; /* uint32_t */ + *n_args = 2; + break; + } + /* cap_fcntls_get */ + case 537: { + struct cap_fcntls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->fcntlrightsp; /* uint32_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -8477,7 +8519,7 @@ break; }; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: switch(ndx) { case 0: @@ -8745,6 +8787,77 @@ break; }; break; + /* cap_rights_limit */ + case 533: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint64_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_limit */ + case 534: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "const u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_get */ + case 535: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_limit */ + case 536: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_get */ + case 537: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -10556,7 +10669,7 @@ if (ndx == 0 || ndx == 1) p = "int"; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: if (ndx == 0 || ndx == 1) p = "int"; @@ -10638,6 +10751,31 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* cap_rights_limit */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_limit */ + case 534: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_get */ + case 535: + if (ndx == 0 || ndx == 1) + p = "ssize_t"; + break; + /* cap_fcntls_limit */ + case 536: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_fcntls_get */ + case 537: + if (ndx == 0 || ndx == 1) + p = "int"; + break; 
default: break; }; --- sys/kern/tty.c.orig +++ sys/kern/tty.c @@ -1840,23 +1840,15 @@ int error, ref; /* Validate the file descriptor. */ - if ((fdp = p->p_fd) == NULL) - return (EBADF); - - fp = fget_unlocked(fdp, fd); - if (fp == NULL) - return (EBADF); + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, CAP_TTYHOOK, 0, &fp, NULL); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } -#ifdef CAPABILITIES - error = cap_funwrap(fp, CAP_TTYHOOK, &fp); - if (error) - goto done1; -#endif - /* * Make sure the vnode is bound to a character device. * Unlocked check for the vnode type is ok there, because we --- sys/kern/uipc_mqueue.c.orig +++ sys/kern/uipc_mqueue.c @@ -45,6 +45,7 @@ #include __FBSDID("$FreeBSD: head/sys/kern/uipc_mqueue.c 242833 2012-11-09 18:02:25Z attilio $"); +#include "opt_capsicum.h" #include "opt_compat.h" #include @@ -2032,8 +2033,8 @@ &mqueueops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); @@ -2275,11 +2276,13 @@ error = EBADF; goto out; } - error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2); +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT); if (error) { FILEDESC_SUNLOCK(fdp); goto out; } +#endif if (fp2 != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; --- sys/kern/uipc_sem.c.orig +++ sys/kern/uipc_sem.c @@ -579,8 +579,8 @@ finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); fdrop(fp, td); --- sys/kern/uipc_shm.c.orig +++ sys/kern/uipc_shm.c @@ -629,8 +629,8 @@ finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) 
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); --- sys/kern/uipc_syscalls.c.orig +++ sys/kern/uipc_syscalls.c @@ -121,38 +121,20 @@ "Number of sendfile(2) sf_bufs in use"); /* - * Convert a user file descriptor to a kernel file entry and check that, if - * it is a capability, the right rights are present. A reference on the file - * entry is held upon returning. + * Convert a user file descriptor to a kernel file entry and check if required + * capability rights are present. + * A reference on the file entry is held upon returning. */ static int getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp, u_int *fflagp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor referenced by the capability. - */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (ENOTSOCK); @@ -765,7 +747,7 @@ #endif AUDIT_ARG_FD(s); - rights = CAP_WRITE; + rights = CAP_SEND; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, mp->msg_name); rights |= CAP_CONNECT; @@ -974,7 +956,7 @@ *controlp = NULL; AUDIT_ARG_FD(s); - error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1850,7 +1832,11 @@ * we send only the header/trailer and no payload data. 
*/ AUDIT_ARG_FD(uap->fd); - if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0) + /* + * sendfile(2) can start at any offset within a file so we require + * CAP_READ+CAP_SEEK = CAP_PREAD. + */ + if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) goto out; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { @@ -1886,7 +1872,7 @@ * The socket must be a stream socket and connected. * Remember if it a blocking or non-blocking socket. */ - if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE, + if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, &sock_fp, NULL)) != 0) goto out; so = sock_fp->f_data; @@ -2423,7 +2409,7 @@ u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2534,7 +2520,7 @@ return (error); u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2658,7 +2644,7 @@ #endif AUDIT_ARG_FD(uap->sd); - error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); if (error) { return (error); } --- sys/kern/uipc_usrreq.c.orig +++ sys/kern/uipc_usrreq.c @@ -279,7 +279,7 @@ static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_discard(struct file *); -static void unp_freerights(struct file **, int); +static void unp_freerights(struct filedescent *, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); @@ -1642,14 +1642,14 @@ } static void -unp_freerights(struct file **rp, int fdcount) +unp_freerights(struct filedescent *fde, int fdcount) { + struct file *fp; int i; - struct file *fp; - for (i = 0; i < fdcount; i++) { - fp = *rp; - *rp++ = NULL; + for (i = 0; i < fdcount; i++, fde++) { + fp = fde->fde_file; + 
bzero(fde, sizeof(*fde)); unp_discard(fp); } } @@ -1661,8 +1661,8 @@ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; - struct file **rp; - struct file *fp; + struct filedesc *fdesc = td->td_proc->p_fd; + struct filedescent *fde, *fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; @@ -1683,20 +1683,20 @@ datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - newfds = datalen / sizeof(struct file *); - rp = data; + newfds = datalen / sizeof(*fdep); + fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } - FILEDESC_XLOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(fdesc); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } @@ -1710,23 +1710,24 @@ *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = E2BIG; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < newfds; i++) { + for (i = 0; i < newfds; i++, fdep++, fdp++) { if (fdalloc(td, 0, &f)) panic("unp_externalize fdalloc failed"); - fp = *rp++; - td->td_proc->p_fd->fd_ofiles[f] = fp; - unp_externalize_fp(fp); - *fdp++ = f; + fde = &fdesc->fd_ofiles[f]; + fde->fde_file = fdep->fde_file; + filecaps_copy(&fdep->fde_caps, &fde->fde_caps); + unp_externalize_fp(fde->fde_file); + *fdp = f; } - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); } else { /* We can just copy anything else across. 
*/ if (error || controlp == NULL) @@ -1797,11 +1798,11 @@ { struct mbuf *control = *controlp; struct proc *p = td->td_proc; - struct filedesc *fdescp = p->p_fd; + struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; - struct file **rp; + struct filedescent *fde, *fdep; struct file *fp; struct timeval *tv; int i, fd, *fdp; @@ -1854,18 +1855,17 @@ * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_SLOCK(fdescp); + FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++) { fd = *fdp++; - if (fd < 0 || fd >= fdescp->fd_nfiles || - fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_SUNLOCK(fdescp); + if (fget_locked(fdesc, fd) == NULL) { + FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } - fp = fdescp->fd_ofiles[fd]; + fp = fdesc->fd_ofiles[fd].fde_file; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } @@ -1874,25 +1874,26 @@ /* * Now replace the integer FDs with pointers to the - * associated global file table entry.. + * file structure and capability rights.
*/ - newlen = oldfds * sizeof(struct file *); + newlen = oldfds * sizeof(*fdep); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; - rp = (struct file **) + fdep = (struct filedescent *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < oldfds; i++) { - fp = fdescp->fd_ofiles[*fdp++]; - *rp++ = fp; - unp_internalize_fp(fp); + for (i = 0; i < oldfds; i++, fdep++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep->fde_file = fde->fde_file; + filecaps_copy(&fde->fde_caps, &fdep->fde_caps); + unp_internalize_fp(fdep->fde_file); } - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: @@ -2252,7 +2253,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) { struct mbuf *m; - struct file **rp; + struct filedescent *fdep; struct cmsghdr *cm; void *data; int i; @@ -2277,10 +2278,10 @@ if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - qfds = datalen / sizeof (struct file *); - rp = data; - for (i = 0; i < qfds; i++) - (*op)(*rp++); + qfds = datalen / sizeof(*fdep); + fdep = data; + for (i = 0; i < qfds; i++, fdep++) + (*op)(fdep->fde_file); } if (CMSG_SPACE(datalen) < clen) { --- sys/kern/vfs_aio.c.orig +++ sys/kern/vfs_aio.c @@ -1593,16 +1593,16 @@ fd = aiocbe->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); break; case LIO_READ: - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); break; case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; case LIO_NOP: - error = fget(td, fd, 0, &fp); + error = fget(td, fd, CAP_NONE, &fp); break; default: error = EINVAL; --- sys/kern/vfs_lookup.c.orig +++ sys/kern/vfs_lookup.c @@ -227,17 +227,18 @@ AUDIT_ARG_ATFD2(ndp->ni_dirfd); error = fgetvp_rights(td, ndp->ni_dirfd, 
ndp->ni_rightsneeded | CAP_LOOKUP, - &(ndp->ni_baserights), &dp); + &ndp->ni_filecaps, &dp); #ifdef CAPABILITIES /* - * Lookups relative to a capability must also be + * If file descriptor doesn't have all rights, + * all lookups relative to it must also be * strictly relative. - * - * Note that a capability with rights CAP_MASK_VALID - * is treated exactly like a regular file descriptor. */ - if (ndp->ni_baserights != CAP_MASK_VALID) + if (ndp->ni_filecaps.fc_rights != CAP_ALL || + ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || + ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_strictrelative = 1; + } #endif } if (error != 0 || dp != NULL) { --- sys/kern/vfs_syscalls.c.orig +++ sys/kern/vfs_syscalls.c @@ -970,6 +970,8 @@ /* FALLTHROUGH */ case O_WRONLY: rights |= CAP_WRITE; + if (!(flags & O_APPEND)) + rights |= CAP_SEEK; break; } } @@ -1143,19 +1145,22 @@ * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { + struct filecaps *fcaps; + #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) { - /* - * We are doing a strict relative lookup; wrap the - * result in a capability. - */ - if ((error = kern_capwrap(td, fp, nd.ni_baserights, - &indx)) != 0) - goto bad; - } else + if (nd.ni_strictrelative == 1) + fcaps = &nd.ni_filecaps; + else #endif - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad; + fcaps = NULL; + error = finstall(td, fp, &indx, flags, fcaps); + /* On success finstall() consumes fcaps. 
*/ + if (error != 0) { + filecaps_free(&nd.ni_filecaps); + goto bad; + } + } else { + filecaps_free(&nd.ni_filecaps); } /* @@ -1279,7 +1284,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKNOD, td); + pathseg, path, fd, CAP_MKNODAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -1399,7 +1404,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKFIFO, td); + pathseg, path, fd, CAP_MKFIFOAT, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { @@ -1553,7 +1558,7 @@ return (error); } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, - segflg, path2, fd2, CAP_CREATE, td); + segflg, path2, fd2, CAP_LINKAT, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) @@ -1646,7 +1651,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path2, fd, CAP_CREATE, td); + segflg, path2, fd, CAP_SYMLINKAT, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { @@ -1798,7 +1803,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_DELETE, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? 
EPERM : error); vp = nd.ni_vp; @@ -3502,10 +3507,10 @@ bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | - AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); + AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, - pathseg, old, oldfd, CAP_DELETE, td); + pathseg, old, oldfd, CAP_RENAMEAT, td); #endif if ((error = namei(&fromnd)) != 0) @@ -3527,7 +3532,7 @@ goto out1; } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | - SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, td); + SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { @@ -3550,6 +3555,13 @@ error = EISDIR; goto out; } + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. + */ + error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); + if (error != 0) + goto out; } if (fvp == tdvp) { error = EINVAL; @@ -3650,7 +3662,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path, fd, CAP_MKDIR, td); + segflg, path, fd, CAP_MKDIRAT, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); @@ -3734,7 +3746,7 @@ restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_RMDIR, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -3987,8 +3999,7 @@ if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; - if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, - &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -4151,33 +4162,14 @@ * entry is held upon returning. 
*/ int -getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, - struct file **fpp) +getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use the - * file descriptor referenced by the capability. - */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ /* * The file could be not of the vnode type, or it may be not @@ -4361,7 +4353,7 @@ goto bad; } - error = finstall(td, fp, &indx, fmode); + error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; @@ -4614,7 +4606,7 @@ return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? 
*/ - error = fget(td, fd, 0, &fp); + error = fget(td, fd, CAP_NONE, &fp); if (error != 0) goto out; --- sys/netsmb/smb_dev.c.orig +++ sys/netsmb/smb_dev.c @@ -399,9 +399,7 @@ struct file* fp; FILEDESC_SLOCK(fdp); - if (fd < 0 || fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fp->f_flag & flag) == 0) { + if ((fp = fget_locked(fdp, fd)) == NULL || (fp->f_flag & flag) == 0) { FILEDESC_SUNLOCK(fdp); return (NULL); } --- sys/nfsserver/nfs_srvkrpc.c.orig +++ sys/nfsserver/nfs_srvkrpc.c @@ -174,7 +174,8 @@ sizeof(addsockarg)); if (error) return (error); - if ((error = fget(td, addsockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + error = fget(td, addsockarg.sock, CAP_SOCK_SERVER, &fp); + if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); --- sys/ofed/include/linux/file.h.orig +++ sys/ofed/include/linux/file.h @@ -47,7 +47,8 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + return (NULL); return (struct linux_file *)file->f_data; } @@ -69,8 +70,7 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); - if (file == NULL) + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) return; fdclose(curthread->td_proc->p_fd, file, fd, curthread); } @@ -80,7 +80,8 @@ { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + file = NULL; filp->_file = file; finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); } --- sys/security/audit/audit.h.orig +++ sys/security/audit/audit.h @@ -115,6 +115,7 @@ void audit_arg_argv(char *argv, int argc, int length); void audit_arg_envv(char *envv, int envc, int length); void audit_arg_rights(cap_rights_t rights); +void audit_arg_fcntl_rights(uint32_t fcntlrights); void audit_sysclose(struct thread *td, int fd); void audit_cred_copy(struct ucred *src, struct ucred *dest); 
void audit_cred_destroy(struct ucred *cred); @@ -241,6 +242,11 @@ audit_arg_rights((rights)); \ } while (0) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_fcntl_rights((fcntlrights)); \ +} while (0) + #define AUDIT_ARG_RUID(ruid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_ruid((ruid)); \ @@ -354,6 +360,7 @@ #define AUDIT_ARG_PROCESS(p) #define AUDIT_ARG_RGID(rgid) #define AUDIT_ARG_RIGHTS(rights) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) #define AUDIT_ARG_RUID(ruid) #define AUDIT_ARG_SIGNUM(signum) #define AUDIT_ARG_SGID(sgid) --- sys/security/audit/audit_arg.c.orig +++ sys/security/audit/audit_arg.c @@ -871,6 +871,19 @@ ARG_SET_VALID(ar, ARG_RIGHTS); } +void +audit_arg_fcntl_rights(uint32_t fcntlrights) +{ + struct kaudit_record *ar; + + ar = currecord(); + if (ar == NULL) + return; + + ar->k_ar.ar_arg_fcntl_rights = fcntlrights; + ARG_SET_VALID(ar, ARG_FCNTL_RIGHTS); +} + /* * The close() system call uses it's own audit call to capture the path/vnode * information because those pieces are not easily obtained within the system --- sys/security/audit/audit_bsm.c.orig +++ sys/security/audit/audit_bsm.c @@ -1597,6 +1597,7 @@ break; case AUE_CAP_NEW: + case AUE_CAP_RIGHTS_LIMIT: /* * XXXRW/XXXJA: Would be nice to audit socket/etc information. 
*/ @@ -1607,13 +1608,25 @@ } break; - case AUE_CAP_GETRIGHTS: + case AUE_CAP_FCNTLS_GET: + case AUE_CAP_IOCTLS_GET: + case AUE_CAP_IOCTLS_LIMIT: + case AUE_CAP_RIGHTS_GET: if (ARG_IS_VALID(kar, ARG_FD)) { tok = au_to_arg32(1, "fd", ar->ar_arg_fd); kau_write(rec, tok); } break; + case AUE_CAP_FCNTLS_LIMIT: + FD_VNODE1_TOKENS; + if (ARG_IS_VALID(kar, ARG_FCNTL_RIGHTS)) { + tok = au_to_arg32(2, "fcntlrights", + ar->ar_arg_fcntl_rights); + kau_write(rec, tok); + } + break; + case AUE_CAP_ENTER: case AUE_CAP_GETMODE: break; --- sys/security/audit/audit_private.h.orig +++ sys/security/audit/audit_private.h @@ -230,6 +230,7 @@ int ar_arg_exitretval; struct sockaddr_storage ar_arg_sockaddr; cap_rights_t ar_arg_rights; + uint32_t ar_arg_fcntl_rights; char ar_jailname[MAXHOSTNAMELEN]; }; @@ -291,6 +292,7 @@ #define ARG_ATFD1 0x0004000000000000ULL #define ARG_ATFD2 0x0008000000000000ULL #define ARG_RIGHTS 0x0010000000000000ULL +#define ARG_FCNTL_RIGHTS 0x0020000000000000ULL #define ARG_NONE 0x0000000000000000ULL #define ARG_ALL 0xFFFFFFFFFFFFFFFFULL --- sys/sys/capability.h.orig +++ sys/sys/capability.h @@ -1,10 +1,14 @@ /*- * Copyright (c) 2008-2010 Robert N. M. Watson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -36,9 +40,10 @@ #define _SYS_CAPABILITY_H_ #include -#include +#include #include +#include /* * Possible rights on capabilities. @@ -54,34 +59,69 @@ * involve reads or writes depending a great deal on context. */ -/* General file I/O. 
*/ -#define CAP_READ 0x0000000000000001ULL /* read/recv */ -#define CAP_WRITE 0x0000000000000002ULL /* write/send */ -#define CAP_MMAP 0x0000000000000004ULL /* mmap */ -#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */ +#define CAP_NONE 0x0000000000000000ULL + +/* + * General file I/O. + */ +/* Allows for openat(O_RDONLY), read(2), readv(2). */ +#define CAP_READ 0x0000000000000001ULL +/* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */ +#define CAP_WRITE 0x0000000000000002ULL +/* Allows for lseek(2). */ +#define CAP_SEEK 0x0000000000000080ULL +/* Allows for pread(2), preadv(2). */ +#define CAP_PREAD (CAP_SEEK | CAP_READ) +/* Allows for openat(O_WRONLY) (without O_APPEND), pwrite(2), pwritev(2). */ +#define CAP_PWRITE (CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_NONE). */ +#define CAP_MMAP 0x0000000000000004ULL +/* Allows for mmap(PROT_READ). */ +#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) +/* Allows for mmap(PROT_WRITE). */ +#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_EXEC). */ +#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) +/* Allows for mmap(PROT_READ | PROT_WRITE). */ +#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) +/* Allows for mmap(PROT_READ | PROT_EXEC). */ +#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) +/* Allows for mmap(PROT_WRITE | PROT_EXEC). */ +#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) +/* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). */ +#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) +/* Allows for openat(O_CREAT). */ +#define CAP_CREATE 0x0000000000080000ULL +/* Allows for openat(O_EXEC) and fexecve(2) in turn. */ #define CAP_FEXECVE 0x0000000000000010ULL +/* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2). */ #define CAP_FSYNC 0x0000000000000020ULL +/* Allows for openat(O_TRUNC), ftruncate(2). */ #define CAP_FTRUNCATE 0x0000000000000040ULL -#define CAP_SEEK 0x0000000000000080ULL /* VFS methods. 
*/ +#define CAP_FCHDIR 0x0000000000000200ULL #define CAP_FCHFLAGS 0x0000000000000100ULL -#define CAP_FCHDIR 0x0000000000000200ULL #define CAP_FCHMOD 0x0000000000000400ULL +#define CAP_FCHMODAT CAP_FCHMOD #define CAP_FCHOWN 0x0000000000000800ULL +#define CAP_FCHOWNAT CAP_FCHOWN #define CAP_FCNTL 0x0000000000001000ULL +#define CAP_FLOCK 0x0000000000004000ULL #define CAP_FPATHCONF 0x0000000000002000ULL -#define CAP_FLOCK 0x0000000000004000ULL #define CAP_FSCK 0x0000000000008000ULL #define CAP_FSTAT 0x0000000000010000ULL +#define CAP_FSTATAT CAP_FSTAT #define CAP_FSTATFS 0x0000000000020000ULL #define CAP_FUTIMES 0x0000000000040000ULL -#define CAP_CREATE 0x0000000000080000ULL -#define CAP_DELETE 0x0000000000100000ULL -#define CAP_MKDIR 0x0000000000200000ULL -#define CAP_RMDIR 0x0000000000400000ULL -#define CAP_MKFIFO 0x0000000000800000ULL -#define CAP_MKNOD 0x0080000000000000ULL +#define CAP_FUTIMESAT CAP_FUTIMES +#define CAP_LINKAT 0x0000000000400000ULL +#define CAP_MKDIRAT 0x0000000000200000ULL +#define CAP_MKFIFOAT 0x0000000000800000ULL +#define CAP_MKNODAT 0x0080000000000000ULL +#define CAP_RENAMEAT 0x0200000000000000ULL +#define CAP_SYMLINKAT 0x0100000000000000ULL +#define CAP_UNLINKAT 0x0000000000100000ULL /* Lookups - used to constrain *at() calls. 
*/ #define CAP_LOOKUP 0x0000000001000000ULL @@ -107,13 +147,18 @@ #define CAP_GETSOCKOPT 0x0000004000000000ULL #define CAP_LISTEN 0x0000008000000000ULL #define CAP_PEELOFF 0x0000010000000000ULL +#define CAP_RECV CAP_READ +#define CAP_SEND CAP_WRITE #define CAP_SETSOCKOPT 0x0000020000000000ULL #define CAP_SHUTDOWN 0x0000040000000000ULL -#define CAP_SOCK_ALL \ - (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \ - | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \ - | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_CLIENT \ + (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ + CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_SERVER \ + (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ + CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ + CAP_SETSOCKOPT | CAP_SHUTDOWN) /* Mandatory Access Control. */ #define CAP_MAC_GET 0x0000080000000000ULL @@ -138,40 +183,77 @@ #define CAP_PDKILL 0x0040000000000000ULL /* The mask of all valid method rights. */ -#define CAP_MASK_VALID 0x00ffffffffffffffULL +#define CAP_MASK_VALID 0x03ffffffffffffffULL +#define CAP_ALL CAP_MASK_VALID + +/* Available bits. */ +#define CAP_UNUSED5 0x0400000000000000ULL +#define CAP_UNUSED4 0x0800000000000000ULL +#define CAP_UNUSED3 0x1000000000000000ULL +#define CAP_UNUSED2 0x2000000000000000ULL +#define CAP_UNUSED1 0x4000000000000000ULL +#define CAP_UNUSED0 0x8000000000000000ULL + +/* + * The following defines are provided for backward API compatibility and + * should not be used in new code. + */ +#define CAP_MAPEXEC CAP_MMAP_X +#define CAP_DELETE CAP_UNLINKAT +#define CAP_MKDIR CAP_MKDIRAT +#define CAP_RMDIR CAP_UNLINKAT +#define CAP_MKFIFO CAP_MKFIFOAT +#define CAP_MKNOD CAP_MKNODAT +#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) + +/* + * Allowed fcntl(2) commands. 
+ */ +#define CAP_FCNTL_GETFL (1 << F_GETFL) +#define CAP_FCNTL_SETFL (1 << F_SETFL) +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_GETOWN (1 << F_GETOWN) +#define CAP_FCNTL_SETOWN (1 << F_SETOWN) +#endif +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \ + CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN) +#else +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) +#endif + +#define CAP_IOCTLS_ALL SSIZE_MAX #ifdef _KERNEL -#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) +#include + +#define IN_CAPABILITY_MODE(td) (((td)->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) + +struct filedesc; /* - * Create a capability to wrap a file object. + * Test whether a capability grants the requested rights. */ -int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfd); - +int cap_check(cap_rights_t have, cap_rights_t need); /* - * Unwrap a capability if its rights mask is a superset of 'rights'. - * - * Unwrapping a non-capability is effectively a no-op; the value of fp_cap - * is simply copied into fpp. + * Convert capability rights into VM access flags. */ -int cap_funwrap(struct file *fp_cap, cap_rights_t rights, - struct file **fpp); -int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, - u_char *maxprotp, struct file **fpp); +u_char cap_rights_to_vmprot(cap_rights_t have); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to - * extract the rights from a capability. However, this should not be used by - * kernel code generally, instead cap_funwrap() should be used in order to - * keep all access control in one place. + * extract the rights from a capability.
*/ -cap_rights_t cap_rights(struct file *fp_cap); +cap_rights_t cap_rights(struct filedesc *fdp, int fd); + +int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); +int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); #else /* !_KERNEL */ __BEGIN_DECLS +#include /* * cap_enter(): Cause the process to enter capability mode, which will @@ -187,21 +269,46 @@ int cap_enter(void); /* + * Are we sandboxed (in capability mode)? + * This is a libc wrapper around the cap_getmode(2) system call. + */ +bool cap_sandboxed(void); + +/* * cap_getmode(): Are we in capability mode? */ -int cap_getmode(u_int* modep); +int cap_getmode(u_int *modep); /* - * cap_new(): Create a new capability derived from an existing file - * descriptor with the specified rights. If the existing file descriptor is - * a capability, then the new rights must be a subset of the existing rights. + * Limits capability rights for the given descriptor (CAP_*). + */ +int cap_rights_limit(int fd, cap_rights_t rights); +/* + * Returns bitmask of capability rights for the given descriptor. + */ +int cap_rights_get(int fd, cap_rights_t *rightsp); +/* + * Limits allowed ioctls for the given descriptor. + */ +int cap_ioctls_limit(int fd, const unsigned long *cmds, size_t ncmds); +/* + * Returns array of allowed ioctls for the given descriptor. + * If all ioctls are allowed, the cmds array is not populated and + * the function returns CAP_IOCTLS_ALL. + */ +ssize_t cap_ioctls_get(int fd, unsigned long *cmds, size_t maxcmds); +/* + * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*). */ -int cap_new(int fd, cap_rights_t rights); - +int cap_fcntls_limit(int fd, uint32_t fcntlrights); /* - * cap_getrights(): Query the rights on a capability. + * Returns bitmask of allowed fcntls for the given descriptor. */ -int cap_getrights(int fd, cap_rights_t *rightsp); +int cap_fcntls_get(int fd, uint32_t *fcntlrightsp); + +/* For backward compatibility. 
*/ +int cap_new(int fd, cap_rights_t rights); +#define cap_getrights(fd, rightsp) cap_rights_get((fd), (rightsp)) __END_DECLS --- sys/sys/file.h.orig +++ sys/sys/file.h @@ -64,12 +64,12 @@ #define DTYPE_SEM 9 /* posix semaphore */ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ -#define DTYPE_CAPABILITY 12 /* capability */ -#define DTYPE_PROCDESC 13 /* process descriptor */ +#define DTYPE_PROCDESC 12 /* process descriptor */ #ifdef _KERNEL struct file; +struct filecaps; struct ucred; #define FOF_OFFSET 0x01 /* Use the offset in uio argument */ @@ -217,7 +217,6 @@ struct file **fpp); int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); -int fgetcap(struct thread *td, int fd, struct file **fpp); int _fdrop(struct file *fp, struct thread *td); /* @@ -242,7 +241,7 @@ int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_rights(struct thread *td, int fd, cap_rights_t need, - cap_rights_t *have, struct vnode **vpp); + struct filecaps *havecaps, struct vnode **vpp); int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_write(struct thread *td, int fd, cap_rights_t rights, --- sys/sys/filedesc.h.orig +++ sys/sys/filedesc.h @@ -41,6 +41,23 @@ #include +struct filecaps { + cap_rights_t fc_rights; /* per-descriptor capability rights */ + uint32_t fc_fcntls; /* per-descriptor allowed fcntls */ + u_long *fc_ioctls; /* per-descriptor allowed ioctls */ + int16_t fc_nioctls; /* fc_ioctls array size */ +}; + +struct filedescent { + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ +}; +#define fde_rights fde_caps.fc_rights +#define fde_fcntls fde_caps.fc_fcntls +#define fde_ioctls fde_caps.fc_ioctls +#define fde_nioctls fde_caps.fc_nioctls + /* * This structure is used for the management of 
descriptors. It may be * shared by multiple processes. @@ -48,8 +65,7 @@ #define NDSLOTTYPE u_long struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ - char *fd_ofileflags; /* per-process open file flags */ + struct filedescent *fd_ofiles; /* open files */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ @@ -92,6 +108,15 @@ #ifdef _KERNEL +#include /* CTASSERT() */ + +CTASSERT(sizeof(cap_rights_t) == sizeof(uint64_t)); + +/* Flags for do_dup() */ +#define DUP_FIXED 0x1 /* Force fixed allocation. */ +#define DUP_FCNTL 0x2 /* fcntl()-style errors. */ +#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ + /* Lock a file descriptor table. */ #define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") #define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) @@ -109,13 +134,20 @@ struct thread; +void filecaps_init(struct filecaps *fcaps); +void filecaps_copy(const struct filecaps *src, struct filecaps *dst); +void filecaps_free(struct filecaps *fcaps); + int closef(struct file *fp, struct thread *td); +int do_dup(struct thread *td, int flags, int old, int new, + register_t *retval); int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp); int falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags); int falloc_noinstall(struct thread *td, struct file **resultfp); -int finstall(struct thread *td, struct file *fp, int *resultfp, int flags); +int finstall(struct thread *td, struct file *fp, int *resultfp, int flags, + struct filecaps *fcaps); int fdalloc(struct thread *td, int minfd, int *result); int fdavail(struct thread *td, int n); int fdcheckstd(struct thread *td); @@ -135,7 +167,8 @@ void setugidsafety(struct thread *td); /* Return a referenced file from an unlocked descriptor. 
*/ -struct file *fget_unlocked(struct filedesc *fdp, int fd); +int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp); /* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */ static __inline struct file * @@ -147,7 +180,7 @@ if (fd < 0 || fd >= fdp->fd_nfiles) return (NULL); - return (fdp->fd_ofiles[fd]); + return (fdp->fd_ofiles[fd].fde_file); } #endif /* _KERNEL */ --- sys/sys/namei.h.orig +++ sys/sys/namei.h @@ -33,6 +33,7 @@ #ifndef _SYS_NAMEI_H_ #define _SYS_NAMEI_H_ +#include #include #include @@ -75,7 +76,7 @@ /* * Results: returned from namei */ - cap_rights_t ni_baserights; /* rights the *at base has (or -1) */ + struct filecaps ni_filecaps; /* rights the *at base has */ /* * Results: returned from/manipulated by lookup */ @@ -180,7 +181,7 @@ ndp->ni_startdir = startdir; ndp->ni_strictrelative = 0; ndp->ni_rightsneeded = rights; - ndp->ni_baserights = 0; + filecaps_init(&ndp->ni_filecaps); ndp->ni_cnd.cn_thread = td; } --- sys/sys/syscall.h.orig +++ sys/sys/syscall.h @@ -435,7 +435,7 @@ #define SYS_shmctl 512 #define SYS_lpathconf 513 #define SYS_cap_new 514 -#define SYS_cap_getrights 515 +#define SYS_cap_rights_get 515 #define SYS_cap_enter 516 #define SYS_cap_getmode 517 #define SYS_pdfork 518 @@ -452,4 +452,9 @@ #define SYS_posix_fallocate 530 #define SYS_posix_fadvise 531 #define SYS_wait6 532 -#define SYS_MAXSYSCALL 533 +#define SYS_cap_rights_limit 533 +#define SYS_cap_ioctls_limit 534 +#define SYS_cap_ioctls_get 535 +#define SYS_cap_fcntls_limit 536 +#define SYS_cap_fcntls_get 537 +#define SYS_MAXSYSCALL 538 --- sys/sys/syscall.mk.orig +++ sys/sys/syscall.mk @@ -384,7 +384,7 @@ shmctl.o \ lpathconf.o \ cap_new.o \ - cap_getrights.o \ + cap_rights_get.o \ cap_enter.o \ cap_getmode.o \ pdfork.o \ @@ -400,4 +400,9 @@ rctl_remove_rule.o \ posix_fallocate.o \ posix_fadvise.o \ - wait6.o + wait6.o \ + cap_rights_limit.o \ + cap_ioctls_limit.o \ + cap_ioctls_get.o 
\ + cap_fcntls_limit.o \ + cap_fcntls_get.o --- sys/sys/sysproto.h.orig +++ sys/sys/sysproto.h @@ -1676,7 +1676,7 @@ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char rights_l_[PADL_(uint64_t)]; uint64_t rights; char rights_r_[PADR_(uint64_t)]; }; -struct cap_getrights_args { +struct cap_rights_get_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char rightsp_l_[PADL_(uint64_t *)]; uint64_t * rightsp; char rightsp_r_[PADR_(uint64_t *)]; }; @@ -1762,6 +1762,28 @@ char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)]; char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)]; }; +struct cap_rights_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char rights_l_[PADL_(uint64_t)]; uint64_t rights; char rights_r_[PADR_(uint64_t)]; +}; +struct cap_ioctls_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cmds_l_[PADL_(const u_long *)]; const u_long * cmds; char cmds_r_[PADR_(const u_long *)]; + char ncmds_l_[PADL_(size_t)]; size_t ncmds; char ncmds_r_[PADR_(size_t)]; +}; +struct cap_ioctls_get_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cmds_l_[PADL_(u_long *)]; u_long * cmds; char cmds_r_[PADR_(u_long *)]; + char maxcmds_l_[PADL_(size_t)]; size_t maxcmds; char maxcmds_r_[PADR_(size_t)]; +}; +struct cap_fcntls_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char fcntlrights_l_[PADL_(uint32_t)]; uint32_t fcntlrights; char fcntlrights_r_[PADR_(uint32_t)]; +}; +struct cap_fcntls_get_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char fcntlrightsp_l_[PADL_(uint32_t *)]; uint32_t * fcntlrightsp; char fcntlrightsp_r_[PADR_(uint32_t *)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2127,7 +2149,7 @@ int sys_shmctl(struct 
thread *, struct shmctl_args *); int sys_lpathconf(struct thread *, struct lpathconf_args *); int sys_cap_new(struct thread *, struct cap_new_args *); -int sys_cap_getrights(struct thread *, struct cap_getrights_args *); +int sys_cap_rights_get(struct thread *, struct cap_rights_get_args *); int sys_cap_enter(struct thread *, struct cap_enter_args *); int sys_cap_getmode(struct thread *, struct cap_getmode_args *); int sys_pdfork(struct thread *, struct pdfork_args *); @@ -2144,6 +2166,11 @@ int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *); int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *); int sys_wait6(struct thread *, struct wait6_args *); +int sys_cap_rights_limit(struct thread *, struct cap_rights_limit_args *); +int sys_cap_ioctls_limit(struct thread *, struct cap_ioctls_limit_args *); +int sys_cap_ioctls_get(struct thread *, struct cap_ioctls_get_args *); +int sys_cap_fcntls_limit(struct thread *, struct cap_fcntls_limit_args *); +int sys_cap_fcntls_get(struct thread *, struct cap_fcntls_get_args *); #ifdef COMPAT_43 @@ -2823,7 +2850,7 @@ #define SYS_AUE_shmctl AUE_SHMCTL #define SYS_AUE_lpathconf AUE_LPATHCONF #define SYS_AUE_cap_new AUE_CAP_NEW -#define SYS_AUE_cap_getrights AUE_CAP_GETRIGHTS +#define SYS_AUE_cap_rights_get AUE_CAP_RIGHTS_GET #define SYS_AUE_cap_enter AUE_CAP_ENTER #define SYS_AUE_cap_getmode AUE_CAP_GETMODE #define SYS_AUE_pdfork AUE_PDFORK @@ -2840,6 +2867,11 @@ #define SYS_AUE_posix_fallocate AUE_NULL #define SYS_AUE_posix_fadvise AUE_NULL #define SYS_AUE_wait6 AUE_WAIT6 +#define SYS_AUE_cap_rights_limit AUE_CAP_RIGHTS_LIMIT +#define SYS_AUE_cap_ioctls_limit AUE_CAP_IOCTLS_LIMIT +#define SYS_AUE_cap_ioctls_get AUE_CAP_IOCTLS_GET +#define SYS_AUE_cap_fcntls_limit AUE_CAP_FCNTLS_LIMIT +#define SYS_AUE_cap_fcntls_get AUE_CAP_FCNTLS_GET #undef PAD_ #undef PADL_ --- sys/sys/user.h.orig +++ sys/sys/user.h @@ -251,8 +251,7 @@ #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 -/* no 
KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */ -#define KF_TYPE_PROCDESC 12 +#define KF_TYPE_PROCDESC 11 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -288,7 +287,6 @@ #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 -#define KF_FLAG_CAPABILITY 0x00008000 /* * Old format. Has variable hidden padding due to alignment. --- sys/vm/vm_mmap.c.orig +++ sys/vm/vm_mmap.c @@ -305,13 +305,13 @@ */ rights = CAP_MMAP; if (prot & PROT_READ) - rights |= CAP_READ; + rights |= CAP_MMAP_R; if ((flags & MAP_SHARED) != 0) { if (prot & PROT_WRITE) - rights |= CAP_WRITE; + rights |= CAP_MMAP_W; } if (prot & PROT_EXEC) - rights |= CAP_MAPEXEC; + rights |= CAP_MMAP_X; if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot, &fp)) != 0) goto done; --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/Makefile 2013-02-26 00:15:44.329527558 +0100 @@ -0,0 +1,28 @@ +# $FreeBSD$ + +SYSCALLS= cap_fcntls_limit cap_getmode cap_ioctls_limit + +CFLAGS= -O2 -pipe -std=gnu99 -fstack-protector +CFLAGS+= -Wsystem-headers -Werror -Wall -Wno-format-y2k -W -Wno-unused-parameter +CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes -Wpointer-arith -Wreturn-type +CFLAGS+= -Wcast-qual -Wwrite-strings -Wswitch -Wshadow -Wunused-parameter +CFLAGS+= -Wcast-align -Wchar-subscripts -Winline -Wnested-externs -Wredundant-decls +CFLAGS+= -Wold-style-definition -Wno-pointer-sign + +all: ${SYSCALLS} ${SYSCALLS:=.t} + +.for SYSCALL in ${SYSCALLS} + +${SYSCALL}: ${SYSCALL}.c misc.c + ${CC} ${CFLAGS} ${@}.c misc.c -o $@ + +${SYSCALL}.t: ${SYSCALL} + @printf "#!/bin/sh\n\n%s/%s\n" ${.CURDIR} ${@:.t=} > $@ + +.endfor + +test: all + @prove -r ${.CURDIR} + +clean: + rm -f ${SYSCALLS} ${SYSCALLS:=.t} --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/cap_fcntls_limit.c 2013-02-26 00:15:46.233527417 +0100 @@ -0,0 +1,540 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * 
All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "misc.h" + +static void +fcntl_tests_0(int fd) +{ + uint32_t fcntlrights; + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_ALL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + errno = 0; + CHECK(cap_fcntls_limit(fd, ~CAP_FCNTL_ALL) == -1); + CHECK(errno == EINVAL); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 
0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(cap_fcntls_limit(fd, 0) == 0); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_1(int fd) +{ + uint32_t fcntlrights; + + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_FCNTL) == 0); + + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) 
== 0); + CHECK(fcntlrights == 0); + + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_2(int fd) +{ + uint32_t fcntlrights; + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_FCNTL) == 0); + + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + 
CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_send_0(int sock) +{ + int fd; + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, 0) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); +} + +static void +fcntl_tests_recv_0(int sock) +{ + uint32_t fcntlrights; + int fd; + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_ALL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + + CHECK(fcntl(fd, F_GETFD) == 0); + 
CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + 
errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + + CHECK(close(fd) == 0); +} + +int +main(void) +{ + int fd, pfd, sp[2]; + pid_t pid; + + printf("1..870\n"); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_1(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_2(fd); + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + } else { + CHECK(waitpid(pid, NULL, 0) == pid); + fcntl_tests_0(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + } else { + fcntl_tests_0(fd); + CHECK(waitpid(pid, NULL, 0) == pid); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. 
*/ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = pdfork(&pfd, 0)) >= 0); + if (pid == 0) { + fcntl_tests_1(fd); + exit(0); + } else { + CHECK(pdwait(pfd) == 0); +/* + It fails with EBADF, which I believe is a bug. + CHECK(close(pfd) == 0); +*/ + fcntl_tests_1(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = pdfork(&pfd, 0)) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_1(fd); + exit(0); + } else { + fcntl_tests_1(fd); + CHECK(pdwait(pfd) == 0); +/* + It fails with EBADF, which I believe is a bug. + CHECK(close(pfd) == 0); +*/ + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + fcntl_tests_2(fd); + exit(0); + } else { + CHECK(waitpid(pid, NULL, 0) == pid); + fcntl_tests_2(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_2(fd); + exit(0); + } else { + fcntl_tests_2(fd); + CHECK(waitpid(pid, NULL, 0) == pid); + } + CHECK(close(fd) == 0); + + /* Send descriptors from parent to child. */ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + fcntl_tests_recv_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + fcntl_tests_send_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + /* Send descriptors from child to parent. 
*/ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + fcntl_tests_send_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + fcntl_tests_recv_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + exit(0); +} --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/cap_getmode.c 2013-02-26 00:15:48.145527230 +0100 @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "misc.h" + +int +main(void) +{ + unsigned int mode; + pid_t pid; + int pfd; + + printf("1..27\n"); + + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. */ + CHECK(mode == 0); + + /* Expect EFAULT. */ + errno = 0; + CHECK(cap_getmode(NULL) == -1); + CHECK(errno == EFAULT); + errno = 0; + CHECK(cap_getmode((void *)(uintptr_t)0xdeadc0de) == -1); + CHECK(errno == EFAULT); + + /* If parent is not in capability mode, child after fork() also won't be. */ + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. */ + CHECK(mode == 0); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + + /* If parent is not in capability mode, child after pdfork() also won't be. */ + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. */ + CHECK(mode == 0); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + + /* In capability mode... */ + + CHECK(cap_enter() == 0); + + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + + /* Expect EFAULT. 
*/ + errno = 0; + CHECK(cap_getmode(NULL) == -1); + CHECK(errno == EFAULT); + errno = 0; + CHECK(cap_getmode((void *)(uintptr_t)0xdeadc0de) == -1); + CHECK(errno == EFAULT); + + /* If parent is in capability mode, child after fork() also will be. */ + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + exit(0); + default: + /* + * wait(2) and friends are not permitted in the capability mode, + * so we can only just wait for a while. + */ + sleep(1); + } + + /* If parent is in capability mode, child after pdfork() also will be. */ + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + + exit(0); +} --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/cap_ioctls_limit.c 2013-02-26 00:15:50.058527197 +0100 @@ -0,0 +1,462 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "misc.h" + +static void +ioctl_tests_0(int fd) +{ + unsigned long cmds[2]; + + CHECK(cap_ioctls_get(fd, NULL, 0) == INT_MAX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == nitems(cmds)); + CHECK((cmds[0] == FIOCLEX && cmds[1] == FIONCLEX) || + (cmds[0] == FIONCLEX && cmds[1] == FIOCLEX)); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, 1) == nitems(cmds)); + CHECK(cmds[0] == FIOCLEX || cmds[0] == FIONCLEX); + CHECK(cmds[1] == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 
0); + + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(cap_ioctls_limit(fd, NULL, 0) == 0); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_1(int fd) +{ + unsigned long cmds[2]; + + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + CHECK(cmds[1] == 0); + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_IOCTL) == 0); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, 
nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_2(int fd) +{ + unsigned long cmds[2]; + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_IOCTL) == 0); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_send_0(int sock) +{ + unsigned long cmds[2]; + int fd; + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + CHECK(descriptor_send(sock, fd) == 
0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_ioctls_limit(fd, NULL, 0) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); +} + +static void +ioctl_tests_recv_0(int sock) +{ + unsigned long cmds[2]; + int fd; + + CHECK(descriptor_recv(sock, &fd) == 0); + + CHECK(cap_ioctls_get(fd, NULL, 0) == INT_MAX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == nitems(cmds)); + CHECK((cmds[0] == FIOCLEX && cmds[1] == FIONCLEX) || + (cmds[0] == FIONCLEX && cmds[1] == FIOCLEX)); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) 
== 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); +} + +int +main(void) +{ + int fd, pfd, sp[2]; + pid_t pid; + + printf("1..607\n"); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_1(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_2(fd); + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + ioctl_tests_0(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + sleep(1); + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + default: + ioctl_tests_0(fd); + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + ioctl_tests_1(fd); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + ioctl_tests_1(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. 
*/ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + sleep(1); + ioctl_tests_1(fd); + exit(0); + default: + ioctl_tests_1(fd); + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + ioctl_tests_2(fd); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + ioctl_tests_2(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + sleep(1); + ioctl_tests_2(fd); + exit(0); + default: + ioctl_tests_2(fd); + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + CHECK(close(fd) == 0); + + /* Send descriptors from parent to child. */ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + ioctl_tests_recv_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + ioctl_tests_send_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + /* Send descriptors from child to parent. 
*/ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + ioctl_tests_send_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + ioctl_tests_recv_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + exit(0); +} --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/misc.c 2013-02-26 00:15:51.972528208 +0100 @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include + +#include "misc.h" + +int +pdwait(int pfd) +{ + fd_set fdset; + + FD_ZERO(&fdset); + FD_SET(pfd, &fdset); + + return (select(pfd + 1, NULL, &fdset, NULL, NULL) == -1 ? -1 : 0); +} + +int +descriptor_send(int sock, int fd) +{ + unsigned char ctrl[CMSG_SPACE(sizeof(fd))]; + struct msghdr msg; + struct cmsghdr *cmsg; + + assert(sock >= 0); + assert(fd >= 0); + + bzero(&msg, sizeof(msg)); + bzero(&ctrl, sizeof(ctrl)); + + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + bcopy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + + if (sendmsg(sock, &msg, 0) == -1) + return (errno); + + return (0); +} + +int +descriptor_recv(int sock, int *fdp) +{ + unsigned char ctrl[CMSG_SPACE(sizeof(*fdp))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int val; + + assert(sock >= 0); + assert(fdp != NULL); + + bzero(&msg, sizeof(msg)); + bzero(&ctrl, sizeof(ctrl)); + +#if 1 + /* + * This doesn't really make sense, as we don't plan to receive any + * data, but if no buffer is provided and recv(2) returns 0 without + * control message. Must be kernel bug. 
+ */ + iov.iov_base = &val; + iov.iov_len = sizeof(val); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; +#else + msg.msg_iov = NULL; + msg.msg_iovlen = 0; +#endif + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + if (recvmsg(sock, &msg, 0) == -1) + return (errno); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) { + return (EINVAL); + } + bcopy(CMSG_DATA(cmsg), fdp, sizeof(*fdp)); + + return (0); +} --- /dev/null 2013-02-26 00:11:00.000000000 +0100 +++ tools/regression/capsicum/syscalls/misc.h 2013-02-26 00:15:53.022676761 +0100 @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MISC_H_ +#define _MISC_H_ + +#define OK() do { \ + int _serrno = errno; \ + printf("ok # line %u\n", __LINE__); \ + fflush(stdout); \ + errno = _serrno; \ +} while (0) +#define NOK() do { \ + int _serrno = errno; \ + printf("not ok # line %u\n", __LINE__); \ + fflush(stdout); \ + errno = _serrno; \ +} while (0) +#define CHECK(cond) do { \ + if ((cond)) \ + OK(); \ + else \ + NOK(); \ +} while (0) + +/* + * This can be removed once pdwait4(2) is implemented. + */ +int pdwait(int pfd); + +int descriptor_send(int sock, int fd); +int descriptor_recv(int sock, int *fdp); + +#endif /* !_MISC_H_ */ --- tools/regression/security/cap_test/cap_test_capabilities.c.orig +++ tools/regression/security/cap_test/cap_test_capabilities.c @@ -1,8 +1,12 @@ /*- * Copyright (c) 2009-2011 Robert N. M. Watson * Copyright (c) 2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -43,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -60,14 +65,20 @@ */ #define CHECK_RESULT(syscall, rights_needed, succeeded) do { \ if ((rights & (rights_needed)) == (rights_needed)) { \ - if (!(succeeded)) \ + if (succeeded) { \ + if (success == -1) \ + success = PASSED; \ + } else { \ SYSCALL_FAIL(syscall, "failed"); \ + } \ } else { \ - if (succeeded) \ + if (succeeded) { \ FAILX("%s:\tsucceeded when it shouldn't have" \ - " (rights 0x%jx)", #syscall, rights); \ - else if (errno != ENOTCAPABLE) \ + " (rights 0x%jx)", #syscall, \ + (uintmax_t)rights); \ + } else if (errno != ENOTCAPABLE) { \ SYSCALL_FAIL(syscall, "errno != ENOTCAPABLE"); \ + } \ } \ errno = 0; \ } while (0) @@ -79,8 +90,11 @@ if ((rights & (rights_needed)) == (rights_needed)) { \ if (p == MAP_FAILED) \ SYSCALL_FAIL(mmap, "failed"); \ - else \ + else { \ (void)munmap(p, getpagesize()); \ + if (success == -1) \ + success = PASSED; \ + } \ } else { \ if (p != MAP_FAILED) { \ FAILX("%s:\tsucceeded when it shouldn't have" \ @@ -97,96 +111,200 @@ * make sure only those rights work. 
*/ static int -try_file_ops(int fd, cap_rights_t rights) +try_file_ops(int filefd, int dirfd, cap_rights_t rights) { struct stat sb; struct statfs sf; - int fd_cap, fd_capcap; + cap_rights_t erights; + int fd_cap, fd_capcap, dfd_cap; ssize_t ssize, ssize2; off_t off; void *p; char ch; int ret, is_nfs; struct pollfd pollfd; - int success = PASSED; + int success = -1; - REQUIRE(fstatfs(fd, &sf)); - is_nfs = (strncmp("nfs", sf.f_fstypename, sizeof(sf.f_fstypename)) - == 0); + REQUIRE(fstatfs(filefd, &sf)); + is_nfs = (strcmp("nfs", sf.f_fstypename) == 0); - REQUIRE(fd_cap = cap_new(fd, rights)); + REQUIRE(fd_cap = cap_new(filefd, rights)); + CHECK(cap_getrights(fd_cap, &erights) == 0); + CHECK(rights == erights); REQUIRE(fd_capcap = cap_new(fd_cap, rights)); + CHECK(cap_getrights(fd_capcap, &erights) == 0); + CHECK(rights == erights); CHECK(fd_capcap != fd_cap); + REQUIRE(dfd_cap = cap_new(dirfd, rights)); + CHECK(cap_getrights(dfd_cap, &erights) == 0); + CHECK(rights == erights); - pollfd.fd = fd_cap; - pollfd.events = POLLIN | POLLERR | POLLHUP; - pollfd.revents = 0; + ssize = read(fd_cap, &ch, sizeof(ch)); + CHECK_RESULT(read, CAP_READ, ssize >= 0); + + ssize = write(fd_cap, &ch, sizeof(ch)); + CHECK_RESULT(write, CAP_WRITE, ssize >= 0); - ssize = read(fd_cap, &ch, sizeof(ch)); - CHECK_RESULT(read, CAP_READ | CAP_SEEK, ssize >= 0); + off = lseek(fd_cap, 0, SEEK_SET); + CHECK_RESULT(lseek, CAP_SEEK, off >= 0); ssize = pread(fd_cap, &ch, sizeof(ch), 0); ssize2 = pread(fd_cap, &ch, sizeof(ch), 0); - CHECK_RESULT(pread, CAP_READ, ssize >= 0); + CHECK_RESULT(pread, CAP_PREAD, ssize >= 0); CHECK(ssize == ssize2); - ssize = write(fd_cap, &ch, sizeof(ch)); - CHECK_RESULT(write, CAP_WRITE | CAP_SEEK, ssize >= 0); - ssize = pwrite(fd_cap, &ch, sizeof(ch), 0); - CHECK_RESULT(pwrite, CAP_WRITE, ssize >= 0); + CHECK_RESULT(pwrite, CAP_PWRITE, ssize >= 0); - off = lseek(fd_cap, 0, SEEK_SET); - CHECK_RESULT(lseek, CAP_SEEK, off >= 0); - - /* - * Note: this is not expected to work 
over NFS. - */ - ret = fchflags(fd_cap, UF_NODUMP); - CHECK_RESULT(fchflags, CAP_FCHFLAGS, - (ret == 0) || (is_nfs && (errno == EOPNOTSUPP))); - - ret = fstat(fd_cap, &sb); - CHECK_RESULT(fstat, CAP_FSTAT, ret == 0); + p = mmap(NULL, getpagesize(), PROT_NONE, MAP_SHARED, fd_cap, 0); + CHECK_MMAP_RESULT(CAP_MMAP); p = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ); + CHECK_MMAP_RESULT(CAP_MMAP_R); p = mmap(NULL, getpagesize(), PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_W); p = mmap(NULL, getpagesize(), PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_X); p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_RW); p = mmap(NULL, getpagesize(), PROT_READ | PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_RX); p = mmap(NULL, getpagesize(), PROT_EXEC | PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_MAPEXEC | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_WX); p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_WRITE | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_RWX); + + /* TODO: openat(O_APPEND) */ + ret = openat(dfd_cap, "cap_create", O_CREAT | O_RDONLY, 0600); + CHECK_RESULT(openat(O_CREATE | O_RDONLY), + CAP_CREATE | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); + ret = openat(dfd_cap, "cap_create", O_CREAT | O_WRONLY, 0600); + CHECK_RESULT(openat(O_CREATE | O_WRONLY), + CAP_CREATE | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); + ret = 
openat(dfd_cap, "cap_create", O_CREAT | O_RDWR, 0600); + CHECK_RESULT(openat(O_CREATE | O_RDWR), + CAP_CREATE | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); ret = fsync(fd_cap); CHECK_RESULT(fsync, CAP_FSYNC, ret == 0); + ret = openat(dirfd, "cap_fsync", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDONLY, 0600); + CHECK_RESULT(openat(O_FSYNC | O_RDONLY), + CAP_FSYNC | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY, 0600); + CHECK_RESULT(openat(O_FSYNC | O_WRONLY), + CAP_FSYNC | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR, 0600); + CHECK_RESULT(openat(O_FSYNC | O_RDWR), + CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDONLY, 0600); + CHECK_RESULT(openat(O_SYNC | O_RDONLY), + CAP_FSYNC | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY, 0600); + CHECK_RESULT(openat(O_SYNC | O_WRONLY), + CAP_FSYNC | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR, 0600); + CHECK_RESULT(openat(O_SYNC | O_RDWR), + CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(unlinkat(dirfd, "cap_fsync", 0) == 0); + + ret = ftruncate(fd_cap, 0); + CHECK_RESULT(ftruncate, CAP_FTRUNCATE, ret == 0); + + ret = openat(dirfd, "cap_ftruncate", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDONLY); + CHECK_RESULT(openat(O_TRUNC | O_RDONLY), + CAP_FTRUNCATE | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == 
-1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_WRONLY); + CHECK_RESULT(openat(O_TRUNC | O_WRONLY), + CAP_FTRUNCATE | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDWR); + CHECK_RESULT(openat(O_TRUNC | O_RDWR), + CAP_FTRUNCATE | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(unlinkat(dirfd, "cap_ftruncate", 0) == 0); + + /* + * Note: this is not expected to work over NFS. + */ + ret = fchflags(fd_cap, UF_NODUMP); + CHECK_RESULT(fchflags, CAP_FCHFLAGS, + ret == 0 || (is_nfs && errno == EOPNOTSUPP)); + +#ifdef TODO /* No such syscalls yet. */ + ret = openat(dirfd, "cap_fchflagsat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchflagsat(dfd_cap, "cap_fchflagsat", UF_NODUMP, 0); + CHECK_RESULT(fchflagsat, CAP_FCHFLAGSAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchflagsat", 0) == 0); +#endif + ret = fchown(fd_cap, -1, -1); CHECK_RESULT(fchown, CAP_FCHOWN, ret == 0); + ret = openat(dirfd, "cap_fchownat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchownat(dfd_cap, "cap_fchownat", -1, -1, 0); + CHECK_RESULT(fchownat, CAP_FCHOWN | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchownat", 0) == 0); + ret = fchmod(fd_cap, 0644); CHECK_RESULT(fchmod, CAP_FCHMOD, ret == 0); + ret = openat(dirfd, "cap_fchmodat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchmodat(dfd_cap, "cap_fchmodat", 0600, 0); + CHECK_RESULT(fchmodat, CAP_FCHMOD | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchmodat", 0) == 0); + + ret = fcntl(fd_cap, F_GETFL); + CHECK_RESULT(fcntl(F_GETFL), CAP_FCNTL, ret >= 0); + ret = fcntl(fd_cap, F_SETFL, ret); + CHECK_RESULT(fcntl(F_SETFL), CAP_FCNTL, ret == 0); + /* XXX flock */ - ret = ftruncate(fd_cap, 0); - CHECK_RESULT(ftruncate, CAP_FTRUNCATE, ret == 0); + ret = fstat(fd_cap, &sb); + CHECK_RESULT(fstat, 
CAP_FSTAT, ret == 0); + + ret = openat(dirfd, "cap_fstatat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fstatat(dfd_cap, "cap_fstatat", &sb, 0); + CHECK_RESULT(fstatat, CAP_FSTAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fstatat", 0) == 0); ret = fstatfs(fd_cap, &sf); CHECK_RESULT(fstatfs, CAP_FSTATFS, ret == 0); @@ -197,6 +315,55 @@ ret = futimes(fd_cap, NULL); CHECK_RESULT(futimes, CAP_FUTIMES, ret == 0); + ret = openat(dirfd, "cap_futimesat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = futimesat(dfd_cap, "cap_futimesat", NULL); + CHECK_RESULT(futimesat, CAP_FUTIMES | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_futimesat", 0) == 0); + + ret = openat(dirfd, "cap_linkat_src", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = linkat(dirfd, "cap_linkat_src", dfd_cap, "cap_linkat_dst", 0); + CHECK_RESULT(linkat, CAP_LINKAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_linkat_src", 0) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_linkat_dst", 0) == 0); + + ret = mkdirat(dfd_cap, "cap_mkdirat", 0700); + CHECK_RESULT(mkdirat, CAP_MKDIRAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mkdirat", AT_REMOVEDIR) == 0); + + ret = mkfifoat(dfd_cap, "cap_mkfifoat", 0600); + CHECK_RESULT(mkfifoat, CAP_MKFIFOAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mkfifoat", 0) == 0); + + ret = mknodat(dfd_cap, "cap_mknodat", S_IFCHR | 0600, 0); + CHECK_RESULT(mknodat, CAP_MKNODAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mknodat", 0) == 0); + + /* TODO: renameat(2) */ + + ret = symlinkat("test", dfd_cap, "cap_symlinkat"); + CHECK_RESULT(symlinkat, CAP_SYMLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_symlinkat", 0) == 0); + + ret = openat(dirfd, "cap_unlinkat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = unlinkat(dfd_cap, "cap_unlinkat", 0); + CHECK_RESULT(unlinkat, 
CAP_UNLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == 0 || unlinkat(dirfd, "cap_unlinkat", 0) == 0); + ret = mkdirat(dirfd, "cap_unlinkat", 0700); + CHECK(ret == 0); + ret = unlinkat(dfd_cap, "cap_unlinkat", AT_REMOVEDIR); + CHECK_RESULT(unlinkat, CAP_UNLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == 0 || unlinkat(dirfd, "cap_unlinkat", AT_REMOVEDIR) == 0); + + pollfd.fd = fd_cap; + pollfd.events = POLLIN | POLLERR | POLLHUP; + pollfd.revents = 0; + ret = poll(&pollfd, 1, 0); if (rights & CAP_POLL_EVENT) CHECK((pollfd.revents & POLLNVAL) == 0); @@ -205,79 +372,159 @@ /* XXX: select, kqueue */ - close (fd_cap); + close(fd_cap); + close(fd_capcap); + + if (success == -1) { + fprintf(stderr, "No tests for rights 0x%jx.\n", + (uintmax_t)rights); + success = FAILED; + } return (success); } -#define TRY(fd, rights) \ +#define TRY(rights) \ do { \ if (success == PASSED) \ - success = try_file_ops(fd, rights); \ + success = try_file_ops(filefd, dirfd, (rights)); \ else \ /* We've already failed, but try the test anyway. */ \ - try_file_ops(fd, rights); \ + try_file_ops(filefd, dirfd, (rights)); \ } while (0) +#define KEEP_ERRNO(...) 
do { \ + int _saved_errno = errno; \ + __VA_ARGS__; \ + errno = _saved_errno; \ +} while (0); + int test_capabilities(void) { - int fd; + int filefd, dirfd, tmpfd; int success = PASSED; + char file[] = "/tmp/cap_test.XXXXXXXXXX"; + char dir[] = "/tmp/cap_test.XXXXXXXXXX"; - fd = open("/tmp/cap_test_capabilities", O_RDWR | O_CREAT, 0644); - if (fd < 0) + filefd = mkstemp(file); + if (filefd < 0) + err(-1, "mkstemp"); + if (mkdtemp(dir) == NULL) { + KEEP_ERRNO(unlink(file)); + err(-1, "mkdtemp"); + } + dirfd = open(dir, O_RDONLY | O_DIRECTORY); + if (dirfd == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); + err(-1, "open"); + } + tmpfd = open("/tmp", O_RDONLY | O_DIRECTORY); + if (tmpfd == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); err(-1, "open"); + } - if (cap_enter() < 0) + if (cap_enter() == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); err(-1, "cap_enter"); + } - /* XXX: Really want to try all combinations. */ - TRY(fd, CAP_READ); - TRY(fd, CAP_READ | CAP_SEEK); - TRY(fd, CAP_WRITE); - TRY(fd, CAP_WRITE | CAP_SEEK); - TRY(fd, CAP_READ | CAP_WRITE); - TRY(fd, CAP_READ | CAP_WRITE | CAP_SEEK); - TRY(fd, CAP_SEEK); - TRY(fd, CAP_FCHFLAGS); - TRY(fd, CAP_IOCTL); - TRY(fd, CAP_FSTAT); - TRY(fd, CAP_MMAP); - TRY(fd, CAP_MMAP | CAP_READ); - TRY(fd, CAP_MMAP | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_MAPEXEC); - TRY(fd, CAP_MMAP | CAP_READ | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_READ | CAP_MAPEXEC); - TRY(fd, CAP_MMAP | CAP_MAPEXEC | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_READ | CAP_WRITE | CAP_MAPEXEC); - TRY(fd, CAP_FCNTL); - TRY(fd, CAP_POST_EVENT); - TRY(fd, CAP_POLL_EVENT); - TRY(fd, CAP_FSYNC); - TRY(fd, CAP_FCHOWN); - TRY(fd, CAP_FCHMOD); - TRY(fd, CAP_FTRUNCATE); - TRY(fd, CAP_FLOCK); - TRY(fd, CAP_FSTATFS); - TRY(fd, CAP_FPATHCONF); - TRY(fd, CAP_FUTIMES); - TRY(fd, CAP_ACL_GET); - TRY(fd, CAP_ACL_SET); - TRY(fd, CAP_ACL_DELETE); - TRY(fd, CAP_ACL_CHECK); - TRY(fd, CAP_EXTATTR_GET); - TRY(fd, CAP_EXTATTR_SET); - TRY(fd, 
CAP_EXTATTR_DELETE); - TRY(fd, CAP_EXTATTR_LIST); - TRY(fd, CAP_MAC_GET); - TRY(fd, CAP_MAC_SET); + TRY(CAP_READ); + TRY(CAP_WRITE); + TRY(CAP_SEEK); + TRY(CAP_PREAD); + TRY(CAP_PWRITE); + TRY(CAP_READ | CAP_WRITE); + TRY(CAP_PREAD | CAP_PWRITE); + TRY(CAP_MMAP); + TRY(CAP_MMAP_R); + TRY(CAP_MMAP_W); + TRY(CAP_MMAP_X); + TRY(CAP_MMAP_RW); + TRY(CAP_MMAP_RX); + TRY(CAP_MMAP_WX); + TRY(CAP_MMAP_RWX); + TRY(CAP_CREATE | CAP_READ | CAP_LOOKUP); + TRY(CAP_CREATE | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_CREATE | CAP_READ | CAP_WRITE | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_FEXECVE); +#endif + TRY(CAP_FSYNC); + TRY(CAP_FSYNC | CAP_READ | CAP_LOOKUP); + TRY(CAP_FSYNC | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FTRUNCATE); + TRY(CAP_FTRUNCATE | CAP_READ | CAP_LOOKUP); + TRY(CAP_FTRUNCATE | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FTRUNCATE | CAP_READ | CAP_WRITE | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_FCHDIR); +#endif + TRY(CAP_FCHFLAGS); + TRY(CAP_FCHOWN); + TRY(CAP_FCHOWN | CAP_LOOKUP); + TRY(CAP_FCHMOD | CAP_LOOKUP); + TRY(CAP_FCNTL); +#ifdef TODO + TRY(CAP_FLOCK); +#endif + TRY(CAP_FPATHCONF); +#ifdef TODO + TRY(CAP_FSCK); +#endif + TRY(CAP_FSTAT | CAP_LOOKUP); + TRY(CAP_FSTATFS); + TRY(CAP_FUTIMES | CAP_LOOKUP); + TRY(CAP_LINKAT | CAP_LOOKUP); + TRY(CAP_MKDIRAT | CAP_LOOKUP); + TRY(CAP_MKFIFOAT | CAP_LOOKUP); + TRY(CAP_MKNODAT | CAP_LOOKUP); + TRY(CAP_SYMLINKAT | CAP_LOOKUP); + TRY(CAP_UNLINKAT | CAP_LOOKUP); + /* Rename needs CAP_RENAMEAT on source directory and CAP_LINKAT on destination directory. 
*/ + TRY(CAP_RENAMEAT | CAP_UNLINKAT | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_LOOKUP); + TRY(CAP_EXTATTR_DELETE); + TRY(CAP_EXTATTR_GET); + TRY(CAP_EXTATTR_LIST); + TRY(CAP_EXTATTR_SET); + TRY(CAP_ACL_CHECK); + TRY(CAP_ACL_DELETE); + TRY(CAP_ACL_GET); + TRY(CAP_ACL_SET); + TRY(CAP_ACCEPT); + TRY(CAP_BIND); + TRY(CAP_CONNECT); + TRY(CAP_GETPEERNAME); + TRY(CAP_GETSOCKNAME); + TRY(CAP_GETSOCKOPT); + TRY(CAP_LISTEN); + TRY(CAP_PEELOFF); + TRY(CAP_RECV); + TRY(CAP_SEND); + TRY(CAP_SETSOCKOPT); + TRY(CAP_SHUTDOWN); + TRY(CAP_MAC_GET); + TRY(CAP_MAC_SET); + TRY(CAP_SEM_GETVALUE); + TRY(CAP_SEM_POST); + TRY(CAP_SEM_WAIT); + TRY(CAP_POST_EVENT); + TRY(CAP_POLL_EVENT); + TRY(CAP_IOCTL); + TRY(CAP_TTYHOOK); + TRY(CAP_PDGETPID); + TRY(CAP_PDWAIT); + TRY(CAP_PDKILL); +#endif - /* - * Socket-specific. - */ - TRY(fd, CAP_GETPEERNAME); - TRY(fd, CAP_GETSOCKNAME); - TRY(fd, CAP_ACCEPT); + (void)unlinkat(tmpfd, file + strlen("/tmp/"), 0); + (void)unlinkat(tmpfd, dir + strlen("/tmp/"), AT_REMOVEDIR); return (success); } --- tools/regression/security/cap_test/cap_test_relative.c.orig +++ tools/regression/security/cap_test/cap_test_relative.c @@ -61,7 +61,8 @@ cap_rights_t rights; REQUIRE(etc = open("/etc/", O_RDONLY)); - CHECK_SYSCALL_FAILS(EINVAL, cap_getrights, etc, &rights); + CHECK_SYSCALL_SUCCEEDS(cap_getrights, etc, &rights); + CHECK_RIGHTS(rights, CAP_ALL); MAKE_CAPABILITY(etc_cap, etc, CAP_READ); MAKE_CAPABILITY(etc_cap_ro, etc, CAP_READ | CAP_LOOKUP); --- usr.bin/kdump/kdump.c.orig +++ usr.bin/kdump/kdump.c @@ -1008,6 +1008,7 @@ narg--; break; case SYS_cap_new: + case SYS_cap_rights_limit: print_number(ip, narg, c); putchar(','); arg = *ip; @@ -1035,6 +1036,14 @@ } capname(arg); break; + case SYS_cap_fcntls_limit: + print_number(ip, narg, c); + putchar(','); + arg = *ip; + ip++; + narg--; + capfcntlname(arg); + break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); --- usr.bin/kdump/mksubr.orig +++ usr.bin/kdump/mksubr @@ -361,6 +361,7 @@ 
auto_or_type "accessmodename" "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+" "sys/unistd.h" auto_switch_type "acltypename" "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+" "sys/acl.h" auto_or_type "capname" "CAP_[A-Z]+[[:space:]]+0x[01248]{16}ULL" "sys/capability.h" +auto_or_type "capfcntlname" "CAP_FCNTL_[A-Z]+[[:space:]]+\(1" "sys/capability.h" auto_switch_type "extattrctlname" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h" auto_switch_type "fadvisebehavname" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" auto_or_type "flagsname" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" --- usr.bin/procstat/procstat_files.c.orig +++ usr.bin/procstat/procstat_files.c @@ -139,33 +139,34 @@ /* General file I/O. */ { CAP_READ, "rd" }, { CAP_WRITE, "wr" }, + { CAP_SEEK, "se" }, { CAP_MMAP, "mm" }, - { CAP_MAPEXEC, "me" }, + { CAP_CREATE, "cr" }, { CAP_FEXECVE, "fe" }, { CAP_FSYNC, "fy" }, { CAP_FTRUNCATE, "ft" }, - { CAP_SEEK, "se" }, /* VFS methods. */ + { CAP_FCHDIR, "cd" }, { CAP_FCHFLAGS, "cf" }, - { CAP_FCHDIR, "cd" }, { CAP_FCHMOD, "cm" }, { CAP_FCHOWN, "cn" }, { CAP_FCNTL, "fc" }, + { CAP_FLOCK, "fl" }, { CAP_FPATHCONF, "fp" }, - { CAP_FLOCK, "fl" }, { CAP_FSCK, "fk" }, { CAP_FSTAT, "fs" }, { CAP_FSTATFS, "sf" }, { CAP_FUTIMES, "fu" }, - { CAP_CREATE, "cr" }, - { CAP_DELETE, "de" }, - { CAP_MKDIR, "md" }, - { CAP_RMDIR, "rm" }, - { CAP_MKFIFO, "mf" }, - { CAP_MKNOD, "mn" }, + { CAP_LINKAT, "li" }, + { CAP_MKDIRAT, "md" }, + { CAP_MKFIFOAT, "mf" }, + { CAP_MKNODAT, "mn" }, + { CAP_RENAMEAT, "rn" }, + { CAP_SYMLINKAT, "sl" }, + { CAP_UNLINKAT, "un" }, - /* Lookups - used to constraint *at() calls. */ + /* Lookups - used to constrain *at() calls. */ { CAP_LOOKUP, "lo" }, /* Extended attributes. */ @@ -213,6 +214,24 @@ { CAP_PDGETPID, "pg" }, { CAP_PDWAIT, "pw" }, { CAP_PDKILL, "pk" }, + + /* Aliases and defines that combine multiple rights. 
*/ + { CAP_PREAD, "prd" }, + { CAP_PWRITE, "pwr" }, + + { CAP_MMAP_R, "mmr" }, + { CAP_MMAP_W, "mmw" }, + { CAP_MMAP_X, "mmx" }, + { CAP_MMAP_RW, "mrw" }, + { CAP_MMAP_RX, "mrx" }, + { CAP_MMAP_WX, "mwx" }, + { CAP_MMAP_RWX, "mma" }, + + { CAP_RECV, "re" }, + { CAP_SEND, "sd" }, + + { CAP_SOCK_CLIENT, "scl" }, + { CAP_SOCK_SERVER, "ssr" }, }; static const u_int cap_desc_count = sizeof(cap_desc) / sizeof(cap_desc[0]); @@ -225,7 +244,7 @@ count = 0; width = 0; for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { width += strlen(cap_desc[i].cd_desc); if (count) width++; @@ -249,7 +268,7 @@ printf("-"); } for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { printf("%s%s", count ? "," : "", cap_desc[i].cd_desc); width += strlen(cap_desc[i].cd_desc); if (count) @@ -261,7 +280,7 @@ void procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) -{ +{ struct sockstat sock; struct filestat_list *head; struct filestat *fst; @@ -423,8 +442,6 @@ printf("%s", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-"); - printf("%s ", fst->fs_fflags & PS_FST_FFLAG_CAPABILITY ? - "c" : "-"); if (!Cflag) { if (fst->fs_ref_count > -1) printf("%3d ", fst->fs_ref_count);