--- //depot/vendor/freebsd/src/lib/libc/gen/getvfsbyname.3 2007/01/10 16:42:27 +++ //depot/user/pjd/zfs/lib/libc/gen/getvfsbyname.3 2007/04/01 14:10:09 @@ -81,6 +81,11 @@ aliases some other mounted FS .It Dv VFCF_UNICODE stores file names as Unicode +.It Dv VFCF_JAIL +can be mounted from within a jail if +.Va security.jail.mount_allowed +sysctl is set to +.Dv 1 . .El .Sh RETURN VALUES .Rv -std getvfsbyname @@ -99,8 +104,10 @@ specifies a file system that is unknown or not configured in the kernel. .El .Sh SEE ALSO +.Xr jail 2 , .Xr mount 2 , .Xr sysctl 3 , +.Xr jail 8 , .Xr mount 8 , .Xr sysctl 8 .Sh HISTORY --- //depot/vendor/freebsd/src/share/man/man9/VFS_SET.9 2005/01/18 21:37:12 +++ //depot/user/pjd/zfs/share/man/man9/VFS_SET.9 2007/04/01 14:10:09 @@ -70,6 +70,11 @@ Loopback file system layer. .It Dv VFCF_UNICODE File names are stored as Unicode. +.It Dv VFCF_JAIL +can be mounted from within a jail if +.Va security.jail.mount_allowed +sysctl is set to +.Dv 1 . .El .Sh PSEUDOCODE .Bd -literal @@ -96,6 +101,8 @@ VFS_SET(myfs_vfsops, skelfs, 0); .Ed .Sh SEE ALSO +.Xr jail 2 , +.Xr jail 8 , .Xr DECLARE_MODULE 9 , .Xr vfsconf 9 , .Xr vfs_modevent 9 --- //depot/vendor/freebsd/src/sys/kern/kern_jail.c 2007/03/09 05:26:33 +++ //depot/user/pjd/zfs/sys/kern/kern_jail.c 2007/03/18 19:54:08 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -72,22 +73,42 @@ &jail_chflags_allowed, 0, "Processes in jail can alter system file flags"); -/* allprison, lastprid, and prisoncount are protected by allprison_mtx. */ +int jail_mount_allowed = 0; +SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, + &jail_mount_allowed, 0, + "Processes in jail can mount/unmount file systems"); + +/* allprison, lastprid, and prisoncount are protected by allprison_lock. */ struct prisonlist allprison; -struct mtx allprison_mtx; +struct sx allprison_lock; int lastprid = 0; int prisoncount = 0; +/* + * List of jail services. Protected by allprison_lock. 
+ */ +TAILQ_HEAD(prison_services_head, prison_service); +static struct prison_services_head prison_services = + TAILQ_HEAD_INITIALIZER(prison_services); +static int prison_service_slots = 0; + +struct prison_service { + prison_create_t ps_create; + prison_destroy_t ps_destroy; + int ps_slotno; + TAILQ_ENTRY(prison_service) ps_next; + char ps_name[0]; +}; + static void init_prison(void *); static void prison_complete(void *context, int pending); -static struct prison *prison_find(int); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); static void init_prison(void *data __unused) { - mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF); + sx_init(&allprison_lock, "allprison"); LIST_INIT(&allprison); } @@ -103,6 +124,7 @@ { struct nameidata nd; struct prison *pr, *tpr; + struct prison_service *psrv; struct jail j; struct jail_attach_args jaa; int vfslocked, error, tryprid; @@ -135,9 +157,15 @@ pr->pr_ip = j.ip_number; pr->pr_linux = NULL; pr->pr_securelevel = securelevel; + if (prison_service_slots > 0) { + pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, + M_PRISON, M_ZERO | M_WAITOK); + } else { + pr->pr_slots = NULL; + } /* Determine next pr_id and add prison to allprison list. 
*/ - mtx_lock(&allprison_mtx); + sx_xlock(&allprison_lock); tryprid = lastprid + 1; if (tryprid == JAIL_MAX) tryprid = 1; @@ -146,7 +174,7 @@ if (tpr->pr_id == tryprid) { tryprid++; if (tryprid == JAIL_MAX) { - mtx_unlock(&allprison_mtx); + sx_xunlock(&allprison_lock); error = EAGAIN; goto e_dropvnref; } @@ -156,7 +184,11 @@ pr->pr_id = jaa.jid = lastprid = tryprid; LIST_INSERT_HEAD(&allprison, pr, pr_list); prisoncount++; - mtx_unlock(&allprison_mtx); + sx_downgrade(&allprison_lock); + TAILQ_FOREACH(psrv, &prison_services, ps_next) { + psrv->ps_create(psrv, pr); + } + sx_sunlock(&allprison_lock); error = jail_attach(td, &jaa); if (error) @@ -167,10 +199,14 @@ td->td_retval[0] = jaa.jid; return (0); e_dropprref: - mtx_lock(&allprison_mtx); + sx_xlock(&allprison_lock); LIST_REMOVE(pr, pr_list); prisoncount--; - mtx_unlock(&allprison_mtx); + sx_downgrade(&allprison_lock); + TAILQ_FOREACH(psrv, &prison_services, ps_next) { + psrv->ps_destroy(psrv, pr); + } + sx_sunlock(&allprison_lock); e_dropvnref: vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vrele(pr->pr_root); @@ -207,15 +243,15 @@ return (error); p = td->td_proc; - mtx_lock(&allprison_mtx); + sx_slock(&allprison_lock); pr = prison_find(uap->jid); if (pr == NULL) { - mtx_unlock(&allprison_mtx); + sx_sunlock(&allprison_lock); return (EINVAL); } pr->pr_ref++; mtx_unlock(&pr->pr_mtx); - mtx_unlock(&allprison_mtx); + sx_sunlock(&allprison_lock); vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td); @@ -251,12 +287,12 @@ /* * Returns a locked prison instance, or NULL on failure. 
*/ -static struct prison * +struct prison * prison_find(int prid) { struct prison *pr; - mtx_assert(&allprison_mtx, MA_OWNED); + sx_assert(&allprison_lock, SX_LOCKED); LIST_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); @@ -269,22 +305,27 @@ void prison_free(struct prison *pr) { + struct prison_service *psrv; - mtx_lock(&allprison_mtx); + sx_xlock(&allprison_lock); mtx_lock(&pr->pr_mtx); pr->pr_ref--; if (pr->pr_ref == 0) { LIST_REMOVE(pr, pr_list); mtx_unlock(&pr->pr_mtx); prisoncount--; - mtx_unlock(&allprison_mtx); + sx_downgrade(&allprison_lock); + TAILQ_FOREACH(psrv, &prison_services, ps_next) { + psrv->ps_destroy(psrv, pr); + } + sx_sunlock(&allprison_lock); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); return; } mtx_unlock(&pr->pr_mtx); - mtx_unlock(&allprison_mtx); + sx_xunlock(&allprison_lock); } static void @@ -652,6 +693,18 @@ return (EPERM); /* + * Depending on the global setting, allow privilege of + * mounting/unmounting file systems. + */ + case PRIV_VFS_MOUNT: + case PRIV_VFS_UNMOUNT: + case PRIV_VFS_MOUNT_NONUSER: + if (jail_mount_allowed) + return (0); + else + return (EPERM); + + /* * Allow jailed root to bind reserved ports. */ case PRIV_NETINET_RESERVEDPORT: @@ -684,6 +737,201 @@ } } +/* + * Register jail service. Provides 'create' and 'destroy' methods. + * 'create' method will be called for every existing jail and all + * jails in the future as they are being created. + * 'destroy' method will be called for every jail going away and + * for all existing jails at the time of service deregistration. 
+ */ +struct prison_service * +prison_service_register(const char *name, prison_create_t create, + prison_destroy_t destroy) +{ + struct prison_service *psrv, *psrv2; + struct prison *pr; + int reallocate = 1, slotno = 0; + void **slots, **oldslots; + + psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, + M_WAITOK | M_ZERO); + psrv->ps_create = create; + psrv->ps_destroy = destroy; + strcpy(psrv->ps_name, name); + /* + * Grab the allprison_lock here, so we won't miss any jail + * creation/destruction. + */ + sx_xlock(&allprison_lock); + /* + * Find free slot, when there is no available existing slot, allocate + * one at the end. + */ + TAILQ_FOREACH(psrv2, &prison_services, ps_next) { + if (psrv2->ps_slotno != slotno) { + KASSERT(slotno < psrv2->ps_slotno, + ("Invalid slotno (slotno=%d >= ps_slotno=%d", + slotno, psrv2->ps_slotno)); + /* We found free slot. */ + reallocate = 0; + break; + } + slotno++; + } + psrv->ps_slotno = slotno; + /* + * Keep the list sorted by slot number. + */ + if (psrv2 != NULL) { + KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); + TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); + } else { + KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); + TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); + } + prison_service_slots++; + sx_downgrade(&allprison_lock); + /* + * Allocate memory for new slot if we didn't find an empty one. + * Do not use realloc(9), because pr_slots is protected with a mutex, + * so we can't sleep. + */ + LIST_FOREACH(pr, &allprison, pr_list) { + if (reallocate) { + /* First allocate memory with M_WAITOK. */ + slots = malloc(sizeof(*slots) * prison_service_slots, + M_PRISON, M_WAITOK); + /* Now grab the mutex and replace pr_slots. 
*/ + mtx_lock(&pr->pr_mtx); + oldslots = pr->pr_slots; + if (psrv->ps_slotno > 0) { + bcopy(oldslots, slots, + sizeof(*slots) * (prison_service_slots - 1)); + } + slots[psrv->ps_slotno] = NULL; + pr->pr_slots = slots; + mtx_unlock(&pr->pr_mtx); + if (oldslots != NULL) + free(oldslots, M_PRISON); + } + /* + * Call 'create' method for each existing jail. + */ + psrv->ps_create(psrv, pr); + } + sx_sunlock(&allprison_lock); + + return (psrv); +} + +void +prison_service_deregister(struct prison_service *psrv) +{ + struct prison *pr; + void **slots, **oldslots; + int last = 0; + + sx_xlock(&allprison_lock); + if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) + last = 1; + TAILQ_REMOVE(&prison_services, psrv, ps_next); + prison_service_slots--; + sx_downgrade(&allprison_lock); + LIST_FOREACH(pr, &allprison, pr_list) { + /* + * Call 'destroy' method for every currently existing jail. + */ + psrv->ps_destroy(psrv, pr); + /* + * If this is the last slot, free the memory allocated for it. + */ + if (last) { + if (prison_service_slots == 0) + slots = NULL; + else { + slots = malloc(sizeof(*slots) * prison_service_slots, + M_PRISON, M_WAITOK); + } + mtx_lock(&pr->pr_mtx); + oldslots = pr->pr_slots; + /* + * We require setting slot to NULL after freeing it, + * this way we can check for memory leaks here. + */ + KASSERT(oldslots[psrv->ps_slotno] == NULL, + ("Slot %d (service %s, jailid=%d) still contains data?", + psrv->ps_slotno, psrv->ps_name, pr->pr_id)); + if (psrv->ps_slotno > 0) { + bcopy(oldslots, slots, + sizeof(*slots) * prison_service_slots); + } + pr->pr_slots = slots; + mtx_unlock(&pr->pr_mtx); + KASSERT(oldslots != NULL, ("oldslots == NULL")); + free(oldslots, M_PRISON); + } + } + sx_sunlock(&allprison_lock); + free(psrv, M_PRISON); +} + +/* + * Functions sets data for the given jail in slot assigned for the given + * jail service. 
+ * We don't require prison structure to be locked, it is acceptable that the + * caller increments reference count only, in that case, we acquire prison + * mutex internally. + */ +void +prison_service_data_set(struct prison_service *psrv, struct prison *pr, + void *data) +{ + int unlock = 0; + + if (!mtx_owned(&pr->pr_mtx)) { + unlock = 1; + mtx_lock(&pr->pr_mtx); + } + pr->pr_slots[psrv->ps_slotno] = data; + if (unlock) + mtx_unlock(&pr->pr_mtx); +} + +/* + * Function clears the slot assigned to the given jail service in the given + * prison structure and returns current slot data. + * We don't require prison structure to be locked, it is acceptable that the + * caller increments reference count only, in that case, we acquire prison + * mutex internally. + */ +void * +prison_service_data_del(struct prison_service *psrv, struct prison *pr) +{ + int unlock = 0; + void *data; + + if (!mtx_owned(&pr->pr_mtx)) { + unlock = 1; + mtx_lock(&pr->pr_mtx); + } + data = pr->pr_slots[psrv->ps_slotno]; + pr->pr_slots[psrv->ps_slotno] = NULL; + if (unlock) + mtx_unlock(&pr->pr_mtx); + return (data); +} + +/* + * Function returns current data from the slot assigned to the given jail + * service for the given jail. 
+ */ +void * +prison_service_data_get(struct prison_service *psrv, struct prison *pr) +{ + + return (pr->pr_slots[psrv->ps_slotno]); +} + static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { @@ -693,21 +941,14 @@ if (jailed(req->td->td_ucred)) return (0); -retry: - mtx_lock(&allprison_mtx); - count = prisoncount; - mtx_unlock(&allprison_mtx); - if (count == 0) + sx_slock(&allprison_lock); + if ((count = prisoncount) == 0) { + sx_sunlock(&allprison_lock); return (0); + } sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); - mtx_lock(&allprison_mtx); - if (count != prisoncount) { - mtx_unlock(&allprison_mtx); - free(sxp, M_TEMP); - goto retry; - } LIST_FOREACH(pr, &allprison, pr_list) { mtx_lock(&pr->pr_mtx); @@ -719,7 +960,7 @@ mtx_unlock(&pr->pr_mtx); xp++; } - mtx_unlock(&allprison_mtx); + sx_sunlock(&allprison_lock); error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); free(sxp, M_TEMP); @@ -741,3 +982,19 @@ } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); + +static int +sysctl_jail_jailid(SYSCTL_HANDLER_ARGS) +{ + int error, jailid; + + if (jailed(req->td->td_ucred)) + jailid = req->td->td_ucred->cr_prison->pr_id; + else + jailid = 0; + error = SYSCTL_OUT(req, &jailid, sizeof(jailid)); + + return (error); +} +SYSCTL_PROC(_security_jail, OID_AUTO, jailid, CTLTYPE_INT | CTLFLAG_RD, + NULL, 0, sysctl_jail_jailid, "I", "ID of the current jail."); --- //depot/vendor/freebsd/src/sys/kern/subr_witness.c 2007/03/21 21:22:46 +++ //depot/user/pjd/zfs/sys/kern/subr_witness.c 2007/03/22 08:02:14 @@ -276,6 +276,7 @@ */ { "proctree", &lock_class_sx }, { "allproc", &lock_class_sx }, + { "allprison", &lock_class_sx }, { NULL, NULL }, /* * Various mutexes @@ -289,7 +290,6 @@ { "session", &lock_class_mtx_sleep }, { "uidinfo hash", &lock_class_mtx_sleep }, { "uidinfo struct", &lock_class_mtx_sleep }, - { "allprison", &lock_class_mtx_sleep }, { NULL, NULL }, /* * Sockets --- 
//depot/vendor/freebsd/src/sys/kern/vfs_mount.c 2007/04/01 13:12:37 +++ //depot/user/pjd/zfs/sys/kern/vfs_mount.c 2007/04/01 14:10:09 @@ -847,6 +847,8 @@ vfsp = vfs_byname_kld(fstype, td, &error); if (vfsp == NULL) return (ENODEV); + if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) + return (EPERM); } /* * Get vnode to be covered @@ -863,6 +865,11 @@ return (EINVAL); } mp = vp->v_mount; + vfsp = mp->mnt_vfc; + if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) { + vput(vp); + return (EPERM); + } MNT_ILOCK(mp); flag = mp->mnt_flag; /* --- //depot/vendor/freebsd/src/sys/sys/jail.h 2006/11/06 13:43:21 +++ //depot/user/pjd/zfs/sys/sys/jail.h 2007/03/18 07:37:41 @@ -54,7 +54,7 @@ * delete the struture when the last inmate is dead. * * Lock key: - * (a) allprison_mtx + * (a) allprison_lock * (p) locked by pr_mtx * (c) set only during creation before the structure is shared, no mutex * required to read @@ -73,6 +73,7 @@ int pr_securelevel; /* (p) securelevel */ struct task pr_task; /* (d) destroy task */ struct mtx pr_mtx; + void **pr_slots; /* (p) additional data */ }; #endif /* _KERNEL || _WANT_PRISON */ @@ -91,6 +92,7 @@ LIST_HEAD(prisonlist, prison); extern struct prisonlist allprison; +extern struct sx allprison_lock; /* * Kernel support functions for jail(). @@ -105,6 +107,7 @@ int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); +struct prison *prison_find(int prid); void prison_free(struct prison *pr); u_int32_t prison_getip(struct ucred *cred); void prison_hold(struct prison *pr); @@ -113,5 +116,21 @@ int prison_priv_check(struct ucred *cred, int priv); void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip); +/* + * Kernel jail services. 
+ */ +struct prison_service; +typedef int (*prison_create_t)(struct prison_service *psrv, struct prison *pr); +typedef int (*prison_destroy_t)(struct prison_service *psrv, struct prison *pr); + +struct prison_service *prison_service_register(const char *name, + prison_create_t create, prison_destroy_t destroy); +void prison_service_deregister(struct prison_service *psrv); + +void prison_service_data_set(struct prison_service *psrv, struct prison *pr, + void *data); +void *prison_service_data_get(struct prison_service *psrv, struct prison *pr); +void *prison_service_data_del(struct prison_service *psrv, struct prison *pr); + #endif /* _KERNEL */ #endif /* !_SYS_JAIL_H_ */ --- //depot/vendor/freebsd/src/sys/sys/mount.h 2007/04/01 13:41:56 +++ //depot/user/pjd/zfs/sys/sys/mount.h 2007/04/01 14:10:09 @@ -427,6 +427,7 @@ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ +#define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ typedef uint32_t fsctlop_t; --- //depot/vendor/freebsd/src/sys/sys/priv.h 2007/03/01 20:52:22 +++ //depot/user/pjd/zfs/sys/sys/priv.h 2007/03/18 07:33:29 @@ -227,6 +227,15 @@ #define PRIV_UFS_SETUSE 273 /* setuse(). */ /* + * ZFS-specific privileges. + */ +#define PRIV_ZFS_POOL_CONFIG 280 /* Can configure ZFS pools. */ +#define PRIV_ZFS_INJECT 281 /* Can inject faults in the ZFS fault + injection framework. */ +#define PRIV_ZFS_JAIL 282 /* Can configure ZFS file systems from + within a jail. */ + +/* * VFS privileges. */ #define PRIV_VFS_READ 310 /* Override vnode DAC read perm. 
*/ --- //depot/vendor/freebsd/src/usr.bin/lsvfs/lsvfs.c 2002/08/11 02:42:08 +++ //depot/user/pjd/zfs/usr.bin/lsvfs/lsvfs.c 2007/04/01 14:10:09 @@ -105,5 +105,10 @@ strcat(buf, "unicode"); } + if(flags & VFCF_JAIL) { + if(comma++) strcat(buf, ", "); + strcat(buf, "jail"); + } + return buf; } --- //depot/vendor/freebsd/src/usr.sbin/jail/jail.8 2006/11/22 00:41:51 +++ //depot/user/pjd/zfs/usr.sbin/jail/jail.8 2007/04/01 14:13:04 @@ -546,6 +546,15 @@ privileged, and may manipulate system file flags subject to the usual constraints on .Va kern.securelevel . +.It Va security.jail.mount_allowed +This MIB entry determines if a privileged user inside a jail will be +able to mount and unmount file system types marked as jail-friendly. +The +.Xr lsvfs 1 +command can be used to find file system types available for mount from within +a jail. +This functionality is disabled by default, but can be enabled by setting this +MIB entry to 1. .El .Pp The read-only sysctl variable @@ -553,6 +562,11 @@ can be used to determine if a process is running inside a jail (value is one) or not (value is zero). .Pp +The read-only sysctl variable +.Va security.jail.jailid +can be used to get the identifier of the jail the process is running in (a value +of zero means that the process is not jailed). +.Pp The .Va security.jail.list MIB entry is read-only and it returns an array of @@ -572,6 +586,7 @@ .Va kern.hostname . .Sh SEE ALSO .Xr killall 1 , +.Xr lsvfs 1 , .Xr newaliases 1 , .Xr pgrep 1 , .Xr pkill 1 ,