commit 7d097e403160e4d4fa7b081f5a7f5e34a6adb21a Author: Andrey Zonov Date: Sun Sep 16 21:44:42 2012 +0400 - Unprivileged mlock. diff --git a/etc/login.conf b/etc/login.conf index 2c3e7ae..9ddd2be 100644 --- a/etc/login.conf +++ b/etc/login.conf @@ -32,7 +32,7 @@ default:\ :cputime=unlimited:\ :datasize=unlimited:\ :stacksize=unlimited:\ - :memorylocked=unlimited:\ + :memorylocked=8M:\ :memoryuse=unlimited:\ :filesize=unlimited:\ :coredumpsize=unlimited:\ @@ -72,6 +72,7 @@ dialer:\ # in preference to 'default'. root:\ :ignorenologin:\ + :memorylocked=unlimited:\ :tc=default: # diff --git a/lib/libc/sys/mlock.2 b/lib/libc/sys/mlock.2 index 645571a..2a238ad 100644 --- a/lib/libc/sys/mlock.2 +++ b/lib/libc/sys/mlock.2 @@ -28,7 +28,7 @@ .\" @(#)mlock.2 8.2 (Berkeley) 12/11/93 .\" $FreeBSD: head/lib/libc/sys/mlock.2 210551 2010-07-27 20:34:37Z trasz $ .\" -.Dd July 27, 2010 +.Dd September 16, 2012 .Dt MLOCK 2 .Os .Sh NAME @@ -99,7 +99,7 @@ the per-process .Li RLIMIT_MEMLOCK resource limit. .Pp -These calls are only available to the super-user. +These calls are not available in the jail. .Sh RETURN VALUES .Rv -std .Pp @@ -112,7 +112,7 @@ system call will fail if: .Bl -tag -width Er .It Bq Er EPERM -The caller is not the super-user. +The caller is in the jail. .It Bq Er EINVAL The address given is not page aligned or the length is negative. .It Bq Er EAGAIN @@ -129,7 +129,7 @@ system call will fail if: .Bl -tag -width Er .It Bq Er EPERM -The caller is not the super-user. +The caller is in the jail. .It Bq Er EINVAL The address given is not page aligned or the length is negative. .It Bq Er ENOMEM diff --git a/lib/libc/sys/mlockall.2 b/lib/libc/sys/mlockall.2 index ed1b81b..3c726c3 100644 --- a/lib/libc/sys/mlockall.2 +++ b/lib/libc/sys/mlockall.2 @@ -30,7 +30,7 @@ .\" .\" $FreeBSD: head/lib/libc/sys/mlockall.2 210551 2010-07-27 20:34:37Z trasz $ .\" -.Dd July 27, 2010 +.Dd September 16, 2012 .Dt MLOCKALL 2 .Os .Sh NAME @@ -72,7 +72,7 @@ limit and the per-process .Dv RLIMIT_MEMLOCK resource limit. .Pp -These calls are only available to the super-user. +These calls are not available in the jail. .Pp The .Fn munlockall diff --git a/sys/kern/kern_priv.c b/sys/kern/kern_priv.c index 95a6322..1bf0ae4 100644 --- a/sys/kern/kern_priv.c +++ b/sys/kern/kern_priv.c @@ -94,6 +94,17 @@ priv_check_cred(struct ucred *cred, int priv, int flags) goto out; /* + * Allow unprivileged users to call mlock(2)/munlock(2) and + * mlockall(2)/munlockall(2). + */ + switch (priv) { + case PRIV_VM_MLOCK: + case PRIV_VM_MUNLOCK: + error = 0; + goto out; + } + + /* * Having determined if privilege is restricted by various policies, * now determine if privilege is granted. At this point, any policy * may grant privilege. For now, we allow short-circuit boolean diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 5033980..87b092b 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -3247,7 +3247,7 @@ vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_offset_t bot, top; vm_size_t growsize, init_ssize; int orient, rv; - rlim_t vmemlim; + rlim_t lmemlim, vmemlim; /* * The stack orientation is piggybacked with the cow argument. @@ -3268,6 +3268,7 @@ vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, init_ssize = (max_ssize < growsize) ? max_ssize : growsize; PROC_LOCK(curthread->td_proc); + lmemlim = lim_cur(curthread->td_proc, RLIMIT_MEMLOCK); vmemlim = lim_cur(curthread->td_proc, RLIMIT_VMEM); PROC_UNLOCK(curthread->td_proc); @@ -3279,6 +3280,14 @@ vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, return (KERN_NO_SPACE); } + if (map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(curthread->td_proc->p_vmspace)) + + init_ssize > lmemlim) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + } + /* If we would blow our VMEM resource limit, no go */ if (map->size + init_ssize > vmemlim) { vm_map_unlock(map); @@ -3360,7 +3369,7 @@ vm_map_growstack(struct proc *p, vm_offset_t addr) vm_offset_t end; vm_size_t growsize; size_t grow_amount, max_grow; - rlim_t stacklim, vmemlim; + rlim_t lmemlim, stacklim, vmemlim; int is_procstack, rv; struct ucred *cred; #ifdef notyet @@ -3372,6 +3381,7 @@ vm_map_growstack(struct proc *p, vm_offset_t addr) Retry: PROC_LOCK(p); + lmemlim = lim_cur(p, RLIMIT_MEMLOCK); stacklim = lim_cur(p, RLIMIT_STACK); vmemlim = lim_cur(p, RLIMIT_VMEM); PROC_UNLOCK(p); @@ -3494,7 +3504,25 @@ Retry: if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) grow_amount = limit - ctob(vm->vm_ssize); #endif - + if (map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount > + lmemlim) { + vm_map_unlock_read(map); + rv = KERN_NO_SPACE; + goto out; + } +#ifdef RACCT + PROC_LOCK(p); + if (racct_set(p, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount)) { + PROC_UNLOCK(p); + vm_map_unlock_read(map); + rv = KERN_NO_SPACE; + goto out; + } + PROC_UNLOCK(p); +#endif + } /* If we would blow our VMEM resource limit, no go */ if (map->size + grow_amount > vmemlim) { vm_map_unlock_read(map); @@ -3615,6 +3643,9 @@ out: PROC_LOCK(p); error = racct_set(p, RACCT_VMEM, map->size); KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); + error = racct_set(p, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(p->p_vmspace))); + KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); KASSERT(error == 0, ("decreasing RACCT_STACK failed")); PROC_UNLOCK(p); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 6691b04..284936a 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1096,27 +1096,25 @@ sys_mlockall(td, uap) int error; map = &td->td_proc->p_vmspace->vm_map; - error = 0; + error = priv_check(td, PRIV_VM_MLOCK); + if (error) + return (error); if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) return (EINVAL); -#if 0 /* * If wiring all pages in the process would cause it to exceed * a hard resource limit, return ENOMEM. */ - PROC_LOCK(td->td_proc); - if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + if (uap->how & MCL_CURRENT) { + PROC_LOCK(td->td_proc); + if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + PROC_UNLOCK(td->td_proc); + return (ENOMEM); + } PROC_UNLOCK(td->td_proc); - return (ENOMEM); } - PROC_UNLOCK(td->td_proc); -#else - error = priv_check(td, PRIV_VM_MLOCK); - if (error) - return (error); -#endif #ifdef RACCT PROC_LOCK(td->td_proc); error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); @@ -1486,6 +1484,24 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, PROC_UNLOCK(td->td_proc); return (ENOMEM); } + if (map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + racct_set_force(td->td_proc, RACCT_VMEM, + map->size); + PROC_UNLOCK(td->td_proc); + return (ENOMEM); + } + error = racct_set(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + size); + if (error != 0) { + racct_set_force(td->td_proc, RACCT_VMEM, + map->size); + PROC_UNLOCK(td->td_proc); + return (error); + } + } PROC_UNLOCK(td->td_proc); } diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index 0894dc7..6f6c44d 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -77,13 +77,14 @@ sys_obreak(td, uap) { struct vmspace *vm = td->td_proc->p_vmspace; vm_offset_t new, old, base; - rlim_t datalim, vmemlim; + rlim_t datalim, lmemlim, vmemlim; int prot, rv; int error = 0; boolean_t do_map_wirefuture; PROC_LOCK(td->td_proc); datalim = lim_cur(td->td_proc, RLIMIT_DATA); + lmemlim = lim_cur(td->td_proc, RLIMIT_MEMLOCK); vmemlim = lim_cur(td->td_proc, RLIMIT_VMEM); PROC_UNLOCK(td->td_proc); @@ -116,6 +117,13 @@ sys_obreak(td, uap) goto done; } if (new > old) { + if (vm->vm_map.flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + (new - old) > lmemlim) { + error = ENOMEM; + goto done; + } + } if (vm->vm_map.size + (new - old) > vmemlim) { error = ENOMEM; goto done; @@ -136,6 +144,20 @@ sys_obreak(td, uap) error = ENOMEM; goto done; } + if (vm->vm_map.flags & MAP_WIREFUTURE) { + error = racct_set(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + (new - old)); + if (error != 0) { + racct_set_force(td->td_proc, RACCT_DATA, + old - base); + racct_set_force(td->td_proc, RACCT_VMEM, + vm->vm_map.size); + PROC_UNLOCK(td->td_proc); + error = ENOMEM; + goto done; + } + } PROC_UNLOCK(td->td_proc); #endif prot = VM_PROT_RW; @@ -151,7 +173,13 @@ sys_obreak(td, uap) #ifdef RACCT PROC_LOCK(td->td_proc); racct_set_force(td->td_proc, RACCT_DATA, old - base); - racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size); + racct_set_force(td->td_proc, RACCT_VMEM, + vm->vm_map.size); + if (vm->vm_map.flags & MAP_WIREFUTURE) { + racct_set_force(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count( + td->td_proc->p_vmspace))); + } PROC_UNLOCK(td->td_proc); #endif error = ENOMEM; @@ -183,6 +211,10 @@ sys_obreak(td, uap) PROC_LOCK(td->td_proc); racct_set_force(td->td_proc, RACCT_DATA, new - base); racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size); + if (vm->vm_map.flags & MAP_WIREFUTURE) { + racct_set_force(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace))); + } PROC_UNLOCK(td->td_proc); #endif }