diff --git a/lib/libc/stdlib/malloc.3 b/lib/libc/stdlib/malloc.3
index 5f09733..edb2d92 100644
--- a/lib/libc/stdlib/malloc.3
+++ b/lib/libc/stdlib/malloc.3
@@ -32,7 +32,7 @@
 .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd January 31, 2010
+.Dd May 6, 2011
 .Dt MALLOC 3
 .Os
 .Sh NAME
@@ -253,6 +253,16 @@ This is intended for debugging and will impact performance negatively.
 .It K
 Double/halve the virtual memory chunk size.
 The default chunk size is 4 MiB.
+.It L
+When
+.Xr mmap 2
+is used to acquire the backing memory, pass the
+.Dv MAP_DATALIMIT
+flag to the
+.Xr mmap 2
+system call, making the allocation accountable against the
+.Dv RLIMIT_DATA
+resource limit.
 .It M
 Use
 .Xr mmap 2
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 5290512..9a4be68 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -1170,6 +1170,7 @@ static bool opt_sysv = false;
 static bool opt_xmalloc = false;
 static bool opt_zero = false;
 static int opt_narenas_lshift = 0;
+static bool opt_datalimit = false;
 
 typedef struct {
         void *p;
@@ -1789,8 +1790,8 @@ pages_map(void *addr, size_t size)
          * We don't use MAP_FIXED here, because it can cause the *replacement*
          * of existing mappings, and we only want to create new mappings.
          */
-        ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-            -1, 0);
+        ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON |
+            (opt_datalimit ? MAP_DATALIMIT : 0), -1, 0);
         assert(ret != NULL);
 
         if (ret == MAP_FAILED)
@@ -5593,6 +5594,12 @@ MALLOC_OUT:
                                     (sizeof(size_t) << 3))
                                         opt_lg_chunk++;
                                 break;
+                        case 'L':
+                                opt_datalimit = true;
+                                break;
+                        case 'l':
+                                opt_datalimit = false;
+                                break;
                         case 'm':
 #ifdef MALLOC_DSS
                                 opt_mmap = false;
diff --git a/lib/libc/sys/getrlimit.2 b/lib/libc/sys/getrlimit.2
index 35198bc..53026ec 100644
--- a/lib/libc/sys/getrlimit.2
+++ b/lib/libc/sys/getrlimit.2
@@ -28,7 +28,7 @@
 .\" @(#)getrlimit.2 8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd August 20, 2008
+.Dd May 6, 2011
 .Dt GETRLIMIT 2
 .Os
 .Sh NAME
@@ -83,6 +83,8 @@ system call.
 The maximum number of open files for this process.
 .It Dv RLIMIT_NPROC
 The maximum number of simultaneous processes for this user id.
+.It Dv RLIMIT_NPTS
+The maximum number of pseudo-terminals created by this user id.
 .It Dv RLIMIT_RSS
 The maximum size (in bytes) to which a process's
 resident set size may grow.
@@ -106,8 +108,9 @@ sysctl is set.
 Please see
 .Xr tuning 7
 for a complete description of this sysctl.
-.It Dv RLIMIT_NPTS
-The maximum number of pseudo-terminals created by this user id.
+.It Dv RLIMIT_VMEM
+The maximum size (in bytes) of all mappings in the virtual
+address space of the process.
 .El
 .Pp
 A resource limit is specified as a soft limit and a hard limit.
diff --git a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2
index 5e0c226..41cd9e6 100644
--- a/lib/libc/sys/mmap.2
+++ b/lib/libc/sys/mmap.2
@@ -28,7 +28,7 @@
 .\" @(#)mmap.2 8.4 (Berkeley) 5/11/95
 .\" $FreeBSD$
 .\"
-.Dd August 28, 2010
+.Dd May 6, 2011
 .Dt MMAP 2
 .Os
 .Sh NAME
@@ -112,6 +112,11 @@ argument must be 0.
 This flag is identical to
 .Dv MAP_ANON
 and is provided for compatibility.
+.It Dv MAP_DATALIMIT
+Charge the mapping against the
+.Dv RLIMIT_DATA
+resource limit, as is done for memory obtained with
+.Xr sbrk 2 .
 .It Dv MAP_FIXED
 Do not permit the system to select a different
 address than the one specified.
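Usage note (not part of the patch): the sketch below shows how an application could opt in to the new accounting once the patched headers, libc, and kernel are installed. MAP_DATALIMIT is introduced by this change, so the fallback define keeps the example building on an unpatched system; everything else uses standard interfaces.

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

#ifndef MAP_DATALIMIT
#define MAP_DATALIMIT 0		/* flag exists only with this patch applied */
#endif

int
main(void)
{
	struct rlimit rl;
	size_t len = 64 * 1024 * 1024;
	void *p;

	if (getrlimit(RLIMIT_DATA, &rl) == -1)
		err(1, "getrlimit");
	printf("RLIMIT_DATA soft limit: %ju bytes\n", (uintmax_t)rl.rlim_cur);

	/* With the patch, this mapping is charged against RLIMIT_DATA,
	 * like memory obtained with sbrk(2). */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_DATALIMIT, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");	/* ENOMEM if the data limit would be exceeded */
	munmap(p, len);
	return (0);
}

For allocator memory, the same accounting is requested by enabling the new malloc option together with mmap-backed chunks, for example MALLOC_OPTIONS=ML, as documented in the malloc.3 and malloc.c hunks above.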
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index cf9d6a8..0870a64 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -740,7 +740,7 @@ statclock(int usermode)
         vm = p->p_vmspace;
         ru = &td->td_ru;
         ru->ru_ixrss += pgtok(vm->vm_tsize);
-        ru->ru_idrss += pgtok(vm->vm_dsize);
+        ru->ru_idrss += pgtok(vm->vm_dsize + btoc(vm->vm_map.mapped_dsize));
         ru->ru_isrss += pgtok(vm->vm_ssize);
         rss = pgtok(vmspace_resident_count(vm));
         if (ru->ru_maxrss < rss)
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 6d1430c..9f40413 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -774,7 +774,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
                 }
                 kp->ki_swrss = vm->vm_swrss;
                 kp->ki_tsize = vm->vm_tsize;
-                kp->ki_dsize = vm->vm_dsize;
+                kp->ki_dsize = vm->vm_dsize + btoc(vm->vm_map.mapped_dsize);
                 kp->ki_ssize = vm->vm_ssize;
         } else if (p->p_state == PRS_ZOMBIE)
                 kp->ki_stat = SZOMB;
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index 379ed14..4d74e93 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -91,6 +91,7 @@
  */
 #define MAP_NOCORE       0x00020000 /* dont include these pages in a coredump */
 #define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+#define MAP_DATALIMIT    0x00080000 /* charge the mapping against RLIMIT_DATA */
 #endif /* __BSD_VISIBLE */
 
 #if __POSIX_VISIBLE >= 199309
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index d62576f..878901d 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -240,6 +240,7 @@ vm_map_zinit(void *mem, int size, int flags)
         map = (vm_map_t)mem;
         map->nentries = 0;
         map->size = 0;
+        map->mapped_dsize = 0;
         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
         sx_init(&map->lock, "user map");
         return (0);
@@ -1173,6 +1174,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
                 protoeflags |= MAP_ENTRY_NOSYNC;
         if (cow & MAP_DISABLE_COREDUMP)
                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
+        if (cow & MAP_DATA_LIMIT)
+                protoeflags |= MAP_ENTRY_DATALIMIT;
 
         cred = NULL;
         KASSERT((object != kmem_object && object != kernel_object) ||
@@ -1231,6 +1234,8 @@ charged:
                     (prev_entry->protection == prot) &&
                     (prev_entry->max_protection == max)) {
                         map->size += (end - prev_entry->end);
+                        if (cow & MAP_DATA_LIMIT)
+                                map->mapped_dsize += end - prev_entry->end;
                         prev_entry->end = end;
                         vm_map_entry_resize_free(map, prev_entry);
                         vm_map_simplify_entry(map, prev_entry);
@@ -1290,6 +1295,8 @@ charged:
          */
         vm_map_entry_link(map, prev_entry, new_entry);
         map->size += new_entry->end - new_entry->start;
+        if (cow & MAP_DATA_LIMIT)
+                map->mapped_dsize += new_entry->end - new_entry->start;
 
         /*
          * It may be possible to merge the new entry with the next and/or
@@ -2672,6 +2679,8 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
         object = entry->object.vm_object;
         size = entry->end - entry->start;
         map->size -= size;
+        if (entry->eflags & MAP_ENTRY_DATALIMIT)
+                map->mapped_dsize -= size;
 
         if (entry->cred != NULL) {
                 swap_release_by_cred(size, entry->cred);
@@ -3011,6 +3020,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
                     (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
                 vm2->vm_tsize += btoc(newend - entry->start);
         }
+        if (entry->eflags & MAP_ENTRY_DATALIMIT)
+                vm2->vm_map.mapped_dsize += entrysize;
 }
 
 /*
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5311e02..e5aa987 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -134,6 +134,7 @@ struct vm_map_entry {
 #define MAP_ENTRY_IN_TRANSITION 0x0100  /* entry being changed */
 #define MAP_ENTRY_NEEDS_WAKEUP  0x0200  /* waiters in transition */
 #define MAP_ENTRY_NOCOREDUMP    0x0400  /* don't include in a core */
+#define MAP_ENTRY_DATALIMIT     0x0800  /* charged to RLIMIT_DATA */
 
 #define MAP_ENTRY_GROWS_DOWN    0x1000  /* Top-down stacks */
 #define MAP_ENTRY_GROWS_UP      0x2000  /* Bottom-up stacks */
@@ -179,6 +180,7 @@ struct vm_map {
         struct mtx system_mtx;
         int nentries;                   /* Number of entries */
         vm_size_t size;                 /* virtual size */
+        vm_size_t mapped_dsize;         /* bytes mapped with MAP_DATALIMIT */
         u_int timestamp;                /* Version number */
         u_char needs_wakeup;
         u_char system_map;              /* (c) Am I a system map? */
@@ -315,6 +317,7 @@ long vmspace_wired_count(struct vmspace *vmspace);
 #define MAP_DISABLE_SYNCER      0x0020
 #define MAP_DISABLE_COREDUMP    0x0100
 #define MAP_PREFAULT_MADVISE    0x0200  /* from (user) madvise request */
+#define MAP_DATA_LIMIT          0x0400
 #define MAP_STACK_GROWS_DOWN    0x1000
 #define MAP_STACK_GROWS_UP      0x2000
 #define MAP_ACC_CHARGED         0x4000
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index e85b681..2ac9fc3 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1427,29 +1427,46 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
         objtype_t handle_type, void *handle,
         vm_ooffset_t foff)
 {
+        vm_object_t object;
+        struct vmspace *vm;
+        struct thread *td;
+        int docow, error, rv;
         boolean_t fitit;
-        vm_object_t object = NULL;
-        int rv = KERN_SUCCESS;
-        int docow, error;
-        struct thread *td = curthread;
 
         if (size == 0)
                 return (0);
 
+        td = curthread;
+        object = NULL;
+        rv = KERN_SUCCESS;
         size = round_page(size);
+        vm = td->td_proc->p_vmspace;
 
         PROC_LOCK(td->td_proc);
-        if (td->td_proc->p_vmspace->vm_map.size + size >
-            lim_cur(td->td_proc, RLIMIT_VMEM)) {
+        if (vm->vm_map.size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
                 PROC_UNLOCK(td->td_proc);
                 return (ENOMEM);
         }
-        if (racct_set(td->td_proc, RACCT_VMEM,
-            td->td_proc->p_vmspace->vm_map.size + size)) {
+        if (racct_add(td->td_proc, RACCT_VMEM, size)) {
                 PROC_UNLOCK(td->td_proc);
                 return (ENOMEM);
         }
         PROC_UNLOCK(td->td_proc);
+        if ((flags & MAP_DATALIMIT) != 0) {
+                vm_map_lock(map);
+                PROC_LOCK(td->td_proc);
+                if (lim_cur(td->td_proc, RLIMIT_DATA) < ctob(vm->vm_dsize) +
+                    vm->vm_map.mapped_dsize + size) {
+                        error = ENOMEM;
+                } else
+                        error = racct_add(td->td_proc, RACCT_DATA, size);
+                PROC_UNLOCK(td->td_proc);
+                vm_map_unlock(map);
+                if (error != 0) {
+                        racct_sub(td->td_proc, RACCT_VMEM, size);
+                        return (error);
+                }
+        }
 
         /*
          * We currently can only deal with page aligned file offsets.
@@ -1459,15 +1476,19 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
          * cause pmap inconsistencies...so we want to be sure to
          * disallow this in all cases.
          */
-        if (foff & PAGE_MASK)
-                return (EINVAL);
+        if (foff & PAGE_MASK) {
+                error = EINVAL;
+                goto revert_racct;
+        }
 
         if ((flags & MAP_FIXED) == 0) {
                 fitit = TRUE;
                 *addr = round_page(*addr);
         } else {
-                if (*addr != trunc_page(*addr))
-                        return (EINVAL);
+                if (*addr != trunc_page(*addr)) {
+                        error = EINVAL;
+                        goto revert_racct;
+                }
                 fitit = FALSE;
         }
         /*
@@ -1497,7 +1518,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                 break;
         }
         if (error)
-                return (error);
+                goto revert_racct;
         if (flags & MAP_ANON) {
                 object = NULL;
                 docow = 0;
@@ -1517,6 +1538,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                 docow |= MAP_DISABLE_SYNCER;
         if (flags & MAP_NOCORE)
                 docow |= MAP_DISABLE_COREDUMP;
+        if (flags & MAP_DATALIMIT)
+                docow |= MAP_DATA_LIMIT;
 
         if (flags & MAP_STACK)
                 rv = vm_map_stack(map, *addr, size, prot, maxprot,
@@ -1553,7 +1576,18 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
                         vm_map_wire(map, *addr, *addr + size,
                             VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
 
-        return (vm_mmap_to_errno(rv));
+        error = vm_mmap_to_errno(rv);
+
+revert_racct:
+        if (error != 0) {
+                PROC_LOCK(td->td_proc);
+                if ((flags & MAP_DATALIMIT) != 0)
+                        racct_sub(td->td_proc, RACCT_DATA, size);
+                racct_sub(td->td_proc, RACCT_VMEM, size);
+                PROC_UNLOCK(td->td_proc);
+        }
+
+        return (error);
 }
 
 int
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 253ab77..3278d2b 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -98,7 +98,8 @@ sys_obreak(td, uap)
          * Check the resource limit, but allow a process to reduce
          * its usage, even if it remains over the limit.
          */
-        if (new - base > datalim && new > old) {
+        if (new - base + vm->vm_map.mapped_dsize > datalim &&
+            new > old) {
                 error = ENOMEM;
                 goto done;
         }
@@ -122,7 +123,8 @@ sys_obreak(td, uap)
         }
 #ifdef RACCT
         PROC_LOCK(td->td_proc);
-        error = racct_set(td->td_proc, RACCT_DATA, new - base);
+        error = racct_set(td->td_proc, RACCT_DATA, new - base +
+            vm->vm_map.mapped_dsize);
         if (error != 0) {
                 PROC_UNLOCK(td->td_proc);
                 error = ENOMEM;
@@ -131,7 +133,8 @@ sys_obreak(td, uap)
                 error = racct_set(td->td_proc, RACCT_VMEM,
                     vm->vm_map.size + (new - old));
                 if (error != 0) {
-                        racct_set_force(td->td_proc, RACCT_DATA, old - base);
+                        racct_set_force(td->td_proc, RACCT_DATA, old - base +
+                            vm->vm_map.mapped_dsize);
                         PROC_UNLOCK(td->td_proc);
                         error = ENOMEM;
                         goto done;
@@ -150,7 +153,8 @@ sys_obreak(td, uap)
                 if (rv != KERN_SUCCESS) {
 #ifdef RACCT
                         PROC_LOCK(td->td_proc);
-                        racct_set_force(td->td_proc, RACCT_DATA, old - base);
+                        racct_set_force(td->td_proc, RACCT_DATA, old - base +
+                            vm->vm_map.mapped_dsize);
                         racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size);
                         PROC_UNLOCK(td->td_proc);
 #endif
@@ -181,7 +185,8 @@ sys_obreak(td, uap)
                 vm->vm_dsize -= btoc(old - new);
 #ifdef RACCT
                 PROC_LOCK(td->td_proc);
-                racct_set_force(td->td_proc, RACCT_DATA, new - base);
+                racct_set_force(td->td_proc, RACCT_DATA, new - base +
+                    vm->vm_map.mapped_dsize);
                 racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size);
                 PROC_UNLOCK(td->td_proc);
 #endif
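Verification note (not part of the patch, only a sketch): assuming the patched kernel, one way to observe the new vm_mmap() semantics is to shrink RLIMIT_DATA and compare a plain anonymous mapping with a MAP_DATALIMIT one; only the latter should fail with ENOMEM, since only it is charged against the data size limit.

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#ifndef MAP_DATALIMIT
#define MAP_DATALIMIT 0		/* flag exists only with this patch applied */
#endif

int
main(void)
{
	struct rlimit rl = { .rlim_cur = 4 * 1024 * 1024,
	    .rlim_max = 4 * 1024 * 1024 };
	size_t len = 16 * 1024 * 1024;
	void *a, *b;

	if (setrlimit(RLIMIT_DATA, &rl) == -1)
		err(1, "setrlimit");

	/* Not charged to RLIMIT_DATA: expected to succeed. */
	a = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	printf("plain mmap: %s\n",
	    a == MAP_FAILED ? strerror(errno) : "ok");

	/* Charged to RLIMIT_DATA: expected to fail with ENOMEM. */
	b = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_DATALIMIT, -1, 0);
	printf("MAP_DATALIMIT mmap: %s\n",
	    b == MAP_FAILED ? strerror(errno) : "ok");
	return (0);
}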