diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 8d710cd..3c9f57d 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -649,7 +649,7 @@ trap_pfault(frame, usermode)
         PROC_UNLOCK(p);
 
         /* Fault in the user page: */
-        rv = vm_fault(map, va, ftype,
+        rv = vm_fault(td, map, va, ftype,
             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
             : VM_FAULT_NORMAL);
@@ -661,7 +661,7 @@ trap_pfault(frame, usermode)
         * Don't have to worry about process locking or stacks in the
         * kernel.
         */
-        rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+        rv = vm_fault(td, map, va, ftype, VM_FAULT_NORMAL);
     }
     if (rv == KERN_SUCCESS)
         return (0);
diff --git a/sys/arm/arm/trap.c b/sys/arm/arm/trap.c
index fc03b52..41d01b9 100644
--- a/sys/arm/arm/trap.c
+++ b/sys/arm/arm/trap.c
@@ -425,7 +425,7 @@ data_abort_handler(trapframe_t *tf)
         p->p_lock++;
         PROC_UNLOCK(p);
     }
-    error = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ?
+    error = vm_fault(td, map, va, ftype, (ftype & VM_PROT_WRITE) ?
         VM_FAULT_DIRTY : VM_FAULT_NORMAL);
     pcb->pcb_onfault = onfault;
@@ -784,7 +784,7 @@ prefetch_abort_handler(trapframe_t *tf)
         PROC_UNLOCK(p);
     }
 
-    error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
+    error = vm_fault(td, map, va, VM_PROT_READ | VM_PROT_EXECUTE,
         VM_FAULT_NORMAL);
     if (map != kernel_map) {
         PROC_LOCK(p);
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_vm.c b/sys/dev/cxgb/ulp/tom/cxgb_vm.c
index 9c6898a..bcb97ae 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_vm.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_vm.c
@@ -145,9 +145,10 @@ retry:
     m = PHYS_TO_VM_PAGE(pa);
     if (flags & VM_HOLD_WRITEABLE) {
         if (m == NULL || (m->flags & PG_WRITEABLE) == 0)
-            rv = vm_fault(map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
+            rv = vm_fault(td, map, va, VM_PROT_WRITE,
+                VM_FAULT_DIRTY);
     } else if (m == NULL)
-        rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_NORMAL);
+        rv = vm_fault(td, map, va, VM_PROT_READ, VM_FAULT_NORMAL);
     if (rv) {
         printf("vm_fault bad return rv=%d va=0x%zx\n",
             rv, va);
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 999e73d..0852ff0 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -830,7 +830,7 @@ trap_pfault(frame, usermode, eva)
         PROC_UNLOCK(p);
 
         /* Fault in the user page: */
-        rv = vm_fault(map, va, ftype,
+        rv = vm_fault(td, map, va, ftype,
             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
             : VM_FAULT_NORMAL);
@@ -842,7 +842,7 @@ trap_pfault(frame, usermode, eva)
         * Don't have to worry about process locking or stacks in the
         * kernel.
         */
-        rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+        rv = vm_fault(td, map, va, ftype, VM_FAULT_NORMAL);
     }
     if (rv == KERN_SUCCESS)
         return (0);
diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c
index aa31e6c..51c900e 100644
--- a/sys/ia64/ia64/trap.c
+++ b/sys/ia64/ia64/trap.c
@@ -574,7 +574,7 @@ trap(int vector, struct trapframe *tf)
             PROC_UNLOCK(p);
 
             /* Fault in the user page: */
-            rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE)
+            rv = vm_fault(td, map, va, ftype, (ftype & VM_PROT_WRITE)
                 ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
 
             PROC_LOCK(p);
@@ -585,7 +585,7 @@ trap(int vector, struct trapframe *tf)
             * Don't have to worry about process locking or
             * stacks in the kernel.
             */
-            rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+            rv = vm_fault(td, map, va, ftype, VM_FAULT_NORMAL);
         }
 
         if (rv == KERN_SUCCESS)
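Note: every machine-dependent trap handler above gets the same mechanical conversion — the faulting thread, which each handler already has at hand as td, is passed down to vm_fault() instead of being rediscovered via curthread/curproc inside the VM. For any remaining out-of-tree handler, the resulting pattern is sketched below; the locals td, map, va and ftype follow the conventions of the handlers above, and this sketch is illustrative rather than part of the patch.

    /*
     * Sketch of a converted MD page-fault path under the new
     * vm_fault() interface.
     */
    rv = vm_fault(td, map, va, ftype,
        (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
    if (rv != KERN_SUCCESS) {
        /* deliver SIGSEGV/SIGBUS or take the pcb_onfault recovery */
    }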
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index d5d1813..4e9f2ca 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2682,6 +2682,7 @@ killproc(p, why)
         p, p->p_pid, p->p_comm);
     log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid,
         p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why);
+    p->p_flag |= P_WKILLED;
     psignal(p, SIGKILL);
 }
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 52b3f8f..57e9c43 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -773,7 +773,7 @@ link_elf_load_file(linker_class_t cls, const char* filename,
     /*
      * Wire down the pages
      */
-    error = vm_map_wire(kernel_map,
+    error = vm_map_wire(td, kernel_map,
         (vm_offset_t) segbase,
         (vm_offset_t) segbase + segs[i]->p_memsz,
         VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 29165fe..07a6697 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -683,7 +683,7 @@ link_elf_load_file(linker_class_t cls, const char *filename,
     }
 
     /* Wire the pages */
-    error = vm_map_wire(kernel_map, mapbase,
+    error = vm_map_wire(td, kernel_map, mapbase,
         mapbase + round_page(mapsize),
         VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
     if (error != KERN_SUCCESS) {
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index dbdd30f..ecf41a8 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -250,9 +250,10 @@ proc_rwmem(struct proc *p, struct uio *uio)
     vm_pindex_t pindex;
     u_int len;
     vm_page_t m;
+    struct thread *tdp;
 
     object = NULL;
-
+    tdp = FIRST_THREAD_IN_PROC(p);
     uva = (vm_offset_t)uio->uio_offset;
 
     /*
@@ -269,7 +270,7 @@ proc_rwmem(struct proc *p, struct uio *uio)
         /*
          * Fault the page on behalf of the process
          */
-        error = vm_fault(map, pageno, reqprot, fault_flags);
+        error = vm_fault(tdp, map, pageno, reqprot, fault_flags);
         if (error) {
             error = EFAULT;
             break;
diff --git a/sys/mips/mips/trap.c b/sys/mips/mips/trap.c
index 8c32b59..8206ddf 100644
--- a/sys/mips/mips/trap.c
+++ b/sys/mips/mips/trap.c
@@ -506,7 +506,8 @@ trap(trapframe)
     kernel_fault:
         va = trunc_page((vm_offset_t)trapframe->badvaddr);
-        rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
+        rv = vm_fault(td, kernel_map, va, ftype,
+            VM_FAULT_NORMAL);
         if (rv == KERN_SUCCESS)
             return (trapframe->pc);
         if ((i = td->td_pcb->pcb_onfault) != 0) {
@@ -565,7 +566,7 @@ dofault:
         ++p->p_lock;
         PROC_UNLOCK(p);
 
-        rv = vm_fault(map, va, ftype, flag);
+        rv = vm_fault(td, map, va, ftype, flag);
 
         PROC_LOCK(p);
         --p->p_lock;
diff --git a/sys/powerpc/aim/trap.c b/sys/powerpc/aim/trap.c
index 0a3827d..dea6373 100644
--- a/sys/powerpc/aim/trap.c
+++ b/sys/powerpc/aim/trap.c
@@ -542,7 +542,7 @@ trap_pfault(struct trapframe *frame, int user)
         PROC_UNLOCK(p);
 
         /* Fault in the user page: */
-        rv = vm_fault(map, va, ftype,
+        rv = vm_fault(td, map, va, ftype,
             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
             : VM_FAULT_NORMAL);
@@ -554,7 +554,7 @@ trap_pfault(struct trapframe *frame, int user)
         * Don't have to worry about process locking or stacks in the
         * kernel.
         */
-        rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+        rv = vm_fault(td, map, va, ftype, VM_FAULT_NORMAL);
     }
 
     if (rv == KERN_SUCCESS)
diff --git a/sys/powerpc/booke/trap.c b/sys/powerpc/booke/trap.c
index d50b754..3014624 100644
--- a/sys/powerpc/booke/trap.c
+++ b/sys/powerpc/booke/trap.c
@@ -534,7 +534,7 @@ trap_pfault(struct trapframe *frame, int user)
         PROC_UNLOCK(p);
 
         /* Fault in the user page: */
-        rv = vm_fault(map, va, ftype,
+        rv = vm_fault(td, map, va, ftype,
             (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
 
         PROC_LOCK(p);
@@ -545,7 +545,7 @@ trap_pfault(struct trapframe *frame, int user)
         * Don't have to worry about process locking or stacks in the
         * kernel.
         */
-        rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+        rv = vm_fault(td, map, va, ftype, VM_FAULT_NORMAL);
     }
 
     if (rv == KERN_SUCCESS)
diff --git a/sys/sparc64/sparc64/trap.c b/sys/sparc64/sparc64/trap.c
index 9de8e26..8e91a61 100644
--- a/sys/sparc64/sparc64/trap.c
+++ b/sys/sparc64/sparc64/trap.c
@@ -469,7 +469,7 @@ trap_pfault(struct thread *td, struct trapframe *tf)
         PROC_UNLOCK(p);
 
         /* Fault in the user page. */
-        rv = vm_fault(&vm->vm_map, va, prot, flags);
+        rv = vm_fault(td, &vm->vm_map, va, prot, flags);
 
         /*
          * Now the process can be swapped again.
@@ -503,7 +503,7 @@ trap_pfault(struct thread *td, struct trapframe *tf)
         * We don't have to worry about process locking or stacks in
         * the kernel.
         */
-        rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
+        rv = vm_fault(td, kernel_map, va, prot, VM_FAULT_NORMAL);
     }
 
     CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
diff --git a/sys/sun4v/sun4v/trap.c b/sys/sun4v/sun4v/trap.c
index f04c424..b690b09 100644
--- a/sys/sun4v/sun4v/trap.c
+++ b/sys/sun4v/sun4v/trap.c
@@ -527,7 +527,7 @@ trap_pfault(struct thread *td, struct trapframe *tf, int64_t type, uint64_t data
         PROC_UNLOCK(p);
 
         /* Fault in the user page. */
-        rv = vm_fault(&vm->vm_map, va, prot, flags);
+        rv = vm_fault(td, &vm->vm_map, va, prot, flags);
 
         /*
          * Now the process can be swapped again.
@@ -548,7 +548,7 @@ trap_pfault(struct thread *td, struct trapframe *tf, int64_t type, uint64_t data
         * Don't have to worry about process locking or stacks in the
         * kernel.
         */
-        rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
+        rv = vm_fault(td, kernel_map, va, prot, VM_FAULT_NORMAL);
     }
 
     CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 66e95ac..0c47fa7 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -560,7 +560,7 @@ struct proc {
 #define P_WAITED        0x01000 /* Someone is waiting for us. */
 #define P_WEXIT         0x02000 /* Working on exiting. */
 #define P_EXEC          0x04000 /* Process called exec. */
-#define P_UNUSED8000    0x08000 /* available. */
+#define P_WKILLED       0x08000 /* Killed, go to kernel/user boundary ASAP. */
 #define P_CONTINUED     0x10000 /* Proc has continued from a stopped state. */
 #define P_STOPPED_SIG   0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */
 #define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */
@@ -579,6 +579,7 @@ struct proc {
 
 #define P_STOPPED       (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
 #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
+#define P_KILLED(p)     ((p)->p_flag & P_WKILLED)
 
 /*
  * These were process status values (p_stat), now they are only used in
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 74138c7..19fb3a8 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1711,9 +1711,12 @@ retry:
         if (swap == NULL) {
             mtx_unlock(&swhash_mtx);
             VM_OBJECT_UNLOCK(object);
-            if (uma_zone_exhausted(swap_zone))
-                panic("swap zone exhausted, increase kern.maxswzone\n");
-            VM_WAIT;
+            if (uma_zone_exhausted(swap_zone)) {
+                printf("swap zone exhausted, increase kern.maxswzone\n");
+                vm_pageout_oom(VM_OOM_SWAPZ);
+                pause("swzonex", 10);
+            } else
+                VM_WAIT;
             VM_OBJECT_LOCK(object);
             goto retry;
         }
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index 8fec101..3b1e1b2 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -221,7 +221,7 @@ contigmapping(vm_page_t m, vm_pindex_t npages, int flags)
         tmp_addr += PAGE_SIZE;
     }
     VM_OBJECT_UNLOCK(object);
-    vm_map_wire(map, addr, addr + (npages << PAGE_SHIFT),
+    vm_map_wire(curthread, map, addr, addr + (npages << PAGE_SHIFT),
         VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
     return ((void *)addr);
 }
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 0a54372..ceff576 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -67,10 +67,11 @@ vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t,
     boolean_t);
 void swapout_procs(int);
 int useracc(void *, int, int);
-int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
+int vm_fault(struct thread *, vm_map_t, vm_offset_t, vm_prot_t, int);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
-int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
+int vm_fault_wire(struct thread *, vm_map_t, vm_offset_t, vm_offset_t,
+    boolean_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
 void vm_waitproc(struct proc *);
 int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t);
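Note: the proc.h and vm_fault.c pieces combine as follows. Once killproc() has marked a process P_WKILLED, its page faults should complete with as little work and as little free memory as possible, so the dying thread can reach the kernel/user boundary and exit. The allocation-class choice made by the vm_fault() hunks below, distilled into a standalone helper for illustration only (the helper name is not part of the patch):

    /*
     * A killed process may dip into the system page reserve
     * (VM_ALLOC_SYSTEM) so it can make progress toward exiting
     * instead of sleeping for free pages it may never get.
     */
    static int
    fault_alloc_class(struct thread *td, struct vnode *vp,
        vm_object_t backing_object)
    {

        if (P_KILLED(td->td_proc))
            return (VM_ALLOC_SYSTEM);
        if (vp != NULL || backing_object != NULL)
            return (VM_ALLOC_NORMAL);
        return (VM_ALLOC_ZERO);
    }

Because this path no longer requests VM_ALLOC_ZERO, the @@ -447 hunk below zeroes the page with pmap_zero_page() when the fault lands on a pagerless anonymous object and the page did not arrive pre-zeroed (PG_ZERO clear).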
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 6950c82..2b80d2e 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -206,8 +206,8 @@ unlock_and_deallocate(struct faultstate *fs)
  *    Caller may hold no locks.
  */
 int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
-    int fault_flags)
+vm_fault(struct thread *td, vm_map_t map, vm_offset_t vaddr,
+    vm_prot_t fault_type, int fault_flags)
 {
     vm_prot_t prot;
     int is_first_object_locked, result;
@@ -217,6 +217,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     vm_page_t marray[VM_FAULT_READ];
     int hardfault;
     int faultcount;
+    int alloc_req;
     struct faultstate fs;
 
     hardfault = 0;
@@ -236,8 +237,8 @@ RetryFault:;
     if (result != KERN_PROTECTION_FAILURE ||
         (fault_flags & VM_FAULT_WIRE_MASK) != VM_FAULT_USER_WIRE) {
         if (growstack && result == KERN_INVALID_ADDRESS &&
-            map != kernel_map && curproc != NULL) {
-            result = vm_map_growstack(curproc, vaddr);
+            map != kernel_map && td != NULL) {
+            result = vm_map_growstack(td->td_proc, vaddr);
             if (result != KERN_SUCCESS)
                 return (KERN_FAILURE);
             growstack = FALSE;
@@ -438,8 +439,22 @@ RetryFault:;
                 fs.pindex;
             }
 #endif
+            /*
+             * Unlocked read of the p_flag is
+             * harmless.  At worst, P_KILLED is
+             * not observed there, and the
+             * allocation may fail, causing a
+             * restart and a new reading of the
+             * p_flag.
+             */
+            if (P_KILLED(td->td_proc))
+                alloc_req = VM_ALLOC_SYSTEM;
+            else if (fs.vp || fs.object->backing_object)
+                alloc_req = VM_ALLOC_NORMAL;
+            else
+                alloc_req = VM_ALLOC_ZERO;
             fs.m = vm_page_alloc(fs.object, fs.pindex,
-                (fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_ZERO);
+                alloc_req);
         }
         if (fs.m == NULL) {
             unlock_and_deallocate(&fs);
@@ -447,6 +462,10 @@ RetryFault:;
             goto RetryFault;
         } else if ((fs.m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
             break;
+        else if (alloc_req == VM_ALLOC_SYSTEM && fs.vp == NULL &&
+            fs.object->backing_object == NULL &&
+            (fs.m->flags & PG_ZERO) == 0)
+            pmap_zero_page(fs.m);
     }
 
 readrest:
@@ -465,7 +484,8 @@ readrest:
         int ahead, behind;
         u_char behavior = vm_map_entry_behavior(fs.entry);
 
-        if (behavior == MAP_ENTRY_BEHAV_RANDOM) {
+        if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
+            P_KILLED(td->td_proc)) {
             ahead = 0;
             behind = 0;
         } else {
@@ -600,7 +620,7 @@ readrest:
             */
             if (rv == VM_PAGER_ERROR)
                 printf("vm_fault: pager read error, pid %d (%s)\n",
-                    curproc->p_pid, curproc->p_comm);
+                    td->td_proc->p_pid, td->td_proc->p_comm);
             /*
              * Data outside the range of the pager or an I/O error
              */
@@ -916,9 +936,9 @@ readrest:
      */
     unlock_and_deallocate(&fs);
     if (hardfault)
-        curthread->td_ru.ru_majflt++;
+        td->td_ru.ru_majflt++;
     else
-        curthread->td_ru.ru_minflt++;
+        td->td_ru.ru_minflt++;
 
     return (KERN_SUCCESS);
 }
@@ -1019,8 +1039,8 @@ vm_fault_quick(caddr_t v, int prot)
  *    Wire down a range of virtual addresses in a map.
  */
 int
-vm_fault_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-    boolean_t user_wire, boolean_t fictitious)
+vm_fault_wire(struct thread *td, vm_map_t map, vm_offset_t start,
+    vm_offset_t end, boolean_t user_wire, boolean_t fictitious)
 {
     vm_offset_t va;
     int rv;
@@ -1031,7 +1051,7 @@ vm_fault_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
      * read-only sections.
      */
     for (va = start; va < end; va += PAGE_SIZE) {
-        rv = vm_fault(map, va,
+        rv = vm_fault(td, map, va,
             user_wire ? VM_PROT_READ : VM_PROT_READ | VM_PROT_WRITE,
             user_wire ? VM_FAULT_USER_WIRE : VM_FAULT_CHANGE_WIRING);
         if (rv) {
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index d6ec2ba..136ef44 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -219,7 +219,7 @@ vslock(void *addr, size_t len)
     if (npages + cnt.v_wire_count > vm_page_max_wired)
         return (EAGAIN);
 #endif
-    error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,
+    error = vm_map_wire(curthread, &curproc->p_vmspace->vm_map, start, end,
         VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
     /*
      * Return EFAULT on error to match copy{in,out}() behaviour
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 9a1ac63..4a0202e 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -185,7 +185,7 @@ kmem_alloc(map, size)
     /*
      * And finally, mark the data as non-pageable.
      */
-    (void) vm_map_wire(map, addr, addr + size,
+    (void) vm_map_wire(curthread, map, addr, addr + size,
         VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
 
     return (addr);
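Note: vm_fault_wire() above simply forwards the thread to vm_fault(); kernel-context callers such as vslock() and kmem_alloc() pass curthread because the wiring runs in the caller's own context. A hedged sketch of the chain for a hypothetical driver wiring a user buffer under the new interface (the helper and its error mapping are illustrative, mirroring vslock()):

    static int
    wire_user_buf(void *addr, size_t len)
    {
        vm_offset_t end, start;

        start = trunc_page((vm_offset_t)addr);
        end = round_page((vm_offset_t)addr + len);
        /* vm_map_wire() now takes the acting thread first; see below. */
        return (vm_map_wire(curthread, &curproc->p_vmspace->vm_map,
            start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES) ==
            KERN_SUCCESS ? 0 : EFAULT);
    }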
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index a1f0ef3..fc2fa71 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1979,7 +1979,7 @@ done:
  *    Implements both kernel and user wiring.
  */
 int
-vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
+vm_map_wire(struct thread *td, vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags)
 {
     vm_map_entry_t entry, first_entry, tmp_entry;
@@ -2069,7 +2069,7 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
             * mark.
             */
             vm_map_unlock(map);
-            rv = vm_fault_wire(map, saved_start, saved_end,
+            rv = vm_fault_wire(td, map, saved_start, saved_end,
                 user_wire, fictitious);
             vm_map_lock(map);
             if (last_timestamp + 1 != map->timestamp) {
@@ -3005,7 +3005,7 @@ Retry:
      * Heed the MAP_WIREFUTURE flag if it was set for this process.
      */
     if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
-        vm_map_wire(map,
+        vm_map_wire(FIRST_THREAD_IN_PROC(p), map,
             (stack_entry == next_entry) ? addr : addr - grow_amount,
             (stack_entry == next_entry) ? stack_entry->start : addr,
             (p->p_flag & P_SYSTEM)
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index bc2ae43..1e92166 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -372,8 +372,8 @@ int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
 int vm_map_growstack (struct proc *p, vm_offset_t addr);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
-int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
-    int flags);
+int vm_map_wire(struct thread *td, vm_map_t map, vm_offset_t start,
+    vm_offset_t end, int flags);
 int vmspace_swap_count (struct vmspace *vmspace);
 #endif                /* _KERNEL */
 #endif                /* _VM_MAP_ */
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index c3f08b0..8a522aa 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1004,7 +1004,7 @@ mlock(td, uap)
     PROC_UNLOCK(proc);
     if (npages + cnt.v_wire_count > vm_page_max_wired)
         return (EAGAIN);
-    error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
+    error = vm_map_wire(td, &proc->p_vmspace->vm_map, start, end,
         VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
     return (error == KERN_SUCCESS ? 0 : ENOMEM);
 }
@@ -1064,7 +1064,7 @@ mlockall(td, uap)
         * from mlockall().  vm_map_wire() will wire pages, by
         * calling vm_fault_wire() for each page in the region.
         */
-        error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
+        error = vm_map_wire(td, map, vm_map_min(map), vm_map_max(map),
             VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
         error = (error == KERN_SUCCESS ? 0 : EAGAIN);
     }
@@ -1485,7 +1485,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
      * be wired, then heed this.
      */
     if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
-        vm_map_wire(map, *addr, *addr + size,
+        vm_map_wire(td, map, *addr, *addr + size,
             VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
 
     switch (rv) {
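Note: the vm_pageout.c diff below is the core of the change. The OOM victim-selection loop moves out of vm_pageout_scan() into a new vm_pageout_oom(int shortage), so the swap-zone-exhaustion path in swap_pager.c above can trigger it as well. The two call sites reduce to the following (shown out of context):

    /* General memory shortage, from the page daemon: */
    vm_pageout_oom(VM_OOM_MEM);

    /* Swap metadata zone exhausted, from swap_pager.c: */
    vm_pageout_oom(VM_OOM_SWAPZ);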
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 49abe42..ea8776d 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -681,9 +681,6 @@ vm_pageout_scan(int pass)
     struct vm_page marker;
     int page_shortage, maxscan, pcount;
     int addl_page_shortage, addl_page_shortage_init;
-    struct proc *p, *bigproc;
-    struct thread *td;
-    vm_offset_t size, bigsize;
     vm_object_t object;
     int actcount;
     int vnodes_skipped = 0;
@@ -1174,7 +1171,22 @@ unlock_and_continue:
     * doing this on the first pass in order to give ourselves a
     * chance to flush out dirty vnode-backed pages and to allow
     * active pages to be moved to the inactive queue and reclaimed.
-    *
+    */
+    if (pass != 0 &&
+        ((swap_pager_avail < 64 && vm_page_count_min()) ||
+        (swap_pager_full && vm_paging_target() > 0)))
+        vm_pageout_oom(VM_OOM_MEM);
+}
+
+
+void
+vm_pageout_oom(int shortage)
+{
+    struct proc *p, *bigproc;
+    vm_offset_t size, bigsize;
+    struct thread *td;
+
+    /*
     * We keep the process bigproc locked once we find it to keep anyone
     * from messing with it; however, there is a possibility of
     * deadlock if process B is bigproc and one of it's child processes
@@ -1182,75 +1194,72 @@ unlock_and_continue:
     * lock while walking this list.  To avoid this, we don't block on
     * the process lock but just skip a process if it is already locked.
     */
-    if (pass != 0 &&
-        ((swap_pager_avail < 64 && vm_page_count_min()) ||
-        (swap_pager_full && vm_paging_target() > 0))) {
-        bigproc = NULL;
-        bigsize = 0;
-        sx_slock(&allproc_lock);
-        FOREACH_PROC_IN_SYSTEM(p) {
-            int breakout;
+    bigproc = NULL;
+    bigsize = 0;
+    sx_slock(&allproc_lock);
+    FOREACH_PROC_IN_SYSTEM(p) {
+        int breakout;
 
-            if (PROC_TRYLOCK(p) == 0)
-                continue;
-            /*
-             * If this is a system or protected process, skip it.
-             */
-            if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
-                (p->p_flag & P_PROTECTED) ||
-                ((p->p_pid < 48) && (swap_pager_avail != 0))) {
-                PROC_UNLOCK(p);
-                continue;
-            }
-            /*
-             * If the process is in a non-running type state,
-             * don't touch it.  Check all the threads individually.
-             */
-            breakout = 0;
-            FOREACH_THREAD_IN_PROC(p, td) {
-                thread_lock(td);
-                if (!TD_ON_RUNQ(td) &&
-                    !TD_IS_RUNNING(td) &&
-                    !TD_IS_SLEEPING(td)) {
-                    thread_unlock(td);
-                    breakout = 1;
-                    break;
-                }
-                thread_unlock(td);
-            }
+        if (PROC_TRYLOCK(p) == 0)
+            continue;
+        /*
+         * If this is a system, protected or killed process, skip it.
+         */
+        if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
+            (p->p_flag & P_PROTECTED) || P_KILLED(p) ||
+            ((p->p_pid < 48) && (swap_pager_avail != 0))) {
+            PROC_UNLOCK(p);
+            continue;
+        }
+        /*
+         * If the process is in a non-running type state,
+         * don't touch it.  Check all the threads individually.
+         */
+        breakout = 0;
+        FOREACH_THREAD_IN_PROC(p, td) {
+            thread_lock(td);
+            if (!TD_ON_RUNQ(td) &&
+                !TD_IS_RUNNING(td) &&
+                !TD_IS_SLEEPING(td)) {
+                thread_unlock(td);
+                breakout = 1;
+                break;
+            }
+            thread_unlock(td);
+        }
-            if (breakout) {
-                PROC_UNLOCK(p);
-                continue;
-            }
-            /*
-             * get the process size
-             */
-            if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) {
-                PROC_UNLOCK(p);
-                continue;
-            }
-            size = vmspace_swap_count(p->p_vmspace);
-            vm_map_unlock_read(&p->p_vmspace->vm_map);
-            size += vmspace_resident_count(p->p_vmspace);
-            /*
-             * if the this process is bigger than the biggest one
-             * remember it.
-             */
-            if (size > bigsize) {
-                if (bigproc != NULL)
-                    PROC_UNLOCK(bigproc);
-                bigproc = p;
-                bigsize = size;
-            } else
-                PROC_UNLOCK(p);
-        }
-        sx_sunlock(&allproc_lock);
-        if (bigproc != NULL) {
-            killproc(bigproc, "out of swap space");
-            sched_nice(bigproc, PRIO_MIN);
-            PROC_UNLOCK(bigproc);
-            wakeup(&cnt.v_free_count);
-        }
-    }
+        if (breakout) {
+            PROC_UNLOCK(p);
+            continue;
+        }
+        /*
+         * get the process size
+         */
+        if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) {
+            PROC_UNLOCK(p);
+            continue;
+        }
+        size = vmspace_swap_count(p->p_vmspace);
+        vm_map_unlock_read(&p->p_vmspace->vm_map);
+        if (shortage == VM_OOM_MEM)
+            size += vmspace_resident_count(p->p_vmspace);
+        /*
+         * If this process is bigger than the biggest one,
+         * remember it.
+         */
+        if (size > bigsize) {
+            if (bigproc != NULL)
+                PROC_UNLOCK(bigproc);
+            bigproc = p;
+            bigsize = size;
+        } else
+            PROC_UNLOCK(p);
+    }
+    sx_sunlock(&allproc_lock);
+    if (bigproc != NULL) {
+        killproc(bigproc, "out of swap space");
+        sched_nice(bigproc, PRIO_MIN);
+        PROC_UNLOCK(bigproc);
+        wakeup(&cnt.v_free_count);
+    }
 }
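Note: the only sizing difference between the two shortage kinds is visible in the loop above. For VM_OOM_SWAPZ a candidate is sized by its swap usage alone, while VM_OOM_MEM also counts resident pages, since reclaiming RAM is the goal there. Distilled:

    size = vmspace_swap_count(p->p_vmspace);
    if (shortage == VM_OOM_MEM)
        size += vmspace_resident_count(p->p_vmspace);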
+ */ + if (size > bigsize) { + if (bigproc != NULL) + PROC_UNLOCK(bigproc); + bigproc = p; + bigsize = size; + } else + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + if (bigproc != NULL) { + killproc(bigproc, "out of swap space"); + sched_nice(bigproc, PRIO_MIN); + PROC_UNLOCK(bigproc); + wakeup(&cnt.v_free_count); } } diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 1586ac4..15ca570 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -83,6 +83,9 @@ extern int vm_pageout_page_count; #define VM_SWAP_NORMAL 1 #define VM_SWAP_IDLE 2 +#define VM_OOM_MEM 1 +#define VM_OOM_SWAPZ 2 + /* * Exported routines. */ @@ -100,5 +103,6 @@ extern void vm_waitpfault(void); #ifdef _KERNEL boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); int vm_pageout_flush(vm_page_t *, int, int); +void vm_pageout_oom(int shortage); #endif #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index cd98be9..7e80500 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -149,7 +149,7 @@ done: vm_map_unlock(&vm->vm_map); if (do_map_wirefuture) - (void) vm_map_wire(&vm->vm_map, old, new, + (void) vm_map_wire(td, &vm->vm_map, old, new, VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); return (error);