diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index ff02ee3..0fe6823 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -2674,6 +2674,7 @@ killproc(p, why) p, p->p_pid, p->p_comm); log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why); + p->p_flag |= P_WKILLED; psignal(p, SIGKILL); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 654e3bf..a19f695 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -559,7 +559,7 @@ struct proc { #define P_WAITED 0x01000 /* Someone is waiting for us. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ -#define P_UNUSED8000 0x08000 /* available. */ +#define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. 
*/ @@ -578,6 +578,7 @@ struct proc { #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) +#define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* * These were process status values (p_stat), now they are only used in diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index b00c67b..140cfb4 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1711,9 +1711,12 @@ retry: if (swap == NULL) { mtx_unlock(&swhash_mtx); VM_OBJECT_UNLOCK(object); - if (uma_zone_exhausted(swap_zone)) - panic("swap zone exhausted, increase kern.maxswzone\n"); - VM_WAIT; + if (uma_zone_exhausted(swap_zone)) { + printf("swap zone exhausted, increase kern.maxswzone\n"); + vm_pageout_oom(VM_OOM_SWAPZ); + pause("swzonex", 10); + } else + VM_WAIT; VM_OBJECT_LOCK(object); goto retry; } diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 6950c82..eff0230 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -217,6 +217,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, vm_page_t marray[VM_FAULT_READ]; int hardfault; int faultcount; + int alloc_req; struct faultstate fs; hardfault = 0; @@ -438,8 +439,22 @@ RetryFault:; fs.pindex; } #endif + /* + * An unlocked read of p_flag is + * harmless. At worst, P_KILLED + * would not be observed here, and + * the allocation may fail, causing + * a restart and a fresh read of + * p_flag. + */ + if (P_KILLED(curproc)) + alloc_req = VM_ALLOC_SYSTEM; + else if (fs.vp || fs.object->backing_object) + alloc_req = VM_ALLOC_NORMAL; + else + alloc_req = VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, - (fs.vp || fs.object->backing_object)? 
VM_ALLOC_NORMAL: VM_ALLOC_ZERO); + alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); @@ -447,6 +462,10 @@ RetryFault:; goto RetryFault; } else if ((fs.m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) break; + else if (alloc_req == VM_ALLOC_SYSTEM && fs.vp == NULL && + fs.object->backing_object == NULL && + (fs.m->flags & PG_ZERO) == 0) + pmap_zero_page(fs.m); } readrest: @@ -465,7 +484,7 @@ readrest: int ahead, behind; u_char behavior = vm_map_entry_behavior(fs.entry); - if (behavior == MAP_ENTRY_BEHAV_RANDOM) { + if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { ahead = 0; behind = 0; } else { diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index fa5da26..2b03912 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -681,9 +681,6 @@ vm_pageout_scan(int pass) struct vm_page marker; int page_shortage, maxscan, pcount; int addl_page_shortage, addl_page_shortage_init; - struct proc *p, *bigproc; - struct thread *td; - vm_offset_t size, bigsize; vm_object_t object; int actcount; int vnodes_skipped = 0; @@ -1174,7 +1171,22 @@ unlock_and_continue: * doing this on the first pass in order to give ourselves a * chance to flush out dirty vnode-backed pages and to allow * active pages to be moved to the inactive queue and reclaimed. - * + */ + if (pass != 0 && + ((swap_pager_avail < 64 && vm_page_count_min()) || + (swap_pager_full && vm_paging_target() > 0))) + vm_pageout_oom(VM_OOM_MEM); +} + + +void +vm_pageout_oom(int shortage) +{ + struct proc *p, *bigproc; + vm_offset_t size, bigsize; + struct thread *td; + + /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of it's child processes @@ -1182,75 +1194,72 @@ unlock_and_continue: * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. 
*/ - if (pass != 0 && - ((swap_pager_avail < 64 && vm_page_count_min()) || - (swap_pager_full && vm_paging_target() > 0))) { - bigproc = NULL; - bigsize = 0; - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - int breakout; + bigproc = NULL; + bigsize = 0; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + int breakout; - if (PROC_TRYLOCK(p) == 0) - continue; - /* - * If this is a system or protected process, skip it. - */ - if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || - (p->p_flag & P_PROTECTED) || - ((p->p_pid < 48) && (swap_pager_avail != 0))) { - PROC_UNLOCK(p); - continue; - } - /* - * If the process is in a non-running type state, - * don't touch it. Check all the threads individually. - */ - breakout = 0; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!TD_ON_RUNQ(td) && - !TD_IS_RUNNING(td) && - !TD_IS_SLEEPING(td)) { - thread_unlock(td); - breakout = 1; - break; - } + if (PROC_TRYLOCK(p) == 0) + continue; + /* + * If this is a system, protected or killed process, skip it. + */ + if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || + (p->p_flag & P_PROTECTED) || P_KILLED(p) || + ((p->p_pid < 48) && (swap_pager_avail != 0))) { + PROC_UNLOCK(p); + continue; + } + /* + * If the process is in a non-running type state, + * don't touch it. Check all the threads individually. + */ + breakout = 0; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!TD_ON_RUNQ(td) && + !TD_IS_RUNNING(td) && + !TD_IS_SLEEPING(td)) { thread_unlock(td); + breakout = 1; + break; } - if (breakout) { - PROC_UNLOCK(p); - continue; - } - /* - * get the process size - */ - if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) { - PROC_UNLOCK(p); - continue; - } - size = vmspace_swap_count(p->p_vmspace); - vm_map_unlock_read(&p->p_vmspace->vm_map); - size += vmspace_resident_count(p->p_vmspace); - /* - * if the this process is bigger than the biggest one - * remember it. 
- */ - if (size > bigsize) { - if (bigproc != NULL) - PROC_UNLOCK(bigproc); - bigproc = p; - bigsize = size; - } else - PROC_UNLOCK(p); + thread_unlock(td); } - sx_sunlock(&allproc_lock); - if (bigproc != NULL) { - killproc(bigproc, "out of swap space"); - sched_nice(bigproc, PRIO_MIN); - PROC_UNLOCK(bigproc); - wakeup(&cnt.v_free_count); + if (breakout) { + PROC_UNLOCK(p); + continue; + } + /* + * get the process size + */ + if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) { + PROC_UNLOCK(p); + continue; } + size = vmspace_swap_count(p->p_vmspace); + vm_map_unlock_read(&p->p_vmspace->vm_map); + if (shortage == VM_OOM_MEM) + size += vmspace_resident_count(p->p_vmspace); + /* + * if this process is bigger than the biggest one + * remember it. + */ + if (size > bigsize) { + if (bigproc != NULL) + PROC_UNLOCK(bigproc); + bigproc = p; + bigsize = size; + } else + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + if (bigproc != NULL) { + killproc(bigproc, "out of swap space"); + sched_nice(bigproc, PRIO_MIN); + PROC_UNLOCK(bigproc); + wakeup(&cnt.v_free_count); } } diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 1586ac4..15ca570 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -83,6 +83,9 @@ extern int vm_pageout_page_count; #define VM_SWAP_NORMAL 1 #define VM_SWAP_IDLE 2 +#define VM_OOM_MEM 1 +#define VM_OOM_SWAPZ 2 + /* * Exported routines. */ @@ -100,5 +103,6 @@ extern void vm_waitpfault(void); #ifdef _KERNEL boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); int vm_pageout_flush(vm_page_t *, int, int); +void vm_pageout_oom(int shortage); #endif #endif /* _VM_VM_PAGEOUT_H_ */