--- //depot/yahoo/ybsd_7/src/sys/kern/kern_sig.c +++ /home/jhb/work/y/p4/ybsd_7/src/sys/kern/kern_sig.c @@ -2833,6 +2833,7 @@ p, p->p_pid, p->p_comm); log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why); + p->p_flag |= P_WKILLED; psignal(p, SIGKILL); } --- //depot/yahoo/ybsd_7/src/sys/sys/proc.h +++ /home/jhb/work/y/p4/ybsd_7/src/sys/sys/proc.h @@ -625,6 +625,7 @@ #define P_HWPMC 0x800000 /* Process is using HWPMCs */ #define P_JAILED 0x1000000 /* Process is in jail. */ +#define P_WKILLED 0x2000000 /* Killed, go to kernel/user boundary ASAP. */ #define P_INEXEC 0x4000000 /* Process is in execve(). */ #define P_STATCHILD 0x8000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ @@ -634,6 +635,7 @@ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) +#define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* * These were process status values (p_stat), now they are only used in --- //depot/yahoo/ybsd_7/src/sys/vm/swap_pager.c +++ /home/jhb/work/y/p4/ybsd_7/src/sys/vm/swap_pager.c @@ -1711,9 +1711,12 @@ if (swap == NULL) { mtx_unlock(&swhash_mtx); VM_OBJECT_UNLOCK(object); - if (uma_zone_exhausted(swap_zone)) - panic("swap zone exhausted, increase kern.maxswzone\n"); - VM_WAIT; + if (uma_zone_exhausted(swap_zone)) { + printf("swap zone exhausted, increase kern.maxswzone\n"); + vm_pageout_oom(VM_OOM_SWAPZ); + pause("swzonex", 10); + } else + VM_WAIT; VM_OBJECT_LOCK(object); goto retry; } --- //depot/yahoo/ybsd_7/src/sys/vm/vm_fault.c +++ /home/jhb/work/y/p4/ybsd_7/src/sys/vm/vm_fault.c @@ -215,6 +215,7 @@ vm_page_t marray[VM_FAULT_READ]; int hardfault; int faultcount; + int alloc_req; struct faultstate fs; hardfault = 0; @@ -429,8 +430,22 @@ */ fs.m = NULL; if (!vm_page_count_severe()) { + /* + * Unlocked read of the p_flag is + * harmless. 
At worst, the P_KILLED + * would not be observed there, and + * allocation may fail, causing a + * restart and a new read of the + * p_flag. + */ + if (P_KILLED(curproc)) + alloc_req = VM_ALLOC_SYSTEM; + else if (fs.vp || fs.object->backing_object) + alloc_req = VM_ALLOC_NORMAL; + else + alloc_req = VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, - (fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_ZERO); + alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); @@ -438,6 +453,10 @@ goto RetryFault; } else if ((fs.m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) break; + else if (alloc_req == VM_ALLOC_SYSTEM && fs.vp == NULL && + fs.object->backing_object == NULL && + (fs.m->flags & PG_ZERO) == 0) + pmap_zero_page(fs.m); } readrest: @@ -456,7 +475,7 @@ int ahead, behind; u_char behavior = vm_map_entry_behavior(fs.entry); - if (behavior == MAP_ENTRY_BEHAV_RANDOM) { + if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { ahead = 0; behind = 0; } else { --- //depot/yahoo/ybsd_7/src/sys/vm/vm_pageout.c +++ /home/jhb/work/y/p4/ybsd_7/src/sys/vm/vm_pageout.c @@ -681,9 +681,6 @@ struct vm_page marker; int page_shortage, maxscan, pcount; int addl_page_shortage, addl_page_shortage_init; - struct proc *p, *bigproc; - struct thread *td; - vm_offset_t size, bigsize; vm_object_t object; int actcount; int vnodes_skipped = 0; @@ -1174,7 +1171,22 @@ * doing this on the first pass in order to give ourselves a * chance to flush out dirty vnode-backed pages and to allow * active pages to be moved to the inactive queue and reclaimed. 
- * + */ + if (pass != 0 && + ((swap_pager_avail < 64 && vm_page_count_min()) || + (swap_pager_full && vm_paging_target() > 0))) + vm_pageout_oom(VM_OOM_MEM); +} + + +void +vm_pageout_oom(int shortage) +{ + struct proc *p, *bigproc; + vm_offset_t size, bigsize; + struct thread *td; + + /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of it's child processes @@ -1182,79 +1194,76 @@ * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ - if (pass != 0 && - ((swap_pager_avail < 64 && vm_page_count_min()) || - (swap_pager_full && vm_paging_target() > 0))) { - bigproc = NULL; - bigsize = 0; - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - int breakout; + bigproc = NULL; + bigsize = 0; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + int breakout; - if (PROC_TRYLOCK(p) == 0) - continue; - /* - * If this is a system or protected process, skip it. - */ - if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || - (p->p_flag & P_PROTECTED) || - ((p->p_pid < 48) && (swap_pager_avail != 0))) { - PROC_UNLOCK(p); - continue; - } - /* - * If the process is in a non-running type state, - * don't touch it. Check all the threads individually. - */ - PROC_SLOCK(p); - breakout = 0; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!TD_ON_RUNQ(td) && - !TD_IS_RUNNING(td) && - !TD_IS_SLEEPING(td)) { - thread_unlock(td); - breakout = 1; - break; - } + if (PROC_TRYLOCK(p) == 0) + continue; + /* + * If this is a system, protected or killed process, skip it. + */ + if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || + (p->p_flag & P_PROTECTED) || P_KILLED(p) || + ((p->p_pid < 48) && (swap_pager_avail != 0))) { + PROC_UNLOCK(p); + continue; + } + /* + * If the process is in a non-running type state, + * don't touch it. Check all the threads individually. 
+ */ + PROC_SLOCK(p); + breakout = 0; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!TD_ON_RUNQ(td) && + !TD_IS_RUNNING(td) && + !TD_IS_SLEEPING(td)) { thread_unlock(td); + breakout = 1; + break; } - PROC_SUNLOCK(p); - if (breakout) { - PROC_UNLOCK(p); - continue; - } - /* - * get the process size - */ - if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) { - PROC_UNLOCK(p); - continue; - } - size = vmspace_swap_count(p->p_vmspace); - vm_map_unlock_read(&p->p_vmspace->vm_map); - size += vmspace_resident_count(p->p_vmspace); - /* - * if the this process is bigger than the biggest one - * remember it. - */ - if (size > bigsize) { - if (bigproc != NULL) - PROC_UNLOCK(bigproc); - bigproc = p; - bigsize = size; - } else - PROC_UNLOCK(p); + thread_unlock(td); + } + PROC_SUNLOCK(p); + if (breakout) { + PROC_UNLOCK(p); + continue; } - sx_sunlock(&allproc_lock); - if (bigproc != NULL) { - killproc(bigproc, "out of swap space"); - PROC_SLOCK(bigproc); - sched_nice(bigproc, PRIO_MIN); - PROC_SUNLOCK(bigproc); - PROC_UNLOCK(bigproc); - wakeup(&cnt.v_free_count); + /* + * get the process size + */ + if (!vm_map_trylock_read(&p->p_vmspace->vm_map)) { + PROC_UNLOCK(p); + continue; } + size = vmspace_swap_count(p->p_vmspace); + vm_map_unlock_read(&p->p_vmspace->vm_map); + if (shortage == VM_OOM_MEM) + size += vmspace_resident_count(p->p_vmspace); + /* + * if this process is bigger than the biggest one + * remember it. 
+ */ + if (size > bigsize) { + if (bigproc != NULL) + PROC_UNLOCK(bigproc); + bigproc = p; + bigsize = size; + } else + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + if (bigproc != NULL) { + killproc(bigproc, "out of swap space"); + PROC_SLOCK(bigproc); + sched_nice(bigproc, PRIO_MIN); + PROC_SUNLOCK(bigproc); + PROC_UNLOCK(bigproc); + wakeup(&cnt.v_free_count); } } --- //depot/yahoo/ybsd_7/src/sys/vm/vm_pageout.h +++ /home/jhb/work/y/p4/ybsd_7/src/sys/vm/vm_pageout.h @@ -83,6 +83,9 @@ #define VM_SWAP_NORMAL 1 #define VM_SWAP_IDLE 2 +#define VM_OOM_MEM 1 +#define VM_OOM_SWAPZ 2 + /* * Exported routines. */ @@ -99,5 +102,6 @@ #ifdef _KERNEL int vm_pageout_flush(vm_page_t *, int, int); +void vm_pageout_oom(int shortage); #endif #endif /* _VM_VM_PAGEOUT_H_ */