From b47950f4551463a1c08908dd2a3fbae03beab3a3 Mon Sep 17 00:00:00 2001
From: Mark Johnston
Date: Sun, 21 Feb 2016 15:52:17 -0800
Subject: [PATCH 4/6] Add a basic laundering policy.

This policy stems from the notion that there are two reasons to launder
pages:

1. Shortfall, in which the inactive and free queues are depleted and the
   system _must_ launder dirty pages in order to reclaim memory.

2. Fairness: the system should periodically launder dirty pages to ensure
   that applications cannot excessively influence the system's memory
   reclamation behaviour.

Note that this does not imply that clean and dirty pages must be treated
equally: page laundering is an expensive operation.  However, the relative
costs of reclaiming a clean vs. a dirty page should be bounded in some
well-defined way, and in particular it should not be possible to force the
system to reclaim only clean pages indefinitely.  Under memory pressure the
system should eventually launder some dirty pages, even when clean inactive
pages are plentiful.  Thus, laundering targets are chosen based on the
current state of the paging queues.

In shortfall, the laundry thread attempts to meet the shortfall within
0.5s, the pagedaemon sleep period.  Because laundering is then the sole
source of clean pages, no attempt is made to limit the laundering rate:
the laundry thread goes all-out.

If the system is not in shortfall, the laundry thread may elect to launder
some dirty pages in an attempt to satisfy the fairness policy.  This is
referred to as background laundering.  Several conditions must be met for
background laundering to occur:

a) The laundry queue must contain a significant fraction of the system's
   inactive memory: if the number of dirty pages is minuscule, nothing is
   gained by laundering them.  Moreover, write clustering works better if
   the number of dirty pages is allowed to grow to some threshold before
   any laundering is performed.  The ratio of clean to dirty pages serves
   as the threshold here, controlled by bkgrd_launder_ratio.  By default,
   dirty pages must constitute at least 1% of inactive memory for
   background laundering to occur.

b) The number of free pages must be low.  If there is plentiful free
   memory, there is no reason to launder pages.  The number of free pages
   must be smaller than bkgrd_launder_thresh for background laundering to
   occur.  By default, this is the maximum of half the free page target
   and 3/2 of the pagedaemon wakeup threshold.  The idea is to start
   laundering before the pagedaemon wakes up.

c) The pagedaemon thread(s) must be active.  If the number of free pages
   is low but the system is not under memory pressure, we should not
   continue background laundering indefinitely.  We use vm_cnt.v_pdwakeups
   as a proxy for pagedaemon activity: when a background laundering run
   begins, the pdwakeups value is recorded; a second run cannot begin
   until pdwakeups has been incremented at least once.

When the conditions for background laundering are met, the laundry thread
determines the target number of pages and begins laundering.  It attempts
to meet the target within one second unless the corresponding laundering
rate would exceed bkgrd_launder_max (32768 pages per second by default).
The target is 0.5 * L * FT / I, where L is the laundry queue length, I is
the inactive queue length, and FT is the pagedaemon's free page target.
In particular, the number of pages laundered is proportional to the ratio
of dirty to clean inactive pages.
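For illustration only (not part of the patch), the background target and
pacing computation described above can be sketched as a small user-space
C program.  The constants, sample counter values and the standalone main()
are hypothetical, the value of VM_LAUNDER_INTERVAL is an assumption, and
plain 64-bit arithmetic stands in for the fixed-point shift used in the
kernel code below:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the tunables and constants named above. */
#define BKGRD_LAUNDER_MAX       32768   /* pages laundered per second (default) */
#define VM_LAUNDER_INTERVAL     10      /* laundry thread wakeups per second (assumed) */

/* Background target: 0.5 * L * FT / I, capped at 0.5 * FT. */
static uint64_t
bkgrd_target(uint64_t nlaundry, uint64_t ninact, uint64_t free_target)
{
        uint64_t target;

        if (nlaundry >= ninact)
                target = free_target;
        else
                target = nlaundry * free_target / ninact;
        return (target / 2);
}

int
main(void)
{
        /* Sample counters: 4GB of inactive pages (4KB each), ~5% dirty. */
        uint64_t ninact = 1048576, nlaundry = 52428, free_target = 131072;
        uint64_t target, tcycle;

        target = bkgrd_target(nlaundry, ninact, free_target);

        /*
         * Launder the target within one second (VM_LAUNDER_INTERVAL
         * wakeups), stretching the run if that would exceed the rate cap.
         */
        tcycle = VM_LAUNDER_INTERVAL;
        if (target > BKGRD_LAUNDER_MAX)
                tcycle = target * VM_LAUNDER_INTERVAL / BKGRD_LAUNDER_MAX;

        printf("target %" PRIu64 " pages over %" PRIu64 " wakeups "
            "(%" PRIu64 " pages per wakeup)\n",
            target, tcycle, target / tcycle);
        return (0);
}

With the sample values (about 5% of inactive memory dirty), the run targets
3276 pages and completes within one second; only when the target exceeds
bkgrd_launder_max would the run be stretched over more wakeups.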
---
 sys/vm/vm_pageout.c | 145 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 122 insertions(+), 23 deletions(-)

diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 4f0b5c1..9854569 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -233,6 +233,21 @@ SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW,
     &act_scan_laundry_weight, 0,
     "weight given to clean vs. dirty pages in active queue scans");
 
+static u_int bkgrd_launder_ratio = 100;
+SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_ratio,
+    CTLFLAG_RW, &bkgrd_launder_ratio, 0,
+    "ratio of inactive to laundry pages to trigger background laundering");
+
+static u_int bkgrd_launder_max = 32768;
+SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_max,
+    CTLFLAG_RW, &bkgrd_launder_max, 0,
+    "maximum background laundering rate, in pages per second");
+
+static u_int bkgrd_launder_thresh;
+SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_thresh,
+    CTLFLAG_RW, &bkgrd_launder_thresh, 0,
+    "free page threshold below which background laundering may be started");
+
 #define VM_PAGEOUT_PAGE_COUNT 16
 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
 
@@ -241,7 +256,8 @@ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW,
     &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
-static void vm_pageout_launder(struct vm_domain *vmd, struct vm_oom_state *oom);
+static int vm_pageout_launder(struct vm_domain *vmd, struct vm_oom_state *oom,
+    int launder);
 static void vm_pageout_laundry_worker(void *arg);
 static void vm_pageout_swapon(void *arg, struct swdevt *sp __unused);
 static void vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused);
@@ -889,33 +905,23 @@ unlock_mp:
 }
 
 /*
- * XXX
+ * Attempt to launder the specified number of pages.
+ *
+ * Returns the number of pages successfully laundered.
  */
-static void
-vm_pageout_launder(struct vm_domain *vmd, struct vm_oom_state *oom)
+static int
+vm_pageout_launder(struct vm_domain *vmd, struct vm_oom_state *oom, int launder)
 {
         vm_page_t m, next;
         struct vm_pagequeue *pq;
         vm_object_t object;
-        int act_delta, error, launder, maxscan, numpagedout, pass;
-        int starting_target, vnodes_skipped;
+        int act_delta, error, maxscan, numpagedout, pass, starting_target;
+        int vnodes_skipped;
         boolean_t pageout_ok, queue_locked;
 
-        /*
-         * Compute the number of pages we want to move from the laundry queue to
-         * the inactive queue.  If there is no shortage of clean, inactive
-         * pages, we allow laundering to proceed at a trickle to ensure that
-         * dirty pages will eventually be reused.  Otherwise, the inactive queue
-         * target is scaled by the ratio of the sleep intervals of the laundry
-         * queue and inactive queue worker threads.
-         */
-        launder = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
-            vm_paging_target() + vm_pageout_deficit;
-        if (launder < 0)
-                launder = 1;
-        else
-                launder /= VM_LAUNDER_RATE;
+        DTRACE_PROBE1(vm__launder, "int", launder);
+        starting_target = launder;
 
         vnodes_skipped = 0;
 
         /*
@@ -1112,26 +1118,41 @@ relock_queue:
         * If we failed to launder any pages, vote for OOM.
         */
        vm_pageout_mightbe_oom(oom, launder, starting_target);
+
+       return (starting_target - launder);
 }
 
 /*
- * XXX
+ * Perform the work of the laundry thread: periodically wake up and determine
+ * whether any pages need to be laundered.  If so, determine the number of pages
+ * that need to be laundered, and launder them.
  */
 static void
 vm_pageout_laundry_worker(void *arg)
 {
        struct vm_oom_state oom;
        struct vm_domain *domain;
-       int domidx;
+       uint64_t ninact, nlaundry;
+       int cycle, tcycle, domidx, gen, launder, laundered;
+       int shortfall, prev_shortfall, target;
 
        domidx = (uintptr_t)arg;
        domain = &vm_dom[domidx];
        KASSERT(domain->vmd_segs != 0, ("domain without segments"));
        vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
 
+       cycle = tcycle = 0;
+       gen = -1;
+       shortfall = prev_shortfall = 0;
+       target = 0;
+
        oom.oom_seq = 0;
        oom.oom_voted = FALSE;
 
+       if (bkgrd_launder_thresh == 0)
+               bkgrd_launder_thresh = max(vm_cnt.v_free_target / 2,
+                   3 * vm_pageout_wakeup_thresh / 2);
+
        /*
         * Calls to these handlers are serialized by the swapconf lock.
         */
@@ -1144,9 +1165,87 @@ vm_pageout_laundry_worker(void *arg)
         * The pageout laundry worker is never done, so loop forever.
         */
        for (;;) {
+               KASSERT(target >= 0, ("negative target %d", target));
+               launder = 0;
+
+               /*
+                * First determine whether we're in shortfall.  If so, there's
+                * an impending need for clean pages.  We attempt to launder the
+                * target within one pagedaemon sleep period.
+                */
+               shortfall = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
+                   vm_paging_target() + vm_pageout_deficit;
+               if (shortfall > 0) {
+                       /*
+                        * If the shortfall has grown since the last cycle or
+                        * we're still in shortfall despite a previous
+                        * laundering run, start a new run.
+                        */
+                       if (shortfall > prev_shortfall || cycle == tcycle) {
+                               target = shortfall;
+                               cycle = 0;
+                               tcycle = VM_LAUNDER_RATE;
+                       }
+                       prev_shortfall = shortfall;
+                       launder = target / (tcycle - (cycle % tcycle));
+                       goto launder;
+               } else {
+                       if (prev_shortfall > 0)
+                               /* We're out of shortfall; the target is met. */
+                               target = 0;
+                       shortfall = prev_shortfall = 0;
+               }
+
+               /*
+                * There's no immediate need to launder any pages; see if we
+                * meet the conditions to perform background laundering:
+                *
+                * 1. we haven't yet reached the target of the current
+                *    background laundering run, or
+                * 2. the ratio of dirty to clean inactive pages exceeds the
+                *    background laundering threshold and the free page count is
+                *    low.
+                *
+                * We don't start a new background laundering run unless the
+                * pagedaemon has been woken up at least once since the previous
+                * run.
+                */
+               if (target > 0 && cycle != tcycle) {
+                       /* Continue an ongoing background run. */
+                       launder = target / (tcycle - (cycle % tcycle));
+                       goto launder;
+               }
+
+               ninact = vm_cnt.v_inactive_count;
+               nlaundry = vm_cnt.v_laundry_count;
+               if (ninact > 0 &&
+                   vm_cnt.v_pdwakeups != gen &&
+                   vm_cnt.v_free_count < bkgrd_launder_thresh &&
+                   nlaundry * bkgrd_launder_ratio >= ninact) {
+                       cycle = 0;
+                       tcycle = VM_LAUNDER_INTERVAL;
+                       gen = vm_cnt.v_pdwakeups;
+                       if (nlaundry >= ninact)
+                               target = vm_cnt.v_free_target;
+                       else
+                               target = (nlaundry * vm_cnt.v_free_target << 16) /
+                                   ninact >> 16;
+                       target /= 2;
+                       if (target > bkgrd_launder_max)
+                               tcycle = target * VM_LAUNDER_INTERVAL /
+                                   bkgrd_launder_max;
+                       launder = target / (tcycle - (cycle % tcycle));
+               }
+
+launder:
+               if (launder > 0) {
+                       laundered = vm_pageout_launder(domain, &oom, launder);
+                       target -= min(laundered, target);
+               }
+
                tsleep(&vm_cnt.v_laundry_count, PVM, "laundr",
                    hz / VM_LAUNDER_INTERVAL);
-               vm_pageout_launder(domain, &oom);
+               cycle++;
        }
 }
-- 
2.7.2
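A note on the pacing expression shared by shortfall and background runs,
launder = target / (tcycle - (cycle % tcycle)): it spreads whatever remains
of a run's target evenly over the wakeups left in the run.  Below is a
minimal user-space simulation (not part of the patch; the values are
hypothetical and it assumes every requested page is successfully laundered):

#include <stdio.h>

int
main(void)
{
        int target = 1000;      /* hypothetical run target, in pages */
        int tcycle = 4;         /* wakeups allotted to the run */
        int cycle, launder;

        for (cycle = 0; cycle < tcycle && target > 0; cycle++) {
                /* Same pacing expression as the laundry thread's loop. */
                launder = target / (tcycle - (cycle % tcycle));
                /* Assume the full request is laundered this wakeup. */
                target -= launder;
                printf("cycle %d: launder %d pages, %d left\n",
                    cycle, launder, target);
        }
        return (0);
}

With target = 1000 and tcycle = 4 this requests 250 pages per wakeup; if a
wakeup launders fewer pages than requested, the remainder is folded into the
requests made during the run's remaining wakeups.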