diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index f15b3c3..28a3f58 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -96,6 +96,7 @@ struct vmmeter {
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
+	u_int v_dinactive_count; /* (a) pages deferred inactive */
 	u_int v_laundry_count;	/* (q) pages dirty */
 	u_int v_cache_count;	/* (f) pages on cache queue */
 	u_int v_pageout_free_min;   /* (c) min pages reserved for kernel */
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index c4261f9..d1112fb 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -303,6 +303,7 @@ VM_STATS_VM(v_wire_count, "Wired pages");
 VM_STATS_VM(v_active_count, "Active pages");
 VM_STATS_VM(v_inactive_target, "Desired inactive pages");
 VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_dinactive_count, "Deferred inactive pages");
 VM_STATS_VM(v_laundry_count, "Dirty pages");
 VM_STATS_VM(v_cache_count, "Pages on cache queue");
 VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 57eda09..d094232 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -72,6 +72,11 @@
 *		* The page daemon can acquire and hold any pair of page queue
 *		  locks in any order.
 *
+*		* Deferred queues are used to batch insertions of pages into the
+*		  inactive page queue, with the aim of reducing lock contention.
+*		  Only a page lock is needed to insert or remove a page from its
+*		  corresponding deferred queue.
+*
 *	- The object lock is required when inserting or removing
 *	  pages from an object (vm_page_insert() or vm_page_remove()).
 *
@@ -144,6 +149,12 @@ static int pa_tryrelock_restart;
 SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
     &pa_tryrelock_restart, 0, "Number of tryrelock restarts");
 
+#if PQ_DINACT_COUNT > 0
+static int dinact_thresh = 16;
+SYSCTL_INT(_vm, OID_AUTO, dinact_thresh, CTLFLAG_RW,
+    &dinact_thresh, 0, "Maximum pages in a deferred inactive queue");
+#endif
+
 static TAILQ_HEAD(, vm_page) blacklist_head;
 static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
@@ -158,12 +169,15 @@
 static struct vnode *vm_page_alloc_init(vm_page_t m);
 static void vm_page_cache_turn_free(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
+static void vm_page_enqueue_deferred(vm_page_t m);
 static void vm_page_free_wakeup(void);
 static void vm_page_init_fakepg(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
+static void vm_page_push_deferred(struct vm_domain *vmd,
+    struct vm_pagequeue *pq);
 static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
     vm_paddr_t high);
@@ -404,6 +418,14 @@ vm_page_domain_init(struct vm_domain *vmd)
 		TAILQ_INIT(&pq->pq_pl);
 		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
 		    MTX_DEF | MTX_DUPOK);
+		pq->pq_mutex_ptr = &pq->pq_mutex;
+	}
+	for (i = 0; i < PQ_DINACT_COUNT; i++) {
+		pq = &vmd->vmd_dinactqueues[i];
+		TAILQ_INIT(&pq->pq_pl);
+		pq->pq_mutex_ptr = &pa_lock[i];
+		*__DECONST(char **, &pq->pq_name) = "vm deferred pagequeue";
+		*__DECONST(int **, &pq->pq_vcnt) = &vm_cnt.v_dinactive_count;
 	}
 }
@@ -2702,8 +2724,11 @@ vm_waitpfault(void)
 struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
+	struct vm_domain *vmd;
 
-	return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+	vmd = vm_phys_domain(m);
+	return ((m->flags & PG_DINACT) == 0 ? &vmd->vmd_pagequeues[m->queue] :
+	    &vmd->vmd_dinactqueues[PQ_DINACT_IDX(m)]);
 }
 
 /*
@@ -2721,12 +2746,22 @@ vm_page_dequeue(vm_page_t m)
 	vm_page_assert_locked(m);
 	KASSERT(m->queue < PQ_COUNT,
 	    ("vm_page_dequeue: page %p is not queued", m));
+	KASSERT((m->flags & PG_DINACT) == 0 || m->queue == PQ_INACTIVE,
+	    ("vm_page_dequeue: deferred inact page %p in wrong queue", m));
+
 	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
+	if ((m->flags & PG_DINACT) != 0) {
+		vm_pagequeue_assert_locked(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		m->flags &= ~PG_DINACT;
+	} else {
+		vm_pagequeue_lock(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		vm_pagequeue_unlock(pq);
+	}
 	m->queue = PQ_NONE;
-	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_cnt_dec(pq);
-	vm_pagequeue_unlock(pq);
 }
 
 /*
@@ -2744,6 +2779,7 @@ vm_page_dequeue_locked(vm_page_t m)
 	vm_page_lock_assert(m, MA_OWNED);
 	pq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(pq);
+	m->flags &= ~PG_DINACT;
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(pq);
@@ -2761,16 +2797,106 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
 {
 	struct vm_pagequeue *pq;
 
-	vm_page_lock_assert(m, MA_OWNED);
+	vm_page_assert_locked(m);
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_enqueue: invalid queue %u request for page %p", queue, m));
-	pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
-	vm_pagequeue_lock(pq);
-	m->queue = queue;
+
+	if (PQ_DINACT_COUNT > 0 && queue == PQ_INACTIVE)
+		vm_page_enqueue_deferred(m);
+	else {
+		pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+		vm_pagequeue_lock(pq);
+		m->queue = queue;
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_inc(pq);
+		vm_pagequeue_unlock(pq);
+	}
+}
+
+/*
+ *	vm_page_enqueue_deferred:
+ *
+ *	Add the given page to its corresponding deferred inactive queue.
+ *
+ *	The page must be locked.  This implies that the deferred queue
+ *	is locked as well.
+ */
+static void
+vm_page_enqueue_deferred(vm_page_t m)
+{
+	struct vm_domain *vmd;
+	struct vm_pagequeue *pq;
+
+	vm_page_assert_locked(m);
+
+#if PQ_DINACT_COUNT > 0
+	vmd = vm_phys_domain(m);
+	pq = &vmd->vmd_dinactqueues[PQ_DINACT_IDX(m)];
+	vm_pagequeue_assert_locked(pq);
+	m->flags |= PG_DINACT;
+	m->queue = PQ_INACTIVE;
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_inc(pq);
-	vm_pagequeue_unlock(pq);
+	/*
+	 * If we've hit the per-queue threshold, push the deferred pages into
+	 * the inactive queue.
+	 */
+	if (pq->pq_cnt >= dinact_thresh)
+		vm_page_push_deferred(vmd, pq);
+#else
+	vm_page_enqueue(PQ_INACTIVE, m);
+#endif
+}
+
+/*
+ *	vm_page_push_deferred:
+ *
+ *	Move pages from the specified deferred queue into the inactive queue.
+ *
+ *	The pagequeue must be locked.
+ */
+static void
+vm_page_push_deferred(struct vm_domain *vmd, struct vm_pagequeue *pq)
+{
+	struct vm_pagequeue *ipq;
+	vm_page_t m;
+	int cnt;
+
+	vm_pagequeue_assert_locked(pq);
+
+	TAILQ_FOREACH(m, &pq->pq_pl, plinks.q) {
+		vm_page_assert_locked(m);
+		KASSERT(m->queue == PQ_INACTIVE && (m->flags & PG_DINACT) != 0,
+		    ("page %p not deferred", m));
+		m->flags &= ~PG_DINACT;
+	}
+	cnt = pq->pq_cnt;
+	vm_pagequeue_cnt_add(pq, -cnt);
+	ipq = &vmd->vmd_pagequeues[PQ_INACTIVE];
+	vm_pagequeue_lock(ipq);
+	TAILQ_CONCAT(&ipq->pq_pl, &pq->pq_pl, plinks.q);
+	vm_pagequeue_cnt_add(ipq, cnt);
+	vm_pagequeue_unlock(ipq);
+}
+
+/*
+ *	vm_page_purge_deferred:
+ *
+ *	Move deferred inactive pages into the inactive queue.
+ */
+void
+vm_page_purge_deferred(struct vm_domain *vmd)
+{
+	struct vm_pagequeue *pq;
+	int i;
+
+	for (i = 0; i < PQ_DINACT_COUNT; i++) {
+		pq = &vmd->vmd_dinactqueues[i];
+		vm_pagequeue_lock(pq);
+		vm_page_push_deferred(vmd, pq);
+		vm_pagequeue_unlock(pq);
+	}
 }
 
 /*
@@ -2785,14 +2911,19 @@ vm_page_requeue(vm_page_t m)
 {
 	struct vm_pagequeue *pq;
 
-	vm_page_lock_assert(m, MA_OWNED);
+	vm_page_assert_locked(m);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue: page %p is not queued", m));
+
 	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
+	if ((m->flags & PG_DINACT) == 0)
+		vm_pagequeue_lock(pq);
+	else
+		vm_pagequeue_assert_locked(pq);
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_unlock(pq);
+	if ((m->flags & PG_DINACT) == 0)
+		vm_pagequeue_unlock(pq);
 }
 
 /*
@@ -3087,6 +3218,7 @@ vm_page_unwire(vm_page_t m, uint8_t queue)
 static inline void
 _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
 {
+	struct vm_domain *vmd;
 	struct vm_pagequeue *pq;
 	int queue;
 
@@ -3099,25 +3231,35 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
 	if ((queue = m->queue) == PQ_INACTIVE && !noreuse)
 		return;
 	if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
-		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
-		/* Avoid multiple acquisitions of the inactive queue lock. */
-		if (queue == PQ_INACTIVE) {
-			vm_pagequeue_lock(pq);
-			vm_page_dequeue_locked(m);
+		vmd = vm_phys_domain(m);
+		if (noreuse) {
+			/*
+			 * If the page is already in the inactive queue, we must
+			 * be moving it to the head, in which case we skip the
+			 * deferred queues.
+			 */
+			pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
+			if (queue == PQ_INACTIVE) {
+				vm_pagequeue_lock(pq);
+				/*
+				 * The page is either already in the inactive
+				 * queue, or is in a deferred queue.  Either
+				 * way, its pagequeue lock is held.
+				 */
+				vm_page_dequeue_locked(m);
+			} else if (queue != PQ_NONE) {
+				vm_page_dequeue(m);
+				vm_pagequeue_lock(pq);
+			}
+			m->queue = PQ_INACTIVE;
+			TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
+			vm_pagequeue_cnt_inc(pq);
+			vm_pagequeue_unlock(pq);
 		} else {
 			if (queue != PQ_NONE)
 				vm_page_dequeue(m);
-			vm_pagequeue_lock(pq);
+			vm_page_enqueue_deferred(m);
 		}
-		m->queue = PQ_INACTIVE;
-		if (noreuse) {
-			PCPU_INC(cnt.v_noreuse);
-			TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead,
-			    m, plinks.q);
-		} else
-			TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-		vm_pagequeue_cnt_inc(pq);
-		vm_pagequeue_unlock(pq);
 	}
 }
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d2a3ae4..e6fcf51 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -67,6 +67,7 @@
 #ifndef _VM_PAGE_
 #define _VM_PAGE_
 
+#include
 #include
 
 /*
@@ -209,20 +210,23 @@ struct vm_page {
 #define	PQ_LAUNDRY	2
 #define	PQ_COUNT	3
 
+#define	PQ_DINACT_IDX(m)	(pa_index(VM_PAGE_TO_PHYS(m)) % PQ_DINACT_COUNT)
+
 TAILQ_HEAD(pglist, vm_page);
 SLIST_HEAD(spglist, vm_page);
 
 struct vm_pagequeue {
-	struct mtx	pq_mutex;
+	struct mtx_padalign	pq_mutex;
+	struct mtx_padalign	*pq_mutex_ptr;
 	struct pglist	pq_pl;
 	int		pq_cnt;
 	int		* const pq_vcnt;
 	const char	* const pq_name;
-} __aligned(CACHE_LINE_SIZE);
-
+};
 
 struct vm_domain {
 	struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+	struct vm_pagequeue vmd_dinactqueues[PQ_DINACT_COUNT];
 	u_int vmd_page_count;
 	u_int vmd_free_count;
 	long vmd_segs;	/* bitmask of the segments */
@@ -237,21 +241,17 @@ struct vm_domain {
 
 extern struct vm_domain vm_dom[MAXMEMDOM];
 
-#define	vm_pagequeue_assert_locked(pq)	mtx_assert(&(pq)->pq_mutex, MA_OWNED)
-#define	vm_pagequeue_lock(pq)		mtx_lock(&(pq)->pq_mutex)
-#define	vm_pagequeue_unlock(pq)		mtx_unlock(&(pq)->pq_mutex)
+#define	vm_pagequeue_assert_locked(pq)	mtx_assert((pq)->pq_mutex_ptr, MA_OWNED)
+#define	vm_pagequeue_lock(pq)		mtx_lock((pq)->pq_mutex_ptr)
+#define	vm_pagequeue_unlock(pq)		mtx_unlock((pq)->pq_mutex_ptr)
 
 #ifdef _KERNEL
-static __inline void
-vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
-{
+#define	vm_pagequeue_cnt_add(pq, addend) do {			\
+	vm_pagequeue_assert_locked(pq);				\
+	(pq)->pq_cnt += (addend);				\
+	atomic_add_int((pq)->pq_vcnt, (addend));		\
+} while (0)
 
-#ifdef notyet
-	vm_pagequeue_assert_locked(pq);
-#endif
-	pq->pq_cnt += addend;
-	atomic_add_int(pq->pq_vcnt, addend);
-}
 #define	vm_pagequeue_cnt_inc(pq)	vm_pagequeue_cnt_add((pq), 1)
 #define	vm_pagequeue_cnt_dec(pq)	vm_pagequeue_cnt_add((pq), -1)
 #endif	/* _KERNEL */
@@ -327,6 +327,7 @@ extern struct mtx_padalign pa_lock[];
 * freeing, the modification must be protected by the vm_page lock.
 */
 #define	PG_CACHED	0x0001		/* page is cached */
+#define	PG_DINACT	0x0002		/* page in a deferred inactive queue */
 #define	PG_FICTITIOUS	0x0004		/* physical page doesn't exist */
 #define	PG_ZERO		0x0008		/* page is zeroed */
 #define	PG_MARKER	0x0010		/* special queue marker page */
@@ -473,6 +474,7 @@ int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
 struct vm_pagequeue *vm_page_pagequeue(vm_page_t m);
 vm_page_t vm_page_prev(vm_page_t m);
 boolean_t vm_page_ps_is_valid(vm_page_t m);
+void vm_page_purge_deferred(struct vm_domain *vmd);
 void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
 bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 799f341..e0fc53a 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1298,10 +1298,13 @@ drop_page:
 
 	/*
 	 * Wakeup the laundry thread(s) if we didn't free the targeted number
-	 * of pages.
+	 * of pages.  Also purge the deferred inactive queues so that lingering
+	 * pages will be reclaimed on the next pass.
 	 */
-	if (page_shortage > 0)
+	if (page_shortage > 0) {
 		wakeup(&vm_cnt.v_laundry_count);
+		vm_page_purge_deferred(vmd);
+	}
 
 #if !defined(NO_SWAPPING)
 	/*
diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h
index 68e5cd1..c0c69cf 100644
--- a/sys/vm/vm_param.h
+++ b/sys/vm/vm_param.h
@@ -119,6 +119,12 @@ struct xswdev {
 #endif	/* !SMP */
 #endif	/* !PA_LOCK_COUNT */
 
+#ifdef SMP
+#define	PQ_DINACT_COUNT	PA_LOCK_COUNT
+#else
+#define	PQ_DINACT_COUNT	0
+#endif
+
 #ifndef ASSEMBLER
 #ifdef _KERNEL
 #define num_pages(x) \
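
Illustrative sketch (not part of the patch): the user-space program below models the batching scheme implemented above. Enqueues land on a small per-bucket deferred list protected only by that bucket's lock (standing in for pa_lock[i]), and the whole list is spliced onto the shared "inactive" list in one shot once a threshold is reached, so the shared lock is taken roughly once per DEFER_THRESH insertions instead of once per insertion. All names here (NBUCKETS, DEFER_THRESH, struct node, struct bucket, enqueue_deferred(), push_deferred(), purge_deferred(), inact_lock) are invented for the example, and the locking convention differs from the kernel code: the sketch takes the bucket lock itself, whereas in the patch the page lock doubles as the deferred-queue lock and is already held by callers. Build with cc -pthread.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define	NBUCKETS	4	/* stands in for PQ_DINACT_COUNT (PA_LOCK_COUNT) */
#define	DEFER_THRESH	16	/* stands in for the vm.dinact_thresh sysctl */

struct node {			/* stands in for a vm_page */
	struct node	*next;
	unsigned long	 id;	/* stands in for the physical address */
};

struct bucket {			/* stands in for one deferred queue */
	pthread_mutex_t	 lock;	/* plays the role of pa_lock[i] */
	struct node	*head;
	struct node	**tailp;
	int		 cnt;
};

static struct bucket buckets[NBUCKETS];

/* The shared queue and its single, potentially contended lock. */
static pthread_mutex_t	 inact_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node	*inact_head;
static struct node	**inact_tailp = &inact_head;
static long		 inact_cnt, inact_lockings;

/*
 * Splice an entire deferred list onto the shared queue in one shot, the way
 * vm_page_push_deferred() uses TAILQ_CONCAT().  Called with the bucket lock held.
 */
static void
push_deferred(struct bucket *b)
{
	if (b->cnt == 0)
		return;
	pthread_mutex_lock(&inact_lock);
	inact_lockings++;
	*inact_tailp = b->head;		/* link the batch after the current tail */
	inact_tailp = b->tailp;
	inact_cnt += b->cnt;
	pthread_mutex_unlock(&inact_lock);
	b->head = NULL;			/* reset the bucket to an empty list */
	b->tailp = &b->head;
	b->cnt = 0;
}

/* Enqueue one node; only the bucket lock is taken unless the threshold is hit. */
static void
enqueue_deferred(struct node *n)
{
	struct bucket *b = &buckets[n->id % NBUCKETS];

	pthread_mutex_lock(&b->lock);
	n->next = NULL;
	*b->tailp = n;
	b->tailp = &n->next;
	if (++b->cnt >= DEFER_THRESH)
		push_deferred(b);
	pthread_mutex_unlock(&b->lock);
}

/* Flush whatever is still batched, like vm_page_purge_deferred(). */
static void
purge_deferred(void)
{
	for (int i = 0; i < NBUCKETS; i++) {
		pthread_mutex_lock(&buckets[i].lock);
		push_deferred(&buckets[i]);
		pthread_mutex_unlock(&buckets[i].lock);
	}
}

int
main(void)
{
	enum { N = 1000 };
	struct node *nodes = calloc(N, sizeof(*nodes));

	if (nodes == NULL)
		return (1);
	for (int i = 0; i < NBUCKETS; i++) {
		pthread_mutex_init(&buckets[i].lock, NULL);
		buckets[i].tailp = &buckets[i].head;
	}
	for (int i = 0; i < N; i++) {
		nodes[i].id = (unsigned long)i;
		enqueue_deferred(&nodes[i]);
	}
	purge_deferred();
	printf("%ld nodes queued with %ld shared-lock acquisitions\n",
	    inact_cnt, inact_lockings);
	free(nodes);
	return (0);
}

With 4 buckets and a threshold of 16, the 1000 insertions above should end up taking the shared lock 64 times (15 full batches plus one final purge per bucket) rather than 1000 times; the patch aims for the same ratio on the inactive queue lock, at the cost of pages lingering briefly in the deferred queues until a threshold push or vm_page_purge_deferred() moves them.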