Index: sys/pcpu.h
===================================================================
--- sys/pcpu.h	(.../head/sys)	(revision 268414)
+++ sys/pcpu.h	(.../user/attilio/rm_vmobj_cache/sys)	(revision 268414)
@@ -162,7 +162,6 @@ struct pcpu {
 	long		pc_cp_time[CPUSTATES];	/* statclock ticks */
 	struct device	*pc_device;
 	void		*pc_netisr;		/* netisr SWI cookie */
-	int		pc_dnweight;		/* vm_page_dontneed() */
 	int		pc_domain;		/* Memory domain. */
 	struct rm_queue	pc_rm_queue;		/* rmlock list of trackers */
 	uintptr_t	pc_dynamic;		/* Dynamic per-cpu data area */
Index: sys/vmmeter.h
===================================================================
--- sys/vmmeter.h	(.../head/sys)	(revision 268414)
+++ sys/vmmeter.h	(.../user/attilio/rm_vmobj_cache/sys)	(revision 268414)
@@ -96,6 +96,7 @@ struct vmmeter {
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
+	u_int v_disposed_count;	/* (q) pages disposed */
 	u_int v_cache_count;	/* (f) pages on cache queue */
 	u_int v_cache_min;	/* (c) min pages desired on cache queue */
 	u_int v_cache_max;	/* (c) max pages in cached obj (unused) */
Index: vm/vm_page.h
===================================================================
--- vm/vm_page.h	(.../head/sys)	(revision 268414)
+++ vm/vm_page.h	(.../user/attilio/rm_vmobj_cache/sys)	(revision 268414)
@@ -206,7 +206,8 @@ struct vm_page {
 #define	PQ_NONE		255
 #define	PQ_INACTIVE	0
 #define	PQ_ACTIVE	1
-#define	PQ_COUNT	2
+#define	PQ_DISPOSED	2
+#define	PQ_COUNT	3
 
 TAILQ_HEAD(pglist, vm_page);
 SLIST_HEAD(spglist, vm_page);
@@ -438,6 +439,7 @@ int vm_page_try_to_free (vm_page_t);
 void vm_page_deactivate (vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_locked(vm_page_t m);
+void vm_page_dispose(vm_page_t m);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
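For illustration, a minimal userland sketch of how the new counter could be inspected on a running system. Existing struct vmmeter fields such as v_inactive_count are exported by vm_meter.c under vm.stats.vm; the matching export for v_disposed_count is not part of this diff, so the sysctl name "vm.stats.vm.v_disposed_count" used below is an assumption modeled on those neighbors.

/*
 * read_disposed.c: print the number of pages sitting on the disposed
 * queue, assuming the branch also adds the usual VM_STATS_VM entry
 * for v_disposed_count (hypothetical sysctl name, see above).
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	u_int disposed;
	size_t len;

	len = sizeof(disposed);
	if (sysctlbyname("vm.stats.vm.v_disposed_count", &disposed, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("pages on the disposed queue: %u\n", disposed);
	return (0);
}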
Index: vm/vm_pageout.c
===================================================================
--- vm/vm_pageout.c	(.../head/sys)	(revision 268414)
+++ vm/vm_pageout.c	(.../user/attilio/rm_vmobj_cache/sys)	(revision 268414)
@@ -244,10 +244,10 @@ vm_pageout_init_marker(vm_page_t marker, u_short q
  * vm_pageout_fallback_object_lock:
  *
  *	Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
- *	known to have failed and page queue must be either PQ_ACTIVE or
- *	PQ_INACTIVE.  To avoid lock order violation, unlock the page queues
- *	while locking the vm object.  Use marker page to detect page queue
- *	changes and maintain notion of next page on page queue.  Return
+ *	known to have failed and page queue must be either PQ_ACTIVE,
+ *	PQ_INACTIVE or PQ_DISPOSED.  To avoid lock order violation, unlock the
+ *	page queues while locking the vm object.  Use marker page to detect page
+ *	queue changes and maintain notion of next page on page queue.  Return
  *	TRUE if no changes were detected, FALSE otherwise.  vm object is
  *	locked on return.
  *
@@ -892,7 +892,7 @@ vm_pageout_map_deactivate_pages(map, desired)
  *	vm_pageout_scan does the dirty work for the pageout daemon.
  *
  *	pass 0 - Update active LRU/deactivate pages
- *	pass 1 - Move inactive to cache or free
+ *	pass 1 - Free disposed pages and move inactive to cache or free
  *	pass 2 - Launder dirty pages
  */
 static void
@@ -943,6 +943,75 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 	} else
 		page_shortage = deficit = 0;
 
+	pq = &vmd->vmd_pagequeues[PQ_DISPOSED];
+	maxscan = pq->pq_cnt;
+	vm_pagequeue_lock(pq);
+	for (m = TAILQ_FIRST(&pq->pq_pl);
+	    m != NULL && maxscan-- > 0 && page_shortage > 0;
+	    m = next) {
+		vm_pagequeue_assert_locked(pq);
+		KASSERT(m->queue == PQ_DISPOSED, ("page %p not disposed", m));
+
+		PCPU_INC(cnt.v_pdpages);
+		next = TAILQ_NEXT(m, plinks.q);
+
+		/*
+		 * skip marker pages
+		 */
+		if (m->flags & PG_MARKER)
+			continue;
+
+		KASSERT((m->flags & PG_FICTITIOUS) == 0,
+		    ("Fictitious page %p cannot be in disposed queue", m));
+		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+		    ("Unmanaged page %p cannot be in disposed queue", m));
+
+		/*
+		 * The page or object lock acquisitions fail if the
+		 * page was removed from the queue or moved to a
+		 * different position within the queue.  In either
+		 * case the page is simply skipped.
+		 */
+		if (!vm_pageout_page_lock(m, &next)) {
+			vm_page_unlock(m);
+			continue;
+		}
+		object = m->object;
+		if (!VM_OBJECT_TRYWLOCK(object) &&
+		    !vm_pageout_fallback_object_lock(m, &next)) {
+			vm_page_unlock(m);
+			VM_OBJECT_WUNLOCK(object);
+			continue;
+		}
+		vm_page_test_dirty(m);
+
+		if (m->dirty != 0)
+			panic("Disposed page %p is dirty", m);
+		if (pmap_page_is_mapped(m))
+			panic("Disposed page %p has active mappings", m);
+		if ((m->aflags & PGA_REFERENCED) != 0)
+			panic("Disposed page %p is referenced", m);
+
+		/*
+		 * These conditions are already checked when pages are
+		 * inserted into the disposed queue, so asserts suffice here.
+		 */
+		KASSERT(!vm_page_busied(m) && m->hold_count == 0 &&
+		    m->wire_count == 0, ("page %p busied, held or wired", m));
+
+		/*
+		 * Dequeue the page first in order to avoid pagequeue
+		 * lock recursion inside vm_page_free().
+		 */
+		vm_page_dequeue_locked(m);
+		vm_page_free(m);
+		vm_page_unlock(m);
+		VM_OBJECT_WUNLOCK(object);
+		PCPU_INC(cnt.v_dfree);
+		--page_shortage;
+	}
+	vm_pagequeue_unlock(pq);
+
 	/*
 	 * maxlaunder limits the number of dirty pages we flush per scan.
 	 * For most systems a smaller value (16 or 32) is more robust under
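Two details of the loop above deserve emphasis: the successor is sampled with TAILQ_NEXT() before the current page can go away, and maxscan is latched from pq_cnt up front so pages disposed concurrently cannot extend the walk indefinitely. A self-contained userland model of that drain pattern follows; every name in it is a hypothetical stand-in, not kernel API.

#include <sys/queue.h>

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fakepage {
	TAILQ_ENTRY(fakepage) link;
	int id;
};

static TAILQ_HEAD(, fakepage) disposed_q = TAILQ_HEAD_INITIALIZER(disposed_q);
static pthread_mutex_t disposed_lock = PTHREAD_MUTEX_INITIALIZER;
static int disposed_cnt;

/*
 * Free up to "shortage" queued pages, mirroring the PQ_DISPOSED pass:
 * latch maxscan first, and always fetch the next element before the
 * current one is removed and freed.
 */
static int
drain_disposed(int shortage)
{
	struct fakepage *m, *next;
	int freed, maxscan;

	freed = 0;
	pthread_mutex_lock(&disposed_lock);	/* models the pagequeue lock */
	maxscan = disposed_cnt;
	for (m = TAILQ_FIRST(&disposed_q);
	    m != NULL && maxscan-- > 0 && shortage > 0;
	    m = next) {
		next = TAILQ_NEXT(m, link);	/* grab before freeing m */
		TAILQ_REMOVE(&disposed_q, m, link);
		disposed_cnt--;
		free(m);
		shortage--;
		freed++;
	}
	pthread_mutex_unlock(&disposed_lock);
	return (freed);
}

int
main(void)
{
	struct fakepage *m;
	int freed, i;

	for (i = 0; i < 8; i++) {
		m = malloc(sizeof(*m));
		m->id = i;
		TAILQ_INSERT_TAIL(&disposed_q, m, link);
		disposed_cnt++;
	}
	freed = drain_disposed(5);
	printf("freed %d pages, %d left queued\n", freed, disposed_cnt);
	return (0);
}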
Index: vm/vm_page.c
===================================================================
--- vm/vm_page.c	(.../head/sys)	(revision 268414)
+++ vm/vm_page.c	(.../user/attilio/rm_vmobj_cache/sys)	(revision 268414)
@@ -258,6 +258,10 @@ vm_page_domain_init(struct vm_domain *vmd)
 	    "vm active pagequeue";
 	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
 	    &vm_cnt.v_active_count;
+	*__DECONST(char **, &vmd->vmd_pagequeues[PQ_DISPOSED].pq_name) =
+	    "vm disposed pagequeue";
+	*__DECONST(int **, &vmd->vmd_pagequeues[PQ_DISPOSED].pq_vcnt) =
+	    &vm_cnt.v_disposed_count;
 	vmd->vmd_page_count = 0;
 	vmd->vmd_free_count = 0;
 	vmd->vmd_segs = 0;
@@ -2432,6 +2436,49 @@ vm_page_deactivate(vm_page_t m)
 }
 
 /*
+ * Move the specified page to the disposed queue.
+ *
+ * XXXWIP
+ *
+ * The page must be locked and must not be wired or busy; if the page
+ * is currently on a queue, it is dequeued first.
+ * Finally, the page must belong to an object, so it must not be
+ * unmanaged.
+ */
+void
+vm_page_dispose(vm_page_t m)
+{
+	struct vm_pagequeue *pq;
+	int queue;
+
+	vm_page_lock_assert(m, MA_OWNED);
+
+	queue = m->queue;
+	if (queue == PQ_DISPOSED)
+		return;
+	if (queue != PQ_NONE)
+		vm_page_dequeue(m);
+	if (m->hold_count != 0)
+		panic("vm_page_dispose: page %p hold count %d",
+		    m, m->hold_count);
+	if (m->wire_count != 0)
+		panic("vm_page_dispose: page %p wire count %d",
+		    m, m->wire_count);
+	if (vm_page_busied(m))
+		panic("vm_page_dispose: page %p is busied", m);
+	if ((m->oflags & VPO_UNMANAGED) != 0)
+		panic("vm_page_dispose: page %p is unmanaged", m);
+
+	m->flags &= ~PG_WINATCFLS;
+	pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_DISPOSED];
+	vm_pagequeue_lock(pq);
+	m->queue = PQ_DISPOSED;
+	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+	vm_pagequeue_cnt_inc(pq);
+	vm_pagequeue_unlock(pq);
+}
+
+/*
  * vm_page_try_to_cache:
  *
  *	Returns 0 on failure, 1 on success
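In the same spirit, a compact userland model of the enqueue side just shown: precondition checks first, then tail insertion plus counter update. The fakepage structure and QUEUE_* constants are hypothetical stand-ins for the kernel's vm_page and PQ_* values.

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>

#define	QUEUE_NONE	255
#define	QUEUE_DISPOSED	2

struct fakepage {
	TAILQ_ENTRY(fakepage) link;
	int queue;
	int hold_count;
	int wire_count;
	int busy;
};

static TAILQ_HEAD(, fakepage) disposed_q = TAILQ_HEAD_INITIALIZER(disposed_q);
static int disposed_cnt;

/*
 * Models vm_page_dispose(): a page already on the disposed queue is
 * left alone, and held, wired or busy pages are rejected outright
 * (the kernel version panics) before the page is appended and the
 * queue counter bumped, as vm_pagequeue_cnt_inc() does.
 */
static void
page_dispose(struct fakepage *m)
{

	if (m->queue == QUEUE_DISPOSED)
		return;
	if (m->hold_count != 0 || m->wire_count != 0 || m->busy)
		abort();
	m->queue = QUEUE_DISPOSED;
	TAILQ_INSERT_TAIL(&disposed_q, m, link);
	disposed_cnt++;
}

int
main(void)
{
	struct fakepage m = { .queue = QUEUE_NONE };

	page_dispose(&m);
	page_dispose(&m);	/* second call is a harmless no-op */
	printf("disposed_cnt = %d\n", disposed_cnt);
	return (0);
}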
@@ -2584,30 +2631,23 @@ vm_page_cache(vm_page_t m)
 /*
  *	vm_page_advise
  *
- *	Cache, deactivate, or do nothing as appropriate.  This routine
+ *	Dispose, deactivate, or do nothing as appropriate.  This routine
  *	is used by madvise().
 *
- *	Generally speaking we want to move the page into the cache so
- *	it gets reused quickly.  However, this can result in a silly syndrome
- *	due to the page recycling too quickly.  Small objects will not be
- *	fully cached.  On the other hand, if we move the page to the inactive
- *	queue we wind up with a problem whereby very large objects
- *	unnecessarily blow away our inactive and cache queues.
+ *	For MADV_FREE the pages are moved directly to the higher-priority
+ *	disposed pagequeue, where they can be reused quickly.
+ *	For MADV_DONTNEED the pages are moved to the head of the
+ *	inactive queue, so that they are among the first pages to be
+ *	reclaimed.  The only exception is dirty pages, which are moved
+ *	to the tail of the inactive queue instead, since the pagedaemon
+ *	would most likely move them there anyway during its normal
+ *	scanning.
  *
- *	The solution is to move the pages based on a fixed weighting.  We
- *	either leave them alone, deactivate them, or move them to the cache,
- *	where moving them to the cache has the highest weighting.
- *	By forcing some pages into other queues we eventually force the
- *	system to balance the queues, potentially recovering other unrelated
- *	space from active.  The idea is to not force this to happen too
- *	often.
- *
 *	The object and page must be locked.
 */
void
vm_page_advise(vm_page_t m, int advice)
{
-	int dnw, head;
 
	vm_page_assert_locked(m);
	VM_OBJECT_ASSERT_WLOCKED(m->object);
@@ -2629,17 +2669,10 @@ vm_page_advise(vm_page_t m, int advice)
 		m->act_count = 0;
 	} else if (advice != MADV_DONTNEED)
 		return;
-	dnw = PCPU_GET(dnweight);
-	PCPU_INC(dnweight);
 
-	/*
-	 * Occasionally leave the page alone.
-	 */
-	if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) {
-		if (m->act_count >= ACT_INIT)
-			--m->act_count;
-		return;
-	}
+	/* Set the dirty page bit if appropriate. */
+	if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
+		vm_page_dirty(m);
 
 	/*
 	 * Clear any references to the page.  Otherwise, the page daemon will
@@ -2647,23 +2680,15 @@ vm_page_advise(vm_page_t m, int advice)
 	 */
 	vm_page_aflag_clear(m, PGA_REFERENCED);
 
-	if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
-		vm_page_dirty(m);
-
-	if (m->dirty || (dnw & 0x0070) == 0) {
-		/*
-		 * Deactivate the page 3 times out of 32.
-		 */
-		head = 0;
-	} else {
-		/*
-		 * Cache the page 28 times out of every 32.  Note that
-		 * the page is deactivated instead of cached, but placed
-		 * at the head of the queue instead of the tail.
-		 */
-		head = 1;
-	}
-	_vm_page_deactivate(m, head);
+	/*
+	 * For MADV_FREE, put the pages onto the disposed queue.
+	 * For MADV_DONTNEED, put clean pages at the head of the
+	 * inactive queue and dirty pages at its tail.
+	 */
+	if (advice == MADV_FREE)
+		vm_page_dispose(m);
+	else
+		_vm_page_deactivate(m, !m->dirty);
 }
 
 /*
@@ -3164,10 +3189,11 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 	    vm_cnt.v_free_count, vm_cnt.v_cache_count);
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		db_printf(
-	"dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n",
+"dom %d page_cnt %d free %d pq_disposed %d pq_act %d pq_inact %d pass %d\n",
 		    dom,
 		    vm_dom[dom].vmd_page_count,
 		    vm_dom[dom].vmd_free_count,
+		    vm_dom[dom].vmd_pagequeues[PQ_DISPOSED].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pass);
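From userland, the path into vm_page_dispose() is madvise(2): MADV_FREE declares the contents of a range disposable, and with this change such pages are placed on PQ_DISPOSED, where the pagedaemon can free them cheaply. A small, runnable demonstration using only standard mmap/madvise calls:

#include <sys/mman.h>

#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len;
	char *p;

	len = 16 * (size_t)getpagesize();
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
	    -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	memset(p, 0xa5, len);		/* dirty the anonymous pages */
	if (madvise(p, len, MADV_FREE) == -1) {
		perror("madvise");
		return (1);
	}
	/*
	 * After MADV_FREE the contents are undefined until the pages
	 * are written again: reads may return the old bytes or zeroes,
	 * depending on whether the kernel has reclaimed the pages yet.
	 */
	printf("first byte after MADV_FREE: 0x%02x\n",
	    (unsigned int)(unsigned char)p[0]);
	munmap(p, len);
	return (0);
}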