Index: conf/files =================================================================== --- conf/files (.../head/sys) (revision 246728) +++ conf/files (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -3628,6 +3628,7 @@ vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard +vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard Index: amd64/include/pmap.h =================================================================== --- amd64/include/pmap.h (.../head/sys) (revision 246728) +++ amd64/include/pmap.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -235,10 +235,20 @@ struct pv_entry; struct pv_chunk; struct md_page { - TAILQ_HEAD(,pv_entry) pv_list; - int pat_mode; + union { + TAILQ_HEAD(,pv_entry) pvi_list; + struct { + vm_page_t pii_left; + vm_page_t pii_right; + } pvi_siters; + } pv_structs; + int pat_mode; }; +#define pv_list pv_structs.pvi_list +#define pv_left pv_structs.pvi_siters.pii_left +#define pv_right pv_structs.pvi_siters.pii_right + /* * The kernel virtual address (KVA) of the level 4 page table page is always * within the direct map (DMAP) region. @@ -277,7 +287,7 @@ extern struct pmap kernel_pmap_store; */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (.../head/sys) (revision 246728) +++ amd64/amd64/pmap.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -306,6 +306,7 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t p static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); +static vm_page_t pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); @@ -1497,7 +1498,8 @@ pmap_free_zero_pages(vm_page_t free) while (free != NULL) { m = free; - free = m->right; + free = (void *)m->object; + m->object = NULL; /* Preserve the page's PG_ZERO setting. 
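 * With vm_page's splay linkage being retired by this change, the
 * delayed-free list is now chained through the m->object field
 * (see pmap_add_delayed_free_list()); the link is read and cleared
 * just above before the page is handed to vm_page_free_toq().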
*/ vm_page_free_toq(m); } @@ -1516,7 +1518,7 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->object = (void *)*free; *free = m; } @@ -1534,20 +1536,20 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; + mpte->md.pv_left = NULL; + mpte->md.pv_right = NULL; } else { - root = vm_page_splay(mpte->pindex, root); + root = pmap_vmpage_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; + mpte->md.pv_left = root->md.pv_left; + mpte->md.pv_right = root; + root->md.pv_left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; + mpte->md.pv_right = root->md.pv_right; + mpte->md.pv_left = root; + root->md.pv_right = NULL; } } pmap->pm_root = mpte; @@ -1566,7 +1568,7 @@ pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); + mpte = pmap_vmpage_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } @@ -1585,18 +1587,24 @@ pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) { - root = vm_page_splay(mpte->pindex, pmap->pm_root); + root = pmap_vmpage_splay(mpte->pindex, pmap->pm_root); KASSERT(mpte == root, ("pmap_remove_pt_page: mpte %p is missing from pmap %p", mpte, pmap)); } - if (mpte->left == NULL) - root = mpte->right; + if (mpte->md.pv_left == NULL) + root = mpte->md.pv_right; else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; + root = pmap_vmpage_splay(mpte->pindex, mpte->md.pv_left); + root->md.pv_right = mpte->md.pv_right; } pmap->pm_root = root; + + /* + * Reinitialize the pv_list which could be dirty now because of the + * splay tree work. + */ + TAILQ_INIT(&mpte->md.pv_list); } /* @@ -1672,6 +1680,61 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_p } /* + * Implements Sleator and Tarjan's top-down splay algorithm. Returns + * the vm_page containing the given pindex. If, however, that + * pindex is not found in the pmap, returns a vm_page that is + * adjacent to the pindex, coming before or after it. + */ +static vm_page_t +pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root) +{ + struct vm_page dummy; + vm_page_t lefttreemax, righttreemin, y; + + if (root == NULL) + return (root); + lefttreemax = righttreemin = &dummy; + for (;; root = y) { + if (pindex < root->pindex) { + if ((y = root->md.pv_left) == NULL) + break; + if (pindex < y->pindex) { + /* Rotate right. */ + root->md.pv_left = y->md.pv_right; + y->md.pv_right = root; + root = y; + if ((y = root->md.pv_left) == NULL) + break; + } + /* Link into the new root's right tree. */ + righttreemin->md.pv_left = root; + righttreemin = root; + } else if (pindex > root->pindex) { + if ((y = root->md.pv_right) == NULL) + break; + if (pindex > y->pindex) { + /* Rotate left. */ + root->md.pv_right = y->md.pv_left; + y->md.pv_left = root; + root = y; + if ((y = root->md.pv_right) == NULL) + break; + } + /* Link into the new root's left tree. */ + lefttreemax->md.pv_right = root; + lefttreemax = root; + } else + break; + } + /* Assemble the new root. 
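+	 * The loop above has partitioned the nodes visited on the way down
+	 * into a left tree (keys below pindex) linked from lefttreemax and
+	 * a right tree (keys above pindex) linked from righttreemin, with
+	 * dummy acting as the head of both.  Hanging the old root's
+	 * subtrees off those partitions and the partitions back under root
+	 * completes the splay.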
*/ + lefttreemax->md.pv_right = root->md.pv_left; + righttreemin->md.pv_left = root->md.pv_right; + root->md.pv_left = dummy.md.pv_right; + root->md.pv_right = dummy.md.pv_left; + return (root); +} + +/* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ @@ -2221,7 +2284,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -2273,7 +2336,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock } if (m_pc == NULL && free != NULL) { m_pc = free; - free = m_pc->right; + free = (void *)m_pc->object; /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&cnt.v_wire_count, 1); @@ -2505,9 +2568,9 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2546,7 +2609,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; @@ -2564,7 +2627,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); if (va == va_last) goto out; } @@ -2612,7 +2675,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, v pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. 
*/ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2653,7 +2716,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2677,7 +2740,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -3156,7 +3219,7 @@ small_mappings: vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3602,7 +3665,7 @@ retry: pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } @@ -4295,7 +4358,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4306,7 +4369,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4358,7 +4421,7 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int c pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4487,7 +4550,7 @@ pmap_remove_pages(pmap_t pmap) if ((tpte & PG_PS) != 0) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if ((mt->aflags & PGA_WRITEABLE) != 0 && @@ -4506,7 +4569,7 @@ pmap_remove_pages(pmap_t pmap) } } else { pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if ((m->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { @@ -4581,7 +4644,7 @@ pmap_is_modified_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4652,7 +4715,7 @@ pmap_is_referenced_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4693,7 +4756,7 @@ pmap_remove_write(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, 
&pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4703,7 +4766,7 @@ pmap_remove_write(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4756,7 +4819,7 @@ pmap_ts_referenced(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4790,9 +4853,9 @@ small_mappings: if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4844,7 +4907,7 @@ pmap_clear_modify(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4876,7 +4939,7 @@ pmap_clear_modify(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4913,7 +4976,7 @@ pmap_clear_reference(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4936,7 +4999,7 @@ pmap_clear_reference(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); Index: vm/uma_int.h =================================================================== --- vm/uma_int.h (.../head/sys) (revision 246728) +++ vm/uma_int.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -118,7 +118,8 @@ #define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */ #define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */ -#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */ +/* Initial pages allocated for startup */ +#define UMA_INIT_BOOT_PAGES 64 /* Max waste before going to off page slab management */ #define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10) Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (.../head/sys) (revision 246728) +++ vm/uma_core.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -329,7 +329,7 @@ bucket_alloc(int entries, int bflags) /* * This is to stop us from allocating per cpu buckets while we're - * running out of vm.boot_pages. Otherwise, we would exhaust the + * running out of boot_pages. Otherwise, we would exhaust the * boot pages. This also prevents us from allocating buckets in * low memory situations. 
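 * (The pool behind boot_pages is now sized at run time: vm_page_startup()
 * grows it beyond UMA_INIT_BOOT_PAGES to cover the radix-node
 * pre-allocation, consistent with the tunable being renamed to
 * vm.initial_boot_pages elsewhere in this change.)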
*/ @@ -984,7 +984,7 @@ startup_alloc(uma_zone_t zone, int bytes, u_int8_t } mtx_unlock(&uma_boot_pages_mtx); if (booted < UMA_STARTUP2) - panic("UMA: Increase vm.boot_pages"); + panic("UMA: Increase vm.initial_boot_pages"); /* * Now that we've booted reset these users to their real allocator. */ Index: vm/vm_init.c =================================================================== --- vm/vm_init.c (.../head/sys) (revision 246728) +++ vm/vm_init.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -82,6 +82,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -123,6 +124,7 @@ vm_mem_init(dummy) vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); + vm_radix_init(); pmap_init(); vm_pager_init(); } Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (.../head/sys) (revision 246728) +++ vm/vm_object.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -93,6 +93,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -172,7 +173,7 @@ vm_object_zdtor(void *mem, int size, void *arg) ("object %p has reservations", object)); #endif - KASSERT(object->cache == NULL, + KASSERT(vm_object_cache_is_empty(object), ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, @@ -210,7 +211,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t si TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); - object->root = NULL; + object->rtree.rt_root = 0; object->type = type; switch (type) { case OBJT_DEAD: @@ -248,7 +249,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t si #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif - object->cache = NULL; + object->cache.rt_root = 0; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -736,13 +737,13 @@ vm_object_terminate(vm_object_t object) } vm_page_unlock(p); } + vm_radix_reclaim_allnodes(&object->rtree); /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { - object->root = NULL; TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) @@ -753,7 +754,7 @@ vm_object_terminate(vm_object_t object) if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif - if (__predict_false(object->cache != NULL)) + if (!vm_object_cache_is_empty(object)) vm_page_cache_free(object, 0, 0); /* @@ -1379,7 +1380,7 @@ retry: * should still be OBJT_DEFAULT and orig_object should not * contain any cached pages within the specified range. */ - if (__predict_false(orig_object->cache != NULL)) + if (!vm_object_cache_is_empty(orig_object)) vm_page_cache_transfer(orig_object, offidxstart, new_object); } @@ -1728,7 +1729,7 @@ vm_object_collapse(vm_object_t object) /* * Free any cached pages from backing_object. 
*/ - if (__predict_false(backing_object->cache != NULL)) + if (!vm_object_cache_is_empty(backing_object)) vm_page_cache_free(backing_object, 0, 0); } /* @@ -1922,7 +1923,7 @@ again: } vm_object_pip_wakeup(object); skipmemq: - if (__predict_false(object->cache != NULL)) + if (!vm_object_cache_is_empty(object)) vm_page_cache_free(object, start, end); } Index: vm/vm_reserv.c =================================================================== --- vm/vm_reserv.c (.../head/sys) (revision 246728) +++ vm/vm_reserv.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include /* @@ -341,34 +342,22 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pind /* * Look for an existing reservation. */ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { - KASSERT(mpred->pindex != pindex, + mpred = vm_radix_lookup_le(&object->rtree, pindex); + if (mpred != NULL) { + KASSERT(mpred->pindex <= pindex, ("vm_reserv_alloc_contig: pindex already allocated")); rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; - else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_contig: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) - goto found; - else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; - } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + msucc = vm_radix_lookup_ge(&object->rtree, pindex); + if (msucc != NULL) { + KASSERT(msucc->pindex >= pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } /* * Could at least one reservation fit between the first index to the @@ -507,34 +496,22 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex /* * Look for an existing reservation. 
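 * With the radix trie the predecessor/successor pair is obtained with
 * two direct lookups: vm_radix_lookup_le() returns the resident page
 * with the greatest pindex less than or equal to the search index (or
 * NULL) and vm_radix_lookup_ge() the one with the smallest pindex
 * greater than or equal to it, replacing the splay-tree walk below.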
*/ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { - KASSERT(mpred->pindex != pindex, + mpred = vm_radix_lookup_le(&object->rtree, pindex); + if (mpred != NULL) { + KASSERT(mpred->pindex <= pindex, ("vm_reserv_alloc_page: pindex already allocated")); rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; - else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_page: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) - goto found; - else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; - } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + msucc = vm_radix_lookup_ge(&object->rtree, pindex); + if (msucc != NULL) { + KASSERT(msucc->pindex >= pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } /* * Could a reservation fit between the first index to the left that Index: vm/vm_radix.c =================================================================== --- vm/vm_radix.c (.../head/sys) (revision 0) +++ vm/vm_radix.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -0,0 +1,755 @@ +/* + * Copyright (c) 2013 EMC Corp. + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Path-compressed radix trie implementation. + * The following code is not generalized into a general purpose library + * because there are way too many parameters embedded that should really + * be decided by the library consumers. At the same time, consumers + * of this code must achieve highest possible performance. + * + * The implementation takes into account the following rationale: + * - Size of the nodes might be as small as possible. + * - There is no bias toward lookup operations over inserts or removes, + * and vice-versa. 
+ * - In average there are not many complete levels, than level + * compression may just complicate things. + */ + +#include + +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +/* + * Such sizes should permit to keep node children contained into a single + * cache-line, or to at least not span many of those. + * In particular, sparse tries should however be compressed properly and + * then make some extra-levels not a big deal. + */ +#ifdef __LP64__ +#define VM_RADIX_WIDTH 4 +#else +#define VM_RADIX_WIDTH 3 +#endif + +#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) +#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) +#define VM_RADIX_LIMIT \ + (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1) + +/* Flag bits stored in node pointers. */ +#define VM_RADIX_ISLEAF 0x1 +#define VM_RADIX_FLAGS 0x1 +#define VM_RADIX_PAD VM_RADIX_FLAGS + +/* Returns one unit associated with specified level. */ +#define VM_RADIX_UNITLEVEL(lev) \ + ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH)) + +struct vm_radix_node { + void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ + vm_pindex_t rn_owner; /* Owner of record. */ + uint16_t rn_count; /* Valid children. */ + uint16_t rn_clev; /* Current level. */ +}; + +static uma_zone_t vm_radix_node_zone; + +#ifdef INVARIANTS +/* + * Radix node zone destructor. + */ +static void +vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused) +{ + struct vm_radix_node *rnode; + + rnode = mem; + KASSERT(rnode->rn_count == 0, + ("vm_radix_node_put: Freeing node %p with %d children\n", mem, + rnode->rn_count)); +} +#endif + +/* + * Allocate a radix node. Pre-allocation ensures that the request will be + * always successfully satisfied. + */ +static __inline struct vm_radix_node * +vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) +{ + struct vm_radix_node *rnode; + + rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); + + /* + * The required number of nodes might be already correctly + * pre-allocated in vm_radix_init(). However, UMA can reserve few + * nodes on per-cpu specific buckets, which will not be accessible + * from the curcpu. The allocation could then return NULL when the + * pre-allocation pool is close to be exhausted. + * Anyway, in practice this should never be a problem because a new + * node is not always required for insert, thus the pre-allocation + * pool should already have some extra-pages that indirectly deal with + * this situation. + */ + if (rnode == NULL) + panic("%s: uma_zalloc() returned NULL for a new node", + __func__); + rnode->rn_owner = owner; + rnode->rn_count = count; + rnode->rn_clev = clevel; + return (rnode); +} + +/* + * Free radix node. + */ +static __inline void +vm_radix_node_put(struct vm_radix_node *rnode) +{ + + uma_zfree(vm_radix_node_zone, rnode); +} + +/* + * Return the position in the array for a given level. + */ +static __inline int +vm_radix_slot(vm_pindex_t index, uint16_t level) +{ + + return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) & + VM_RADIX_MASK); +} + +/* Trims the key after the specified level. 
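+ * As an illustrative example (assuming the LP64 parameters above,
+ * VM_RADIX_WIDTH == 4 and hence VM_RADIX_LIMIT == 15), the slots from
+ * level 0 through the given level are kept and the remaining low-order
+ * bits are cleared:
+ *
+ *	vm_radix_trimkey(0x12345, 13) == 0x12300
+ *	vm_radix_trimkey(0x12345, 15) == 0x12345
+ *
+ * which is equivalent to masking with ~(VM_RADIX_UNITLEVEL(level) - 1);
+ * the sample values are not taken from the code itself.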
*/ +static __inline vm_pindex_t +vm_radix_trimkey(vm_pindex_t index, uint16_t level) +{ + vm_pindex_t ret; + + ret = index; + if (level < VM_RADIX_LIMIT) { + ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; + ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; + } + return (ret); +} + +/* + * Get the root node for a radix tree. + */ +static __inline struct vm_radix_node * +vm_radix_getroot(struct vm_radix *rtree) +{ + + return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS)); +} + +/* + * Set the root node for a radix tree. + */ +static __inline void +vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode) +{ + + rtree->rt_root = (uintptr_t)rnode; +} + +/* + * Returns the associated page extracted from rnode if available, + * NULL otherwise. + */ +static __inline vm_page_t +vm_radix_node_page(struct vm_radix_node *rnode) +{ + + return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ? + (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL); +} + +/* + * Adds the page as a child of provided node. + */ +static __inline void +vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, + vm_page_t page) +{ + int slot; + + slot = vm_radix_slot(index, clev); + rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF); +} + +/* + * Returns the slot where two keys differ. + * It cannot accept 2 equal keys. + */ +static __inline uint16_t +vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) +{ + uint16_t clev; + + KASSERT(index1 != index2, ("%s: passing the same key value %jx", + __func__, (uintmax_t)index1)); + + index1 ^= index2; + for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++) + if (vm_radix_slot(index1, clev)) + return (clev); + panic("%s: it might have not reached this point", __func__); + return (0); +} + +/* + * Returns TRUE if it can be determined that key does not belong to the + * specified rnode. FALSE otherwise. + */ +static __inline boolean_t +vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) +{ + + if (rnode->rn_clev > 0) { + idx = vm_radix_trimkey(idx, rnode->rn_clev - 1); + idx -= rnode->rn_owner; + if (idx != 0) + return (TRUE); + } + return (FALSE); +} + +/* + * Adjusts the idx key to the first upper level available, based on a valid + * initial level and map of available levels. + * Returns a value bigger than 0 to signal that there are not valid levels + * available. + */ +static __inline int +vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) +{ + vm_pindex_t wrapidx; + + for (; levels[ilev] == FALSE || + vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--) + if (ilev == 0) + break; + KASSERT(ilev > 0 || levels[0] == TRUE, + ("%s: levels back-scanning problem", __func__)); + if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1)) + return (1); + wrapidx = *idx; + *idx = vm_radix_trimkey(*idx, ilev); + *idx += VM_RADIX_UNITLEVEL(ilev); + if (*idx < wrapidx) + return (1); + return (0); +} + +/* + * Adjusts the idx key to the first lower level available, based on a valid + * initial level and map of available levels. + * Returns a value bigger than 0 to signal that there are not valid levels + * available. 
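+ * Together with vm_radix_addlev() above, this works like borrow/carry
+ * propagation in base-VM_RADIX_COUNT arithmetic restricted to the
+ * levels recorded in the maplevels array: the key digit at the chosen
+ * level is stepped down with the lower digits saturated (here), or
+ * stepped up with the lower digits cleared (in vm_radix_addlev()), so
+ * that the lookup routines can jump to the previous or next candidate
+ * subtree without rescanning levels that never appeared on the path.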
+ */ +static __inline int +vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) +{ + vm_pindex_t wrapidx; + + for (; levels[ilev] == FALSE || + vm_radix_slot(*idx, ilev) == 0; ilev--) + if (ilev == 0) + break; + KASSERT(ilev > 0 || levels[0] == TRUE, + ("%s: levels back-scanning problem", __func__)); + if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0) + return (1); + wrapidx = *idx; + *idx = vm_radix_trimkey(*idx, ilev); + *idx |= VM_RADIX_UNITLEVEL(ilev) - 1; + *idx -= VM_RADIX_UNITLEVEL(ilev); + if (*idx < wrapidx) + return (1); + return (0); +} + +/* + * Internal handwork for vm_radix_reclaim_allonodes() primitive. + * This function is recrusive. + */ +static void +vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode) +{ + int slot; + + for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { + if (rnode->rn_child[slot] == NULL) + continue; + if (vm_radix_node_page(rnode->rn_child[slot]) == NULL) + vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]); + rnode->rn_count--; + } + vm_radix_node_put(rnode); +} + +/* + * Returns the amount of requested memory to satisfy nodes pre-allocation. + */ +size_t +vm_radix_allocphys_size(size_t nitems) +{ + + return (nitems * sizeof(struct vm_radix_node)); +} + +/* + * Pre-allocate intermediate nodes from the UMA slab zone. + */ +void +vm_radix_init(void) +{ + + vm_radix_node_zone = uma_zcreate("RADIX NODE", + sizeof(struct vm_radix_node), NULL, +#ifdef INVARIANTS + vm_radix_node_zone_dtor, +#else + NULL, +#endif + NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE); + uma_prealloc(vm_radix_node_zone, vm_page_array_size); +} + +/* + * Inserts the key-value pair in to the trie. + * Panics if the key already exists. + */ +void +vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page) +{ + vm_pindex_t newind; + struct vm_radix_node *rnode, *tmp, *tmp2; + vm_page_t m; + int slot; + uint16_t clev; + + /* + * The owner of record for root is not really important because it + * will never be used. + */ + rnode = vm_radix_getroot(rtree); + if (rnode == NULL) { + rnode = vm_radix_node_get(0, 1, 0); + vm_radix_setroot(rtree, rnode); + vm_radix_addpage(rnode, index, 0, page); + return; + } + while (rnode) { + if (vm_radix_keybarr(rnode, index) == TRUE) + break; + slot = vm_radix_slot(index, rnode->rn_clev); + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL) { + if (m->pindex == index) + panic("%s: key %jx is already present", + __func__, (uintmax_t)index); + clev = vm_radix_keydiff(m->pindex, index); + tmp = vm_radix_node_get(vm_radix_trimkey(index, + clev - 1), 2, clev); + rnode->rn_child[slot] = tmp; + vm_radix_addpage(tmp, index, clev, page); + vm_radix_addpage(tmp, m->pindex, clev, m); + return; + } + if (rnode->rn_child[slot] == NULL) { + rnode->rn_count++; + vm_radix_addpage(rnode, index, rnode->rn_clev, page); + return; + } + rnode = rnode->rn_child[slot]; + } + if (rnode == NULL) + panic("%s: path traversal ended unexpectedly", __func__); + + /* + * Scan the trie from the top and find the parent to insert + * the new object. 
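+ * The walk above stopped at an edge whose owner key disagrees with the
+ * new index, so the level at which the two keys first diverge (clev)
+ * is computed from that owner and the trie is walked again from the
+ * root until the child edge spans a level deeper than clev; a new
+ * intermediate node is then interposed at clev holding both the old
+ * edge and the new page.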
+ */ + newind = rnode->rn_owner; + clev = vm_radix_keydiff(newind, index); + slot = VM_RADIX_COUNT; + for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) { + KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan", + __func__)); + KASSERT(clev >= rnode->rn_clev, + ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d", + __func__, clev, rnode->rn_clev)); + slot = vm_radix_slot(index, rnode->rn_clev); + tmp = rnode->rn_child[slot]; + KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL, + ("%s: unexpected lookup interruption", __func__)); + if (tmp->rn_clev > clev) + break; + } + KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT, + ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d", + __func__, (void *)rnode, (void *)tmp, slot)); + + /* + * A new node is needed because the right insertion level is reached. + * Setup the new intermediate node and add the 2 children: the + * new object and the older edge. + */ + tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2, + clev); + rnode->rn_child[slot] = tmp2; + vm_radix_addpage(tmp2, index, clev, page); + slot = vm_radix_slot(newind, clev); + tmp2->rn_child[slot] = tmp; +} + +/* + * Returns the value stored at the index. If the index is not present + * NULL is returned. + */ +vm_page_t +vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) +{ + struct vm_radix_node *rnode; + vm_page_t m; + int slot; + + rnode = vm_radix_getroot(rtree); + while (rnode) { + if (vm_radix_keybarr(rnode, index) == TRUE) + return (NULL); + slot = vm_radix_slot(index, rnode->rn_clev); + rnode = rnode->rn_child[slot]; + m = vm_radix_node_page(rnode); + if (m != NULL) { + if (m->pindex == index) + return (m); + else + return (NULL); + } + } + return (NULL); +} + +/* + * Look up any entry at a position bigger than or equal to index. + */ +vm_page_t +vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) +{ + vm_pindex_t inc; + vm_page_t m; + struct vm_radix_node *rnode; + int slot; + uint16_t difflev; + boolean_t maplevels[VM_RADIX_LIMIT + 1]; +#ifdef INVARIANTS + int loops = 0; +#endif + +restart: + KASSERT(++loops < 1000, ("%s: too many loops", __func__)); + for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) + maplevels[difflev] = FALSE; + rnode = vm_radix_getroot(rtree); + while (rnode) { + maplevels[rnode->rn_clev] = TRUE; + + /* + * If the keys differ before the current bisection node + * the search key might rollback to the earlierst + * available bisection node, or to the smaller value + * in the current domain (if the owner is bigger than the + * search key). + * The search for a valid bisection node is helped through + * the use of maplevels array which should bring immediately + * a lower useful level, skipping holes. + */ + if (vm_radix_keybarr(rnode, index) == TRUE) { + difflev = vm_radix_keydiff(index, rnode->rn_owner); + if (index > rnode->rn_owner) { + if (vm_radix_addlev(&index, maplevels, + difflev) > 0) + break; + } else + index = vm_radix_trimkey(rnode->rn_owner, + difflev); + goto restart; + } + slot = vm_radix_slot(index, rnode->rn_clev); + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL && m->pindex >= index) + return (m); + if (rnode->rn_child[slot] != NULL && m == NULL) { + rnode = rnode->rn_child[slot]; + continue; + } + + /* + * Look for an available edge or page within the current + * bisection node. 
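+ * The slots to the right of the current one are scanned in order,
+ * advancing the search key by one unit of this level per step, so a
+ * non-leaf child met during the scan is descended into with an
+ * already adjusted key, while a leaf whose pindex satisfies the search
+ * is returned directly.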
+ */ + if (slot < (VM_RADIX_COUNT - 1)) { + inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); + index = vm_radix_trimkey(index, rnode->rn_clev); + index += inc; + slot++; + for (;; index += inc, slot++) { + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL && m->pindex >= index) + return (m); + if ((rnode->rn_child[slot] != NULL && + m == NULL) || slot == (VM_RADIX_COUNT - 1)) + break; + } + } + + /* + * If a valid page or edge, bigger than the search slot, is + * found in the traversal, skip to the next higher-level key. + */ + if (slot == (VM_RADIX_COUNT - 1) && + (rnode->rn_child[slot] == NULL || m != NULL)) { + if (rnode->rn_clev == 0 || vm_radix_addlev(&index, + maplevels, rnode->rn_clev - 1) > 0) + break; + goto restart; + } + rnode = rnode->rn_child[slot]; + } + return (NULL); +} + +/* + * Look up any entry at a position less than or equal to index. + */ +vm_page_t +vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) +{ + vm_pindex_t inc; + vm_page_t m; + struct vm_radix_node *rnode; + int slot; + uint16_t difflev; + boolean_t maplevels[VM_RADIX_LIMIT + 1]; +#ifdef INVARIANTS + int loops = 0; +#endif + +restart: + KASSERT(++loops < 1000, ("%s: too many loops", __func__)); + for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) + maplevels[difflev] = FALSE; + rnode = vm_radix_getroot(rtree); + while (rnode) { + maplevels[rnode->rn_clev] = TRUE; + + /* + * If the keys differ before the current bisection node + * the search key might rollback to the earlierst + * available bisection node, or to the higher value + * in the current domain (if the owner is smaller than the + * search key). + * The search for a valid bisection node is helped through + * the use of maplevels array which should bring immediately + * a lower useful level, skipping holes. + */ + if (vm_radix_keybarr(rnode, index) == TRUE) { + difflev = vm_radix_keydiff(index, rnode->rn_owner); + if (index > rnode->rn_owner) { + index = vm_radix_trimkey(rnode->rn_owner, + difflev); + index |= VM_RADIX_UNITLEVEL(difflev) - 1; + } else if (vm_radix_declev(&index, maplevels, + difflev) > 0) + break; + goto restart; + } + slot = vm_radix_slot(index, rnode->rn_clev); + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL && m->pindex <= index) + return (m); + if (rnode->rn_child[slot] != NULL && m == NULL) { + rnode = rnode->rn_child[slot]; + continue; + } + + /* + * Look for an available edge or page within the current + * bisection node. + */ + if (slot > 0) { + inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); + index = vm_radix_trimkey(index, rnode->rn_clev); + index |= inc - 1; + index -= inc; + slot--; + for (;; index -= inc, slot--) { + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL && m->pindex <= index) + return (m); + if ((rnode->rn_child[slot] != NULL && + m == NULL) || slot == 0) + break; + } + } + + /* + * If a valid page or edge, smaller than the search slot, is + * found in the traversal, skip to the next higher-level key. + */ + if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) { + if (rnode->rn_clev == 0 || vm_radix_declev(&index, + maplevels, rnode->rn_clev - 1) > 0) + break; + goto restart; + } + rnode = rnode->rn_child[slot]; + } + return (NULL); +} + +/* + * Remove the specified index from the tree. + * Panics if the key is not present. 
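+ * As a hedged usage sketch (hypothetical caller, not part of this
+ * change), the trie is exercised much as the old splay tree was, with
+ * the appropriate object or free-queue lock held:
+ *
+ *	vm_radix_insert(&object->rtree, m->pindex, m);
+ *	if (vm_radix_lookup(&object->rtree, m->pindex) == m)
+ *		vm_radix_remove(&object->rtree, m->pindex);
+ *
+ * Since removing a missing key panics, callers only remove pages they
+ * know to be resident.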
+ */ +void +vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) +{ + struct vm_radix_node *rnode, *parent; + vm_page_t m; + int i, slot; + + parent = NULL; + rnode = vm_radix_getroot(rtree); + for (;;) { + if (rnode == NULL) + panic("vm_radix_remove: impossible to locate the key"); + slot = vm_radix_slot(index, rnode->rn_clev); + m = vm_radix_node_page(rnode->rn_child[slot]); + if (m != NULL && m->pindex == index) { + rnode->rn_child[slot] = NULL; + rnode->rn_count--; + if (rnode->rn_count > 1) + break; + if (parent == NULL) { + if (rnode->rn_count == 0) { + vm_radix_node_put(rnode); + vm_radix_setroot(rtree, NULL); + } + break; + } + for (i = 0; i < VM_RADIX_COUNT; i++) + if (rnode->rn_child[i] != NULL) + break; + KASSERT(i != VM_RADIX_COUNT, + ("%s: invalid node configuration", __func__)); + slot = vm_radix_slot(index, parent->rn_clev); + KASSERT(parent->rn_child[slot] == rnode, + ("%s: invalid child value", __func__)); + parent->rn_child[slot] = rnode->rn_child[i]; + rnode->rn_count--; + rnode->rn_child[i] = NULL; + vm_radix_node_put(rnode); + break; + } + if (m != NULL && m->pindex != index) + panic("%s: invalid key found", __func__); + parent = rnode; + rnode = rnode->rn_child[slot]; + } +} + +/* + * Remove and free all the nodes from the radix tree. + * This function is recrusive but there is a tight control on it as the + * maximum depth of the tree is fixed. + */ +void +vm_radix_reclaim_allnodes(struct vm_radix *rtree) +{ + struct vm_radix_node *root; + + root = vm_radix_getroot(rtree); + if (root == NULL) + return; + vm_radix_reclaim_allnodes_int(root); + vm_radix_setroot(rtree, NULL); +} + +#ifdef DDB +/* + * Show details about the given vnode. + */ +DB_SHOW_COMMAND(radixnode, db_show_radixnode) +{ + struct vm_radix_node *rnode; + int i; + + if (!have_addr) + return; + rnode = (struct vm_radix_node *)addr; + db_printf("radixnode %p, owner %jx, children count %u, level %u:\n", + (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count, + rnode->rn_clev); + for (i = 0; i < VM_RADIX_COUNT; i++) + if (rnode->rn_child[i] != NULL) + db_printf("slot: %d, val: %p, page: %p, clev: %d\n", + i, (void *)rnode->rn_child[i], + (void *)vm_radix_node_page(rnode->rn_child[i]), + rnode->rn_clev); +} +#endif /* DDB */ Index: vm/vm_object.h =================================================================== --- vm/vm_object.h (.../head/sys) (revision 246728) +++ vm/vm_object.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -71,6 +71,8 @@ #include #include +#include + /* * Types defined: * @@ -78,10 +80,10 @@ * * The root of cached pages pool is protected by both the per-object mutex * and the free pages queue mutex. - * On insert in the cache splay tree, the per-object mutex is expected + * On insert in the cache radix trie, the per-object mutex is expected * to be already held and the free pages queue mutex will be * acquired during the operation too. - * On remove and lookup from the cache splay tree, only the free + * On remove and lookup from the cache radix trie, only the free * pages queue mutex is expected to be locked. 
* These rules allow for reliably checking for the presence of cached * pages with only the per-object lock held, thereby reducing contention @@ -100,7 +102,7 @@ struct vm_object { LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ - vm_page_t root; /* root of the resident page splay tree */ + struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ int ref_count; /* How many refs?? */ @@ -115,7 +117,7 @@ struct vm_object { vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ - vm_page_t cache; /* (o + f) root of the cache page splay tree */ + struct vm_radix cache; /* (o + f) root of the cache page radix trie */ void *handle; union { /* @@ -231,6 +233,13 @@ void vm_object_pip_wakeup(vm_object_t object); void vm_object_pip_wakeupn(vm_object_t object, short i); void vm_object_pip_wait(vm_object_t object, char *waitid); +static __inline boolean_t +vm_object_cache_is_empty(vm_object_t object) +{ + + return (__predict_true(object->cache.rt_root == 0)); +} + vm_object_t vm_object_allocate (objtype_t, vm_pindex_t); void _vm_object_allocate (objtype_t, vm_pindex_t, vm_object_t); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, Index: vm/_vm_radix.h =================================================================== --- vm/_vm_radix.h (.../head/sys) (revision 0) +++ vm/_vm_radix.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef __VM_RADIX_H_ +#define __VM_RADIX_H_ + +/* + * Radix tree root. 
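+ * rt_root is a single tagged word: zero denotes an empty trie, while a
+ * non-zero value carries the pointer to the topmost node, with the low
+ * bits reserved for flags (see vm_radix_getroot() and
+ * vm_radix_setroot() in vm/vm_radix.c).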
+ */ +struct vm_radix { + uintptr_t rt_root; +}; + +#endif /* !__VM_RADIX_H_ */ Index: vm/vm_radix.h =================================================================== --- vm/vm_radix.h (.../head/sys) (revision 0) +++ vm/vm_radix.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 EMC Corp. + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _VM_RADIX_H_ +#define _VM_RADIX_H_ + +#include + +#ifdef _KERNEL + +size_t vm_radix_allocphys_size(size_t nitems); +void vm_radix_init(void); +void vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, + vm_page_t page); +vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index); +vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index); +vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index); +void vm_radix_reclaim_allnodes(struct vm_radix *rtree); +void vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index); + +#endif /* _KERNEL */ +#endif /* !_VM_RADIX_H_ */ Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (.../head/sys) (revision 246728) +++ vm/vm_page.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -144,10 +145,10 @@ long vm_page_array_size; long first_page; int vm_page_zero_count; -static int boot_pages = UMA_BOOT_PAGES; -TUNABLE_INT("vm.boot_pages", &boot_pages); -SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0, - "number of pages allocated for bootstrapping the VM system"); +static int boot_pages = UMA_INIT_BOOT_PAGES; +TUNABLE_INT("vm.initial_boot_pages", &boot_pages); +SYSCTL_INT(_vm, OID_AUTO, initial_boot_pages, CTLFLAG_RD, &boot_pages, 0, + "Initial number of pages allocated for bootstrapping the VM system"); static int pa_tryrelock_restart; SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, @@ -306,7 +307,7 @@ vm_page_startup(vm_offset_t vaddr) low_water = 0; #endif - end = phys_avail[biggestone+1]; + new_end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. 
@@ -317,17 +318,6 @@ vm_page_startup(vm_offset_t vaddr) for (i = 0; i < PQ_COUNT; i++) vm_pagequeue_init_lock(&vm_pagequeues[i]); - /* - * Allocate memory for use when boot strapping the kernel memory - * allocator. - */ - new_end = end - (boot_pages * UMA_SLAB_SIZE); - new_end = trunc_page(new_end); - mapped = pmap_map(&vaddr, new_end, end, - VM_PROT_READ | VM_PROT_WRITE); - bzero((void *)mapped, end - new_end); - uma_startup((void *)mapped, boot_pages); - #if defined(__amd64__) || defined(__i386__) || defined(__arm__) || \ defined(__mips__) /* @@ -383,6 +373,20 @@ vm_page_startup(vm_offset_t vaddr) end = new_end; /* + * Allocate memory for use when boot strapping the kernel memory + * allocator. + */ + boot_pages += howmany(vm_radix_allocphys_size(page_range), + UMA_SLAB_SIZE - UMA_MAX_WASTE); + new_end = end - (boot_pages * UMA_SLAB_SIZE); + new_end = trunc_page(new_end); + mapped = pmap_map(&vaddr, new_end, end, + VM_PROT_READ | VM_PROT_WRITE); + bzero((void *)mapped, end - new_end); + uma_startup((void *)mapped, boot_pages); + end = new_end; + + /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. */ vaddr += PAGE_SIZE; @@ -793,63 +797,6 @@ vm_page_dirty_KBI(vm_page_t m) } /* - * vm_page_splay: - * - * Implements Sleator and Tarjan's top-down splay algorithm. Returns - * the vm_page containing the given pindex. If, however, that - * pindex is not found in the vm_object, returns a vm_page that is - * adjacent to the pindex, coming before or after it. - */ -vm_page_t -vm_page_splay(vm_pindex_t pindex, vm_page_t root) -{ - struct vm_page dummy; - vm_page_t lefttreemax, righttreemin, y; - - if (root == NULL) - return (root); - lefttreemax = righttreemin = &dummy; - for (;; root = y) { - if (pindex < root->pindex) { - if ((y = root->left) == NULL) - break; - if (pindex < y->pindex) { - /* Rotate right. */ - root->left = y->right; - y->right = root; - root = y; - if ((y = root->left) == NULL) - break; - } - /* Link into the new root's right tree. */ - righttreemin->left = root; - righttreemin = root; - } else if (pindex > root->pindex) { - if ((y = root->right) == NULL) - break; - if (pindex > y->pindex) { - /* Rotate left. */ - root->right = y->left; - y->left = root; - root = y; - if ((y = root->right) == NULL) - break; - } - /* Link into the new root's left tree. */ - lefttreemax->right = root; - lefttreemax = root; - } else - break; - } - /* Assemble the new root. */ - lefttreemax->right = root->left; - righttreemin->left = root->right; - root->left = dummy.right; - root->right = dummy.left; - return (root); -} - -/* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. @@ -864,7 +811,7 @@ vm_page_dirty_KBI(vm_page_t m) void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - vm_page_t root; + vm_page_t neighbor; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (m->object != NULL) @@ -876,31 +823,19 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm m->object = object; m->pindex = pindex; - /* - * Now link into the object's ordered list of backed pages. 
- */ - root = object->root; - if (root == NULL) { - m->left = NULL; - m->right = NULL; + if (object->resident_page_count == 0) { TAILQ_INSERT_TAIL(&object->memq, m, listq); - } else { - root = vm_page_splay(pindex, root); - if (pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - TAILQ_INSERT_BEFORE(root, m, listq); - } else if (pindex == root->pindex) - panic("vm_page_insert: offset already allocated"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - TAILQ_INSERT_AFTER(&object->memq, root, m, listq); - } + } else { + neighbor = vm_radix_lookup_ge(&object->rtree, pindex); + if (neighbor != NULL) { + KASSERT(pindex < neighbor->pindex, + ("vm_page_insert: offset %ju not minor than %ju", + (uintmax_t)pindex, (uintmax_t)neighbor->pindex)); + TAILQ_INSERT_BEFORE(neighbor, m, listq); + } else + TAILQ_INSERT_TAIL(&object->memq, m, listq); } - object->root = m; + vm_radix_insert(&object->rtree, pindex, m); /* * Show that the object has one more resident page. @@ -936,7 +871,6 @@ void vm_page_remove(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); @@ -948,45 +882,7 @@ vm_page_remove(vm_page_t m) vm_page_flash(m); } - /* - * Now remove from the object's list of backed pages. - */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_remove: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_remove: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order. - */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); /* @@ -1014,15 +910,10 @@ vm_page_remove(vm_page_t m) vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = object->root) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->root = m)->pindex != pindex) - m = NULL; - } - return (m); + + return (vm_radix_lookup(&object->rtree, pindex)); } /* @@ -1036,17 +927,11 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pin vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = TAILQ_FIRST(&object->memq)) != NULL) { - if (m->pindex < pindex) { - m = vm_page_splay(pindex, object->root); - if ((object->root = m)->pindex < pindex) - m = TAILQ_NEXT(m, listq); - } - } - return (m); + if (object->resident_page_count != 0) + return (vm_radix_lookup_ge(&object->rtree, pindex)); + return (NULL); } /* @@ -1121,49 +1006,26 @@ vm_page_rename(vm_page_t m, vm_object_t new_object * infinity. 
If the given object is backed by a vnode and it * transitions from having one or more cached pages to none, the * vnode's hold count is reduced. + * + * The object must be locked. */ void vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - vm_page_t m, m_next; + vm_page_t m; boolean_t empty; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_lock(&vm_page_queue_free_mtx); - if (__predict_false(object->cache == NULL)) { + if (vm_object_cache_is_empty(object)) { mtx_unlock(&vm_page_queue_free_mtx); return; } - m = object->cache = vm_page_splay(start, object->cache); - if (m->pindex < start) { - if (m->right == NULL) - m = NULL; - else { - m_next = vm_page_splay(start, m->right); - m_next->left = m; - m->right = NULL; - m = object->cache = m_next; - } - } - - /* - * At this point, "m" is either (1) a reference to the page - * with the least pindex that is greater than or equal to - * "start" or (2) NULL. - */ - for (; m != NULL && (m->pindex < end || end == 0); m = m_next) { - /* - * Find "m"'s successor and remove "m" from the - * object's cache. - */ - if (m->right == NULL) { - object->cache = m->left; - m_next = NULL; - } else { - m_next = vm_page_splay(start, m->right); - m_next->left = m->left; - object->cache = m_next; - } - /* Convert "m" to a free page. */ + while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) { + if (end != 0 && m->pindex >= end) + break; + vm_radix_remove(&object->cache, m->pindex); m->object = NULL; m->valid = 0; /* Clear PG_CACHED and set PG_FREE. */ @@ -1173,7 +1035,7 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t cnt.v_cache_count--; cnt.v_free_count++; } - empty = object->cache == NULL; + empty = vm_object_cache_is_empty(object); mtx_unlock(&vm_page_queue_free_mtx); if (object->type == OBJT_VNODE && empty) vdrop(object->handle); @@ -1183,20 +1045,17 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t * Returns the cached page that is associated with the given * object and offset. If, however, none exists, returns NULL. * - * The free page queue must be locked. + * The free page queue and object must be locked. */ static inline vm_page_t vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if ((m = object->cache) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->cache = m)->pindex != pindex) - m = NULL; - } - return (m); + if (!vm_object_cache_is_empty(object)) + return (vm_radix_lookup(&object->cache, pindex)); + return (NULL); } /* @@ -1208,28 +1067,11 @@ vm_page_cache_lookup(vm_object_t object, vm_pindex static void vm_page_cache_remove(vm_page_t m) { - vm_object_t object; - vm_page_t root; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); - object = m->object; - if (m != object->cache) { - root = vm_page_splay(m->pindex, object->cache); - KASSERT(root == m, - ("vm_page_cache_remove: page %p is not cached in object %p", - m, object)); - } - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->cache = root; + vm_radix_remove(&m->object->cache, m->pindex); m->object = NULL; cnt.v_cache_count--; } @@ -1243,13 +1085,13 @@ vm_page_cache_remove(vm_page_t m) * empty. Offset 'offidxstart' in the original object must * correspond to offset zero in the new object. 
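 * With the cache kept in a radix trie, the transfer below is a plain
 * iteration: vm_radix_lookup_ge() yields the next cached page at or
 * above 'offidxstart', which is removed from the original object's
 * cache and reinserted into the new object's at (pindex - offidxstart),
 * stopping at the first page that would fall at or beyond the new
 * object's size.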
* - * The new object must be locked. + * The new object and original object must be locked. */ void vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart, vm_object_t new_object) { - vm_page_t m, m_next; + vm_page_t m; /* * Insertion into an object's collection of cached pages @@ -1257,53 +1099,19 @@ vm_page_cache_transfer(vm_object_t orig_object, vm * not. */ VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); - KASSERT(new_object->cache == NULL, + VM_OBJECT_LOCK_ASSERT(orig_object, MA_OWNED); + KASSERT(vm_object_cache_is_empty(new_object), ("vm_page_cache_transfer: object %p has cached pages", new_object)); mtx_lock(&vm_page_queue_free_mtx); - if ((m = orig_object->cache) != NULL) { - /* - * Transfer all of the pages with offset greater than or - * equal to 'offidxstart' from the original object's - * cache to the new object's cache. - */ - m = vm_page_splay(offidxstart, m); - if (m->pindex < offidxstart) { - orig_object->cache = m; - new_object->cache = m->right; - m->right = NULL; - } else { - orig_object->cache = m->left; - new_object->cache = m; - m->left = NULL; - } - while ((m = new_object->cache) != NULL) { - if ((m->pindex - offidxstart) >= new_object->size) { - /* - * Return all of the cached pages with - * offset greater than or equal to the - * new object's size to the original - * object's cache. - */ - new_object->cache = m->left; - m->left = orig_object->cache; - orig_object->cache = m; - break; - } - m_next = vm_page_splay(m->pindex, m->right); - /* Update the page's object and offset. */ - m->object = new_object; - m->pindex -= offidxstart; - if (m_next == NULL) - break; - m->right = NULL; - m_next->left = m; - new_object->cache = m_next; - } - KASSERT(new_object->cache == NULL || - new_object->type == OBJT_SWAP, - ("vm_page_cache_transfer: object %p's type is incompatible" - " with cached pages", new_object)); + while ((m = vm_radix_lookup_ge(&orig_object->cache, + offidxstart)) != NULL) { + if ((m->pindex - offidxstart) >= new_object->size) + break; + vm_radix_remove(&orig_object->cache, m->pindex); + vm_radix_insert(&new_object->cache, m->pindex - offidxstart, m); + m->object = new_object; + m->pindex -= offidxstart; } mtx_unlock(&vm_page_queue_free_mtx); } @@ -1327,7 +1135,7 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t * exist. */ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if (__predict_true(object->cache == NULL)) + if (vm_object_cache_is_empty(object)) return (FALSE); mtx_lock(&vm_page_queue_free_mtx); m = vm_page_cache_lookup(object, pindex); @@ -1465,7 +1273,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + vm_object_cache_is_empty(m_object)) vp = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1722,7 +1531,8 @@ vm_page_alloc_init(vm_page_t m) m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + vm_object_cache_is_empty(m_object)) drop = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -2321,7 +2131,7 @@ void vm_page_cache(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; + int old_empty_cache; vm_page_lock_assert(m, MA_OWNED); object = m->object; @@ -2352,46 +2162,7 @@ vm_page_cache(vm_page_t m) */ vm_page_remque(m); - /* - * Remove the page from the object's collection of resident - * pages. 
- */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_cache: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_cache: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order. - */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; @@ -2408,26 +2179,9 @@ vm_page_cache(vm_page_t m) m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; + old_empty_cache = vm_object_cache_is_empty(object); cnt.v_cache_count++; - root = object->cache; - if (root == NULL) { - m->left = NULL; - m->right = NULL; - } else { - root = vm_page_splay(m->pindex, root); - if (m->pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - } else if (__predict_false(m->pindex == root->pindex)) - panic("vm_page_cache: offset already cached"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - } - } - object->cache = m; + vm_radix_insert(&object->cache, m->pindex, m); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else @@ -2445,9 +2199,10 @@ vm_page_cache(vm_page_t m) * the object's only resident page. 
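In the vm_page_cache() conversion above, the old code could tell from the splay insertion itself whether the cache had been empty (root == NULL); the radix version instead samples vm_object_cache_is_empty() into old_empty_cache before calling vm_radix_insert(), and the vnode hold/drop decision that immediately follows relies on that snapshot. The toy below only illustrates why the sample has to precede the insertion; its structure and counters are invented for the sketch and are not kernel code.

    #include <stdio.h>

    /* Minimal stand-ins; none of these are kernel interfaces. */
    struct toy_object {
            int cached_count;               /* models the cache trie */
            int resident_count;
            int hold_count;                 /* models the vnode hold count */
    };

    static int
    cache_is_empty(struct toy_object *o)
    {
            return (o->cached_count == 0);
    }

    /*
     * Why the sample must precede the insertion: once the page has been
     * inserted, the cache can no longer look empty, so the "cache just
     * became non-empty" transition would never be observed.
     */
    static void
    toy_cache_page(struct toy_object *obj)
    {
            int was_empty;

            obj->resident_count--;          /* page leaves the resident set */
            was_empty = cache_is_empty(obj);/* sample first ... */
            obj->cached_count++;            /* ... then insert */

            if (was_empty && obj->resident_count != 0)
                    obj->hold_count++;      /* models vhold() */
            else if (!was_empty && obj->resident_count == 0)
                    obj->hold_count--;      /* models vdrop() */
    }

    int
    main(void)
    {
            struct toy_object obj = { 0, 2, 1 };

            toy_cache_page(&obj);
            printf("hold count %d\n", obj.hold_count);      /* prints 2 */
            return (0);
    }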
*/ if (object->type == OBJT_VNODE) { - if (root == NULL && object->resident_page_count != 0) + if (old_empty_cache != 0 && object->resident_page_count != 0) vhold(object->handle); - else if (root != NULL && object->resident_page_count == 0) + else if (old_empty_cache == 0 && + object->resident_page_count == 0) vdrop(object->handle); } } Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (.../head/sys) (revision 246728) +++ vm/vm_page.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -128,8 +128,6 @@ typedef uint64_t vm_page_bits_t; struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* page queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - struct vm_page *left; /* splay tree link (O) */ - struct vm_page *right; /* splay tree link (O) */ vm_object_t object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,P) */ @@ -404,7 +402,6 @@ void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); void vm_page_sleep(vm_page_t m, const char *msg); -vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unwire (vm_page_t, int); Index: i386/include/pmap.h =================================================================== --- i386/include/pmap.h (.../head/sys) (revision 246728) +++ i386/include/pmap.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -426,10 +426,20 @@ struct pv_entry; struct pv_chunk; struct md_page { - TAILQ_HEAD(,pv_entry) pv_list; - int pat_mode; + union { + TAILQ_HEAD(,pv_entry) pvi_list; + struct { + vm_page_t pii_left; + vm_page_t pii_right; + } pvi_siters; + } pv_structs; + int pat_mode; }; +#define pv_list pv_structs.pvi_list +#define pv_left pv_structs.pvi_siters.pii_left +#define pv_right pv_structs.pvi_siters.pii_right + struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ @@ -468,7 +478,7 @@ extern struct pmap kernel_pmap_store; */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (.../head/sys) (revision 246728) +++ i386/i386/pmap.c (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -330,6 +330,7 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t p static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); +static vm_page_t pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -1574,7 +1575,8 @@ pmap_free_zero_pages(vm_page_t free) while (free != NULL) { m = free; - free = m->right; + free = (void *)m->object; + m->object = NULL; /* Preserve the page's PG_ZERO setting. 
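A side effect of dropping the page's left/right links shows up in the pmap_free_zero_pages() and pmap_add_delayed_free_list() changes around this point: the delayed-free list used to be chained through m->right and is now threaded through the page's object pointer (cast via void *), which is also why that pointer is cleared before the page is handed to vm_page_free_toq(). A toy version of the trick, with made-up names:

    #include <stdio.h>

    /*
     * Toy page: while a page sits on the delayed-free list its object
     * pointer is spare, so the list is threaded through it.
     */
    struct toy_page {
            int id;
            void *object;
    };

    static void
    add_delayed_free(struct toy_page *m, struct toy_page **freep)
    {
            m->object = (void *)*freep;     /* link through the spare field */
            *freep = m;
    }

    static void
    free_all(struct toy_page *freelist)
    {
            struct toy_page *m;

            while (freelist != NULL) {
                    m = freelist;
                    freelist = (struct toy_page *)m->object;
                    m->object = NULL;       /* never let the link escape */
                    printf("freeing page %d\n", m->id);
            }
    }

    int
    main(void)
    {
            struct toy_page a = { 1, NULL }, b = { 2, NULL };
            struct toy_page *freelist = NULL;

            add_delayed_free(&a, &freelist);
            add_delayed_free(&b, &freelist);
            free_all(freelist);             /* prints page 2, then page 1 */
            return (0);
    }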
*/ vm_page_free_toq(m); } @@ -1593,7 +1595,7 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->object = (void *)*free; *free = m; } @@ -1611,20 +1613,20 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; + mpte->md.pv_left = NULL; + mpte->md.pv_right = NULL; } else { - root = vm_page_splay(mpte->pindex, root); + root = pmap_vmpage_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; + mpte->md.pv_left = root->md.pv_left; + mpte->md.pv_right = root; + root->md.pv_left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; + mpte->md.pv_right = root->md.pv_right; + mpte->md.pv_left = root; + root->md.pv_right = NULL; } } pmap->pm_root = mpte; @@ -1643,7 +1645,7 @@ pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); + mpte = pmap_vmpage_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } @@ -1662,14 +1664,20 @@ pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) - vm_page_splay(mpte->pindex, pmap->pm_root); - if (mpte->left == NULL) - root = mpte->right; + pmap_vmpage_splay(mpte->pindex, pmap->pm_root); + if (mpte->md.pv_left == NULL) + root = mpte->md.pv_right; else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; + root = pmap_vmpage_splay(mpte->pindex, mpte->md.pv_left); + root->md.pv_right = mpte->md.pv_right; } pmap->pm_root = root; + + /* + * Reinitialize the pv_list which could be dirty now because of the + * splay tree work. + */ + TAILQ_INIT(&mpte->md.pv_list); } /* @@ -1723,6 +1731,61 @@ _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page } /* + * Implements Sleator and Tarjan's top-down splay algorithm. Returns + * the vm_page containing the given pindex. If, however, that + * pindex is not found in the pmap, returns a vm_page that is + * adjacent to the pindex, coming before or after it. + */ +static vm_page_t +pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root) +{ + struct vm_page dummy; + vm_page_t lefttreemax, righttreemin, y; + + if (root == NULL) + return (root); + lefttreemax = righttreemin = &dummy; + for (;; root = y) { + if (pindex < root->pindex) { + if ((y = root->md.pv_left) == NULL) + break; + if (pindex < y->pindex) { + /* Rotate right. */ + root->md.pv_left = y->md.pv_right; + y->md.pv_right = root; + root = y; + if ((y = root->md.pv_left) == NULL) + break; + } + /* Link into the new root's right tree. */ + righttreemin->md.pv_left = root; + righttreemin = root; + } else if (pindex > root->pindex) { + if ((y = root->md.pv_right) == NULL) + break; + if (pindex > y->pindex) { + /* Rotate left. */ + root->md.pv_right = y->md.pv_left; + y->md.pv_left = root; + root = y; + if ((y = root->md.pv_right) == NULL) + break; + } + /* Link into the new root's left tree. */ + lefttreemax->md.pv_right = root; + lefttreemax = root; + } else + break; + } + /* Assemble the new root. 
*/ + lefttreemax->md.pv_right = root->md.pv_left; + righttreemin->md.pv_left = root->md.pv_right; + root->md.pv_left = dummy.md.pv_right; + root->md.pv_right = dummy.md.pv_left; + return (root); +} + +/* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ @@ -2286,7 +2349,7 @@ pmap_pv_reclaim(pmap_t locked_pmap) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -2344,7 +2407,7 @@ out: } if (m_pc == NULL && pv_vafree != 0 && free != NULL) { m_pc = free; - free = m_pc->right; + free = (void *)m_pc->object; /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&cnt.v_wire_count, 1); @@ -2491,9 +2554,9 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2521,7 +2584,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2557,7 +2620,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, v pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. 
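The pmap_vmpage_splay() routine completed above threads its tree links through md.pv_left and md.pv_right, which the md_page change in i386/include/pmap.h places in the same union as the page's pv_list head. That overlap is why pmap_remove_pt_page() now ends with TAILQ_INIT(&mpte->md.pv_list): the splay work scribbles over the list head, so it must be reinitialized before the page can carry pv entries again. The sketch below shows the aliasing in isolation; struct toy_md and its members are illustrative only.

    #include <assert.h>
    #include <stdio.h>
    #include <sys/queue.h>

    struct toy_entry {
            TAILQ_ENTRY(toy_entry) next;
    };

    /* Same shape as the new md_page: list head and tree links share storage. */
    struct toy_md {
            union {
                    TAILQ_HEAD(, toy_entry) list;
                    struct {
                            void *left;
                            void *right;
                    } links;
            } u;
    };

    int
    main(void)
    {
            struct toy_md md;

            TAILQ_INIT(&md.u.list);
            assert(TAILQ_EMPTY(&md.u.list));

            /* Using the structure as a tree node clobbers the list head... */
            md.u.links.left = &md;
            md.u.links.right = &md;

            /* ...so the head must be rebuilt before the list is used again. */
            TAILQ_INIT(&md.u.list);
            assert(TAILQ_EMPTY(&md.u.list));
            printf("list head reinitialized after tree use\n");
            return (0);
    }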
*/ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2604,7 +2667,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_ PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* @@ -2620,7 +2683,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2640,7 +2703,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -3095,7 +3158,7 @@ small_mappings: vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3551,7 +3614,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); @@ -4259,7 +4322,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4270,7 +4333,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4322,7 +4385,7 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int c rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4449,7 +4512,7 @@ pmap_remove_pages(pmap_t pmap) if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) @@ -4467,7 +4530,7 @@ pmap_remove_pages(pmap_t pmap) } } else { pmap->pm_stats.resident_count--; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -4537,7 +4600,7 @@ pmap_is_modified_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4610,7 +4673,7 @@ pmap_is_referenced_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, 
&pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4653,7 +4716,7 @@ pmap_remove_write(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4663,7 +4726,7 @@ pmap_remove_write(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4722,7 +4785,7 @@ pmap_ts_referenced(vm_page_t m) sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4756,9 +4819,9 @@ small_mappings: if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4812,7 +4875,7 @@ pmap_clear_modify(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4849,7 +4912,7 @@ pmap_clear_modify(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4893,7 +4956,7 @@ pmap_clear_reference(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4916,7 +4979,7 @@ pmap_clear_reference(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -5427,7 +5490,7 @@ pmap_pvdump(vm_paddr_t pa) printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); pads(pmap); Index: cddl/compat/opensolaris/sys/vnode.h =================================================================== --- cddl/compat/opensolaris/sys/vnode.h (.../head/sys) (revision 246728) +++ cddl/compat/opensolaris/sys/vnode.h (.../user/attilio/vmc-playground/sys) (revision 246728) @@ -76,7 +76,7 @@ vn_is_readonly(vnode_t *vp) #define vn_has_cached_data(vp) \ ((vp)->v_object != NULL && \ ((vp)->v_object->resident_page_count > 0 || \ - (vp)->v_object->cache != NULL)) + !vm_object_cache_is_empty((vp)->v_object))) #define vn_exists(vp) do { } while (0) #define vn_invalid(vp) do { } while (0) #define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0)
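A closing note on the mechanical pv_list -> pv_next substitutions that make up most of the i386 pmap.c churn above: they are forced by the new '#define pv_list pv_structs.pvi_list' convenience macro introduced with the md_page union. With that macro in scope wherever pmap.h is included, a structure member literally spelled pv_list, as the old pv_entry linkage was, would itself be macro-expanded, so the linkage had to be renamed. The fragment below uses made-up demo_* names purely to show the hazard the rename avoids.

    #include <sys/queue.h>

    struct demo_entry;

    /* The convenience macro added to the md_page definition. */
    #define pv_list pv_structs.pvi_list

    struct demo_md {
            union {
                    TAILQ_HEAD(, demo_entry) pvi_list;
            } pv_structs;
    };

    /*
     * If this member were still spelled pv_list, the macro above would
     * rewrite it to "pv_structs.pvi_list" and the declaration would not
     * compile; naming it pv_next sidesteps the collision.
     */
    struct demo_entry {
            TAILQ_ENTRY(demo_entry) pv_next;
    };

    int
    main(void)
    {
            struct demo_md md;

            TAILQ_INIT(&md.pv_list);        /* expands to pv_structs.pvi_list */
            return (TAILQ_EMPTY(&md.pv_list) ? 0 : 1);
    }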