Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c (revisione 236731) +++ sys/fs/tmpfs/tmpfs_vnops.c (copia locale) @@ -514,7 +514,7 @@ tmpfs_mappedread(vm_object_t vobj, vm_object_t tob goto nocache; VM_OBJECT_LOCK(vobj); - if (vobj->resident_page_count == 0 && vobj->cached_page_count == 0) { + if (vobj->resident_page_count == 0 && vm_object_cache_is_empty(vobj)) { VM_OBJECT_UNLOCK(vobj); goto nocache; } @@ -647,41 +647,38 @@ tmpfs_mappedwrite(vm_object_t vobj, vm_object_t to } VM_OBJECT_LOCK(vobj); - if (vobj->resident_page_count == 0 && vobj->cached_page_count == 0) { + if (vobj->resident_page_count == 0 && vm_object_cache_is_empty(vobj)) { VM_OBJECT_UNLOCK(vobj); vpg = NULL; goto nocache; } lookupvpg: - vpg = vm_radix_lookup(&vobj->rtree, idx, VM_RADIX_ANY); - if (vpg != NULL) { - if (vm_page_is_valid(vpg, offset, tlen)) { - if ((vpg->oflags & VPO_BUSY) != 0) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. - */ - vm_page_reference(vpg); - vm_page_sleep(vpg, "tmfsmw"); - goto lookupvpg; - } - vm_page_busy(vpg); - vm_page_undirty(vpg); - VM_OBJECT_UNLOCK(vobj); - error = uiomove_fromphys(&vpg, offset, tlen, uio); - } else { - if (vpg->flags & PG_CACHED) { - mtx_lock(&vm_page_queue_free_mtx); - if (vpg->object == vobj) - vm_page_cache_free(vpg); - mtx_unlock(&vm_page_queue_free_mtx); - } - VM_OBJECT_UNLOCK(vobj); - vpg = NULL; + if (((vpg = vm_radix_lookup(&vobj->rtree, idx)) != NULL) && + vm_page_is_valid(vpg, offset, tlen)) { + if ((vpg->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. + */ + vm_page_reference(vpg); + vm_page_sleep(vpg, "tmfsmw"); + goto lookupvpg; } - } else + vm_page_busy(vpg); + vm_page_undirty(vpg); VM_OBJECT_UNLOCK(vobj); + error = uiomove_fromphys(&vpg, offset, tlen, uio); + } else { + vpg = vm_page_is_cached(vobj, idx); + if (vpg != NULL) { + mtx_lock(&vm_page_queue_free_mtx); + if (vpg->object == vobj) + vm_page_cache_free(vpg); + mtx_unlock(&vm_page_queue_free_mtx); + } + VM_OBJECT_UNLOCK(vobj); + vpg = NULL; + } nocache: VM_OBJECT_LOCK(tobj); tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | Index: sys/vm/vm_radix.c =================================================================== --- sys/vm/vm_radix.c (revisione 236763) +++ sys/vm/vm_radix.c (copia locale) @@ -293,14 +293,12 @@ vm_radix_setroot(struct vm_radix *rtree, struct vm } static inline void * -vm_radix_match(void *child, int color) +vm_radix_match(void *child) { uintptr_t c; c = (uintptr_t)child; - if ((c & color) == 0) - return (NULL); return ((void *)(c & ~VM_RADIX_FLAGS)); } @@ -316,7 +314,7 @@ vm_radix_reclaim_allnodes_internal(struct vm_radix * case, free the node and return. 
*/ if (level == 0) { - CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + CTR2(KTR_FLO, "reclaiming: node %p, level %d", rnode, level); rnode->rn_count = 0; vm_radix_node_put(rnode); return; @@ -324,7 +322,7 @@ vm_radix_reclaim_allnodes_internal(struct vm_radix for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { if (rnode->rn_child[slot] == NULL) continue; - CTR3(KTR_VM, + CTR3(KTR_FLO, "reclaiming: node %p, level %d recursing in slot %d", rnode, level, slot); vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot], @@ -332,7 +330,7 @@ vm_radix_reclaim_allnodes_internal(struct vm_radix rnode->rn_count--; } MPASS(rnode->rn_count == 0); - CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + CTR2(KTR_FLO, "reclaiming: node %p, level %d", rnode, level); vm_radix_node_put(rnode); } @@ -429,9 +427,8 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_ KASSERT(rnode->rn_child[slot] == NULL, ("vm_radix_insert: Duplicate value %p at index: %lu\n", rnode->rn_child[slot], (u_long)index)); - val = (void *)((uintptr_t)val | VM_RADIX_BLACK); rnode->rn_child[slot] = val; - atomic_add_32(&rnode->rn_count, 1); + rnode->rn_count++; CTR5(KTR_VM, "insert: tree %p, " KFRMT64(index) ", level %d, slot %d", rtree, KSPLT64L(index), KSPLT64H(index), level, slot); @@ -446,7 +443,7 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_ * NULL is returned. */ void * -vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *rnode; int slot; @@ -458,57 +455,23 @@ void * level--; while (rnode) { slot = vm_radix_slot(index, level); - CTR6(KTR_VM, + CTR6(KTR_FLO, "lookup: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", rtree, KSPLT64L(index), KSPLT64H(index), level, slot, rnode); - CTR2(KTR_VM, "lookup: rnode %p, child %p", rnode, + CTR2(KTR_FLO, "lookup: rnode %p, child %p", rnode, rnode->rn_child[slot]); if (level == 0) - return vm_radix_match(rnode->rn_child[slot], color); + return vm_radix_match(rnode->rn_child[slot]); rnode = rnode->rn_child[slot]; level--; } - CTR3(KTR_VM, "lookup: tree %p, " KFRMT64(index) " failed", rtree, + CTR3(KTR_FLO, "lookup: tree %p, " KFRMT64(index) " failed", rtree, KSPLT64L(index), KSPLT64H(index)); return NULL; } -void * -vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color) -{ - struct vm_radix_node *rnode; - uintptr_t child; - int slot; - int level; - - level = vm_radix_height(rtree, &rnode); - if (index > VM_RADIX_MAX(level)) - return NULL; - level--; - while (rnode) { - slot = vm_radix_slot(index, level); - CTR6(KTR_VM, - "color: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", - rtree, KSPLT64L(index), KSPLT64H(index), level, slot, - rnode); - CTR2(KTR_VM, "color: rnode %p, child %p", rnode, - rnode->rn_child[slot]); - if (level == 0) - break; - rnode = rnode->rn_child[slot]; - level--; - } - if (rnode == NULL || rnode->rn_child[slot] == NULL) - return (NULL); - child = (uintptr_t)rnode->rn_child[slot]; - child &= ~VM_RADIX_FLAGS; - rnode->rn_child[slot] = (void *)(child | color); - - return (void *)child; -} - /* * Find the first leaf with a valid node between *startp and end. Return * the index of the first valid item in the leaf in *startp. 
@@ -598,7 +561,7 @@ out: */ int vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start, - vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next) + vm_pindex_t end, void **out, int cnt, vm_pindex_t *next) { struct vm_radix_node *rnode; void *val; @@ -616,7 +579,7 @@ vm_radix_lookupn(struct vm_radix *rtree, vm_pindex for (; slot < VM_RADIX_COUNT; slot++, start++) { if (end != 0 && start >= end) goto out; - val = vm_radix_match(rnode->rn_child[slot], color); + val = vm_radix_match(rnode->rn_child[slot]); if (val == NULL) { /* @@ -656,38 +619,11 @@ out: return (outidx); } -#if 0 -void -vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end, - int color, void (*iter)(void *)) -{ - struct vm_radix_node *rnode; - void *val; - int slot; - - if (rtree->rt_root == 0) - return; - while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { - slot = vm_radix_slot(start, 0); - for (; slot < VM_RADIX_COUNT; slot++, start++) { - if (end != 0 && start >= end) - return; - val = vm_radix_match(rnode->rn_child[slot], color); - if (val) - iter(val); - } - if (end != 0 && start >= end) - return; - } -} -#endif - - /* * Look up any entry at a position less than or equal to index. */ void * -vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *rnode; struct vm_radix_node *child; @@ -751,7 +687,7 @@ restart: } if (rnode) { for (; slot >= 0; slot--, index--) { - val = vm_radix_match(rnode->rn_child[slot], color); + val = vm_radix_match(rnode->rn_child[slot]); if (val) return (val); } @@ -767,7 +703,7 @@ restart: * panics if the key is not present. */ void -vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) { struct vm_radix_node *stack[VM_RADIX_LIMIT]; struct vm_radix_node *rnode, *root; @@ -786,11 +722,11 @@ void while (level && rnode) { stack[level] = rnode; slot = vm_radix_slot(index, level); - CTR6(KTR_VM, + CTR6(KTR_FLO, "remove: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", rtree, KSPLT64L(index), KSPLT64H(index), level, slot, rnode); - CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u", + CTR4(KTR_FLO, "remove: tree %p, rnode %p, child %p, count %u", rtree, rnode, rnode->rn_child[slot], rnode->rn_count); rnode = rnode->rn_child[slot]; level--; @@ -798,36 +734,29 @@ void KASSERT(rnode != NULL, ("vm_radix_remove: index not present in the tree.\n")); slot = vm_radix_slot(index, 0); - KASSERT(vm_radix_match(rnode->rn_child[slot], color) != NULL, + KASSERT(vm_radix_match(rnode->rn_child[slot]) != NULL, ("vm_radix_remove: index not present in the tree.\n")); for (;;) { - CTR6(KTR_VM, + CTR6(KTR_FLO, "remove: resetting tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", rtree, KSPLT64L(index), KSPLT64H(index), level, slot, rnode); - CTR4(KTR_VM, + CTR4(KTR_FLO, "remove: resetting tree %p, rnode %p, child %p, count %u", rtree, rnode, (rnode != NULL) ? rnode->rn_child[slot] : NULL, (rnode != NULL) ? rnode->rn_count : 0); rnode->rn_child[slot] = NULL; /* - * Use atomics for the last level since red and black - * will both adjust it. * Use a write memory barrier here in order to avoid * rn_count reaching 0 before to fetch the actual pointer. - * Concurrent black removal, infact, may want to reclaim + * Concurrent node removal, infact, may want to reclaim * the radix node itself before to read it. 
*/ - if (level == 0) - atomic_add_rel_32(&rnode->rn_count, -1); - else - rnode->rn_count--; - /* - * Only allow black removes to prune the tree. - */ - if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0) + rnode->rn_count--; + wmb(); + if (rnode->rn_count > 0) break; vm_radix_node_put(rnode); if (rnode == root) { @@ -857,35 +786,3 @@ vm_radix_reclaim_allnodes(struct vm_radix *rtree) vm_radix_reclaim_allnodes_internal(root, level - 1); rtree->rt_root = 0; } - -#ifdef notyet -/* - * Attempts to reduce the height of the tree. - */ -void -vm_radix_shrink(struct vm_radix *rtree) -{ - struct vm_radix_node *tmp, *root; - int level; - - if (rtree->rt_root == 0) - return; - level = vm_radix_height(rtree, &root); - - /* Adjust the height of the tree. */ - while (root->rn_count == 1 && root->rn_child[0] != NULL) { - tmp = root; - root->rn_count--; - root = root->rn_child[0]; - level--; - vm_radix_node_put(tmp); - } - /* Finally see if we have an empty tree. */ - if (root->rn_count == 0) { - vm_radix_node_put(root); - root = NULL; - level--; - } - vm_radix_setroot(rtree, root, level); -} -#endif Index: sys/vm/vm_radix.h =================================================================== --- sys/vm/vm_radix.h (revisione 236731) +++ sys/vm/vm_radix.h (copia locale) @@ -29,9 +29,6 @@ #ifndef _VM_RADIX_H_ #define _VM_RADIX_H_ -#define VM_RADIX_BLACK 0x1 /* Black node. (leaf only) */ -#define VM_RADIX_RED 0x2 /* Red node. (leaf only) */ -#define VM_RADIX_ANY (VM_RADIX_RED | VM_RADIX_BLACK) #define VM_RADIX_STACK 8 /* Nodes to store on stack. */ /* @@ -44,70 +41,58 @@ struct vm_radix { #ifdef _KERNEL -/* - * Initialize the radix tree subsystem. - */ void vm_radix_init(void); - -/* - * Functions which only work with black nodes. (object lock) - */ int vm_radix_insert(struct vm_radix *, vm_pindex_t, void *); - -/* - * Functions which work on specified colors. (object, vm_page_queue_free locks) - */ -void *vm_radix_color(struct vm_radix *, vm_pindex_t, int); -void *vm_radix_lookup(struct vm_radix *, vm_pindex_t, int); -int vm_radix_lookupn(struct vm_radix *, vm_pindex_t, vm_pindex_t, int, - void **, int, vm_pindex_t *); -void *vm_radix_lookup_le(struct vm_radix *, vm_pindex_t, int); +void *vm_radix_lookup(struct vm_radix *, vm_pindex_t); +int vm_radix_lookupn(struct vm_radix *, vm_pindex_t, vm_pindex_t, void **, + int, vm_pindex_t *); +void *vm_radix_lookup_le(struct vm_radix *, vm_pindex_t); void vm_radix_reclaim_allnodes(struct vm_radix *); -void vm_radix_remove(struct vm_radix *, vm_pindex_t, int); +void vm_radix_remove(struct vm_radix *, vm_pindex_t); /* * Look up any entry at a position greater or equal to index. 
*/ static inline void * -vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) { void *val; - if (vm_radix_lookupn(rtree, index, 0, color, &val, 1, &index)) + if (vm_radix_lookupn(rtree, index, 0, &val, 1, &index)) return (val); return (NULL); } static inline void * -vm_radix_last(struct vm_radix *rtree, int color) +vm_radix_last(struct vm_radix *rtree) { - return vm_radix_lookup_le(rtree, 0, color); + return vm_radix_lookup_le(rtree, 0); } static inline void * -vm_radix_first(struct vm_radix *rtree, int color) +vm_radix_first(struct vm_radix *rtree) { - return vm_radix_lookup_ge(rtree, 0, color); + return vm_radix_lookup_ge(rtree, 0); } static inline void * -vm_radix_next(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_next(struct vm_radix *rtree, vm_pindex_t index) { if (index == -1) return (NULL); - return vm_radix_lookup_ge(rtree, index + 1, color); + return vm_radix_lookup_ge(rtree, index + 1); } static inline void * -vm_radix_prev(struct vm_radix *rtree, vm_pindex_t index, int color) +vm_radix_prev(struct vm_radix *rtree, vm_pindex_t index) { if (index == 0) return (NULL); - return vm_radix_lookup_le(rtree, index - 1, color); + return vm_radix_lookup_le(rtree, index - 1); } #endif /* _KERNEL */ Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c (revisione 236811) +++ sys/vm/vm_object.c (copia locale) @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include /* for curproc, pageproc */ @@ -164,9 +165,6 @@ vm_object_zdtor(void *mem, int size, void *arg) vm_object_t object; object = (vm_object_t)mem; - KASSERT(object->resident_page_count == 0, - ("object %p resident_page_count = %d", - object, object->resident_page_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages", object)); @@ -175,12 +173,15 @@ vm_object_zdtor(void *mem, int size, void *arg) ("object %p has reservations", object)); #endif - KASSERT(object->cached_page_count == 0, + KASSERT(vm_object_cache_is_empty(object), ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); + KASSERT(object->resident_page_count == 0, + ("object %p resident_page_count = %d", + object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); @@ -211,7 +212,6 @@ _vm_object_allocate(objtype_t type, vm_pindex_t si LIST_INIT(&object->shadow_head); object->rtree.rt_root = 0; - object->cache.rt_root = 0; object->type = type; object->size = size; object->generation = 1; @@ -229,6 +229,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t si #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif + object->cache.rt_root = 0; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -680,6 +681,7 @@ vm_object_terminate(vm_object_t object) vm_page_t pa[VM_RADIX_STACK]; vm_page_t p; vm_pindex_t start; + struct vnode *vp; int n, i; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -726,32 +728,16 @@ vm_object_terminate(vm_object_t object) * the object, the page and object are reset to any empty state. 
*/ start = 0; - while ((n = vm_radix_lookupn(&object->rtree, start, 0, VM_RADIX_ANY, - (void **)pa, VM_RADIX_STACK, &start)) != 0) { + while ((n = vm_radix_lookupn(&object->rtree, start, 0, (void **)pa, + VM_RADIX_STACK, &start)) != 0) { for (i = 0; i < n; i++) { p = pa[i]; - /* - * Another thread may allocate this cached page from - * the queue before we acquire the page queue free - * mtx. - */ - if (p->flags & PG_CACHED) { - mtx_lock(&vm_page_queue_free_mtx); - if (p->object == object) { - p->object = NULL; - p->valid = 0; - /* Clear PG_CACHED and set PG_FREE. */ - p->flags ^= PG_CACHED | PG_FREE; - cnt.v_cache_count--; - cnt.v_free_count++; - } - mtx_unlock(&vm_page_queue_free_mtx); + if (p->object != object) continue; - } else if (p->object != object) - continue; KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p", p)); vm_page_lock(p); + /* * Optimize the page's removal from the object by * resetting its "object" field. Specifically, if @@ -774,7 +760,42 @@ vm_object_terminate(vm_object_t object) break; } vm_radix_reclaim_allnodes(&object->rtree); - vm_radix_reclaim_allnodes(&object->cache); + vp = NULL; + if (!vm_object_cache_is_empty(object)) { + mtx_lock(&vm_page_queue_free_mtx); + start = 0; + while ((n = vm_radix_lookupn(&object->cache, start, 0, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + MPASS(p->object == object); + if (p->object != object) + continue; + p->object = NULL; + p->valid = 0; + + /* Clear PG_CACHED and set PG_FREE. */ + p->flags ^= PG_CACHED | PG_FREE; + cnt.v_cache_count--; + cnt.v_free_count++; + + /* + * At least one cached page was removed and + * in the end all the cached pages will be + * reclaimed. If the object is a vnode, + * drop a reference to it. + */ + if (object->type == OBJT_VNODE) + vp = object->handle; + } + if (n < VM_RADIX_STACK) + break; + } + CTR2(KTR_FLO, "reclaiming: object %p, cache 0x%x", object, + object->cache.rt_root); + vm_radix_reclaim_allnodes(&object->cache); + mtx_unlock(&vm_page_queue_free_mtx); + } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were @@ -786,11 +807,8 @@ vm_object_terminate(vm_object_t object) if (object->type == OBJT_VNODE) vdrop(object->handle); } - if (object->cached_page_count != 0) { - object->cached_page_count = 0; - if (object->type == OBJT_VNODE) - vdrop(object->handle); - } + if (vp) + vdrop(vp); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -1373,20 +1391,13 @@ vm_object_split(vm_map_entry_t entry) start = offidxstart; retry: while ((n = vm_radix_lookupn(&orig_object->rtree, start, - offidxstart + size, VM_RADIX_ANY, (void **)ma, VM_RADIX_STACK, - &start)) != 0) { + offidxstart + size, (void **)ma, VM_RADIX_STACK, &start)) != 0) { for (i = 0; i < n; i++) { m = ma[i]; idx = m->pindex - offidxstart; - if (m->flags & PG_CACHED) { - mtx_lock(&vm_page_queue_free_mtx); - if (m->object == orig_object) - vm_page_cache_rename(m, new_object, - idx); - mtx_unlock(&vm_page_queue_free_mtx); + if (m->object != orig_object) continue; - } else if (m->object != orig_object) - continue; + /* * We must wait for pending I/O to complete before * we can rename the page. @@ -1438,6 +1449,30 @@ retry: * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); + + /* + * Transfer any cached pages from orig_object to new_object. 
+ */ + if (!vm_object_cache_is_empty(orig_object)) { + mtx_lock(&vm_page_queue_free_mtx); + start = offidxstart; + while ((n = vm_radix_lookupn(&orig_object->cache, start, + offidxstart + size, (void **)ma, VM_RADIX_STACK, + &start)) != 0) { + for (i = 0; i < n; i++) { + m = ma[i]; + MPASS(m->object == orig_object); + if (m->object != orig_object) + continue; + idx = m->pindex - offidxstart; + vm_page_cache_rename(m, new_object, + idx); + } + if (n < VM_RADIX_STACK) + break; + } + mtx_unlock(&vm_page_queue_free_mtx); + } } VM_OBJECT_UNLOCK(orig_object); TAILQ_FOREACH(m, &new_object->memq, listq) @@ -1461,7 +1496,7 @@ vm_object_backing_scan(vm_object_t object, int op) vm_object_t backing_object; vm_pindex_t backing_offset_index, new_pindex; vm_pindex_t start; - int color, i, n; + int i, n; int r = 1; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -1490,9 +1525,6 @@ vm_object_backing_scan(vm_object_t object, int op) if (op & OBSC_COLLAPSE_WAIT) { vm_object_set_flag(backing_object, OBJ_DEAD); } - color = VM_RADIX_BLACK; - if (op & OBSC_COLLAPSE_WAIT) - color |= VM_RADIX_RED; /* * Our scan */ @@ -1504,23 +1536,14 @@ restart: if (n < VM_RADIX_STACK) break; if ((n = vm_radix_lookupn(&backing_object->rtree, - start, 0, color, (void **)pa, VM_RADIX_STACK, + start, 0, (void **)pa, VM_RADIX_STACK, &start)) == 0) break; i = 0; } p = pa[i++]; - /* - * Free cached pages. XXX Why? Emulating old behavior here. - */ - if (p->flags & PG_CACHED) { - mtx_lock(&vm_page_queue_free_mtx); - if (p->object == backing_object) - vm_page_cache_free(p); - mtx_unlock(&vm_page_queue_free_mtx); + if (p->object != backing_object) continue; - } else if (p->object != backing_object) - continue; new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { @@ -1914,6 +1937,7 @@ vm_object_page_remove(vm_object_t object, vm_pinde { struct vnode *vp; vm_page_t pa[VM_RADIX_STACK]; + vm_pindex_t cstart; vm_page_t p; int i, n; int wirings; @@ -1922,32 +1946,20 @@ vm_object_page_remove(vm_object_t object, vm_pinde KASSERT((object->type != OBJT_DEVICE && object->type != OBJT_PHYS) || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); - if (object->resident_page_count == 0 && object->cached_page_count == 0) + if (object->resident_page_count == 0 && + vm_object_cache_is_empty(object)) return; vp = NULL; vm_object_pip_add(object, 1); + cstart = start; restart: - while ((n = vm_radix_lookupn(&object->rtree, start, end, VM_RADIX_ANY, - (void **)pa, VM_RADIX_STACK, &start)) != 0) { + while ((n = vm_radix_lookupn(&object->rtree, start, end, (void **)pa, + VM_RADIX_STACK, &start)) != 0) { for (i = 0; i < n; i++) { p = pa[i]; - /* - * Another thread may allocate this cached page from - * the queue before we acquire the page queue free - * mtx. 
- */ - if (p->flags & PG_CACHED) { - mtx_lock(&vm_page_queue_free_mtx); - if (p->object == object) { - vm_page_cache_free(p); - if (object->type == OBJT_VNODE && - object->cached_page_count == 0) - vp = object->handle; - } - mtx_unlock(&vm_page_queue_free_mtx); + if (p->object != object) continue; - } else if (p->object != object) - continue; + /* * If the page is wired for any reason besides * the existence of managed, wired mappings, then @@ -2005,6 +2017,26 @@ restart: break; } vm_object_pip_wakeup(object); + if (!vm_object_cache_is_empty(object)) { + mtx_lock(&vm_page_queue_free_mtx); + start = cstart; + while ((n = vm_radix_lookupn(&object->cache, start, end, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + MPASS(p->object == object); + if (p->object != object) + continue; + vm_page_cache_free(p); + if (vm_object_cache_is_empty(object) && + object->type == OBJT_VNODE) + vp = object->handle; + } + if (n < VM_RADIX_STACK) + break; + } + mtx_unlock(&vm_page_queue_free_mtx); + } if (vp) vdrop(vp); } @@ -2240,6 +2272,13 @@ vm_object_set_writeable_dirty(vm_object_t object) vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } +int +vm_object_cache_is_empty(vm_object_t object) +{ + + return (__predict_true(object->cache.rt_root == 0)); +} + #include "opt_ddb.h" #ifdef DDB #include Index: sys/vm/vm_object.h =================================================================== --- sys/vm/vm_object.h (revisione 236811) +++ sys/vm/vm_object.h (copia locale) @@ -101,7 +101,6 @@ struct vm_object { u_short pg_color; /* (c) color of first page in obj */ u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ int resident_page_count; /* number of resident pages */ - int cached_page_count; /* number of cached pages */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ @@ -222,6 +221,7 @@ vm_object_t vm_object_allocate (objtype_t, vm_pind void _vm_object_allocate (objtype_t, vm_pindex_t, vm_object_t); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, boolean_t); +int vm_object_cache_is_empty (vm_object_t); void vm_object_collapse (vm_object_t); void vm_object_deallocate (vm_object_t); void vm_object_destroy (vm_object_t); Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c (revisione 236763) +++ sys/vm/vm_page.c (copia locale) @@ -85,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -298,7 +299,7 @@ vm_page_startup(vm_offset_t vaddr) mtx_init(&vm_page_queue_mtx, "vm page queue mutex", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL, - MTX_DEF); + MTX_DEF | MTX_RECURSE); /* Setup page locks. 
*/ for (i = 0; i < PA_LOCK_COUNT; i++) @@ -835,8 +836,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm if (object->resident_page_count == 0) { TAILQ_INSERT_TAIL(&object->memq, m, listq); } else { - neighbor = vm_radix_lookup_ge(&object->rtree, pindex, - VM_RADIX_BLACK); + neighbor = vm_radix_lookup_ge(&object->rtree, pindex); if (neighbor != NULL) { KASSERT(pindex < neighbor->pindex, ("vm_page_insert: offset %ju not minor than %ju", @@ -893,7 +893,7 @@ vm_page_remove(vm_page_t m) vm_page_flash(m); } - vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_BLACK); + vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); /* @@ -925,7 +925,7 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pin VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_BLACK); + return vm_radix_lookup(&object->rtree, pindex); } /* @@ -943,8 +943,7 @@ vm_page_find_least(vm_object_t object, vm_pindex_t VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (object->resident_page_count) - return vm_radix_lookup_ge(&object->rtree, pindex, - VM_RADIX_BLACK); + return (vm_radix_lookup_ge(&object->rtree, pindex)); return (NULL); } @@ -1026,8 +1025,8 @@ vm_page_cache_lookup(vm_object_t object, vm_pindex VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if (object->cached_page_count != 0) - return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_RED); + if (!vm_object_cache_is_empty(object)) + return (vm_radix_lookup(&object->cache, pindex)); return (NULL); } @@ -1044,8 +1043,8 @@ vm_page_cache_remove(vm_page_t m) mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); - vm_radix_remove(&m->object->rtree, m->pindex, VM_RADIX_RED); - m->object->cached_page_count--; + CTR2(KTR_FLO, "free: object %p, page %p", m->object, m); + vm_radix_remove(&m->object->cache, m->pindex); m->object = NULL; cnt.v_cache_count--; } @@ -1071,8 +1070,8 @@ vm_page_cache_free(vm_page_t m) * Replicate vm_page_cache_remove with a version that can collapse * internal nodes since the object lock is held. */ - vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_ANY); - object->cached_page_count--; + CTR2(KTR_FLO, "free: object %p, page %p", object, m); + vm_radix_remove(&object->cache, m->pindex); m->object = NULL; m->valid = 0; /* Clear PG_CACHED and set PG_FREE. */ @@ -1094,26 +1093,41 @@ vm_page_cache_rename(vm_page_t m, vm_object_t new_ VM_OBJECT_LOCK_ASSERT(orig_object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - /* - * If the insert fails we simply free the cached page. - */ - if (vm_radix_insert(&new_object->rtree, idx, m) != 0) { - vm_page_cache_free(m); - return; - } - vm_radix_color(&new_object->rtree, idx, VM_RADIX_RED); - /* - * We use any color here though we know it's red so that tree - * compaction will still work. - */ - vm_radix_remove(&orig_object->rtree, m->pindex, VM_RADIX_ANY); + CTR3(KTR_FLO, "renaming: old %p, new %p, page %p", orig_object, + new_object, m); + vm_radix_remove(&orig_object->cache, m->pindex); + if (vm_radix_insert(&new_object->cache, idx, m) != 0) + panic("vm_page_cache_rename: failed vm_radix_insert"); m->object = new_object; m->pindex = idx; - new_object->cached_page_count++; - orig_object->cached_page_count--; } /* + * Returns a pointer to the cached page associated with the given object + * and offset, NULL otherwise. + * + * The object must be locked. 
+ */ +vm_page_t +vm_page_is_cached(vm_object_t object, vm_pindex_t pindex) +{ + vm_page_t m; + + /* + * Insertion into an object's collection of cached pages requires the + * object to be locked. Therefore, if the object is locked and the + * object's collection is empty, there is no need to acquire the free + * page queues lock in order to prove that the specified page doesn't + * exist. + */ + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_lock(&vm_page_queue_free_mtx); + m = vm_page_cache_lookup(object, pindex); + mtx_unlock(&vm_page_queue_free_mtx); + return (m); +} + +/* * vm_page_alloc: * * Allocate and return a page that is associated with the specified @@ -1245,7 +1259,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && - m_object->cached_page_count == 0) + vm_object_cache_is_empty(m_object)) vp = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1505,7 +1519,7 @@ vm_page_alloc_init(vm_page_t m) m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && - m_object->cached_page_count == 0) + vm_object_cache_is_empty(m_object)) drop = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -2085,7 +2099,7 @@ void vm_page_cache(vm_page_t m) { vm_object_t object; - int old_cached; + int old_empty_cache; vm_page_lock_assert(m, MA_OWNED); object = m->object; @@ -2116,7 +2130,7 @@ vm_page_cache(vm_page_t m) */ vm_pageq_remove(m); - vm_radix_color(&object->rtree, m->pindex, VM_RADIX_RED); + vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; @@ -2133,9 +2147,10 @@ vm_page_cache(vm_page_t m) m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; - old_cached = object->cached_page_count; - object->cached_page_count++; + old_empty_cache = vm_object_cache_is_empty(object); cnt.v_cache_count++; + if (vm_radix_insert(&object->cache, m->pindex, m) != 0) + panic("vm_page_cache: vm_radix_insert failed"); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else @@ -2153,9 +2168,10 @@ vm_page_cache(vm_page_t m) * the object's only resident page. 
*/ if (object->type == OBJT_VNODE) { - if (old_cached == 0 && object->resident_page_count != 0) + if (old_empty_cache != 0 && object->resident_page_count != 0) vhold(object->handle); - else if (old_cached != 0 && object->resident_page_count == 0) + else if (old_empty_cache == 0 && + object->resident_page_count == 0) vdrop(object->handle); } } Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h (revisione 236763) +++ sys/vm/vm_page.h (copia locale) @@ -379,6 +379,7 @@ vm_page_t vm_page_find_least(vm_object_t, vm_pinde vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); +vm_page_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); Index: sys/vm/vm_mmap.c =================================================================== --- sys/vm/vm_mmap.c (revisione 236731) +++ sys/vm/vm_mmap.c (copia locale) @@ -81,7 +81,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #ifdef HWPMC_HOOKS @@ -888,15 +887,10 @@ RestartScan: object->type == OBJT_VNODE) { pindex = OFF_TO_IDX(current->offset + (addr - current->start)); - m = vm_radix_lookup(&object->rtree, - pindex, VM_RADIX_ANY); - - /* Lock just for consistency. */ - mtx_lock(&vm_page_queue_free_mtx); - if (m != NULL && - (m->flags & PG_CACHED) != 0) + m = vm_page_lookup(object, pindex); + if (m == NULL && + vm_page_is_cached(object, pindex)) mincoreinfo = MINCORE_INCORE; - mtx_unlock(&vm_page_queue_free_mtx); if (m != NULL && m->valid == 0) m = NULL; if (m != NULL) Index: sys/vm/vm_reserv.c =================================================================== --- sys/vm/vm_reserv.c (revisione 236731) +++ sys/vm/vm_reserv.c (copia locale) @@ -341,7 +341,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pind /* * Look for an existing reservation. */ - mpred = vm_radix_lookup_le(&object->rtree, pindex, VM_RADIX_BLACK); + mpred = vm_radix_lookup_le(&object->rtree, pindex); if (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_contig: pindex already allocated")); @@ -349,7 +349,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pind if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; } - msucc = vm_radix_lookup_ge(&object->rtree, pindex, VM_RADIX_BLACK); + msucc = vm_radix_lookup_ge(&object->rtree, pindex); if (msucc != NULL) { KASSERT(msucc->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); @@ -495,7 +495,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex /* * Look for an existing reservation. 
*/ - mpred = vm_radix_lookup_le(&object->rtree, pindex, VM_RADIX_BLACK); + mpred = vm_radix_lookup_le(&object->rtree, pindex); if (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); @@ -503,7 +503,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; } - msucc = vm_radix_lookup_ge(&object->rtree, pindex, VM_RADIX_BLACK); + msucc = vm_radix_lookup_ge(&object->rtree, pindex); if (msucc != NULL) { KASSERT(msucc->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c (revisione 236731) +++ sys/vm/vnode_pager.c (copia locale) @@ -401,42 +401,18 @@ vnode_pager_setsize(vp, nsize) /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. + * + * XXX for some reason (I don't know yet), if we take a + * completely invalid page and mark it partially valid + * it can screw up NFS reads, so we don't allow the case. */ - if ((nsize & PAGE_MASK) && - (m = vm_radix_lookup(&object->rtree, OFF_TO_IDX(nsize), - VM_RADIX_ANY)) != NULL) { - int base; - int size; + if ((nsize & PAGE_MASK) && (m = vm_radix_lookup(&object->rtree, + OFF_TO_IDX(nsize))) != NULL && m->valid != 0) { + int base = (int)nsize & PAGE_MASK; + int size = PAGE_SIZE - base; - /* - * Eliminate any cached page as we would have to - * do too much work to save it. - */ - if (m->flags & PG_CACHED) { - drop = NULL; - mtx_lock(&vm_page_queue_free_mtx); - if (m->object == object) { - vm_page_cache_free(m); - if (object->cached_page_count == 0) - drop = vp; - } - mtx_unlock(&vm_page_queue_free_mtx); - if (drop) - vdrop(drop); - goto out; - } - /* - * XXX for some reason (I don't know yet), if we take a - * completely invalid page and mark it partially valid - * it can screw up NFS reads, so we don't allow the - * case. - */ - if (m->valid != 0 || m->object != object) - goto out; + MPASS(m->object == object); - base = (int)nsize & PAGE_MASK; - size = PAGE_SIZE - base; - /* * Clear out partial-page garbage in case * the page has been mapped. @@ -465,9 +441,27 @@ vnode_pager_setsize(vp, nsize) * replacement from working properly. */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); + } else if ((nsize & PAGE_MASK) && + !vm_object_cache_is_empty(object)) { + drop = NULL; + mtx_lock(&vm_page_queue_free_mtx); + m = vm_radix_lookup(&object->cache, OFF_TO_IDX(nsize)); + if (m != NULL) { + MPASS(m->object == object); + + /* + * Eliminate any cached page as we would have + * to do too much work to save it. 
+ */ + vm_page_cache_free(m); + if (vm_object_cache_is_empty(object)) + drop = vp; + } + mtx_unlock(&vm_page_queue_free_mtx); + if (drop) + vdrop(drop); } } -out: object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_UNLOCK(object); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revisione 236731) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (copia locale) @@ -330,32 +330,29 @@ page_lookup(vnode_t *vp, int64_t start, int64_t of VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); for (;;) { - pp = vm_radix_lookup(&obj->rtree, OFF_TO_IDX(start), - VM_RADIX_ANY); - if (pp != NULL) { - if (vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { - if ((pp->oflags & VPO_BUSY) != 0) { - /* - * Reference the page before unlocking - * and sleeping so that the page - * daemon is less likely to reclaim it. - */ - vm_page_reference(pp); - vm_page_sleep(pp, "zfsmwb"); - continue; - } - vm_page_busy(pp); - vm_page_undirty(pp); - } else { - if (obj->cached_page_count != 0 && - (pp->flags & PG_CACHED) != 0) { - mtx_lock(&vm_page_queue_free_mtx); - if (pp->object == obj) - vm_page_cache_free(pp); - mtx_unlock(&vm_page_queue_free_mtx); - } - pp = NULL; + if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && + vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { + if ((pp->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_reference(pp); + vm_page_sleep(pp, "zfsmwb"); + continue; } + vm_page_busy(pp); + vm_page_undirty(pp); + } else { + pp = vm_page_is_cached(obj, OFF_TO_IDX(start)); + if (pp != NULL) { + mtx_lock(&vm_page_queue_free_mtx); + if (pp->object == obj) + vm_page_cache_free(pp); + mtx_unlock(&vm_page_queue_free_mtx); + } + pp = NULL; } break; } Index: sys/cddl/compat/opensolaris/sys/vnode.h =================================================================== --- sys/cddl/compat/opensolaris/sys/vnode.h (revisione 236731) +++ sys/cddl/compat/opensolaris/sys/vnode.h (copia locale) @@ -75,7 +75,7 @@ vn_is_readonly(vnode_t *vp) #define vn_mountedvfs(vp) ((vp)->v_mountedhere) #define vn_has_cached_data(vp) \ ((vp)->v_object != NULL && \ - (vp)->v_object->cached_page_count > 0) + !vm_object_cache_is_empty((vp)->v_object)) #define vn_exists(vp) do { } while (0) #define vn_invalid(vp) do { } while (0) #define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0) Index: sys/sys/ktr.h =================================================================== --- sys/sys/ktr.h (revisione 236731) +++ sys/sys/ktr.h (copia locale) @@ -75,7 +75,8 @@ #define KTR_INET6 0x10000000 /* IPv6 stack */ #define KTR_SCHED 0x20000000 /* Machine parsed sched info. */ #define KTR_BUF 0x40000000 /* Buffer cache */ -#define KTR_ALL 0x7fffffff +#define KTR_FLO 0x80000000 +#define KTR_ALL 0xffffffff /* Trace classes to compile in */ #ifdef KTR Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c (revisione 236731) +++ sys/kern/uipc_shm.c (copia locale) @@ -278,8 +278,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) if (base != 0) { idx = OFF_TO_IDX(length); retry: - m = vm_radix_lookup(&object->rtree, idx, - VM_RADIX_BLACK); + m = vm_radix_lookup(&object->rtree, idx); if (m != NULL) { if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) {
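
Illustrative note (not part of the patch): the color argument can be dropped from the vm_radix_* API because resident and cached pages no longer share a single trie, so a leaf no longer needs a RED/BLACK tag saying which kind it is. The standalone sketch below contrasts the removed vm_radix_match() with the retained one. demo_page, match_old(), match_new() and the main() harness are invented for the example; it also assumes VM_RADIX_FLAGS masks exactly the two old color bits and that leaf pointers are at least 4-byte aligned (as vm_page_t is in the kernel).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	VM_RADIX_BLACK	0x1	/* resident-leaf tag, removed by the patch */
#define	VM_RADIX_RED	0x2	/* cached-leaf tag, removed by the patch */
#define	VM_RADIX_FLAGS	(VM_RADIX_RED | VM_RADIX_BLACK)	/* assumed mask */

struct demo_page { int pindex; };	/* stand-in for struct vm_page */

/* Pre-patch behaviour: a leaf is visible only to a lookup of its color. */
static void *
match_old(void *child, int color)
{
	uintptr_t c = (uintptr_t)child;

	if ((c & color) == 0)
		return (NULL);
	return ((void *)(c & ~VM_RADIX_FLAGS));
}

/* Post-patch behaviour: every leaf is the same kind, so just strip flags. */
static void *
match_new(void *child)
{
	uintptr_t c = (uintptr_t)child;

	return ((void *)(c & ~VM_RADIX_FLAGS));
}

int
main(void)
{
	static struct demo_page pg = { 42 };	/* 4-byte aligned, low bits free */
	void *black_leaf = (void *)((uintptr_t)&pg | VM_RADIX_BLACK);

	/* A RED (cached) lookup used to skip a BLACK (resident) leaf. */
	assert(match_old(black_leaf, VM_RADIX_RED) == NULL);
	assert(match_old(black_leaf, VM_RADIX_BLACK) == (void *)&pg);

	/* With one trie per kind of page, no color filter is needed. */
	assert(match_new(black_leaf) == (void *)&pg);
	printf("pindex %d\n",
	    ((struct demo_page *)match_new(black_leaf))->pindex);
	return (0);
}

The same split is what makes vm_object_cache_is_empty() a simple test of object->cache.rt_root == 0, which callers such as tmpfs_mappedwrite() and vm_object_page_remove() use to skip the cached-page handling entirely when nothing has ever been cached.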