Index: conf/files =================================================================== --- conf/files (.../head/sys) (revision 227753) +++ conf/files (.../user/attilio/vmcontention/sys) (revision 227756) @@ -3365,6 +3365,7 @@ vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard +vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard Property changes on: conf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/conf:r225232-227754 Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (.../head/sys) (revision 227753) +++ kern/uipc_shm.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -289,11 +289,24 @@ * a page swapped out to disk? */ if ((length & PAGE_MASK) && - (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && - m->valid != 0) { - int base = (int)length & PAGE_MASK; - int size = PAGE_SIZE - base; + (m = vm_radix_lookup(&object->rtree, OFF_TO_IDX(length), + VM_RADIX_ANY)) != NULL) { + int base; + int size; + + if (m->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (m->object == object) + vm_page_cache_remove(m); + mtx_unlock(&vm_page_queue_free_mtx); + goto out; + } + if (m->valid != 0 || m->object != object) + goto out; + base = (int)length & PAGE_MASK; + size = PAGE_SIZE - base; + pmap_zero_page_area(m, base, size); /* @@ -311,10 +324,6 @@ base = roundup2(base, DEV_BSIZE); vm_page_clear_dirty(m, base, PAGE_SIZE - base); - } else if ((length & PAGE_MASK) && - __predict_false(object->cache != NULL)) { - vm_page_cache_free(object, OFF_TO_IDX(length), - nobjsize); } } else { @@ -326,6 +335,7 @@ } object->charge += delta; } +out: shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); Property changes on: boot/powerpc/boot1.chrp ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/powerpc/boot1.chrp:r225232-227754 Property changes on: boot/powerpc/ofw ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/powerpc/ofw:r225232-227754 Property changes on: boot/i386/efi ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/i386/efi:r225232-227754 Property changes on: boot/ia64/efi ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/ia64/efi:r225232-227754 Property changes on: boot/ia64/ski ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/ia64/ski:r225232-227754 Property changes on: boot ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot:r225232-227754 Index: vm/vm_radix.c =================================================================== --- vm/vm_radix.c (.../head/sys) (revision 0) +++ vm/vm_radix.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + + +/* + * Radix tree implementation. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); + +static uma_zone_t vm_radix_node_zone; + +#ifndef UMA_MD_SMALL_ALLOC +static void * +vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) +{ + vm_offset_t addr; + vm_page_t m; + int pflags; + + /* Inform UMA that this allocator uses kernel_map. */ + *flags = UMA_SLAB_KERNEL; + + pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; + + /* + * As kmem_alloc_nofault() can however fail, let just assume that + * M_NOWAIT is on and act accordingly. + */ + pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : + VM_ALLOC_SYSTEM; + if ((wait & M_ZERO) != 0) + pflags |= VM_ALLOC_ZERO; + addr = kmem_alloc_nofault(kernel_map, size); + if (addr == 0) + return (NULL); + + /* Just one page allocation is assumed here. */ + m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), + pflags); + if (m == NULL) { + kmem_free(kernel_map, addr, size); + return (NULL); + } + if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + pmap_qenter(addr, &m, 1); + return ((void *)addr); +} + +static void +vm_radix_node_zone_freef(void *item, int size, uint8_t flags) +{ + vm_page_t m; + vm_offset_t voitem; + + MPASS((flags & UMA_SLAB_KERNEL) != 0); + + /* Just one page allocation is assumed here. */ + voitem = (vm_offset_t)item; + m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); + pmap_qremove(voitem, 1); + vm_page_free(m); + kmem_free(kernel_map, voitem, size); +} + +static void +init_vm_radix_alloc(void *dummy __unused) +{ + + uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); + uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); +} +SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); +#endif + +/* + * Radix node zone destructor. + */ +#ifdef INVARIANTS +static void +vm_radix_node_zone_dtor(void *mem, int size, void *arg) +{ + struct vm_radix_node *rnode; + + rnode = mem; + KASSERT(rnode->rn_count == 0, + ("vm_radix_node_put: Freeing a node with %d children\n", + rnode->rn_count)); +} +#endif + +/* + * Allocate a radix node. Initializes all elements to 0. + */ +static __inline struct vm_radix_node * +vm_radix_node_get(void) +{ + + return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); +} + +/* + * Free radix node. 
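+ * The node must have no valid children left; with INVARIANTS the zone + * destructor asserts that rn_count is zero.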
+ */ +static __inline void +vm_radix_node_put(struct vm_radix_node *rnode) +{ + + uma_zfree(vm_radix_node_zone, rnode); +} + +/* + * Return the position in the array for a given level. + */ +static __inline int +vm_radix_slot(vm_pindex_t index, int level) +{ + + return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); +} + +void +vm_radix_init(void) +{ + + vm_radix_node_zone = uma_zcreate("RADIX NODE", + sizeof(struct vm_radix_node), NULL, +#ifdef INVARIANTS + vm_radix_node_zone_dtor, +#else + NULL, +#endif + NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM); +} + +/* + * Extract the root node and height from a radix tree with a single load. + */ +static __inline int +vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode) +{ + uintptr_t root; + int height; + + root = rtree->rt_root; + height = root & VM_RADIX_HEIGHT; + *rnode = (struct vm_radix_node *)(root - height); + return (height); +} + + +/* + * Set the root node and height for a radix tree. + */ +static inline void +vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode, + int height) +{ + uintptr_t root; + + root = (uintptr_t)rnode | height; + rtree->rt_root = root; +} + +static inline void * +vm_radix_match(void *child, int color) +{ + uintptr_t c; + + c = (uintptr_t)child; + + if ((c & color) == 0) + return (NULL); + return ((void *)(c & ~VM_RADIX_FLAGS)); +} + +static void +vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level) +{ + int slot; + + MPASS(rnode != NULL && level >= 0); + + /* + * Level 0 just contains pages as children, thus make it a special + * case, free the node and return. + */ + if (level == 0) { + CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + rnode->rn_count = 0; + vm_radix_node_put(rnode); + return; + } + for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { + if (rnode->rn_child[slot] == NULL) + continue; + CTR3(KTR_VM, + "reclaiming: node %p, level %d recursing in slot %d", + rnode, level, slot); + vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot], + level - 1); + rnode->rn_count--; + } + MPASS(rnode->rn_count == 0); + CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + vm_radix_node_put(rnode); +} + +/* + * Inserts the key-value pair in to the radix tree. Returns errno. + * Panics if the key already exists. + */ +int +vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val) +{ + struct vm_radix_node *rnode; + struct vm_radix_node *root; + int level; + int slot; + + CTR3(KTR_VM, + "insert: tree %p, index %p, val %p", rtree, (void *)index, val); + if (index == -1) + panic("vm_radix_insert: -1 is not a valid index.\n"); + level = vm_radix_height(rtree, &root); + /* + * Increase the height by adding nodes at the root until + * there is sufficient space. + */ + while (level == 0 || index > VM_RADIX_MAX(level)) { + CTR3(KTR_VM, "insert: expanding %jd > %jd height %d", + index, VM_RADIX_MAX(level), level); + level++; + KASSERT(level <= VM_RADIX_LIMIT, + ("vm_radix_insert: Tree %p height %d too tall", + rtree, level)); + /* + * Only allocate tree nodes if they are needed. + */ + if (root == NULL || root->rn_count != 0) { + rnode = vm_radix_node_get(); + if (rnode == NULL) { + CTR4(KTR_VM, +"insert: tree %p, root %p, index: %d, level: %d failed to allocate a new node", + rtree, root, index, level); + return (ENOMEM); + } + /* + * Store the new pointer with a memory barrier so + * that it is visible before the new root. 
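+ * Otherwise a lookup that already observes the new root could still + * read a NULL child slot and miss the existing subtree.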
+ */ + if (root) { + atomic_store_rel_ptr((volatile uintptr_t *) + &rnode->rn_child[0], (uintptr_t)root); + rnode->rn_count = 1; + } + root = rnode; + } + vm_radix_setroot(rtree, root, level); + } + + /* Now that the tree is tall enough, fill in the path to the index. */ + rnode = root; + for (level = level - 1; level > 0; level--) { + slot = vm_radix_slot(index, level); + /* Add the required intermidiate nodes. */ + if (rnode->rn_child[slot] == NULL) { + rnode->rn_child[slot] = vm_radix_node_get(); + if (rnode->rn_child[slot] == NULL) { + CTR5(KTR_VM, +"insert: tree %p, index %jd, level %d, slot %d, child %p failed to populate", + rtree, index, level, slot, + rnode->rn_child[slot]); + return (ENOMEM); + } + rnode->rn_count++; + } + CTR5(KTR_VM, + "insert: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + rnode = rnode->rn_child[slot]; + } + + slot = vm_radix_slot(index, 0); + CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + KASSERT(rnode->rn_child[slot] == NULL, + ("vm_radix_insert: Duplicate value %p at index: %lu\n", + rnode->rn_child[slot], (u_long)index)); + val = (void *)((uintptr_t)val | VM_RADIX_BLACK); + rnode->rn_child[slot] = val; + atomic_add_int((volatile int *)&rnode->rn_count, 1); + + return 0; +} + +/* + * Returns the value stored at the index. If the index is not present + * NULL is returned. + */ +void * +vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + int slot; + int level; + + level = vm_radix_height(rtree, &rnode); + if (index > VM_RADIX_MAX(level)) + return NULL; + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR5(KTR_VM, + "lookup: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + if (level == 0) + return vm_radix_match(rnode->rn_child[slot], color); + rnode = rnode->rn_child[slot]; + level--; + } + CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index); + + return NULL; +} + +void * +vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + uintptr_t child; + int slot; + int level; + + level = vm_radix_height(rtree, &rnode); + if (index > VM_RADIX_MAX(level)) + return NULL; + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR5(KTR_VM, + "color: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + if (level == 0) + break; + rnode = rnode->rn_child[slot]; + level--; + } + if (rnode == NULL || rnode->rn_child[slot] == NULL) + return (NULL); + child = (uintptr_t)rnode->rn_child[slot]; + child &= ~VM_RADIX_FLAGS; + rnode->rn_child[slot] = (void *)(child | color); + + return (void *)child; +} + +/* + * Find the first leaf with a valid node between *startp and end. Return + * the index of the first valid item in the leaf in *startp. + */ +static struct vm_radix_node * +vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end) +{ + struct vm_radix_node *rnode; + vm_pindex_t start; + vm_pindex_t inc; + int slot; + int level; + + start = *startp; +restart: + level = vm_radix_height(rtree, &rnode); + if (start > VM_RADIX_MAX(level) || (end && start >= end)) { + rnode = NULL; + goto out; + } + /* + * Search the tree from the top for any leaf node holding an index + * between start and end. 
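+ * When the slot on the current path is empty, skip ahead to the next + * populated slot at the same level, advancing the index past the + * whole empty subtree.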
+ */ + for (level--; level; level--) { + slot = vm_radix_slot(start, level); + CTR5(KTR_VM, + "leaf: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)start, level, slot, rnode->rn_child[slot]); + if (rnode->rn_child[slot] != NULL) { + rnode = rnode->rn_child[slot]; + continue; + } + /* + * Calculate how much to increment our index by + * based on the tree level. We must truncate the + * lower bits to start from the begnning of the + * next leaf. + */ + inc = 1LL << (level * VM_RADIX_WIDTH); + start &= ~VM_RADIX_MAX(level); + start += inc; + slot++; + CTR5(KTR_VM, + "leaf: start %p end %p inc %d mask 0x%lX slot %d", + (void *)start, (void *)end, inc, + ~VM_RADIX_MAX(level), slot); + for (; slot < VM_RADIX_COUNT; slot++, start += inc) { + if (end != 0 && start >= end) { + rnode = NULL; + goto out; + } + if (rnode->rn_child[slot]) { + rnode = rnode->rn_child[slot]; + break; + } + } + if (slot == VM_RADIX_COUNT) + goto restart; + } + +out: + *startp = start; + return (rnode); +} + + + +/* + * Looks up as many as cnt values between start and end, and stores + * them in the caller allocated array out. The next index can be used + * to restart the scan. This optimizes forward scans in the tree. + */ +int +vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start, + vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next) +{ + struct vm_radix_node *rnode; + void *val; + int slot; + int outidx; + + CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p", + rtree, (void *)start, (void *)end); + if (rtree->rt_root == 0) + return (0); + outidx = 0; + while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { + slot = vm_radix_slot(start, 0); + for (; slot < VM_RADIX_COUNT; slot++, start++) { + if (end != 0 && start >= end) + goto out; + val = vm_radix_match(rnode->rn_child[slot], color); + if (val == NULL) + continue; + CTR4(KTR_VM, + "lookupn: tree %p index %p slot %d found child %p", + rtree, (void *)start, slot, val); + out[outidx] = val; + if (++outidx == cnt) + goto out; + } + if (end != 0 && start >= end) + break; + } +out: + *next = start; + return (outidx); +} + +void +vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end, + int color, void (*iter)(void *)) +{ + struct vm_radix_node *rnode; + void *val; + int slot; + + if (rtree->rt_root == 0) + return; + while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { + slot = vm_radix_slot(start, 0); + for (; slot < VM_RADIX_COUNT; slot++, start++) { + if (end != 0 && start >= end) + return; + val = vm_radix_match(rnode->rn_child[slot], color); + if (val) + iter(val); + } + if (end != 0 && start >= end) + return; + } +} + + +/* + * Look up any entry at a position less than or equal to index. + */ +void * +vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + struct vm_radix_node *child; + vm_pindex_t max; + vm_pindex_t inc; + void *val; + int slot; + int level; + + CTR2(KTR_VM, + "lookup_le: tree %p, index %p", rtree, (void *)index); +restart: + level = vm_radix_height(rtree, &rnode); + if (rnode == NULL) + return (NULL); + max = VM_RADIX_MAX(level); + if (index > max || index == 0) + index = max; + /* + * Search the tree from the top for any leaf node holding an index + * lower than 'index'. 
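+ * When a slot on the path is empty, back up to the closest previous + * populated slot at the same level and continue from the end of that + * subtree.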
+ */ + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR5(KTR_VM, + "lookup_le: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + if (level == 0) + break; + /* + * If we don't have an exact match we must start our search + * from the next leaf and adjust our index appropriately. + */ + if ((child = rnode->rn_child[slot]) == NULL) { + /* + * Calculate how much to decrement our index by + * based on the tree level. We must set the + * lower bits to start from the end of the next + * leaf. + */ + inc = 1LL << (level * VM_RADIX_WIDTH); + index |= VM_RADIX_MAX(level); + index -= inc; + slot--; + CTR4(KTR_VM, + "lookup_le: start %p inc %ld mask 0x%lX slot %d", + (void *)index, inc, VM_RADIX_MAX(level), slot); + for (; slot >= 0; slot--, index -= inc) { + child = rnode->rn_child[slot]; + if (child) + break; + } + } + rnode = child; + level--; + } + if (rnode) { + for (; slot >= 0; slot--, index--) { + val = vm_radix_match(rnode->rn_child[slot], color); + if (val) + return (val); + } + } + if (index != -1) + goto restart; + return (NULL); +} + +/* + * Remove the specified index from the tree. If possible the height of the + * tree is adjusted after deletion. The value stored at index is returned + * panics if the key is not present. + */ +void * +vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *stack[VM_RADIX_LIMIT]; + struct vm_radix_node *rnode, *root; + void *val; + int level; + int slot; + + level = vm_radix_height(rtree, &root); + KASSERT(index <= VM_RADIX_MAX(level), + ("vm_radix_remove: %p index %jd out of range %jd.", + rtree, index, VM_RADIX_MAX(level))); + rnode = root; + val = NULL; + level--; + /* + * Find the node and record the path in stack. + */ + while (level && rnode) { + stack[level] = rnode; + slot = vm_radix_slot(index, level); + rnode = rnode->rn_child[slot]; + CTR5(KTR_VM, + "remove: tree %p, index %p, level %d, slot %d, child %p", + rtree, (void *)index, level, slot, rnode->rn_child[slot]); + level--; + } + KASSERT(rnode != NULL, + ("vm_radix_remove: index %jd not present in the tree.\n", index)); + slot = vm_radix_slot(index, 0); + val = vm_radix_match(rnode->rn_child[slot], color); + KASSERT(val != NULL, + ("vm_radix_remove: index %jd not present in the tree.\n", index)); + + for (;;) { + rnode->rn_child[slot] = NULL; + /* + * Use atomics for the last level since red and black + * will both adjust it. + */ + if (level == 0) + atomic_add_int((volatile int *)&rnode->rn_count, -1); + else + rnode->rn_count--; + /* + * Only allow black removes to prune the tree. + */ + if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0) + break; + vm_radix_node_put(rnode); + if (rnode == root) { + vm_radix_setroot(rtree, NULL, 0); + break; + } + rnode = stack[++level]; + slot = vm_radix_slot(index, level); + + } + return (val); +} + +/* + * Remove and free all the nodes from the radix tree. + * This function is recrusive but there is a tight control on it as the + * maximum depth of the tree is fixed. + */ +void +vm_radix_reclaim_allnodes(struct vm_radix *rtree) +{ + struct vm_radix_node *root; + int level; + + if (rtree->rt_root == 0) + return; + level = vm_radix_height(rtree, &root); + vm_radix_reclaim_allnodes_internal(root, level - 1); + rtree->rt_root = 0; +} + +/* + * Attempts to reduce the height of the tree. 
+ */ +void +vm_radix_shrink(struct vm_radix *rtree) +{ + struct vm_radix_node *tmp, *root; + int level; + + if (rtree->rt_root == 0) + return; + level = vm_radix_height(rtree, &root); + + /* Adjust the height of the tree. */ + while (root->rn_count == 1 && root->rn_child[0] != NULL) { + tmp = root; + root->rn_count--; + root = root->rn_child[0]; + level--; + vm_radix_node_put(tmp); + } + /* Finally see if we have an empty tree. */ + if (root->rn_count == 0) { + vm_radix_node_put(root); + root = NULL; + level--; + } + vm_radix_setroot(rtree, root, level); +} Property changes on: vm/vm_radix.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: vm/vm_radix.h =================================================================== --- vm/vm_radix.h (.../head/sys) (revision 0) +++ vm/vm_radix.h (.../user/attilio/vmcontention/sys) (revision 227756) @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _VM_RADIX_H_ +#define _VM_RADIX_H_ + +#include + +/* Default values of the tree parameters */ +#define VM_RADIX_WIDTH 5 +#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) +#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) +#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) +#define VM_RADIX_FLAGS 0x3 /* Flag bits stored in node pointers. */ +#define VM_RADIX_BLACK 0x1 /* Black node. (leaf only) */ +#define VM_RADIX_RED 0x2 /* Red node. (leaf only) */ +#define VM_RADIX_ANY (VM_RADIX_RED | VM_RADIX_BLACK) +#define VM_RADIX_EMPTY 0x1 /* Empty hint. (internal only) */ +#define VM_RADIX_HEIGHT 0xf /* Bits of height in root */ +#define VM_RADIX_STACK 8 /* Nodes to store on stack. */ + +/* Calculates maximum value for a tree of height h. */ +#define VM_RADIX_MAX(h) \ + ((h) == VM_RADIX_LIMIT ? ((vm_pindex_t)-1) : \ + (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1)) + +/* + * Radix tree root. The height and pointer are set together to permit + * coherent lookups while the root is modified. 
+ */ +struct vm_radix { + uintptr_t rt_root; /* root + height */ +}; + +#ifdef _KERNEL +CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT); + +struct vm_radix_node { + void *rn_child[VM_RADIX_COUNT]; /* child nodes. */ + uint16_t rn_count; /* Valid children. */ +}; + +void vm_radix_init(void); + +/* + * Functions which only work with black nodes. (object lock) + */ +int vm_radix_insert(struct vm_radix *, vm_pindex_t, void *); +void vm_radix_shrink(struct vm_radix *); + +/* + * Functions which work on specified colors. (object, vm_page_queue_free locks) + */ +void *vm_radix_color(struct vm_radix *, vm_pindex_t, int); +void *vm_radix_lookup(struct vm_radix *, vm_pindex_t, int); +int vm_radix_lookupn(struct vm_radix *, vm_pindex_t, vm_pindex_t, int, + void **, int, vm_pindex_t *); +void *vm_radix_lookup_le(struct vm_radix *, vm_pindex_t, int); +void vm_radix_reclaim_allnodes(struct vm_radix *); +void *vm_radix_remove(struct vm_radix *, vm_pindex_t, int); +void vm_radix_foreach(struct vm_radix *, vm_pindex_t, vm_pindex_t, int, + void (*)(void *)); + +/* + * Look up any entry at a position greater or equal to index. + */ +static inline void * +vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + void *val; + + if (vm_radix_lookupn(rtree, index, 0, color, &val, 1, &index)) + return (val); + return (NULL); +} + +static inline void * +vm_radix_last(struct vm_radix *rtree, int color) +{ + + return vm_radix_lookup_le(rtree, 0, color); +} + +static inline void * +vm_radix_first(struct vm_radix *rtree, int color) +{ + + return vm_radix_lookup_ge(rtree, 0, color); +} + +static inline void * +vm_radix_next(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + + if (index == -1) + return (NULL); + return vm_radix_lookup_ge(rtree, index + 1, color); +} + +static inline void * +vm_radix_prev(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + + if (index == 0) + return (NULL); + return vm_radix_lookup_le(rtree, index - 1, color); +} + +#endif /* _KERNEL */ +#endif /* !_VM_RADIX_H_ */ Property changes on: vm/vm_radix.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: vm/vm_init.c =================================================================== --- vm/vm_init.c (.../head/sys) (revision 227753) +++ vm/vm_init.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +124,7 @@ vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); + vm_radix_init(); pmap_init(); vm_pager_init(); } Index: vm/vm_reserv.c =================================================================== --- vm/vm_reserv.c (.../head/sys) (revision 227753) +++ vm/vm_reserv.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -309,44 +309,32 @@ /* * Look for an existing reservation. */ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { + mpred = vm_radix_lookup_le(&object->rtree, pindex, VM_RADIX_BLACK); + if (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; - /* Handle vm_page_rename(m, new_object, ...). 
*/ if ((m->flags & (PG_CACHED | PG_FREE)) == 0) return (NULL); vm_reserv_populate(rv); return (m); - } else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_page: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) { - m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; - /* Handle vm_page_rename(m, new_object, ...). */ - if ((m->flags & (PG_CACHED | PG_FREE)) == 0) - return (NULL); - vm_reserv_populate(rv); - return (m); - } else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + msucc = vm_radix_lookup_ge(&object->rtree, pindex, VM_RADIX_BLACK); + if (msucc != NULL) { + KASSERT(msucc->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { + m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; + if ((m->flags & (PG_CACHED | PG_FREE)) == 0) + return (NULL); + vm_reserv_populate(rv); + return (m); + } + } /* * Determine the first index to the left that can be used. Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (.../head/sys) (revision 227753) +++ vm/vm_object.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -163,6 +163,9 @@ vm_object_t object; object = (vm_object_t)mem; + KASSERT(object->resident_page_count == 0, + ("object %p resident_page_count = %d", + object, object->resident_page_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages", object)); @@ -171,15 +174,12 @@ ("object %p has reservations", object)); #endif - KASSERT(object->cache == NULL, + KASSERT(object->cached_page_count == 0, ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); - KASSERT(object->resident_page_count == 0, - ("object %p resident_page_count = %d", - object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); @@ -209,7 +209,7 @@ TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); - object->root = NULL; + object->rtree.rt_root = 0; object->type = type; object->size = size; object->generation = 1; @@ -227,7 +227,6 @@ #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif - object->cache = NULL; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -304,7 +303,7 @@ case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: - if (!TAILQ_EMPTY(&object->memq)) + if (object->resident_page_count == 0) return (KERN_FAILURE); break; case OBJT_DEAD: @@ -676,7 +675,10 @@ void vm_object_terminate(vm_object_t object) { - vm_page_t p, p_next; + vm_page_t pa[VM_RADIX_STACK]; + vm_page_t p; + vm_pindex_t start; + int n, i; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -721,43 +723,75 @@ * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. 
*/ - TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { - KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, - ("vm_object_terminate: freeing busy page %p", p)); - vm_page_lock(p); - /* - * Optimize the page's removal from the object by resetting - * its "object" field. Specifically, if the page is not - * wired, then the effect of this assignment is that - * vm_page_free()'s call to vm_page_remove() will return - * immediately without modifying the page or the object. - */ - p->object = NULL; - if (p->wire_count == 0) { - vm_page_free(p); - PCPU_INC(cnt.v_pfree); + start = 0; + while ((n = vm_radix_lookupn(&object->rtree, start, 0, VM_RADIX_ANY, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + /* + * Another thread may allocate this cached page from + * the queue before we acquire the page queue free + * mtx. + */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == object) { + p->object = NULL; + p->valid = 0; + /* Clear PG_CACHED and set PG_FREE. */ + p->flags ^= PG_CACHED | PG_FREE; + cnt.v_cache_count--; + cnt.v_free_count++; + } + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != object) + continue; + KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, + ("vm_object_terminate: freeing busy page %p", p)); + vm_page_lock(p); + /* + * Optimize the page's removal from the object by + * resetting its "object" field. Specifically, if + * the page is not wired, then the effect of this + * assignment is that vm_page_free()'s call to + * vm_page_remove() will return immediately without + * modifying the page or the object. + * Anyway, the radix tree cannot be accessed anymore + * from within the object, thus all the nodes need + * to be reclaimed later on. + */ + p->object = NULL; + if (p->wire_count == 0) { + vm_page_free(p); + PCPU_INC(cnt.v_pfree); + } + vm_page_unlock(p); } - vm_page_unlock(p); + if (n < VM_RADIX_STACK) + break; } + vm_radix_reclaim_allnodes(&object->rtree); /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { - object->root = NULL; TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } + if (object->cached_page_count != 0 && object->type == OBJT_VNODE) { + object->cached_page_count = 0; + vdrop(object->handle); + } #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif - if (__predict_false(object->cache != NULL)) - vm_page_cache_free(object, 0, 0); /* * Let the pager know object is dead. @@ -1232,10 +1266,12 @@ void vm_object_split(vm_map_entry_t entry) { - vm_page_t m, m_next; + vm_page_t ma[VM_RADIX_STACK]; + vm_page_t m; vm_object_t orig_object, new_object, source; - vm_pindex_t idx, offidxstart; + vm_pindex_t idx, offidxstart, start; vm_size_t size; + int i, n; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) @@ -1288,31 +1324,50 @@ ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } + start = offidxstart; retry: - m = vm_page_find_least(orig_object, offidxstart); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); - - /* - * We must wait for pending I/O to complete before we can - * rename the page. 
- * - * We do not have to VM_PROT_NONE the page as mappings should - * not be changed by this operation. - */ - if ((m->oflags & VPO_BUSY) || m->busy) { - VM_OBJECT_UNLOCK(new_object); - m->oflags |= VPO_WANTED; - msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); - VM_OBJECT_LOCK(new_object); - goto retry; + while ((n = vm_radix_lookupn(&orig_object->rtree, start, + offidxstart + size, VM_RADIX_ANY, (void **)ma, VM_RADIX_STACK, + &start)) != 0) { + for (i = 0; i < n; i++) { + m = ma[i]; + idx = m->pindex - offidxstart; + if (m->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (m->object == orig_object) + vm_page_cache_rename(m, new_object, + idx); + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (m->object != orig_object) + continue; + /* + * We must wait for pending I/O to complete before + * we can rename the page. + * + * We do not have to VM_PROT_NONE the page as mappings + * should not be changed by this operation. + */ + if ((m->oflags & VPO_BUSY) || m->busy) { + start = m->pindex; + VM_OBJECT_UNLOCK(new_object); + m->oflags |= VPO_WANTED; + msleep(m, VM_OBJECT_MTX(orig_object), PVM, + "spltwt", 0); + VM_OBJECT_LOCK(new_object); + goto retry; + } + vm_page_lock(m); + vm_page_rename(m, new_object, idx); + vm_page_unlock(m); + /* + * page automatically made dirty by rename and + * cache handled + */ + vm_page_busy(m); } - vm_page_lock(m); - vm_page_rename(m, new_object, idx); - vm_page_unlock(m); - /* page automatically made dirty by rename and cache handled */ - vm_page_busy(m); + if (n < VM_RADIX_STACK) + break; } if (orig_object->type == OBJT_SWAP) { /* @@ -1320,13 +1375,6 @@ * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); - - /* - * Transfer any cached pages from orig_object to new_object. - */ - if (__predict_false(orig_object->cache != NULL)) - vm_page_cache_transfer(orig_object, offidxstart, - new_object); } VM_OBJECT_UNLOCK(orig_object); TAILQ_FOREACH(m, &new_object->memq, listq) @@ -1345,10 +1393,13 @@ static int vm_object_backing_scan(vm_object_t object, int op) { - int r = 1; + vm_page_t pa[VM_RADIX_STACK]; vm_page_t p; vm_object_t backing_object; - vm_pindex_t backing_offset_index; + vm_pindex_t backing_offset_index, new_pindex; + vm_pindex_t start; + int color, i, n; + int r = 1; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED); @@ -1376,15 +1427,39 @@ if (op & OBSC_COLLAPSE_WAIT) { vm_object_set_flag(backing_object, OBJ_DEAD); } - + color = VM_RADIX_BLACK; + if (op & OBSC_COLLAPSE_WAIT) + color |= VM_RADIX_RED; /* * Our scan */ - p = TAILQ_FIRST(&backing_object->memq); - while (p) { - vm_page_t next = TAILQ_NEXT(p, listq); - vm_pindex_t new_pindex = p->pindex - backing_offset_index; +restart: + start = 0; + i = n = VM_RADIX_STACK; + for (;;) { + if (i == n) { + if (n < VM_RADIX_STACK) + break; + if ((n = vm_radix_lookupn(&backing_object->rtree, + start, 0, color, (void **)pa, VM_RADIX_STACK, + &start)) == 0) + break; + i = 0; + } + p = pa[i++]; + /* + * Free cached pages. XXX Why? Emulating old behavior here. + */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == backing_object) + vm_page_cache_free(p); + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != backing_object) + continue; + new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { vm_page_t pp; @@ -1396,13 +1471,9 @@ * note that we do not busy the backing object's * page. 
*/ - if ( - p->pindex < backing_offset_index || - new_pindex >= object->size - ) { - p = next; + if (p->pindex < backing_offset_index || + new_pindex >= object->size) continue; - } /* * See if the parent has the page or if the parent's @@ -1431,12 +1502,9 @@ vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { - if ((p->oflags & VPO_BUSY) || - !p->valid || - p->busy) { - p = next; + if ((p->oflags & VPO_BUSY) || !p->valid || + p->busy) continue; - } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { VM_OBJECT_UNLOCK(object); @@ -1452,8 +1520,7 @@ * should not have changed so we * just restart our scan. */ - p = TAILQ_FIRST(&backing_object->memq); - continue; + goto restart; } } @@ -1489,7 +1556,6 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } @@ -1509,7 +1575,6 @@ * page before we can (re)lock the parent. * Hence we can get here. */ - p = next; continue; } if ( @@ -1531,7 +1596,6 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } @@ -1555,7 +1619,6 @@ vm_page_unlock(p); /* page automatically made dirty by rename */ } - p = next; } return (r); } @@ -1666,12 +1729,6 @@ backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); - - /* - * Free any cached pages from backing_object. - */ - if (__predict_false(backing_object->cache != NULL)) - vm_page_cache_free(backing_object, 0, 0); } /* * Object now shadows whatever backing_object did. @@ -1792,75 +1849,101 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { - vm_page_t p, next; + struct vnode *vp; + vm_page_t pa[VM_RADIX_STACK]; + vm_page_t p; + int i, n; int wirings; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT((object->type != OBJT_DEVICE && object->type != OBJT_PHYS) || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); - if (object->resident_page_count == 0) - goto skipmemq; + if (object->resident_page_count == 0 && object->cached_page_count == 0) + return; + vp = NULL; vm_object_pip_add(object, 1); -again: - p = vm_page_find_least(object, start); - - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. - */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); - - /* - * If the page is wired for any reason besides the existence - * of managed, wired mappings, then it cannot be freed. For - * example, fictitious pages, which represent device memory, - * are inherently wired and cannot be freed. They can, - * however, be invalidated if the option OBJPR_CLEANONLY is - * not specified. - */ - vm_page_lock(p); - if ((wirings = p->wire_count) != 0 && - (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { +restart: + while ((n = vm_radix_lookupn(&object->rtree, start, end, VM_RADIX_ANY, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + /* + * Another thread may allocate this cached page from + * the queue before we acquire the page queue free + * mtx. 
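+ * Re-checking p->object against the object under that mutex detects + * the race; a page that was reallocated is simply skipped.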
+ */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == object) { + vm_page_cache_free(p); + if (object->type == OBJT_VNODE && + object->cached_page_count == 0) + vp = object->handle; + } + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != object) + continue; + /* + * If the page is wired for any reason besides + * the existence of managed, wired mappings, then + * it cannot be freed. For example, fictitious + * pages, which represent device memory, are + * inherently wired and cannot be freed. They can, + * however, be invalidated if the option + * OBJPR_CLEANONLY is not specified. + */ + vm_page_lock(p); + if ((wirings = p->wire_count) != 0 && + (wirings = pmap_page_wired_mappings(p)) != + p->wire_count) { + if ((options & OBJPR_NOTMAPPED) == 0) { + pmap_remove_all(p); + /* + * Account for removal of wired + * mappings. + */ + if (wirings != 0) + p->wire_count -= wirings; + } + if ((options & OBJPR_CLEANONLY) == 0) { + p->valid = 0; + vm_page_undirty(p); + } + vm_page_unlock(p); + continue; + } + if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) { + start = 0; + goto restart; + } + KASSERT((p->flags & PG_FICTITIOUS) == 0, + ("vm_object_page_remove: page %p is fictitious", + p)); + if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { + if ((options & OBJPR_NOTMAPPED) == 0) + pmap_remove_write(p); + if (p->dirty) { + vm_page_unlock(p); + continue; + } + } if ((options & OBJPR_NOTMAPPED) == 0) { pmap_remove_all(p); /* Account for removal of wired mappings. */ if (wirings != 0) p->wire_count -= wirings; } - if ((options & OBJPR_CLEANONLY) == 0) { - p->valid = 0; - vm_page_undirty(p); - } + vm_page_free(p); vm_page_unlock(p); - continue; } - if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) - goto again; - KASSERT((p->flags & PG_FICTITIOUS) == 0, - ("vm_object_page_remove: page %p is fictitious", p)); - if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) - pmap_remove_write(p); - if (p->dirty) { - vm_page_unlock(p); - continue; - } - } - if ((options & OBJPR_NOTMAPPED) == 0) { - pmap_remove_all(p); - /* Account for removal of wired mappings. */ - if (wirings != 0) - p->wire_count -= wirings; - } - vm_page_free(p); - vm_page_unlock(p); + if (n < VM_RADIX_STACK) + break; } vm_object_pip_wakeup(object); -skipmemq: - if (__predict_false(object->cache != NULL)) - vm_page_cache_free(object, start, end); + if (vp) + vdrop(vp); } /* @@ -2239,8 +2322,9 @@ db_printf(","); count++; - db_printf("(off=0x%jx,page=0x%jx)", - (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); + db_printf("(off=0x%jx,page=0x%jx,obj=%p,flags=0x%X)", + (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p), + p->object, p->flags); } if (count != 0) db_printf("\n"); Index: vm/vm_object.h =================================================================== --- vm/vm_object.h (.../head/sys) (revision 227753) +++ vm/vm_object.h (.../user/attilio/vmcontention/sys) (revision 227756) @@ -71,6 +71,8 @@ #include #include +#include + /* * Types defined: * @@ -87,7 +89,7 @@ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ - vm_page_t root; /* root of the resident page splay tree */ + struct vm_radix rtree; /* root of the resident page radix index tree */ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ int ref_count; /* How many refs?? 
*/ @@ -98,11 +100,11 @@ u_short pg_color; /* (c) color of first page in obj */ u_short paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ int resident_page_count; /* number of resident pages */ + int cached_page_count; /* number of cached pages */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ - vm_page_t cache; /* root of the cache page splay tree */ void *handle; union { /* Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (.../head/sys) (revision 227753) +++ vm/vm_page.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -103,6 +103,7 @@ #include #include #include +#include #include #include #include @@ -312,7 +313,6 @@ vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count; - /* * Allocate memory for use when boot strapping the kernel memory * allocator. @@ -765,63 +765,6 @@ } /* - * vm_page_splay: - * - * Implements Sleator and Tarjan's top-down splay algorithm. Returns - * the vm_page containing the given pindex. If, however, that - * pindex is not found in the vm_object, returns a vm_page that is - * adjacent to the pindex, coming before or after it. - */ -vm_page_t -vm_page_splay(vm_pindex_t pindex, vm_page_t root) -{ - struct vm_page dummy; - vm_page_t lefttreemax, righttreemin, y; - - if (root == NULL) - return (root); - lefttreemax = righttreemin = &dummy; - for (;; root = y) { - if (pindex < root->pindex) { - if ((y = root->left) == NULL) - break; - if (pindex < y->pindex) { - /* Rotate right. */ - root->left = y->right; - y->right = root; - root = y; - if ((y = root->left) == NULL) - break; - } - /* Link into the new root's right tree. */ - righttreemin->left = root; - righttreemin = root; - } else if (pindex > root->pindex) { - if ((y = root->right) == NULL) - break; - if (pindex > y->pindex) { - /* Rotate left. */ - root->right = y->left; - y->left = root; - root = y; - if ((y = root->right) == NULL) - break; - } - /* Link into the new root's left tree. */ - lefttreemax->right = root; - lefttreemax = root; - } else - break; - } - /* Assemble the new root. */ - lefttreemax->right = root->left; - righttreemin->left = root->right; - root->left = dummy.right; - root->right = dummy.left; - return (root); -} - -/* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. @@ -837,8 +780,7 @@ void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - vm_page_t root; - + vm_page_t neighbor; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (m->object != NULL) panic("vm_page_insert: page already inserted"); @@ -849,31 +791,20 @@ m->object = object; m->pindex = pindex; - /* - * Now link into the object's ordered list of backed pages. 
- */ - root = object->root; - if (root == NULL) { - m->left = NULL; - m->right = NULL; + if (object->resident_page_count == 0) { TAILQ_INSERT_TAIL(&object->memq, m, listq); - } else { - root = vm_page_splay(pindex, root); - if (pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - TAILQ_INSERT_BEFORE(root, m, listq); - } else if (pindex == root->pindex) - panic("vm_page_insert: offset already allocated"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - TAILQ_INSERT_AFTER(&object->memq, root, m, listq); - } + } else { + neighbor = vm_radix_lookup_ge(&object->rtree, pindex, + VM_RADIX_BLACK); + if (neighbor != NULL) { + KASSERT(pindex != neighbor->pindex, + ("vm_page_insert: offset already allocated")); + TAILQ_INSERT_BEFORE(neighbor, m, listq); + } else + TAILQ_INSERT_TAIL(&object->memq, m, listq); } - object->root = m; + if (vm_radix_insert(&object->rtree, pindex, m) != 0) + panic("vm_page_insert: unable to insert the new page"); /* * show that the object has one more resident page. @@ -909,7 +840,6 @@ vm_page_remove(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); @@ -921,45 +851,7 @@ vm_page_flash(m); } - /* - * Now remove from the object's list of backed pages. - */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_remove: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_remove: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order. - */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_BLACK); TAILQ_REMOVE(&object->memq, m, listq); /* @@ -988,15 +880,10 @@ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = object->root) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->root = m)->pindex != pindex) - m = NULL; - } - return (m); + + return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_BLACK); } /* @@ -1011,17 +898,12 @@ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = TAILQ_FIRST(&object->memq)) != NULL) { - if (m->pindex < pindex) { - m = vm_page_splay(pindex, object->root); - if ((object->root = m)->pindex < pindex) - m = TAILQ_NEXT(m, listq); - } - } - return (m); + if (object->resident_page_count) + return vm_radix_lookup_ge(&object->rtree, pindex, + VM_RADIX_BLACK); + return (NULL); } /* @@ -1091,71 +973,6 @@ } /* - * Convert all of the given object's cached pages that have a - * pindex within the given range into free pages. 
If the value - * zero is given for "end", then the range's upper bound is - * infinity. If the given object is backed by a vnode and it - * transitions from having one or more cached pages to none, the - * vnode's hold count is reduced. - */ -void -vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) -{ - vm_page_t m, m_next; - boolean_t empty; - - mtx_lock(&vm_page_queue_free_mtx); - if (__predict_false(object->cache == NULL)) { - mtx_unlock(&vm_page_queue_free_mtx); - return; - } - m = object->cache = vm_page_splay(start, object->cache); - if (m->pindex < start) { - if (m->right == NULL) - m = NULL; - else { - m_next = vm_page_splay(start, m->right); - m_next->left = m; - m->right = NULL; - m = object->cache = m_next; - } - } - - /* - * At this point, "m" is either (1) a reference to the page - * with the least pindex that is greater than or equal to - * "start" or (2) NULL. - */ - for (; m != NULL && (m->pindex < end || end == 0); m = m_next) { - /* - * Find "m"'s successor and remove "m" from the - * object's cache. - */ - if (m->right == NULL) { - object->cache = m->left; - m_next = NULL; - } else { - m_next = vm_page_splay(start, m->right); - m_next->left = m->left; - object->cache = m_next; - } - /* Convert "m" to a free page. */ - m->object = NULL; - m->valid = 0; - /* Clear PG_CACHED and set PG_FREE. */ - m->flags ^= PG_CACHED | PG_FREE; - KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE, - ("vm_page_cache_free: page %p has inconsistent flags", m)); - cnt.v_cache_count--; - cnt.v_free_count++; - } - empty = object->cache == NULL; - mtx_unlock(&vm_page_queue_free_mtx); - if (object->type == OBJT_VNODE && empty) - vdrop(object->handle); -} - -/* * Returns the cached page that is associated with the given * object and offset. If, however, none exists, returns NULL. * @@ -1164,15 +981,12 @@ static inline vm_page_t vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if ((m = object->cache) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->cache = m)->pindex != pindex) - m = NULL; - } - return (m); + if (object->cached_page_count != 0) + return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_RED); + return (NULL); } /* @@ -1184,104 +998,77 @@ void vm_page_cache_remove(vm_page_t m) { - vm_object_t object; - vm_page_t root; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); - object = m->object; - if (m != object->cache) { - root = vm_page_splay(m->pindex, object->cache); - KASSERT(root == m, - ("vm_page_cache_remove: page %p is not cached in object %p", - m, object)); - } - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->cache = root; + vm_radix_remove(&m->object->rtree, m->pindex, VM_RADIX_RED); + m->object->cached_page_count--; m->object = NULL; cnt.v_cache_count--; } /* - * Transfer all of the cached pages with offset greater than or - * equal to 'offidxstart' from the original object's cache to the - * new object's cache. However, any cached pages with offset - * greater than or equal to the new object's size are kept in the - * original object. Initially, the new object's cache must be - * empty. Offset 'offidxstart' in the original object must - * correspond to offset zero in the new object. 
+ * Move a given cached page from an object's cached pages to + * the free list. * - * The new object must be locked. + * The free page queue mtx and object lock must be locked. */ void -vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart, - vm_object_t new_object) +vm_page_cache_free(vm_page_t m) { - vm_page_t m, m_next; + vm_object_t object; + object = m->object; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + KASSERT((m->flags & PG_CACHED) != 0, + ("vm_page_cache_free: page %p is not cached", m)); + /* - * Insertion into an object's collection of cached pages - * requires the object to be locked. In contrast, removal does - * not. + * Replicate vm_page_cache_remove with a version that can collapse + * internal nodes since the object lock is held. */ + vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_ANY); + object->cached_page_count--; + m->object = NULL; + m->valid = 0; + /* Clear PG_CACHED and set PG_FREE. */ + m->flags ^= PG_CACHED | PG_FREE; + cnt.v_cache_count--; + cnt.v_free_count++; +} + +/* + * Attempt to rename a cached page from one object to another. If + * it fails the cached page is freed. + */ +void +vm_page_cache_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t idx) +{ + vm_object_t orig_object; + + orig_object = m->object; + VM_OBJECT_LOCK_ASSERT(orig_object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); - KASSERT(new_object->cache == NULL, - ("vm_page_cache_transfer: object %p has cached pages", - new_object)); - mtx_lock(&vm_page_queue_free_mtx); - if ((m = orig_object->cache) != NULL) { - /* - * Transfer all of the pages with offset greater than or - * equal to 'offidxstart' from the original object's - * cache to the new object's cache. - */ - m = vm_page_splay(offidxstart, m); - if (m->pindex < offidxstart) { - orig_object->cache = m; - new_object->cache = m->right; - m->right = NULL; - } else { - orig_object->cache = m->left; - new_object->cache = m; - m->left = NULL; - } - while ((m = new_object->cache) != NULL) { - if ((m->pindex - offidxstart) >= new_object->size) { - /* - * Return all of the cached pages with - * offset greater than or equal to the - * new object's size to the original - * object's cache. - */ - new_object->cache = m->left; - m->left = orig_object->cache; - orig_object->cache = m; - break; - } - m_next = vm_page_splay(m->pindex, m->right); - /* Update the page's object and offset. */ - m->object = new_object; - m->pindex -= offidxstart; - if (m_next == NULL) - break; - m->right = NULL; - m_next->left = m; - new_object->cache = m_next; - } - KASSERT(new_object->cache == NULL || - new_object->type == OBJT_SWAP, - ("vm_page_cache_transfer: object %p's type is incompatible" - " with cached pages", new_object)); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + /* + * If the insert fails we simply free the cached page. + */ + if (vm_radix_insert(&new_object->rtree, idx, m) != 0) { + vm_page_cache_free(m); + return; } - mtx_unlock(&vm_page_queue_free_mtx); + vm_radix_color(&new_object->rtree, idx, VM_RADIX_RED); + /* + * We use any color here though we know it's red so that tree + * compaction will still work. 
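+ * vm_radix_remove() only prunes empty nodes when the black bit is + * set, and VM_RADIX_ANY includes it.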
+ */ + vm_radix_remove(&orig_object->rtree, m->pindex, VM_RADIX_ANY); + m->object = new_object; + m->pindex = idx; + new_object->cached_page_count++; + orig_object->cached_page_count--; } /* @@ -1414,7 +1201,8 @@ m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + m_object->cached_page_count == 0) vp = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1666,7 +1454,8 @@ m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + m_object->cached_page_count == 0) drop = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -2249,7 +2038,7 @@ vm_page_cache(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; + int old_cached; vm_page_lock_assert(m, MA_OWNED); object = m->object; @@ -2280,46 +2069,7 @@ */ vm_pageq_remove(m); - /* - * Remove the page from the object's collection of resident - * pages. - */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_cache: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_cache: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order. - */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_color(&object->rtree, m->pindex, VM_RADIX_RED); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; @@ -2336,26 +2086,9 @@ m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; + old_cached = object->cached_page_count; + object->cached_page_count++; cnt.v_cache_count++; - root = object->cache; - if (root == NULL) { - m->left = NULL; - m->right = NULL; - } else { - root = vm_page_splay(m->pindex, root); - if (m->pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - } else if (__predict_false(m->pindex == root->pindex)) - panic("vm_page_cache: offset already cached"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - } - } - object->cache = m; #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else @@ -2373,9 +2106,9 @@ * the object's only resident page. 
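vm_page_cache_rename() is the per-page replacement for the removed bulk vm_page_cache_transfer(). A hypothetical caller moving one cached page between objects during a split could look like the sketch below; the helper name and the lack of a loop are illustrative only, and the locking is what the function itself asserts:

static void
cached_page_move_one(vm_object_t orig_object, vm_object_t new_object,
    vm_pindex_t offidxstart, vm_pindex_t idx)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(orig_object, MA_OWNED);
	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
	mtx_lock(&vm_page_queue_free_mtx);
	m = vm_radix_lookup(&orig_object->rtree, offidxstart + idx,
	    VM_RADIX_RED);
	if (m != NULL)
		/* Frees the page instead if the insert into new_object fails. */
		vm_page_cache_rename(m, new_object, idx);
	mtx_unlock(&vm_page_queue_free_mtx);
}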
*/ if (object->type == OBJT_VNODE) { - if (root == NULL && object->resident_page_count != 0) + if (old_cached == 0 && object->resident_page_count != 0) vhold(object->handle); - else if (root != NULL && object->resident_page_count == 0) + else if (old_cached != 0 && object->resident_page_count == 0) vdrop(object->handle); } } Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (.../head/sys) (revision 227753) +++ vm/vm_page.h (.../user/attilio/vmcontention/sys) (revision 227756) @@ -130,8 +130,6 @@ struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - struct vm_page *left; /* splay tree link (O) */ - struct vm_page *right; /* splay tree link (O) */ vm_object_t object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,P) */ @@ -365,9 +363,9 @@ vm_page_t vm_page_alloc_freelist(int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); -void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); +void vm_page_cache_free(vm_page_t); void vm_page_cache_remove(vm_page_t); -void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t); +void vm_page_cache_rename(vm_page_t, vm_object_t, vm_pindex_t); int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_dontneed(vm_page_t); @@ -386,7 +384,6 @@ void vm_page_requeue(vm_page_t m); void vm_page_set_valid(vm_page_t m, int base, int size); void vm_page_sleep(vm_page_t m, const char *msg); -vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unwire (vm_page_t, int); Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (.../head/sys) (revision 227753) +++ vm/vnode_pager.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -366,6 +366,7 @@ vm_ooffset_t nsize; { vm_object_t object; + struct vnode *drop; vm_page_t m; vm_pindex_t nobjsize; @@ -391,18 +392,43 @@ /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. - * - * XXX for some reason (I don't know yet), if we take a - * completely invalid page and mark it partially valid - * it can screw up NFS reads, so we don't allow the case. */ if ((nsize & PAGE_MASK) && - (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && - m->valid != 0) { - int base = (int)nsize & PAGE_MASK; - int size = PAGE_SIZE - base; + (m = vm_radix_lookup(&object->rtree, OFF_TO_IDX(nsize), + VM_RADIX_ANY)) != NULL) { + int base; + int size; /* + * Eliminate any cached page as we would have to + * do too much work to save it. + */ + if (m->flags & PG_CACHED) { + drop = NULL; + mtx_lock(&vm_page_queue_free_mtx); + if (m->object == object) { + vm_page_cache_remove(m); + if (object->cached_page_count == 0) + drop = vp; + } + mtx_unlock(&vm_page_queue_free_mtx); + if (drop) + vdrop(drop); + goto out; + } + /* + * XXX for some reason (I don't know yet), if we take a + * completely invalid page and mark it partially valid + * it can screw up NFS reads, so we don't allow the + * case. + */ + if (m->valid != 0 || m->object != object) + goto out; + + base = (int)nsize & PAGE_MASK; + size = PAGE_SIZE - base; + + /* * Clear out partial-page garbage in case * the page has been mapped. */ @@ -430,12 +456,9 @@ * replacement from working properly. 
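The object->cache == NULL tests give way to cached_page_count == 0 tests here and in the vnode_pager_setsize() hunk, and the vhold()/vdrop() transitions above suggest the underlying rule: a vnode-backed object keeps one hold on its vnode while it has resident pages and one while it has cached pages. A hypothetical helper stating the expected contribution (sketch only, inferred from these hunks):

static __inline int
vnode_object_hold_contribution(vm_object_t object)
{

	/* One hold per non-empty set, per the transitions above. */
	return ((object->resident_page_count != 0 ? 1 : 0) +
	    (object->cached_page_count != 0 ? 1 : 0));
}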
*/ vm_page_clear_dirty(m, base, PAGE_SIZE - base); - } else if ((nsize & PAGE_MASK) && - __predict_false(object->cache != NULL)) { - vm_page_cache_free(object, OFF_TO_IDX(nsize), - nobjsize); } } +out: object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_UNLOCK(object); Property changes on: contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/pf:r225232-227754 Property changes on: contrib/octeon-sdk ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/octeon-sdk:r225232-227754 Property changes on: contrib/x86emu ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/x86emu:r225232-227754 Property changes on: contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica:r225232-227754 Property changes on: cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/cddl/contrib/opensolaris:r225232-227754 Index: amd64/include/pmap.h =================================================================== --- amd64/include/pmap.h (.../head/sys) (revision 227753) +++ amd64/include/pmap.h (.../user/attilio/vmcontention/sys) (revision 227756) @@ -240,10 +240,20 @@ struct pv_chunk; struct md_page { - TAILQ_HEAD(,pv_entry) pv_list; - int pat_mode; + union { + TAILQ_HEAD(,pv_entry) pvi_list; + struct { + vm_page_t pii_left; + vm_page_t pii_right; + } pvi_siters; + } pv_structs; + int pat_mode; }; +#define pv_list pv_structs.pvi_list +#define pv_left pv_structs.pvi_siters.pii_left +#define pv_right pv_structs.pvi_siters.pii_right + /* * The kernel virtual address (KVA) of the level 4 page table page is always * within the direct map (DMAP) region. @@ -282,7 +292,7 @@ */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* Property changes on: amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/amd64/include/xen:r225232-227754 Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (.../head/sys) (revision 227753) +++ amd64/amd64/pmap.c (.../user/attilio/vmcontention/sys) (revision 227756) @@ -265,6 +265,7 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); +static vm_page_t pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -1424,7 +1425,8 @@ while (free != NULL) { m = free; - free = m->right; + free = (void *)m->object; + m->object = NULL; /* Preserve the page's PG_ZERO setting. 
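The md_page union above lets a page-table page reuse the storage of its pv list head, which is unused while the page serves as a page-table page, for the pv_left/pv_right splay links; that is also why pmap_remove_pt_page() below reinitializes pv_list once the page leaves pm_root, and why struct pv_entry's queue field becomes pv_next, since the new pv_list macro would otherwise rewrite that member name. A small sketch of the two uses of the same storage (the helper is hypothetical):

static __inline void
md_page_links_demo(vm_page_t m, boolean_t is_pt_page)
{

	if (is_pt_page) {
		/* While the page sits in pm_root, the bytes are splay links. */
		m->md.pv_left = NULL;
		m->md.pv_right = NULL;
	} else {
		/* Otherwise the same bytes are the pv entry list head. */
		TAILQ_INIT(&m->md.pv_list);
	}
}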
*/ vm_page_free_toq(m); } @@ -1443,7 +1445,7 @@ m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->object = (void *)*free; *free = m; } @@ -1461,20 +1463,20 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; + mpte->md.pv_left = NULL; + mpte->md.pv_right = NULL; } else { - root = vm_page_splay(mpte->pindex, root); + root = pmap_vmpage_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; + mpte->md.pv_left = root->md.pv_left; + mpte->md.pv_right = root; + root->md.pv_left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; + mpte->md.pv_right = root->md.pv_right; + mpte->md.pv_left = root; + root->md.pv_right = NULL; } } pmap->pm_root = mpte; @@ -1493,7 +1495,7 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); + mpte = pmap_vmpage_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } @@ -1512,18 +1514,24 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) { - root = vm_page_splay(mpte->pindex, pmap->pm_root); + root = pmap_vmpage_splay(mpte->pindex, pmap->pm_root); KASSERT(mpte == root, ("pmap_remove_pt_page: mpte %p is missing from pmap %p", mpte, pmap)); } - if (mpte->left == NULL) - root = mpte->right; + if (mpte->md.pv_left == NULL) + root = mpte->md.pv_right; else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; + root = pmap_vmpage_splay(mpte->pindex, mpte->md.pv_left); + root->md.pv_right = mpte->md.pv_right; } pmap->pm_root = root; + + /* + * Reinitialize the pv_list which could be dirty now because of the + * splay tree work. + */ + TAILQ_INIT(&mpte->md.pv_list); } /* @@ -1599,6 +1607,61 @@ } /* + * Implements Sleator and Tarjan's top-down splay algorithm. Returns + * the vm_page containing the given pindex. If, however, that + * pindex is not found in the pmap, returns a vm_page that is + * adjacent to the pindex, coming before or after it. + */ +static vm_page_t +pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root) +{ + struct vm_page dummy; + vm_page_t lefttreemax, righttreemin, y; + + if (root == NULL) + return (root); + lefttreemax = righttreemin = &dummy; + for (;; root = y) { + if (pindex < root->pindex) { + if ((y = root->md.pv_left) == NULL) + break; + if (pindex < y->pindex) { + /* Rotate right. */ + root->md.pv_left = y->md.pv_right; + y->md.pv_right = root; + root = y; + if ((y = root->md.pv_left) == NULL) + break; + } + /* Link into the new root's right tree. */ + righttreemin->md.pv_left = root; + righttreemin = root; + } else if (pindex > root->pindex) { + if ((y = root->md.pv_right) == NULL) + break; + if (pindex > y->pindex) { + /* Rotate left. */ + root->md.pv_right = y->md.pv_left; + y->md.pv_left = root; + root = y; + if ((y = root->md.pv_right) == NULL) + break; + } + /* Link into the new root's left tree. */ + lefttreemax->md.pv_right = root; + lefttreemax = root; + } else + break; + } + /* Assemble the new root. 
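With the splay links removed from struct vm_page, the pmap_free_zero_pages() and pmap_add_delayed_free_list() hunks above thread the delayed-free list through the page's object pointer instead of m->right. The link discipline, restated as hypothetical push/pop helpers (sketch only):

static __inline void
delayed_free_push(vm_page_t *free, vm_page_t m)
{

	/* The object pointer doubles as the "next" link. */
	m->object = (void *)*free;
	*free = m;
}

static __inline vm_page_t
delayed_free_pop(vm_page_t *free)
{
	vm_page_t m;

	if ((m = *free) == NULL)
		return (NULL);
	*free = (void *)m->object;
	m->object = NULL;
	return (m);
}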
*/ + lefttreemax->md.pv_right = root->md.pv_left; + righttreemin->md.pv_left = root->md.pv_right; + root->md.pv_left = dummy.md.pv_right; + root->md.pv_right = dummy.md.pv_left; + return (root); +} + +/* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ @@ -2105,7 +2168,7 @@ TAILQ_FOREACH(m, &vpq->pl, pageq) { if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy) continue; - TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ @@ -2129,7 +2192,7 @@ pmap_unuse_pt(pmap, va, *pde, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); @@ -2277,9 +2340,9 @@ pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2312,7 +2375,7 @@ pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2353,7 +2416,7 @@ pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. 
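pmap_vmpage_splay() is evidently a private copy of the removed vm_page_splay(): after the call the new root holds the requested pindex if present, otherwise an adjacent one, so an exact lookup is a splay followed by a comparison. The sketch below restates pmap_lookup_pt_page() above only to make that contract explicit:

static vm_page_t
pt_page_splay_lookup(pmap_t pmap, vm_pindex_t pindex)
{
	vm_page_t root;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	root = pmap->pm_root;
	if (root != NULL && root->pindex != pindex) {
		/* Splay the nearest pindex to the root and re-anchor it. */
		root = pmap_vmpage_splay(pindex, root);
		pmap->pm_root = root;
	}
	if (root != NULL && root->pindex != pindex)
		root = NULL;
	return (root);
}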
*/ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2405,7 +2468,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* @@ -2421,7 +2484,7 @@ if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2441,7 +2504,7 @@ (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2878,7 +2941,7 @@ vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3279,7 +3342,7 @@ if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); @@ -3959,7 +4022,7 @@ ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -3970,7 +4033,7 @@ } if (!rv && loops < 16) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4018,7 +4081,7 @@ pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4140,7 +4203,7 @@ if ((tpte & PG_PS) != 0) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) @@ -4158,7 +4221,7 @@ } } else { pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) @@ -4230,7 +4293,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4300,7 +4363,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4339,7 +4402,7 @@ return; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4348,7 +4411,7 @@ (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = 
pmap_pde(pmap, pv->pv_va); @@ -4398,7 +4461,7 @@ ("pmap_ts_referenced: page %p is not managed", m)); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); vm_page_lock_queues(); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4431,9 +4494,9 @@ if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4483,7 +4546,7 @@ return; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4514,7 +4577,7 @@ } PMAP_UNLOCK(pmap); } - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4549,7 +4612,7 @@ ("pmap_clear_reference: page %p is not managed", m)); vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4571,7 +4634,7 @@ } PMAP_UNLOCK(pmap); } - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r225232-227754