Index: conf/files =================================================================== --- conf/files (.../head/sys) (revision 233850) +++ conf/files (.../user/attilio/vmcontention/sys) (revision 233854) @@ -3511,6 +3511,7 @@ vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard +vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard Index: conf =================================================================== --- conf (.../head/sys) (revision 233850) +++ conf (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: conf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/conf:r225232-233853 Index: kern/subr_uio.c =================================================================== --- kern/subr_uio.c (.../head/sys) (revision 233850) +++ kern/subr_uio.c (.../user/attilio/vmcontention/sys) (revision 233854) @@ -85,6 +85,7 @@ vm_map_entry_t entry; vm_pindex_t upindex; vm_prot_t prot; + vm_page_bits_t vbits; boolean_t wired; KASSERT((uaddr & PAGE_MASK) == 0, @@ -95,6 +96,7 @@ * unwired in sf_buf_mext(). */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); + vbits = kern_pg->valid; kern_pg->valid = VM_PAGE_BITS_ALL; KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, ("vm_pgmoveco: kern_pg is not correctly wired")); @@ -105,6 +107,13 @@ return(EFAULT); } VM_OBJECT_LOCK(uobject); + if (vm_page_insert(kern_pg, uobject, upindex) != 0) { + kern_pg->valid = vbits; + VM_OBJECT_UNLOCK(uobject); + vm_map_lookup_done(map, entry); + return(ENOMEM); + } + vm_page_dirty(kern_pg); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) @@ -122,8 +131,6 @@ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); } - vm_page_insert(kern_pg, uobject, upindex); - vm_page_dirty(kern_pg); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (.../head/sys) (revision 233850) +++ kern/uipc_shm.c (.../user/attilio/vmcontention/sys) (revision 233854) @@ -278,7 +278,8 @@ if (base != 0) { idx = OFF_TO_IDX(length); retry: - m = vm_page_lookup(object, idx); + m = vm_radix_lookup(&object->rtree, idx, + VM_RADIX_BLACK); if (m != NULL) { if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) { Index: kern/subr_witness.c =================================================================== --- kern/subr_witness.c (.../head/sys) (revision 233850) +++ kern/subr_witness.c (.../user/attilio/vmcontention/sys) (revision 233854) @@ -602,6 +602,7 @@ * VM * */ + { "system map", &lock_class_mtx_sleep }, { "vm object", &lock_class_mtx_sleep }, { "page lock", &lock_class_mtx_sleep }, { "vm page queue mutex", &lock_class_mtx_sleep }, Index: boot/powerpc/boot1.chrp =================================================================== --- boot/powerpc/boot1.chrp (.../head/sys) (revision 233850) +++ boot/powerpc/boot1.chrp (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: boot/powerpc/boot1.chrp ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/powerpc/boot1.chrp:r225232-232160 Index: boot/powerpc/ofw =================================================================== --- boot/powerpc/ofw (.../head/sys) (revision 233850) +++ boot/powerpc/ofw (.../user/attilio/vmcontention/sys)
(revision 233854) Property changes on: boot/powerpc/ofw ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/powerpc/ofw:r225232-229659 Index: boot/i386/efi =================================================================== --- boot/i386/efi (.../head/sys) (revision 233850) +++ boot/i386/efi (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: boot/i386/efi ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/i386/efi:r225232-229659 Index: boot/ia64/efi =================================================================== --- boot/ia64/efi (.../head/sys) (revision 233850) +++ boot/ia64/efi (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: boot/ia64/efi ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/ia64/efi:r225232-229659 Index: boot/ia64/ski =================================================================== --- boot/ia64/ski (.../head/sys) (revision 233850) +++ boot/ia64/ski (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: boot/ia64/ski ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot/ia64/ski:r225232-229659 Index: boot =================================================================== --- boot (.../head/sys) (revision 233850) +++ boot (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: boot ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot:r225232-233702 Index: fs/tmpfs/tmpfs_fifoops.c =================================================================== --- fs/tmpfs/tmpfs_fifoops.c (.../head/sys) (revision 233850) +++ fs/tmpfs/tmpfs_fifoops.c (.../user/attilio/vmcontention/sys) (revision 233854) @@ -39,6 +39,7 @@ #include #include #include +#include #include #include Index: fs/tmpfs/tmpfs_vnops.c =================================================================== --- fs/tmpfs/tmpfs_vnops.c (.../head/sys) (revision 233850) +++ fs/tmpfs/tmpfs_vnops.c (.../user/attilio/vmcontention/sys) (revision 233854) @@ -510,11 +510,14 @@ offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); - if ((vobj == NULL) || - (vobj->resident_page_count == 0 && vobj->cache == NULL)) + if (vobj == NULL) goto nocache; VM_OBJECT_LOCK(vobj); + if (vobj->resident_page_count == 0 && vobj->cached_page_count == 0) { + VM_OBJECT_UNLOCK(vobj); + goto nocache; + } lookupvpg: if (((m = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(m, offset, tlen)) { @@ -638,35 +641,47 @@ offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); - if ((vobj == NULL) || - (vobj->resident_page_count == 0 && vobj->cache == NULL)) { + if (vobj == NULL) { vpg = NULL; goto nocache; } VM_OBJECT_LOCK(vobj); + if (vobj->resident_page_count == 0 && vobj->cached_page_count == 0) { + VM_OBJECT_UNLOCK(vobj); + vpg = NULL; + goto nocache; + } lookupvpg: - if (((vpg = vm_page_lookup(vobj, idx)) != NULL) && - vm_page_is_valid(vpg, offset, tlen)) { - if ((vpg->oflags & VPO_BUSY) != 0) { - /* - * Reference the page before unlocking and sleeping so - * that the page daemon is less likely to reclaim it.
- */ - vm_page_reference(vpg); - vm_page_sleep(vpg, "tmfsmw"); - goto lookupvpg; + vpg = vm_radix_lookup(&vobj->rtree, idx, VM_RADIX_ANY); + if (vpg != NULL) { + if (vm_page_is_valid(vpg, offset, tlen)) { + if ((vpg->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_reference(vpg); + vm_page_sleep(vpg, "tmfsmw"); + goto lookupvpg; + } + vm_page_busy(vpg); + vm_page_undirty(vpg); + VM_OBJECT_UNLOCK(vobj); + error = uiomove_fromphys(&vpg, offset, tlen, uio); + } else { + if (vpg->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (vpg->object == vobj) + vm_page_cache_free(vpg); + mtx_unlock(&vm_page_queue_free_mtx); + } + VM_OBJECT_UNLOCK(vobj); + vpg = NULL; } - vm_page_busy(vpg); - vm_page_undirty(vpg); + } else VM_OBJECT_UNLOCK(vobj); - error = uiomove_fromphys(&vpg, offset, tlen, uio); - } else { - if (__predict_false(vobj->cache != NULL)) - vm_page_cache_free(vobj, idx, idx + 1); - VM_OBJECT_UNLOCK(vobj); - vpg = NULL; - } nocache: VM_OBJECT_LOCK(tobj); tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | Index: vm/vm_radix.c =================================================================== --- vm/vm_radix.c (.../head/sys) (revisione 0) +++ vm/vm_radix.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -0,0 +1,890 @@ +/* + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + + +/* + * Radix tree implementation. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef UMA_MD_SMALL_ALLOC +#include +#endif +#include +#include + +#include + +#ifndef UMA_MD_SMALL_ALLOC +#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2) +#define VM_RADIX_WIDTH 4 + +/* + * Bits of height in root. + * The mask of smaller power of 2 containing VM_RADIX_LIMIT. + */ +#define VM_RADIX_HEIGHT 0x1f +#else +#define VM_RADIX_WIDTH 5 + +/* See the comment above. 
*/ +#define VM_RADIX_HEIGHT 0xf +#endif + +#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) +#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) +#define VM_RADIX_MAXVAL ((vm_pindex_t)-1) +#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) + +/* Flag bits stored in node pointers. */ +#define VM_RADIX_FLAGS 0x3 + +/* Calculates maximum value for a tree of height h. */ +#define VM_RADIX_MAX(h) \ + ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \ + (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1)) + +CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT); +CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT); + +struct vm_radix_node { + void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ + volatile uint32_t rn_count; /* Valid children. */ +}; + +CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); + +static uma_zone_t vm_radix_node_zone; + +#ifndef UMA_MD_SMALL_ALLOC +static vm_map_t rnode_map; +static u_long rnode_map_scale; + +static void * +vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) +{ + vm_offset_t addr; + vm_page_t m; + int pflags; + + /* Inform UMA that this allocator uses rnode_map. */ + *flags = UMA_SLAB_KERNEL; + + pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; + + /* + * However, as kmem_alloc_nofault() can fail, just assume that + * M_NOWAIT is in effect and act accordingly. + */ + pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : + VM_ALLOC_SYSTEM; + if ((wait & M_ZERO) != 0) + pflags |= VM_ALLOC_ZERO; + addr = kmem_alloc_nofault(rnode_map, size); + if (addr == 0) + return (NULL); + + /* Just one page allocation is assumed here. */ + m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), + pflags); + if (m == NULL) { + kmem_free(rnode_map, addr, size); + return (NULL); + } + if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + pmap_qenter(addr, &m, 1); + return ((void *)addr); +} + +static void +vm_radix_node_zone_freef(void *item, int size, uint8_t flags) +{ + vm_page_t m; + vm_offset_t voitem; + + MPASS((flags & UMA_SLAB_KERNEL) != 0); + + /* Just one page allocation is assumed here. */ + voitem = (vm_offset_t)item; + m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); + pmap_qremove(voitem, 1); + vm_page_lock(m); + vm_page_unwire(m, 0); + vm_page_free(m); + vm_page_unlock(m); + kmem_free(rnode_map, voitem, size); +} + +static void +init_vm_radix_alloc(void *dummy __unused) +{ + + uma_zone_set_max(vm_radix_node_zone, rnode_map_scale); + uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); + uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); +} +SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); +#endif + +/* + * Radix node zone destructor. + */ +#ifdef INVARIANTS +static void +vm_radix_node_zone_dtor(void *mem, int size, void *arg) +{ + struct vm_radix_node *rnode; + + rnode = mem; + KASSERT(rnode->rn_count == 0, + ("vm_radix_node_put: Freeing a node with %d children\n", + rnode->rn_count)); +} +#endif + +/* + * Allocate a radix node. Initializes all elements to 0. + */ +static __inline struct vm_radix_node * +vm_radix_node_get(void) +{ + + return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); +} + +/* + * Free a radix node. + */ +static __inline void +vm_radix_node_put(struct vm_radix_node *rnode) +{ + + uma_zfree(vm_radix_node_zone, rnode); +} +
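The path arithmetic below is easiest to see with concrete numbers. The following userland sketch mirrors the slot computation performed by vm_radix_slot(); the constants match the non-direct-map configuration (VM_RADIX_WIDTH of 4), while the program itself, including the sample index, is purely illustrative.

/*
 * Userland sketch of the slot arithmetic used by vm_radix_slot() below.
 * WIDTH = 4 mirrors the non-direct-map configuration of the patch;
 * everything else here is illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define WIDTH	4			/* VM_RADIX_WIDTH */
#define COUNT	(1 << WIDTH)		/* VM_RADIX_COUNT: 16 children */
#define MASK	(COUNT - 1)		/* VM_RADIX_MASK */

/* Same computation as vm_radix_slot(): pick WIDTH bits for this level. */
static int
slot(uint64_t index, int level)
{

	return ((index >> (level * WIDTH)) & MASK);
}

int
main(void)
{
	uint64_t pindex = 0x2a5f1;
	int level;

	/* Walking from the root (high levels) to level 0 spells the path. */
	for (level = 4; level >= 0; level--)
		printf("level %d -> slot %d\n", level, slot(pindex, level));
	return (0);
}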
+/* + * Return the position in the array for a given level. + */ +static __inline int +vm_radix_slot(vm_pindex_t index, int level) +{ + + return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); +} + +/* + * Initialize the radix node submap (for architectures not supporting + * direct-mapping) and the radix node zone. + * + * WITNESS reports a lock order reversal, for architectures not + * supporting direct-mapping, between the "system map" lock + * and the "vm object" lock. This is because the well-established ordering + * "system map" -> "vm object" is not honoured in this case as allocating + * from the radix node submap ends up adding a mapping entry to it, meaning + * it is necessary to lock the submap. However, the radix node submap is + * a leaf and self-contained, thus a deadlock cannot happen here and + * adding MTX_NOWITNESS to all map locks would be largely sub-optimal. + */ +void +vm_radix_init(void) +{ +#ifndef UMA_MD_SMALL_ALLOC + vm_offset_t maxaddr, minaddr; + + rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE; + TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale); + rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + rnode_map_scale * sizeof(struct vm_radix_node), FALSE); + rnode_map->system_map = 1; +#endif + + vm_radix_node_zone = uma_zcreate("RADIX NODE", + sizeof(struct vm_radix_node), NULL, +#ifdef INVARIANTS + vm_radix_node_zone_dtor, +#else + NULL, +#endif + NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM); +} + +/* + * Extract the root node and height from a radix tree with a single load. + */ +static __inline int +vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode) +{ + uintptr_t root; + int height; + + root = rtree->rt_root; + height = root & VM_RADIX_HEIGHT; + *rnode = (struct vm_radix_node *)(root - height); + return (height); +} + + +/* + * Set the root node and height for a radix tree. + */ +static inline void +vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode, + int height) +{ + uintptr_t root; + + root = (uintptr_t)rnode | height; + rtree->rt_root = root; +} + +static inline void +vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root, + struct vm_radix_node *iroot, int ilevel) +{ + struct vm_radix_node *rnode; + + CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d", + rtree, root, iroot, ilevel); + while (iroot != root && root != NULL) { + rnode = root; + MPASS(rnode->rn_count == 0 || rnode->rn_count == 1); + rnode->rn_count = 0; + root = rnode->rn_child[0]; + vm_radix_node_put(rnode); + } + vm_radix_setroot(rtree, iroot, ilevel); +} + +static inline void * +vm_radix_match(void *child, int color) +{ + uintptr_t c; + + c = (uintptr_t)child; + + if ((c & color) == 0) + return (NULL); + return ((void *)(c & ~VM_RADIX_FLAGS)); +} +
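Because radix nodes come from a zone whose items are aligned well beyond VM_RADIX_HEIGHT, the root pointer and the tree height can share a single word, which is what lets vm_radix_height() above recover both with one load. A minimal userland sketch of that encoding follows; the node type is a stand-in and the 0x1f mask matches the non-direct-map configuration.

/*
 * Userland sketch of the rt_root encoding used by vm_radix_height() and
 * vm_radix_setroot() above.  struct node stands in for vm_radix_node.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define HEIGHT_MASK	0x1f		/* VM_RADIX_HEIGHT */

struct node { void *child[16]; };	/* stand-in for vm_radix_node */

static uintptr_t root;			/* stand-in for rt_root */

static void
setroot(struct node *n, int height)
{

	/* The node address must leave the low bits free for the height. */
	assert(((uintptr_t)n & HEIGHT_MASK) == 0);
	root = (uintptr_t)n | height;
}

static int
getroot(struct node **n)
{
	uintptr_t r = root;
	int height = r & HEIGHT_MASK;

	*n = (struct node *)(r - height);	/* strip the height bits */
	return (height);
}

int
main(void)
{
	struct node *n, *out;

	/* aligned_alloc() plays the role of the aligned UMA zone. */
	n = aligned_alloc(32, sizeof(*n));
	setroot(n, 3);
	assert(getroot(&out) == 3 && out == n);
	free(n);
	return (0);
}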
+static void +vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level) +{ + int slot; + + MPASS(rnode != NULL && level >= 0); + + /* + * Level 0 just contains pages as children, so treat it as a special + * case: free the node and return. + */ + if (level == 0) { + CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + rnode->rn_count = 0; + vm_radix_node_put(rnode); + return; + } + for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { + if (rnode->rn_child[slot] == NULL) + continue; + CTR3(KTR_VM, + "reclaiming: node %p, level %d recursing in slot %d", + rnode, level, slot); + vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot], + level - 1); + rnode->rn_count--; + } + MPASS(rnode->rn_count == 0); + CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); + vm_radix_node_put(rnode); +} + +/* + * Inserts the key-value pair into the radix tree. Returns errno. + * Panics if the key already exists. + */ +int +vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val) +{ + struct vm_radix_node *iroot, *rnode, *root; + u_int allocmsk; + int clev, ilevel, level, slot; + + CTR3(KTR_VM, + "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val); + if (index == -1) + panic("vm_radix_insert: -1 is not a valid index.\n"); + level = vm_radix_height(rtree, &root); + /* + * Increase the height by adding nodes at the root until + * there is sufficient space. + */ + ilevel = level; + iroot = root; + while (level == 0 || index > VM_RADIX_MAX(level)) { + CTR3(KTR_VM, "insert: expanding %ju > %ju height %d", + (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level); + level++; + KASSERT(level <= VM_RADIX_LIMIT, + ("vm_radix_insert: Tree %p height %d too tall", + rtree, level)); + /* + * Only allocate tree nodes if they are needed. + */ + if (root == NULL || root->rn_count != 0) { + rnode = vm_radix_node_get(); + if (rnode == NULL) { + CTR4(KTR_VM, + "insert: tree %p, root %p, index: %ju, level: %d ENOMEM", + rtree, root, (uintmax_t)index, level); + vm_radix_unwind_heightup(rtree, root, iroot, + ilevel); + return (ENOMEM); + } + /* + * Store the new pointer with a memory barrier so + * that it is visible before the new root. + */ + if (root) { + atomic_store_rel_ptr((volatile uintptr_t *) + &rnode->rn_child[0], (uintptr_t)root); + rnode->rn_count = 1; + } + root = rnode; + } + vm_radix_setroot(rtree, root, level); + }
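The expansion loop above grows the tree one level at a time until VM_RADIX_MAX(level) can cover the new index. The hedged userland sketch below isolates just that test; the helper name is ours, WIDTH is fixed at 4, and the sample index is arbitrary.

/*
 * Userland sketch of the height-expansion test used by vm_radix_insert()
 * above: grow while the index exceeds the largest index a tree of the
 * current height can address.
 */
#include <stdio.h>
#include <stdint.h>

#define WIDTH	4
#define LIMIT	((int)(sizeof(uint64_t) * 8 / WIDTH))	/* VM_RADIX_LIMIT */

static uint64_t
radix_max(int h)			/* mirrors VM_RADIX_MAX() */
{

	return (h == LIMIT ? UINT64_MAX :
	    (((uint64_t)1 << (h * WIDTH)) - 1));
}

int
main(void)
{
	uint64_t index = 0x2a5f1;
	int level = 0;

	/* Mirrors: while (level == 0 || index > VM_RADIX_MAX(level)) */
	while (level == 0 || index > radix_max(level))
		level++;
	printf("index 0x%jx needs height %d\n", (uintmax_t)index, level);
	return (0);
}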
+ + /* Now that the tree is tall enough, fill in the path to the index. */ + allocmsk = 0; + clev = level; + rnode = root; + for (level = level - 1; level > 0; level--) { + slot = vm_radix_slot(index, level); + /* Add the required intermediate nodes. */ + if (rnode->rn_child[slot] == NULL) { + rnode->rn_child[slot] = vm_radix_node_get(); + if (rnode->rn_child[slot] == NULL) { + CTR5(KTR_VM, + "insert: tree %p, index %ju, level %d, slot %d, rnode %p ENOMEM", + rtree, (uintmax_t)index, level, slot, + rnode); + CTR4(KTR_VM, + "insert: tree %p, rnode %p, child %p, count %u ENOMEM", + rtree, rnode, rnode->rn_child[slot], + rnode->rn_count); + MPASS(level != clev || allocmsk == 0); + while (allocmsk != 0) { + rnode = root; + level = clev; + level--; + slot = vm_radix_slot(index, level); + CTR4(KTR_VM, + "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x", + root, level, slot, allocmsk); + MPASS(level >= (ffs(allocmsk) - 1)); + while (level > (ffs(allocmsk) - 1)) { + MPASS(level > 0); + slot = vm_radix_slot(index, + level); + rnode = rnode->rn_child[slot]; + level--; + } + MPASS((allocmsk & (1 << level)) != 0); + allocmsk &= ~(1 << level); + rnode->rn_count--; + vm_radix_node_put(rnode->rn_child[slot]); + rnode->rn_child[slot] = NULL; + } + vm_radix_unwind_heightup(rtree, root, iroot, + ilevel); + return (ENOMEM); + } + rnode->rn_count++; + allocmsk |= (1 << level); + } + CTR5(KTR_VM, + "insert: tree %p, index %ju, level %d, slot %d, rnode %p", + rtree, (uintmax_t)index, level, slot, rnode); + CTR4(KTR_VM, + "insert: tree %p, rnode %p, child %p, count %u", + rtree, rnode, rnode->rn_child[slot], rnode->rn_count); + rnode = rnode->rn_child[slot]; + } + + slot = vm_radix_slot(index, 0); + MPASS(rnode != NULL); + KASSERT(rnode->rn_child[slot] == NULL, + ("vm_radix_insert: Duplicate value %p at index: %lu\n", + rnode->rn_child[slot], (u_long)index)); + val = (void *)((uintptr_t)val | VM_RADIX_BLACK); + rnode->rn_child[slot] = val; + atomic_add_32(&rnode->rn_count, 1); + CTR6(KTR_VM, + "insert: tree %p, index %ju, level %d, slot %d, rnode %p, count %u", + rtree, (uintmax_t)index, level, slot, rnode, rnode->rn_count); + + return (0); +} + +/* + * Returns the value stored at the index. If the index is not present, + * NULL is returned. + */ +void * +vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + int slot; + int level; + + level = vm_radix_height(rtree, &rnode); + if (index > VM_RADIX_MAX(level)) + return (NULL); + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR6(KTR_VM, + "lookup: tree %p, index %ju, level %d, slot %d, rnode %p, child %p", + rtree, (uintmax_t)index, level, slot, rnode, + rnode->rn_child[slot]); + if (level == 0) + return (vm_radix_match(rnode->rn_child[slot], color)); + rnode = rnode->rn_child[slot]; + level--; + } + CTR2(KTR_VM, "lookup: tree %p, index %ju failed", rtree, + (uintmax_t)index); + + return (NULL); +} + +void * +vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + uintptr_t child; + int slot; + int level; + + level = vm_radix_height(rtree, &rnode); + if (index > VM_RADIX_MAX(level)) + return (NULL); + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR6(KTR_VM, + "color: tree %p, index %ju, level %d, slot %d, rnode %p, child %p", + rtree, (uintmax_t)index, level, slot, rnode, + rnode->rn_child[slot]); + if (level == 0) + break; + rnode = rnode->rn_child[slot]; + level--; + } + if (rnode == NULL || rnode->rn_child[slot] == NULL) + return (NULL); + child = (uintptr_t)rnode->rn_child[slot]; + child &= ~VM_RADIX_FLAGS; + rnode->rn_child[slot] = (void *)(child | color); + + return ((void *)child); +} +
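vm_radix_match() and vm_radix_color() work because leaf pointers are at least 4-byte aligned, leaving the two low bits free for the black/red color. The sketch below demonstrates that tagging contract in userland; the flag values match the patch, while struct page is a stand-in for struct vm_page.

/*
 * Userland sketch of the leaf-pointer color tagging used above.  The
 * color lives in the pointer's low two bits and is masked off before
 * the pointer is handed back.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define BLACK	0x1			/* VM_RADIX_BLACK */
#define RED	0x2			/* VM_RADIX_RED */
#define ANY	(BLACK | RED)		/* VM_RADIX_ANY */
#define FLAGS	0x3			/* VM_RADIX_FLAGS */

struct page { int dummy; };		/* stand-in for struct vm_page */

/* Same test as vm_radix_match(): NULL unless the colors intersect. */
static void *
match(void *child, int color)
{
	uintptr_t c = (uintptr_t)child;

	if ((c & color) == 0)
		return (NULL);
	return ((void *)(c & ~(uintptr_t)FLAGS));
}

int
main(void)
{
	static struct page pg;
	void *leaf = (void *)((uintptr_t)&pg | BLACK);

	assert(match(leaf, BLACK) == &pg);	/* black lookups see it */
	assert(match(leaf, RED) == NULL);	/* red-only lookups do not */
	assert(match(leaf, ANY) == &pg);	/* VM_RADIX_ANY sees all */
	return (0);
}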
+/* + * Find the first leaf with a valid node between *startp and end. Return + * the index of the first valid item in the leaf in *startp. + */ +static struct vm_radix_node * +vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end) +{ + struct vm_radix_node *rnode; + vm_pindex_t start; + vm_pindex_t inc; + int slot; + int level; + + start = *startp; +restart: + level = vm_radix_height(rtree, &rnode); + if (start > VM_RADIX_MAX(level) || (end && start >= end)) { + rnode = NULL; + goto out; + } + /* + * Search the tree from the top for any leaf node holding an index + * between start and end. + */ + for (level--; level; level--) { + slot = vm_radix_slot(start, level); + CTR6(KTR_VM, + "leaf: tree %p, index %ju, level %d, slot %d, rnode %p, child %p", + rtree, (uintmax_t)start, level, slot, rnode, + (rnode != NULL) ? rnode->rn_child[slot] : NULL); + if (rnode->rn_child[slot] != NULL) { + rnode = rnode->rn_child[slot]; + continue; + } + /* + * Calculate how much to increment our index by + * based on the tree level. We must truncate the + * lower bits to start from the beginning of the + * next leaf. + */ + inc = 1LL << (level * VM_RADIX_WIDTH); + start &= ~VM_RADIX_MAX(level); + + /* Avoid the start address wrapping around. */ + if ((VM_RADIX_MAXVAL - start) < inc) { + rnode = NULL; + goto out; + } + start += inc; + slot++; + CTR5(KTR_VM, + "leaf: start %ju end %ju inc %ju mask 0x%jX slot %d", + (uintmax_t)start, (uintmax_t)end, (uintmax_t)inc, + (uintmax_t)~VM_RADIX_MAX(level), slot); + for (; slot < VM_RADIX_COUNT; slot++, start += inc) { + if (end != 0 && start >= end) { + rnode = NULL; + goto out; + } + if (rnode->rn_child[slot]) { + rnode = rnode->rn_child[slot]; + break; + } + if ((VM_RADIX_MAXVAL - start) < inc) { + rnode = NULL; + goto out; + } + } + if (slot == VM_RADIX_COUNT) + goto restart; + } + +out: + *startp = start; + return (rnode); +} +
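Callers drive vm_radix_lookupn() in batches: fetch up to VM_RADIX_STACK entries, restart from the returned next index, and stop early on a short batch, as vm_object_terminate() does later in this patch. The toy program below reproduces only that calling convention over a sorted array; the lookup routine is a stand-in, not the kernel implementation.

/*
 * Toy demonstration of the batched-scan protocol used with
 * vm_radix_lookupn().  lookupn() here is an array-based stand-in that
 * honors the same contract: at most cnt values >= start, plus the next
 * index to resume from.
 */
#include <stdio.h>
#include <stdint.h>

#define STACK	8			/* VM_RADIX_STACK */

static int
lookupn(const uint64_t *keys, int nkeys, uint64_t start, uint64_t *out,
    int cnt, uint64_t *next)
{
	int i, n = 0;

	for (i = 0; i < nkeys && n < cnt; i++)
		if (keys[i] >= start)
			out[n++] = keys[i];
	*next = (n > 0) ? out[n - 1] + 1 : start;
	return (n);
}

int
main(void)
{
	/* Sparse "pindexes", standing in for pages resident in an object. */
	uint64_t keys[] = { 0, 1, 7, 42, 43, 44, 100, 511, 512, 513, 9000 };
	uint64_t batch[STACK], start = 0, next;
	int i, n, nkeys = (int)(sizeof(keys) / sizeof(keys[0]));

	/* Batch, restart, and stop early when a short batch says the
	 * tree is exhausted, as vm_object_terminate() does. */
	while ((n = lookupn(keys, nkeys, start, batch, STACK, &next)) != 0) {
		for (i = 0; i < n; i++)
			printf("visit %ju\n", (uintmax_t)batch[i]);
		if (n < STACK)
			break;
		start = next;
	}
	return (0);
}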
+/* + * Looks up as many as cnt values between start and end, and stores + * them in the caller-allocated array out. The next index can be used + * to restart the scan. This optimizes forward scans in the tree. + */ +int +vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start, + vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next) +{ + struct vm_radix_node *rnode; + void *val; + int slot; + int outidx; + + CTR3(KTR_VM, "lookupn: tree %p, start %ju, end %ju", + rtree, (uintmax_t)start, (uintmax_t)end); + if (rtree->rt_root == 0) + return (0); + outidx = 0; + while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { + slot = vm_radix_slot(start, 0); + for (; slot < VM_RADIX_COUNT; slot++, start++) { + if (end != 0 && start >= end) + goto out; + val = vm_radix_match(rnode->rn_child[slot], color); + if (val == NULL) + continue; + CTR4(KTR_VM, + "lookupn: tree %p index %ju slot %d found child %p", + rtree, (uintmax_t)start, slot, val); + out[outidx] = val; + if (++outidx == cnt) + goto out; + } + if (end != 0 && start >= end) + break; + } +out: + *next = start; + return (outidx); +} + +void +vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end, + int color, void (*iter)(void *)) +{ + struct vm_radix_node *rnode; + void *val; + int slot; + + if (rtree->rt_root == 0) + return; + while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) { + slot = vm_radix_slot(start, 0); + for (; slot < VM_RADIX_COUNT; slot++, start++) { + if (end != 0 && start >= end) + return; + val = vm_radix_match(rnode->rn_child[slot], color); + if (val) + iter(val); + } + if (end != 0 && start >= end) + return; + } +} + + +/* + * Look up any entry at a position less than or equal to index. + */ +void * +vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *rnode; + struct vm_radix_node *child; + vm_pindex_t max; + vm_pindex_t inc; + void *val; + int slot; + int level; + + CTR2(KTR_VM, + "lookup_le: tree %p, index %ju", rtree, (uintmax_t)index); +restart: + level = vm_radix_height(rtree, &rnode); + if (rnode == NULL) + return (NULL); + max = VM_RADIX_MAX(level); + if (index > max || index == 0) + index = max; + /* + * Search the tree from the top for any leaf node holding an index + * lower than 'index'. + */ + level--; + while (rnode) { + slot = vm_radix_slot(index, level); + CTR6(KTR_VM, + "lookup_le: tree %p, index %ju, level %d, slot %d, rnode %p, child %p", + rtree, (uintmax_t)index, level, slot, rnode, + rnode->rn_child[slot]); + if (level == 0) + break; + /* + * If we don't have an exact match we must start our search + * from the next leaf and adjust our index appropriately. + */ + if ((child = rnode->rn_child[slot]) == NULL) { + /* + * Calculate how much to decrement our index by + * based on the tree level. We must set the + * lower bits to start from the end of the next + * leaf. + */ + inc = 1LL << (level * VM_RADIX_WIDTH); + index |= VM_RADIX_MAX(level); + index -= inc; + slot--; + CTR4(KTR_VM, + "lookup_le: start %ju inc %ju mask 0x%jX slot %d", + (uintmax_t)index, (uintmax_t)inc, + (uintmax_t)VM_RADIX_MAX(level), slot); + for (; slot >= 0; slot--, index -= inc) { + child = rnode->rn_child[slot]; + if (child) + break; + } + } + rnode = child; + level--; + } + if (rnode) { + for (; slot >= 0; slot--, index--) { + val = vm_radix_match(rnode->rn_child[slot], color); + if (val) + return (val); + } + } + if (index != -1) + goto restart; + return (NULL); +} +
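vm_radix_lookup_le() and the vm_radix_lookup_ge() wrapper in vm_radix.h replace the splay-based predecessor/successor walk that vm_reserv_alloc_page() used to perform on object->root; the vm_reserv.c hunks further down make exactly these two calls around a pindex. A self-contained toy showing the <=/>= contract, with array-based stand-ins for the two lookups:

/*
 * Toy stand-ins for vm_radix_lookup_le()/vm_radix_lookup_ge() over a
 * sorted array; only the <=/>= neighbor contract is of interest here.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static const uint64_t *
lookup_le(const uint64_t *k, int n, uint64_t idx)
{
	const uint64_t *best = NULL;
	int i;

	for (i = 0; i < n && k[i] <= idx; i++)
		best = &k[i];
	return (best);
}

static const uint64_t *
lookup_ge(const uint64_t *k, int n, uint64_t idx)
{
	int i;

	for (i = 0; i < n; i++)
		if (k[i] >= idx)
			return (&k[i]);
	return (NULL);
}

int
main(void)
{
	uint64_t keys[] = { 10, 20, 40 };

	/* The two calls vm_reserv_alloc_page() now makes around a pindex,
	 * instead of splaying the tree to find mpred/msucc. */
	assert(*lookup_le(keys, 3, 25) == 20);	/* mpred */
	assert(*lookup_ge(keys, 3, 25) == 40);	/* msucc */
	assert(lookup_ge(keys, 3, 50) == NULL);	/* no successor */
	return (0);
}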
+/* + * Remove the specified index from the tree. If possible, the height of the + * tree is adjusted after deletion. The value stored at index is returned; + * this function panics if the key is not present. + */ +void * +vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + struct vm_radix_node *stack[VM_RADIX_LIMIT]; + struct vm_radix_node *rnode, *root; + void *val; + int level; + int slot; + + level = vm_radix_height(rtree, &root); + KASSERT(index <= VM_RADIX_MAX(level), + ("vm_radix_remove: %p index %ju out of range %jd.", + rtree, (uintmax_t)index, VM_RADIX_MAX(level))); + rnode = root; + val = NULL; + level--; + /* + * Find the node and record the path in stack. + */ + while (level && rnode) { + stack[level] = rnode; + slot = vm_radix_slot(index, level); + CTR5(KTR_VM, + "remove: tree %p, index %ju, level %d, slot %d, rnode %p", + rtree, (uintmax_t)index, level, slot, rnode); + CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u", + rtree, rnode, rnode->rn_child[slot], rnode->rn_count); + rnode = rnode->rn_child[slot]; + level--; + } + KASSERT(rnode != NULL, + ("vm_radix_remove: index %ju not present in the tree.\n", + (uintmax_t)index)); + slot = vm_radix_slot(index, 0); + val = vm_radix_match(rnode->rn_child[slot], color); + KASSERT(val != NULL, + ("vm_radix_remove: index %ju not present in the tree.\n", + (uintmax_t)index)); + + for (;;) { + CTR5(KTR_VM, + "remove: resetting tree %p, index %ju, level %d, slot %d, rnode %p", + rtree, (uintmax_t)index, level, slot, rnode); + CTR4(KTR_VM, + "remove: resetting tree %p, rnode %p, child %p, count %u", + rtree, rnode, + (rnode != NULL) ? rnode->rn_child[slot] : NULL, + (rnode != NULL) ? rnode->rn_count : 0); + rnode->rn_child[slot] = NULL; + /* + * Use atomics for the last level since red and black + * will both adjust it. + * Use a write memory barrier here in order to avoid + * rn_count reaching 0 before the actual pointer is fetched. + * Concurrent black removal, in fact, may want to reclaim + * the radix node itself before reading it. + */ + if (level == 0) + atomic_add_rel_32(&rnode->rn_count, -1); + else + rnode->rn_count--; + /* + * Only allow black removes to prune the tree. + */ + if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0) + break; + vm_radix_node_put(rnode); + if (rnode == root) { + vm_radix_setroot(rtree, NULL, 0); + break; + } + rnode = stack[++level]; + slot = vm_radix_slot(index, level); + + } + return (val); +} + +/* + * Remove and free all the nodes from the radix tree. + * This function is recursive but there is a tight control on it as the + * maximum depth of the tree is fixed. + */ +void +vm_radix_reclaim_allnodes(struct vm_radix *rtree) +{ + struct vm_radix_node *root; + int level; + + if (rtree->rt_root == 0) + return; + level = vm_radix_height(rtree, &root); + vm_radix_reclaim_allnodes_internal(root, level - 1); + rtree->rt_root = 0; +} + +#ifdef notyet +/* + * Attempts to reduce the height of the tree. + */ +void +vm_radix_shrink(struct vm_radix *rtree) +{ + struct vm_radix_node *tmp, *root; + int level; + + if (rtree->rt_root == 0) + return; + level = vm_radix_height(rtree, &root); + + /* Adjust the height of the tree. */ + while (root->rn_count == 1 && root->rn_child[0] != NULL) { + tmp = root; + root->rn_count--; + root = root->rn_child[0]; + level--; + vm_radix_node_put(tmp); + } + /* Finally see if we have an empty tree. 
*/ + if (root->rn_count == 0) { + vm_radix_node_put(root); + root = NULL; + level--; + } + vm_radix_setroot(rtree, root, level); +} +#endif Proprietà modificate su: vm/vm_radix.c ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +text/plain Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H Added: svn:eol-style ## -0,0 +1 ## +native Index: vm/vm_radix.h =================================================================== --- vm/vm_radix.h (.../head/sys) (revisione 0) +++ vm/vm_radix.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _VM_RADIX_H_ +#define _VM_RADIX_H_ + +#define VM_RADIX_BLACK 0x1 /* Black node. (leaf only) */ +#define VM_RADIX_RED 0x2 /* Red node. (leaf only) */ +#define VM_RADIX_ANY (VM_RADIX_RED | VM_RADIX_BLACK) +#define VM_RADIX_STACK 8 /* Nodes to store on stack. */ + +/* + * Radix tree root. The height and pointer are set together to permit + * coherent lookups while the root is modified. + */ +struct vm_radix { + uintptr_t rt_root; /* root + height */ +}; + +#ifdef _KERNEL + +/* + * Initialize the radix tree subsystem. + */ +void vm_radix_init(void); + +/* + * Functions which only work with black nodes. (object lock) + */ +int vm_radix_insert(struct vm_radix *, vm_pindex_t, void *); + +/* + * Functions which work on specified colors. (object, vm_page_queue_free locks) + */ +void *vm_radix_color(struct vm_radix *, vm_pindex_t, int); +void *vm_radix_lookup(struct vm_radix *, vm_pindex_t, int); +int vm_radix_lookupn(struct vm_radix *, vm_pindex_t, vm_pindex_t, int, + void **, int, vm_pindex_t *); +void *vm_radix_lookup_le(struct vm_radix *, vm_pindex_t, int); +void vm_radix_reclaim_allnodes(struct vm_radix *); +void *vm_radix_remove(struct vm_radix *, vm_pindex_t, int); +void vm_radix_foreach(struct vm_radix *, vm_pindex_t, vm_pindex_t, int, + void (*)(void *)); + +/* + * Look up any entry at a position greater or equal to index. 
+ */ +static inline void * +vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + void *val; + + if (vm_radix_lookupn(rtree, index, 0, color, &val, 1, &index)) + return (val); + return (NULL); +} + +static inline void * +vm_radix_last(struct vm_radix *rtree, int color) +{ + + return vm_radix_lookup_le(rtree, 0, color); +} + +static inline void * +vm_radix_first(struct vm_radix *rtree, int color) +{ + + return vm_radix_lookup_ge(rtree, 0, color); +} + +static inline void * +vm_radix_next(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + + if (index == -1) + return (NULL); + return vm_radix_lookup_ge(rtree, index + 1, color); +} + +static inline void * +vm_radix_prev(struct vm_radix *rtree, vm_pindex_t index, int color) +{ + + if (index == 0) + return (NULL); + return vm_radix_lookup_le(rtree, index - 1, color); +} + +#endif /* _KERNEL */ +#endif /* !_VM_RADIX_H_ */ Proprietà modificate su: vm/vm_radix.h ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +text/plain Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H Added: svn:eol-style ## -0,0 +1 ## +native Index: vm/sg_pager.c =================================================================== --- vm/sg_pager.c (.../head/sys) (revisione 233850) +++ vm/sg_pager.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -179,6 +179,10 @@ /* Construct a new fake page. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_LOCK(object); + if (vm_page_insert(page, object, offset) != 0) { + vm_page_putfake(page); + return (VM_PAGER_FAIL); + } TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq); /* Free the original pages and insert this fake page into the object. */ @@ -187,7 +191,6 @@ vm_page_free(m[i]); vm_page_unlock(m[i]); } - vm_page_insert(page, object, offset); m[reqpage] = page; page->valid = VM_PAGE_BITS_ALL; Index: vm/vm_init.c =================================================================== --- vm/vm_init.c (.../head/sys) (revisione 233850) +++ vm/vm_init.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +124,7 @@ vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); + vm_radix_init(); pmap_init(); vm_pager_init(); } Index: vm/vm_reserv.c =================================================================== --- vm/vm_reserv.c (.../head/sys) (revisione 233850) +++ vm/vm_reserv.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -341,34 +341,22 @@ /* * Look for an existing reservation. 
*/ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { + mpred = vm_radix_lookup_le(&object->rtree, pindex, VM_RADIX_BLACK); + if (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_contig: pindex already allocated")); rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; - else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_contig: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) - goto found; - else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; - } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + msucc = vm_radix_lookup_ge(&object->rtree, pindex, VM_RADIX_BLACK); + if (msucc != NULL) { + KASSERT(msucc->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } /* * Could at least one reservation fit between the first index to the @@ -507,34 +495,22 @@ /* * Look for an existing reservation. */ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { + mpred = vm_radix_lookup_le(&object->rtree, pindex, VM_RADIX_BLACK); + if (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) goto found; - else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_page: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) - goto found; - else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; - } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + msucc = vm_radix_lookup_ge(&object->rtree, pindex, VM_RADIX_BLACK); + if (msucc != NULL) { + KASSERT(msucc->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } /* * Could a reservation fit between the first index to the left that Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (.../head/sys) (revisione 233850) +++ vm/vm_object.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -164,6 +164,9 @@ vm_object_t object; object = (vm_object_t)mem; + KASSERT(object->resident_page_count == 0, + ("object %p resident_page_count = %d", + object, object->resident_page_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages", object)); @@ -172,15 +175,12 @@ ("object %p has reservations", object)); #endif - KASSERT(object->cache == NULL, + KASSERT(object->cached_page_count == 0, ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); - KASSERT(object->resident_page_count == 0, - ("object %p resident_page_count = %d", - object, object->resident_page_count)); 
KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); @@ -210,7 +210,7 @@ TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); - object->root = NULL; + object->rtree.rt_root = 0; object->type = type; object->size = size; object->generation = 1; @@ -228,7 +228,6 @@ #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif - object->cache = NULL; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -305,7 +304,7 @@ case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: - if (!TAILQ_EMPTY(&object->memq)) + if (object->resident_page_count == 0) return (KERN_FAILURE); break; case OBJT_DEAD: @@ -677,7 +676,10 @@ void vm_object_terminate(vm_object_t object) { - vm_page_t p, p_next; + vm_page_t pa[VM_RADIX_STACK]; + vm_page_t p; + vm_pindex_t start; + int n, i; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -722,43 +724,76 @@ * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. */ - TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { - KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, - ("vm_object_terminate: freeing busy page %p", p)); - vm_page_lock(p); - /* - * Optimize the page's removal from the object by resetting - * its "object" field. Specifically, if the page is not - * wired, then the effect of this assignment is that - * vm_page_free()'s call to vm_page_remove() will return - * immediately without modifying the page or the object. - */ - p->object = NULL; - if (p->wire_count == 0) { - vm_page_free(p); - PCPU_INC(cnt.v_pfree); + start = 0; + while ((n = vm_radix_lookupn(&object->rtree, start, 0, VM_RADIX_ANY, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + /* + * Another thread may allocate this cached page from + * the queue before we acquire the page queue free + * mtx. + */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == object) { + p->object = NULL; + p->valid = 0; + /* Clear PG_CACHED and set PG_FREE. */ + p->flags ^= PG_CACHED | PG_FREE; + cnt.v_cache_count--; + cnt.v_free_count++; + } + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != object) + continue; + KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, + ("vm_object_terminate: freeing busy page %p", p)); + vm_page_lock(p); + /* + * Optimize the page's removal from the object by + * resetting its "object" field. Specifically, if + * the page is not wired, then the effect of this + * assignment is that vm_page_free()'s call to + * vm_page_remove() will return immediately without + * modifying the page or the object. + * Anyway, the radix tree cannot be accessed anymore + * from within the object, thus all the nodes need + * to be reclaimed later on. + */ + p->object = NULL; + if (p->wire_count == 0) { + vm_page_free(p); + PCPU_INC(cnt.v_pfree); + } + vm_page_unlock(p); } - vm_page_unlock(p); + if (n < VM_RADIX_STACK) + break; } + vm_radix_reclaim_allnodes(&object->rtree); /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. 
*/ if (object->resident_page_count != 0) { - object->root = NULL; TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } + if (object->cached_page_count != 0) { + object->cached_page_count = 0; + if (object->type == OBJT_VNODE) + vdrop(object->handle); + } #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif - if (__predict_false(object->cache != NULL)) - vm_page_cache_free(object, 0, 0); /* * Let the pager know object is dead. @@ -1275,10 +1310,12 @@ void vm_object_split(vm_map_entry_t entry) { - vm_page_t m, m_next; + vm_page_t ma[VM_RADIX_STACK]; + vm_page_t m; vm_object_t orig_object, new_object, source; - vm_pindex_t idx, offidxstart; + vm_pindex_t idx, offidxstart, start; vm_size_t size; + int i, n; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) @@ -1331,46 +1368,65 @@ ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } + start = offidxstart; retry: - m = vm_page_find_least(orig_object, offidxstart); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); - - /* - * We must wait for pending I/O to complete before we can - * rename the page. - * - * We do not have to VM_PROT_NONE the page as mappings should - * not be changed by this operation. - */ - if ((m->oflags & VPO_BUSY) || m->busy) { - VM_OBJECT_UNLOCK(new_object); - m->oflags |= VPO_WANTED; - msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); - VM_OBJECT_LOCK(new_object); - goto retry; - } + while ((n = vm_radix_lookupn(&orig_object->rtree, start, + offidxstart + size, VM_RADIX_ANY, (void **)ma, VM_RADIX_STACK, + &start)) != 0) { + for (i = 0; i < n; i++) { + m = ma[i]; + idx = m->pindex - offidxstart; + if (m->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (m->object == orig_object) + vm_page_cache_rename(m, new_object, + idx); + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (m->object != orig_object) + continue; + /* + * We must wait for pending I/O to complete before + * we can rename the page. + * + * We do not have to VM_PROT_NONE the page as mappings + * should not be changed by this operation. + */ + if ((m->oflags & VPO_BUSY) || m->busy) { + start = m->pindex; + VM_OBJECT_UNLOCK(new_object); + m->oflags |= VPO_WANTED; + msleep(m, VM_OBJECT_MTX(orig_object), PVM, + "spltwt", 0); + VM_OBJECT_LOCK(new_object); + goto retry; + } #if VM_NRESERVLEVEL > 0 - /* - * If some of the reservation's allocated pages remain with - * the original object, then transferring the reservation to - * the new object is neither particularly beneficial nor - * particularly harmful as compared to leaving the reservation - * with the original object. If, however, all of the - * reservation's allocated pages are transferred to the new - * object, then transferring the reservation is typically - * beneficial. Determining which of these two cases applies - * would be more costly than unconditionally renaming the - * reservation. - */ - vm_reserv_rename(m, new_object, orig_object, offidxstart); + /* + * If some of the reservation's allocated pages remain + * with the original object, then transferring the + * reservation to the new object is neither + * particularly beneficial nor particularly harmful as + * compared to leaving the reservation with the + * original object. 
If, however, all of the + * reservation's allocated pages are transferred to + * the new object, then transferring the reservation + * is typically beneficial. Determining which of + * these two cases applies would be more costly than + * unconditionally renaming the reservation. + */ + vm_reserv_rename(m, new_object, orig_object, + offidxstart); #endif - vm_page_lock(m); - vm_page_rename(m, new_object, idx); - vm_page_unlock(m); - /* page automatically made dirty by rename and cache handled */ - vm_page_busy(m); + vm_page_rename(m, new_object, idx); + /* + * page automatically made dirty by rename and + * cache handled + */ + vm_page_busy(m); + } + if (n < VM_RADIX_STACK) + break; } if (orig_object->type == OBJT_SWAP) { /* @@ -1378,13 +1434,6 @@ * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); - - /* - * Transfer any cached pages from orig_object to new_object. - */ - if (__predict_false(orig_object->cache != NULL)) - vm_page_cache_transfer(orig_object, offidxstart, - new_object); } VM_OBJECT_UNLOCK(orig_object); TAILQ_FOREACH(m, &new_object->memq, listq) @@ -1403,10 +1452,13 @@ static int vm_object_backing_scan(vm_object_t object, int op) { - int r = 1; + vm_page_t pa[VM_RADIX_STACK]; vm_page_t p; vm_object_t backing_object; - vm_pindex_t backing_offset_index; + vm_pindex_t backing_offset_index, new_pindex; + vm_pindex_t start; + int color, i, n; + int r = 1; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED); @@ -1434,15 +1486,39 @@ if (op & OBSC_COLLAPSE_WAIT) { vm_object_set_flag(backing_object, OBJ_DEAD); } - + color = VM_RADIX_BLACK; + if (op & OBSC_COLLAPSE_WAIT) + color |= VM_RADIX_RED; /* * Our scan */ - p = TAILQ_FIRST(&backing_object->memq); - while (p) { - vm_page_t next = TAILQ_NEXT(p, listq); - vm_pindex_t new_pindex = p->pindex - backing_offset_index; +restart: + start = 0; + i = n = VM_RADIX_STACK; + for (;;) { + if (i == n) { + if (n < VM_RADIX_STACK) + break; + if ((n = vm_radix_lookupn(&backing_object->rtree, + start, 0, color, (void **)pa, VM_RADIX_STACK, + &start)) == 0) + break; + i = 0; + } + p = pa[i++]; + /* + * Free cached pages. XXX Why? Emulating old behavior here. + */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == backing_object) + vm_page_cache_free(p); + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != backing_object) + continue; + new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { vm_page_t pp; @@ -1454,13 +1530,9 @@ * note that we do not busy the backing object's * page. */ - if ( - p->pindex < backing_offset_index || - new_pindex >= object->size - ) { - p = next; + if (p->pindex < backing_offset_index || + new_pindex >= object->size) continue; - } /* * See if the parent has the page or if the parent's @@ -1489,12 +1561,9 @@ vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { - if ((p->oflags & VPO_BUSY) || - !p->valid || - p->busy) { - p = next; + if ((p->oflags & VPO_BUSY) || !p->valid || + p->busy) continue; - } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { VM_OBJECT_UNLOCK(object); @@ -1510,8 +1579,7 @@ * should not have changed so we * just restart our scan. */ - p = TAILQ_FIRST(&backing_object->memq); - continue; + goto restart; } } @@ -1547,7 +1615,6 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } @@ -1567,7 +1634,6 @@ * page before we can (re)lock the parent. * Hence we can get here. 
*/ - p = next; continue; } if ( @@ -1589,7 +1655,6 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } @@ -1608,12 +1673,9 @@ * If the page was mapped to a process, it can remain * mapped through the rename. */ - vm_page_lock(p); vm_page_rename(p, object, new_pindex); - vm_page_unlock(p); /* page automatically made dirty by rename */ } - p = next; } return (r); } @@ -1724,12 +1786,6 @@ backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); - - /* - * Free any cached pages from backing_object. - */ - if (__predict_false(backing_object->cache != NULL)) - vm_page_cache_free(backing_object, 0, 0); } /* * Object now shadows whatever backing_object did. @@ -1850,75 +1906,101 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { - vm_page_t p, next; + struct vnode *vp; + vm_page_t pa[VM_RADIX_STACK]; + vm_page_t p; + int i, n; int wirings; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT((object->type != OBJT_DEVICE && object->type != OBJT_PHYS) || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); - if (object->resident_page_count == 0) - goto skipmemq; + if (object->resident_page_count == 0 && object->cached_page_count == 0) + return; + vp = NULL; vm_object_pip_add(object, 1); -again: - p = vm_page_find_least(object, start); - - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. - */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); - - /* - * If the page is wired for any reason besides the existence - * of managed, wired mappings, then it cannot be freed. For - * example, fictitious pages, which represent device memory, - * are inherently wired and cannot be freed. They can, - * however, be invalidated if the option OBJPR_CLEANONLY is - * not specified. - */ - vm_page_lock(p); - if ((wirings = p->wire_count) != 0 && - (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { +restart: + while ((n = vm_radix_lookupn(&object->rtree, start, end, VM_RADIX_ANY, + (void **)pa, VM_RADIX_STACK, &start)) != 0) { + for (i = 0; i < n; i++) { + p = pa[i]; + /* + * Another thread may allocate this cached page from + * the queue before we acquire the page queue free + * mtx. + */ + if (p->flags & PG_CACHED) { + mtx_lock(&vm_page_queue_free_mtx); + if (p->object == object) { + vm_page_cache_free(p); + if (object->type == OBJT_VNODE && + object->cached_page_count == 0) + vp = object->handle; + } + mtx_unlock(&vm_page_queue_free_mtx); + continue; + } else if (p->object != object) + continue; + /* + * If the page is wired for any reason besides + * the existence of managed, wired mappings, then + * it cannot be freed. For example, fictitious + * pages, which represent device memory, are + * inherently wired and cannot be freed. They can, + * however, be invalidated if the option + * OBJPR_CLEANONLY is not specified. + */ + vm_page_lock(p); + if ((wirings = p->wire_count) != 0 && + (wirings = pmap_page_wired_mappings(p)) != + p->wire_count) { + if ((options & OBJPR_NOTMAPPED) == 0) { + pmap_remove_all(p); + /* + * Account for removal of wired + * mappings. 
+ */ + if (wirings != 0) + p->wire_count -= wirings; + } + if ((options & OBJPR_CLEANONLY) == 0) { + p->valid = 0; + vm_page_undirty(p); + } + vm_page_unlock(p); + continue; + } + if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) { + start = 0; + goto restart; + } + KASSERT((p->flags & PG_FICTITIOUS) == 0, + ("vm_object_page_remove: page %p is fictitious", + p)); + if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { + if ((options & OBJPR_NOTMAPPED) == 0) + pmap_remove_write(p); + if (p->dirty) { + vm_page_unlock(p); + continue; + } + } if ((options & OBJPR_NOTMAPPED) == 0) { pmap_remove_all(p); /* Account for removal of wired mappings. */ if (wirings != 0) p->wire_count -= wirings; } - if ((options & OBJPR_CLEANONLY) == 0) { - p->valid = 0; - vm_page_undirty(p); - } + vm_page_free(p); vm_page_unlock(p); - continue; } - if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) - goto again; - KASSERT((p->flags & PG_FICTITIOUS) == 0, - ("vm_object_page_remove: page %p is fictitious", p)); - if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) - pmap_remove_write(p); - if (p->dirty) { - vm_page_unlock(p); - continue; - } - } - if ((options & OBJPR_NOTMAPPED) == 0) { - pmap_remove_all(p); - /* Account for removal of wired mappings. */ - if (wirings != 0) - p->wire_count -= wirings; - } - vm_page_free(p); - vm_page_unlock(p); + if (n < VM_RADIX_STACK) + break; } vm_object_pip_wakeup(object); -skipmemq: - if (__predict_false(object->cache != NULL)) - vm_page_cache_free(object, start, end); + if (vp) + vdrop(vp); } /* @@ -2297,8 +2379,9 @@ db_printf(","); count++; - db_printf("(off=0x%jx,page=0x%jx)", - (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); + db_printf("(off=0x%jx,page=0x%jx,obj=%p,flags=0x%X)", + (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p), + p->object, p->flags); } if (count != 0) db_printf("\n"); Index: vm/vm_object.h =================================================================== --- vm/vm_object.h (.../head/sys) (revisione 233850) +++ vm/vm_object.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -71,6 +71,8 @@ #include #include +#include + /* * Types defined: * @@ -87,7 +89,7 @@ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ - vm_page_t root; /* root of the resident page splay tree */ + struct vm_radix rtree; /* root of the resident page radix index tree */ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ int ref_count; /* How many refs?? */ @@ -98,11 +100,11 @@ u_short pg_color; /* (c) color of first page in obj */ u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ int resident_page_count; /* number of resident pages */ + int cached_page_count; /* number of cached pages */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ - vm_page_t cache; /* root of the cache page splay tree */ void *handle; union { /* Index: vm/vm_fault.c =================================================================== --- vm/vm_fault.c (.../head/sys) (revisione 233850) +++ vm/vm_fault.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -769,9 +769,7 @@ * process'es object. The page is * automatically made dirty. 
*/ - vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); - vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; Index: vm/device_pager.c =================================================================== --- vm/device_pager.c (.../head/sys) (revisione 233850) +++ vm/device_pager.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -307,11 +307,14 @@ */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_LOCK(object); + if (vm_page_insert(page, object, offset) != 0) { + vm_page_putfake(page); + return (VM_PAGER_FAIL); + } vm_page_lock(*mres); vm_page_free(*mres); vm_page_unlock(*mres); *mres = page; - vm_page_insert(page, object, pidx); } page->valid = VM_PAGE_BITS_ALL; return (VM_PAGER_OK); Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (.../head/sys) (revisione 233850) +++ vm/vm_page.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -103,11 +103,13 @@ #include #include #include +#include #include #include #include #include +#include #include /* @@ -312,7 +314,6 @@ vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count; - /* * Allocate memory for use when boot strapping the kernel memory * allocator. @@ -765,63 +766,6 @@ } /* - * vm_page_splay: - * - * Implements Sleator and Tarjan's top-down splay algorithm. Returns - * the vm_page containing the given pindex. If, however, that - * pindex is not found in the vm_object, returns a vm_page that is - * adjacent to the pindex, coming before or after it. - */ -vm_page_t -vm_page_splay(vm_pindex_t pindex, vm_page_t root) -{ - struct vm_page dummy; - vm_page_t lefttreemax, righttreemin, y; - - if (root == NULL) - return (root); - lefttreemax = righttreemin = &dummy; - for (;; root = y) { - if (pindex < root->pindex) { - if ((y = root->left) == NULL) - break; - if (pindex < y->pindex) { - /* Rotate right. */ - root->left = y->right; - y->right = root; - root = y; - if ((y = root->left) == NULL) - break; - } - /* Link into the new root's right tree. */ - righttreemin->left = root; - righttreemin = root; - } else if (pindex > root->pindex) { - if ((y = root->right) == NULL) - break; - if (pindex > y->pindex) { - /* Rotate left. */ - root->right = y->left; - y->left = root; - root = y; - if ((y = root->right) == NULL) - break; - } - /* Link into the new root's left tree. */ - lefttreemax->right = root; - lefttreemax = root; - } else - break; - } - /* Assemble the new root. */ - lefttreemax->right = root->left; - righttreemin->left = root->right; - root->left = dummy.right; - root->right = dummy.left; - return (root); -} - -/* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. @@ -834,14 +778,15 @@ * The object and page must be locked. * This routine may not block. */ -void +int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - vm_page_t root; - + vm_page_t neighbor; + vm_pindex_t cpindex; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (m->object != NULL) panic("vm_page_insert: page already inserted"); + cpindex = m->pindex; /* * Record the object/offset pair in this page @@ -849,32 +794,27 @@ m->object = object; m->pindex = pindex; - /* - * Now link into the object's ordered list of backed pages. 
- */ - root = object->root; - if (root == NULL) { - m->left = NULL; - m->right = NULL; + if (object->resident_page_count == 0) { TAILQ_INSERT_TAIL(&object->memq, m, listq); - } else { - root = vm_page_splay(pindex, root); - if (pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - TAILQ_INSERT_BEFORE(root, m, listq); - } else if (pindex == root->pindex) - panic("vm_page_insert: offset already allocated"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - TAILQ_INSERT_AFTER(&object->memq, root, m, listq); - } + } else { + neighbor = vm_radix_lookup_ge(&object->rtree, pindex, + VM_RADIX_BLACK); + if (neighbor != NULL) { + KASSERT(pindex < neighbor->pindex, + ("vm_page_insert: offset %ju not less than %ju", + (uintmax_t)pindex, (uintmax_t)neighbor->pindex)); + TAILQ_INSERT_BEFORE(neighbor, m, listq); + } else + TAILQ_INSERT_TAIL(&object->memq, m, listq); } - object->root = m; + if (vm_radix_insert(&object->rtree, pindex, m) != 0) { + TAILQ_REMOVE(&object->memq, m, listq); + m->object = NULL; + m->pindex = cpindex; + return (ENOMEM); + } + /* * show that the object has one more resident page. */ @@ -891,6 +831,7 @@ */ if (m->aflags & PGA_WRITEABLE) vm_object_set_writeable_dirty(object); + return (0); } /* @@ -909,7 +850,6 @@ vm_page_remove(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); @@ -921,45 +861,7 @@ vm_page_flash(m); } - /* - * Now remove from the object's list of backed pages. - */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_remove: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_remove: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order.
- */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_BLACK); TAILQ_REMOVE(&object->memq, m, listq); /* @@ -988,15 +890,10 @@ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = object->root) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->root = m)->pindex != pindex) - m = NULL; - } - return (m); + + return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_BLACK); } /* @@ -1011,17 +908,12 @@ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = TAILQ_FIRST(&object->memq)) != NULL) { - if (m->pindex < pindex) { - m = vm_page_splay(pindex, object->root); - if ((object->root = m)->pindex < pindex) - m = TAILQ_NEXT(m, listq); - } - } - return (m); + if (object->resident_page_count) + return vm_radix_lookup_ge(&object->rtree, pindex, + VM_RADIX_BLACK); + return (NULL); } /* @@ -1084,78 +976,24 @@ void vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { + u_int i; + MPASS(m->object != NULL); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); + + vm_page_lock(m); vm_page_remove(m); - vm_page_insert(m, new_object, new_pindex); + vm_page_unlock(m); + while (vm_page_insert(m, new_object, new_pindex) != 0) { + pagedaemon_wakeup(); + for (i = 0; i < 10000000; i++) + cpu_spinwait(); + } vm_page_dirty(m); } /* - * Convert all of the given object's cached pages that have a - * pindex within the given range into free pages. If the value - * zero is given for "end", then the range's upper bound is - * infinity. If the given object is backed by a vnode and it - * transitions from having one or more cached pages to none, the - * vnode's hold count is reduced. - */ -void -vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) -{ - vm_page_t m, m_next; - boolean_t empty; - - mtx_lock(&vm_page_queue_free_mtx); - if (__predict_false(object->cache == NULL)) { - mtx_unlock(&vm_page_queue_free_mtx); - return; - } - m = object->cache = vm_page_splay(start, object->cache); - if (m->pindex < start) { - if (m->right == NULL) - m = NULL; - else { - m_next = vm_page_splay(start, m->right); - m_next->left = m; - m->right = NULL; - m = object->cache = m_next; - } - } - - /* - * At this point, "m" is either (1) a reference to the page - * with the least pindex that is greater than or equal to - * "start" or (2) NULL. - */ - for (; m != NULL && (m->pindex < end || end == 0); m = m_next) { - /* - * Find "m"'s successor and remove "m" from the - * object's cache. - */ - if (m->right == NULL) { - object->cache = m->left; - m_next = NULL; - } else { - m_next = vm_page_splay(start, m->right); - m_next->left = m->left; - object->cache = m_next; - } - /* Convert "m" to a free page. */ - m->object = NULL; - m->valid = 0; - /* Clear PG_CACHED and set PG_FREE. */ - m->flags ^= PG_CACHED | PG_FREE; - KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE, - ("vm_page_cache_free: page %p has inconsistent flags", m)); - cnt.v_cache_count--; - cnt.v_free_count++; - } - empty = object->cache == NULL; - mtx_unlock(&vm_page_queue_free_mtx); - if (object->type == OBJT_VNODE && empty) - vdrop(object->handle); -} - -/* * Returns the cached page that is associated with the given * object and offset. 
If, however, none exists, returns NULL. * @@ -1164,15 +1002,12 @@ static inline vm_page_t vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { - vm_page_t m; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if ((m = object->cache) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->cache = m)->pindex != pindex) - m = NULL; - } - return (m); + if (object->cached_page_count != 0) + return vm_radix_lookup(&object->rtree, pindex, VM_RADIX_RED); + return (NULL); } /* @@ -1184,104 +1019,77 @@ void vm_page_cache_remove(vm_page_t m) { - vm_object_t object; - vm_page_t root; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); - object = m->object; - if (m != object->cache) { - root = vm_page_splay(m->pindex, object->cache); - KASSERT(root == m, - ("vm_page_cache_remove: page %p is not cached in object %p", - m, object)); - } - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->cache = root; + vm_radix_remove(&m->object->rtree, m->pindex, VM_RADIX_RED); + m->object->cached_page_count--; m->object = NULL; cnt.v_cache_count--; } /* - * Transfer all of the cached pages with offset greater than or - * equal to 'offidxstart' from the original object's cache to the - * new object's cache. However, any cached pages with offset - * greater than or equal to the new object's size are kept in the - * original object. Initially, the new object's cache must be - * empty. Offset 'offidxstart' in the original object must - * correspond to offset zero in the new object. + * Move a given cached page from an object's cached pages to + * the free list. * - * The new object must be locked. + * The free page queue mtx and object lock must be locked. */ void -vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart, - vm_object_t new_object) +vm_page_cache_free(vm_page_t m) { - vm_page_t m, m_next; + vm_object_t object; + object = m->object; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + KASSERT((m->flags & PG_CACHED) != 0, + ("vm_page_cache_free: page %p is not cached", m)); + /* - * Insertion into an object's collection of cached pages - * requires the object to be locked. In contrast, removal does - * not. + * Replicate vm_page_cache_remove with a version that can collapse + * internal nodes since the object lock is held. */ + vm_radix_remove(&object->rtree, m->pindex, VM_RADIX_ANY); + object->cached_page_count--; + m->object = NULL; + m->valid = 0; + /* Clear PG_CACHED and set PG_FREE. */ + m->flags ^= PG_CACHED | PG_FREE; + cnt.v_cache_count--; + cnt.v_free_count++; +} + +/* + * Attempt to rename a cached page from one object to another. If + * it fails the cached page is freed. + */ +void +vm_page_cache_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t idx) +{ + vm_object_t orig_object; + + orig_object = m->object; + VM_OBJECT_LOCK_ASSERT(orig_object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); - KASSERT(new_object->cache == NULL, - ("vm_page_cache_transfer: object %p has cached pages", - new_object)); - mtx_lock(&vm_page_queue_free_mtx); - if ((m = orig_object->cache) != NULL) { - /* - * Transfer all of the pages with offset greater than or - * equal to 'offidxstart' from the original object's - * cache to the new object's cache. 
- */ - m = vm_page_splay(offidxstart, m); - if (m->pindex < offidxstart) { - orig_object->cache = m; - new_object->cache = m->right; - m->right = NULL; - } else { - orig_object->cache = m->left; - new_object->cache = m; - m->left = NULL; - } - while ((m = new_object->cache) != NULL) { - if ((m->pindex - offidxstart) >= new_object->size) { - /* - * Return all of the cached pages with - * offset greater than or equal to the - * new object's size to the original - * object's cache. - */ - new_object->cache = m->left; - m->left = orig_object->cache; - orig_object->cache = m; - break; - } - m_next = vm_page_splay(m->pindex, m->right); - /* Update the page's object and offset. */ - m->object = new_object; - m->pindex -= offidxstart; - if (m_next == NULL) - break; - m->right = NULL; - m_next->left = m; - new_object->cache = m_next; - } - KASSERT(new_object->cache == NULL || - new_object->type == OBJT_SWAP, - ("vm_page_cache_transfer: object %p's type is incompatible" - " with cached pages", new_object)); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + /* + * If the insert fails we simply free the cached page. + */ + if (vm_radix_insert(&new_object->rtree, idx, m) != 0) { + vm_page_cache_free(m); + return; } - mtx_unlock(&vm_page_queue_free_mtx); + vm_radix_color(&new_object->rtree, idx, VM_RADIX_RED); + /* + * We use any color here though we know it's red so that tree + * compaction will still work. + */ + vm_radix_remove(&orig_object->rtree, m->pindex, VM_RADIX_ANY); + m->object = new_object; + m->pindex = idx; + new_object->cached_page_count++; + orig_object->cached_page_count--; } /* @@ -1415,7 +1223,8 @@ m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + m_object->cached_page_count == 0) vp = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1461,7 +1270,19 @@ if (object->memattr != VM_MEMATTR_DEFAULT && object->type != OBJT_DEVICE && object->type != OBJT_SG) pmap_page_set_memattr(m, object->memattr); - vm_page_insert(m, object, pindex); + if (vm_page_insert(m, object, pindex) != 0) { + + /* See the comment below about hold count handling. 
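+ * On failure the pending vnode drop still happens, any + * wiring requested by the caller is undone, and the page + * returns to the free lists before the pagedaemon is woken.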
*/ + if (vp != NULL) + vdrop(vp); + vm_page_lock(m); + if (req & VM_ALLOC_WIRED) + vm_page_unwire(m, 0); + vm_page_free(m); + vm_page_unlock(m); + pagedaemon_wakeup(); + return (NULL); + } } else m->pindex = pindex; @@ -1528,6 +1349,7 @@ { struct vnode *drop; vm_page_t deferred_vdrop_list, m, m_ret; + vm_pindex_t cpindex; u_int flags, oflags; int req_class; @@ -1614,6 +1436,7 @@ memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } + cpindex = pindex; for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags &= flags; @@ -1623,12 +1446,30 @@ m->oflags = oflags; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); - if (object != NULL) - vm_page_insert(m, object, pindex); - else - m->pindex = pindex; pindex++; } + for (m = m_ret; m < &m_ret[npages]; m++) { + if (object != NULL) { + if (vm_page_insert(m, object, cpindex) != 0) { + while (deferred_vdrop_list != NULL) { + vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); + deferred_vdrop_list = + deferred_vdrop_list->pageq.tqe_next; + } + for (m = m_ret; m < &m_ret[npages]; m++) { + vm_page_lock(m); + if (req & VM_ALLOC_WIRED) + vm_page_unwire(m, 0); + vm_page_free(m); + vm_page_unlock(m); + } + pagedaemon_wakeup(); + return (NULL); + } + } else + m->pindex = cpindex; + cpindex++; + } while (deferred_vdrop_list != NULL) { vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next; @@ -1674,7 +1515,8 @@ m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && + m_object->cached_page_count == 0) drop = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -2257,7 +2099,7 @@ vm_page_cache(vm_page_t m) { vm_object_t object; - vm_page_t next, prev, root; + int old_cached; vm_page_lock_assert(m, MA_OWNED); object = m->object; @@ -2288,46 +2130,7 @@ */ vm_pageq_remove(m); - /* - * Remove the page from the object's collection of resident - * pages. - */ - if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { - /* - * Since the page's successor in the list is also its parent - * in the tree, its right subtree must be empty. - */ - next->left = m->left; - KASSERT(m->right == NULL, - ("vm_page_cache: page %p has right child", m)); - } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->right == m) { - /* - * Since the page's predecessor in the list is also its parent - * in the tree, its left subtree must be empty. - */ - KASSERT(m->left == NULL, - ("vm_page_cache: page %p has left child", m)); - prev->right = m->right; - } else { - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - /* - * Move the page's successor to the root, because - * pages are usually removed in ascending order. 
- */ - if (m->right != next) - vm_page_splay(m->pindex, m->right); - next->left = m->left; - root = next; - } - object->root = root; - } + vm_radix_color(&object->rtree, m->pindex, VM_RADIX_RED); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; @@ -2344,26 +2147,9 @@ m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; + old_cached = object->cached_page_count; + object->cached_page_count++; cnt.v_cache_count++; - root = object->cache; - if (root == NULL) { - m->left = NULL; - m->right = NULL; - } else { - root = vm_page_splay(m->pindex, root); - if (m->pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - } else if (__predict_false(m->pindex == root->pindex)) - panic("vm_page_cache: offset already cached"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - } - } - object->cache = m; #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else @@ -2381,9 +2167,9 @@ * the object's only resident page. */ if (object->type == OBJT_VNODE) { - if (root == NULL && object->resident_page_count != 0) + if (old_cached == 0 && object->resident_page_count != 0) vhold(object->handle); - else if (root != NULL && object->resident_page_count == 0) + else if (old_cached != 0 && object->resident_page_count == 0) vdrop(object->handle); } } @@ -2908,11 +2694,8 @@ pindex = m->pindex; retry_alloc: - pmap_remove_all(m); - vm_page_remove(m); mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (mnew == NULL) { - vm_page_insert(m, object, pindex); vm_page_unlock(m); VM_OBJECT_UNLOCK(object); VM_WAIT; @@ -2938,8 +2721,9 @@ vm_page_lock(mnew); vm_page_free(mnew); vm_page_unlock(mnew); - vm_page_insert(m, object, pindex); } else { /* clear COW & copy page */ + pmap_remove_all(m); + vm_page_remove(m); if (!so_zerocp_fullpage) pmap_copy_page(m, mnew); mnew->valid = VM_PAGE_BITS_ALL; Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (.../head/sys) (revisione 233850) +++ vm/vm_page.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -130,8 +130,6 @@ struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - struct vm_page *left; /* splay tree link (O) */ - struct vm_page *right; /* splay tree link (O) */ vm_object_t object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,P) */ @@ -379,16 +377,16 @@ vm_page_t vm_page_alloc_freelist(int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); -void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); +void vm_page_cache_free(vm_page_t); void vm_page_cache_remove(vm_page_t); -void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t); +void vm_page_cache_rename(vm_page_t, vm_object_t, vm_pindex_t); int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_dontneed(vm_page_t); void vm_page_deactivate (vm_page_t); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); -void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); +int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); @@ -400,7 +398,6 @@ void vm_page_requeue(vm_page_t m); void 
vm_page_set_valid_range(vm_page_t m, int base, int size); void vm_page_sleep(vm_page_t m, const char *msg); -vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unwire (vm_page_t, int); Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (.../head/sys) (revisione 233850) +++ vm/vnode_pager.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -375,6 +375,7 @@ vm_ooffset_t nsize; { vm_object_t object; + struct vnode *drop; vm_page_t m; vm_pindex_t nobjsize; @@ -400,18 +401,43 @@ /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. - * - * XXX for some reason (I don't know yet), if we take a - * completely invalid page and mark it partially valid - * it can screw up NFS reads, so we don't allow the case. */ if ((nsize & PAGE_MASK) && - (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && - m->valid != 0) { - int base = (int)nsize & PAGE_MASK; - int size = PAGE_SIZE - base; + (m = vm_radix_lookup(&object->rtree, OFF_TO_IDX(nsize), + VM_RADIX_ANY)) != NULL) { + int base; + int size; /* + * Eliminate any cached page as we would have to + * do too much work to save it. + */ + if (m->flags & PG_CACHED) { + drop = NULL; + mtx_lock(&vm_page_queue_free_mtx); + if (m->object == object) { + vm_page_cache_remove(m); + if (object->cached_page_count == 0) + drop = vp; + } + mtx_unlock(&vm_page_queue_free_mtx); + if (drop) + vdrop(drop); + goto out; + } + /* + * XXX for some reason (I don't know yet), if we take a + * completely invalid page and mark it partially valid + * it can screw up NFS reads, so we don't allow the + * case. + */ + if (m->valid != 0 || m->object != object) + goto out; + + base = (int)nsize & PAGE_MASK; + size = PAGE_SIZE - base; + + /* * Clear out partial-page garbage in case * the page has been mapped. */ @@ -439,12 +465,9 @@ * replacement from working properly. 
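 * Cached pages never get this far: they are eliminated * above, under the free queue mutex, before any dirty-bit * bookkeeping is attempted.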
*/ vm_page_clear_dirty(m, base, PAGE_SIZE - base); - } else if ((nsize & PAGE_MASK) && - __predict_false(object->cache != NULL)) { - vm_page_cache_free(object, OFF_TO_IDX(nsize), - nobjsize); } } +out: object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_UNLOCK(object); Index: i386/include/pmap.h =================================================================== --- i386/include/pmap.h (.../head/sys) (revisione 233850) +++ i386/include/pmap.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -426,10 +426,20 @@ struct pv_chunk; struct md_page { - TAILQ_HEAD(,pv_entry) pv_list; - int pat_mode; + union { + TAILQ_HEAD(,pv_entry) pvi_list; + struct { + vm_page_t pii_left; + vm_page_t pii_right; + } pvi_siters; + } pv_structs; + int pat_mode; }; +#define pv_list pv_structs.pvi_list +#define pv_left pv_structs.pvi_siters.pii_left +#define pv_right pv_structs.pvi_siters.pii_right + struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ @@ -468,7 +478,7 @@ */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* Index: i386/conf/XENHVM =================================================================== --- i386/conf/XENHVM (.../head/sys) (revisione 233850) +++ i386/conf/XENHVM (.../user/attilio/vmcontention/sys) (revisione 233854) Proprietà modificate su: i386/conf/XENHVM ___________________________________________________________________ Modified: svn:mergeinfo Fuso /head/sys/i386/conf/XENHVM:r230184-230983 Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (.../head/sys) (revisione 233850) +++ i386/i386/pmap.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -326,6 +326,7 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); +static vm_page_t pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -1553,7 +1554,8 @@ while (free != NULL) { m = free; - free = m->right; + free = (void *)m->object; + m->object = NULL; /* Preserve the page's PG_ZERO setting. 
*/ vm_page_free_toq(m); } @@ -1572,7 +1574,7 @@ m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->object = (void *)*free; *free = m; } @@ -1590,20 +1592,20 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; + mpte->md.pv_left = NULL; + mpte->md.pv_right = NULL; } else { - root = vm_page_splay(mpte->pindex, root); + root = pmap_vmpage_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; + mpte->md.pv_left = root->md.pv_left; + mpte->md.pv_right = root; + root->md.pv_left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; + mpte->md.pv_right = root->md.pv_right; + mpte->md.pv_left = root; + root->md.pv_right = NULL; } } pmap->pm_root = mpte; @@ -1622,7 +1624,7 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); + mpte = pmap_vmpage_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } @@ -1641,14 +1643,20 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) - vm_page_splay(mpte->pindex, pmap->pm_root); - if (mpte->left == NULL) - root = mpte->right; + pmap_vmpage_splay(mpte->pindex, pmap->pm_root); + if (mpte->md.pv_left == NULL) + root = mpte->md.pv_right; else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; + root = pmap_vmpage_splay(mpte->pindex, mpte->md.pv_left); + root->md.pv_right = mpte->md.pv_right; } pmap->pm_root = root; + + /* + * Reinitialize the pv_list which could be dirty now because of the + * splay tree work. + */ + TAILQ_INIT(&mpte->md.pv_list); } /* @@ -1701,6 +1709,61 @@ } /* + * Implements Sleator and Tarjan's top-down splay algorithm. Returns + * the vm_page containing the given pindex. If, however, that + * pindex is not found in the pmap, returns a vm_page that is + * adjacent to the pindex, coming before or after it. + */ +static vm_page_t +pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root) +{ + struct vm_page dummy; + vm_page_t lefttreemax, righttreemin, y; + + if (root == NULL) + return (root); + lefttreemax = righttreemin = &dummy; + for (;; root = y) { + if (pindex < root->pindex) { + if ((y = root->md.pv_left) == NULL) + break; + if (pindex < y->pindex) { + /* Rotate right. */ + root->md.pv_left = y->md.pv_right; + y->md.pv_right = root; + root = y; + if ((y = root->md.pv_left) == NULL) + break; + } + /* Link into the new root's right tree. */ + righttreemin->md.pv_left = root; + righttreemin = root; + } else if (pindex > root->pindex) { + if ((y = root->md.pv_right) == NULL) + break; + if (pindex > y->pindex) { + /* Rotate left. */ + root->md.pv_right = y->md.pv_left; + y->md.pv_left = root; + root = y; + if ((y = root->md.pv_right) == NULL) + break; + } + /* Link into the new root's left tree. */ + lefttreemax->md.pv_right = root; + lefttreemax = root; + } else + break; + } + /* Assemble the new root. */ + lefttreemax->md.pv_right = root->md.pv_left; + righttreemin->md.pv_left = root->md.pv_right; + root->md.pv_left = dummy.md.pv_right; + root->md.pv_right = dummy.md.pv_left; + return (root); +} + +/* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. 
*/ @@ -2216,7 +2279,7 @@ TAILQ_FOREACH(m, &vpq->pl, pageq) { if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy) continue; - TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ @@ -2240,7 +2303,7 @@ pmap_unuse_pt(pmap, va, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); @@ -2389,9 +2452,9 @@ pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2419,7 +2482,7 @@ pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2455,7 +2518,7 @@ pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2502,7 +2565,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* @@ -2518,7 +2581,7 @@ if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2538,7 +2601,7 @@ (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2989,7 +3052,7 @@ vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3421,7 +3484,7 @@ if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); @@ -4129,7 +4192,7 @@ ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4140,7 +4203,7 @@ } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4192,7 +4255,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, 
pv->pv_va); @@ -4317,7 +4380,7 @@ if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) @@ -4335,7 +4398,7 @@ } } else { pmap->pm_stats.resident_count--; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -4412,7 +4475,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4485,7 +4548,7 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4528,7 +4591,7 @@ if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4538,7 +4601,7 @@ PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4597,7 +4660,7 @@ sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4631,9 +4694,9 @@ if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4687,7 +4750,7 @@ if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4724,7 +4787,7 @@ PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4768,7 +4831,7 @@ if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4791,7 +4854,7 @@ PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -5305,7 +5368,7 @@ printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); printf(" -> pmap %p, va %x", 
(void *)pmap, pv->pv_va); pads(pmap); Index: contrib/pf =================================================================== --- contrib/pf (.../head/sys) (revision 233850) +++ contrib/pf (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/pf:r225232-233853 Index: contrib/octeon-sdk =================================================================== --- contrib/octeon-sdk (.../head/sys) (revision 233850) +++ contrib/octeon-sdk (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/octeon-sdk ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/octeon-sdk:r225232-232948 Index: contrib/x86emu =================================================================== --- contrib/x86emu (.../head/sys) (revision 233850) +++ contrib/x86emu (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/x86emu ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/x86emu:r225232-229659 Index: contrib/dev/acpica/include =================================================================== --- contrib/dev/acpica/include (.../head/sys) (revision 233850) +++ contrib/dev/acpica/include (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/include ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/include:r225232-233702 Merged /vendor-sys/acpica/dist/include:r193249-193331 Index: contrib/dev/acpica/components/debugger =================================================================== --- contrib/dev/acpica/components/debugger (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/debugger (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/debugger ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/debugger:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/debugger:r225232-230983,231844-233702 Index: contrib/dev/acpica/components/events =================================================================== --- contrib/dev/acpica/components/events (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/events (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/events ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/events:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/events:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/executer =================================================================== --- contrib/dev/acpica/components/executer (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/executer (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/executer ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/executer:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/executer:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/dispatcher =================================================================== --- contrib/dev/acpica/components/dispatcher (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/dispatcher (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/dispatcher ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/dispatcher:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/dispatcher:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/resources =================================================================== --- contrib/dev/acpica/components/resources (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/resources (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/resources ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/resources:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/resources:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/tables =================================================================== --- contrib/dev/acpica/components/tables (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/tables (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/tables ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/tables:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/tables:r225232-230983,231844-233702 Index: contrib/dev/acpica/components/utilities =================================================================== --- contrib/dev/acpica/components/utilities (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/utilities (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/utilities ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/utilities:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/utilities:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/namespace =================================================================== --- contrib/dev/acpica/components/namespace (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/namespace (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/namespace ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/namespace:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/namespace:r225232-230983,231844-233702 Index: contrib/dev/acpica/components/parser =================================================================== --- contrib/dev/acpica/components/parser (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/parser (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/parser ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/parser:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/parser:r225232-230983,231844-233702 Index: contrib/dev/acpica/components/disassembler =================================================================== --- contrib/dev/acpica/components/disassembler (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/disassembler (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/disassembler ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/disassembler:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/disassembler:r225232-230983,231844-232160 Index: contrib/dev/acpica/components/hardware =================================================================== --- contrib/dev/acpica/components/hardware (.../head/sys) (revision 233850) +++ contrib/dev/acpica/components/hardware (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/components/hardware ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/multi-fibv6/head/sys/contrib/dev/acpica/components/hardware:r230929-231845 Merged /head/sys/contrib/dev/acpica/components/hardware:r225232-230983,231844-233702 Index: contrib/dev/acpica/changes.txt =================================================================== --- contrib/dev/acpica/changes.txt (.../head/sys) (revision 233850) +++ contrib/dev/acpica/changes.txt (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/changes.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /vendor-sys/acpica/dist/changes.txt:r193249-193331 Merged /head/sys/contrib/dev/acpica/changes.txt:r225232-233702 Index: contrib/dev/acpica/os_specific =================================================================== --- contrib/dev/acpica/os_specific (.../head/sys) (revision 233850) +++ contrib/dev/acpica/os_specific (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/os_specific ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/os_specific:r225232-232160 Index: contrib/dev/acpica/common =================================================================== --- contrib/dev/acpica/common (.../head/sys) (revision 233850) +++ contrib/dev/acpica/common (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/common ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/common:r225232-233702 Index: contrib/dev/acpica/compiler =================================================================== --- contrib/dev/acpica/compiler (.../head/sys) (revision 233850) +++ contrib/dev/acpica/compiler (.../user/attilio/vmcontention/sys) (revision 233854) Property changes on: contrib/dev/acpica/compiler ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica/compiler:r225232-233702 Index: contrib/dev/acpica =================================================================== --- contrib/dev/acpica
(.../head/sys) (revisione 233850) +++ contrib/dev/acpica (.../user/attilio/vmcontention/sys) (revisione 233854) Proprietà modificate su: contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo Fuso /head/sys/contrib/dev/acpica:r225232-233702 Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (.../head/sys) (revisione 233850) +++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -323,26 +323,32 @@ VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); for (;;) { - if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { - if ((pp->oflags & VPO_BUSY) != 0) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. - */ - vm_page_reference(pp); - vm_page_sleep(pp, "zfsmwb"); - continue; + pp = vm_radix_lookup(&obj->rtree, OFF_TO_IDX(start), + VM_RADIX_ANY); + if (pp != NULL) { + if (vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { + if ((pp->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking + * and sleeping so that the page + * daemon is less likely to reclaim it. + */ + vm_page_reference(pp); + vm_page_sleep(pp, "zfsmwb"); + continue; + } + vm_page_busy(pp); + vm_page_undirty(pp); + } else { + if (obj->cached_page_count != 0 && + (pp->flags & PG_CACHED) != 0) { + mtx_lock(&vm_page_queue_free_mtx); + if (pp->object == obj) + vm_page_cache_free(pp); + mtx_unlock(&vm_page_queue_free_mtx); + } + pp = NULL; } - vm_page_busy(pp); - vm_page_undirty(pp); - } else { - if (__predict_false(obj->cache != NULL)) { - vm_page_cache_free(obj, OFF_TO_IDX(start), - OFF_TO_IDX(start) + 1); - } - pp = NULL; } break; } Index: cddl/contrib/opensolaris =================================================================== --- cddl/contrib/opensolaris (.../head/sys) (revisione 233850) +++ cddl/contrib/opensolaris (.../user/attilio/vmcontention/sys) (revisione 233854) Proprietà modificate su: cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Fuso /head/sys/cddl/contrib/opensolaris:r225232-233702 Index: cddl/compat/opensolaris/sys/vnode.h =================================================================== --- cddl/compat/opensolaris/sys/vnode.h (.../head/sys) (revisione 233850) +++ cddl/compat/opensolaris/sys/vnode.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -75,8 +75,7 @@ #define vn_mountedvfs(vp) ((vp)->v_mountedhere) #define vn_has_cached_data(vp) \ ((vp)->v_object != NULL && \ - ((vp)->v_object->resident_page_count > 0 || \ - (vp)->v_object->cache != NULL)) + (vp)->v_object->cached_page_count > 0) #define vn_exists(vp) do { } while (0) #define vn_invalid(vp) do { } while (0) #define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0) Index: amd64/include/pmap.h =================================================================== --- amd64/include/pmap.h (.../head/sys) (revisione 233850) +++ amd64/include/pmap.h (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -240,10 +240,20 @@ struct pv_chunk; struct md_page { - TAILQ_HEAD(,pv_entry) pv_list; - int pat_mode; + union { + TAILQ_HEAD(,pv_entry) pvi_list; + struct { + vm_page_t pii_left; + vm_page_t pii_right; + } pvi_siters; + } pv_structs; + int pat_mode; }; +#define pv_list pv_structs.pvi_list 
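+/* + * pv_list is used only while a page holds managed mappings, and + * pv_left/pv_right only while it serves as a page-table page in the + * pmap's splay tree, so the two layouts can safely share storage in + * the union above. + */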
+#define pv_left pv_structs.pvi_siters.pii_left +#define pv_right pv_structs.pvi_siters.pii_right + /* * The kernel virtual address (KVA) of the level 4 page table page is always * within the direct map (DMAP) region. @@ -282,7 +292,7 @@ */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* Index: amd64/include/xen =================================================================== --- amd64/include/xen (.../head/sys) (revisione 233850) +++ amd64/include/xen (.../user/attilio/vmcontention/sys) (revisione 233854) Proprietà modificate su: amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo Fuso /head/sys/amd64/include/xen:r225232-229659 Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (.../head/sys) (revisione 233850) +++ amd64/amd64/pmap.c (.../user/attilio/vmcontention/sys) (revisione 233854) @@ -259,6 +259,7 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); +static vm_page_t pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -1379,7 +1380,8 @@ while (free != NULL) { m = free; - free = m->right; + free = (void *)m->object; + m->object = NULL; /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } @@ -1398,7 +1400,7 @@ m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->object = (void *)*free; *free = m; } @@ -1416,20 +1418,20 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; + mpte->md.pv_left = NULL; + mpte->md.pv_right = NULL; } else { - root = vm_page_splay(mpte->pindex, root); + root = pmap_vmpage_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; + mpte->md.pv_left = root->md.pv_left; + mpte->md.pv_right = root; + root->md.pv_left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; + mpte->md.pv_right = root->md.pv_right; + mpte->md.pv_left = root; + root->md.pv_right = NULL; } } pmap->pm_root = mpte; @@ -1448,7 +1450,7 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); + mpte = pmap_vmpage_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } @@ -1467,18 +1469,24 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) { - root = vm_page_splay(mpte->pindex, pmap->pm_root); + root = pmap_vmpage_splay(mpte->pindex, pmap->pm_root); KASSERT(mpte == root, ("pmap_remove_pt_page: mpte %p is missing from pmap %p", mpte, pmap)); } - if (mpte->left == NULL) - root = mpte->right; + if (mpte->md.pv_left == NULL) + root = mpte->md.pv_right; else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; + root = pmap_vmpage_splay(mpte->pindex, mpte->md.pv_left); + root->md.pv_right = mpte->md.pv_right; } pmap->pm_root = root; + + /* + * Reinitialize the pv_list which could be dirty now because of the + * splay tree work. 
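+ * (pv_list shares storage with the pv_left/pv_right splay + * links in the md_page union, so the rotations above may + * have clobbered the queue head.)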
+ */ + TAILQ_INIT(&mpte->md.pv_list); } /* @@ -1554,6 +1562,61 @@ } /* + * Implements Sleator and Tarjan's top-down splay algorithm. Returns + * the vm_page containing the given pindex. If, however, that + * pindex is not found in the pmap, returns a vm_page that is + * adjacent to the pindex, coming before or after it. + */ +static vm_page_t +pmap_vmpage_splay(vm_pindex_t pindex, vm_page_t root) +{ + struct vm_page dummy; + vm_page_t lefttreemax, righttreemin, y; + + if (root == NULL) + return (root); + lefttreemax = righttreemin = &dummy; + for (;; root = y) { + if (pindex < root->pindex) { + if ((y = root->md.pv_left) == NULL) + break; + if (pindex < y->pindex) { + /* Rotate right. */ + root->md.pv_left = y->md.pv_right; + y->md.pv_right = root; + root = y; + if ((y = root->md.pv_left) == NULL) + break; + } + /* Link into the new root's right tree. */ + righttreemin->md.pv_left = root; + righttreemin = root; + } else if (pindex > root->pindex) { + if ((y = root->md.pv_right) == NULL) + break; + if (pindex > y->pindex) { + /* Rotate left. */ + root->md.pv_right = y->md.pv_left; + y->md.pv_left = root; + root = y; + if ((y = root->md.pv_right) == NULL) + break; + } + /* Link into the new root's left tree. */ + lefttreemax->md.pv_right = root; + lefttreemax = root; + } else + break; + } + /* Assemble the new root. */ + lefttreemax->md.pv_right = root->md.pv_left; + righttreemin->md.pv_left = root->md.pv_right; + root->md.pv_left = dummy.md.pv_right; + root->md.pv_right = dummy.md.pv_left; + return (root); +} + +/* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ @@ -2060,7 +2123,7 @@ TAILQ_FOREACH(m, &vpq->pl, pageq) { if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy) continue; - TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ @@ -2084,7 +2147,7 @@ pmap_unuse_pt(pmap, va, *pde, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); @@ -2226,9 +2289,9 @@ pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2261,7 +2324,7 @@ pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2302,7 +2365,7 @@ pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. 
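 * (one pv entry for each 4KB page now covered by the * single superpage mapping)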
@@ -2354,7 +2417,7 @@
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
         pv = get_pv_entry(pmap, FALSE);
         pv->pv_va = va;
-        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 }
 
 /*
@@ -2369,7 +2432,7 @@
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
         if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
                 pv->pv_va = va;
-                TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+                TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
                 return (TRUE);
         } else
                 return (FALSE);
@@ -2388,7 +2451,7 @@
         if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
                 pv->pv_va = va;
                 pvh = pa_to_pvh(pa);
-                TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
+                TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
                 return (TRUE);
         } else
                 return (FALSE);
@@ -2828,7 +2891,7 @@
                         vm_page_dirty(m);
                 pmap_unuse_pt(pmap, pv->pv_va, *pde, &free);
                 pmap_invalidate_page(pmap, pv->pv_va);
-                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+                TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
                 free_pv_entry(pmap, pv);
                 PMAP_UNLOCK(pmap);
         }
@@ -3247,7 +3310,7 @@
                 if (pv == NULL)
                         pv = get_pv_entry(pmap, FALSE);
                 pv->pv_va = va;
-                TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+                TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
                 pa |= PG_MANAGED;
         } else if (pv != NULL)
                 free_pv_entry(pmap, pv);
@@ -3929,7 +3992,7 @@
             ("pmap_page_exists_quick: page %p is not managed", m));
         rv = FALSE;
         vm_page_lock_queues();
-        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
                 if (PV_PMAP(pv) == pmap) {
                         rv = TRUE;
                         break;
@@ -3940,7 +4003,7 @@
         }
         if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-                TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+                TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
                         if (PV_PMAP(pv) == pmap) {
                                 rv = TRUE;
                                 break;
@@ -3991,7 +4054,7 @@
         pv_entry_t pv;
 
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-        TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pte = pmap_pte(pmap, pv->pv_va);
@@ -4115,7 +4178,7 @@
                 if ((tpte & PG_PS) != 0) {
                         pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE);
                         pvh = pa_to_pvh(tpte & PG_PS_FRAME);
-                        TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
+                        TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
                         if (TAILQ_EMPTY(&pvh->pv_list)) {
                                 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
                                         if (TAILQ_EMPTY(&mt->md.pv_list))
@@ -4133,7 +4196,7 @@
                         }
                 } else {
                         pmap_resident_count_dec(pmap, 1);
-                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
                         if (TAILQ_EMPTY(&m->md.pv_list) &&
                             (m->flags & PG_FICTITIOUS) == 0) {
                                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -4207,7 +4270,7 @@
 
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
         rv = FALSE;
-        TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pte = pmap_pte(pmap, pv->pv_va);
@@ -4278,7 +4341,7 @@
 
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
         rv = FALSE;
-        TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pte = pmap_pte(pmap, pv->pv_va);
@@ -4319,7 +4382,7 @@
         if ((m->flags & PG_FICTITIOUS) != 0)
                 goto small_mappings;
         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 va = pv->pv_va;
@@ -4329,7 +4392,7 @@
                 PMAP_UNLOCK(pmap);
         }
 small_mappings:
-        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pde = pmap_pde(pmap, pv->pv_va);
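Note: the pv_left/pv_right macros added to amd64/include/pmap.h at the top of this diff, together with the TAILQ_INIT() added to pmap_remove_pt_page(), suggest the storage trick behind all of the md.pv_left/md.pv_right references: a page-table page never carries pv entries, so the pv_list head in its machine-dependent md field can be overlaid by a union and reused as the splay tree's child pointers while the page sits in pm_root; the head is reinitialized once the page leaves the tree. A hedged userland sketch of that overlay follows; the union layout, the pg_* names, and the page struct are assumptions for illustration only, since the diff does not show the md_page definition itself.

#include <assert.h>
#include <sys/queue.h>

TAILQ_HEAD(pv_head, pv_entry);  /* pv_entry may stay incomplete here */

struct page {
        union {
                struct pv_head  pvi_list;       /* view for ordinary pages */
                struct {                        /* view for page-table pages */
                        struct page     *pii_left;
                        struct page     *pii_right;
                } pvi_siters;
        } pv_structs;
};

/* Accessor macros in the style of the pmap.h hunk above (names assumed). */
#define pg_list         pv_structs.pvi_list
#define pg_left         pv_structs.pvi_siters.pii_left
#define pg_right        pv_structs.pvi_siters.pii_right

int
main(void)
{
        struct page pt, child;

        /* While the page is in the tree, the union holds child pointers... */
        child.pg_left = child.pg_right = NULL;
        pt.pg_left = &child;
        pt.pg_right = NULL;

        /*
         * ...and on removal the list-head view must be reinitialized,
         * exactly like the TAILQ_INIT(&mpte->md.pv_list) added above.
         */
        TAILQ_INIT(&pt.pg_list);
        assert(TAILQ_EMPTY(&pt.pg_list));
        return (0);
}

The payoff of the overlay is that struct vm_page itself can drop its dedicated left/right fields without growing md_page.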
@@ -4381,7 +4444,7 @@
         if ((m->flags & PG_FICTITIOUS) != 0)
                 goto small_mappings;
         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
+        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 va = pv->pv_va;
@@ -4415,9 +4478,9 @@
         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
                 pvf = pv;
                 do {
-                        pvn = TAILQ_NEXT(pv, pv_list);
-                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-                        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+                        pvn = TAILQ_NEXT(pv, pv_next);
+                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
+                        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
                         pmap = PV_PMAP(pv);
                         PMAP_LOCK(pmap);
                         pde = pmap_pde(pmap, pv->pv_va);
@@ -4469,7 +4532,7 @@
         if ((m->flags & PG_FICTITIOUS) != 0)
                 goto small_mappings;
         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 va = pv->pv_va;
@@ -4501,7 +4564,7 @@
                 PMAP_UNLOCK(pmap);
         }
 small_mappings:
-        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pde = pmap_pde(pmap, pv->pv_va);
@@ -4538,7 +4601,7 @@
         if ((m->flags & PG_FICTITIOUS) != 0)
                 goto small_mappings;
         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
+        TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 va = pv->pv_va;
@@ -4561,7 +4624,7 @@
                 PMAP_UNLOCK(pmap);
         }
 small_mappings:
-        TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+        TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
                 pmap = PV_PMAP(pv);
                 PMAP_LOCK(pmap);
                 pde = pmap_pde(pmap, pv->pv_va);
Index: .
===================================================================
--- .   (.../head/sys)   (revision 233850)
+++ .   (.../user/attilio/vmcontention/sys)   (revision 233854)

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys:r225232-233853
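Note: one other idiom in this diff deserves a closing illustration. The pair of hunks in pmap_free_zero_pages() and pmap_add_delayed_free_list() near the top of the pmap.c changes stops threading delayed-free pages through the removed m->right field and instead chains them through the object pointer, which is unused once a page is headed for the free list: an intrusive singly linked stack that costs no extra storage. Below is a minimal sketch of the idiom under assumed types; the struct, function names, and the checks in main() are illustrative, not the kernel's.

#include <assert.h>
#include <stddef.h>

struct obj;                             /* stands in for the vm_object type */

struct page {
        struct obj      *object;        /* repurposed as the next link while free */
        int             zeroed;         /* stands in for the PG_ZERO flag */
};

/* Push a page onto the delayed-free chain, as pmap_add_delayed_free_list() does. */
static void
delayed_free_push(struct page **freep, struct page *m, int zeroed)
{
        m->zeroed = zeroed;
        m->object = (void *)*freep;     /* same cast trick as the diff */
        *freep = m;
}

/* Drain the chain, mirroring the loop in pmap_free_zero_pages(). */
static int
delayed_free_drain(struct page *free)
{
        struct page *m;
        int count = 0;

        while (free != NULL) {
                m = free;
                free = (void *)m->object;
                m->object = NULL;       /* the link must not leak to the allocator */
                count++;                /* the kernel would vm_page_free_toq(m) here */
        }
        return (count);
}

int
main(void)
{
        struct page a = { NULL, 0 }, b = { NULL, 0 }, *free = NULL;

        delayed_free_push(&free, &a, 1);
        delayed_free_push(&free, &b, 0);
        assert(delayed_free_drain(free) == 2);
        assert(a.object == NULL && b.object == NULL);
        return (0);
}

Clearing the link before the page is handed back, as the added m->object = NULL line does, keeps the borrowed field from ever being misread as a real object pointer.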