Index: conf/files =================================================================== --- conf/files (revision 192031) +++ conf/files (working copy) @@ -2618,6 +2618,7 @@ vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard +vm/vm_radix_tree.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard Index: conf/options =================================================================== --- conf/options (revision 192031) +++ conf/options (working copy) @@ -1,4 +1,4 @@ -# $FreeBSD$ +# $FreeBSD: head/sys/conf/options 191762 2009-05-03 04:01:43Z imp $ # # On the handling of kernel options # @@ -46,6 +46,7 @@ ADW_ALLOW_MEMIO opt_adw.h TWA_DEBUG opt_twa.h TWA_FLASH_FIRMWARE opt_twa.h + # Debugging options. DDB DDB_BUFR_SIZE opt_ddb.h @@ -572,6 +573,7 @@ VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h VM_NRESERVLEVEL opt_vm.h VM_LEVEL_0_ORDER opt_vm.h +VM_RADIX opt_vm.h NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 192031) +++ vm/vm_map.c (working copy) @@ -1659,13 +1659,19 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, start = 0; p_start = NULL; +#ifdef VM_RADIX + if ((p = TAILQ_FIRST(&object->memq)) != NULL) + p = vm_radix_tree_lookup_ge(&object->rtree, pindex); +#else if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < pindex) { + p = vm_page_splay(pindex, object->root); if ((object->root = p)->pindex < pindex) p = TAILQ_NEXT(p, listq); } } +#endif /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (revision 192031) +++ vm/vm_object.c (working copy) @@ -63,7 +63,7 @@ */ #include -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/sys/vm/vm_object.c 191439 2009-04-23 
21:09:47Z kib $"); #include "opt_vm.h" @@ -216,7 +216,12 @@ _vm_object_allocate(objtype_t type, vm_pindex_t si TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); +#ifdef VM_RADIX + object->rtree.rt_height = 0; + object->rtree.rt_root = NULL; +#else object->root = NULL; +#endif object->type = type; object->size = size; object->generation = 1; @@ -250,8 +255,9 @@ vm_object_init(void) mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object"); - _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), - kernel_object); + _vm_object_allocate(OBJT_PHYS, + OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), + kernel_object); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); @@ -1350,9 +1356,14 @@ vm_object_split(vm_map_entry_t entry) retry: if ((m = TAILQ_FIRST(&orig_object->memq)) != NULL) { if (m->pindex < offidxstart) { +#ifdef VM_RADIX + m = vm_radix_tree_lookup_ge(&orig_object->rtree, + offidxstart); +#else m = vm_page_splay(offidxstart, orig_object->root); if ((orig_object->root = m)->pindex < offidxstart) m = TAILQ_NEXT(m, listq); +#endif } } vm_page_lock_queues(); @@ -1871,9 +1882,13 @@ vm_object_page_remove(vm_object_t object, vm_pinde again: if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < start) { +#ifdef VM_RADIX + p = vm_radix_tree_lookup_ge(&object->rtree, start); +#else p = vm_page_splay(start, object->root); if ((object->root = p)->pindex < start) p = TAILQ_NEXT(p, listq); +#endif } } vm_page_lock_queues(); Index: vm/vm_reserv.c =================================================================== --- vm/vm_reserv.c (revision 192031) +++ vm/vm_reserv.c (working copy) @@ -34,7 +34,7 @@ */ #include -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/sys/vm/vm_reserv.c 190912 2009-04-11 09:09:00Z alc $"); #include "opt_vm.h" @@ -311,8 +311,42 @@ 
vm_reserv_alloc_page(vm_object_t object, vm_pindex /* * Look for an existing reservation. */ +#ifdef VM_RADIX + mpred = vm_radix_tree_lookup_le(&object->rtree, pindex); + if (mpred != NULL) { + KASSERT(mpred->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(mpred); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { + m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; + // Handle vm_page_rename(m, new_object, ...). + if ((m->flags & (PG_CACHED | PG_FREE)) == 0) { + return (NULL); + } + vm_reserv_populate(rv); + return (m); + } + } + msucc = vm_radix_tree_lookup_ge(&object->rtree, pindex); + if (msucc != NULL) { + KASSERT(msucc->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { + m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; + // Handle vm_page_rename(m, new_object, ...). + if ((m->flags & (PG_CACHED | PG_FREE)) == 0) { + return (NULL); + } + vm_reserv_populate(rv); + return (m); + } + } + +#else msucc = NULL; mpred = object->root; + while (mpred != NULL) { KASSERT(mpred->pindex != pindex, ("vm_reserv_alloc_page: pindex already allocated")); @@ -349,7 +383,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex msucc = NULL; mpred = object->root = vm_page_splay(pindex, object->root); } - +#endif /* * Determine the first index to the left that can be used. */ Index: vm/vm_object.h =================================================================== --- vm/vm_object.h (revision 192031) +++ vm/vm_object.h (working copy) @@ -57,7 +57,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $FreeBSD$ + * $FreeBSD: head/sys/vm/vm_object.h 188348 2009-02-08 22:17:24Z alc $ */ /* @@ -67,9 +67,12 @@ #ifndef _VM_OBJECT_ #define _VM_OBJECT_ +#include "opt_vm.h" + #include #include #include +#include /* * Types defined: @@ -87,6 +90,7 @@ struct vm_object { LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ + struct vm_radix_tree rtree; vm_page_t root; /* root of the resident page splay tree */ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (revision 192031) +++ vm/vm_page.c (working copy) @@ -98,7 +98,7 @@ */ #include -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/sys/vm/vm_page.c 186719 2009-01-03 13:24:08Z kib $"); #include "opt_vm.h" @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ #include + /* * Associated with page of user-allocatable memory is a * page structure. @@ -138,6 +140,11 @@ struct vpgqueues vm_page_queues[PQ_COUNT]; struct mtx vm_page_queue_mtx; struct mtx vm_page_queue_free_mtx; +#ifdef VM_RADIX +extern SLIST_HEAD(, vm_radix_node) res_rnodes_head; +extern int rnode_size; +#endif + vm_page_t vm_page_array = 0; int vm_page_array_size = 0; long first_page = 0; @@ -223,6 +230,10 @@ vm_page_startup(vm_offset_t vaddr) int nblocks; vm_paddr_t last_pa; char *list; +#ifdef VM_RADIX + unsigned int rtree_res_count; + vm_pindex_t size; +#endif /* the biggest memory array is the second group of pages */ vm_paddr_t end; @@ -280,8 +291,34 @@ vm_page_startup(vm_offset_t vaddr) vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count; - +#ifdef VM_RADIX /* + * Reserve memory for radix nodes. 
Allocate enough nodes so that + * insert on kernel_object will not result in recursion. + */ + rnode_size = sizeof(struct vm_radix_node) + + (sizeof(void *) * (VM_RADIX_MASK + 1)); + size = OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS); + rtree_res_count = 0; + while (size != 0) { + rtree_res_count += size / VM_RADIX_COUNT; + size /= VM_RADIX_COUNT; + } + printf("Allocated %u tree pages for %lu bytes of memory.\n", + rtree_res_count, (u_long)(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)); + new_end = end - ((vm_paddr_t)rtree_res_count * rnode_size); + new_end = trunc_page(new_end); + mapped = pmap_map(&vaddr, new_end, end, + VM_PROT_READ | VM_PROT_WRITE); + bzero((void *)mapped, end - new_end); + end = new_end; + for (i = 0; i < rtree_res_count; i++) { + SLIST_INSERT_HEAD(&res_rnodes_head, + (struct vm_radix_node *)mapped, next); + mapped += rnode_size; + } +#endif + /* * Allocate memory for use when boot strapping the kernel memory * allocator. */ @@ -341,6 +378,9 @@ vm_page_startup(vm_offset_t vaddr) mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array = (vm_page_t) mapped; + + + #if VM_NRESERVLEVEL > 0 /* * Allocate memory for the reservation management system's data @@ -646,8 +686,11 @@ vm_page_splay(vm_pindex_t pindex, vm_page_t root) void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { +#ifdef VM_RADIX + vm_page_t neighbor; +#else vm_page_t root; - +#endif VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (m->object != NULL) panic("vm_page_insert: page already inserted"); @@ -658,6 +701,20 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm m->object = object; m->pindex = pindex; +#ifdef VM_RADIX + if (object->resident_page_count == 0) { + TAILQ_INSERT_TAIL(&object->memq, m, listq); + } else { + neighbor = vm_radix_tree_lookup_ge(&object->rtree, pindex); + if (neighbor != NULL) { + KASSERT(pindex != neighbor->pindex, + ("vm_page_insert: offset already allocated")); + TAILQ_INSERT_BEFORE(neighbor, m, listq); + } else 
+ TAILQ_INSERT_TAIL(&object->memq, m, listq); + } + vm_radix_tree_insert(&object->rtree, pindex, m); +#else /* * Now link into the object's ordered list of backed pages. */ @@ -683,8 +740,9 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm } } object->root = m; +#endif object->generation++; - + /* * show that the object has one more resident page. */ @@ -719,7 +777,9 @@ void vm_page_remove(vm_page_t m) { vm_object_t object; +#ifndef VM_RADIX vm_page_t root; +#endif if ((object = m->object) == NULL) return; @@ -730,6 +790,9 @@ vm_page_remove(vm_page_t m) } mtx_assert(&vm_page_queue_mtx, MA_OWNED); +#ifdef VM_RADIX + vm_radix_tree_remove(&object->rtree, m->pindex); +#else /* * Now remove from the object's list of backed pages. */ @@ -742,6 +805,8 @@ vm_page_remove(vm_page_t m) root->right = m->right; } object->root = root; +#endif + TAILQ_REMOVE(&object->memq, m, listq); /* @@ -774,11 +839,16 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pin vm_page_t m; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + +#ifdef VM_RADIX + m = vm_radix_tree_lookup(&object->rtree, pindex); +#else if ((m = object->root) != NULL && m->pindex != pindex) { m = vm_page_splay(pindex, m); if ((object->root = m)->pindex != pindex) m = NULL; } +#endif return (m); } @@ -1644,6 +1714,9 @@ vm_page_cache(vm_page_t m) */ vm_pageq_remove(m); +#ifdef VM_RADIX + vm_radix_tree_remove(&object->rtree, m->pindex); +#else /* * Remove the page from the object's collection of resident * pages. @@ -1657,6 +1730,7 @@ vm_page_cache(vm_page_t m) root->right = m->right; } object->root = root; +#endif TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; object->generation++; Index: vm/vm_radix_tree.c =================================================================== --- vm/vm_radix_tree.c (revision 0) +++ vm/vm_radix_tree.c (revision 0) @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + + +/* + * Radix tree implementation. + * Number of bits per level are configurable. 
+ * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +SLIST_HEAD(, vm_radix_node) res_rnodes_head = + SLIST_HEAD_INITIALIZER(res_rnodes_head); +int rnode_size; + +static inline vm_rtidx_t max_index(struct vm_radix_tree *rtree); +static inline vm_rtidx_t get_slot(vm_rtidx_t index, + struct vm_radix_tree *rtree, int level); +static struct vm_radix_node *get_node(struct vm_radix_tree *rtree); +static void put_vm_radix_node(struct vm_radix_node *rnode, + struct vm_radix_tree *rtree); + +extern vm_offset_t rt_resmem_start, rt_resmem_end; + +/* + * Allocates (M_NOWAIT) and initializes an empty radix tree; NULL on failure. + */ +struct vm_radix_tree * +vm_radix_tree_create(void) +{ + struct vm_radix_tree *rtree; + + rtree = (struct vm_radix_tree *)malloc(sizeof(struct vm_radix_tree), + M_TEMP,M_NOWAIT | M_ZERO); + if (rtree == NULL) + return NULL; + rtree->rt_height = 0; + rtree->rt_root = NULL; + + return rtree; +} + +/* + * get_node: + * + * Returns a zeroed node with VM_RADIX_MASK + 1 child slots, taken from the + * reserve list when kernel_object or kmem_object is locked, else malloc'ed. + */ +struct vm_radix_node * +get_node(struct vm_radix_tree *rtree) +{ + struct vm_radix_node *rnode; + int children_cnt; + + if (VM_OBJECT_LOCKED(kernel_object) || VM_OBJECT_LOCKED(kmem_object)){ + if (!SLIST_EMPTY(&res_rnodes_head)) { + rnode = SLIST_FIRST(&res_rnodes_head); + SLIST_REMOVE_HEAD(&res_rnodes_head, next); + bzero((void *)rnode, rnode_size); + return (rnode); + } + panic("No memory for kernel_object. . 
."); + } + /* try malloc */ + children_cnt = VM_RADIX_MASK + 1; + rnode = (struct vm_radix_node *)malloc(sizeof(struct vm_radix_node) + + sizeof(void *) * children_cnt, M_TEMP, M_NOWAIT | M_ZERO); + if (rnode == NULL) { + panic("get_node: Can not allocate memory\n"); + return NULL; + } + + return rnode; +} + +/* + * put_vm_radix_node: Return the node to the reserve list when kernel_object + * or kmem_object is locked; otherwise free(9) it. + */ +void +put_vm_radix_node(struct vm_radix_node *rnode, struct vm_radix_tree *rtree) +{ + if (VM_OBJECT_LOCKED(kernel_object) || VM_OBJECT_LOCKED(kmem_object)) + SLIST_INSERT_HEAD(&res_rnodes_head,rnode,next); + else + free(rnode,M_TEMP); +} + +/* + * max_index: + * + * Returns the largest index that a tree of the current height can + * hold; a tree of height 0 yields 0. + */ +static inline vm_rtidx_t +max_index(struct vm_radix_tree *rtree) +{ + + if (rtree->rt_height == 0) + return (0); + + return (VM_RADIX_MAX(rtree->rt_height)); +} + +/* + * get_slot: + * Returns the child-array slot that index selects at the given level: + * VM_RADIX_WIDTH bits of index per level, level 0 being the lowest bits. + */ +static inline vm_rtidx_t +get_slot(vm_rtidx_t index, struct vm_radix_tree *rtree, int level) +{ + vm_rtidx_t slot; + + slot = (index >> level * VM_RADIX_WIDTH) & VM_RADIX_MASK; + KASSERT(slot >= 0 && slot <= VM_RADIX_MASK, + ("get_slot: Wrong slot generated index - %lu level - %d", + (u_long)index, level)); + + return slot; +} + +/* + * vm_radix_tree_insert: + * + * Inserts the key-value (index,value) pair in to the radix tree. + * Returns 0 on successful insertion, + * or ENOMEM if there is insufficient memory. + * WARNING: An existing entry at the index trips a KASSERT; if assertions are + * compiled out the old value is overwritten and rn_count still increments. + */ +int +vm_radix_tree_insert(struct vm_radix_tree *rtree, vm_rtidx_t index, void *val) +{ + struct vm_radix_node *rnode, *child; + vm_rtidx_t slot; + int level; + + /* Handle an empty tree. 
*/ + if (rtree->rt_root == NULL) { + rnode = get_node(rtree); + if (rnode == NULL) + return (ENOMEM); + rnode->rn_count = 0; + rtree->rt_height = 1; + rtree->rt_root = rnode; + /* + * Only the height grows here; the traversal below adds the + * internal nodes in the slots selected by the index. + */ + while (index > max_index(rtree)) + rtree->rt_height++; + } else { + /* + * Increase the height by adding nodes at the root until + * there is sufficient space. + */ + while (index > max_index(rtree)) { + rnode = get_node(rtree); + if (rnode == NULL) + return (ENOMEM); + rnode->rn_count = 1; + rnode->rn_children[0] = rtree->rt_root; + rtree->rt_root = rnode; + rtree->rt_height++; + } + } + + /* Now that the tree is tall enough, fill in the path to the index. */ + rnode = rtree->rt_root; + for (level = rtree->rt_height - 1; level > 0; level--) { + slot = get_slot(index, rtree, level); + /* Add the required intermediate nodes. */ + if (rnode->rn_children[slot] == NULL) { + child = get_node(rtree); + if (child == NULL) + return (ENOMEM); + child->rn_count = 0; + rnode->rn_children[slot] = (void *)child; + rnode->rn_count++; + } + rnode = (struct vm_radix_node *)rnode->rn_children[slot]; + } + + slot = get_slot(index, rtree, level); + KASSERT(rnode->rn_children[slot] == NULL, + ("vm_radix_tree_insert: Duplicate value at index: %lu\n", + (u_long)index)); + rnode->rn_children[slot] = val; + rnode->rn_count++; + + return 0; +} + +/* + * vm_radix_tree_lookup: + * + * returns the value stored for the index, if the index is not present + * NULL value is returned. 
+ */ +void * +vm_radix_tree_lookup(struct vm_radix_tree *rtree, vm_rtidx_t index) +{ + struct vm_radix_node *rnode; + vm_rtidx_t slot; + int level; + + if (index > max_index(rtree)) + return NULL; + level = rtree->rt_height - 1; + rnode = rtree->rt_root; + while (rnode) { + slot = get_slot(index, rtree, level); + if (level == 0) + return rnode->rn_children[slot]; + rnode = (struct vm_radix_node *)rnode->rn_children[slot]; + level--; + } + + return NULL; +} + +/* + * vm_radix_tree_lookup_ge: Returns the value at the smallest index that is + * greater than or equal to the given index, or NULL if there is none. + */ +void * +vm_radix_tree_lookup_ge(struct vm_radix_tree *rtree, vm_rtidx_t index) +{ + int level; + vm_rtidx_t slot; + struct vm_radix_node *tmp; + SLIST_HEAD(, vm_radix_node) rtree_path = + SLIST_HEAD_INITIALIZER(rtree_path); + + if (index > max_index(rtree)) + return NULL; + + level = rtree->rt_height - 1; + tmp = rtree->rt_root; + while (tmp) { + SLIST_INSERT_HEAD(&rtree_path, tmp, next); + slot = get_slot(index, rtree, level); + if (level == 0 && tmp->rn_children[slot] != NULL) + return tmp->rn_children[slot]; + tmp = (struct vm_radix_node *)tmp->rn_children[slot]; + while (tmp == NULL) { + /* + * Index not present, see if there is something + * greater than index. 
+ */ + tmp = SLIST_FIRST(&rtree_path); + SLIST_REMOVE_HEAD(&rtree_path, next); + while (1) { + while (slot <= VM_RADIX_MASK + && tmp->rn_children[slot] == NULL) + slot++; + if (slot > VM_RADIX_MASK) { + if (level == rtree->rt_height - 1) + return NULL; + tmp = SLIST_FIRST(&rtree_path); + SLIST_REMOVE_HEAD(&rtree_path, next); + level++; + slot = + get_slot(index, rtree, level) + 1; + continue; + } + if (level == 0) + return tmp->rn_children[slot]; + SLIST_INSERT_HEAD(&rtree_path, tmp, next); + tmp = tmp->rn_children[slot]; + slot = 0; + level--; + } + } + level--; + } + return NULL; + +} + +/* + * vm_radix_tree_lookup_le: Will lookup index less than or equal to given index + */ +void * +vm_radix_tree_lookup_le(struct vm_radix_tree *rtree, vm_rtidx_t index) +{ + int level; + vm_rtidx_t slot; + vm_rtidx_t max; + struct vm_radix_node *tmp; + SLIST_HEAD(, vm_radix_node) rtree_path = + SLIST_HEAD_INITIALIZER(rtree_path); + + max = max_index(rtree); + if (index > max) + index = max; + + level = rtree->rt_height - 1; + tmp = rtree->rt_root; + while (tmp) { + SLIST_INSERT_HEAD(&rtree_path, tmp,next); + slot = get_slot(index, rtree, level); + if (level == 0 && tmp->rn_children[slot] != NULL) + return tmp->rn_children[slot]; + tmp = (struct vm_radix_node *)tmp->rn_children[slot]; + while (tmp == NULL) { + /* Not present: scan left for a smaller index. + * A wrapped slot must never index rn_children. + */ + tmp = SLIST_FIRST(&rtree_path); + SLIST_REMOVE_HEAD(&rtree_path, next); + while (1) { + while (slot > 0 && slot <= VM_RADIX_MASK + && tmp->rn_children[slot] == NULL) + slot--; + if (slot <= VM_RADIX_MASK + && tmp->rn_children[slot] == NULL) + slot--; + if (slot > VM_RADIX_MASK) { + if (level == rtree->rt_height - 1) + return NULL; + tmp = SLIST_FIRST(&rtree_path); + SLIST_REMOVE_HEAD(&rtree_path, next); + level++; + slot = + get_slot(index, rtree, level) - 1; + continue; + } + if (level == 0) { + return tmp->rn_children[slot]; + } + SLIST_INSERT_HEAD(&rtree_path, tmp, next); + tmp = tmp->rn_children[slot]; + slot = VM_RADIX_MASK; + level--; + } + 
} + level--; + } + + return NULL; +} + +/* + * vm_radix_tree_remove: + * + * Removes the specified index from the tree and frees the chain of + * single-child nodes that the removal leaves empty. The tree height is not + * changed here. The value stored against the index is returned; if the + * index is not present the KASSERT trips or NULL is returned. + */ +void * +vm_radix_tree_remove(struct vm_radix_tree *rtree, vm_rtidx_t index) +{ + struct vm_radix_node *tmp, *branch, *sub_branch; + int level, branch_level; + vm_rtidx_t slot; + void *val; + + val = NULL; + branch = NULL; + branch_level = 0; + level = rtree->rt_height - 1; + tmp = rtree->rt_root; + while (tmp) { + slot = get_slot(index, rtree, level); + /* + * The delete operation might create a branch without any + * nodes we will save the root of such branch, if there is + * any, and its level. + */ + if (branch == NULL && level != 0) { + /* + * If there is an intermediate node with one + * child we save details of its parent node + */ + sub_branch = (struct vm_radix_node *) + tmp->rn_children[slot]; + branch_level = level; + if (sub_branch != NULL && sub_branch->rn_count == 1) + branch = tmp; + } else if (level != 0) { + /* + * If there is some descendant with more than one + * child then reset the branch to NULL + */ + sub_branch = (struct vm_radix_node *) + tmp->rn_children[slot]; + if (sub_branch != NULL && sub_branch->rn_count > 1) + branch = NULL; + } + if (level != 0) { + tmp = (struct vm_radix_node *)tmp->rn_children[slot]; + level--; + continue; + } + val = tmp->rn_children[slot]; + if (val == NULL) + break; + tmp->rn_children[slot] = NULL; + tmp->rn_count--; + break; + } + KASSERT(val != NULL, + ("vm_radix_tree_remove: index %lu not present in the tree.\n", + (u_long)index)); + + /* Cut the branch before we return. 
*/ + if (val != NULL && branch != NULL) { + slot = get_slot(index, rtree, branch_level); + tmp = branch->rn_children[slot]; + branch->rn_children[slot] = NULL; + branch->rn_count--; + branch = tmp; + branch_level--; + while (branch != NULL) { + slot = get_slot(index, rtree, branch_level); + tmp = branch->rn_children[slot]; + put_vm_radix_node(branch, rtree); + branch = tmp; + branch_level--; + } + } + + return (val); +} + +/* + * vm_radix_tree_shrink: if possible reduces the height of the tree. + * If there are no keys stored the root is freed. + */ +void +vm_radix_tree_shrink(struct vm_radix_tree *rtree) +{ + struct vm_radix_node *tmp; + + if (rtree->rt_root == NULL) + return; + + /* Drop single-child roots; a height-1 root's slots hold values. */ + while (rtree->rt_height > 1 && rtree->rt_root->rn_count == 1 && + rtree->rt_root->rn_children[0] != NULL) { + tmp = rtree->rt_root; + rtree->rt_root = tmp->rn_children[0]; + rtree->rt_height--; + put_vm_radix_node(tmp,rtree); + } + /* Finally see if we have an empty tree. */ + if (rtree->rt_root->rn_count == 0) { + put_vm_radix_node(rtree->rt_root,rtree); + rtree->rt_root = NULL; + } +} Index: vm/vm_radix_tree.h =================================================================== --- vm/vm_radix_tree.h (revision 0) +++ vm/vm_radix_tree.h (revision 0) @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _VM_RADIX_TREE_H_ +#define _VM_RADIX_TREE_H_ + +#include + +typedef vm_pindex_t vm_rtidx_t; + +#define VM_RTIDX_LEN (sizeof(vm_rtidx_t) * NBBY) + +/* Bits of index consumed per level, and child slots per node. */ +#define VM_RADIX_WIDTH 8 +#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) + +/* Mask selecting the low VM_RADIX_WIDTH bits of an index. */ +#define VM_RADIX_MASK (~(vm_rtidx_t)0 >> (VM_RTIDX_LEN - VM_RADIX_WIDTH)) + +/* Calculates maximum value for a tree of height h. */ +#define VM_RADIX_MAX(h) \ + (~(vm_rtidx_t)0 >> (VM_RTIDX_LEN - (VM_RADIX_WIDTH * (h)))) + +struct vm_radix_node { + SLIST_ENTRY(vm_radix_node) next; + uint16_t rn_count; /* Valid children. */ + void *rn_children[]; /* Pointers to child nodes. */ +}; + +struct vm_radix_tree { + struct vm_radix_node *rt_root; /* Root node. */ + int rt_height; /* Number of levels + 1. 
*/ +}; + +struct vm_radix_tree *vm_radix_tree_create(void); +int vm_radix_tree_insert(struct vm_radix_tree *, vm_rtidx_t, void *); +void *vm_radix_tree_remove(struct vm_radix_tree *, vm_rtidx_t); +void *vm_radix_tree_lookup(struct vm_radix_tree *, vm_rtidx_t); +void *vm_radix_tree_lookup_ge(struct vm_radix_tree *, vm_rtidx_t); +void *vm_radix_tree_lookup_le(struct vm_radix_tree *, vm_rtidx_t); +void vm_radix_tree_shrink(struct vm_radix_tree *); + +#endif /* !_VM_RADIX_TREE_H_ */ Index: i386/include/vmparam.h =================================================================== --- i386/include/vmparam.h (revision 192031) +++ i386/include/vmparam.h (working copy) @@ -32,7 +32,7 @@ * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 - * $FreeBSD$ + * $FreeBSD: head/sys/i386/include/vmparam.h 190705 2009-04-04 23:12:14Z alc $ */ @@ -126,6 +126,14 @@ #endif /* + * Use radix tree for resident page management + */ +#ifndef VM_RADIX +#define VM_RADIX 1 +#endif + + +/* * Level 0 reservations consist of 512 pages under PAE and 1024 pages * otherwise. */ Index: amd64/conf/GENERIC =================================================================== --- amd64/conf/GENERIC (revision 192031) +++ amd64/conf/GENERIC (working copy) @@ -73,6 +73,8 @@ options AUDIT # Security event auditing #options KDTRACE_FRAME # Ensure frames are compiled in #options KDTRACE_HOOKS # Kernel DTrace hooks +options VM_RADIX + # Debugging for use in -current options KDB # Enable kernel debugger support. options DDB # Support DDB.