Index: kern/kern_proc.c =================================================================== --- kern/kern_proc.c (revision 212213) +++ kern/kern_proc.c (working copy) @@ -1520,8 +1520,9 @@ map = &p->p_vmspace->vm_map; /* XXXRW: More locking required? */ vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { vm_object_t obj, tobj, lobj; vm_offset_t addr; int vfslocked; @@ -1698,8 +1699,9 @@ map = &vm->vm_map; /* XXXRW: More locking required? */ vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { vm_object_t obj, tobj, lobj; vm_offset_t addr; int vfslocked; Index: kern/imgact_elf.c =================================================================== --- kern/imgact_elf.c (revision 212213) +++ kern/imgact_elf.c (working copy) @@ -1217,8 +1217,9 @@ boolean_t ignore_entry; vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { /* * Don't dump inaccessible mappings, deal with legacy * coredump mode. Index: kern/uipc_shm.c =================================================================== --- kern/uipc_shm.c (revision 212213) +++ kern/uipc_shm.c (working copy) @@ -306,7 +306,7 @@ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } else if ((length & PAGE_MASK) && - __predict_false(object->cache != NULL)) { + __predict_false(!RB_EMPTY(&object->cache))) { vm_page_cache_free(object, OFF_TO_IDX(length), nobjsize); } Index: kern/sys_process.c =================================================================== --- kern/sys_process.c (revision 212213) +++ kern/sys_process.c (working copy) @@ -389,22 +389,22 @@ vm_map_lock_read(map); do { - entry = map->header.next; + entry = RB_MIN(maptree, &map->root); index = 0; - while (index < pve->pve_entry && entry != &map->header) { - entry = entry->next; + while (index < pve->pve_entry && entry != NULL) { + entry = vm_map_next(entry); index++; } if (index != pve->pve_entry) { error = EINVAL; break; } - while (entry != &map->header && + while (entry != NULL && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { - entry = entry->next; + entry = vm_map_next(entry); index++; } - if (entry == &map->header) { + if (entry == NULL) { error = ENOENT; break; } Index: fs/tmpfs/tmpfs_vnops.c =================================================================== --- fs/tmpfs/tmpfs_vnops.c (revision 212213) +++ fs/tmpfs/tmpfs_vnops.c (working copy) @@ -513,7 +513,7 @@ tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || - (vobj->resident_page_count == 0 && vobj->cache == NULL)) + (vobj->resident_page_count == 0 && RB_EMPTY(&vobj->cache))) goto nocache; VM_OBJECT_LOCK(vobj); @@ -638,7 +638,7 @@ tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || - (vobj->resident_page_count == 0 && vobj->cache == NULL)) { + (vobj->resident_page_count == 0 && RB_EMPTY(&vobj->cache))) { vpg = NULL; goto nocache; } @@ -662,7 +662,7 @@ VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&vpg, offset, tlen, uio); } else { - if (__predict_false(vobj->cache != NULL)) + if (__predict_false(!RB_EMPTY(&vobj->cache))) vm_page_cache_free(vobj, idx, idx + 1); VM_OBJECT_UNLOCK(vobj); vpg = NULL; Index: fs/procfs/procfs_map.c 
=================================================================== --- fs/procfs/procfs_map.c (revision 212213) +++ fs/procfs/procfs_map.c (working copy) @@ -112,8 +112,9 @@ return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { vm_object_t obj, tobj, lobj; int ref_count, shadow_count, flags; vm_offset_t e_start, e_end, addr; Index: dev/amd/amd.c =================================================================== --- dev/amd/amd.c (revision 212213) +++ dev/amd/amd.c (working copy) @@ -60,9 +60,6 @@ #include #include -#include -#include - #include #include #include Index: dev/atkbdc/atkbd.c =================================================================== --- dev/atkbdc/atkbd.c (revision 212213) +++ dev/atkbdc/atkbd.c (working copy) @@ -50,10 +50,6 @@ #include #include -#include -#include -#include - #include #endif /* __i386__ || __amd64__ */ Index: dev/hwpmc/hwpmc_mod.c =================================================================== --- dev/hwpmc/hwpmc_mod.c (revision 212213) +++ dev/hwpmc/hwpmc_mod.c (working copy) @@ -1648,7 +1648,9 @@ map = &vm->vm_map; vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { if (entry == NULL) { PMCDBG(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " Index: dev/advansys/advlib.c =================================================================== --- dev/advansys/advlib.c (revision 212213) +++ dev/advansys/advlib.c (working copy) @@ -63,10 +63,6 @@ #include #include -#include -#include -#include - #include #include Index: dev/advansys/advansys.c =================================================================== --- dev/advansys/advansys.c (revision 212213) +++ dev/advansys/advansys.c (working copy) @@ -70,10 +70,6 @@ #include #include -#include -#include -#include - #include static void adv_action(struct cam_sim *sim, union ccb *ccb); Index: dev/adb/adb_mouse.c =================================================================== --- dev/adb/adb_mouse.c (revision 212213) +++ dev/adb/adb_mouse.c (working copy) @@ -42,9 +42,6 @@ #include -#include -#include - #include "adb.h" #define CDEV_GET_SOFTC(x) (x)->si_drv1 Index: dev/adb/adb_bus.c =================================================================== --- dev/adb/adb_bus.c (revision 212213) +++ dev/adb/adb_bus.c (working copy) @@ -35,9 +35,6 @@ #include -#include -#include - #include "adb.h" #include "adbvar.h" Index: dev/adb/adb_kbd.c =================================================================== --- dev/adb/adb_kbd.c (revision 212213) +++ dev/adb/adb_kbd.c (working copy) @@ -42,9 +42,6 @@ #include #include -#include -#include - #include "adb.h" #define KBD_DRIVER_NAME "akbd" Index: vm/vm_pageout.c =================================================================== --- vm/vm_pageout.c (revision 212213) +++ vm/vm_pageout.c (working copy) @@ -569,7 +569,7 @@ /* * Scan the object's entire memory queue. */ - TAILQ_FOREACH(p, &object->memq, listq) { + RB_FOREACH(p, pgtree, &object->root) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) @@ -653,8 +653,8 @@ * first, search out the biggest object, and try to free pages from * that. 
*/ - tmpe = map->header.next; - while (tmpe != &map->header) { + tmpe = RB_MIN(maptree, &map->root); + while (tmpe != NULL) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYLOCK(obj)) { @@ -670,7 +670,7 @@ } if (tmpe->wired_count > 0) nothingwired = FALSE; - tmpe = tmpe->next; + tmpe = vm_map_next(tmpe); } if (bigobj != NULL) { @@ -681,8 +681,8 @@ * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ - tmpe = map->header.next; - while (tmpe != &map->header) { + tmpe = RB_MIN(maptree, &map->root); + while (tmpe != NULL) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { @@ -693,7 +693,7 @@ VM_OBJECT_UNLOCK(obj); } } - tmpe = tmpe->next; + tmpe = vm_map_next(tmpe); } /* Index: vm/vm_meter.c =================================================================== --- vm/vm_meter.c (revision 212213) +++ vm/vm_meter.c (working copy) @@ -178,8 +178,9 @@ continue; map = &vm->vm_map; vm_map_lock_read(map); - for (entry = map->header.next; - entry != &map->header; entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || (object = entry->object.vm_object) == NULL) continue; Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 212213) +++ vm/vm_map.c (working copy) @@ -457,26 +457,20 @@ void _vm_map_unlock(vm_map_t map, const char *file, int line) { - vm_map_entry_t free_entry, entry; + vm_map_entry_t entry; vm_object_t object; - free_entry = map->deferred_freelist; - map->deferred_freelist = NULL; - if (map->system_map) _mtx_unlock_flags(&map->system_mtx, 0, file, line); else _sx_xunlock(&map->lock, file, line); - while (free_entry != NULL) { - entry = free_entry; - free_entry = free_entry->next; - + while ((entry = SLIST_FIRST(&map->deferred_freelist)) != NULL) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { object = entry->object.vm_object; vm_object_deallocate(object); } - + SLIST_REMOVE_HEAD(&map->deferred_freelist, mapl); vm_map_entry_dispose(map, entry); } } @@ -690,16 +684,15 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) { - map->header.next = map->header.prev = &map->header; map->needs_wakeup = FALSE; map->system_map = 0; map->pmap = pmap; map->min_offset = min; map->max_offset = max; map->flags = 0; - map->root = NULL; map->timestamp = 0; - map->deferred_freelist = NULL; + RB_INIT(&map->root); + SLIST_INIT(&map->deferred_freelist); } void @@ -764,11 +757,6 @@ vm_map_entry_set_max_free(vm_map_entry_t entry) { - entry->max_free = entry->adj_free; - if (entry->left != NULL && entry->left->max_free > entry->max_free) - entry->max_free = entry->left->max_free; - if (entry->right != NULL && entry->right->max_free > entry->max_free) - entry->max_free = entry->right->max_free; } /* @@ -788,100 +776,52 @@ * * Returns: the new root. */ -static vm_map_entry_t -vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) + +/* + * a = in the tree + * b = external to compare + * + * a.start = 10 + * a.end = 20 + * b.start = 20 + * b.end = 30 + * a.start = 30 + * a.end = 40 + */ +int +vm_map_startend_cmp(const vm_map_entry_t a, const vm_map_entry_t b) { - vm_map_entry_t llist, rlist; - vm_map_entry_t ltree, rtree; - vm_map_entry_t y; - /* Special case of empty tree. 
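
The deferred-free handling in _vm_map_unlock() above (together with the matching SLIST_INSERT_HEAD() in vm_map_delete() further down) replaces the hand-rolled singly linked list with a sys/queue.h SLIST threaded through the new "mapl" field. A minimal userland sketch of that push-while-locked / drain-after-unlock pattern; the names here (struct entry, entry_dispose) are illustrative, not from the patch:

/*
 * Minimal sketch of the deferred-free pattern: entries are pushed onto
 * an SLIST while the map is "locked" and only disposed of after the
 * lock has been dropped.
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
    SLIST_ENTRY(entry) link;    /* plays the role of the new "mapl" field */
    int id;
};

SLIST_HEAD(freelist, entry);

static void
entry_dispose(struct entry *e)
{
    printf("disposing entry %d\n", e->id);
    free(e);
}

int
main(void)
{
    struct freelist deferred = SLIST_HEAD_INITIALIZER(deferred);
    struct entry *e;
    int i;

    /* "Locked" section: unlink entries and defer their destruction. */
    for (i = 0; i < 3; i++) {
        if ((e = malloc(sizeof(*e))) == NULL)
            return (1);
        e->id = i;
        SLIST_INSERT_HEAD(&deferred, e, link);
    }

    /* After "unlock": drain the list, as _vm_map_unlock() now does. */
    while ((e = SLIST_FIRST(&deferred)) != NULL) {
        SLIST_REMOVE_HEAD(&deferred, link);
        entry_dispose(e);
    }
    return (0);
}
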
*/ - if (root == NULL) - return (root); + if (a->end - 1 < b->start) + return -1; /* a smaller b */ + if (a->start > b->end - 1) + return 1; /* a larger b */ + return 0; /* match or overlap */ +} - /* - * Pass One: Splay down the tree until we find addr or a NULL - * pointer where addr would go. llist and rlist are the two - * sides in reverse order (bottom-up), with llist linked by - * the right pointer and rlist linked by the left pointer in - * the vm_map_entry. Wait until Pass Two to set max_free on - * the two spines. - */ - llist = NULL; - rlist = NULL; - for (;;) { - /* root is never NULL in here. */ - if (addr < root->start) { - y = root->left; - if (y == NULL) - break; - if (addr < y->start && y->left != NULL) { - /* Rotate right and put y on rlist. */ - root->left = y->right; - y->right = root; - vm_map_entry_set_max_free(root); - root = y->left; - y->left = rlist; - rlist = y; - } else { - /* Put root on rlist. */ - root->left = rlist; - rlist = root; - root = y; - } - } else if (addr >= root->end) { - y = root->right; - if (y == NULL) - break; - if (addr >= y->end && y->right != NULL) { - /* Rotate left and put y on llist. */ - root->right = y->left; - y->left = root; - vm_map_entry_set_max_free(root); - root = y->right; - y->right = llist; - llist = y; - } else { - /* Put root on llist. */ - root->right = llist; - llist = root; - root = y; - } - } else - break; - } +RB_GENERATE(maptree, vm_map_entry, mapt, vm_map_startend_cmp); - /* - * Pass Two: Walk back up the two spines, flip the pointers - * and set max_free. The subtrees of the root go at the - * bottom of llist and rlist. - */ - ltree = root->left; - while (llist != NULL) { - y = llist->right; - llist->right = ltree; - vm_map_entry_set_max_free(llist); - ltree = llist; - llist = y; - } - rtree = root->right; - while (rlist != NULL) { - y = rlist->left; - rlist->left = rtree; - vm_map_entry_set_max_free(rlist); - rtree = rlist; - rlist = y; - } +#ifdef INVARIANTS +static int +vm_map_check(vm_map_t map) +{ + vm_map_entry_t e, en; - /* - * Final assembly: add ltree and rtree as subtrees of root. 
- */ - root->left = ltree; - root->right = rtree; - vm_map_entry_set_max_free(root); - - return (root); + RB_FOREACH_SAFE(e, maptree, &map->root, en) { + if (e->start >= e->end) + goto bad; + if (en != NULL && en->start < e->end) + goto bad; + } + return 1; +bad: + printf("%u-%u considered bad", e->start, e->end); + if (en != NULL) + printf("with %u-%u", en->start, en->end); + printf("\n"); + return 0; } +#endif /* * vm_map_entry_{un,}link: @@ -890,62 +830,49 @@ */ static void vm_map_entry_link(vm_map_t map, - vm_map_entry_t after_where, vm_map_entry_t entry) { + vm_map_entry_t e; - CTR4(KTR_VM, - "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map, - map->nentries, entry, after_where); + CTR3(KTR_VM, + "vm_map_entry_link: map %p, nentries %d, entry %p", map, + map->nentries, entry); VM_MAP_ASSERT_LOCKED(map); - map->nentries++; - entry->prev = after_where; - entry->next = after_where->next; - entry->next->prev = entry; - after_where->next = entry; - if (after_where != &map->header) { - if (after_where != map->root) - vm_map_entry_splay(after_where->start, map->root); - entry->right = after_where->right; - entry->left = after_where; - after_where->right = NULL; - after_where->adj_free = entry->start - after_where->end; - vm_map_entry_set_max_free(after_where); - } else { - entry->right = map->root; - entry->left = NULL; + KASSERT(vm_map_check(map), + ("%s: map tree inconsistent before", __func__)); + + KASSERT(entry->start < entry->end, + ("%s: start %u >= end %u", __func__, entry->start, entry->end)); + + if ((e = RB_INSERT(maptree, &map->root, entry)) != NULL) { + printf("%u-%u collides with %u-%u\n", entry->start, entry->end, e->start, e->end); + panic("%s: entry already exists", __func__); } - entry->adj_free = (entry->next == &map->header ? map->max_offset : - entry->next->start) - entry->end; + map->nentries++; + + KASSERT(vm_map_check(map), + ("%s: map tree inconsistent after", __func__)); + vm_map_entry_set_max_free(entry); - map->root = entry; } static void vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry) { - vm_map_entry_t next, prev, root; VM_MAP_ASSERT_LOCKED(map); - if (entry != map->root) - vm_map_entry_splay(entry->start, map->root); - if (entry->left == NULL) - root = entry->right; - else { - root = vm_map_entry_splay(entry->start, entry->left); - root->right = entry->right; - root->adj_free = (entry->next == &map->header ? map->max_offset : - entry->next->start) - root->end; - vm_map_entry_set_max_free(root); - } - map->root = root; - prev = entry->prev; - next = entry->next; - next->prev = prev; - prev->next = next; + KASSERT(vm_map_check(map), + ("%s: map tree inconsistent before", __func__)); + + if (RB_REMOVE(maptree, &map->root, entry) == NULL) + panic("%s: entry no in tree", __func__); + + KASSERT(vm_map_check(map), + ("%s: map tree inconsistent after", __func__)); + map->nentries--; CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, map->nentries, entry); @@ -965,16 +892,6 @@ vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) { - /* - * Using splay trees without parent pointers, propagating - * max_free up the tree is done by moving the entry to the - * root and making the change there. - */ - if (entry != map->root) - map->root = vm_map_entry_splay(entry->start, map->root); - - entry->adj_free = (entry->next == &map->header ? map->max_offset : - entry->next->start) - entry->end; vm_map_entry_set_max_free(entry); } @@ -989,71 +906,45 @@ * actually contained in the map. 
*/ boolean_t -vm_map_lookup_entry( - vm_map_t map, - vm_offset_t address, +vm_map_lookup_entry(vm_map_t map, vm_offset_t address, vm_map_entry_t *entry) /* OUT */ { +#if 1 + vm_map_entry_t e, cur = NULL; + + RB_FOREACH(e, maptree, &map->root) { + if (address >= e->start && address < e->end) { + *entry = e; + return (TRUE); + } + if (address < e->start) + break; + cur = e; + } + + *entry = cur; + return (FALSE); +#else vm_map_entry_t cur; - boolean_t locked; + struct vm_map_entry curt; - /* - * If the map is empty, then the map entry immediately preceding - * "address" is the map's header. - */ - cur = map->root; + curt.start = address; + curt.end = address; + cur = RB_NFIND(maptree, &map->root, &curt); if (cur == NULL) - *entry = &map->header; - else if (address >= cur->start && cur->end > address) { - *entry = cur; + cur = RB_MAX(maptree, &map->root); /* must be smaller */ + else if (address < cur->start) + cur = vm_map_prev(cur); /* larger, get prev */ + else { + *entry = cur; /* match */ return (TRUE); - } else if ((locked = vm_map_locked(map)) || - sx_try_upgrade(&map->lock)) { - /* - * Splay requires a write lock on the map. However, it only - * restructures the binary search tree; it does not otherwise - * change the map. Thus, the map's timestamp need not change - * on a temporary upgrade. - */ - map->root = cur = vm_map_entry_splay(address, cur); - if (!locked) - sx_downgrade(&map->lock); - - /* - * If "address" is contained within a map entry, the new root - * is that map entry. Otherwise, the new root is a map entry - * immediately before or after "address". - */ - if (address >= cur->start) { - *entry = cur; - if (cur->end > address) - return (TRUE); - } else - *entry = cur->prev; - } else - /* - * Since the map is only locked for read access, perform a - * standard binary search tree lookup for "address". - */ - for (;;) { - if (address < cur->start) { - if (cur->left == NULL) { - *entry = cur->prev; - break; - } - cur = cur->left; - } else if (cur->end > address) { - *entry = cur; - return (TRUE); - } else { - if (cur->right == NULL) { - *entry = cur; - break; - } - cur = cur->right; - } - } + } + if (cur != NULL) + *entry = cur; + else + *entry = NULL; return (FALSE); +#endif } /* @@ -1082,12 +973,14 @@ VM_MAP_ASSERT_LOCKED(map); + //printf("%s: %u-%u ;1\n", __func__, start, end); /* * Check that the start and end points are not bogus. */ if ((start < map->min_offset) || (end > map->max_offset) || (start >= end)) return (KERN_INVALID_ADDRESS); + //printf("%s: %u-%u ;2\n", __func__, start, end); /* * Find the entry prior to the proposed starting address; if it's part @@ -1101,9 +994,9 @@ /* * Assert that the next entry doesn't overlap the end point. 
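
The start/end comparator and the rewritten vm_map_lookup_entry() above rely on two properties of an interval-keyed RB tree: any overlap compares as equality, so RB_INSERT() hands back the colliding entry (which vm_map_entry_link() turns into a panic), and a degenerate one-byte key range lets RB_FIND()/RB_NFIND() locate the entry containing an address, or the first entry above a gap (the idea behind the disabled RB_NFIND variant). A self-contained userland sketch with illustrative types, assuming a sys/tree.h that provides RB_NFIND, as the patch itself does:

/*
 * Toy interval tree keyed the same way as the new maptree: the
 * comparator treats overlap as a match.
 */
#include <sys/tree.h>
#include <stdio.h>

struct range {
    RB_ENTRY(range) link;
    unsigned long start;    /* inclusive */
    unsigned long end;      /* exclusive */
};

static int
range_cmp(struct range *a, struct range *b)
{
    if (a->end - 1 < b->start)
        return (-1);        /* a entirely below b */
    if (a->start > b->end - 1)
        return (1);         /* a entirely above b */
    return (0);             /* overlap: treated as a match */
}

RB_HEAD(rangetree, range) head = RB_INITIALIZER(&head);
RB_GENERATE(rangetree, range, link, range_cmp);

int
main(void)
{
    struct range r1 = { .start = 0x1000, .end = 0x2000 };
    struct range r2 = { .start = 0x3000, .end = 0x4000 };
    struct range bad = { .start = 0x1800, .end = 0x2800 };
    struct range key;
    struct range *r;

    RB_INSERT(rangetree, &head, &r1);
    RB_INSERT(rangetree, &head, &r2);

    /* Overlapping insert: RB_INSERT() hands back the existing entry. */
    if ((r = RB_INSERT(rangetree, &head, &bad)) != NULL)
        printf("collision with [%#lx, %#lx)\n", r->start, r->end);

    /* Point lookup: a one-byte key range finds the containing entry. */
    key.start = 0x1234;
    key.end = key.start + 1;
    r = RB_FIND(rangetree, &head, &key);
    printf("0x1234 is in [%#lx, %#lx)\n", r->start, r->end);

    /* Address in a gap: RB_NFIND() returns the next entry above it. */
    key.start = 0x2500;
    key.end = key.start + 1;
    r = RB_NFIND(rangetree, &head, &key);
    printf("next entry above 0x2500 starts at %#lx\n", r->start);
    return (0);
}
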
*/ - if ((prev_entry->next != &map->header) && - (prev_entry->next->start < end)) - return (KERN_NO_SPACE); + if (prev_entry != NULL && vm_map_next(prev_entry) != NULL && + vm_map_next(prev_entry)->start < end) + return (KERN_NO_SPACE); protoeflags = 0; charge_prev_obj = FALSE; @@ -1127,15 +1020,19 @@ ((object == kmem_object || object == kernel_object) && !(protoeflags & MAP_ENTRY_NEEDS_COPY)), ("kmem or kernel object and cow")); + if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT)) goto charged; + if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) return (KERN_RESOURCE_SHORTAGE); + KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) || object->uip == NULL, ("OVERCOMMIT: vm_map_insert o %p", object)); + uip = curthread->td_ucred->cr_ruidinfo; uihold(uip); if (object == NULL && !(protoeflags & MAP_ENTRY_NEEDS_COPY)) @@ -1156,7 +1053,7 @@ vm_object_clear_flag(object, OBJ_ONEMAPPING); VM_OBJECT_UNLOCK(object); } - else if ((prev_entry != &map->header) && + else if ((prev_entry != NULL) && (prev_entry->eflags == protoeflags) && (prev_entry->end == start) && (prev_entry->wired_count == 0) && @@ -1230,10 +1127,11 @@ ("OVERCOMMIT: vm_map_insert leaks vm_map %p", new_entry)); new_entry->uip = uip; + //printf("%s: %u-%u ;3\n", __func__, start, end); /* * Insert the new entry into the list */ - vm_map_entry_link(map, prev_entry, new_entry); + vm_map_entry_link(map, new_entry); map->size += new_entry->end - new_entry->start; #if 0 @@ -1263,12 +1161,6 @@ * Find the first fit (lowest VM address) for "length" free bytes * beginning at address >= start in the given map. * - * In a vm_map_entry, "adj_free" is the amount of free space - * adjacent (higher address) to this entry, and "max_free" is the - * maximum amount of contiguous free space in its subtree. This - * allows finding a free region in one path down the tree, so - * O(log n) amortized with splay trees. - * * The map must be locked, and leaves it so. * * Returns: 0 on success, and starting address in *addr, @@ -1278,65 +1170,63 @@ vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, vm_offset_t *addr) /* OUT */ { - vm_map_entry_t entry; - vm_offset_t end, st; + vm_map_entry_t e, en; + vm_offset_t end; + //printf("%s: entering\n", __func__); + /* * Request must fit within min/max VM address and must avoid * address wrap. */ - if (start < map->min_offset) + if (start < map->min_offset) { start = map->min_offset; - if (start + length > map->max_offset || start + length < start) + printf("%s: set start = map->min_offset\n", __func__); + } + if (start + length > map->max_offset || start + length < start) { + printf("%s: start + length > map->maxoffset\n", __func__); return (1); + } /* Empty tree means wide open address space. */ - if (map->root == NULL) { + if (RB_EMPTY(&map->root)) { *addr = start; + printf("%s: empty tree\n", __func__); goto found; } - - /* - * After splay, if start comes before root node, then there - * must be a gap from start to the root. - */ - map->root = vm_map_entry_splay(start, map->root); - if (start + length <= map->root->start) { +#if 0 + if ((e = RB_MIN(maptree, &map->root)) != NULL && e->start <= start + length) { *addr = start; + //printf("%s: space before first element\n", __func__); goto found; } - +#endif /* - * Root is the last node that might begin its gap before - * start, and this is the last comparison where address - * wrap might be a problem. 
+ * Search the tree for free space that is large enough. */ - st = (start > map->root->end) ? start : map->root->end; - if (length <= map->root->end + map->root->adj_free - st) { - *addr = st; - goto found; - } + RB_FOREACH_SAFE(e, maptree, &map->root, en) { + if (start > e->end) + continue; - /* With max_free, can immediately tell if no solution. */ - entry = map->root->right; - if (entry == NULL || length > entry->max_free) - return (1); + /* No next element. */ + if (en == NULL) { + if (e->end + length <= map->max_offset) { + *addr = e->end; + //printf("%s: space after last entry\n", __func__); + goto found; + } + break; + } - /* - * Search the right subtree in the order: left subtree, root, - * right subtree (first fit). The previous splay implies that - * all regions in the right subtree have addresses > start. - */ - while (entry != NULL) { - if (entry->left != NULL && entry->left->max_free >= length) - entry = entry->left; - else if (entry->adj_free >= length) { - *addr = entry->end; + if (e->end + length <= en->start) { + *addr = e->end; + //printf("%s: space found between entries\n", __func__); goto found; - } else - entry = entry->right; + } } + return (1); + /* Can't get here, so panic if we do. */ panic("vm_map_findspace: max_free corrupt"); @@ -1344,8 +1234,10 @@ /* Expand the kernel pmap, if necessary. */ if (map == kernel_map) { end = round_page(*addr + length); - if (end > kernel_vm_end) + if (end > kernel_vm_end) { + printf("%s: extending kernel_map\n", __func__); pmap_growkernel(end); + } } return (0); } @@ -1438,12 +1330,13 @@ { vm_map_entry_t next, prev; vm_size_t prevsize, esize; + int merged = 0; if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) return; - prev = entry->prev; - if (prev != &map->header) { + prev = vm_map_prev(entry); + if (prev != NULL) { prevsize = prev->end - prev->start; if ( (prev->end == entry->start) && (prev->object.vm_object == entry->object.vm_object) && @@ -1455,12 +1348,10 @@ (prev->inheritance == entry->inheritance) && (prev->wired_count == entry->wired_count) && (prev->uip == entry->uip)) { + vm_map_entry_unlink(map, entry); vm_map_entry_unlink(map, prev); entry->start = prev->start; entry->offset = prev->offset; - if (entry->prev != &map->header) - vm_map_entry_resize_free(map, entry->prev); - /* * If the backing object is a vnode object, * vm_object_deallocate() calls vrele(). @@ -1475,11 +1366,12 @@ if (prev->uip != NULL) uifree(prev->uip); vm_map_entry_dispose(map, prev); + merged = 1; } } - next = entry->next; - if (next != &map->header) { + next = vm_map_next(entry); + if (next != NULL) { esize = entry->end - entry->start; if ((entry->end == next->start) && (next->object.vm_object == entry->object.vm_object) && @@ -1491,9 +1383,9 @@ (next->inheritance == entry->inheritance) && (next->wired_count == entry->wired_count) && (next->uip == entry->uip)) { + vm_map_entry_unlink(map, entry); vm_map_entry_unlink(map, next); entry->end = next->end; - vm_map_entry_resize_free(map, entry); /* * See comment above. 
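
The rewritten vm_map_findspace() above drops the max_free augmentation and instead walks the entries in ascending address order, taking the first hole (between two consecutive entries, or between the last entry and max_offset) that can hold the request. A simplified algorithm sketch of that first-fit idea, over a sorted array rather than the RB tree purely to keep it short; all names are illustrative:

/*
 * First-fit search over a sorted set of [start, end) spans, in the
 * spirit of the rewritten vm_map_findspace().  Not kernel code.
 */
#include <stdio.h>

struct span { unsigned long start, end; };    /* [start, end) */

static int
findspace(const struct span *e, int n, unsigned long min_off,
    unsigned long max_off, unsigned long start, unsigned long length,
    unsigned long *addr)
{
    unsigned long cand;
    int i;

    if (start < min_off)
        start = min_off;
    if (start + length > max_off || start + length < start)
        return (1);                 /* out of range or wraps */
    cand = start;
    for (i = 0; i < n; i++) {
        if (e[i].end <= cand)       /* entry entirely below the cursor */
            continue;
        if (e[i].start >= cand + length) {    /* hole before this entry */
            *addr = cand;
            return (0);
        }
        cand = e[i].end;            /* skip past this entry and retry */
    }
    if (cand + length <= max_off) { /* room after the last entry */
        *addr = cand;
        return (0);
    }
    return (1);
}

int
main(void)
{
    struct span map[] = {
        { 0x1000, 0x3000 }, { 0x3000, 0x4000 }, { 0x8000, 0x9000 }
    };
    unsigned long addr;

    if (findspace(map, 3, 0x1000, 0x10000, 0x1000, 0x2000, &addr) == 0)
        printf("first fit at %#lx\n", addr);    /* expect 0x4000 */
    return (0);
}
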
@@ -1503,8 +1395,12 @@ if (next->uip != NULL) uifree(next->uip); vm_map_entry_dispose(map, next); + merged = 1; } } + + if (merged) + vm_map_entry_link(map, entry); } /* * vm_map_clip_start: [ internal use only ] @@ -1570,14 +1466,21 @@ new_entry = vm_map_entry_create(map); *new_entry = *entry; - new_entry->end = start; + vm_map_entry_unlink(map, entry); + entry->offset += (start - entry->start); entry->start = start; + new_entry->end = start; if (new_entry->uip != NULL) uihold(entry->uip); - vm_map_entry_link(map, entry->prev, new_entry); + KASSERT(entry->start < entry->end, + ("%s: entry %u-%u, new_entry %u-%u", __func__, entry->start, entry->end, + new_entry->start, new_entry->end)); + vm_map_entry_link(map, entry); + vm_map_entry_link(map, new_entry); + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); } @@ -1643,12 +1546,15 @@ new_entry = vm_map_entry_create(map); *new_entry = *entry; + vm_map_entry_unlink(map, entry); + new_entry->start = entry->end = end; new_entry->offset += (end - entry->start); if (new_entry->uip != NULL) uihold(entry->uip); - vm_map_entry_link(map, entry, new_entry); + vm_map_entry_link(map, entry); + vm_map_entry_link(map, new_entry); if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { vm_object_reference(new_entry->object.vm_object); @@ -1690,7 +1596,7 @@ if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else - entry = entry->next; + entry = vm_map_next(entry); vm_map_clip_end(map, entry, end); @@ -1750,15 +1656,14 @@ start = 0; p_start = NULL; - p = vm_page_find_least(object, pindex); /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ - for (; + for (p = vm_page_find_least(object, pindex); p != NULL && (tmpidx = p->pindex - pindex) < psize; - p = TAILQ_NEXT(p, listq)) { + p = RB_NEXT(pgtree, &object->root, p)) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. @@ -1810,14 +1715,14 @@ if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else { - entry = entry->next; + entry = vm_map_next(entry); } /* * Make a first pass to check for protection violations. */ current = entry; - while ((current != &map->header) && (current->start < end)) { + while (current != NULL && current->start < end) { if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); @@ -1826,7 +1731,7 @@ vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } - current = current->next; + current = vm_map_next(current); } @@ -1835,8 +1740,9 @@ * now will do cow due to allowed write (e.g. debugger sets * breakpoint on text segment) */ - for (current = entry; (current != &map->header) && - (current->start < end); current = current->next) { + for (current = entry; + current != NULL && current->start < end; + current = vm_map_next(current)) { vm_map_clip_end(map, current, end); @@ -1889,7 +1795,7 @@ * necessary the second time.] 
*/ current = entry; - while ((current != &map->header) && (current->start < end)) { + while (current != NULL && current->start < end) { old_prot = current->protection; if (set_max) @@ -1919,7 +1825,7 @@ #undef MASK } vm_map_simplify_entry(map, current); - current = current->next; + current = vm_map_next(current); } vm_map_unlock(map); return (KERN_SUCCESS); @@ -1978,7 +1884,7 @@ if (modify_map) vm_map_clip_start(map, entry, start); } else { - entry = entry->next; + entry = vm_map_next(entry); } if (modify_map) { @@ -1989,9 +1895,8 @@ * limited to the specified address range. */ for (current = entry; - (current != &map->header) && (current->start < end); - current = current->next - ) { + current != NULL && current->start < end; + current = vm_map_next(current)) { if (current->eflags & MAP_ENTRY_IS_SUB_MAP) continue; @@ -2037,9 +1942,8 @@ * the vm_object pindex and count. */ for (current = entry; - (current != &map->header) && (current->start < end); - current = current->next - ) { + current != NULL && current->start < end; + current = vm_map_next(current)) { vm_offset_t useStart; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) @@ -2108,12 +2012,13 @@ entry = temp_entry; vm_map_clip_start(map, entry, start); } else - entry = temp_entry->next; - while ((entry != &map->header) && (entry->start < end)) { + entry = vm_map_next(temp_entry); + + while (entry != NULL && entry->start < end) { vm_map_clip_end(map, entry, end); entry->inheritance = new_inheritance; vm_map_simplify_entry(map, entry); - entry = entry->next; + entry = vm_map_next(entry); } vm_map_unlock(map); return (KERN_SUCCESS); @@ -2139,7 +2044,7 @@ VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) - first_entry = first_entry->next; + first_entry = vm_map_next(first_entry); else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); @@ -2147,7 +2052,7 @@ } last_timestamp = map->timestamp; entry = first_entry; - while (entry != &map->header && entry->start < end) { + while (entry != NULL && entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. @@ -2171,7 +2076,7 @@ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) - tmp_entry = tmp_entry->next; + tmp_entry = vm_map_next(tmp_entry); else { if (saved_start == start) { /* @@ -2206,8 +2111,8 @@ * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
*/ if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && - (entry->end < end && (entry->next == &map->header || - entry->next->start > entry->end))) { + (entry->end < end && (vm_map_next(entry) == NULL || + vm_map_next(entry)->start > entry->end))) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; @@ -2221,7 +2126,7 @@ rv = KERN_INVALID_ARGUMENT; goto done; } - entry = entry->next; + entry = vm_map_next(entry); } rv = KERN_SUCCESS; done: @@ -2229,12 +2134,12 @@ if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) - first_entry = first_entry->next; + first_entry = vm_map_next(first_entry); else KASSERT(result, ("vm_map_unwire: lookup failed")); } entry = first_entry; - while (entry != &map->header && entry->start < end) { + while (entry != NULL && entry->start < end) { if (rv == KERN_SUCCESS && (!user_unwire || (entry->eflags & MAP_ENTRY_USER_WIRED))) { if (user_unwire) @@ -2258,7 +2163,7 @@ need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); - entry = entry->next; + entry = vm_map_next(entry); } vm_map_unlock(map); if (need_wakeup) @@ -2286,7 +2191,7 @@ VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) - first_entry = first_entry->next; + first_entry = vm_map_next(first_entry); else { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); @@ -2294,7 +2199,7 @@ } last_timestamp = map->timestamp; entry = first_entry; - while (entry != &map->header && entry->start < end) { + while (entry != NULL && entry->start < end) { if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { /* * We have not yet clipped the entry. @@ -2318,7 +2223,7 @@ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) { if (flags & VM_MAP_WIRE_HOLESOK) - tmp_entry = tmp_entry->next; + tmp_entry = vm_map_next(tmp_entry); else { if (saved_start == start) { /* @@ -2341,6 +2246,10 @@ last_timestamp = map->timestamp; continue; } + + //printf("%s: entry %u-%u, func %u-%u\n", __func__, + // entry->start, entry->end, start, end); + vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); /* @@ -2397,7 +2306,7 @@ ("vm_map_wire: bad count")); entry->wired_count = -1; } - entry = entry->next; + entry = vm_map_next(entry); } } last_timestamp = map->timestamp; @@ -2422,13 +2331,13 @@ */ next_entry: if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && - (entry->end < end && (entry->next == &map->header || - entry->next->start > entry->end))) { + (entry->end < end && (vm_map_next(entry) == NULL || + vm_map_next(entry)->start > entry->end))) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; } - entry = entry->next; + entry = vm_map_next(entry); } rv = KERN_SUCCESS; done: @@ -2436,12 +2345,12 @@ if (first_entry == NULL) { result = vm_map_lookup_entry(map, start, &first_entry); if (!result && (flags & VM_MAP_WIRE_HOLESOK)) - first_entry = first_entry->next; + first_entry = vm_map_next(first_entry); else KASSERT(result, ("vm_map_wire: lookup failed")); } entry = first_entry; - while (entry != &map->header && entry->start < end) { + while (entry != NULL && entry->start < end) { if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) goto next_entry_done; if (rv == KERN_SUCCESS) { @@ -2476,7 +2385,7 @@ need_wakeup = TRUE; } vm_map_simplify_entry(map, entry); - entry = entry->next; + entry = vm_map_next(entry); } vm_map_unlock(map); if (need_wakeup) @@ -2527,15 +2436,16 @@ /* * Make a first pass to check for user-wired memory and holes. 
*/ - for (current = entry; current != &map->header && current->start < end; - current = current->next) { + for (current = entry; + current != NULL && current->start < end; + current = vm_map_next(current)) { if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) { vm_map_unlock_read(map); return (KERN_INVALID_ARGUMENT); } if (end > current->end && - (current->next == &map->header || - current->end != current->next->start)) { + (vm_map_next(current) == NULL || + current->end != vm_map_next(current)->start)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } @@ -2548,7 +2458,9 @@ * Make a second pass, cleaning/uncaching pages from the indicated * objects as we go. */ - for (current = entry; current != &map->header && current->start < end;) { + for (current = entry; + current != NULL && current->start < end; + ) { offset = current->offset + (start - current->start); size = (end <= current->end ? end : current->end) - start; if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { @@ -2577,7 +2489,7 @@ vm_map_lock_read(map); if (last_timestamp == map->timestamp || !vm_map_lookup_entry(map, start, ¤t)) - current = current->next; + current = vm_map_next(current); } vm_map_unlock_read(map); @@ -2675,9 +2587,12 @@ /* * Find the start of the region, and clip it */ - if (!vm_map_lookup_entry(map, start, &first_entry)) - entry = first_entry->next; - else { + if (!vm_map_lookup_entry(map, start, &first_entry)) { + if (first_entry != NULL) + entry = vm_map_next(first_entry); + else + entry = NULL; + } else { entry = first_entry; vm_map_clip_start(map, entry, start); } @@ -2685,7 +2600,7 @@ /* * Step through all entries in this region */ - while ((entry != &map->header) && (entry->start < end)) { + while ((entry != NULL) && (entry->start < end)) { vm_map_entry_t next; /* @@ -2714,7 +2629,7 @@ */ if (!vm_map_lookup_entry(map, saved_start, &tmp_entry)) - entry = tmp_entry->next; + entry = vm_map_next(tmp_entry); else { entry = tmp_entry; vm_map_clip_start(map, entry, @@ -2725,8 +2640,7 @@ } vm_map_clip_end(map, entry, end); - next = entry->next; - + next = vm_map_next(entry); /* * Unwire before removing addresses from the pmap; otherwise, * unwiring will put the entries back in the pmap. @@ -2744,8 +2658,7 @@ * will be set in the wrong object!) */ vm_map_entry_delete(map, entry); - entry->next = map->deferred_freelist; - map->deferred_freelist = entry; + SLIST_INSERT_HEAD(&map->deferred_freelist, entry, mapl); entry = next; } return (KERN_SUCCESS); @@ -2795,7 +2708,7 @@ entry = tmp_entry; while (start < end) { - if (entry == &map->header) + if (entry == NULL) return (FALSE); /* * No holes allowed! @@ -2809,7 +2722,7 @@ return (FALSE); /* go to next entry */ start = entry->end; - entry = entry->next; + entry = vm_map_next(entry); } return (TRUE); } @@ -2981,9 +2894,9 @@ KASSERT(locked, ("vmspace_fork: lock failed")); new_map->timestamp = 1; - old_entry = old_map->header.next; + old_entry = RB_MIN(maptree, &old_map->root); - while (old_entry != &old_map->header) { + while (old_entry != NULL) { if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) panic("vm_map_fork: encountered a submap"); @@ -3054,8 +2967,7 @@ * Insert the entry into the new map -- we know we're * inserting at the end of the new map. 
*/ - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); /* @@ -3078,14 +2990,13 @@ new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->uip = NULL; - vm_map_entry_link(new_map, new_map->header.prev, - new_entry); + vm_map_entry_link(new_map, new_entry); vmspace_map_entry_forked(vm1, vm2, new_entry); vm_map_copy_entry(old_map, new_map, old_entry, new_entry, fork_charge); break; } - old_entry = old_entry->next; + old_entry = vm_map_next(old_entry); } unlock_and_return: vm_map_unlock(old_map); @@ -3149,8 +3060,8 @@ * Hopefully we will at least detect this condition when we try to * grow the stack. */ - if ((prev_entry->next != &map->header) && - (prev_entry->next->start < addrbos + max_ssize)) { + if (vm_map_next(prev_entry) != NULL && + vm_map_next(prev_entry)->start < addrbos + max_ssize) { vm_map_unlock(map); return (KERN_NO_SPACE); } @@ -3176,9 +3087,9 @@ /* Now set the avail_ssize amount. */ if (rv == KERN_SUCCESS) { - if (prev_entry != &map->header) + if (prev_entry != NULL) vm_map_clip_end(map, prev_entry, bot); - new_entry = prev_entry->next; + new_entry = vm_map_next(prev_entry); if (new_entry->end != top || new_entry->start != bot) panic("Bad entry start/end for new stack entry"); @@ -3226,7 +3137,7 @@ return (KERN_SUCCESS); } - next_entry = prev_entry->next; + next_entry = vm_map_next(prev_entry); if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) { /* * This entry does not grow upwards. Since the address lies @@ -3262,14 +3173,14 @@ if (stack_entry == next_entry) { KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo")); KASSERT(addr < stack_entry->start, ("foo")); - end = (prev_entry != &map->header) ? prev_entry->end : + end = (prev_entry != NULL) ? prev_entry->end : stack_entry->start - stack_entry->avail_ssize; grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE); max_grow = stack_entry->start - end; } else { KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo")); KASSERT(addr >= stack_entry->end, ("foo")); - end = (next_entry != &map->header) ? next_entry->start : + end = (next_entry != NULL) ? next_entry->start : stack_entry->end + stack_entry->avail_ssize; grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE); max_grow = end - stack_entry->end; @@ -3348,10 +3259,10 @@ /* Adjust the available stack space by the amount we grew. 
*/ if (rv == KERN_SUCCESS) { - if (prev_entry != &map->header) + if (prev_entry != NULL) vm_map_clip_end(map, prev_entry, addr); - new_entry = prev_entry->next; - KASSERT(new_entry == stack_entry->prev, ("foo")); + new_entry = vm_map_next(prev_entry); + KASSERT(new_entry == vm_map_prev(stack_entry), ("foo")); KASSERT(new_entry->end == stack_entry->start, ("foo")); KASSERT(new_entry->start == addr, ("foo")); grow_amount = new_entry->end - new_entry->start; @@ -3394,7 +3305,7 @@ vm_map_entry_resize_free(map, stack_entry); rv = KERN_SUCCESS; - if (next_entry != &map->header) + if (next_entry != NULL) vm_map_clip_start(map, next_entry, addr); } else rv = KERN_FAILURE; @@ -3794,8 +3705,9 @@ return; db_indent += 2; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { db_iprintf("map entry %p: start=%p, end=%p\n", (void *)entry, (void *)entry->start, (void *)entry->end); nlines++; @@ -3815,8 +3727,8 @@ (void *)entry->object.sub_map, (uintmax_t)entry->offset); nlines++; - if ((entry->prev == &map->header) || - (entry->prev->object.sub_map != + if ((vm_map_prev(entry) == NULL) || + (vm_map_prev(entry)->object.sub_map != entry->object.sub_map)) { db_indent += 2; vm_map_print((db_expr_t)(intptr_t) @@ -3840,8 +3752,8 @@ db_printf("\n"); nlines++; - if ((entry->prev == &map->header) || - (entry->prev->object.vm_object != + if ((vm_map_prev(entry) == NULL) || + (vm_map_prev(entry)->object.vm_object != entry->object.vm_object)) { db_indent += 2; vm_object_print((db_expr_t)(intptr_t) Index: vm/vm_map.h =================================================================== --- vm/vm_map.h (revision 212213) +++ vm/vm_map.h (working copy) @@ -69,6 +69,7 @@ #include #include #include +#include /* * Types defined: @@ -97,14 +98,12 @@ * Also included is control information for virtual copy operations. */ struct vm_map_entry { - struct vm_map_entry *prev; /* previous entry */ - struct vm_map_entry *next; /* next entry */ - struct vm_map_entry *left; /* left child in binary search tree */ - struct vm_map_entry *right; /* right child in binary search tree */ + RB_ENTRY(vm_map_entry) mapt; /* binary tree */ + SLIST_ENTRY(vm_map_entry) mapl; /* linked list for deferred free */ vm_offset_t start; /* start address */ vm_offset_t end; /* end address */ vm_offset_t avail_ssize; /* amt can grow if this is a stack */ - vm_size_t adj_free; /* amount of adjacent free space */ + vm_size_t adj_free; /* amount of adjacent free space (up) */ vm_size_t max_free; /* max free space in subtree */ union vm_map_object object; /* object I point to */ vm_ooffset_t offset; /* offset into object */ @@ -174,7 +173,7 @@ * (c) const until freed */ struct vm_map { - struct vm_map_entry header; /* List of entries */ + RB_HEAD(maptree, vm_map_entry) root; /* Root of binaey search tree */ struct sx lock; /* Lock for map data */ struct mtx system_mtx; int nentries; /* Number of entries */ @@ -183,11 +182,10 @@ u_char needs_wakeup; u_char system_map; /* (c) Am I a system map? 
*/ vm_flags_t flags; /* flags for this vm_map */ - vm_map_entry_t root; /* Root of a binary search tree */ pmap_t pmap; /* (c) Physical map */ - vm_map_entry_t deferred_freelist; -#define min_offset header.start /* (c) */ -#define max_offset header.end /* (c) */ + SLIST_HEAD(, vm_map_entry) deferred_freelist; + vm_size_t min_offset; /* (c) */ + vm_size_t max_offset; /* (c) */ }; /* @@ -277,6 +275,12 @@ int vm_map_unlock_and_wait(vm_map_t map, int timo); void vm_map_wakeup(vm_map_t map); +int vm_map_startend_cmp(const vm_map_entry_t, const vm_map_entry_t); +RB_PROTOTYPE(maptree, vm_map_entry, mapt, vm_map_startend_cmp); + +#define vm_map_next(entry) RB_NEXT(maptree, NULL, (entry)) +#define vm_map_prev(entry) RB_PREV(maptree, NULL, (entry)) + #define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) #define vm_map_lock_read(map) _vm_map_lock_read(map, LOCK_FILE, LOCK_LINE) Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c (revision 212213) +++ vm/swap_pager.c (working copy) @@ -2428,7 +2428,9 @@ vm_map_entry_t cur; int count = 0; - for (cur = map->header.next; cur != &map->header; cur = cur->next) { + for (cur = RB_MIN(maptree, &map->root); + cur != NULL; + cur = vm_map_next(cur)) { vm_object_t object; if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && Index: vm/vm_mmap.c =================================================================== --- vm/vm_mmap.c (revision 212213) +++ vm/vm_mmap.c (working copy) @@ -581,8 +581,8 @@ */ if (vm_map_lookup_entry(map, addr, &entry)) { for (; - entry != &map->header && entry->start < addr + size; - entry = entry->next) { + entry != NULL && entry->start < addr + size; + entry = vm_map_next(entry)) { if (vm_map_check_protection(map, entry->start, entry->end, VM_PROT_EXECUTE) == TRUE) { pkm.pm_address = (uintptr_t) addr; @@ -813,15 +813,15 @@ */ lastvecindex = -1; for (current = entry; - (current != &map->header) && (current->start < end); - current = current->next) { + current != NULL && current->start < end; + current = vm_map_next(current)) { /* * check for contiguity */ if (current->end < end && - (entry->next == &map->header || - current->next->start > current->end)) { + (vm_map_next(entry) == NULL || + vm_map_next(entry)->start > current->end)) { vm_map_unlock_read(map); return (ENOMEM); } Index: vm/vm_object.c =================================================================== --- vm/vm_object.c (revision 212213) +++ vm/vm_object.c (working copy) @@ -160,7 +160,7 @@ vm_object_t object; object = (vm_object_t)mem; - KASSERT(TAILQ_EMPTY(&object->memq), + KASSERT(RB_EMPTY(&object->root), ("object %p has resident pages", object)); #if VM_NRESERVLEVEL > 0 @@ -168,7 +168,7 @@ ("object %p has reservations", object)); #endif - KASSERT(object->cache == NULL, + KASSERT(RB_EMPTY(&object->cache), ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, @@ -203,10 +203,10 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) { - TAILQ_INIT(&object->memq); + RB_INIT(&object->root); + RB_INIT(&object->cache); LIST_INIT(&object->shadow_head); - object->root = NULL; object->type = type; object->size = size; object->generation = 1; @@ -224,7 +224,6 @@ #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif - object->cache = NULL; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -301,7 +300,7 @@ case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: - if 
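
The vm_map_next()/vm_map_prev() wrappers added to vm_map.h above pass NULL as the RB_NEXT()/RB_PREV() head argument; in the sys/tree.h flavour this patch targets, those macros never dereference the head (successor and predecessor are reached through the element's own link pointers), so NULL is safe. A small userland check of the same pattern with an illustrative toy tree, assuming a tree.h that provides RB_PREV, as the patch itself does:

#include <sys/tree.h>
#include <stdio.h>

struct item {
    RB_ENTRY(item) link;
    int key;
};

static int
item_cmp(struct item *a, struct item *b)
{
    return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(itemtree, item) head = RB_INITIALIZER(&head);
RB_GENERATE(itemtree, item, link, item_cmp);

/* Mirrors the new vm_map_next()/vm_map_prev() wrappers: NULL head. */
#define item_next(it)   RB_NEXT(itemtree, NULL, (it))
#define item_prev(it)   RB_PREV(itemtree, NULL, (it))

int
main(void)
{
    struct item a = { .key = 1 }, b = { .key = 2 }, c = { .key = 3 };
    struct item *it;

    RB_INSERT(itemtree, &head, &b);
    RB_INSERT(itemtree, &head, &a);
    RB_INSERT(itemtree, &head, &c);

    for (it = RB_MIN(itemtree, &head); it != NULL; it = item_next(it))
        printf("%d ", it->key);     /* prints 1 2 3 */
    printf("\n");

    for (it = RB_MAX(itemtree, &head); it != NULL; it = item_prev(it))
        printf("%d ", it->key);     /* prints 3 2 1 */
    printf("\n");
    return (0);
}
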
(!TAILQ_EMPTY(&object->memq)) + if (!RB_EMPTY(&object->root)) return (KERN_FAILURE); break; case OBJT_DEAD: @@ -662,7 +661,7 @@ void vm_object_terminate(vm_object_t object) { - vm_page_t p; + vm_page_t p, pn; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -706,7 +705,7 @@ * removes them from paging queues. Don't free wired pages, just * remove them from the object. */ - while ((p = TAILQ_FIRST(&object->memq)) != NULL) { + RB_FOREACH_SAFE(p, pgtree, &object->root, pn) { KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p " "p->busy = %d, p->oflags %x\n", p, p->busy, p->oflags)); @@ -723,7 +722,7 @@ if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif - if (__predict_false(object->cache != NULL)) + if (__predict_false(!RB_EMPTY(&object->cache))) vm_page_cache_free(object, 0, 0); /* @@ -782,7 +781,8 @@ */ clearobjflags = 1; for (p = vm_page_find_least(object, start); - p != NULL && p->pindex < tend; p = TAILQ_NEXT(p, listq)) { + p != NULL && p->pindex < tend; + p = RB_NEXT(pgtree, &object->root, p)) { if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) clearobjflags = 0; @@ -796,11 +796,13 @@ rescan: curgeneration = object->generation; - for (p = vm_page_find_least(object, start); p != NULL; p = np) { + for (p = vm_page_find_least(object, start); + p != NULL; + p = np) { pi = p->pindex; if (pi >= tend) break; - np = TAILQ_NEXT(p, listq); + np = RB_NEXT(pgtree, &object->root, p); if (p->valid == 0) continue; while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { @@ -1277,10 +1279,10 @@ orig_object->charge -= ptoa(size); } retry: - m = vm_page_find_least(orig_object, offidxstart); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); + for (m = vm_page_find_least(orig_object, offidxstart); + m != NULL && (idx = m->pindex - offidxstart) < size; + m = m_next) { + m_next = RB_NEXT(pgtree, &orig_object->root, m); /* * We must wait for pending I/O to complete before we can @@ -1312,12 +1314,12 @@ /* * Transfer any cached pages from orig_object to new_object. */ - if (__predict_false(orig_object->cache != NULL)) + if (__predict_false(!RB_EMPTY(&orig_object->cache))) vm_page_cache_transfer(orig_object, offidxstart, new_object); } VM_OBJECT_UNLOCK(orig_object); - TAILQ_FOREACH(m, &new_object->memq, listq) + RB_FOREACH(m, pgtree, &new_object->root) vm_page_wakeup(m); VM_OBJECT_UNLOCK(new_object); entry->object.vm_object = new_object; @@ -1334,7 +1336,7 @@ vm_object_backing_scan(vm_object_t object, int op) { int r = 1; - vm_page_t p; + vm_page_t p, pn, pp; vm_object_t backing_object; vm_pindex_t backing_offset_index; @@ -1368,14 +1370,10 @@ /* * Our scan */ - p = TAILQ_FIRST(&backing_object->memq); - while (p) { - vm_page_t next = TAILQ_NEXT(p, listq); + RB_FOREACH_SAFE(p, pgtree, &backing_object->root, pn) { vm_pindex_t new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { - vm_page_t pp; - /* * Ignore pages outside the parent object's range * and outside the parent object's mapping of the @@ -1384,13 +1382,9 @@ * note that we do not busy the backing object's * page. */ - if ( - p->pindex < backing_offset_index || - new_pindex >= object->size - ) { - p = next; + if (p->pindex < backing_offset_index || + new_pindex >= object->size) continue; - } /* * See if the parent has the page or if the parent's @@ -1401,12 +1395,9 @@ * If this fails, the parent does not completely shadow * the object and we might as well give up now. 
*/ - pp = vm_page_lookup(object, new_pindex); - if ( - (pp == NULL || pp->valid == 0) && - !vm_pager_has_page(object, new_pindex, NULL, NULL) - ) { + if ((pp == NULL || pp->valid == 0) && + !vm_pager_has_page(object, new_pindex, NULL, NULL)) { r = 0; break; } @@ -1416,15 +1407,10 @@ * Check for busy page */ if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { - vm_page_t pp; - if (op & OBSC_COLLAPSE_NOWAIT) { if ((p->oflags & VPO_BUSY) || - !p->valid || - p->busy) { - p = next; + !p->valid || p->busy) continue; - } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { VM_OBJECT_UNLOCK(object); @@ -1440,31 +1426,23 @@ * should not have changed so we * just restart our scan. */ - p = TAILQ_FIRST(&backing_object->memq); + pn = RB_MIN(pgtree, &backing_object->root); continue; } } - KASSERT( - p->object == backing_object, - ("vm_object_backing_scan: object mismatch") - ); + KASSERT(p->object == backing_object, + ("vm_object_backing_scan: object mismatch")); /* * Destroy any associated swap */ - if (backing_object->type == OBJT_SWAP) { - swap_pager_freespace( - backing_object, - p->pindex, - 1 - ); - } + if (backing_object->type == OBJT_SWAP) + swap_pager_freespace(backing_object, + p->pindex, 1); - if ( - p->pindex < backing_offset_index || - new_pindex >= object->size - ) { + if (p->pindex < backing_offset_index || + new_pindex >= object->size) { /* * Page is out of the parent object's range, we * can simply destroy it. @@ -1477,15 +1455,12 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } pp = vm_page_lookup(object, new_pindex); - if ( - pp != NULL || - vm_pager_has_page(object, new_pindex, NULL, NULL) - ) { + if (pp != NULL || + vm_pager_has_page(object, new_pindex, NULL, NULL)) { /* * page already exists in parent OR swap exists * for this location in the parent. Destroy @@ -1501,7 +1476,6 @@ else vm_page_remove(p); vm_page_unlock(p); - p = next; continue; } @@ -1525,7 +1499,6 @@ vm_page_unlock(p); /* page automatically made dirty by rename */ } - p = next; } return (r); } @@ -1640,7 +1613,7 @@ /* * Free any cached pages from backing_object. */ - if (__predict_false(backing_object->cache != NULL)) + if (__predict_false(!RB_EMPTY(&backing_object->cache))) vm_page_cache_free(backing_object, 0, 0); } /* @@ -1781,17 +1754,15 @@ vm_object_pip_add(object, 1); again: - p = vm_page_find_least(object, start); - /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. 
*/ - for (; + for (p = vm_page_find_least(object, start); p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); + next = RB_NEXT(pgtree, &object->root, p); /* * If the page is wired for any reason besides the @@ -1836,7 +1807,7 @@ } vm_object_pip_wakeup(object); skipmemq: - if (__predict_false(object->cache != NULL)) + if (__predict_false(!RB_EMPTY(&object->cache))) vm_page_cache_free(object, start, end); } @@ -1883,7 +1854,7 @@ m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_wakeup(m); - m = TAILQ_NEXT(m, listq); + m = RB_NEXT(pgtree, &object->root, m); } } return (pindex == end); @@ -2036,23 +2007,23 @@ return 0; if (entry == 0) { - tmpe = map->header.next; + tmpe = RB_MIN(maptree, &map->root); entcount = map->nentries; - while (entcount-- && (tmpe != &map->header)) { + while (entcount-- && (tmpe != NULL)) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } - tmpe = tmpe->next; + tmpe = vm_map_next(tmpe); } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; - tmpe = tmpm->header.next; + tmpe = RB_MIN(maptree, &tmpm->root); entcount = tmpm->nentries; - while (entcount-- && tmpe != &tmpm->header) { + while (entcount-- && tmpe != NULL) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } - tmpe = tmpe->next; + tmpe = vm_map_next(tmpe); } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) @@ -2150,7 +2121,7 @@ db_indent += 2; count = 0; - TAILQ_FOREACH(p, &object->memq, listq) { + RB_FOREACH(p, pgtree, &object->root) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { @@ -2204,10 +2175,10 @@ rcount = 0; fidx = 0; pa = -1; - TAILQ_FOREACH(m, &object->memq, listq) { + RB_FOREACH(m, pgtree, &object->root) { if (m->pindex > 128) break; - if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && + if ((prev_m = RB_PREV(pgtree, &object->root, m)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", Index: vm/vm_reserv.c =================================================================== --- vm/vm_reserv.c (revision 212213) +++ vm/vm_reserv.c (working copy) @@ -311,11 +311,15 @@ /* * Look for an existing reservation. */ - msucc = NULL; - mpred = object->root; - while (mpred != NULL) { - KASSERT(mpred->pindex != pindex, - ("vm_reserv_alloc_page: pindex already allocated")); + msucc = vm_page_find_least(object, pindex); + KASSERT(msucc == NULL || msucc->pindex != pindex, + ("vm_reserv_alloc_page: pindex already allocated")); + if (msucc != NULL) + mpred = RB_PREV(pgtree, &object->root, msucc); + else + mpred = RB_MAX(pgtree, &object->root); + + if (mpred != NULL) { rv = vm_reserv_from_page(mpred); if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; @@ -324,31 +328,19 @@ return (NULL); vm_reserv_populate(rv); return (m); - } else if (mpred->pindex < pindex) { - if (msucc != NULL || - (msucc = TAILQ_NEXT(mpred, listq)) == NULL) - break; - KASSERT(msucc->pindex != pindex, - ("vm_reserv_alloc_page: pindex already allocated")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && - vm_reserv_has_pindex(rv, pindex)) { - m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; - /* Handle vm_page_rename(m, new_object, ...). 
*/ - if ((m->flags & (PG_CACHED | PG_FREE)) == 0) - return (NULL); - vm_reserv_populate(rv); - return (m); - } else if (pindex < msucc->pindex) - break; - } else if (msucc == NULL) { - msucc = mpred; - mpred = TAILQ_PREV(msucc, pglist, listq); - continue; } - msucc = NULL; - mpred = object->root = vm_page_splay(pindex, object->root); } + if (msucc != NULL) { + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) { + m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; + /* Handle vm_page_rename(m, new_object, ...). */ + if ((m->flags & (PG_CACHED | PG_FREE)) == 0) + return (NULL); + vm_reserv_populate(rv); + return (m); + } + } /* * Determine the first index to the left that can be used. Index: vm/vm_object.h =================================================================== --- vm/vm_object.h (revision 212213) +++ vm/vm_object.h (working copy) @@ -68,6 +68,7 @@ #define _VM_OBJECT_ #include +#include #include #include @@ -86,8 +87,7 @@ TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ - TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ - vm_page_t root; /* root of the resident page splay tree */ + struct pgtree root; /* root of the resident page tree */ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ int ref_count; /* How many refs?? */ @@ -102,7 +102,7 @@ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ - vm_page_t cache; /* root of the cache page splay tree */ + struct pgtree cache; /* root of the cache page tree */ void *handle; union { /* Index: vm/vm_fault.c =================================================================== --- vm/vm_fault.c (revision 212213) +++ vm/vm_fault.c (working copy) @@ -1338,7 +1338,7 @@ startpindex = pindex - rbehind; } - if ((rtm = TAILQ_PREV(m, pglist, listq)) != NULL && + if ((rtm = RB_PREV(pgtree, &object->root, m)) != NULL && rtm->pindex >= startpindex) startpindex = rtm->pindex + 1; @@ -1378,7 +1378,7 @@ * scan forward for the read ahead pages */ endpindex = tpindex + rahead; - if ((rtm = TAILQ_NEXT(m, listq)) != NULL && rtm->pindex < endpindex) + if ((rtm = RB_NEXT(pgtree, &object->root, m)) != NULL && rtm->pindex < endpindex) endpindex = rtm->pindex; if (endpindex > object->size) endpindex = object->size; Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (revision 212213) +++ vm/vm_page.c (working copy) @@ -677,55 +677,21 @@ * pindex is not found in the vm_object, returns a vm_page that is * adjacent to the pindex, coming before or after it. */ -vm_page_t -vm_page_splay(vm_pindex_t pindex, vm_page_t root) +int +vm_page_pindex_cmp(const vm_page_t a, const vm_page_t b) { - struct vm_page dummy; - vm_page_t lefttreemax, righttreemin, y; - if (root == NULL) - return (root); - lefttreemax = righttreemin = &dummy; - for (;; root = y) { - if (pindex < root->pindex) { - if ((y = root->left) == NULL) - break; - if (pindex < y->pindex) { - /* Rotate right. */ - root->left = y->right; - y->right = root; - root = y; - if ((y = root->left) == NULL) - break; - } - /* Link into the new root's right tree. 
*/ - righttreemin->left = root; - righttreemin = root; - } else if (pindex > root->pindex) { - if ((y = root->right) == NULL) - break; - if (pindex > y->pindex) { - /* Rotate left. */ - root->right = y->left; - y->left = root; - root = y; - if ((y = root->right) == NULL) - break; - } - /* Link into the new root's left tree. */ - lefttreemax->right = root; - lefttreemax = root; - } else - break; - } - /* Assemble the new root. */ - lefttreemax->right = root->left; - righttreemin->left = root->right; - root->left = dummy.right; - root->right = dummy.left; - return (root); + if (a->pindex < b->pindex) + return -1; + else if (a->pindex > b->pindex) + return 1; + else + return 0; + /* return ((int)(a - b)); */ } +RB_GENERATE(pgtree, vm_page, paget, vm_page_pindex_cmp); + /* * vm_page_insert: [ internal use only ] * @@ -742,7 +708,6 @@ void vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { - vm_page_t root; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (m->object != NULL) @@ -757,28 +722,8 @@ /* * Now link into the object's ordered list of backed pages. */ - root = object->root; - if (root == NULL) { - m->left = NULL; - m->right = NULL; - TAILQ_INSERT_TAIL(&object->memq, m, listq); - } else { - root = vm_page_splay(pindex, root); - if (pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - TAILQ_INSERT_BEFORE(root, m, listq); - } else if (pindex == root->pindex) - panic("vm_page_insert: offset already allocated"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - TAILQ_INSERT_AFTER(&object->memq, root, m, listq); - } - } - object->root = m; + if (RB_INSERT(pgtree, &object->root, m) != NULL) + panic("vm_page_insert: offset already allocated"); object->generation++; /* @@ -815,7 +760,6 @@ vm_page_remove(vm_page_t m) { vm_object_t object; - vm_page_t root; if ((m->flags & PG_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); @@ -830,16 +774,8 @@ /* * Now remove from the object's list of backed pages. */ - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->root = root; - TAILQ_REMOVE(&object->memq, m, listq); + if (RB_REMOVE(pgtree, &object->root, m) == NULL) + panic("vm_page_remove: page not in list of backed pages"); /* * And show that the object has one fewer resident page. 
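For readers unfamiliar with the <sys/tree.h> macros, here is a stand-alone userland sketch of the comparator/RB_GENERATE pattern used above. It assumes a BSD <sys/tree.h> is available, and the names (struct node, nodetree) are illustrative only and do not appear in the patch:

/* Stand-alone illustration of the <sys/tree.h> pattern used above. */
#include <sys/tree.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	RB_ENTRY(node)	link;		/* tree linkage, like "paget" above */
	unsigned long	pindex;		/* the ordering key */
};

static int
node_cmp(struct node *a, struct node *b)
{

	if (a->pindex < b->pindex)
		return (-1);
	if (a->pindex > b->pindex)
		return (1);
	return (0);
}

RB_HEAD(nodetree, node);
RB_GENERATE(nodetree, node, link, node_cmp);

int
main(void)
{
	struct nodetree head = RB_INITIALIZER(&head);
	struct node *n, key;
	unsigned long i;

	for (i = 0; i < 8; i += 2) {		/* insert pindex 0, 2, 4, 6 */
		n = malloc(sizeof(*n));
		if (n == NULL)
			return (1);
		n->pindex = i;
		if (RB_INSERT(nodetree, &head, n) != NULL)
			printf("%lu already present\n", i);
	}
	key.pindex = 3;
	n = RB_NFIND(nodetree, &head, &key);	/* least node with pindex >= 3 */
	if (n != NULL)
		printf("nfind(3) -> %lu\n", n->pindex);	/* prints 4 */
	return (0);
}

RB_INSERT() returning non-NULL on a duplicate key and RB_NFIND() returning the least element not below the key are exactly the properties the patched vm_page_insert() and vm_page_find_least() rely on.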
@@ -869,13 +805,12 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; + struct vm_page ms; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = object->root) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->root = m)->pindex != pindex) - m = NULL; - } + ms.pindex = pindex; + m = RB_FIND(pgtree, &object->root, &ms); + return (m); } @@ -892,15 +827,12 @@ vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; + struct vm_page ms; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - if ((m = TAILQ_FIRST(&object->memq)) != NULL) { - if (m->pindex < pindex) { - m = vm_page_splay(pindex, object->root); - if ((object->root = m)->pindex < pindex) - m = TAILQ_NEXT(m, listq); - } - } + ms.pindex = pindex; + m = RB_NFIND(pgtree, &object->root, &ms); + return (m); } @@ -916,8 +848,8 @@ vm_page_t next; VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); - if ((next = TAILQ_NEXT(m, listq)) != NULL && - next->pindex != m->pindex + 1) + next = RB_NEXT(pgtree, &m->object->root, m); + if (next != NULL && next->pindex > m->pindex + 1) next = NULL; return (next); } @@ -934,8 +866,8 @@ vm_page_t prev; VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); - if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && - prev->pindex != m->pindex - 1) + prev = RB_PREV(pgtree, &m->object->root, m); + if (prev != NULL && prev->pindex < m->pindex -1) prev = NULL; return (prev); } @@ -982,43 +914,31 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m, m_next; - boolean_t empty; + struct vm_page ms; mtx_lock(&vm_page_queue_free_mtx); - if (__predict_false(object->cache == NULL)) { + if (__predict_false(RB_EMPTY(&object->cache))) { mtx_unlock(&vm_page_queue_free_mtx); return; } - m = object->cache = vm_page_splay(start, object->cache); - if (m->pindex < start) { - if (m->right == NULL) - m = NULL; - else { - m_next = vm_page_splay(start, m->right); - m_next->left = m; - m->right = NULL; - m = object->cache = m_next; - } - } /* * At this point, "m" is either (1) a reference to the page * with the least pindex that is greater than or equal to * "start" or (2) NULL. */ - for (; m != NULL && (m->pindex < end || end == 0); m = m_next) { + ms.pindex = start; + for (m = RB_NFIND(pgtree, &object->cache, &ms); + m != NULL && (m->pindex < end || end == 0); + m = m_next) { + m_next = RB_NEXT(pgtree, &object->cache, m); /* * Find "m"'s successor and remove "m" from the * object's cache. */ - if (m->right == NULL) { - object->cache = m->left; - m_next = NULL; - } else { - m_next = vm_page_splay(start, m->right); - m_next->left = m->left; - object->cache = m_next; - } + if (RB_REMOVE(pgtree, &object->cache, m) == NULL) + panic("vm_page_cache_free: couldn't remove page"); + /* Convert "m" to a free page. 
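The vm_page_cache_free() loop above follows the usual pattern for a destructive walk over an RB tree: fetch the successor before removing the current node. A minimal sketch of that pattern, with the locking, panics and page-conversion details omitted (illustrative only, not part of the patch):

/*
 * Sketch: walk the cached pages of "object" in [start, end) and drop
 * them, fetching the successor before each RB_REMOVE so the walk
 * survives the removal.
 */
static void
cache_range_drop(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	struct vm_page key;
	vm_page_t m, m_next;

	key.pindex = start;
	for (m = RB_NFIND(pgtree, &object->cache, &key);
	    m != NULL && (m->pindex < end || end == 0);
	    m = m_next) {
		m_next = RB_NEXT(pgtree, &object->cache, m);
		RB_REMOVE(pgtree, &object->cache, m);
		/* ...convert "m" back to a free page here... */
	}
}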
*/ m->object = NULL; m->valid = 0; @@ -1029,9 +949,8 @@ cnt.v_cache_count--; cnt.v_free_count++; } - empty = object->cache == NULL; mtx_unlock(&vm_page_queue_free_mtx); - if (object->type == OBJT_VNODE && empty) + if (object->type == OBJT_VNODE && RB_EMPTY(&object->cache)) vdrop(object->handle); } @@ -1045,13 +964,12 @@ vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; + struct vm_page ms; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if ((m = object->cache) != NULL && m->pindex != pindex) { - m = vm_page_splay(pindex, m); - if ((object->cache = m)->pindex != pindex) - m = NULL; - } + ms.pindex = pindex; + m = RB_FIND(pgtree, &object->cache, &ms); + return (m); } @@ -1065,27 +983,16 @@ vm_page_cache_remove(vm_page_t m) { vm_object_t object; - vm_page_t root; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); object = m->object; - if (m != object->cache) { - root = vm_page_splay(m->pindex, object->cache); - KASSERT(root == m, - ("vm_page_cache_remove: page %p is not cached in object %p", - m, object)); - } - if (m->left == NULL) - root = m->right; - else if (m->right == NULL) - root = m->left; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->cache = root; + + if ((m = RB_REMOVE(pgtree, &object->cache, m)) == NULL) + panic("vm_page_cache_remove: page %p is not cached in object %p", + m, object); + m->object = NULL; cnt.v_cache_count--; } @@ -1106,6 +1013,7 @@ vm_object_t new_object) { vm_page_t m, m_next; + struct vm_page ms; /* * Insertion into an object's collection of cached pages @@ -1113,54 +1021,33 @@ * not. */ VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); - KASSERT(new_object->cache == NULL, + KASSERT(RB_EMPTY(&new_object->cache), ("vm_page_cache_transfer: object %p has cached pages", new_object)); mtx_lock(&vm_page_queue_free_mtx); - if ((m = orig_object->cache) != NULL) { + + ms.pindex = offidxstart; + for (m = RB_NFIND(pgtree, &orig_object->cache, &ms); + m != NULL && (m->pindex - offidxstart) < new_object->size; + m = m_next) { /* * Transfer all of the pages with offset greater than or * equal to 'offidxstart' from the original object's * cache to the new object's cache. + * But not beyond the new object's size. */ - m = vm_page_splay(offidxstart, m); - if (m->pindex < offidxstart) { - orig_object->cache = m; - new_object->cache = m->right; - m->right = NULL; - } else { - orig_object->cache = m->left; - new_object->cache = m; - m->left = NULL; - } - while ((m = new_object->cache) != NULL) { - if ((m->pindex - offidxstart) >= new_object->size) { - /* - * Return all of the cached pages with - * offset greater than or equal to the - * new object's size to the original - * object's cache. - */ - new_object->cache = m->left; - m->left = orig_object->cache; - orig_object->cache = m; - break; - } - m_next = vm_page_splay(m->pindex, m->right); - /* Update the page's object and offset. 
*/ - m->object = new_object; - m->pindex -= offidxstart; - if (m_next == NULL) - break; - m->right = NULL; - m_next->left = m; - new_object->cache = m_next; - } - KASSERT(new_object->cache == NULL || - new_object->type == OBJT_SWAP, - ("vm_page_cache_transfer: object %p's type is incompatible" - " with cached pages", new_object)); + m_next = RB_NEXT(pgtree, &orig_object->cache, m); + if (RB_REMOVE(pgtree, &orig_object->cache, m) == NULL) + panic("%s: page not in orig_object", __func__); + if (RB_INSERT(pgtree, &new_object->cache, m) != NULL) + panic("%s: page already in new_object", __func__); } + + KASSERT(RB_EMPTY(&new_object->cache) || + new_object->type == OBJT_SWAP, + ("vm_page_cache_transfer: object %p's type is incompatible" + " with cached pages", new_object)); + mtx_unlock(&vm_page_queue_free_mtx); } @@ -1294,7 +1181,7 @@ m->valid = 0; m_object = m->object; vm_page_cache_remove(m); - if (m_object->type == OBJT_VNODE && m_object->cache == NULL) + if (m_object->type == OBJT_VNODE && RB_EMPTY(&m_object->cache)) vp = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1388,7 +1275,7 @@ m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && - m_object->cache == NULL) + RB_EMPTY(&m_object->cache)) drop = m_object->handle; } else { KASSERT(VM_PAGE_IS_FREE(m), @@ -1570,6 +1457,7 @@ vpq = &vm_page_queues[queue]; m->queue = queue; TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); + m->next = NULL; ++*vpq->cnt; } @@ -1936,7 +1824,6 @@ vm_page_cache(vm_page_t m) { vm_object_t object; - vm_page_t root; vm_page_lock_assert(m, MA_OWNED); object = m->object; @@ -1971,16 +1858,8 @@ * Remove the page from the object's collection of resident * pages. */ - if (m != object->root) - vm_page_splay(m->pindex, object->root); - if (m->left == NULL) - root = m->right; - else { - root = vm_page_splay(m->pindex, m->left); - root->right = m->right; - } - object->root = root; - TAILQ_REMOVE(&object->memq, m, listq); + if (RB_REMOVE(pgtree, &object->root, m) == NULL) + panic("%s: page not in object", __func__); object->resident_page_count--; object->generation++; @@ -1998,25 +1877,8 @@ mtx_lock(&vm_page_queue_free_mtx); m->flags |= PG_CACHED; cnt.v_cache_count++; - root = object->cache; - if (root == NULL) { - m->left = NULL; - m->right = NULL; - } else { - root = vm_page_splay(m->pindex, root); - if (m->pindex < root->pindex) { - m->left = root->left; - m->right = root; - root->left = NULL; - } else if (__predict_false(m->pindex == root->pindex)) - panic("vm_page_cache: offset already cached"); - else { - m->right = root->right; - m->left = root; - root->right = NULL; - } - } - object->cache = m; + if (RB_INSERT(pgtree, &object->cache, m) != NULL) + panic("%s: page already in object cache", __func__); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else @@ -2034,9 +1896,9 @@ * the object's only resident page. */ if (object->type == OBJT_VNODE) { - if (root == NULL && object->resident_page_count != 0) + if (RB_EMPTY(&object->root) && object->resident_page_count != 0) vhold(object->handle); - else if (root != NULL && object->resident_page_count == 0) + else if (!RB_EMPTY(&object->root) && object->resident_page_count == 0) vdrop(object->handle); } } Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (revision 212213) +++ vm/vm_page.h (working copy) @@ -68,6 +68,7 @@ #define _VM_PAGE_ #include +#include /* * Management of resident (logical) pages. 
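One point worth spelling out for the cache-transfer loop: the pre-patch splay code retargeted m->object and rebased m->pindex by offidxstart while moving a cached page, and since pindex is now the RB key it can only be rewritten while the page sits in neither tree. A hedged sketch of a single-page move that preserves that behaviour (cache_move_one is illustrative, not something the patch adds):

/*
 * Sketch: migrate one cached page from orig_object to new_object,
 * rebasing its pindex by "offidxstart" as the old splay-based code did.
 * The key may only change while the page is in neither tree.
 */
static void
cache_move_one(vm_object_t orig_object, vm_object_t new_object,
    vm_page_t m, vm_pindex_t offidxstart)
{

	RB_REMOVE(pgtree, &orig_object->cache, m);
	m->object = new_object;
	m->pindex -= offidxstart;		/* rekey before reinsert */
	if (RB_INSERT(pgtree, &new_object->cache, m) != NULL)
		panic("cache_move_one: pindex already cached");
}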
@@ -99,9 +100,8 @@ struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - struct vm_page *left; /* splay tree link (O) */ - struct vm_page *right; /* splay tree link (O) */ + RB_ENTRY(vm_page) paget; /* RB tree link (O) */ + vm_page_t next; /* pmap deferred free list */ vm_object_t object; /* which object am I in (O,P)*/ vm_pindex_t pindex; /* offset into object (O,Q) */ @@ -362,7 +362,10 @@ void vm_page_requeue(vm_page_t m); void vm_page_set_valid(vm_page_t m, int base, int size); void vm_page_sleep(vm_page_t m, const char *msg); -vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); + +int vm_page_pindex_cmp(const vm_page_t a, const vm_page_t b); +RB_PROTOTYPE(pgtree, vm_page, paget, vm_page_pindex_cmp); + vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_unwire (vm_page_t, int); void vm_page_wire (vm_page_t); Index: vm/vm.h =================================================================== --- vm/vm.h (revision 212213) +++ vm/vm.h (working copy) @@ -98,6 +98,13 @@ struct vm_object; typedef struct vm_object *vm_object_t; +struct pgtree { + struct vm_page *rbh_root; +}; +//struct maptree { +// struct vm_map *rbh_root; +//}; + #ifndef _KERNEL /* * This is defined in for the kernel so that non-vm kernel Index: vm/vnode_pager.c =================================================================== --- vm/vnode_pager.c (revision 212213) +++ vm/vnode_pager.c (working copy) @@ -431,7 +431,7 @@ */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } else if ((nsize & PAGE_MASK) && - __predict_false(object->cache != NULL)) { + __predict_false(!RB_EMPTY(&object->cache))) { vm_page_cache_free(object, OFF_TO_IDX(nsize), nobjsize); } Index: vm/uma_core.c =================================================================== --- vm/uma_core.c (revision 212213) +++ vm/uma_core.c (working copy) @@ -1007,7 +1007,7 @@ * This looks a little weird since we're getting one page at a time. */ VM_OBJECT_LOCK(object); - p = TAILQ_LAST(&object->memq, pglist); + p = RB_MAX(pgtree, &object->root); pages = p != NULL ? 
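The struct pgtree spelled out by hand in vm.h matches what the RB_HEAD macro would generate; for reference, the macro form looks like this (placement is hypothetical, since vm.h may deliberately avoid including <sys/tree.h>):

#include <sys/tree.h>

RB_HEAD(pgtree, vm_page);	/* struct pgtree { struct vm_page *rbh_root; }; */
/* A maptree head for vm_map entries could be declared the same way. */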
p->pindex + 1 : 0; startpages = pages; zkva = keg->uk_kva + pages * PAGE_SIZE; @@ -1019,7 +1019,7 @@ pmap_qremove(retkva, pages - startpages); while (pages != startpages) { pages--; - p = TAILQ_LAST(&object->memq, pglist); + p = RB_MAX(pgtree, &object->root); vm_page_unwire(p, 0); vm_page_free(p); } Index: compat/linprocfs/linprocfs.c =================================================================== --- compat/linprocfs/linprocfs.c (revision 212213) +++ compat/linprocfs/linprocfs.c (working copy) @@ -987,8 +987,9 @@ return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { + for (entry = RB_MIN(maptree, &map->root); + entry != NULL; + entry = vm_map_next(entry)) { name = ""; freename = NULL; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) Index: i386/include/pmap.h =================================================================== --- i386/include/pmap.h (revision 212213) +++ i386/include/pmap.h (working copy) @@ -426,8 +426,7 @@ pdpt_entry_t *pm_pdpt; /* KVA of page director pointer table */ #endif - vm_page_t pm_root; /* spare page table pages */ - + struct pgtree pm_root; /* spare page table pages */ }; typedef struct pmap *pmap_t; Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (revision 212213) +++ i386/i386/pmap.c (working copy) @@ -124,6 +124,7 @@ #include #include #include +#include #ifdef SMP #include #endif @@ -381,7 +382,7 @@ #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_root = NULL; + RB_INIT(&kernel_pmap->pm_root); kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -1507,12 +1508,20 @@ while (free != NULL) { m = free; - free = m->right; + free = m->next; + m->next = NULL; /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } +static __inline void +pmap_free_zero_page(vm_page_t free) +{ + + vm_page_free_toq(free); +} + /* * Schedule the specified unused page table page to be freed. 
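A note on the new single-page helper: paths such as pmap_unuse_pt() only fill in *free when a page table page was actually released, so a defensive variant (an assumption on my part, not what the patch adds) would tolerate a NULL argument the way the old list walk did:

/*
 * Defensive sketch of the single-page variant: callers may
 * legitimately have freed nothing, so accept NULL rather than
 * handing it to vm_page_free_toq().
 */
static __inline void
pmap_free_zero_page(vm_page_t free)
{

	if (free != NULL)
		vm_page_free_toq(free);
}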
Specifically, * add the page to the specified list of pages that will be released to the @@ -1526,7 +1535,7 @@ m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; - m->right = *free; + m->next = *free; *free = m; } @@ -1539,28 +1548,11 @@ static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { - vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - root = pmap->pm_root; - if (root == NULL) { - mpte->left = NULL; - mpte->right = NULL; - } else { - root = vm_page_splay(mpte->pindex, root); - if (mpte->pindex < root->pindex) { - mpte->left = root->left; - mpte->right = root; - root->left = NULL; - } else if (mpte->pindex == root->pindex) - panic("pmap_insert_pt_page: pindex already inserted"); - else { - mpte->right = root->right; - mpte->left = root; - root->right = NULL; - } - } - pmap->pm_root = mpte; + if (RB_INSERT(pgtree, &pmap->pm_root, mpte) != NULL) + panic("pmap_insert_pt_page: pindex already exists"); + mpte->next = NULL; } /* @@ -1572,14 +1564,12 @@ pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { vm_page_t mpte; + struct vm_page p; vm_pindex_t pindex = va >> PDRSHIFT; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { - mpte = vm_page_splay(pindex, mpte); - if ((pmap->pm_root = mpte)->pindex != pindex) - mpte = NULL; - } + p.pindex = pindex; + mpte = RB_FIND(pgtree, &pmap->pm_root, &p); return (mpte); } @@ -1591,18 +1581,10 @@ static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { - vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if (mpte != pmap->pm_root) - vm_page_splay(mpte->pindex, pmap->pm_root); - if (mpte->left == NULL) - root = mpte->right; - else { - root = vm_page_splay(mpte->pindex, mpte->left); - root->right = mpte->right; - } - pmap->pm_root = root; + if (RB_REMOVE(pgtree, &pmap->pm_root, mpte) == NULL) + panic("%s: page not found in page table", __func__); } /* @@ -1680,7 +1662,7 @@ #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_root = NULL; + RB_INIT(&pmap->pm_root); pmap->pm_active = 0; PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); @@ -1724,9 +1706,9 @@ KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif - pmap->pm_root = NULL; + RB_INIT(&pmap->pm_root); } - KASSERT(pmap->pm_root == NULL, + KASSERT(RB_EMPTY(&pmap->pm_root), ("pmap_pinit: pmap has reserved page table page(s)")); /* @@ -1982,7 +1964,7 @@ KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); - KASSERT(pmap->pm_root == NULL, + KASSERT(RB_EMPTY(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); pmap_lazyfix(pmap); @@ -2182,7 +2164,7 @@ free = NULL; pmap_unuse_pt(pmap, va, &free); pmap_invalidate_page(pmap, va); - pmap_free_zero_pages(free); + pmap_free_zero_page(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); if (pmap != locked_pmap) @@ -2538,7 +2520,7 @@ free = NULL; pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); pmap_invalidate_page(pmap, trunc_4mpage(va)); - pmap_free_zero_pages(free); + pmap_free_zero_page(free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); @@ -3438,7 +3420,11 @@ * populated, then attempt promotion. 
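The lookups above (vm_page_lookup(), vm_page_cache_lookup(), pmap_lookup_pt_page()) all rely on the same idiom: a stack-allocated dummy vm_page whose only initialized field is pindex, which is safe because vm_page_pindex_cmp() reads nothing else. Restated as an illustrative helper (not part of the patch):

/*
 * Sketch of the stack-key lookup idiom: only the field read by the
 * comparator needs to be initialized in the dummy node.
 */
static vm_page_t
pt_page_lookup(pmap_t pmap, vm_offset_t va)
{
	struct vm_page key;

	key.pindex = va >> PDRSHIFT;	/* superpage index on i386 */
	return (RB_FIND(pgtree, &pmap->pm_root, &key));
}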
*/ if ((mpte == NULL || mpte->wire_count == NPTEPG) && - pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) + pg_ps_enabled +#if VM_NRESERVLEVEL > 0 + && vm_reserv_level_iffullpop(m) == 0 +#endif + ) pmap_promote_pde(pmap, pde, va); sched_unpin(); @@ -3532,13 +3518,16 @@ va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && - pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && + pg_ps_enabled && +#if VM_NRESERVLEVEL > 0 + vm_reserv_level_iffullpop(m) == 0 && +#endif pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); - m = TAILQ_NEXT(m, listq); + m = RB_NEXT(pgtree, &m->object->root, m); } vm_page_unlock_queues(); PMAP_UNLOCK(pmap); @@ -3642,7 +3631,7 @@ free = NULL; if (pmap_unwire_pte_hold(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); - pmap_free_zero_pages(free); + pmap_free_zero_page(free); } mpte = NULL; @@ -3725,7 +3714,7 @@ * the pages are not physically contiguous or have differing * memory attributes. */ - p = TAILQ_NEXT(p, listq); + p = RB_NEXT(pgtree, &object->root, p); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, @@ -3733,7 +3722,7 @@ if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; - p = TAILQ_NEXT(p, listq); + p = RB_NEXT(pgtree, &object->root, p); } /* @@ -3913,7 +3902,7 @@ dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); - pmap_free_zero_pages(free); + pmap_free_zero_page(free); } goto out; } Index: cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (revision 212213) +++ cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c (working copy) @@ -330,7 +330,7 @@ vm_page_undirty(pp); vm_page_unlock_queues(); } else { - if (__predict_false(obj->cache != NULL)) { + if (__predict_false(!RB_EMPTY(&obj->cache))) { vm_page_cache_free(obj, OFF_TO_IDX(start), OFF_TO_IDX(start) + 1); } Index: security/mac/mac_process.c =================================================================== --- security/mac/mac_process.c (revision 212213) +++ security/mac/mac_process.c (working copy) @@ -265,7 +265,9 @@ return; vm_map_lock(map); - for (vme = map->header.next; vme != &map->header; vme = vme->next) { + for (vme = RB_MIN(maptree, &map->root); + vme != NULL; + vme = vm_map_next(vme)) { if (vme->eflags & MAP_ENTRY_IS_SUB_MAP) { mac_proc_vm_revoke_recurse(td, cred, vme->object.sub_map);
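Finally, the map-entry walks in linprocfs.c and mac_process.c above assume a maptree rooted in struct vm_map and a vm_map_next() successor helper, neither of which is defined in the hunks shown. A sketch of the assumed shape; the RB_ENTRY field name and comparator name here are guesses, not taken from the source:

/* Assumed shape only; the real maptree definition is not in this diff. */
RB_HEAD(maptree, vm_map_entry);
RB_PROTOTYPE(maptree, vm_map_entry, map_entry_link, vm_map_entry_cmp);

static __inline vm_map_entry_t
vm_map_next(vm_map_entry_t entry)
{

	/* RB_NEXT only follows the node's links; the head argument is unused. */
	return (RB_NEXT(maptree, NULL, entry));
}

With that helper, the RB_MIN()/vm_map_next() loops used throughout this patch replace the old header-sentinel list traversal one for one.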