Index: sys/vm/vm_contig.c
===================================================================
--- sys/vm/vm_contig.c	(revision 261896)
+++ sys/vm/vm_contig.c	(working copy)
@@ -66,7 +66,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -84,7 +83,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 #include
 
 static int
@@ -188,22 +186,6 @@ vm_contig_launder(int queue, int tries, vm_paddr_t
 }
 
 /*
- * Frees the given physically contiguous pages.
- *
- * N.B.: Any pages with PG_ZERO set must, in fact, be zero filled.
- */
-static void
-vm_page_release_contig(vm_page_t m, vm_pindex_t count)
-{
-
-	while (count--) {
-		/* Leave PG_ZERO unchanged. */
-		vm_page_free_toq(m);
-		m++;
-	}
-}
-
-/*
  * Increase the number of cached pages.  The specified value, "tries",
  * determines which categories of pages are cached:
  *
@@ -264,9 +246,10 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int
     vm_paddr_t high, vm_memattr_t memattr)
 {
 	vm_object_t object = kernel_object;
-	vm_offset_t addr, i, offset;
+	vm_offset_t addr;
+	vm_ooffset_t end_offset, offset;
 	vm_page_t m;
-	int tries;
+	int pflags, tries;
 
 	size = round_page(size);
 	vm_map_lock(map);
@@ -278,14 +261,22 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int
 	vm_object_reference(object);
 	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
 	    VM_PROT_ALL, 0);
+	if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+	else
+		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+	if (flags & M_ZERO)
+		pflags |= VM_ALLOC_ZERO;
 	VM_OBJECT_LOCK(object);
-	for (i = 0; i < size; i += PAGE_SIZE) {
+	end_offset = offset + size;
+	for (; offset < end_offset; offset += PAGE_SIZE) {
 		tries = 0;
 retry:
-		m = vm_phys_alloc_contig(1, low, high, PAGE_SIZE, 0);
+		m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
+		    low, high, PAGE_SIZE, 0, memattr);
 		if (m == NULL) {
+			VM_OBJECT_UNLOCK(object);
 			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-				VM_OBJECT_UNLOCK(object);
 				vm_map_unlock(map);
 				vm_contig_grow_cache(tries, low, high);
 				vm_map_lock(map);
@@ -293,20 +284,16 @@ retry:
 				tries++;
 				goto retry;
 			}
-			while (i != 0) {
-				i -= PAGE_SIZE;
-				m = vm_page_lookup(object, OFF_TO_IDX(offset +
-				    i));
-				vm_page_free(m);
-			}
-			VM_OBJECT_UNLOCK(object);
+			/*
+			 * Since the pages that were allocated by any previous
+			 * iterations of this loop are not busy, they can be
+			 * freed by vm_object_page_remove(), which is called
+			 * by vm_map_delete().
+			 */
 			vm_map_delete(map, addr, addr + size);
 			vm_map_unlock(map);
 			return (0);
 		}
-		if (memattr != VM_MEMATTR_DEFAULT)
-			pmap_page_set_memattr(m, memattr);
-		vm_page_insert(m, object, OFF_TO_IDX(offset + i));
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
@@ -326,92 +313,61 @@ retry:
  * specified through the given flags, then the pages are zeroed
  * before they are mapped.
 */
-static vm_offset_t
-contigmapping(vm_map_t map, vm_size_t size, vm_page_t m, vm_memattr_t memattr,
-    int flags)
+vm_offset_t
+kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr)
 {
 	vm_object_t object = kernel_object;
-	vm_offset_t addr, tmp_addr;
+	vm_offset_t addr;
+	vm_ooffset_t offset;
+	vm_page_t end_m, m;
+	int pflags, tries;
 
+	size = round_page(size);
 	vm_map_lock(map);
 	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
 		vm_map_unlock(map);
 		return (0);
 	}
+	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	vm_object_reference(object);
-	vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
-	    addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
-	vm_map_unlock(map);
+	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
+	    VM_PROT_ALL, 0);
+	if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+	else
+		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+	if (flags & M_ZERO)
+		pflags |= VM_ALLOC_ZERO;
 	VM_OBJECT_LOCK(object);
-	for (tmp_addr = addr; tmp_addr < addr + size; tmp_addr += PAGE_SIZE) {
-		if (memattr != VM_MEMATTR_DEFAULT)
-			pmap_page_set_memattr(m, memattr);
-		vm_page_insert(m, object,
-		    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
-		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
-			pmap_zero_page(m);
-		m->valid = VM_PAGE_BITS_ALL;
-		m++;
-	}
-	VM_OBJECT_UNLOCK(object);
-	vm_map_wire(map, addr, addr + size,
-	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
-	return (addr);
-}
-
-void *
-contigmalloc(
-	unsigned long size,	/* should be size_t here and for malloc() */
-	struct malloc_type *type,
-	int flags,
-	vm_paddr_t low,
-	vm_paddr_t high,
-	unsigned long alignment,
-	unsigned long boundary)
-{
-	void *ret;
-
-	ret = (void *)kmem_alloc_contig(kernel_map, size, flags, low, high,
-	    alignment, boundary, VM_MEMATTR_DEFAULT);
-	if (ret != NULL)
-		malloc_type_allocated(type, round_page(size));
-	return (ret);
-}
-
-vm_offset_t
-kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
-    vm_paddr_t high, unsigned long alignment, unsigned long boundary,
-    vm_memattr_t memattr)
-{
-	vm_offset_t ret;
-	vm_page_t pages;
-	unsigned long npgs;
-	int tries;
-
-	size = round_page(size);
-	npgs = size >> PAGE_SHIFT;
 	tries = 0;
 retry:
-	pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
-	if (pages == NULL) {
+	m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
+	    atop(size), low, high, alignment, boundary, memattr);
+	if (m == NULL) {
+		VM_OBJECT_UNLOCK(object);
 		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+			vm_map_unlock(map);
 			vm_contig_grow_cache(tries, low, high);
+			vm_map_lock(map);
+			VM_OBJECT_LOCK(object);
 			tries++;
 			goto retry;
 		}
-		ret = 0;
-	} else {
-		ret = contigmapping(map, size, pages, memattr, flags);
-		if (ret == 0)
-			vm_page_release_contig(pages, npgs);
+		vm_map_delete(map, addr, addr + size);
+		vm_map_unlock(map);
+		return (0);
 	}
-	return (ret);
+	end_m = m + atop(size);
+	for (; m < end_m; m++) {
+		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
+			pmap_zero_page(m);
+		m->valid = VM_PAGE_BITS_ALL;
+	}
+	VM_OBJECT_UNLOCK(object);
+	vm_map_unlock(map);
+	vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
+	    VM_MAP_WIRE_NOHOLES);
+	return (addr);
 }
-
-void
-contigfree(void *addr, unsigned long size, struct malloc_type *type)
-{
-
-	kmem_free(kernel_map, (vm_offset_t)addr, size);
-	malloc_type_freed(type, round_page(size));
-}
Index: sys/vm/vm_extern.h
===================================================================
--- sys/vm/vm_extern.h	(revision 261896)
+++ sys/vm/vm_extern.h	(working copy)
@@ -44,8 +44,8 @@ vm_offset_t kmem_alloc(vm_map_t, vm_size_t);
 vm_offset_t kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags,
-    vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
-    unsigned long boundary, vm_memattr_t memattr);
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_offset_t kmem_alloc_nofault(vm_map_t, vm_size_t);
 vm_offset_t kmem_alloc_nofault_space(vm_map_t, vm_size_t, int);
 vm_offset_t kmem_alloc_wait(vm_map_t, vm_size_t);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c	(revision 261896)
+++ sys/vm/vm_page.c	(working copy)
@@ -137,6 +137,7 @@ SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFL
 
 static uma_zone_t fakepg_zone;
 
+static struct vnode *vm_page_alloc_init(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_queue_remove(int queue, vm_page_t m);
 static void vm_page_enqueue(int queue, vm_page_t m);
@@ -1379,8 +1380,9 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t
 /*
  *	vm_page_alloc:
  *
- *	Allocate and return a memory cell associated
- *	with this VM object/offset pair.
+ *	Allocate and return a page that is associated with the specified
+ *	object and offset pair.  By default, this page has the flag VPO_BUSY
+ *	set.
  *
  *	The caller must always specify an allocation class.
  *
@@ -1390,13 +1392,16 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
- *	VM_ALLOC_ZERO		prefer a zeroed page
- *	VM_ALLOC_WIRED		wire the allocated page
- *	VM_ALLOC_NOOBJ		page is not associated with a vm object
- *	VM_ALLOC_NOBUSY		do not set the page busy
+ *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
+ *				intends to allocate
  *	VM_ALLOC_IFCACHED	return page only if it is cached
  *	VM_ALLOC_IFNOTCACHED	return NULL, do not reactivate if the page
  *				is cached
+ *	VM_ALLOC_NOBUSY		do not set the flag VPO_BUSY on the page
+ *	VM_ALLOC_NOOBJ		page is not associated with an object and
+ *				should not have the flag VPO_BUSY set
+ *	VM_ALLOC_WIRED		wire the allocated page
+ *	VM_ALLOC_ZERO		prefer a zeroed page
  *
  *	This routine may not sleep.
  */
@@ -1406,27 +1411,26 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind
 	struct vnode *vp = NULL;
 	vm_object_t m_object;
 	vm_page_t m;
-	int flags, page_req;
+	int flags, req_class;
 
-	if ((req & VM_ALLOC_NOOBJ) == 0) {
-		KASSERT(object != NULL,
-		    ("vm_page_alloc: NULL object."));
+	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
+	    ("vm_page_alloc: inconsistent object/req"));
+	if (object != NULL)
 		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
-	}
 
-	page_req = req & VM_ALLOC_CLASS_MASK;
+	req_class = req & VM_ALLOC_CLASS_MASK;
 
 	/*
-	 * The pager is allowed to eat deeper into the free page list.
+	 * The page daemon is allowed to dig deeper into the free page list.
 	 */
-	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT))
-		page_req = VM_ALLOC_SYSTEM;
+	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
+		req_class = VM_ALLOC_SYSTEM;
 
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
-	    (page_req == VM_ALLOC_SYSTEM &&
+	    (req_class == VM_ALLOC_SYSTEM &&
 	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
-	    (page_req == VM_ALLOC_INTERRUPT &&
+	    (req_class == VM_ALLOC_INTERRUPT &&
 	    cnt.v_free_count + cnt.v_cache_count > 0)) {
 		/*
 		 * Allocate from the free queue if the number of free pages
@@ -1474,7 +1478,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind
 		 */
 		mtx_unlock(&vm_page_queue_free_mtx);
 		atomic_add_int(&vm_pageout_deficit,
-		    MAX((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
+		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
 		pagedaemon_wakeup();
 		return (NULL);
 	}
@@ -1482,7 +1486,6 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind
 	/*
 	 * At this point we had better have found a good page.
 	 */
-	KASSERT(m != NULL, ("vm_page_alloc: missing page"));
 	KASSERT(m->queue == PQ_NONE,
 	    ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
@@ -1494,6 +1497,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind
 	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
 	    pmap_page_get_memattr(m)));
 	if ((m->flags & PG_CACHED) != 0) {
+		KASSERT((m->flags & PG_ZERO) == 0,
+		    ("vm_page_alloc: cached page %p is PG_ZERO", m));
 		KASSERT(m->valid != 0,
 		    ("vm_page_alloc: cached page %p is invalid", m));
 		if (m->object == object && m->pindex == pindex)
@@ -1572,12 +1577,163 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pind
 }
 
 /*
+ *	vm_page_alloc_contig:
+ *
+ *	Allocate a contiguous set of physical pages of the given size "npages"
+ *	from the free lists.  All of the physical pages must be at or above
+ *	the given physical address "low" and below the given physical address
+ *	"high".  The given value "alignment" determines the alignment of the
+ *	first physical page in the set.  If the given value "boundary" is
+ *	non-zero, then the set of physical pages cannot cross any physical
+ *	address boundary that is a multiple of that value.  Both "alignment"
+ *	and "boundary" must be a power of two.
+ *
+ *	If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
+ *	then the memory attribute setting for the physical pages is configured
+ *	to the object's memory attribute setting.  Otherwise, the memory
+ *	attribute setting for the physical pages is configured to "memattr",
+ *	overriding the object's memory attribute setting.  However, if the
+ *	object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
+ *	memory attribute setting for the physical pages cannot be configured
+ *	to VM_MEMATTR_DEFAULT.
+ *
+ *	The caller must always specify an allocation class.
+ *
+ *	allocation classes:
+ *	VM_ALLOC_NORMAL		normal process request
+ *	VM_ALLOC_SYSTEM		system *really* needs a page
+ *	VM_ALLOC_INTERRUPT	interrupt time request
+ *
+ *	optional allocation flags:
+ *	VM_ALLOC_NOBUSY		do not set the flag VPO_BUSY on the page
+ *	VM_ALLOC_NOOBJ		page is not associated with an object and
+ *				should not have the flag VPO_BUSY set
+ *	VM_ALLOC_WIRED		wire the allocated page
+ *	VM_ALLOC_ZERO		prefer a zeroed page
+ *
+ *	This routine may not sleep.
+ */
+vm_page_t
+vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
+    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_memattr_t memattr)
+{
+	struct vnode *drop;
+	vm_page_t deferred_vdrop_list, m, m_ret;
+	u_int flags, oflags;
+	int req_class;
+
+	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
+	    ("vm_page_alloc_contig: inconsistent object/req"));
+	if (object != NULL) {
+		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+		KASSERT(object->type == OBJT_PHYS,
+		    ("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
+		    object));
+	}
+	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
+	req_class = req & VM_ALLOC_CLASS_MASK;
+
+	/*
+	 * The page daemon is allowed to dig deeper into the free page list.
+	 */
+	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
+		req_class = VM_ALLOC_SYSTEM;
+
+	deferred_vdrop_list = NULL;
+	mtx_lock(&vm_page_queue_free_mtx);
+	if (cnt.v_free_count + cnt.v_cache_count >= npages +
+	    cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
+	    cnt.v_free_count + cnt.v_cache_count >= npages +
+	    cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
+	    cnt.v_free_count + cnt.v_cache_count >= npages)) {
+#if VM_NRESERVLEVEL > 0
+retry:
+#endif
+		m_ret = vm_phys_alloc_contig(npages, low, high, alignment,
+		    boundary);
+	} else {
+		mtx_unlock(&vm_page_queue_free_mtx);
+		atomic_add_int(&vm_pageout_deficit, npages);
+		pagedaemon_wakeup();
+		return (NULL);
+	}
+	if (m_ret != NULL)
+		for (m = m_ret; m < &m_ret[npages]; m++) {
+			drop = vm_page_alloc_init(m);
+			if (drop != NULL) {
+				/*
+				 * Enqueue the vnode for deferred vdrop().
+				 *
+				 * Once the pages are removed from the free
+				 * page list, "pageq" can be safely abused to
+				 * construct a short-lived list of vnodes.
+				 */
+				m->pageq.tqe_prev = (void *)drop;
+				m->pageq.tqe_next = deferred_vdrop_list;
+				deferred_vdrop_list = m;
+			}
+		}
+	else {
+#if VM_NRESERVLEVEL > 0
+		if (vm_reserv_reclaim_contig(npages << PAGE_SHIFT, low, high,
+		    alignment, boundary))
+			goto retry;
+#endif
+	}
+	mtx_unlock(&vm_page_queue_free_mtx);
+	if (m_ret == NULL)
+		return (NULL);
+
+	/*
+	 * Initialize the pages.  Only the PG_ZERO flag is inherited.
+	 */
+	flags = 0;
+	if ((req & VM_ALLOC_ZERO) != 0)
+		flags = PG_ZERO;
+	if ((req & VM_ALLOC_WIRED) != 0)
+		atomic_add_int(&cnt.v_wire_count, npages);
+	oflags = VPO_UNMANAGED;
+	if (object != NULL) {
+		if ((req & VM_ALLOC_NOBUSY) == 0)
+			oflags |= VPO_BUSY;
+		if (object->memattr != VM_MEMATTR_DEFAULT &&
+		    memattr == VM_MEMATTR_DEFAULT)
+			memattr = object->memattr;
+	}
+	for (m = m_ret; m < &m_ret[npages]; m++) {
+		m->aflags = 0;
+		m->flags &= flags;
+		if ((req & VM_ALLOC_WIRED) != 0)
+			m->wire_count = 1;
+		/* Unmanaged pages don't use "act_count". */
+		m->oflags = oflags;
+		if (memattr != VM_MEMATTR_DEFAULT)
+			pmap_page_set_memattr(m, memattr);
+		if (object != NULL)
+			vm_page_insert(m, object, pindex);
+		else
+			m->pindex = pindex;
+		pindex++;
+	}
+	while (deferred_vdrop_list != NULL) {
+		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
+	}
+	if (vm_paging_needed())
+		pagedaemon_wakeup();
+	return (m_ret);
+}
+
+/*
  * Initialize a page that has been freshly dequeued from a freelist.
  * The caller has to drop the vnode returned, if it is not NULL.
  *
+ * This function may only be used to initialize unmanaged pages.
+ *
  * To be called with vm_page_queue_free_mtx held.
  */
-struct vnode *
+static struct vnode *
 vm_page_alloc_init(vm_page_t m)
 {
 	struct vnode *drop;
@@ -1600,11 +1756,12 @@ vm_page_alloc_init(vm_page_t m)
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	drop = NULL;
 	if ((m->flags & PG_CACHED) != 0) {
+		KASSERT((m->flags & PG_ZERO) == 0,
+		    ("vm_page_alloc_init: cached page %p is PG_ZERO", m));
 		m->valid = 0;
 		m_object = m->object;
 		vm_page_cache_remove(m);
-		if (m_object->type == OBJT_VNODE &&
-		    m_object->cache == NULL)
+		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
 			drop = m_object->handle;
 	} else {
 		KASSERT(VM_PAGE_IS_FREE(m),
@@ -1612,23 +1769,33 @@ vm_page_alloc_init(vm_page_t m)
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc_init: free page %p is valid", m));
 		cnt.v_free_count--;
+		if ((m->flags & PG_ZERO) != 0)
+			vm_page_zero_count--;
 	}
-	if (m->flags & PG_ZERO)
-		vm_page_zero_count--;
 	/* Don't clear the PG_ZERO flag; we'll need it later. */
 	m->flags &= PG_ZERO;
-	m->aflags = 0;
-	m->oflags = VPO_UNMANAGED;
-	/* Unmanaged pages don't use "act_count". */
 	return (drop);
 }
 
 /*
  *	vm_page_alloc_freelist:
- *
- *	Allocate a page from the specified freelist.
- *	Only the ALLOC_CLASS values in req are honored, other request flags
- *	are ignored.
+ *
+ *	Allocate a physical page from the specified free page list.
+ *
+ *	The caller must always specify an allocation class.
+ *
+ *	allocation classes:
+ *	VM_ALLOC_NORMAL		normal process request
+ *	VM_ALLOC_SYSTEM		system *really* needs a page
+ *	VM_ALLOC_INTERRUPT	interrupt time request
+ *
+ *	optional allocation flags:
+ *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
+ *				intends to allocate
+ *	VM_ALLOC_WIRED		wire the allocated page
+ *	VM_ALLOC_ZERO		prefer a zeroed page
+ *
+ *	This routine may not sleep.
  */
 vm_page_t
 vm_page_alloc_freelist(int flind, int req)
@@ -1635,20 +1802,33 @@ vm_page_alloc_freelist(int flind, int req)
 {
 	struct vnode *drop;
 	vm_page_t m;
-	int page_req;
+	u_int flags;
+	int req_class;
 
-	m = NULL;
-	page_req = req & VM_ALLOC_CLASS_MASK;
-	mtx_lock(&vm_page_queue_free_mtx);
+	req_class = req & VM_ALLOC_CLASS_MASK;
+
 	/*
+	 * The page daemon is allowed to dig deeper into the free page list.
+	 */
+	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
+		req_class = VM_ALLOC_SYSTEM;
+
+	/*
	 * Do not allocate reserved pages unless the req has asked for it.
 	 */
+	mtx_lock(&vm_page_queue_free_mtx);
 	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
-	    (page_req == VM_ALLOC_SYSTEM &&
+	    (req_class == VM_ALLOC_SYSTEM &&
 	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
-	    (page_req == VM_ALLOC_INTERRUPT &&
-	    cnt.v_free_count + cnt.v_cache_count > 0)) {
+	    (req_class == VM_ALLOC_INTERRUPT &&
+	    cnt.v_free_count + cnt.v_cache_count > 0))
 		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
+	else {
+		mtx_unlock(&vm_page_queue_free_mtx);
+		atomic_add_int(&vm_pageout_deficit,
+		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
+		pagedaemon_wakeup();
+		return (NULL);
 	}
 	if (m == NULL) {
 		mtx_unlock(&vm_page_queue_free_mtx);
@@ -1656,8 +1836,29 @@ vm_page_alloc_freelist(int flind, int req)
 	}
 	drop = vm_page_alloc_init(m);
 	mtx_unlock(&vm_page_queue_free_mtx);
-	if (drop)
+
+	/*
+	 * Initialize the page.  Only the PG_ZERO flag is inherited.
+	 */
+	m->aflags = 0;
+	flags = 0;
+	if ((req & VM_ALLOC_ZERO) != 0)
+		flags = PG_ZERO;
+	m->flags &= flags;
+	if ((req & VM_ALLOC_WIRED) != 0) {
+		/*
+		 * The page lock is not required for wiring a page that does
+		 * not belong to an object.
+		 */
+		atomic_add_int(&cnt.v_wire_count, 1);
+		m->wire_count = 1;
+	}
+	/* Unmanaged pages don't use "act_count". */
+	m->oflags = VPO_UNMANAGED;
+	if (drop != NULL)
 		vdrop(drop);
+	if (vm_paging_needed())
+		pagedaemon_wakeup();
 	return (m);
 }
 
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h	(revision 261896)
+++ sys/vm/vm_page.h	(working copy)
@@ -365,8 +365,10 @@ void vm_pageq_remove(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
 vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
+    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_memattr_t memattr);
 vm_page_t vm_page_alloc_freelist(int, int);
-struct vnode *vm_page_alloc_init(vm_page_t);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 void vm_page_cache(vm_page_t);
 void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c	(revision 261896)
+++ sys/vm/vm_phys.c	(working copy)
@@ -29,11 +29,17 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+/*
+ * Physical memory system implementation
+ *
+ * Any external functions defined by this module are only to be used by the
+ * virtual memory system.
+ */
+
 #include
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
-#include "opt_vm.h"
 
 #include
 #include
@@ -45,7 +51,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 #include
 
@@ -55,7 +60,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 /*
  * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
@@ -543,26 +547,6 @@ vm_phys_alloc_domain_pages(int domain, int flind,
 }
 
 /*
- * Allocate physical memory from phys_avail[].
- */
-vm_paddr_t
-vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
-{
-	vm_paddr_t pa;
-	int i;
-
-	size = round_page(size);
-	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
-		if (phys_avail[i + 1] - phys_avail[i] < size)
-			continue;
-		pa = phys_avail[i];
-		phys_avail[i] += size;
-		return (pa);
-	}
-	panic("vm_phys_bootstrap_alloc");
-}
-
-/*
  * Find the vm_page corresponding to the given physical address.
  */
 vm_page_t
@@ -713,7 +697,7 @@ vm_phys_free_pages(vm_page_t m, int order)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
-	vm_paddr_t pa, pa_buddy;
+	vm_paddr_t pa;
 	vm_page_t m_buddy;
 
 	KASSERT(m->order == VM_NFREEORDER,
@@ -725,25 +709,26 @@ vm_phys_free_pages(vm_page_t m, int order)
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_free_pages: order %d is out of range", order));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	pa = VM_PAGE_TO_PHYS(m);
 	seg = &vm_phys_segs[m->segind];
-	while (order < VM_NFREEORDER - 1) {
-		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
-		if (pa_buddy < seg->start ||
-		    pa_buddy >= seg->end)
-			break;
-		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
-		if (m_buddy->order != order)
-			break;
-		fl = (*seg->free_queues)[m_buddy->pool];
-		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
-		fl[m_buddy->order].lcnt--;
-		m_buddy->order = VM_NFREEORDER;
-		if (m_buddy->pool != m->pool)
-			vm_phys_set_pool(m->pool, m_buddy, order);
-		order++;
-		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
-		m = &seg->first_page[atop(pa - seg->start)];
+	if (order < VM_NFREEORDER - 1) {
+		pa = VM_PAGE_TO_PHYS(m);
+		do {
+			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
+			if (pa < seg->start || pa >= seg->end)
+				break;
+			m_buddy = &seg->first_page[atop(pa - seg->start)];
+			if (m_buddy->order != order)
+				break;
+			fl = (*seg->free_queues)[m_buddy->pool];
+			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
+			fl[order].lcnt--;
+			m_buddy->order = VM_NFREEORDER;
+			if (m_buddy->pool != m->pool)
+				vm_phys_set_pool(m->pool, m_buddy, order);
+			order++;
+			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
+			m = &seg->first_page[atop(pa - seg->start)];
+		} while (order < VM_NFREEORDER - 1);
 	}
 	m->order = order;
 	fl = (*seg->free_queues)[m->pool];
@@ -752,6 +737,47 @@ vm_phys_free_pages(vm_page_t m, int order)
 }
 
 /*
+ * Free a contiguous, arbitrarily sized set of physical pages.
+ *
+ * The free page queues must be locked.
+ */
+void
+vm_phys_free_contig(vm_page_t m, u_long npages)
+{
+	u_int n;
+	int order;
+
+	/*
+	 * Avoid unnecessary coalescing by freeing the pages in the largest
+	 * possible power-of-two-sized subsets.
+	 */
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+	for (;; npages -= n) {
+		/*
+		 * Unsigned "min" is used here so that "order" is assigned
+		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
+		 * or the low-order bits of its physical address are zero
+		 * because the size of a physical address exceeds the size of
+		 * a long.
+		 */
+		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
+		    VM_NFREEORDER - 1);
+		n = 1 << order;
+		if (npages < n)
+			break;
+		vm_phys_free_pages(m, order);
+		m += n;
+	}
+	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
+	for (; npages > 0; npages -= n) {
+		order = flsl(npages) - 1;
+		n = 1 << order;
+		vm_phys_free_pages(m, order);
+		m += n;
+	}
+}
+
+/*
  * Set the pool for a contiguous, power of two-sized set of physical pages.
  */
 void
@@ -887,16 +913,17 @@ vm_phys_zero_pages_idle(void)
  * "alignment" and "boundary" must be a power of two.
  */
 vm_page_t
-vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
-    unsigned long alignment, unsigned long boundary)
+vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
-	struct vnode *vp;
 	vm_paddr_t pa, pa_last, size;
-	vm_page_t deferred_vdrop_list, m, m_ret;
-	int domain, flind, i, oind, order, pind;
+	vm_page_t m, m_ret;
+	u_long npages_end;
+	int domain, flind, oind, order, pind;
 
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 #if VM_NDOMAIN > 1
 	domain = PCPU_GET(domain);
 #else
@@ -909,13 +936,8 @@ vm_page_t
 	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
 	KASSERT((boundary & (boundary - 1)) == 0,
 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
-	deferred_vdrop_list = NULL;
 	/* Compute the queue that is the best fit for npages. */
 	for (order = 0; (1 << order) < npages; order++);
-	mtx_lock(&vm_page_queue_free_mtx);
-#if VM_NRESERVLEVEL > 0
-retry:
-#endif
 	for (flind = 0; flind < vm_nfreelists; flind++) {
 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
@@ -974,11 +996,6 @@ vm_page_t
 			}
 		}
 	}
-#if VM_NRESERVLEVEL > 0
-	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
-		goto retry;
-#endif
-	mtx_unlock(&vm_page_queue_free_mtx);
 	return (NULL);
 done:
 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
@@ -991,34 +1008,10 @@ done:
 	vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
 	fl = (*seg->free_queues)[m_ret->pool];
 	vm_phys_split_pages(m_ret, oind, fl, order);
-	for (i = 0; i < npages; i++) {
-		m = &m_ret[i];
-		vp = vm_page_alloc_init(m);
-		if (vp != NULL) {
-			/*
-			 * Enqueue the vnode for deferred vdrop().
-			 *
-			 * Unmanaged pages don't use "pageq", so it
-			 * can be safely abused to construct a short-
-			 * lived queue of vnodes.
-			 */
-			m->pageq.tqe_prev = (void *)vp;
-			m->pageq.tqe_next = deferred_vdrop_list;
-			deferred_vdrop_list = m;
-		}
-	}
-	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
-		m = &m_ret[i];
-		KASSERT(m->order == VM_NFREEORDER,
-		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
-		    m, m->order));
-		vm_phys_free_pages(m, 0);
-	}
-	mtx_unlock(&vm_page_queue_free_mtx);
-	while (deferred_vdrop_list != NULL) {
-		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
-		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
-	}
+	/* Return excess pages to the free lists. */
+	npages_end = roundup2(npages, 1 << imin(oind, order));
+	if (npages < npages_end)
+		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
 	return (m_ret);
 }
Index: sys/vm/vm_phys.h
===================================================================
--- sys/vm/vm_phys.h	(revision 261896)
+++ sys/vm/vm_phys.h	(working copy)
@@ -49,13 +49,15 @@ struct mem_affinity {
 
 extern struct mem_affinity *mem_affinity;
 
+/*
+ * The following functions are only to be used by the virtual memory system.
+ */
 void vm_phys_add_page(vm_paddr_t pa);
-vm_page_t vm_phys_alloc_contig(unsigned long npages,
-    vm_paddr_t low, vm_paddr_t high,
-    unsigned long alignment, unsigned long boundary);
+vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary);
 vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order);
 vm_page_t vm_phys_alloc_pages(int pool, int order);
-vm_paddr_t vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment);
+void vm_phys_free_contig(vm_page_t m, u_long npages);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
 void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end);
Index: sys/vm/vm_reserv.c
===================================================================
--- sys/vm/vm_reserv.c	(revision 261896)
+++ sys/vm/vm_reserv.c	(working copy)
@@ -630,7 +630,7 @@ vm_reserv_reclaim_inactive(void)
  */
 boolean_t
 vm_reserv_reclaim_contig(vm_paddr_t size, vm_paddr_t low, vm_paddr_t high,
-    unsigned long alignment, unsigned long boundary)
+    u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa, pa_length;
 	vm_reserv_t rv;
Index: sys/vm/vm_reserv.h
===================================================================
--- sys/vm/vm_reserv.h	(revision 261896)
+++ sys/vm/vm_reserv.h	(working copy)
@@ -49,8 +49,7 @@ void vm_reserv_init(void);
 int vm_reserv_level_iffullpop(vm_page_t m);
 boolean_t vm_reserv_reactivate_page(vm_page_t m);
 boolean_t vm_reserv_reclaim_contig(vm_paddr_t size, vm_paddr_t low,
-    vm_paddr_t high, unsigned long alignment,
-    unsigned long boundary);
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 boolean_t vm_reserv_reclaim_inactive(void);
 void vm_reserv_rename(vm_page_t m, vm_object_t new_object,
     vm_object_t old_object, vm_pindex_t old_object_offset);
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c	(revision 261896)
+++ sys/mips/mips/pmap.c	(working copy)
@@ -1073,7 +1073,8 @@ pmap_alloc_direct_page(unsigned int index, int req
 {
 	vm_page_t m;
 
-	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req);
+	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
+	    VM_ALLOC_ZERO);
 	if (m == NULL)
 		return (NULL);
 
@@ -1081,8 +1082,6 @@ pmap_alloc_direct_page(unsigned int index, int req
 		pmap_zero_page(m);
 
 	m->pindex = index;
 
-	atomic_add_int(&cnt.v_wire_count, 1);
-	m->wire_count = 1;
 	return (m);
 }
Index: sys/powerpc/aim/slb.c
===================================================================
--- sys/powerpc/aim/slb.c	(revision 261896)
+++ sys/powerpc/aim/slb.c	(working copy)
@@ -40,7 +40,6 @@
 #include
 #include
 #include
-#include
 
 #include
 #include
@@ -478,15 +477,22 @@ slb_uma_real_alloc(uma_zone_t zone, int bytes, u_i
 	static vm_offset_t realmax = 0;
 	void *va;
 	vm_page_t m;
+	int pflags;
 
 	if (realmax == 0)
 		realmax = platform_real_maxaddr();
 
 	*flags = UMA_SLAB_PRIV;
+	if ((wait & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
+	else
+		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
+	if (wait & M_ZERO)
+		pflags |= VM_ALLOC_ZERO;
 
 	for (;;) {
-		m = vm_phys_alloc_contig(1, 0, realmax, PAGE_SIZE,
-		    PAGE_SIZE);
+		m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
+		    PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
 		if (m == NULL) {
 			if (wait & M_NOWAIT)
 				return (NULL);
@@ -503,10 +509,6 @@ slb_uma_real_alloc(uma_zone_t zone, int bytes, u_i
 	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
 		bzero(va, PAGE_SIZE);
 
-	/* vm_phys_alloc_contig does not track wiring */
-	atomic_add_int(&cnt.v_wire_count, 1);
-	m->wire_count = 1;
-
 	return (va);
 }
 
Index: sys/kern/kern_malloc.c
===================================================================
--- sys/kern/kern_malloc.c	(revision 261896)
+++ sys/kern/kern_malloc.c	(working copy)
@@ -406,6 +406,43 @@ malloc_type_freed(struct malloc_type *mtp, unsigne
 }
 
 /*
+ * contigmalloc:
+ *
+ *	Allocate a block of physically contiguous memory.
+ *
+ *	If M_NOWAIT is set, this routine will not block and return NULL if
+ *	the allocation fails.
+ */
+void *
+contigmalloc(unsigned long size, struct malloc_type *type, int flags,
+    vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
+    unsigned long boundary)
+{
+	void *ret;
+
+	ret = (void *)kmem_alloc_contig(kernel_map, size, flags, low, high,
+	    alignment, boundary, VM_MEMATTR_DEFAULT);
+	if (ret != NULL)
+		malloc_type_allocated(type, round_page(size));
+	return (ret);
+}
+
+/*
+ * contigfree:
+ *
+ *	Free a block of memory allocated by contigmalloc.
+ *
+ *	This routine may not block.
+ */
+void
+contigfree(void *addr, unsigned long size, struct malloc_type *type)
+{
+
+	kmem_free(kernel_map, (vm_offset_t)addr, size);
+	malloc_type_freed(type, round_page(size));
+}
+
+/*
 * malloc:
 *
 *	Allocate a block of memory.
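
Usage note (commentary only, not part of the patch and not needed for it to apply): existing consumers keep calling contigmalloc(9)/contigfree(9) with their unchanged signatures; after this change the request is satisfied through kmem_alloc_contig() and vm_page_alloc_contig() rather than a bare vm_phys_alloc_contig() call, so the pages come back wired and properly initialized without the caller doing anything new. The sketch below is a hypothetical caller: the names example_alloc/example_free and the 64KB size, 4GB limit, and page alignment are illustrative values, not anything defined by this patch.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

static void *
example_alloc(void)
{

	/*
	 * 64KB, wired and zeroed, below 4GB, page-aligned, with no
	 * boundary-crossing restriction.  Without M_NOWAIT, the VM
	 * system may retry via vm_contig_grow_cache() before failing.
	 */
	return (contigmalloc(65536, M_DEVBUF, M_WAITOK | M_ZERO,
	    0, 0xffffffffUL, PAGE_SIZE, 0));
}

static void
example_free(void *buf)
{

	/* contigfree() releases the mapping and the wired pages. */
	if (buf != NULL)
		contigfree(buf, 65536, M_DEVBUF);
}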