diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 5440671..398829c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -428,6 +428,7 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 				continue;
 			}
 			vm_page_sbusy(pp);
+#ifdef VM_PAGE_CACHE
 		} else if (pp == NULL) {
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
@@ -435,6 +436,7 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 		} else {
 			ASSERT(pp != NULL && !pp->valid);
 			pp = NULL;
+#endif
 		}
 
 		if (pp != NULL) {
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index ae8adae..9aae82d 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1528,11 +1528,17 @@ swp_pager_async_iodone(struct buf *bp)
 			    " protected", m));
 			vm_page_undirty(m);
 			vm_page_sunbusy(m);
+#ifdef VM_PAGE_CACHE
 			if (vm_page_count_severe()) {
 				vm_page_lock(m);
 				vm_page_try_to_cache(m);
 				vm_page_unlock(m);
 			}
+#else
+			vm_page_lock(m);
+			vm_page_deactivate_noreuse(m);
+			vm_page_unlock(m);
+#endif
 		}
 	}
 
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 6ac78ef..8313a277 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -242,7 +242,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
  * Definitions for uma_zcreate flags
  *
  * These flags share space with UMA_ZFLAGs in uma_int.h.  Be careful not to
- * overlap when adding new features.  0xf0000000 is in use by uma_int.h.
+ * overlap when adding new features.  0xff000000 is in use by uma_int.h.
  */
 #define	UMA_ZONE_PAGEABLE	0x0001	/* Return items not fully backed by
 					   physical memory XXX Not yet */
@@ -262,7 +262,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 					 * information in the vm_page.
 					 */
 #define	UMA_ZONE_SECONDARY	0x0200	/* Zone is a Secondary Zone */
-/*				0x0400	Unused */
+#define	UMA_ZONE_NOBUCKETCACHE	0x0400	/* Zone does not cache buckets */
 #define	UMA_ZONE_MAXBUCKET	0x0800	/* Use largest buckets */
 #define	UMA_ZONE_CACHESPREAD	0x1000	/*
 					 * Spread memory start locations across
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index bcc895e..3c49ad8 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -2273,8 +2273,16 @@ zalloc_start:
 	 */
 	if (cache->uc_allocbucket == NULL)
 		cache->uc_allocbucket = bucket;
-	else
+	else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) == 0)
 		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+	else {
+		critical_exit();
+		ZONE_UNLOCK(zone);
+		bucket_drain(zone, bucket);
+		bucket_free(zone, bucket, udata);
+		critical_enter();
+		goto zalloc_start;
+	}
 	ZONE_UNLOCK(zone);
 	goto zalloc_start;
 }
@@ -2751,17 +2759,6 @@ zfree_start:
 	}
 	cache->uc_freebucket = NULL;
 
-	/* Can we throw this on the zone full list? */
-	if (bucket != NULL) {
-#ifdef UMA_DEBUG_ALLOC
-		printf("uma_zfree: Putting old bucket on the free list.\n");
-#endif
-		/* ub_cnt is pointing to the last free item */
-		KASSERT(bucket->ub_cnt != 0,
-		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
-		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
-	}
-
 	/* We are no longer associated with this CPU. */
 	critical_exit();
 
@@ -2771,7 +2768,30 @@ zfree_start:
 	 */
 	if (lockfail && zone->uz_count < BUCKET_MAX)
 		zone->uz_count++;
-	ZONE_UNLOCK(zone);
+
+	/* Can we throw this on the zone full list? */
+	if (bucket != NULL) {
+#ifdef UMA_DEBUG_ALLOC
+		printf("uma_zfree: Putting old bucket on the free list.\n");
+#endif
+		/* ub_cnt is pointing to the last free item */
+		KASSERT(bucket->ub_cnt != 0,
+		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
+		if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) == 0) {
+			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+			ZONE_UNLOCK(zone);
+		} else {
+			/*
+			 * Free the bucket instead of reusing it so that per-CPU
+			 * caches are properly sized.
+			 */
+			ZONE_UNLOCK(zone);
+			bucket_drain(zone, bucket);
+			bucket_free(zone, bucket, udata);
+		}
+	} else {
+		ZONE_UNLOCK(zone);
+	}
 
 #ifdef UMA_DEBUG_ALLOC
 	printf("uma_zfree: Allocating new free bucket.\n");
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 4a0479b..d8c9b26 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1131,6 +1131,7 @@ shadowlookup:
 		} else if ((tobject->flags & OBJ_UNMANAGED) != 0)
 			goto unlock_tobject;
 		m = vm_page_lookup(tobject, tpindex);
+#ifdef VM_PAGE_CACHE
 		if (m == NULL && advise == MADV_WILLNEED) {
 			/*
 			 * If the page is cached, reactivate it.
@@ -1138,6 +1139,7 @@ shadowlookup:
 			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
 		}
+#endif
 		if (m == NULL) {
 			/*
 			 * There may be swap even if there is no backing page
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 2f3b17f..222dedb 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -152,6 +152,7 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
 /* Is the page daemon waiting for free pages? */
 static int vm_pageout_pages_needed;
 
+static uma_zone_t cachepg_zones[VM_NFREEPOOL];	/* XXX should be per-domain */
 static uma_zone_t fakepg_zone;
 
 static struct vnode *vm_page_alloc_init(vm_page_t m);
@@ -160,14 +161,14 @@ static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
 static void vm_page_free_wakeup(void);
 static void vm_page_init_fakepg(void *dummy);
+static int vm_page_import(void *arg, void **store, int cnt, int flags);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
 static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
     vm_paddr_t high);
-
-SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);
+static void vm_page_release(void *arg, void **store, int cnt);
 
 static void
 vm_page_init_fakepg(void *dummy)
@@ -176,6 +177,33 @@ vm_page_init_fakepg(void *dummy)
 	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
 }
+SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);
+
+/*
+ * The cache page zone is initialized later since we need to be able to allocate
+ * pages before UMA is fully initialized.
+ *
+ * XXX I think there should be one cache zone per domain where each zone imports
+ * pages from its corresponding domain.  Then we would use the thread's domain
+ * iterator to choose the zone.
+ * XXX How would this all work with NUMA-aware UMA?
+ */
+static void
+vm_page_init_cache_zones(void *dummy __unused)
+{
+	int pind;
+
+	for (pind = 0; pind < VM_NFREEPOOL; pind++)
+		/*
+		 * XXX it's rather silly that cache zones use the item size to
+		 * size buckets..
+ */ + cachepg_zones[pind] = uma_zcache_create("cachepg", + sizeof(struct vm_page), NULL, NULL, NULL, NULL, + vm_page_import, vm_page_release, (void *)(uintptr_t)pind, + UMA_ZONE_NOBUCKETCACHE | UMA_ZONE_VM); +} +SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL); /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 @@ -1457,6 +1485,7 @@ vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) return (vm_radix_lookup(&object->cache, pindex)); } +#ifdef VM_PAGE_CACHE /* * Remove the given cached page from its containing object's * collection of cached pages. @@ -1474,6 +1503,7 @@ vm_page_cache_remove(vm_page_t m) m->object = NULL; vm_cnt.v_cache_count--; } +#endif /* * Transfer all of the cached pages with offset greater than or @@ -1583,7 +1613,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) struct vnode *vp = NULL; vm_object_t m_object; vm_page_t m, mpred; - int flags, req_class; + int flags, req_class, zi; mpred = 0; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && @@ -1610,6 +1640,19 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) } /* + * Try to allocate from per-CPU caches if we're not going to be + * attempting an allocation from a reservation. + */ + if (object == NULL || (object->flags & OBJ_COLORED) == 0) { + zi = object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT; + if (__predict_true(cachepg_zones[zi] != NULL)) { + m = uma_zalloc(cachepg_zones[zi], M_NOWAIT); + if (m != NULL) + goto gotpage; + } + } + + /* * The page allocation request can came from consumers which already * hold the free page queue mutex, like vm_page_insert() in * vm_page_cache(). @@ -1624,6 +1667,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) * Allocate from the free queue if the number of free pages * exceeds the minimum for the request class. */ +#ifdef VM_PAGE_CACHE if (object != NULL && (m = vm_page_cache_lookup(object, pindex)) != NULL) { if ((req & VM_ALLOC_IFNOTCACHED) != 0) { @@ -1659,6 +1703,25 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) } #endif } +#else /* VM_PAGE_CACHE */ +#if VM_NRESERVLEVEL > 0 + if (object == NULL || (object->flags & (OBJ_COLORED | + OBJ_FICTITIOUS)) != OBJ_COLORED || (m = + vm_reserv_alloc_page(object, pindex, mpred)) == NULL) { +#else + } else { +#endif + m = vm_phys_alloc_pages(object != NULL ? + VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); +#if VM_NRESERVLEVEL > 0 + if (m == NULL && vm_reserv_reclaim_inactive()) { + m = vm_phys_alloc_pages(object != NULL ? + VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, + 0); + } +#endif + } +#endif /* !VM_PAGE_CACHE */ } else { /* * Not allocatable, give up. @@ -1684,6 +1747,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc: page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); +#ifdef VM_PAGE_CACHE if ((m->flags & PG_CACHED) != 0) { KASSERT((m->flags & PG_ZERO) == 0, ("vm_page_alloc: cached page %p is PG_ZERO", m)); @@ -1705,8 +1769,14 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } +#else /* VM_PAGE_CACHE */ + (void)m_object; + (void)vp; + (void)vm_page_alloc_init(m); +#endif mtx_unlock(&vm_page_queue_free_mtx); +gotpage: /* * Initialize the page. Only the PG_ZERO flag is inherited. 
 	 */
@@ -1737,9 +1807,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 
 	if (object != NULL) {
 		if (vm_page_insert_after(m, object, pindex, mpred)) {
+#ifdef VM_PAGE_CACHE
 			/* See the comment below about hold count. */
 			if (vp != NULL)
 				vdrop(vp);
+#endif
 			pagedaemon_wakeup();
 			if (req & VM_ALLOC_WIRED) {
 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
@@ -1758,6 +1830,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 	} else
 		m->pindex = pindex;
 
+#ifdef VM_PAGE_CACHE
 	/*
 	 * The following call to vdrop() must come after the above call
 	 * to vm_page_insert() in case both affect the same object and
@@ -1766,6 +1839,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 	 */
 	if (vp != NULL)
 		vdrop(vp);
+#endif
 
 	/*
 	 * Don't wakeup too often - wakeup the pageout daemon when
@@ -1777,6 +1851,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 	return (m);
 }
 
+#ifdef VM_PAGE_CACHE
 static void
 vm_page_alloc_contig_vdrop(struct spglist *lst)
 {
@@ -1786,6 +1861,7 @@ vm_page_alloc_contig_vdrop(struct spglist *lst)
 		SLIST_REMOVE_HEAD(lst, plinks.s.ss);
 	}
 }
+#endif
 
 /*
  *	vm_page_alloc_contig:
@@ -1881,6 +1957,7 @@ retry:
 	}
 	if (m_ret != NULL)
 		for (m = m_ret; m < &m_ret[npages]; m++) {
+#ifdef VM_PAGE_CACHE
 			drop = vm_page_alloc_init(m);
 			if (drop != NULL) {
 				/*
@@ -1890,6 +1967,10 @@ retry:
 				SLIST_INSERT_HEAD(&deferred_vdrop_list, m,
 				    plinks.s.ss);
 			}
+#else
+			(void)drop;
+			(void)vm_page_alloc_init(m);
+#endif
 		}
 	else {
 #if VM_NRESERVLEVEL > 0
@@ -1933,8 +2014,10 @@ retry:
 		m->oflags = VPO_UNMANAGED;
 		if (object != NULL) {
 			if (vm_page_insert(m, object, pindex)) {
+#ifdef VM_PAGE_CACHE
 				vm_page_alloc_contig_vdrop(
 				    &deferred_vdrop_list);
+#endif
 				if (vm_paging_needed())
 					pagedaemon_wakeup();
 				if ((req & VM_ALLOC_WIRED) != 0)
@@ -1956,7 +2039,9 @@ retry:
 			pmap_page_set_memattr(m, memattr);
 		pindex++;
 	}
+#ifdef VM_PAGE_CACHE
 	vm_page_alloc_contig_vdrop(&deferred_vdrop_list);
+#endif
 	if (vm_paging_needed())
 		pagedaemon_wakeup();
 	return (m_ret);
@@ -1992,6 +2077,7 @@ vm_page_alloc_init(vm_page_t m)
 	    m, pmap_page_get_memattr(m)));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	drop = NULL;
+#ifdef VM_PAGE_CACHE
 	if ((m->flags & PG_CACHED) != 0) {
 		KASSERT((m->flags & PG_ZERO) == 0,
 		    ("vm_page_alloc_init: cached page %p is PG_ZERO", m));
@@ -2009,6 +2095,16 @@ vm_page_alloc_init(vm_page_t m)
 			vm_page_zero_count--;
 	}
 	return (drop);
+#else
+	KASSERT(m->valid == 0,
+	    ("vm_page_alloc_init: free page %p is valid", m));
+	vm_phys_freecnt_adj(m, -1);
+	if ((m->flags & PG_ZERO) != 0)
+		vm_page_zero_count--;
+	(void)m_object;
+	(void)drop;
+	return (NULL);
+#endif
 }
 
 /*
@@ -2068,7 +2164,12 @@ vm_page_alloc_freelist(int flind, int req)
 		mtx_unlock(&vm_page_queue_free_mtx);
 		return (NULL);
 	}
+#ifdef VM_PAGE_CACHE
 	drop = vm_page_alloc_init(m);
+#else
	(void)drop;
+	(void)vm_page_alloc_init(m);
+#endif
 	mtx_unlock(&vm_page_queue_free_mtx);
 
 	/*
@@ -2089,13 +2190,57 @@ vm_page_alloc_freelist(int flind, int req)
 	}
 	/* Unmanaged pages don't use "act_count". */
 	m->oflags = VPO_UNMANAGED;
+#ifdef VM_PAGE_CACHE
 	if (drop != NULL)
 		vdrop(drop);
+#endif
 	if (vm_paging_needed())
 		pagedaemon_wakeup();
 	return (m);
 }
 
+static int
+vm_page_import(void *arg, void **store, int cnt, int flags)
+{
+	vm_page_t m;
+	int i, pind;
+
+	pind = (int)(uintptr_t)arg;
+	KASSERT(pind >= 0 && pind < VM_NFREEPOOL,
+	    ("vm_page_import: invalid freepool index %d", pind));
+
+	mtx_lock_flags(&vm_page_queue_free_mtx, /* XXX */ MTX_RECURSE);
+	for (i = 0; i < cnt; i++) {
+		m = vm_phys_alloc_pages(pind, 0);
+		if (m == NULL)
+			break;
+		vm_phys_freecnt_adj(m, -1);
+		store[i] = m;
+	}
+	mtx_unlock(&vm_page_queue_free_mtx);
+	return (i);
+}
+
+static void
+vm_page_release(void *arg __unused, void **store, int cnt)
+{
+	vm_page_t m;
+	int i;
+
+	mtx_lock_flags(&vm_page_queue_free_mtx, /* XXX */ MTX_RECURSE);
+	for (i = 0; i < cnt; i++) {
+		m = (vm_page_t)store[i];
+		vm_phys_freecnt_adj(m, 1);
+#if VM_NRESERVLEVEL > 0
+		if (!vm_reserv_free_page(m))
+			vm_phys_free_pages(m, 0);
+#else
+		vm_phys_free_pages(m, 0);
+#endif
+	}
+	mtx_unlock(&vm_page_queue_free_mtx);
+}
+
 #define	VPSC_ANY	0	/* No restrictions. */
 #define	VPSC_NORESERV	1	/* Skip reservations; implies VPSC_NOSUPER. */
 #define	VPSC_NOSUPER	2	/* Skip superpages. */
@@ -2216,8 +2361,9 @@ retry:
 			/* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
 			if (object->type != OBJT_DEFAULT &&
 			    object->type != OBJT_SWAP &&
-			    object->type != OBJT_VNODE)
+			    object->type != OBJT_VNODE) {
 				run_ext = 0;
+#ifdef VM_PAGE_CACHE
 			else if ((m->flags & PG_CACHED) != 0 ||
 			    m != vm_page_lookup(object, m->pindex)) {
 				/*
@@ -2248,6 +2394,7 @@ retry:
 					m_inc = 1 << order;
 				} else
 					run_ext = 0;
+#endif /* VM_PAGE_CACHE */
 #if VM_NRESERVLEVEL > 0
 			} else if ((options & VPSC_NOSUPER) != 0 &&
 			    (level = vm_reserv_level_iffullpop(m)) >= 0) {
@@ -2414,6 +2561,7 @@ retry:
 			    object->type != OBJT_SWAP &&
 			    object->type != OBJT_VNODE)
 				error = EINVAL;
+#ifdef VM_PAGE_CACHE
 			else if ((m->flags & PG_CACHED) != 0 ||
 			    m != vm_page_lookup(object, m->pindex)) {
 				/*
@@ -2422,7 +2570,9 @@ retry:
 				 */
 				VM_OBJECT_WUNLOCK(object);
 				goto cached;
-			} else if (object->memattr != VM_MEMATTR_DEFAULT)
+			}
+#endif
+			else if (object->memattr != VM_MEMATTR_DEFAULT)
 				error = EINVAL;
 			else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
 				KASSERT(pmap_page_get_memattr(m) ==
@@ -2523,7 +2673,9 @@ retry:
 unlock:
 			VM_OBJECT_WUNLOCK(object);
 		} else {
+#ifdef VM_PAGE_CACHE
 cached:
+#endif
 			mtx_lock(&vm_page_queue_free_mtx);
 			order = m->order;
 			if (order < VM_NFREEORDER) {
@@ -2943,6 +3095,11 @@ vm_page_cache_turn_free(vm_page_t m)
 void
 vm_page_free_toq(vm_page_t m)
 {
+	bool cacheable;
+
+#ifndef VM_PAGE_CACHE
+	MPASS((m->flags & PG_CACHED) == 0);
+#endif
 
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		vm_page_lock_assert(m, MA_OWNED);
@@ -2956,6 +3113,13 @@ vm_page_free_toq(vm_page_t m)
 	if (vm_page_sbusied(m))
 		panic("vm_page_free: freeing busy page %p", m);
 
+#ifndef VM_PAGE_CACHE
+	cacheable = m->object == NULL ||
+	    (m->object->flags & OBJ_COLORED) == 0;
+#else
+	cacheable = false;
+#endif
+
 	/*
 	 * Unqueue, then remove page.  Note that we cannot destroy
 	 * the page here because we do not want to call the pager's
@@ -2990,6 +3154,11 @@ vm_page_free_toq(vm_page_t m)
 	if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
 		pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
 
+	if (cacheable) {
+		uma_zfree(cachepg_zones[m->pool], m);
+		return;
+	}
+
 	/*
 	 * Insert the page into the physical memory allocator's
 	 * cache/free page queues.
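
Note (not part of the patch): the cachepg zones above rely on the uma_zcache_create() import/release contract, and the new UMA_ZONE_NOBUCKETCACHE flag keeps filled buckets off the zone's global bucket list so that cached items live only in the per-CPU buckets. Below is a minimal, illustrative sketch of that contract as another consumer might use it. The example_* names, the use of malloc(9)/M_TEMP as the backing allocator, and the SYSINIT ordering are assumptions for illustration only and do not appear in this patch.

/* Illustrative sketch only -- not part of the patch. */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <vm/uma.h>

static uma_zone_t example_zone;		/* hypothetical cache zone */

/* Import: refill the zone with up to "cnt" items from the backing allocator. */
static int
example_import(void *arg __unused, void **store, int cnt, int flags)
{
	int i;

	for (i = 0; i < cnt; i++) {
		store[i] = malloc(PAGE_SIZE, M_TEMP,
		    (flags & M_WAITOK) != 0 ? M_WAITOK : M_NOWAIT);
		if (store[i] == NULL)
			break;
	}
	return (i);			/* number of items actually imported */
}

/* Release: return surplus items to the backing allocator when the cache drains. */
static void
example_release(void *arg __unused, void **store, int cnt)
{
	int i;

	for (i = 0; i < cnt; i++)
		free(store[i], M_TEMP);
}

static void
example_zone_init(void *dummy __unused)
{

	/*
	 * UMA_ZONE_NOBUCKETCACHE (added by this patch) prevents the zone from
	 * keeping filled buckets on its global list, so the amount of cached
	 * memory is bounded by the per-CPU bucket sizes.
	 */
	example_zone = uma_zcache_create("example", PAGE_SIZE, NULL, NULL,
	    NULL, NULL, example_import, example_release, NULL,
	    UMA_ZONE_NOBUCKETCACHE);
}
SYSINIT(example_zone, SI_SUB_KMEM, SI_ORDER_ANY, example_zone_init, NULL);

Consumers then allocate and free through the cache zone as usual, e.g.
"item = uma_zalloc(example_zone, M_NOWAIT);" followed later by
"uma_zfree(example_zone, item);", exactly as vm_page_alloc() and
vm_page_free_toq() do with cachepg_zones in the patch above.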