diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c index c0fb5017ab23..024c4337d0bc 100644 --- a/sys/amd64/amd64/uma_machdep.c +++ b/sys/amd64/amd64/uma_machdep.c @@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { vm_page_t m; vm_paddr_t pa; @@ -51,7 +52,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) *flags = UMA_SLAB_PRIV; pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; for (;;) { - m = vm_page_alloc(NULL, 0, pflags); + m = vm_page_alloc_domain(NULL, 0, domain, pflags); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index e925177122c8..674f9d2ad47d 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -339,7 +339,8 @@ static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #ifdef PAE -static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, int domain, + u_int8_t *flags, int wait); #endif static void pmap_set_pg(void); @@ -648,7 +649,8 @@ pmap_page_init(vm_page_t m) #ifdef PAE static void * -pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pdpt_allocf(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ diff --git a/sys/ia64/ia64/uma_machdep.c b/sys/ia64/ia64/uma_machdep.c index 29a45eaa9e7e..81dc81170f65 100644 --- a/sys/ia64/ia64/uma_machdep.c +++ b/sys/ia64/ia64/uma_machdep.c @@ -40,7 +40,8 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { void *va; vm_page_t m; diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 119e4b832525..7221e666aa41 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -284,7 +284,7 @@ static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(void *); -static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); +static void *mbuf_jumbo_alloc(uma_zone_t, int, int, uint8_t *, int); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); @@ -389,7 +389,8 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); * pages. */ static void * -mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) +mbuf_jumbo_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *flags, + int wait) { /* Inform UMA that this allocator uses kernel_map/object. 
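Every UMA backend allocator in the hunks above gains an int domain argument between the size and the flags pointer, and the amd64 uma_small_alloc() forwards it straight to vm_page_alloc_domain() while keeping the old retry-or-fail behaviour. The following is a minimal userland sketch of that calling convention; page_from_domain(), the flag value, and the random failure are stand-ins for vm_page_alloc_domain() and VM_WAIT, not the real kernel API.

#include <stdio.h>
#include <stdlib.h>

#define M_NOWAIT        0x0001          /* stand-in for the malloc(9) flag */

/* Stand-in for vm_page_alloc_domain(); fails now and then. */
static void *
page_from_domain(int domain, size_t bytes)
{
        if (rand() % 4 == 0)
                return (NULL);
        printf("grabbed %zu bytes from domain %d\n", bytes, domain);
        return (malloc(bytes));
}

/* Shape of the new hook: size, preferred domain, flags out, wait flags. */
static void *
small_alloc(int bytes, int domain, unsigned char *flags, int wait)
{
        void *p;

        *flags = 0;
        for (;;) {
                p = page_from_domain(domain, (size_t)bytes);
                if (p != NULL)
                        return (p);
                if (wait & M_NOWAIT)
                        return (NULL);
                /* The kernel sleeps in VM_WAIT here before retrying. */
        }
}

int
main(void)
{
        unsigned char flags;

        free(small_alloc(4096, 1, &flags, 0));
        return (0);
}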
*/ diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c index a80a233f53b5..90df3cdfc981 100644 --- a/sys/kern/subr_busdma_bufalloc.c +++ b/sys/kern/subr_busdma_bufalloc.c @@ -147,8 +147,8 @@ busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_size_t size) } void * -busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag, - int wait) +busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, int domain, + u_int8_t *pflag, int wait) { #ifdef VM_MEMATTR_UNCACHEABLE diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index d9eaadae78cb..78cbcd321b72 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -495,7 +495,7 @@ bt_insfree(vmem_t *vm, bt_t *bt) * Import from the arena into the quantum cache in UMA. */ static int -qc_import(void *arg, void **store, int cnt, int flags) +qc_import(void *arg, void **store, int cnt, int domain, int flags) { qcache_t *qc; vmem_addr_t addr; @@ -609,7 +609,7 @@ static struct mtx_padalign vmem_bt_lock; * we are really out of KVA. */ static void * -vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +vmem_bt_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait) { vmem_addr_t addr; diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 414bd89fb51c..25de505f1961 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -1047,7 +1047,7 @@ pmap_alloc_direct_page(unsigned int index, int req) { vm_page_t m; - m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED | + m = vm_page_alloc_freelist(0, VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) return (NULL); @@ -1581,7 +1581,7 @@ retry: } } /* No free items, allocate another chunk */ - m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL | + m = vm_page_alloc_freelist(0, VM_FREELIST_DIRECT, VM_ALLOC_NORMAL | VM_ALLOC_WIRED); if (m == NULL) { if (try) { diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c index e70dded424ab..1ead1a45ae58 100644 --- a/sys/mips/mips/uma_machdep.c +++ b/sys/mips/mips/uma_machdep.c @@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { vm_paddr_t pa; vm_page_t m; @@ -52,7 +53,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; for (;;) { - m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags); + m = vm_page_alloc_freelist(domain, VM_FREELIST_DIRECT, pflags); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index b47b94d14a99..5fffed2e3370 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1439,7 +1439,8 @@ retry: static mmu_t installed_mmu; static void * -moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +moea64_uma_page_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { /* * This entire routine is a horrible hack to avoid bothering kmem diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c index 9d60b2bc018f..2bb5c29dcfb7 100644 --- a/sys/powerpc/aim/slb.c +++ b/sys/powerpc/aim/slb.c @@ -473,7 +473,8 @@ slb_insert_user(pmap_t pm, struct slb *slb) } static void * -slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +slb_uma_real_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) 
{ static vm_offset_t realmax = 0; void *va; diff --git a/sys/powerpc/aim/uma_machdep.c b/sys/powerpc/aim/uma_machdep.c index 4df562b48f6d..4133c3e1b214 100644 --- a/sys/powerpc/aim/uma_machdep.c +++ b/sys/powerpc/aim/uma_machdep.c @@ -50,7 +50,8 @@ SYSCTL_INT(_hw, OID_AUTO, uma_mdpages, CTLFLAG_RD, &hw_uma_mdpages, 0, "UMA MD pages in use"); void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { void *va; vm_page_t m; diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index 8615aa2fd9dc..0efae6736b8e 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -502,7 +502,8 @@ swi_vm(void *v) } void * -uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, int bytes, int domain, u_int8_t *flags, + int wait) { vm_paddr_t pa; vm_page_t m; diff --git a/sys/sys/busdma_bufalloc.h b/sys/sys/busdma_bufalloc.h index f5ec32f41380..e72a4fb1c0e3 100644 --- a/sys/sys/busdma_bufalloc.h +++ b/sys/sys/busdma_bufalloc.h @@ -111,7 +111,7 @@ struct busdma_bufzone * busdma_bufalloc_findzone(busdma_bufalloc_t ba, * you can probably use these when you need uncacheable buffers. */ void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, - u_int8_t *pflag, int wait); + int domain, u_int8_t *pflag, int wait); void busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag); #endif /* _MACHINE_BUSDMA_BUFALLOC_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index fbd064cd6769..5e7c34788b75 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -274,7 +274,6 @@ struct thread { pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ - int td_dom_rr_idx; /* (k) RR Numa domain selection. */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ diff --git a/sys/vm/_vm_domain.h b/sys/vm/_vm_domain.h new file mode 100644 index 000000000000..ba8f5eea8e10 --- /dev/null +++ b/sys/vm/_vm_domain.h @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 2014, Jeffrey Roberson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _VM__DOMAIN_H_ +#define _VM__DOMAIN_H_ + +#include +#include + +#ifdef _KERNEL +#define VM_DOMAIN_SETSIZE MAXMEMDOM +#endif + +#define VM_DOMAIN_MAXSIZE 64 + +#ifndef VM_DOMAIN_SETSIZE +#define VM_DOMAIN_SETSIZE VM_DOMAIN_MAXSIZE +#endif + +#define _NVM_DOMAINBITS _BITSET_BITS +#define _NVM_DOMAINWORDS __bitset_words(VM_DOMAIN_SETSIZE) + +BITSET_DEFINE(_vm_domainset, VM_DOMAIN_SETSIZE); +typedef struct _vm_domainset vm_domainset_t; + +#define VM_DOMAIN_FSET BITSET_FSET(_NVM_DOMAINWORDS) +#define VM_DOMAIN_T_INITIALIZER BITSET_T_INITIALIZER +#define VM_DOMAIN_SETBUFSIZ ((2 + sizeof(long) * 2) * _NVM_DOMAINWORDS) + +#ifdef _KERNEL + +/* + * Valid memory domain (NUMA) policy values. + */ +enum vm_domain_policy { + ROUNDROBIN, /* Select between any in the set. */ + FIRSTTOUCH /* Select the current domain. */ +}; + +/* + * The select structure encapsulate domain allocation strategy with + * allocator information. + */ +struct vm_domain_select { + vm_domainset_t ds_mask; /* bitmask of valid domains. */ + enum vm_domain_policy ds_policy; /* Allocation policy. */ + int ds_cursor; /* Allocation cursor. */ + int ds_count; /* Domains in policy. */ +}; + +#endif /* _KERNEL */ + +#endif /* !_VM__DOMAIN_H_ */ diff --git a/sys/vm/uma.h b/sys/vm/uma.h index 5012d98fbdc3..6f7d71aa9bc8 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -45,6 +45,7 @@ /* Types and type defs */ struct uma_zone; +struct vm_domain_select; /* Opaque type used as a handle to the zone */ typedef struct uma_zone * uma_zone_t; @@ -126,7 +127,8 @@ typedef void (*uma_fini)(void *mem, int size); /* * Import new memory into a cache zone. */ -typedef int (*uma_import)(void *arg, void **store, int count, int flags); +typedef int (*uma_import)(void *arg, void **store, int count, int domain, + int flags); /* * Free memory from a cache zone. @@ -365,16 +367,12 @@ uma_zfree(uma_zone_t zone, void *item) } /* - * XXX The rest of the prototypes in this header are h0h0 magic for the VM. - * If you think you need to use it for a normal zone you're probably incorrect. - */ - -/* * Backend page supplier routines * * Arguments: * zone The zone that is requesting pages. * size The number of bytes being requested. + * domain The NUMA domain we prefer for this allocation. * pflag Flags for these memory pages, see below. * wait Indicates our willingness to block. * @@ -382,7 +380,8 @@ uma_zfree(uma_zone_t zone, void *item) * A pointer to the allocated memory or NULL on failure. */ -typedef void *(*uma_alloc)(uma_zone_t zone, int size, uint8_t *pflag, int wait); +typedef void *(*uma_alloc)(uma_zone_t zone, int size, int domain, + uint8_t *pflag, int wait); /* * Backend page free routines @@ -397,8 +396,6 @@ typedef void *(*uma_alloc)(uma_zone_t zone, int size, uint8_t *pflag, int wait); */ typedef void (*uma_free)(void *item, int size, uint8_t pflag); - - /* * Sets up the uma allocator. (Called by vm_mem_init) * @@ -584,6 +581,19 @@ void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf); void uma_zone_set_freef(uma_zone_t zone, uma_free freef); /* + * XXX + * + * Arguments: + * zone The zone NUMA policy is being installed. + * sel Selector of the NUMA policy requested. + * + * Returns: + * Nothing + */ +void uma_zone_set_domain_selector(uma_zone_t zone, + struct vm_domain_select *sel); + +/* * These flags are setable in the allocf and visible in the freef. 
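The uma.h hunk above declares uma_zone_set_domain_selector(), which installs one of the vm_domain_select policies on a zone. A kernel-context usage sketch follows (a fragment, not a standalone program); it mirrors what bucket_init() does for the bucket zones later in this patch, and foo_zone is purely illustrative.

#include <vm/uma.h>
#include <vm/vm_domain.h>

static uma_zone_t foo_zone;             /* hypothetical zone, for illustration */

static void
foo_zone_setup(void)
{

        foo_zone = uma_zcreate("foo", 128, NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        /* Prefer the allocating CPU's memory domain (first-touch). */
        uma_zone_set_domain_selector(foo_zone, &vm_sel_ft);
}

Zones that never call this keep uz_sel == NULL and behave as before, falling back to domain 0 / UMA_ANYDOMAIN in the fast paths.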
*/ #define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */ diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 335035693b9c..646b8c56f493 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -80,10 +80,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include #include +#include #include #include #include @@ -226,11 +228,11 @@ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; /* Prototypes.. */ -static void *noobj_alloc(uma_zone_t, int, uint8_t *, int); -static void *page_alloc(uma_zone_t, int, uint8_t *, int); -static void *startup_alloc(uma_zone_t, int, uint8_t *, int); +static void *noobj_alloc(uma_zone_t, int, int, uint8_t *, int); +static void *page_alloc(uma_zone_t, int, int, uint8_t *, int); +static void *startup_alloc(uma_zone_t, int, int, uint8_t *, int); static void page_free(void *, int, uint8_t); -static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int); +static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int); static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); static void bucket_cache_drain(uma_zone_t zone); @@ -248,23 +250,23 @@ static int hash_expand(struct uma_hash *, struct uma_hash *); static void hash_free(struct uma_hash *hash); static void uma_timeout(void *); static void uma_startup3(void); -static void *zone_alloc_item(uma_zone_t, void *, int); +static void *zone_alloc_item(uma_zone_t, void *, int, int); static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); -static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags); -static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); -static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int); +static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int); +static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); -static int zone_import(uma_zone_t zone, void **bucket, int max, int flags); -static void zone_release(uma_zone_t zone, void **bucket, int cnt); -static void uma_zero_item(void *item, uma_zone_t zone); +static int zone_import(uma_zone_t, void **, int, int, int); +static void zone_release(uma_zone_t, void **, int); +static void uma_zero_item(void *, uma_zone_t); void uma_print_zone(uma_zone_t); void uma_print_stats(void); @@ -312,6 +314,7 @@ bucket_init(void) ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET); + uma_zone_set_domain_selector(ubz->ubz_zone, &vm_sel_ft); } } @@ -539,7 +542,7 @@ hash_alloc(struct uma_hash *hash) } else { alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, - M_WAITOK); + UMA_ANYDOMAIN, M_WAITOK); hash->uh_hashsize = UMA_HASH_SIZE_INIT; } if (hash->uh_slab_hash) { @@ -705,17 +708,23 @@ cache_drain_safe_cpu(uma_zone_t zone) { uma_cache_t cache; uma_bucket_t b1, b2; + int domain; if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 
return; b1 = b2 = NULL; + ZONE_LOCK(zone); critical_enter(); + if (zone->uz_sel == NULL) + domain = 0; + else + domain = vm_domain_select_first(zone->uz_sel); cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket) { if (cache->uc_allocbucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_buckets, + LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, cache->uc_allocbucket, ub_link); else b1 = cache->uc_allocbucket; @@ -723,7 +732,7 @@ cache_drain_safe_cpu(uma_zone_t zone) } if (cache->uc_freebucket) { if (cache->uc_freebucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_buckets, + LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, cache->uc_freebucket, ub_link); else b2 = cache->uc_freebucket; @@ -778,18 +787,22 @@ cache_drain_safe(uma_zone_t zone) static void bucket_cache_drain(uma_zone_t zone) { + uma_zone_domain_t zdom; uma_bucket_t bucket; + int i; /* - * Drain the bucket queues and free the buckets, we just keep two per - * cpu (alloc/free). + * Drain the bucket queues and free the buckets. */ - while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) { - LIST_REMOVE(bucket, ub_link); - ZONE_UNLOCK(zone); - bucket_drain(zone, bucket); - bucket_free(zone, bucket, NULL); - ZONE_LOCK(zone); + for (i = 0; i < vm_ndomains; i++) { + zdom = &zone->uz_domain[i]; + while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + LIST_REMOVE(bucket, ub_link); + ZONE_UNLOCK(zone); + bucket_drain(zone, bucket); + bucket_free(zone, bucket, NULL); + ZONE_LOCK(zone); + } } /* @@ -834,8 +847,10 @@ static void keg_drain(uma_keg_t keg) { struct slabhead freeslabs = { 0 }; + uma_domain_t dom; uma_slab_t slab; uma_slab_t n; + int i; /* * We don't want to take pages from statically allocated kegs at this @@ -851,26 +866,30 @@ keg_drain(uma_keg_t keg) if (keg->uk_free == 0) goto finished; - slab = LIST_FIRST(&keg->uk_free_slab); - while (slab) { - n = LIST_NEXT(slab, us_link); + for (i = 0; i < vm_ndomains; i++) { + dom = &keg->uk_domain[i]; + slab = LIST_FIRST(&dom->ud_free_slab); + while (slab) { + n = LIST_NEXT(slab, us_link); - /* We have no where to free these to */ - if (slab->us_flags & UMA_SLAB_BOOT) { - slab = n; - continue; - } + /* We have no where to free these to */ + if (slab->us_flags & UMA_SLAB_BOOT) { + slab = n; + continue; + } - LIST_REMOVE(slab, us_link); - keg->uk_pages -= keg->uk_ppera; - keg->uk_free -= keg->uk_ipers; + LIST_REMOVE(slab, us_link); + keg->uk_pages -= keg->uk_ppera; + keg->uk_free -= keg->uk_ipers; - if (keg->uk_flags & UMA_ZONE_HASH) - UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data); + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_REMOVE(&keg->uk_hash, slab, + slab->us_data); - SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); + SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); - slab = n; + slab = n; + } } finished: KEG_UNLOCK(keg); @@ -933,7 +952,7 @@ zone_drain(uma_zone_t zone) * caller specified M_NOWAIT. 
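With the zone's single full-bucket list split into per-domain lists (uz_domain[].uzd_buckets), draining has to walk every domain, which is exactly what bucket_cache_drain() and keg_drain() do above, and cache_drain_safe_cpu() now files the per-CPU buckets under the domain chosen by the selector. A small userland model of that structure using the same <sys/queue.h> LIST macros; the bucket contents and the two-domain layout are made up for the example.

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>

#define NDOMAINS        2

struct bucket {
        LIST_ENTRY(bucket) ub_link;
        int             ub_cnt;         /* items cached in this bucket */
};

/* One full-bucket list per memory domain, as in struct uma_zone_domain. */
struct zone_domain {
        LIST_HEAD(, bucket) uzd_buckets;
};

static struct zone_domain zdom[NDOMAINS];

static void
bucket_cache_drain(void)
{
        struct bucket *b;
        int i;

        /* Every domain's list must be visited now, not just one queue. */
        for (i = 0; i < NDOMAINS; i++) {
                while ((b = LIST_FIRST(&zdom[i].uzd_buckets)) != NULL) {
                        LIST_REMOVE(b, ub_link);
                        printf("freed a bucket of %d items from domain %d\n",
                            b->ub_cnt, i);
                        free(b);
                }
        }
}

int
main(void)
{
        struct bucket *b;
        int i;

        for (i = 0; i < NDOMAINS; i++)
                LIST_INIT(&zdom[i].uzd_buckets);
        for (i = 0; i < 4; i++) {
                b = calloc(1, sizeof(*b));
                b->ub_cnt = 8;
                /* A freeing CPU inserts into its own domain's list. */
                LIST_INSERT_HEAD(&zdom[i % NDOMAINS].uzd_buckets, b, ub_link);
        }
        bucket_cache_drain();
        return (0);
}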
*/ static uma_slab_t -keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) +keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait) { uma_slabrefcnt_t slabref; uma_alloc allocf; @@ -942,6 +961,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) uint8_t flags; int i; + KASSERT(domain >= 0 && domain < vm_ndomains, + ("keg_alloc_slab: domain %d out of range", domain)); mtx_assert(&keg->uk_lock, MA_OWNED); slab = NULL; mem = NULL; @@ -953,7 +974,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) KEG_UNLOCK(keg); if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - slab = zone_alloc_item(keg->uk_slabzone, NULL, wait); + slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait); if (slab == NULL) goto out; } @@ -974,7 +995,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) wait |= M_NODUMP; /* zone is passed for legacy reasons. */ - mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait); + mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, domain, &flags, wait); if (mem == NULL) { if (keg->uk_flags & UMA_ZONE_OFFPAGE) zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); @@ -998,6 +1019,18 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) #ifdef INVARIANTS BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); #endif + /* + * Set the domain based on the first page. This may be incorrect + * for multi-page allocations depending on the numa_policy specified. + */ +#if MAXMEMDOM > 1 + if ((flags & UMA_SLAB_BOOT) == 0) + slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE( + pmap_kextract((vm_offset_t)mem))); + else +#endif + slab->us_domain = 0; + if (keg->uk_flags & UMA_ZONE_REFCNT) { slabref = (uma_slabrefcnt_t)slab; for (i = 0; i < keg->uk_ipers; i++) @@ -1035,7 +1068,7 @@ out: * the VM is ready. */ static void * -startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +startup_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait) { uma_keg_t keg; uma_slab_t tmps; @@ -1080,7 +1113,7 @@ startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) #else keg->uk_allocf = page_alloc; #endif - return keg->uk_allocf(zone, bytes, pflag, wait); + return keg->uk_allocf(zone, bytes, domain, pflag, wait); } /* @@ -1095,7 +1128,7 @@ startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) * NULL if M_NOWAIT is set. */ static void * -page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) +page_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, int wait) { void *p; /* Returned page */ @@ -1117,7 +1150,7 @@ page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait) * NULL if M_NOWAIT is set. 
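keg_alloc_slab() above records which domain actually backs the slab by resolving the first page's physical address (pmap_kextract() plus vm_phys_domain()), since a multi-page allocation is only guaranteed to start in the requested domain. Because the result is stored in a one-byte us_domain field, the header later adds a compile-time check that MAXMEMDOM fits in 255. Below is a userland model of the physical-address-to-domain lookup; the segment table stands in for the kernel's mem_affinity/vm_phys segment data and the addresses are invented.

#include <stdio.h>

typedef unsigned long long paddr_t;     /* stand-in for vm_paddr_t */

/* Physical ranges owned by each domain (invented values). */
static const struct {
        paddr_t start;
        paddr_t end;
        int     domain;
} seg[] = {
        { 0x000000000ULL, 0x100000000ULL, 0 },
        { 0x100000000ULL, 0x200000000ULL, 1 },
};

/* Rough model of vm_phys_domain(PHYS_TO_VM_PAGE(pa)). */
static int
phys_domain(paddr_t pa)
{
        unsigned i;

        for (i = 0; i < sizeof(seg) / sizeof(seg[0]); i++)
                if (pa >= seg[i].start && pa < seg[i].end)
                        return (seg[i].domain);
        return (0);                     /* default domain */
}

int
main(void)
{
        /* The slab remembers the domain of its first backing page. */
        int slab_domain = phys_domain(0x180000000ULL);

        printf("slab belongs to domain %d\n", slab_domain);
        return (0);
}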
*/ static void * -noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) +noobj_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *flags, int wait) { TAILQ_HEAD(, vm_page) alloctail; u_long npages; @@ -1130,7 +1163,7 @@ noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) npages = howmany(bytes, PAGE_SIZE); while (npages > 0) { - p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (p != NULL) { /* @@ -1410,6 +1443,7 @@ keg_ctor(void *mem, int size, void *udata, int flags) keg->uk_init = arg->uminit; keg->uk_fini = arg->fini; keg->uk_align = arg->align; + keg->uk_cursor = 0; keg->uk_free = 0; keg->uk_reserve = 0; keg->uk_pages = 0; @@ -1573,6 +1607,7 @@ zone_ctor(void *mem, int size, void *udata, int flags) zone->uz_sleeps = 0; zone->uz_count = 0; zone->uz_count_min = 0; + zone->uz_sel = NULL; zone->uz_flags = 0; zone->uz_warning = NULL; timevalclear(&zone->uz_ratecheck); @@ -1910,7 +1945,7 @@ uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align; args.flags = flags; args.zone = zone; - return (zone_alloc_item(kegs, &args, M_WAITOK)); + return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK)); } /* See uma.h */ @@ -1942,7 +1977,7 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor, args.flags = flags; args.keg = NULL; - return (zone_alloc_item(zones, &args, M_WAITOK)); + return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK)); } /* See uma.h */ @@ -1966,7 +2001,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, args.keg = keg; /* XXX Attaches only one keg of potentially many. */ - return (zone_alloc_item(zones, &args, M_WAITOK)); + return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK)); } /* See uma.h */ @@ -1990,7 +2025,7 @@ uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, args.align = 0; args.flags = flags; - return (zone_alloc_item(zones, &args, M_WAITOK)); + return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK)); } static void @@ -2091,10 +2126,12 @@ uma_zdestroy(uma_zone_t zone) void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) { - void *item; - uma_cache_t cache; + uma_zone_domain_t zdom; uma_bucket_t bucket; + uma_cache_t cache; + void *item; int lockfail; + int domain; int cpu; /* This is the fast path allocation */ @@ -2196,8 +2233,10 @@ zalloc_start: bucket_free(zone, bucket, udata); /* Short-circuit for zones without buckets and low memory. */ - if (zone->uz_count == 0 || bucketdisable) + if (zone->uz_count == 0 || bucketdisable) { + domain = UMA_ANYDOMAIN; goto zalloc_item; + } /* * Attempt to retrieve the item from the per-CPU cache has failed, so @@ -2232,10 +2271,18 @@ zalloc_start: goto zalloc_start; } + if (zone->uz_sel == NULL) { + domain = UMA_ANYDOMAIN; + zdom = &zone->uz_domain[0]; + } else { + domain = vm_domain_select_first(zone->uz_sel); + zdom = &zone->uz_domain[domain]; + } + /* * Check the zone's cache of buckets. */ - if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) { + if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { KASSERT(bucket->ub_cnt != 0, ("uma_zalloc_arg: Returning an empty bucket.")); @@ -2260,7 +2307,7 @@ zalloc_start: * works we'll restart the allocation from the begining and it * will use the just filled bucket. 
*/ - bucket = zone_alloc_bucket(zone, udata, flags); + bucket = zone_alloc_bucket(zone, udata, domain, flags); if (bucket != NULL) { ZONE_LOCK(zone); critical_enter(); @@ -2271,10 +2318,11 @@ zalloc_start: * initialized bucket to make this less likely or claim * the memory directly. */ - if (cache->uc_allocbucket == NULL) - cache->uc_allocbucket = bucket; + if (cache->uc_allocbucket != NULL || + (domain != UMA_ANYDOMAIN && domain != PCPU_GET(domain))) + LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); else - LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link); + cache->uc_allocbucket = bucket; ZONE_UNLOCK(zone); goto zalloc_start; } @@ -2287,16 +2335,44 @@ zalloc_start: #endif zalloc_item: - item = zone_alloc_item(zone, udata, flags); + item = zone_alloc_item(zone, udata, domain, flags); return (item); } +/* + * Find a slab with some space. Prefer slabs that are partially + * used over those that are totally full. This helps to reduce + * fragmentation. + */ +static uma_slab_t +keg_first_slab(uma_keg_t keg, int domain) +{ + uma_domain_t dom; + uma_slab_t slab; + + KASSERT(domain >= 0 && domain < vm_ndomains, + ("keg_first_slab: Domain %d out of range", domain)); + dom = &keg->uk_domain[domain]; + if (!LIST_EMPTY(&dom->ud_part_slab)) + return LIST_FIRST(&dom->ud_part_slab); + if (LIST_EMPTY(&dom->ud_free_slab)) + return (NULL); + slab = LIST_FIRST(&dom->ud_free_slab); + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); + + return (slab); +} + static uma_slab_t -keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) +keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags) { + uma_domain_t dom; uma_slab_t slab; int reserve; + int domain; + int start; mtx_assert(&keg->uk_lock, MA_OWNED); slab = NULL; @@ -2304,21 +2380,14 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) if ((flags & M_USE_RESERVE) == 0) reserve = keg->uk_reserve; - for (;;) { - /* - * Find a slab with some space. Prefer slabs that are partially - * used over those that are totally full. This helps to reduce - * fragmentation. - */ - if (keg->uk_free > reserve) { - if (!LIST_EMPTY(&keg->uk_part_slab)) { - slab = LIST_FIRST(&keg->uk_part_slab); - } else { - slab = LIST_FIRST(&keg->uk_free_slab); - LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&keg->uk_part_slab, slab, - us_link); - } + if (rdomain == UMA_ANYDOMAIN) { + keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + domain = start = keg->uk_cursor; + } else + domain = start = rdomain; + do { + if (keg->uk_free > reserve && + (slab = keg_first_slab(keg, domain)) != NULL) { MPASS(slab->us_keg == keg); return (slab); } @@ -2345,7 +2414,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) msleep(keg, &keg->uk_lock, PVM, "keglimit", 0); continue; } - slab = keg_alloc_slab(keg, zone, flags); + slab = keg_alloc_slab(keg, zone, domain, flags); /* * If we got a slab here it's safe to mark it partially used * and return. We assume that the caller is going to remove @@ -2353,7 +2422,8 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) */ if (slab) { MPASS(slab->us_keg == keg); - LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); + dom = &keg->uk_domain[slab->us_domain]; + LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } /* @@ -2361,13 +2431,21 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) * could have while we were unlocked. Check again before we * fail. 
*/ - flags |= M_NOVM; - } - return (slab); + if ((slab = keg_first_slab(keg, domain)) != NULL) { + MPASS(slab->us_keg == keg); + return (slab); + } + if (rdomain == UMA_ANYDOMAIN) { + keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + domain = keg->uk_cursor; + } + } while (domain != start); + + return (NULL); } static uma_slab_t -zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags) +zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags) { uma_slab_t slab; @@ -2377,7 +2455,7 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags) } for (;;) { - slab = keg_fetch_slab(keg, zone, flags); + slab = keg_fetch_slab(keg, zone, domain, flags); if (slab) return (slab); if (flags & (M_NOWAIT | M_NOVM)) @@ -2394,7 +2472,7 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags) * The last pointer is used to seed the search. It is not required. */ static uma_slab_t -zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) +zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags) { uma_klink_t klink; uma_slab_t slab; @@ -2414,7 +2492,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) * the search. */ if (last != NULL) { - slab = keg_fetch_slab(last, zone, flags); + slab = keg_fetch_slab(last, zone, domain, flags); if (slab) return (slab); KEG_UNLOCK(last); @@ -2435,7 +2513,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) keg = klink->kl_keg; KEG_LOCK(keg); if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) { - slab = keg_fetch_slab(keg, zone, flags); + slab = keg_fetch_slab(keg, zone, domain, flags); if (slab) return (slab); } @@ -2470,6 +2548,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) static void * slab_alloc_item(uma_keg_t keg, uma_slab_t slab) { + uma_domain_t dom; void *item; uint8_t freei; @@ -2485,14 +2564,15 @@ slab_alloc_item(uma_keg_t keg, uma_slab_t slab) /* Move this slab to the full list */ if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); + dom = &keg->uk_domain[slab->us_domain]; + LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link); } return (item); } static int -zone_import(uma_zone_t zone, void **bucket, int max, int flags) +zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags) { uma_slab_t slab; uma_keg_t keg; @@ -2502,13 +2582,25 @@ zone_import(uma_zone_t zone, void **bucket, int max, int flags) keg = NULL; /* Try to keep the buckets totally full */ for (i = 0; i < max; ) { - if ((slab = zone->uz_slab(zone, keg, flags)) == NULL) + if ((slab = zone->uz_slab(zone, keg, domain, flags)) == NULL) break; keg = slab->us_keg; while (slab->us_freecount && i < max) { bucket[i++] = slab_alloc_item(keg, slab); if (keg->uk_free <= keg->uk_reserve) break; +#if MAXMEMDOM > 1 + /* + * If the zone is striped we pick a new slab for + * every allocation. Eliminating this conditional + * will instead pick a new domain for each bucket + * rather than stripe within each bucket. The + * current options produces more fragmentation but + * yields better distribution. + */ + if (domain == UMA_ANYDOMAIN && vm_ndomains > 1) + break; +#endif } /* Don't grab more than one slab at a time. 
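keg_fetch_slab() above now walks the per-domain slab lists: a request for a specific domain looks only there, while UMA_ANYDOMAIN starts at the keg's uk_cursor and keeps advancing round-robin until a domain with a usable slab is found, with keg_first_slab() preferring partially used slabs over empty ones to limit fragmentation. A compact userland model of that walk follows; the per-domain free counts are fabricated and the slab-allocation step is omitted.

#include <stdio.h>

#define NDOMAINS        4
#define ANYDOMAIN       (-1)            /* models UMA_ANYDOMAIN */

/* Free items available per domain (fabricated). */
static int dom_free[NDOMAINS] = { 0, 0, 3, 1 };
static int cursor;                      /* models keg->uk_cursor */

static int
fetch_slab_domain(int rdomain)
{
        int domain, start;

        if (rdomain == ANYDOMAIN) {
                cursor = (cursor + 1) % NDOMAINS;
                domain = start = cursor;
        } else
                domain = start = rdomain;
        do {
                if (dom_free[domain] > 0) {
                        dom_free[domain]--;     /* take an item */
                        return (domain);
                }
                /* Nothing here; only ANYDOMAIN may keep walking. */
                if (rdomain == ANYDOMAIN) {
                        cursor = (cursor + 1) % NDOMAINS;
                        domain = cursor;
                }
        } while (domain != start);
        return (-1);                    /* caller would allocate a new slab */
}

int
main(void)
{
        int i;

        for (i = 0; i < 5; i++)
                printf("request %d served from domain %d\n", i,
                    fetch_slab_domain(ANYDOMAIN));
        return (0);
}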
*/ flags &= ~M_WAITOK; @@ -2521,7 +2613,7 @@ zone_import(uma_zone_t zone, void **bucket, int max, int flags) } static uma_bucket_t -zone_alloc_bucket(uma_zone_t zone, void *udata, int flags) +zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags) { uma_bucket_t bucket; int max; @@ -2533,7 +2625,7 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int flags) max = MIN(bucket->ub_entries, zone->uz_count); bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, - max, flags); + max, domain, flags); /* * Initialize the memory if necessary. @@ -2583,7 +2675,7 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int flags) */ static void * -zone_alloc_item(uma_zone_t zone, void *udata, int flags) +zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) { void *item; @@ -2592,7 +2684,7 @@ zone_alloc_item(uma_zone_t zone, void *udata, int flags) #ifdef UMA_DEBUG_ALLOC printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); #endif - if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1) + if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1) goto fail; atomic_add_long(&zone->uz_allocs, 1); @@ -2633,7 +2725,9 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata) { uma_cache_t cache; uma_bucket_t bucket; + uma_zone_domain_t zdom; int lockfail; + int domain; int cpu; #ifdef UMA_DEBUG_ALLOC_1 @@ -2743,6 +2837,14 @@ zfree_start: } cache->uc_freebucket = NULL; + if (zone->uz_sel == NULL) { + zdom = &zone->uz_domain[0]; + domain = UMA_ANYDOMAIN; + } else { + domain = vm_domain_select_first(zone->uz_sel); + zdom = &zone->uz_domain[domain]; + } + /* Can we throw this on the zone full list? */ if (bucket != NULL) { #ifdef UMA_DEBUG_ALLOC @@ -2751,7 +2853,7 @@ zfree_start: /* ub_cnt is pointing to the last free item */ KASSERT(bucket->ub_cnt != 0, ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); - LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link); + LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); } /* We are no longer associated with this CPU. */ @@ -2773,7 +2875,8 @@ zfree_start: critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; - if (cache->uc_freebucket == NULL) { + if (cache->uc_freebucket == NULL && + (domain == UMA_ANYDOMAIN || domain == PCPU_GET(domain))) { cache->uc_freebucket = bucket; goto zfree_start; } @@ -2798,18 +2901,20 @@ zfree_item: static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item) { + uma_domain_t dom; uint8_t freei; mtx_assert(&keg->uk_lock, MA_OWNED); MPASS(keg == slab->us_keg); + dom = &keg->uk_domain[slab->us_domain]; /* Do we need to remove from any lists? */ if (slab->us_freecount+1 == keg->uk_ipers) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); + LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); } else if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); - LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); + LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); } /* Slab management. 
*/ @@ -3062,6 +3167,16 @@ uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) /* See uma.h */ void +uma_zone_set_domain_selector(uma_zone_t zone, struct vm_domain_select *sel) +{ + + ZONE_LOCK(zone); + zone->uz_sel = sel; + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void uma_zone_reserve(uma_zone_t zone, int items) { uma_keg_t keg; @@ -3121,24 +3236,29 @@ uma_zone_reserve_kva(uma_zone_t zone, int count) void uma_prealloc(uma_zone_t zone, int items) { - int slabs; + uma_domain_t dom; uma_slab_t slab; uma_keg_t keg; + int domain; + int slabs; keg = zone_first_keg(zone); if (keg == NULL) return; KEG_LOCK(keg); slabs = items / keg->uk_ipers; + domain = 0; if (slabs * keg->uk_ipers < items) slabs++; while (slabs > 0) { - slab = keg_alloc_slab(keg, zone, M_WAITOK); + slab = keg_alloc_slab(keg, zone, domain, M_WAITOK); if (slab == NULL) break; MPASS(slab->us_keg == keg); - LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); + dom = &keg->uk_domain[slab->us_domain]; + LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); slabs--; + domain = (domain + 1) % vm_ndomains; } KEG_UNLOCK(keg); } @@ -3207,14 +3327,17 @@ uma_zone_exhausted_nolock(uma_zone_t zone) void * uma_large_malloc(int size, int wait) { + static unsigned int large_domain; void *mem; uma_slab_t slab; + int domain; uint8_t flags; - slab = zone_alloc_item(slabzone, NULL, wait); + slab = zone_alloc_item(slabzone, NULL, UMA_ANYDOMAIN, wait); if (slab == NULL) return (NULL); - mem = page_alloc(NULL, size, &flags, wait); + domain = atomic_fetchadd_int(&large_domain, 1) % vm_ndomains; + mem = page_alloc(NULL, size, domain, &flags, wait); if (mem) { vsetslab((vm_offset_t)mem, slab); slab->us_data = mem; @@ -3272,7 +3395,9 @@ cache_print(uma_cache_t cache) static void uma_print_keg(uma_keg_t keg) { + uma_domain_t dom; uma_slab_t slab; + int i; printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d " "out %d free %d limit %d\n", @@ -3280,15 +3405,18 @@ uma_print_keg(uma_keg_t keg) keg->uk_ipers, keg->uk_ppera, (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers); - printf("Part slabs:\n"); - LIST_FOREACH(slab, &keg->uk_part_slab, us_link) - slab_print(slab); - printf("Free slabs:\n"); - LIST_FOREACH(slab, &keg->uk_free_slab, us_link) - slab_print(slab); - printf("Full slabs:\n"); - LIST_FOREACH(slab, &keg->uk_full_slab, us_link) - slab_print(slab); + for (i = 0; i < vm_ndomains; i++) { + dom = &keg->uk_domain[i]; + printf("[%d]: Part slabs:\n", i); + LIST_FOREACH(slab, &dom->ud_part_slab, us_link) + slab_print(slab); + printf("[%d]: Free slabs:\n", i); + LIST_FOREACH(slab, &dom->ud_free_slab, us_link) + slab_print(slab); + printf("[%d]: Full slabs:\n", i); + LIST_FOREACH(slab, &dom->ud_full_slab, us_link) + slab_print(slab); + } } void @@ -3377,6 +3505,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) struct uma_stream_header ush; struct uma_type_header uth; struct uma_percpu_stat ups; + uma_zone_domain_t zdom; uma_bucket_t bucket; struct sbuf sbuf; uma_cache_t cache; @@ -3432,8 +3561,12 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) (LIST_FIRST(&kz->uk_zones) != z)) uth.uth_zone_flags = UTH_ZONE_SECONDARY; - LIST_FOREACH(bucket, &z->uz_buckets, ub_link) - uth.uth_zone_free += bucket->ub_cnt; + for (i = 0; i < vm_ndomains; i++) { + zdom = &z->uz_domain[i]; + LIST_FOREACH(bucket, &zdom->uzd_buckets, + ub_link) + uth.uth_zone_free += bucket->ub_cnt; + } uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; uth.uth_fails = z->uz_fails; @@ -3507,10 +3640,12 @@ 
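uma_prealloc() and uma_large_malloc() above both spread their work over domains with a simple rotating counter: prealloc advances a local index once per slab, and the large-allocation path uses atomic_fetchadd_int() on a static counter so that concurrent callers still interleave domains. A minimal userland sketch of the atomic variant; the page allocation itself is a placeholder.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NDOMAINS        4

static atomic_uint large_domain;        /* shared striping counter */

static void *
large_malloc(size_t size)
{
        unsigned domain;

        /* Each caller gets the next domain, even when racing. */
        domain = atomic_fetch_add(&large_domain, 1) % NDOMAINS;
        printf("%zu-byte allocation steered to domain %u\n", size, domain);
        return (malloc(size));          /* placeholder for page_alloc() */
}

int
main(void)
{
        int i;

        for (i = 0; i < 6; i++)
                free(large_malloc(65536));
        return (0);
}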
sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS) DB_SHOW_COMMAND(uma, db_show_uma) { uint64_t allocs, frees, sleeps; + uma_zone_domain_t zdom; uma_bucket_t bucket; uma_keg_t kz; uma_zone_t z; int cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Sleeps", "Bucket"); @@ -3527,8 +3662,12 @@ DB_SHOW_COMMAND(uma, db_show_uma) if (!((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z))) cachefree += kz->uk_free; - LIST_FOREACH(bucket, &z->uz_buckets, ub_link) - cachefree += bucket->ub_cnt; + for (i = 0; i < vm_ndomains; i++) { + zdom = &z->uz_domain[i]; + LIST_FOREACH(bucket, &zdom->uzd_buckets, + ub_link) + cachefree += bucket->ub_cnt; + } db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n", z->uz_name, (uintmax_t)kz->uk_size, (intmax_t)(allocs - frees), cachefree, @@ -3542,16 +3681,22 @@ DB_SHOW_COMMAND(uma, db_show_uma) DB_SHOW_COMMAND(umacache, db_show_umacache) { uint64_t allocs, frees; + uma_zone_domain_t zdom; uma_bucket_t bucket; uma_zone_t z; int cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Bucket"); LIST_FOREACH(z, &uma_cachezones, uz_link) { uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL); - LIST_FOREACH(bucket, &z->uz_buckets, ub_link) - cachefree += bucket->ub_cnt; + for (i = 0; i < vm_ndomains; i++) { + zdom = &z->uz_domain[i]; + LIST_FOREACH(bucket, &zdom->uzd_buckets, + ub_link) + cachefree += bucket->ub_cnt; + } db_printf("%18s %8ju %8jd %8d %12ju %8u\n", z->uz_name, (uintmax_t)z->uz_size, (intmax_t)(allocs - frees), cachefree, diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 1ffc7d5fabb8..a84c7fc87b57 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -34,7 +34,23 @@ */ /* - * Here's a quick description of the relationship between the objects: + * The brief summary; Zones describe unique allocation types. Zones are + * organized into per-cpu caches which are filled by buckets. Buckets are + * organized according to memory domains. Buckets are filled from kegs + * which are also organized according to memory domains. Kegs describe a + * unique allocation type, backend memory provider and layout. Kegs are + * associated with one or more zones and zones reference one or more kegs. + * Kegs provide slabs which are virtually contiguous collections of pages. + * Each slab is broken down into one or more items that will satisfy an + * individual allocation. + * + * Allocation is satisfied in the following order: + * 1) Per-cpu cache + * 2) Per-domain cache of buckets + * 3) Slab from any of N kegs + * 4) backend page provider + * + * More detail on individual objects is contained below: * * Kegs contain lists of slabs which are stored in either the full bin, empty * bin, or partially allocated bin, to reduce fragmentation. They also contain @@ -42,6 +58,13 @@ * and rsize is the result of that. The Keg also stores information for * managing a hash of page addresses that maps pages to uma_slab_t structures * for pages that don't have embedded uma_slab_t's. + * + * Keg slab lists are organized by memory domain to support NUMA allocation + * policies. By default allocations are spread across domains to reduce + * the potential for hotspots. Special keg creation flags may be specified + * to prefer local allocation. However there is no strict enforcement as + * frees may happen on any cpu and these are returned the the cpu local cache + * regardless of the originating domain. 
* * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may * be allocated off the page from a special slab zone. The free list within a @@ -175,6 +198,17 @@ struct uma_cache { typedef struct uma_cache * uma_cache_t; /* + * Per-domain memory list. Embedded in the kegs. + */ +struct uma_domain { + LIST_HEAD(,uma_slab) ud_part_slab; /* partially allocated slabs */ + LIST_HEAD(,uma_slab) ud_free_slab; /* empty slab list */ + LIST_HEAD(,uma_slab) ud_full_slab; /* full slabs */ +}; + +typedef struct uma_domain * uma_domain_t; + +/* * Keg management structure * * TODO: Optimize for cache line size @@ -185,10 +219,9 @@ struct uma_keg { struct uma_hash uk_hash; LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */ - LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */ - LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */ - LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */ + struct uma_domain uk_domain[MAXMEMDOM]; /* Keg's slab lists. */ + uint32_t uk_cursor; /* Domain alloc cursor. */ uint32_t uk_align; /* Alignment mask */ uint32_t uk_pages; /* Total page count */ uint32_t uk_free; /* Count of items free in slabs */ @@ -242,12 +275,18 @@ struct uma_slab { #endif uint16_t us_freecount; /* How many are free? */ uint8_t us_flags; /* Page flags see uma.h */ - uint8_t us_pad; /* Pad to 32bits, unused. */ + uint8_t us_domain; /* Backing NUMA domain. */ }; #define us_link us_type._us_link #define us_size us_type._us_size +#if MAXMEMDOM > 255 +#error "Slab domain type insufficient" +#endif + +#define UMA_ANYDOMAIN -1 + /* * The slab structure for UMA_ZONE_REFCNT zones for whose items we * maintain reference counters in the slab for. @@ -259,7 +298,7 @@ struct uma_slab_refcnt { typedef struct uma_slab * uma_slab_t; typedef struct uma_slab_refcnt * uma_slabrefcnt_t; -typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int); +typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int, int); struct uma_klink { LIST_ENTRY(uma_klink) kl_link; @@ -267,6 +306,14 @@ struct uma_klink { }; typedef struct uma_klink *uma_klink_t; +struct uma_zone_domain { + LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */ +}; + +typedef struct uma_zone_domain * uma_zone_domain_t; + +struct vm_domain_select; + /* * Zone management structure * @@ -279,7 +326,7 @@ struct uma_zone { const char *uz_name; /* Text name of the zone */ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */ - LIST_HEAD(,uma_bucket) uz_buckets; /* full buckets */ + struct uma_zone_domain uz_domain[MAXMEMDOM]; /* per-domain buckets */ LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */ struct uma_klink uz_klink; /* klink for first keg. */ @@ -302,6 +349,7 @@ struct uma_zone { uint64_t uz_sleeps; /* Total number of alloc sleeps */ uint16_t uz_count; /* Amount of items in full bucket */ uint16_t uz_count_min; /* Minimal amount of items there */ + struct vm_domain_select *uz_sel; /* Domain memory selector. */ /* The next three fields are used to print a rate-limited warnings. */ const char *uz_warning; /* Warning to print on failure */ @@ -424,7 +472,8 @@ vsetslab(vm_offset_t va, uma_slab_t slab) * if they can provide more effecient allocation functions. This is useful * for using direct mapped addresses. 
*/ -void *uma_small_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait); +void *uma_small_alloc(uma_zone_t zone, int bytes, int domain, uint8_t *pflag, + int wait); void uma_small_free(void *mem, int size, uint8_t flags); #endif /* _KERNEL */ diff --git a/sys/vm/vm_domain.h b/sys/vm/vm_domain.h new file mode 100644 index 000000000000..57495f72e8d7 --- /dev/null +++ b/sys/vm/vm_domain.h @@ -0,0 +1,111 @@ +/*- + * Copyright (c) 2014, Jeffrey Roberson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VM_DOMAIN_H_ +#define _VM_DOMAIN_H_ + +#include + +#include + +#define VM_DOMAIN_CLR(n, p) BIT_CLR(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_COPY(f, t) BIT_COPY(VM_DOMAIN_SETSIZE, f, t) +#define VM_DOMAIN_ISSET(n, p) BIT_ISSET(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_SET(n, p) BIT_SET(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_ZERO(p) BIT_ZERO(VM_DOMAIN_SETSIZE, p) +#define VM_DOMAIN_FILL(p) BIT_FILL(VM_DOMAIN_SETSIZE, p) +#define VM_DOMAIN_SETOF(n, p) BIT_SETOF(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_EMPTY(p) BIT_EMPTY(VM_DOMAIN_SETSIZE, p) +#define VM_DOMAIN_ISFULLSET(p) BIT_ISFULLSET(VM_DOMAIN_SETSIZE, p) +#define VM_DOMAIN_SUBSET(p, c) BIT_SUBSET(VM_DOMAIN_SETSIZE, p, c) +#define VM_DOMAIN_OVERLAP(p, c) BIT_OVERLAP(VM_DOMAIN_SETSIZE, p, c) +#define VM_DOMAIN_CMP(p, c) BIT_CMP(VM_DOMAIN_SETSIZE, p, c) +#define VM_DOMAIN_OR(d, s) BIT_OR(VM_DOMAIN_SETSIZE, d, s) +#define VM_DOMAIN_AND(d, s) BIT_AND(VM_DOMAIN_SETSIZE, d, s) +#define VM_DOMAIN_NAND(d, s) BIT_NAND(VM_DOMAIN_SETSIZE, d, s) +#define VM_DOMAIN_CLR_ATOMIC(n, p) BIT_CLR_ATOMIC(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_SET_ATOMIC(n, p) BIT_SET_ATOMIC(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_AND_ATOMIC(n, p) BIT_AND_ATOMIC(VM_DOMAIN_SETSIZE, n, p) +#define VM_DOMAIN_OR_ATOMIC(d, s) BIT_OR_ATOMIC(VM_DOMAIN_SETSIZE, d, s) +#define VM_DOMAIN_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(VM_DOMAIN_SETSIZE, f, t) +#define VM_DOMAIN_FFS(p) BIT_FFS(VM_DOMAIN_SETSIZE, p) + +#ifdef _KERNEL + +/* + * Domain sets. + */ +extern vm_domainset_t vm_alldomains; /* All domains. */ +extern vm_domainset_t vm_domset[MAXMEMDOM]; /* Specific domain bitmask. */ +extern int vm_ndomains; + +/* + * Domain allocation selectors. 
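The VM_DOMAIN_* wrappers above are thin layers over the kernel's BITSET(9) macros, sized by VM_DOMAIN_SETSIZE from the _vm_domain.h hunk earlier in the patch, which also derives the word count with __bitset_words() and a print-buffer size from it. A userland model of the same bookkeeping with plain unsigned longs; the helper names are ad hoc and the buffer-size rationale (hex digits per word plus separators) is an assumption about the intent of VM_DOMAIN_SETBUFSIZ.

#include <limits.h>
#include <stdio.h>

#define VM_DOMAIN_SETSIZE       64      /* VM_DOMAIN_MAXSIZE when !_KERNEL */
#define BITS_PER_WORD           (sizeof(long) * CHAR_BIT)
/* howmany(SETSIZE, BITS_PER_WORD), as __bitset_words() computes. */
#define DOMAIN_WORDS \
        ((VM_DOMAIN_SETSIZE + BITS_PER_WORD - 1) / BITS_PER_WORD)
/* Room to print each word in hex plus separators, as VM_DOMAIN_SETBUFSIZ. */
#define DOMAIN_SETBUFSIZ        ((2 + sizeof(long) * 2) * DOMAIN_WORDS)

struct domainset {
        unsigned long bits[DOMAIN_WORDS];
};

static void
domain_set(struct domainset *s, int d)
{
        s->bits[d / BITS_PER_WORD] |= 1UL << (d % BITS_PER_WORD);
}

static int
domain_isset(const struct domainset *s, int d)
{
        return ((s->bits[d / BITS_PER_WORD] >> (d % BITS_PER_WORD)) & 1);
}

int
main(void)
{
        struct domainset set = { { 0 } };

        domain_set(&set, 1);
        domain_set(&set, 3);
        printf("words=%zu bufsiz=%zu domain 3 set: %d\n",
            (size_t)DOMAIN_WORDS, (size_t)DOMAIN_SETBUFSIZ,
            domain_isset(&set, 3));
        return (0);
}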
+ */ +extern struct vm_domain_select vm_sel_def; /* default */ +extern struct vm_domain_select vm_sel_rr; /* round-robin */ +extern struct vm_domain_select vm_sel_ft; /* first-touch */ +extern struct vm_domain_select vm_sel_dom[MAXMEMDOM]; /* specific domain */ + +static inline int +vm_domain_select_next(struct vm_domain_select *sel, int domain) +{ + + switch (sel->ds_policy) { + case FIRSTTOUCH: + /* FALLTHROUGH */ + case ROUNDROBIN: + do { + domain = (domain + 1) % vm_ndomains; + } while (!VM_DOMAIN_ISSET(domain, &sel->ds_mask)); + } + return (domain); +} + +static inline int +vm_domain_select_first(struct vm_domain_select *sel) +{ + int domain; + + switch (sel->ds_policy) { + case FIRSTTOUCH: + domain = PCPU_GET(domain); + if (VM_DOMAIN_ISSET(domain, &sel->ds_mask)) + break; + /* FALLTHROUGH */ + case ROUNDROBIN: + domain = atomic_fetchadd_int(&sel->ds_cursor, 1) % vm_ndomains; + if (!VM_DOMAIN_ISSET(domain, &sel->ds_mask)) + domain = vm_domain_select_next(sel, domain); + } + return (domain); +} + +#endif /* _KERNEL */ + +#endif /* !_VM_DOMAIN_H_ */ diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index b428219bdd5d..0c31f6a64fda 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -86,11 +86,13 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -242,6 +244,9 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) panic("_vm_object_allocate: type %d is undefined", type); } object->size = size; +#if MAXMEMDOM > 1 + object->selector = vm_sel_def; +#endif object->generation = 1; object->ref_count = 1; object->memattr = VM_MEMATTR_DEFAULT; @@ -1254,6 +1259,9 @@ vm_object_shadow( result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif +#if MAXMEMDOM > 1 + result->selector = source->selector; +#endif VM_OBJECT_WUNLOCK(source); } @@ -1295,6 +1303,9 @@ vm_object_split(vm_map_entry_t entry) * into a swap object. */ new_object = vm_object_allocate(OBJT_DEFAULT, size); +#if MAXMEMDOM > 1 + new_object->selector = orig_object->selector; +#endif /* * At this point, the new object is still private, so the order in diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index d59a9e61fdf3..f5b0ed4eb3ec 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -72,6 +72,7 @@ #include #include +#include #include /* @@ -105,6 +106,9 @@ struct vm_object { TAILQ_HEAD(respgs, vm_page) memq; /* list of resident pages */ struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ +#if MAXMEMDOM > 1 + struct vm_domain_select selector; /* NUMA domain policy. */ +#endif int generation; /* generation ID */ int ref_count; /* How many refs?? 
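vm_domain_select_first() above implements the two policies: FIRSTTOUCH returns the current CPU's domain when the selector's mask allows it, and otherwise both policies fall through to a round-robin pick driven by an atomic cursor, with vm_domain_select_next() skipping domains outside the mask. A standalone userland model follows; the single-word mask and the explicit "current CPU domain" argument replace the kernel's BITSET set and PCPU_GET(domain).

#include <stdatomic.h>
#include <stdio.h>

#define NDOMAINS        4

enum policy { ROUNDROBIN, FIRSTTOUCH };

struct domain_select {
        unsigned long   mask;           /* allowed domains, one bit each */
        enum policy     policy;
        atomic_int      cursor;         /* shared round-robin cursor */
};

static int
select_next(struct domain_select *sel, int domain)
{
        /* Keep advancing until the (non-empty) mask permits the domain. */
        do {
                domain = (domain + 1) % NDOMAINS;
        } while ((sel->mask & (1UL << domain)) == 0);
        return (domain);
}

static int
select_first(struct domain_select *sel, int curcpu_domain)
{
        int domain;

        if (sel->policy == FIRSTTOUCH &&
            (sel->mask & (1UL << curcpu_domain)) != 0)
                return (curcpu_domain); /* stay local when allowed */
        /* Fall through to round-robin over the permitted domains. */
        domain = atomic_fetch_add(&sel->cursor, 1) % NDOMAINS;
        if ((sel->mask & (1UL << domain)) == 0)
                domain = select_next(sel, domain);
        return (domain);
}

int
main(void)
{
        struct domain_select rr = { .mask = 0xb, .policy = ROUNDROBIN };
        int i;

        for (i = 0; i < 6; i++)
                printf("pick %d: domain %d\n", i, select_first(&rr, 0));
        return (0);
}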
*/ int shadow_count; /* how many objects that this is a shadow for */ diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 26027c162713..98bfb6810293 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -104,6 +104,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1447,6 +1448,32 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex) vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { +#if MAXMEMDOM > 1 + struct vm_domain_select *sel; + vm_page_t m; + int i, dom; + + if (object == NULL) + sel = &vm_sel_def; + else + sel = &object->selector; + + for (i = 0, dom = vm_domain_select_first(sel); + i < sel->ds_count; i++, dom = vm_domain_select_next(sel, dom)) { + if ((m = vm_page_alloc_domain(object, pindex, dom, + req)) != NULL) + return (m); + } + return (NULL); +#else + return vm_page_alloc_domain(object, pindex, 0, req); +#endif +} + +vm_page_t +vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain, + int req) +{ struct vnode *vp = NULL; vm_object_t m_object; vm_page_t m, mpred; @@ -1512,15 +1539,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) #if VM_NRESERVLEVEL > 0 } else if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = - vm_reserv_alloc_page(object, pindex, mpred)) == NULL) { + vm_reserv_alloc_page(object, pindex, domain, + mpred)) == NULL) { #else } else { #endif - m = vm_phys_alloc_pages(object != NULL ? + m = vm_phys_alloc_pages(domain, object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 - if (m == NULL && vm_reserv_reclaim_inactive()) { - m = vm_phys_alloc_pages(object != NULL ? + if (m == NULL && vm_reserv_reclaim_inactive(domain)) { + m = vm_phys_alloc_pages(domain, object != NULL ? 
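In the vm_page.c hunk above, vm_page_alloc() becomes a thin wrapper that asks the object's selector for a starting domain and then tries at most ds_count domains before giving up, leaving the real work to the new vm_page_alloc_domain(). A userland model of that retry loop; try_domain() and its failure pattern are invented, and the simple modulo step stands in for vm_domain_select_next().

#include <stdbool.h>
#include <stdio.h>

#define NDOMAINS        4

/* Pretend domains 0 and 2 are out of free pages. */
static bool
try_domain(int domain)
{
        return (domain != 0 && domain != 2);
}

/* Model of the wrapper: walk ds_count domains from the selector's pick. */
static int
page_alloc(int first_domain, int ds_count)
{
        int i, dom;

        for (i = 0, dom = first_domain; i < ds_count;
            i++, dom = (dom + 1) % NDOMAINS) {
                if (try_domain(dom))
                        return (dom);   /* vm_page_alloc_domain() succeeded */
        }
        return (-1);                    /* every permitted domain failed */
}

int
main(void)
{

        printf("page came from domain %d\n", page_alloc(2, NDOMAINS));
        return (0);
}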
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } @@ -1696,6 +1724,35 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { +#if MAXMEMDOM > 1 + struct vm_domain_select *sel; + vm_page_t m; + int i, dom; + + if (object == NULL) + sel = &vm_sel_def; + else + sel = &object->selector; + + for (i = 0, dom = vm_domain_select_first(sel); + i < sel->ds_count; i++, dom = vm_domain_select_next(sel, dom)) { + if ((m = vm_page_alloc_contig_domain(object, pindex, + dom, req, npages, low, high, + alignment, boundary, memattr)) != NULL) + return (m); + } + return (NULL); +#else + return vm_page_alloc_contig_domain(object, pindex, + 0, req, npages, low, high, alignment, boundary, memattr); +#endif +} + +vm_page_t +vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, + int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_memattr_t memattr) +{ struct vnode *drop; struct spglist deferred_vdrop_list; vm_page_t m, m_tmp, m_ret; @@ -1733,10 +1790,10 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || - (m_ret = vm_reserv_alloc_contig(object, pindex, npages, - low, high, alignment, boundary)) == NULL) + (m_ret = vm_reserv_alloc_contig(object, pindex, domain, + npages, low, high, alignment, boundary)) == NULL) #endif - m_ret = vm_phys_alloc_contig(npages, low, high, + m_ret = vm_phys_alloc_contig(domain, npages, low, high, alignment, boundary); } else { mtx_unlock(&vm_page_queue_free_mtx); @@ -1758,8 +1815,8 @@ retry: } else { #if VM_NRESERVLEVEL > 0 - if (vm_reserv_reclaim_contig(npages, low, high, alignment, - boundary)) + if (vm_reserv_reclaim_contig(domain, npages, low, high, + alignment, boundary)) goto retry; #endif } @@ -1897,7 +1954,7 @@ vm_page_alloc_init(vm_page_t m) * This routine may not sleep. 
*/ vm_page_t -vm_page_alloc_freelist(int flind, int req) +vm_page_alloc_freelist(int domain, int flind, int req) { struct vnode *drop; vm_page_t m; @@ -1921,7 +1978,8 @@ vm_page_alloc_freelist(int flind, int req) vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) - m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); + m = vm_phys_alloc_freelist_pages(domain, flind, + VM_FREEPOOL_DIRECT, 0); else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, @@ -2013,7 +2071,7 @@ struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { - return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); + return (&vm_page_domain(m)->vmd_pagequeues[m->queue]); } /* @@ -2072,7 +2130,7 @@ vm_page_enqueue(int queue, vm_page_t m) struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); - pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; + pq = &vm_page_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); @@ -2404,7 +2462,7 @@ _vm_page_deactivate(vm_page_t m, int athead) if (queue != PQ_NONE) vm_page_dequeue(m); m->flags &= ~PG_WINATCFLS; - pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; + pq = &vm_page_domain(m)->vmd_pagequeues[PQ_INACTIVE]; vm_pagequeue_lock(pq); m->queue = PQ_INACTIVE; if (athead) diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 7466b22babc0..788a30a3cccb 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -423,11 +423,15 @@ void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); -vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); -vm_page_t vm_page_alloc_freelist(int, int); +vm_page_t vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, + int domain, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); +vm_page_t vm_page_alloc_freelist(int, int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 14960c885734..5c361b490e40 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -71,6 +72,12 @@ _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, struct mem_affinity *mem_affinity; int vm_ndomains = 1; +vm_domainset_t vm_alldomains; +vm_domainset_t vm_domset[MAXMEMDOM]; +struct vm_domain_select vm_sel_def; +struct vm_domain_select vm_sel_rr; +struct vm_domain_select vm_sel_ft; +struct vm_domain_select vm_sel_dom[MAXMEMDOM]; struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; int vm_phys_nsegs; @@ -104,8 +111,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, &vm_ndomains, 0, "Number of physical memory domains available."); -static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, - int order); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain); 
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 14960c885734..5c361b490e40 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
+#include
 #include
 #include
 #include
@@ -71,6 +72,12 @@ _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
 
 struct mem_affinity *mem_affinity;
 int vm_ndomains = 1;
+vm_domainset_t vm_alldomains;
+vm_domainset_t vm_domset[MAXMEMDOM];
+struct vm_domain_select vm_sel_def;
+struct vm_domain_select vm_sel_rr;
+struct vm_domain_select vm_sel_ft;
+struct vm_domain_select vm_sel_dom[MAXMEMDOM];
 
 struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
 int vm_phys_nsegs;
@@ -104,8 +111,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
     &vm_ndomains, 0, "Number of physical memory domains available.");
 
-static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
-    int order);
 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
     int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
@@ -113,22 +118,6 @@ static int vm_phys_paddr_to_segind(vm_paddr_t pa);
 static void vm_phys_split_pages(vm_page_t m, int oind,
     struct vm_freelist *fl, int order);
 
-static __inline int
-vm_rr_selectdomain(void)
-{
-#if MAXMEMDOM > 1
-	struct thread *td;
-
-	td = curthread;
-
-	td->td_dom_rr_idx++;
-	td->td_dom_rr_idx %= vm_ndomains;
-	return (td->td_dom_rr_idx);
-#else
-	return (0);
-#endif
-}
-
 boolean_t
 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
 {
@@ -345,6 +334,11 @@ vm_phys_init(void)
 			    VM_FREELIST_DEFAULT);
 	}
 	for (dom = 0; dom < vm_ndomains; dom++) {
+		VM_DOMAIN_SET(dom, &vm_alldomains);
+		VM_DOMAIN_SET(dom, &vm_domset[dom]);
+		vm_sel_dom[dom].ds_mask = vm_domset[dom];
+		vm_sel_dom[dom].ds_policy = ROUNDROBIN;
+		vm_sel_dom[dom].ds_count = 1;
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
 				fl = vm_phys_free_queues[dom][flind][pind];
@@ -353,6 +347,15 @@ vm_phys_init(void)
 			}
 		}
 	}
+	vm_sel_def.ds_mask = vm_alldomains;
+	vm_sel_def.ds_policy = ROUNDROBIN;
+	vm_sel_def.ds_count = vm_ndomains;
+	vm_sel_rr.ds_mask = vm_alldomains;
+	vm_sel_rr.ds_policy = ROUNDROBIN;
+	vm_sel_rr.ds_count = vm_ndomains;
+	vm_sel_ft.ds_mask = vm_alldomains;
+	vm_sel_ft.ds_policy = FIRSTTOUCH;
+	vm_sel_ft.ds_count = vm_ndomains;
 	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
 }
 
@@ -388,7 +391,7 @@ vm_phys_add_page(vm_paddr_t pa)
 	m->phys_addr = pa;
 	m->queue = PQ_NONE;
 	m->segind = vm_phys_paddr_to_segind(pa);
-	vmd = vm_phys_domain(m);
+	vmd = vm_page_domain(m);
 	vmd->vmd_page_count++;
 	vmd->vmd_segs |= 1UL << m->segind;
 	KASSERT(m->order == VM_NFREEORDER,
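vm_phys_init() above builds the default selectors once at boot: an all-domains mask used with either a round-robin or a first-touch policy, plus one single-domain selector per domain. A compact sketch of that setup under the same assumptions (domset_t, DOMSET_SET() and the policy enum are simplified stand-ins for the kernel's vm_domainset_t machinery):

typedef unsigned domset_t;

enum policy { ROUND_ROBIN, FIRST_TOUCH };

struct selector {
	domset_t	mask;
	enum policy	policy;
	int		count;
};

#define	MAXDOM	8
#define	DOMSET_SET(d, s)	(*(s) |= 1u << (d))

static domset_t alldoms;
static struct selector sel_all_rr, sel_all_ft, sel_one[MAXDOM];

static void
selectors_init(int ndomains)
{
	int d;

	for (d = 0; d < ndomains; d++) {
		DOMSET_SET(d, &alldoms);
		sel_one[d].mask = 1u << d;	/* pin to a single domain */
		sel_one[d].policy = ROUND_ROBIN;
		sel_one[d].count = 1;
	}
	sel_all_rr.mask = alldoms;		/* spread across all domains */
	sel_all_rr.policy = ROUND_ROBIN;
	sel_all_rr.count = ndomains;
	sel_all_ft.mask = alldoms;		/* prefer the allocating CPU's domain */
	sel_all_ft.policy = FIRST_TOUCH;
	sel_all_ft.count = ndomains;
}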
@@ -409,61 +412,37 @@
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_pages(int pool, int order)
+vm_phys_alloc_pages(int domain, int pool, int order)
 {
 	vm_page_t m;
-	int dom, domain, flind;
-
-	KASSERT(pool < VM_NFREEPOOL,
-	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
-	KASSERT(order < VM_NFREEORDER,
-	    ("vm_phys_alloc_pages: order %d is out of range", order));
+	int flind;
 
-	for (dom = 0; dom < vm_ndomains; dom++) {
-		domain = vm_rr_selectdomain();
-		for (flind = 0; flind < vm_nfreelists; flind++) {
-			m = vm_phys_alloc_domain_pages(domain, flind, pool,
-			    order);
-			if (m != NULL)
-				return (m);
-		}
+	for (flind = 0; flind < vm_nfreelists; flind++) {
+		m = vm_phys_alloc_freelist_pages(domain, flind, pool, order);
+		if (m != NULL)
+			return (m);
 	}
 	return (NULL);
 }
 
-/*
- * Find and dequeue a free page on the given free list, with the
- * specified pool and order
- */
 vm_page_t
-vm_phys_alloc_freelist_pages(int flind, int pool, int order)
-{
+vm_phys_alloc_freelist_pages(int domain, int flind, int pool, int order)
+{
+	struct vm_freelist *fl;
+	struct vm_freelist *alt;
+	int oind, pind;
 	vm_page_t m;
-	int dom, domain;
 
 	KASSERT(flind < VM_NFREELIST,
-	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
+	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
+	    flind));
 	KASSERT(pool < VM_NFREEPOOL,
 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
-
-	for (dom = 0; dom < vm_ndomains; dom++) {
-		domain = vm_rr_selectdomain();
-		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
-		if (m != NULL)
-			return (m);
-	}
-	return (NULL);
-}
-
-static vm_page_t
-vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
-{
-	struct vm_freelist *fl;
-	struct vm_freelist *alt;
-	int oind, pind;
-	vm_page_t m;
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
+	    domain));
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	fl = &vm_phys_free_queues[domain][flind][pool][0];
@@ -814,7 +793,7 @@ vm_phys_zero_pages_idle(void)
 	vm_page_t m, m_tmp;
 	int domain;
 
-	domain = vm_rr_selectdomain();
+	domain = PCPU_GET(domain);
 	fl = vm_phys_free_queues[domain][0][0];
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	for (;;) {
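Note the inversion of the search order here: the old code looped over domains inside vm_phys_alloc_pages() via vm_rr_selectdomain(), while the new code lets the caller fix the domain and only walks the freelists within it. A sketch of the resulting indexing, with made-up array sizes and freelist_alloc() standing in for the buddy search performed by vm_phys_alloc_freelist_pages():

#define	NDOM		2
#define	NFREELIST	2
#define	NFREEPOOL	3
#define	NFREEORDER	13

struct freepage;		/* opaque in this sketch */

/* Mirrors the vm_phys_free_queues[domain][freelist][pool][order] layout. */
static struct freepage *free_queues[NDOM][NFREELIST][NFREEPOOL][NFREEORDER];

static struct freepage *
freelist_alloc(int domain, int flind, int pool, int order)
{
	/* Stand-in for the buddy search starting at "order". */
	for (; order < NFREEORDER; order++)
		if (free_queues[domain][flind][pool][order] != NULL)
			return (free_queues[domain][flind][pool][order]);
	return (NULL);
}

static struct freepage *
phys_alloc_pages(int domain, int pool, int order)
{
	struct freepage *m;
	int flind;

	for (flind = 0; flind < NFREELIST; flind++)
		if ((m = freelist_alloc(domain, flind, pool, order)) != NULL)
			return (m);
	return (NULL);
}

The idle zeroing path keeps a notion of locality without the old round-robin helper by reading the current CPU's home domain (PCPU_GET(domain)) instead.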
@@ -861,7 +840,7 @@
  * "alignment" and "boundary" must be a power of two.
  */
 vm_page_t
-vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_freelist *fl;
@@ -869,7 +848,7 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
 	vm_paddr_t pa, pa_last, size;
 	vm_page_t m, m_ret;
 	u_long npages_end;
-	int dom, domain, flind, oind, order, pind;
+	int flind, oind, order, pind;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	size = npages << PAGE_SHIFT;
@@ -881,9 +860,6 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
 	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
 	/* Compute the queue that is the best fit for npages. */
 	for (order = 0; (1 << order) < npages; order++);
-	dom = 0;
-restartdom:
-	domain = vm_rr_selectdomain();
 	for (flind = 0; flind < vm_nfreelists; flind++) {
 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
@@ -941,8 +917,6 @@ restartdom:
 			}
 		}
 	}
-	if (++dom < vm_ndomains)
-		goto restartdom;
 	return (NULL);
 done:
 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index 6d94e07fafad..f2bd58b2889f 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -69,10 +69,11 @@ extern int vm_phys_nsegs;
  * The following functions are only to be used by the virtual memory system.
  */
 void vm_phys_add_page(vm_paddr_t pa);
-vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary);
-vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order);
-vm_page_t vm_phys_alloc_pages(int pool, int order);
+vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+vm_page_t vm_phys_alloc_freelist_pages(int domain, int flind, int pool,
+    int order);
+vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
 boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
@@ -91,30 +92,43 @@ boolean_t vm_phys_zero_pages_idle(void);
  *
  * Return the memory domain the page belongs to.
  */
-static inline struct vm_domain *
+static inline int
 vm_phys_domain(vm_page_t m)
 {
 #if MAXMEMDOM > 1
-	int domn, segind;
+	int segind;
 
 	/* XXXKIB try to assert that the page is managed */
 	segind = m->segind;
 	KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
-	domn = vm_phys_segs[segind].domain;
-	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
-	return (&vm_dom[domn]);
+	return (vm_phys_segs[segind].domain);
#else
-	return (&vm_dom[0]);
+	return (0);
 #endif
 }
 
+/*
+ * vm_page_domain:
+ *
+ * Return the memory domain structure the page belongs to.
+ */
+static inline struct vm_domain *
+vm_page_domain(vm_page_t m)
+{
+	int domn;
+
+	domn = vm_phys_domain(m);
+	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
+	return (&vm_dom[domn]);
+}
+
 static inline void
 vm_phys_freecnt_adj(vm_page_t m, int adj)
 {
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	vm_cnt.v_free_count += adj;
-	vm_phys_domain(m)->vmd_free_count += adj;
+	vm_page_domain(m)->vmd_free_count += adj;
 }
 
 #endif	/* _KERNEL */
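Splitting vm_phys_domain() (an index) from vm_page_domain() (the per-domain structure) lets code that only needs to remember where memory came from store a small integer, which is exactly what the reservation code below does with rv->domain. An illustrative sketch under hypothetical names, not the kernel API:

#define	MAXDOM	4

struct domain_stats {
	long	free_count;
};

static struct domain_stats dstats[MAXDOM];

struct tracked_chunk {
	void	*base;
	int	domain;		/* analogous to rv->domain below */
};

static void
chunk_record(struct tracked_chunk *c, void *base, int domain)
{
	c->base = base;
	c->domain = domain;			/* store just the index */
}

static void
chunk_release(const struct tracked_chunk *c, long pages)
{
	dstats[c->domain].free_count += pages;	/* resolve to the structure late */
}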
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 663390ed3adf..a7f1faf8cec2 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -127,6 +127,7 @@ struct vm_reserv {
 	vm_object_t	object;		/* containing object */
 	vm_pindex_t	pindex;		/* offset within object */
 	vm_page_t	pages;		/* first page of a superpage */
+	int		domain;		/* NUMA domain. */
 	int		popcnt;		/* # of pages in use */
 	char		inpartpopq;
 	popmap_t	popmap[NPOPMAP];	/* bit vector of used pages */
@@ -164,8 +165,7 @@ static vm_reserv_t vm_reserv_array;
  *
  * Access to this queue is synchronized by the free page queue lock.
  */
-static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
-    TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);
+static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
 
 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
@@ -202,7 +202,7 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	vm_reserv_t rv;
-	int counter, error, level, unused_pages;
+	int counter, error, level, unused_pages, i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
@@ -213,9 +213,11 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
 		counter = 0;
 		unused_pages = 0;
 		mtx_lock(&vm_page_queue_free_mtx);
-		TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
-			counter++;
-			unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+		for (i = 0; i < vm_ndomains; i++) {
+			TAILQ_FOREACH(rv, &vm_rvq_partpop[i], partpopq) {
+				counter++;
+				unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+			}
 		}
 		mtx_unlock(&vm_page_queue_free_mtx);
 		sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level,
@@ -247,7 +249,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
 	KASSERT(rv->popcnt > 0,
 	    ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
 	if (rv->inpartpopq) {
-		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 		rv->inpartpopq = FALSE;
 	}
 	clrbit(rv->popmap, index);
@@ -259,7 +261,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
 		vm_reserv_freed++;
 	} else {
 		rv->inpartpopq = TRUE;
-		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	}
 }
 
@@ -303,14 +305,14 @@ vm_reserv_populate(vm_reserv_t rv, int index)
 	KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
 	    ("vm_reserv_populate: reserv %p is already full", rv));
 	if (rv->inpartpopq) {
-		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 		rv->inpartpopq = FALSE;
 	}
 	setbit(rv->popmap, index);
 	rv->popcnt++;
 	if (rv->popcnt < VM_LEVEL_0_NPAGES) {
 		rv->inpartpopq = TRUE;
-		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	}
 }
 
@@ -327,8 +329,9 @@ vm_reserv_populate(vm_reserv_t rv, int index)
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
-    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
+vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+    u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa, size;
 	vm_page_t m, m_ret, mpred, msucc;
@@ -440,7 +443,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
 	 * instance, the specified index may not be the first page within the
 	 * first new reservation.
 	 */
-	m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment,
+	m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
 	    VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
 	if (m == NULL)
 		return (NULL);
@@ -456,6 +459,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
 	LIST_INSERT_HEAD(&object->rvq, rv, objq);
 	rv->object = object;
 	rv->pindex = first;
+	rv->domain = vm_phys_domain(m);
 	KASSERT(rv->popcnt == 0,
 	    ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
 	    rv));
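The single partially-populated reservation queue becomes an array indexed by the reservation's domain, which is recorded at creation time from the first page of the superpage. A userspace model of the per-domain queues and of the requeue step that vm_reserv_populate() and vm_reserv_depopulate() perform above (structure and names simplified):

#include <sys/queue.h>

#define	MAXDOM	4

struct resv {
	int	domain;		/* domain the backing pages came from */
	int	popcnt;
	int	inpartpopq;
	TAILQ_ENTRY(resv) partpopq;
};

static TAILQ_HEAD(, resv) partpop[MAXDOM];

static void
partpop_init(void)
{
	int d;

	for (d = 0; d < MAXDOM; d++)
		TAILQ_INIT(&partpop[d]);
}

/*
 * When the population count changes, the reservation moves on or off the
 * partially-populated list of its own domain only.
 */
static void
resv_requeue(struct resv *rv, int full)
{
	if (rv->inpartpopq) {
		TAILQ_REMOVE(&partpop[rv->domain], rv, partpopq);
		rv->inpartpopq = 0;
	}
	if (!full) {
		TAILQ_INSERT_TAIL(&partpop[rv->domain], rv, partpopq);
		rv->inpartpopq = 1;
	}
}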
@@ -511,7 +515,8 @@ found:
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
+vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+    vm_page_t mpred)
 {
 	vm_page_t m, msucc;
 	vm_pindex_t first, leftcap, rightcap;
@@ -590,7 +595,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
 	/*
 	 * Allocate and populate the new reservation.
 	 */
-	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
+	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
 	if (m == NULL)
 		return (NULL);
 	rv = vm_reserv_from_page(m);
@@ -601,6 +606,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
 	LIST_INSERT_HEAD(&object->rvq, rv, objq);
 	rv->object = object;
 	rv->pindex = first;
+	rv->domain = vm_phys_domain(m);
 	KASSERT(rv->popcnt == 0,
 	    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
 	KASSERT(!rv->inpartpopq,
@@ -716,7 +722,7 @@ vm_reserv_break_all(vm_object_t object)
 		KASSERT(rv->object == object,
 		    ("vm_reserv_break_all: reserv %p is corrupted", rv));
 		if (rv->inpartpopq) {
-			TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+			TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 			rv->inpartpopq = FALSE;
 		}
 		vm_reserv_break(rv, NULL);
@@ -770,6 +776,8 @@ vm_reserv_init(void)
 			paddr += VM_LEVEL_0_SIZE;
 		}
 	}
+	for (i = 0; i < MAXMEMDOM; i++)
+		TAILQ_INIT(&vm_rvq_partpop[i]);
 }
 
 /*
@@ -816,7 +824,7 @@ vm_reserv_reactivate_page(vm_page_t m)
 	KASSERT(rv->inpartpopq,
 	    ("vm_reserv_reactivate_page: reserv %p's inpartpopq is FALSE",
 	    rv));
-	TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+	TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	rv->inpartpopq = FALSE;
 	/* Don't release "m" to the physical memory allocator. */
 	vm_reserv_break(rv, m);
@@ -837,7 +845,7 @@ vm_reserv_reclaim(vm_reserv_t rv)
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	KASSERT(rv->inpartpopq,
 	    ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
-	TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+	TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	rv->inpartpopq = FALSE;
 	vm_reserv_break(rv, NULL);
 	vm_reserv_reclaimed++;
@@ -851,12 +859,12 @@ vm_reserv_reclaim(vm_reserv_t rv)
  * The free page queue lock must be held.
  */
 boolean_t
-vm_reserv_reclaim_inactive(void)
+vm_reserv_reclaim_inactive(int domain)
 {
 	vm_reserv_t rv;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
+	if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
 		vm_reserv_reclaim(rv);
 		return (TRUE);
 	}
@@ -873,8 +881,8 @@ vm_reserv_reclaim_inactive(void)
  * The free page queue lock must be held.
 */
 boolean_t
-vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary)
+vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa, size;
 	vm_reserv_t rv;
@@ -884,7 +892,7 @@ vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
 	if (npages > VM_LEVEL_0_NPAGES - 1)
 		return (FALSE);
 	size = npages << PAGE_SHIFT;
-	TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
+	TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
 		pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
 		if (pa + PAGE_SIZE - size < low) {
 			/* This entire reservation is too low; go to next.
 			 */
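With vm_reserv_reclaim_inactive() and vm_reserv_reclaim_contig() now taking a domain, a caller that wants the old "any domain" behaviour has to iterate itself. A hypothetical caller-side fallback, with reclaim_inactive() standing in for the kernel function and the preferred domain tried first:

int reclaim_inactive(int domain);	/* stand-in; returns non-zero on success */

int
reclaim_any(int preferred, int ndomains)
{
	int d, i;

	for (i = 0, d = preferred; i < ndomains;
	    i++, d = (d + 1) % ndomains)
		if (reclaim_inactive(d))
			return (1);
	return (0);
}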
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h
index 4c142c773e84..b062087422f4 100644
--- a/sys/vm/vm_reserv.h
+++ b/sys/vm/vm_reserv.h
@@ -46,18 +46,19 @@
  * The following functions are only to be used by the virtual memory system.
  */
 vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
-    u_long npages, vm_paddr_t low, vm_paddr_t high,
+    int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary);
 vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
-    vm_page_t mpred);
+    int domain, vm_page_t mpred);
 void vm_reserv_break_all(vm_object_t object);
 boolean_t vm_reserv_free_page(vm_page_t m);
 void vm_reserv_init(void);
 int vm_reserv_level_iffullpop(vm_page_t m);
 boolean_t vm_reserv_reactivate_page(vm_page_t m);
-boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low,
-    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
-boolean_t vm_reserv_reclaim_inactive(void);
+boolean_t vm_reserv_reclaim_contig(int domain, u_long npages,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary);
+boolean_t vm_reserv_reclaim_inactive(int domain);
 void vm_reserv_rename(vm_page_t m, vm_object_t new_object,
     vm_object_t old_object, vm_pindex_t old_object_offset);
 vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,