Index: sparc64/sparc64/machdep.c =================================================================== --- sparc64/sparc64/machdep.c (revision 246206) +++ sparc64/sparc64/machdep.c (working copy) @@ -133,8 +133,6 @@ vm_offset_t kstack0; vm_paddr_t kstack0_phys; -struct kva_md_info kmi; - u_long ofw_vec; u_long ofw_tba; u_int tba_taken_over; @@ -181,11 +179,6 @@ physsz / (1024 * 1024)); realmem = (long)physsz / PAGE_SIZE; - vm_ksubmap_init(&kmi); - - bufinit(); - vm_pager_bufferinit(); - EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); Index: sparc64/include/pmap.h =================================================================== --- sparc64/include/pmap.h (revision 246206) +++ sparc64/include/pmap.h (working copy) @@ -105,8 +105,6 @@ #define kernel_pmap (&kernel_pmap_store) extern struct rwlock_padalign tte_list_global_lock; extern vm_paddr_t phys_avail[]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; #ifdef PMAP_STATS Index: ia64/include/pmap.h =================================================================== --- ia64/include/pmap.h (revision 246206) +++ ia64/include/pmap.h (working copy) @@ -108,8 +108,6 @@ #ifdef _KERNEL extern vm_paddr_t phys_avail[]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; extern uint64_t pmap_vhpt_base[]; extern int pmap_vhpt_log2size; Index: ia64/ia64/machdep.c =================================================================== --- ia64/ia64/machdep.c (revision 246206) +++ ia64/ia64/machdep.c (working copy) @@ -157,8 +157,6 @@ /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = NULL; -struct kva_md_info kmi; - #define Mhz 1000000L #define Ghz (1000L*Mhz) @@ -262,8 +260,6 @@ printf("real memory = %ld (%ld MB)\n", ptoa(realmem), ptoa(realmem) / 1048576); - vm_ksubmap_init(&kmi); - printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); @@ -274,12 +270,6 @@ (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa); /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); - - /* * Traverse the MADT to discover IOSAPIC and Local SAPIC * information. */ Index: ia64/ia64/pmap.c =================================================================== --- ia64/ia64/pmap.c (revision 246206) +++ ia64/ia64/pmap.c (working copy) @@ -1748,8 +1748,6 @@ * Enter on the PV list if part of our managed memory. 
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); pmap_insert_entry(pmap, va, m); managed = TRUE; } @@ -1847,9 +1845,6 @@ struct ia64_lpte *pte; boolean_t managed; - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || - (m->oflags & VPO_UNMANAGED) != 0, - ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); Index: vm/vm_pager.c =================================================================== --- vm/vm_pager.c (revision 246206) +++ vm/vm_pager.c (working copy) @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -81,9 +82,17 @@ #include #include #include +#include +#include +static MALLOC_DEFINE(M_PAGEBUF, "pagerbuf", "Pager buffer structures"); + int cluster_pbuf_freecnt = -1; /* unlimited to begin with */ +int nswbuf; +SYSCTL_INT(_kern, OID_AUTO, nswbuf, CTLFLAG_RDTUN, &nswbuf, 0, + "Number of swap buffers"); + static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int); static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); @@ -91,6 +100,32 @@ static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dead_pager_dealloc(vm_object_t); +SYSCTL_NODE(_vm_kvm, OID_AUTO, pager_map, CTLFLAG_RW, 0, "pager_map"); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, maxsize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, cursize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, freesize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, maxfree, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, startaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_START, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_pager_map, OID_AUTO, endaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pager_map, + SYSCTL_VM_MAP_END, sysctl_vm_map, "LU", + ""); + static int dead_pager_getpages(obj, ma, count, req) vm_object_t obj; @@ -165,46 +200,73 @@ static const int npagers = sizeof(pagertab) / sizeof(pagertab[0]); -/* - * Kernel address space for mapping pages. - * Used by pagers where KVAs are needed for IO. - * - * XXX needs to be large enough to support the number of pending async - * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size - * (MAXPHYS == 64k) if you want to get the most efficiency. - */ vm_map_t pager_map; -static int bswneeded; + static vm_offset_t swapbkva; /* swap buffers kva */ struct mtx pbuf_mtx; +static int bswneeded; /* protected by pbuf_mtx */ static TAILQ_HEAD(swqueue, buf) bswlist; +struct buf *swbuf; +#ifdef DIRECTIO +extern void ffs_rawread_setup(void); +#endif + +/* + * Initialize known pagers. + * NB: Only after the buffers have been initialized. 
+ */ void -vm_pager_init() +vm_pager_init(void) { struct pagerops **pgops; - TAILQ_INIT(&bswlist); - /* - * Initialize known pagers - */ for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++) if ((*pgops)->pgo_init != NULL) (*(*pgops)->pgo_init) (); } +SYSINIT(vm_pager_init, SI_SUB_CPU, SI_ORDER_ANY, vm_pager_init, NULL); -void -vm_pager_bufferinit() +/* + * Kernel address space for mapping pages. + * Used by pagers where KVAs are needed for IO. + * swbufs are used as temporary holders for I/O, such as paging I/O. + * + * NB: needs to be large enough to support the number of pending async + * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size + * (MAXPHYS == 64k) if you want to get the most efficiency. + */ +static void +vm_pager_bufferinit(void *dummy) { + vm_offset_t minaddr, maxaddr; struct buf *bp; int i; mtx_init(&pbuf_mtx, "pbuf mutex", NULL, MTX_DEF); - bp = swbuf; + /* + * We have no fewer than 16 and no more than 256. + */ + nswbuf = max(min(nbuf/4, 256), 16); +#ifdef NSWBUF_MIN + if (nswbuf < NSWBUF_MIN) + nswbuf = NSWBUF_MIN; +#endif +#ifdef DIRECTIO + ffs_rawread_setup(); +#endif + + pager_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (long)nswbuf * MAXPHYS, FALSE); + pager_map->system_map = 1; + + /* * Now set up swap and physical I/O buffer headers. */ - for (i = 0; i < nswbuf; i++, bp++) { + TAILQ_INIT(&bswlist); + swbuf = malloc(nswbuf * sizeof(struct buf), M_PAGEBUF, M_WAITOK); + for (i = 0, bp = swbuf; i < nswbuf; i++, bp++) { TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist); BUF_LOCKINIT(bp); LIST_INIT(&bp->b_dep); @@ -219,6 +281,7 @@ if (!swapbkva) panic("Not enough pager_map VM space for physical buffers"); } +SYSINIT(vm_pager_buf, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, vm_pager_bufferinit, NULL); /* * Allocate an instance of a pager of the given type. @@ -285,12 +348,8 @@ } /* - * initialize a physical buffer + * Initialize a physical buffer.
*/ - -/* - * XXX This probably belongs in vfs_bio.c - */ static void initpbuf(struct buf *bp) { Index: vm/vm_kern.c =================================================================== --- vm/vm_kern.c (revision 246206) +++ vm/vm_kern.c (working copy) @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -85,15 +86,48 @@ #include #include -vm_map_t kernel_map=0; -vm_map_t kmem_map=0; -vm_map_t exec_map=0; -vm_map_t pipe_map; -vm_map_t buffer_map=0; +vm_map_t kernel_map; const void *zero_region; CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); +SYSCTL_NODE(_vm, OID_AUTO, kvm, CTLFLAG_RW, 0, "Kernel virtual memory"); + +static vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; +SYSCTL_ULONG(_vm_kvm, OID_AUTO, min_kernel_address, CTLFLAG_RD, + &vm_min_kernel_address, 0, "Start of kvm address range"); + +#ifndef __sparc64__ +static vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS; +#endif +SYSCTL_ULONG(_vm_kvm, OID_AUTO, max_kernel_address, CTLFLAG_RD, + &vm_max_kernel_address, 0, "End of kvm address range"); + +SYSCTL_PROC(_vm_kvm, OID_AUTO, maxsize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm, OID_AUTO, cursize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm, OID_AUTO, freesize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm, OID_AUTO, maxfree, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm, OID_AUTO, startaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_START, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm, OID_AUTO, endaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kernel_map, + SYSCTL_VM_MAP_END, sysctl_vm_map, "LU", + ""); + /* * kmem_alloc_nofault: * @@ -659,25 +693,18 @@ * `start' as allocated, and the range between `start' and `end' as free. */ void -kmem_init(start, end) - vm_offset_t start, end; +kmem_init(vm_offset_t kernel, vm_offset_t start, vm_offset_t end) { - vm_map_t m; - m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); - m->system_map = 1; - vm_map_lock(m); - /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ - kernel_map = m; - (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, -#ifdef __amd64__ - KERNBASE, -#else - VM_MIN_KERNEL_ADDRESS, -#endif + kernel_map = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); + kernel_map->system_map = 1; + + vm_map_lock(kernel_map); + /* NB: cannot use kgdb to debug, starting with this assignment ... */ + (void) vm_map_insert(kernel_map, NULL, (vm_ooffset_t) 0, kernel, start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); - /* ... and ending with the completion of the above `insert' */ - vm_map_unlock(m); + /* ... and ending with the completion of the above `insert'. 
*/ + vm_map_unlock(kernel_map); kmem_init_zero_region(); } Index: vm/vm_pager.h =================================================================== --- vm/vm_pager.h (revision 246206) +++ vm/vm_pager.h (working copy) @@ -101,7 +101,6 @@ vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); -void vm_pager_bufferinit(void); void vm_pager_deallocate(vm_object_t); static __inline int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int); static __inline boolean_t vm_pager_has_page(vm_object_t, vm_pindex_t, int *, int *); Index: vm/vm_kern.h =================================================================== --- vm/vm_kern.h (revision 246206) +++ vm/vm_kern.h (working copy) @@ -68,7 +68,9 @@ extern vm_map_t kernel_map; extern vm_map_t kmem_map; extern vm_map_t exec_map; -extern vm_map_t pipe_map; extern u_long vm_kmem_size; +extern u_long kmem_real; /* kmem_map memory backed by real memory */ +void kmem_init(vm_offset_t, vm_offset_t, vm_offset_t); + #endif /* _VM_VM_KERN_H_ */ Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 246206) +++ vm/vm_map.c (working copy) @@ -167,6 +167,41 @@ } /* + * Generic sysctl to report various information on VM maps. + */ +int +sysctl_vm_map(SYSCTL_HANDLER_ARGS) +{ + u_long size = 0; + vm_map_t vmmap = *((vm_map_t *)(oidp->oid_arg1)); + + vm_map_lock_read(vmmap); + switch (oidp->oid_arg2) { + case SYSCTL_VM_MAP_MAXSIZE: + size = vm_map_maxsize(vmmap); + break; + case SYSCTL_VM_MAP_CURSIZE: + size = vm_map_cursize(vmmap); + break; + case SYSCTL_VM_MAP_FREESIZE: + size = vm_map_maxsize(vmmap); + size -= vm_map_cursize(vmmap); + break; + case SYSCTL_VM_MAP_MAXFREE: + size = vm_map_freesize(vmmap); + break; + case SYSCTL_VM_MAP_START: + size = vm_map_min(vmmap); + break; + case SYSCTL_VM_MAP_END: + size = vm_map_max(vmmap); + break; + } + vm_map_unlock_read(vmmap); + return (sysctl_handle_long(oidp, &size, 0, req)); +} + +/* * vm_map_startup: * * Initialize the vm_map module. Must be called before @@ -300,12 +335,14 @@ return (vm); } -void -vm_init2(void) +static void +vmspace_init(void) { + uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count, (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE) / 8 + maxproc * 2 + maxfiles); + vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, #ifdef INVARIANTS vmspace_zdtor, @@ -314,6 +351,7 @@ #endif vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } +SYSINIT(vmspace_init, SI_SUB_KMEM, SI_ORDER_ANY, vmspace_init, NULL); static void vmspace_container_reset(struct proc *p) Index: vm/vm_map.h =================================================================== --- vm/vm_map.h (revision 246206) +++ vm/vm_map.h (working copy) @@ -199,18 +199,41 @@ #define MAP_BUSY_WAKEUP 0x02 #ifdef _KERNEL +/* End address of map. */ static __inline vm_offset_t vm_map_max(const struct vm_map *map) { return (map->max_offset); } - +/* Start address of map. */ static __inline vm_offset_t vm_map_min(const struct vm_map *map) { return (map->min_offset); } +/* Currently allocated memory in the map. */ +static __inline vm_size_t +vm_map_cursize(const struct vm_map *map) +{ + return (map->size); +} +/* Total maximum size of the map. */ +static __inline vm_size_t +vm_map_maxsize(const struct vm_map *map) +{ + return (map->max_offset - map->min_offset); +} +/* Largest contiguous range in map.
*/ +static __inline vm_size_t +vm_map_freesize(const struct vm_map *map) +{ + vm_size_t free; + free = map->root != NULL ? map->root->max_free : + map->max_offset - map->min_offset; + return (free); +} + static __inline pmap_t vm_map_pmap(vm_map_t map) { Index: vm/swap_pager.c =================================================================== --- vm/swap_pager.c (revision 246206) +++ vm/swap_pager.c (working copy) @@ -153,6 +153,9 @@ int swap_pager_avail; static int swdev_syscall_active = 0; /* serialize swap(on|off) */ +static long maxswzone; /* max swmeta KVA storage */ +SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN, &maxswzone, 0, + "Maximum memory for swap metadata"); static vm_ooffset_t swap_total; SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0, "Total amount of available swap storage."); @@ -508,6 +511,11 @@ { int n, n2; +#ifdef VM_SWZONE_SIZE_MAX + maxswzone = VM_SWZONE_SIZE_MAX; +#endif + TUNABLE_LONG_FETCH("kern.maxswzone", &maxswzone); + /* * Number of in-transit swap bp operations. Don't * exhaust the pbufs completely. Make sure we Index: vm/vm_page.c =================================================================== --- vm/vm_page.c (revision 246206) +++ vm/vm_page.c (working copy) @@ -158,10 +158,7 @@ static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(int queue, vm_page_t m); -static void vm_page_init_fakepg(void *dummy); -SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); - static void vm_page_init_fakepg(void *dummy) { @@ -169,6 +166,7 @@ fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); } +SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 Index: vm/vm_page.h =================================================================== --- vm/vm_page.h (revision 246206) +++ vm/vm_page.h (working copy) @@ -360,6 +360,7 @@ } #endif +void vm_set_page_size(void); void vm_page_busy(vm_page_t m); void vm_page_flash(vm_page_t m); void vm_page_io_start(vm_page_t m); Index: vm/vm.h =================================================================== --- vm/vm.h (revision 246206) +++ vm/vm.h (working copy) @@ -125,22 +125,6 @@ struct vm_reserv; typedef struct vm_reserv *vm_reserv_t; -/* - * Information passed from the machine-independant VM initialization code - * for use by machine-dependant code (mainly for MMU support) - */ -struct kva_md_info { - vm_offset_t buffer_sva; - vm_offset_t buffer_eva; - vm_offset_t clean_sva; - vm_offset_t clean_eva; - vm_offset_t pager_sva; - vm_offset_t pager_eva; -}; - -extern struct kva_md_info kmi; -extern void vm_ksubmap_init(struct kva_md_info *); - extern int old_mlock; struct ucred; Index: vm/vm_init.c =================================================================== --- vm/vm_init.c (revision 246206) +++ vm/vm_init.c (working copy) @@ -73,7 +73,6 @@ #include #include #include -#include #include #include @@ -88,124 +87,57 @@ long physmem; -static int exec_map_entries = 16; -TUNABLE_INT("vm.exec_map_entries", &exec_map_entries); -SYSCTL_INT(_vm, OID_AUTO, exec_map_entries, CTLFLAG_RD, &exec_map_entries, 0, - "Maximum number of simultaneous execs"); - /* - * System initialization + * Initializes resident memory structures. 
From here on, all physical + * memory is accounted for, and we use only virtual addresses. + * vm_init initializes the virtual memory system. + * This is done here in one place instead of using individual + * SYSINITs to avoid confusion. Also the order of these operations + * is important. */ -static void vm_mem_init(void *); -SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL); - -/* - * vm_init initializes the virtual memory system. - * This is done only by the first cpu up. - * - * The start and end address of physical memory is passed in. - */ -/* ARGSUSED*/ static void -vm_mem_init(dummy) - void *dummy; +vm_mem_init(void *dummy) { - /* - * Initializes resident memory structures. From here on, all physical - * memory is accounted for, and we use only virtual addresses. - */ + vm_size_t kernel; + vm_set_page_size(); - virtual_avail = vm_page_startup(virtual_avail); - - /* - * Initialize other VM packages - */ - vm_object_init(); - vm_map_startup(); - kmem_init(virtual_avail, virtual_end); - pmap_init(); - vm_pager_init(); -} -void -vm_ksubmap_init(struct kva_md_info *kmi) -{ - vm_offset_t firstaddr; - caddr_t v; - vm_size_t size = 0; - long physmem_est; - vm_offset_t minaddr; - vm_offset_t maxaddr; - vm_map_t clean_map; - /* - * Allocate space for system data structures. - * The first available kernel virtual address is in "v". - * As pages of kernel virtual memory are allocated, "v" is incremented. - * As pages of memory are allocated and cleared, - * "firstaddr" is incremented. - * An index into the kernel page table corresponding to the - * virtual memory address maintained in "v" is kept in "mapaddr". + * Allocate memory for the page structures and bootstrapping + * of the kernel memory allocator. */ + virtual_avail = vm_page_startup(virtual_avail); /* - * Make two passes. The first pass calculates how much memory is - * needed and allocates it. The second pass assigns virtual - * addresses to the various data structures. + * Initial allocation of kernel and kmem objects plus the UMA + * object zone. */ - firstaddr = 0; -again: - v = (caddr_t)firstaddr; + vm_object_init(); - v = kern_timeout_callwheel_alloc(v); - /* - * Discount the physical memory larger than the size of kernel_map - * to avoid eating up all of KVA space. + * Allocate and initialize the VM map zones. */ - physmem_est = lmin(physmem, btoc(kernel_map->max_offset - - kernel_map->min_offset)); + vm_map_startup(); - v = kern_vfs_bio_buffer_alloc(v, physmem_est); - /* - * End of first pass, size has been calculated so allocate memory + * Create the kernel map and insert a mapping covering the + * kernel and other pre-allocated pages during bootstrapping. */ - if (firstaddr == 0) { - size = (vm_size_t)v; - firstaddr = kmem_alloc(kernel_map, round_page(size)); - if (firstaddr == 0) - panic("startup: no room for tables"); - goto again; - } + kernel = +#ifdef __amd64__ + KERNBASE; +#else + VM_MIN_KERNEL_ADDRESS; +#endif + kmem_init(kernel, virtual_avail, virtual_end); /* - * End of second pass, addresses have been assigned + * Initialize any structures that the pmap system needs to + * map virtual memory. + * + * NB: MD specific. 
*/ - if ((vm_size_t)((char *)v - firstaddr) != size) - panic("startup: table size inconsistency"); - - clean_map = kmem_suballoc(kernel_map, &kmi->clean_sva, &kmi->clean_eva, - (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS, TRUE); - buffer_map = kmem_suballoc(clean_map, &kmi->buffer_sva, - &kmi->buffer_eva, (long)nbuf * BKVASIZE, FALSE); - buffer_map->system_map = 1; - pager_map = kmem_suballoc(clean_map, &kmi->pager_sva, &kmi->pager_eva, - (long)nswbuf * MAXPHYS, FALSE); - pager_map->system_map = 1; - exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE); - pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva, - FALSE); - - /* - * XXX: Mbuf system machine-specific initializations should - * go here, if anywhere. - */ - - /* - * Initialize the callouts we just allocated. - */ - kern_timeout_callwheel_init(); + pmap_init(); } +SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL); Index: vm/vm_extern.h =================================================================== --- vm/vm_extern.h (revision 246206) +++ vm/vm_extern.h (working copy) @@ -39,6 +39,26 @@ #ifdef _KERNEL +/* + * The kernel VM space layout is as follows: + * + * VM_MIN_KERNEL_ADDRESS + * [KERNBASE] + * kernel text, data, bss + * bootstrap and statically allocated structures (pmap) + * virtual_avail + * kernel_map + * ... + * kmem_map + * ... + * virtual_end + * VM_MAX_KERNEL_ADDRESS + * + * On most architectures virtual_end is equal to VM_MAX_KERNEL_ADDRESS. + */ +extern vm_offset_t virtual_avail; /* first available kernel vm page */ +extern vm_offset_t virtual_end; /* end of the kernel vm space */ + int kernacc(void *, int, int); vm_offset_t kmem_alloc(vm_map_t, vm_size_t); vm_offset_t kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, @@ -51,7 +71,6 @@ vm_offset_t kmem_alloc_wait(vm_map_t, vm_size_t); void kmem_free(vm_map_t, vm_offset_t, vm_size_t); void kmem_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); -void kmem_init(vm_offset_t, vm_offset_t); vm_offset_t kmem_malloc(vm_map_t map, vm_size_t size, int flags); int kmem_back(vm_map_t, vm_offset_t, vm_size_t, int); vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t, @@ -73,7 +92,6 @@ void vm_waitproc(struct proc *); int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); int vm_mmap_to_errno(int rv); -void vm_set_page_size(void); void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t); struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t); struct vmspace *vmspace_fork(struct vmspace *, vm_ooffset_t *); @@ -90,5 +108,17 @@ void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); + +#ifdef _SYS_SYSCTL_H_ +SYSCTL_DECL(_vm_kvm); +int sysctl_vm_map(SYSCTL_HANDLER_ARGS); +#define SYSCTL_VM_MAP_MAXSIZE 1 +#define SYSCTL_VM_MAP_CURSIZE 2 +#define SYSCTL_VM_MAP_FREESIZE 3 +#define SYSCTL_VM_MAP_MAXFREE 4 +#define SYSCTL_VM_MAP_START 5 +#define SYSCTL_VM_MAP_END 6 +#endif + #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ Index: pc98/pc98/machdep.c =================================================================== --- pc98/pc98/machdep.c (revision 246206) +++ pc98/pc98/machdep.c (working copy) @@ -211,8 +211,6 @@ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) -struct kva_md_info kmi; - static struct trapframe proc0_tf; 
struct pcpu __pcpu[MAXCPU]; @@ -262,17 +260,10 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); cpu_setregs(); } Index: i386/include/pmap.h =================================================================== --- i386/include/pmap.h (revision 246206) +++ i386/include/pmap.h (working copy) @@ -494,8 +494,6 @@ extern int pseflag; extern int pgeflag; extern char *ptvmmap; /* poor name! */ -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) Index: i386/i386/machdep.c =================================================================== --- i386/i386/machdep.c (revision 246206) +++ i386/i386/machdep.c (working copy) @@ -241,8 +241,6 @@ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) -struct kva_md_info kmi; - static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; @@ -324,17 +322,10 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); #ifndef XEN cpu_setregs(); #endif Index: i386/i386/pmap.c =================================================================== --- i386/i386/pmap.c (revision 246206) +++ i386/i386/pmap.c (working copy) @@ -2091,26 +2091,6 @@ PMAP_LOCK_DESTROY(pmap); } -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return (sysctl_handle_long(oidp, &ksize, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return (sysctl_handle_long(oidp, &kfree, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - /* * grow the number of kernel page table entries, if needed */ @@ -3546,8 +3526,6 @@ * Enter on the PV list if part of our managed memory. 
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; @@ -3762,9 +3740,6 @@ vm_paddr_t pa; vm_page_t free; - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || - (m->oflags & VPO_UNMANAGED) != 0, - ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); Index: i386/xen/pmap.c =================================================================== --- i386/xen/pmap.c (revision 246206) +++ i386/xen/pmap.c (working copy) @@ -1829,26 +1829,6 @@ #endif } -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return (sysctl_handle_long(oidp, &ksize, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return (sysctl_handle_long(oidp, &kfree, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - /* * grow the number of kernel page table entries, if needed */ @@ -2766,8 +2746,6 @@ * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; @@ -2963,9 +2941,6 @@ vm_page_t free; multicall_entry_t *mcl = *mclpp; - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || - (m->oflags & VPO_UNMANAGED) != 0, - ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); Index: amd64/include/pmap.h =================================================================== --- amd64/include/pmap.h (revision 246206) +++ amd64/include/pmap.h (working copy) @@ -305,8 +305,6 @@ extern pt_entry_t *CMAP1; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) Index: amd64/amd64/machdep.c =================================================================== --- amd64/amd64/machdep.c (revision 246206) +++ amd64/amd64/machdep.c (working copy) @@ -198,8 +198,6 @@ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) -struct kva_md_info kmi; - static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; @@ -285,18 +283,10 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); - /* - * Set up buffers, so they can be used to read disk labels. 
- */ - bufinit(); - vm_pager_bufferinit(); - cpu_setregs(); } Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (revision 246206) +++ amd64/amd64/pmap.c (working copy) @@ -1947,26 +1947,6 @@ PMAP_LOCK_DESTROY(pmap); } -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; - - return sysctl_handle_long(oidp, &ksize, 0, req); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "LU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return sysctl_handle_long(oidp, &kfree, 0, req); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "LU", "Amount of KVM free"); - /* * grow the number of kernel page table entries, if needed */ @@ -3443,9 +3423,6 @@ KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); - KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || - va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || VM_OBJECT_LOCKED(m->object), ("pmap_enter: page %p is not busy", m)); @@ -3772,9 +3749,6 @@ pt_entry_t *pte; vm_paddr_t pa; - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || - (m->oflags & VPO_UNMANAGED) != 0, - ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); Index: sys/kernel.h =================================================================== --- sys/kernel.h (revision 246206) +++ sys/kernel.h (working copy) @@ -100,6 +100,7 @@ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ + SI_SUB_KMEM_ALLOC = 0x1D00000, /* kmem_map allocations */ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ Index: sys/proc.h =================================================================== --- sys/proc.h (revision 246206) +++ sys/proc.h (working copy) @@ -883,7 +883,6 @@ void pargs_hold(struct pargs *pa); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); -void procinit(void); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); void proc_reap(struct thread *td, struct proc *p, int *status, int options); @@ -899,7 +898,6 @@ void setsugid(struct proc *p); int should_yield(void); int sigonstack(size_t sp); -void sleepinit(void); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); Index: sys/pipe.h =================================================================== --- sys/pipe.h (revision 246206) +++ sys/pipe.h (working copy) @@ -53,10 +53,6 @@ #define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1) -/* - * See sys_pipe.c for info on what these limits mean. 
- */ -extern long maxpipekva; extern struct fileops pipeops; /* Index: sys/sleepqueue.h =================================================================== --- sys/sleepqueue.h (revision 246206) +++ sys/sleepqueue.h (working copy) @@ -96,7 +96,6 @@ #define SLEEPQ_STOP_ON_BDRY 0x200 /* Stop sleeping thread on user mode boundary */ -void init_sleepqueues(void); int sleepq_abort(struct thread *td, int intrval); void sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, int queue); Index: sys/buf.h =================================================================== --- sys/buf.h (revision 246206) +++ sys/buf.h (working copy) @@ -457,8 +457,6 @@ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ -extern long maxswzone; /* Max KVA for swap structures */ -extern long maxbcache; /* Max KVA for buffer cache */ extern long runningbufspace; extern long hibufspace; extern int dirtybufthresh; @@ -474,7 +472,6 @@ void runningbufwakeup(struct buf *); void waitrunningbufspace(void); caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est); -void bufinit(void); void bwillwrite(void); int buf_dirty_count_severe(void); void bremfree(struct buf *); Index: sys/callout.h =================================================================== --- sys/callout.h (revision 246206) +++ sys/callout.h (working copy) @@ -53,8 +53,6 @@ }; #ifdef _KERNEL -extern int ncallout; - #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) #define callout_drain(c) _callout_stop_safe(c, 1) Index: sys/systm.h =================================================================== --- sys/systm.h (revision 246206) +++ sys/systm.h (working copy) @@ -319,8 +319,6 @@ void callout_handle_init(struct callout_handle *); struct callout_handle timeout(timeout_t *, void *, int); void untimeout(timeout_t *, void *, struct callout_handle); -caddr_t kern_timeout_callwheel_alloc(caddr_t v); -void kern_timeout_callwheel_init(void); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline void spl0(void) { return; } Index: powerpc/include/pmap.h =================================================================== --- powerpc/include/pmap.h (revision 246206) +++ powerpc/include/pmap.h (working copy) @@ -242,8 +242,6 @@ #define PHYS_AVAIL_SZ 128 extern vm_offset_t phys_avail[PHYS_AVAIL_SZ]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; extern vm_offset_t msgbuf_phys; Index: powerpc/booke/machdep.c =================================================================== --- powerpc/booke/machdep.c (revision 246206) +++ powerpc/booke/machdep.c (working copy) @@ -215,15 +215,9 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); - /* Set up buffers, so they can be used to read disk labels. */ - bufinit(); - vm_pager_bufferinit(); - /* Cpu supports execution permissions on the pages. */ elf32_nxstack = 1; } Index: powerpc/booke/pmap.c =================================================================== --- powerpc/booke/pmap.c (revision 246206) +++ powerpc/booke/pmap.c (working copy) @@ -2537,9 +2537,9 @@ /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. 
*/ - if (va >= kmi.buffer_sva && - va < kmi.buffer_eva) { - va = kmi.buffer_eva; + if (va >= vm_map_min(buffer_map) && + va < vm_map_max(buffer_map)) { + va = vm_map_max(buffer_map); continue; } pte = pte_find(mmu, kernel_pmap, va); @@ -2553,7 +2553,7 @@ /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ - if (va == kmi.buffer_sva) + if (va == vm_map_min(buffer_map)) break; pte = pte_find(mmu, kernel_pmap, va); if (pte == NULL || !PTE_ISVALID(pte)) Index: powerpc/aim/machdep.c =================================================================== --- powerpc/aim/machdep.c (revision 246206) +++ powerpc/aim/machdep.c (working copy) @@ -164,8 +164,6 @@ struct bat battable[16]; #endif -struct kva_md_info kmi; - static void cpu_startup(void *dummy) { @@ -213,16 +211,8 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); } extern char kernel_text[], _end[]; Index: arm/arm/machdep.c =================================================================== --- arm/arm/machdep.c (revision 246206) +++ arm/arm/machdep.c (working copy) @@ -285,8 +285,6 @@ mtx_lock(&psp->ps_mtx); } -struct kva_md_info kmi; - /* * arm32_vector_init: * @@ -379,14 +377,10 @@ } } - vm_ksubmap_init(&kmi); - printf("avail memory = %ju (%ju MB)\n", (uintmax_t)ptoa(cnt.v_free_count), (uintmax_t)ptoa(cnt.v_free_count) / 1048576); - bufinit(); - vm_pager_bufferinit(); pcb->un_32.pcb32_und_sp = (u_int)thread0.td_kstack + USPACE_UNDEF_STACK_TOP; pcb->un_32.pcb32_sp = (u_int)thread0.td_kstack + Index: arm/arm/pmap-v6.c =================================================================== --- arm/arm/pmap-v6.c (revision 246206) +++ arm/arm/pmap-v6.c (working copy) @@ -2699,8 +2699,6 @@ if ((!pve) && (pve = pmap_get_pv_entry()) == NULL) panic("pmap_enter: no pv entries"); - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); KASSERT(pve != NULL, ("No pv")); pmap_enter_pv(m, pve, pmap, va, nflags); } Index: arm/arm/pmap.c =================================================================== --- arm/arm/pmap.c (revision 246206) +++ arm/arm/pmap.c (working copy) @@ -3485,9 +3485,6 @@ } else m->md.pv_kva = va; } else { - KASSERT(va < kmi.clean_sva || - va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); KASSERT(pve != NULL, ("No pv")); pmap_enter_pv(m, pve, pmap, va, nflags); } Index: arm/include/pmap.h =================================================================== --- arm/include/pmap.h (revision 246206) +++ arm/include/pmap.h (working copy) @@ -219,8 +219,6 @@ } extern vm_paddr_t phys_avail[]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; void pmap_bootstrap(vm_offset_t, vm_offset_t, struct pv_addr *); int pmap_change_attr(vm_offset_t, vm_size_t, int); Index: kern/kern_thread.c =================================================================== --- kern/kern_thread.c (revision 246206) +++ kern/kern_thread.c (working copy) @@ -282,6 +282,7 @@ tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash); rw_init(&tidhash_lock, "tidhash"); } +SYSINIT(threadinit, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, threadinit, NULL); /* * Place an unused thread on the zombie list. 
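The kern_thread.c hunk above shows the conversion pattern this patch applies to every initializer it touches: a function that used to be called explicitly from init_main.c (or from vm_ksubmap_init()) becomes static and registers itself with a SYSINIT at the new SI_SUB_KMEM_ALLOC stage. A minimal sketch of the pattern, using a hypothetical subsystem "foo":

#include <sys/param.h>
#include <sys/kernel.h>

/*
 * Hypothetical example only; mirrors the threadinit/procinit/
 * sleepinit conversions in this patch.
 */
static void
foo_init(void *dummy)
{
	/* SI_SUB_KMEM_ALLOC runs after kmem is up, so malloc(9) works. */
}
SYSINIT(foo_init, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, foo_init, NULL);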
Index: kern/vfs_bio.c =================================================================== --- kern/vfs_bio.c (revision 246206) +++ kern/vfs_bio.c (working copy) @@ -91,6 +91,7 @@ * carnal knowledge of buffers. This knowledge should be moved to vfs_bio.c. */ struct buf *buf; /* buffer header pool */ +vm_map_t buffer_map; static struct proc *bufdaemonproc; @@ -116,6 +117,16 @@ static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); #endif +#ifndef NBUF +#define NBUF 0 +#endif +int nbuf; /* buffer cache */ +SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RDTUN, &nbuf, 0, + "Number of buffers in the buffer cache"); +static long maxbcache; /* max buffer cache KVA storage */ +SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN, &maxbcache, 0, + "Maximum value of vfs.maxbufspace"); + int vmiodirenable = TRUE; SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, "Use the VM system for directory writes"); @@ -207,6 +218,32 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, ¬bufdflashes, 0, "Number of dirty buffer flushes done by the bufdaemon helpers"); +SYSCTL_NODE(_vm_kvm, OID_AUTO, buffer_map, CTLFLAG_RW, 0, "buffer_map"); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, maxsize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, cursize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, freesize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, maxfree, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, startaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_START, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_buffer_map, OID_AUTO, endaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &buffer_map, + SYSCTL_VM_MAP_END, sysctl_vm_map, "LU", + ""); + /* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it @@ -311,9 +348,6 @@ } #endif -#ifdef DIRECTIO -extern void ffs_rawread_setup(void); -#endif /* DIRECTIO */ /* * numdirtywakeup: * @@ -503,18 +537,29 @@ * may be called more then once. We CANNOT write to the memory area * being reserved at this time. */ -caddr_t -kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) +static void +kern_vfs_bio_buffer_init(void *dummy) { int tuned_nbuf; long maxbuf; + long physmem_est; + vm_offset_t minaddr, maxaddr; /* - * physmem_est is in pages. Convert it to kilobytes (assumes - * PAGE_SIZE is >= 1K) + * Discount the physical memory larger than the size of kernel_map + * to avoid eating up all of KVA space. */ - physmem_est = physmem_est * (PAGE_SIZE / 1024); + physmem_est = lmin(cnt.v_page_count * (PAGE_SIZE / 1024), + vm_map_maxsize(kernel_map) / 1024); + nbuf = NBUF; + TUNABLE_INT_FETCH("kern.nbuf", &nbuf); + +#ifdef VM_BCACHE_SIZE_MAX + maxbcache = VM_BCACHE_SIZE_MAX; +#endif + TUNABLE_LONG_FETCH("kern.maxbcache", &maxbcache); + /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to @@ -550,33 +595,20 @@ nbuf = maxbuf; } - /* - * swbufs are used as temporary holders for I/O, such as paging I/O. 
- * We have no less then 16 and no more then 256. - */ - nswbuf = max(min(nbuf/4, 256), 16); -#ifdef NSWBUF_MIN - if (nswbuf < NSWBUF_MIN) - nswbuf = NSWBUF_MIN; -#endif -#ifdef DIRECTIO - ffs_rawread_setup(); -#endif + /* Reserve space for the buffer cache buffers. */ + buffer_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (long)nbuf * BKVASIZE, FALSE); + buffer_map->system_map = 1; - /* - * Reserve space for the buffer cache buffers - */ - swbuf = (void *)v; - v = (caddr_t)(swbuf + nswbuf); - buf = (void *)v; - v = (caddr_t)(buf + nbuf); - - return(v); + buf = malloc(nbuf * sizeof(struct buf), M_BIOBUF, M_WAITOK); } +SYSINIT(vfs_bio, SI_SUB_KMEM, SI_ORDER_ANY, kern_vfs_bio_buffer_init, NULL); -/* Initialize the buffer subsystem. Called before use of any buffers. */ -void -bufinit(void) +/* + * Initialize the buffer subsystem. Called before use of any buffers. + */ +static void +bufinit(void *dummy) { struct buf *bp; int i; @@ -672,6 +704,7 @@ bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } +SYSINIT(bufinit, SI_SUB_CPU, SI_ORDER_MIDDLE, bufinit, NULL); /* * bfreekva() - free the kva allocation for a buffer. Index: kern/init_main.c =================================================================== --- kern/init_main.c (revision 246206) +++ kern/init_main.c (working copy) @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -434,12 +435,6 @@ p->p_osrel = osreldate; /* - * Initialize thread and process structures. - */ - procinit(); /* set up proc zone */ - threadinit(); /* set up UMA zones */ - - /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ @@ -447,14 +442,9 @@ /* * Initialize sleep queue hash table */ - sleepinit(); + thread0.td_sleepqueue = sleepq_alloc(); /* - * additional VM structures - */ - vm_init2(); - - /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); Index: kern/kern_proc.c =================================================================== --- kern/kern_proc.c (revision 246206) +++ kern/kern_proc.c (working copy) @@ -165,7 +165,7 @@ /* * Initialize global process hashing structures. */ -void +static void procinit() { @@ -181,6 +181,7 @@ UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); } +SYSINIT(procinit, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, procinit, NULL); /* * Prepare a proc for use. Index: kern/subr_sleepqueue.c =================================================================== --- kern/subr_sleepqueue.c (revision 246206) +++ kern/subr_sleepqueue.c (working copy) @@ -173,9 +173,8 @@ /* * Early initialization of sleep queues that is called from the sleepinit() - * SYSINIT. */ -void +static void init_sleepqueues(void) { #ifdef SLEEPQUEUE_PROFILING @@ -206,9 +205,8 @@ #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif - - thread0.td_sleepqueue = sleepq_alloc(); } +SYSINIT(init_sleepqueues, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, init_sleepqueues, NULL); /* * Get a sleep queue for a new thread. 
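With the vm.kvm sysctl tree in place, the per-map statistics become visible from userland. A usage sketch (the MIB names follow the buffer_map node added in vfs_bio.c above; the program itself is illustrative and not part of the patch):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_long cur, max;
	size_t len;

	len = sizeof(cur);
	if (sysctlbyname("vm.kvm.buffer_map.cursize", &cur, &len, NULL, 0) != 0) {
		perror("vm.kvm.buffer_map.cursize");
		return (1);
	}
	len = sizeof(max);
	if (sysctlbyname("vm.kvm.buffer_map.maxsize", &max, &len, NULL, 0) != 0) {
		perror("vm.kvm.buffer_map.maxsize");
		return (1);
	}
	printf("buffer_map: %lu of %lu bytes in use\n", cur, max);
	return (0);
}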
Index: kern/kern_exec.c =================================================================== --- kern/kern_exec.c (revision 246206) +++ kern/kern_exec.c (working copy) @@ -110,6 +110,36 @@ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); +vm_map_t exec_map; +static int exec_map_entries = 16; +TUNABLE_INT("vm.exec_map_entries", &exec_map_entries); + +SYSCTL_NODE(_vm_kvm, OID_AUTO, exec_map, CTLFLAG_RW, 0, "exec_map"); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, maxsize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, cursize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, freesize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, maxfree, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, startaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_START, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_exec_map, OID_AUTO, endaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &exec_map, + SYSCTL_VM_MAP_END, sysctl_vm_map, "LU", + ""); + /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, NULL, 0, sysctl_kern_ps_strings, "LU", ""); @@ -179,6 +209,22 @@ } /* + * Set up the exec kernel VM map. + * + * The number of entries determine how many new processes + * can be in the exec phase at the same time. + */ +static void +sys_exec_init() +{ + vm_offset_t minaddr, maxaddr; + + exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE); +} +SYSINIT(sys_exec, SI_SUB_KMEM, SI_ORDER_ANY, sys_exec_init, NULL); + +/* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ Index: kern/kern_timeout.c =================================================================== --- kern/kern_timeout.c (revision 246206) +++ kern/kern_timeout.c (working copy) @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,10 @@ static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); +static int ncallout; +SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0, + "Number of entries in callwheel and size of timeout() preallocation"); + /* * TODO: * allocate more timeout table slots when table overflows. @@ -156,6 +161,8 @@ static int timeout_cpu; void (*callout_new_inserted)(int cpu, int ticks) = NULL; +static void callout_cpu_init(struct callout_cpu *cc); + static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** @@ -205,32 +212,44 @@ } /* - * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization - * - * This code is called very early in the kernel initialization sequence, - * and may be called more then once. + * kernel low level callwheel initialization + * called on cpu0 during kernel startup. 
*/ -caddr_t -kern_timeout_callwheel_alloc(caddr_t v) +static void +callout_callwheel_init(void *dummy) { struct callout_cpu *cc; - timeout_cpu = PCPU_GET(cpuid); - cc = CC_CPU(timeout_cpu); /* + * Calculate the size of the callout wheel and the preallocated + * timeout() structures. + */ + ncallout = imin(16 + maxproc + maxfiles, 18508); + TUNABLE_INT_FETCH("kern.ncallout", &ncallout); + + /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; - cc->cc_callout = (struct callout *)v; - v = (caddr_t)(cc->cc_callout + ncallout); - cc->cc_callwheel = (struct callout_tailq *)v; - v = (caddr_t)(cc->cc_callwheel + callwheelsize); - return(v); + /* + * Only cpu0 handles timeout() and receives a preallocation. + * + * XXX: Once all timeout() consumers are converted this can + * be removed. + */ + cc = CC_CPU(PCPU_GET(cpuid)); + cc->cc_callout = malloc(ncallout * sizeof(struct callout), + M_CALLOUT, M_WAITOK); + callout_cpu_init(cc); } +SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); +/* + * Initialize the per-cpu callout structure. + */ static void callout_cpu_init(struct callout_cpu *cc) { @@ -239,6 +258,8 @@ mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); + cc->cc_callwheel = malloc(sizeof(struct callout_tailq) * callwheelsize, + M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&cc->cc_callwheel[i]); } @@ -284,19 +305,6 @@ #endif /* - * kern_timeout_callwheel_init() - initialize previously reserved callwheel - * space. - * - * This code is called just once, after the space reserved for the - * callout wheel has been finalized. - */ -void -kern_timeout_callwheel_init(void) -{ - callout_cpu_init(CC_CPU(timeout_cpu)); -} - -/* * Start standard softclock thread. */ static void @@ -306,28 +314,22 @@ #ifdef SMP int cpu; #endif - - cc = CC_CPU(timeout_cpu); - if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, - INTR_MPSAFE, &cc->cc_cookie)) + if (swi_add(&clk_intr_event, "clock", softclock, CC_CPU(timeout_cpu), + SWI_CLOCK, INTR_MPSAFE, &CC_CPU(timeout_cpu)->cc_cookie)) panic("died while creating standard software ithreads"); #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); + cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */ + callout_cpu_init(cc); if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); - cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */ - cc->cc_callwheel = malloc( - sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT, - M_WAITOK); - callout_cpu_init(cc); } #endif } - SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); void Index: kern/kern_synch.c =================================================================== --- kern/kern_synch.c (revision 246206) +++ kern/kern_synch.c (working copy) @@ -121,13 +121,13 @@ SDT_PROBE_DEFINE(sched, , , schedctl_preempt, schedctl-preempt); SDT_PROBE_DEFINE(sched, , , schedctl_yield, schedctl-yield); -void +static void sleepinit(void) { hogticks = (hz / 10) * 2; /* Default only. */ - init_sleepqueues(); } +SYSINIT(sleepinit, SI_SUB_KMEM_ALLOC, SI_ORDER_ANY, sleepinit, NULL); /* * General sleep call. 
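The callwheel sizing in callout_callwheel_init() above reduces to "clamp, then round up to a power of two". A worked userland sketch with assumed tunable values (the kernel uses fls(9); a stand-in is defined here):

#include <stdio.h>

static int
fls_(int mask)	/* stand-in for the kernel's fls(9) */
{
	int bit;

	for (bit = 0; mask != 0; bit++)
		mask >>= 1;
	return (bit);
}

int
main(void)
{
	int maxproc = 1044, maxfiles = 2088;	/* assumed values */
	int ncallout, callwheelsize;

	/* imin(16 + maxproc + maxfiles, 18508), as in the patch. */
	ncallout = 16 + maxproc + maxfiles;
	if (ncallout > 18508)
		ncallout = 18508;
	callwheelsize = 1 << fls_(ncallout);	/* 3148 -> 4096 */
	printf("ncallout=%d callwheelsize=%d\n", ncallout, callwheelsize);
	return (0);
}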
Suspends the current thread until a wakeup is Index: kern/kern_mbuf.c =================================================================== --- kern/kern_mbuf.c (revision 246206) +++ kern/kern_mbuf.c (working copy) @@ -97,32 +97,31 @@ * */ -int nmbufs; /* limits number of mbufs */ -int nmbclusters; /* limits number of mbuf clusters */ -int nmbjumbop; /* limits number of page size jumbo clusters */ -int nmbjumbo9; /* limits number of 9k jumbo clusters */ -int nmbjumbo16; /* limits number of 16k jumbo clusters */ -struct mbstat mbstat; +static int nmbufs; /* limits number of mbufs */ +int nmbclusters; /* limits number of mbuf clusters */ +static int nmbjumbop; /* limits number of page size jumbo clusters */ +static int nmbjumbo9; /* limits number of 9k jumbo clusters */ +static int nmbjumbo16; /* limits number of 16k jumbo clusters */ +struct mbstat mbstat; /* * tunable_mbinit() has to be run before any mbuf allocations are done. + * XXX: It can be folded into mbuf_init(). */ static void tunable_mbinit(void *dummy) { - quad_t realmem, maxmbufmem; + u_long maxmbufmem; /* * The default limit for all mbuf related memory is 1/2 of all - * available kernel memory (physical or kmem). + * available kernel map memory. * At most it can be 3/4 of available kernel memory. */ - realmem = qmin((quad_t)physmem * PAGE_SIZE, - vm_map_max(kernel_map) - vm_map_min(kernel_map)); - maxmbufmem = realmem / 2; - TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem); - if (maxmbufmem > realmem / 4 * 3) - maxmbufmem = realmem / 4 * 3; + maxmbufmem = kmem_real / 2; + TUNABLE_ULONG_FETCH("kern.maxmbufmem", &maxmbufmem); + if (maxmbufmem > kmem_real / 4 * 3) + maxmbufmem = kmem_real / 4 * 3; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) @@ -149,7 +148,7 @@ nmbufs = lmax(maxmbufmem / MSIZE / 5, nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); } -SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); +SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_ANY, tunable_mbinit, NULL); static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) @@ -171,7 +170,7 @@ return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, -&nmbclusters, 0, sysctl_nmbclusters, "IU", + &nmbclusters, 0, sysctl_nmbclusters, "IU", "Maximum number of mbuf clusters allowed"); static int @@ -193,7 +192,7 @@ return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, -&nmbjumbop, 0, sysctl_nmbjumbop, "IU", + &nmbjumbop, 0, sysctl_nmbjumbop, "IU", "Maximum number of mbuf page size jumbo clusters allowed"); static int @@ -215,7 +214,7 @@ return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, -&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", + &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", "Maximum number of mbuf 9k jumbo clusters allowed"); static int @@ -237,7 +236,7 @@ return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, -&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", + &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", "Maximum number of mbuf 16k jumbo clusters allowed"); static int @@ -259,7 +258,7 @@ return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW, -&nmbufs, 0, sysctl_nmbufs, "IU", + &nmbufs, 0, sysctl_nmbufs, "IU", "Maximum number of mbufs allowed"); SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, Index: kern/kern_malloc.c =================================================================== --- kern/kern_malloc.c (revision 246206) +++ kern/kern_malloc.c (working copy) @@ 
-113,9 +113,6 @@ MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery"); -static void kmeminit(void *); -SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL); - static MALLOC_DEFINE(M_FREE, "free", "should be on free list"); static struct malloc_type *kmemstatistics; @@ -123,6 +120,9 @@ static vm_offset_t kmemlimit; static int kmemcount; +vm_map_t kmem_map; +u_long kmem_real; /* kmem_map memory that is backed by real memory */ + #define KMEM_ZSHIFT 4 #define KMEM_ZBASE 16 #define KMEM_ZMASK (KMEM_ZBASE - 1) @@ -186,42 +186,53 @@ */ static uma_zone_t mt_zone; -static vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; -SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, - &vm_min_kernel_address, 0, "Min kernel address"); +SYSCTL_NODE(_vm_kvm, OID_AUTO, kmem_map, CTLFLAG_RW, 0, "kmem_map"); -#ifndef __sparc64__ -static vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS; -#endif -SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, - &vm_max_kernel_address, 0, "Max kernel address"); - u_long vm_kmem_size; -SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0, +SYSCTL_ULONG(_vm_kvm_kmem_map, OID_AUTO, kmem_size, CTLFLAG_RDTUN, + &vm_kmem_size, 0, "Size of kernel memory"); static u_long vm_kmem_size_min; -SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0, +SYSCTL_ULONG(_vm_kvm_kmem_map, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, + &vm_kmem_size_min, 0, "Minimum size of kernel memory"); static u_long vm_kmem_size_max; -SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, &vm_kmem_size_max, 0, +SYSCTL_ULONG(_vm_kvm_kmem_map, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, + &vm_kmem_size_max, 0, "Maximum size of kernel memory"); static u_int vm_kmem_size_scale; -SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale, 0, +SYSCTL_UINT(_vm_kvm_kmem_map, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, + &vm_kmem_size_scale, 0, "Scale factor for kernel memory size"); -static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS); -SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size, - CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, - sysctl_kmem_map_size, "LU", "Current kmem_map allocation size"); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, maxsize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, cursize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, freesize, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, maxfree, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, startaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_START, sysctl_vm_map, "LU", + ""); +SYSCTL_PROC(_vm_kvm_kmem_map, OID_AUTO, endaddr, + CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &kmem_map, + SYSCTL_VM_MAP_END, sysctl_vm_map, "LU", + ""); -static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS); -SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free, - CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, - sysctl_kmem_map_free, "LU", "Largest contiguous free range in kmem_map"); - /* * The malloc_mtx protects the kmemstatistics linked list. 
@@ -260,27 +271,6 @@
     &malloc_failure_count, 0,
     "Number of imposed M_NOWAIT malloc failures");
 #endif
 
-static int
-sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
-{
-	u_long size;
-
-	size = kmem_map->size;
-	return (sysctl_handle_long(oidp, &size, 0, req));
-}
-
-static int
-sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
-{
-	u_long size;
-
-	vm_map_lock_read(kmem_map);
-	size = kmem_map->root != NULL ? kmem_map->root->max_free :
-	    kmem_map->max_offset - kmem_map->min_offset;
-	vm_map_unlock_read(kmem_map);
-	return (sysctl_handle_long(oidp, &size, 0, req));
-}
-
 /*
  * malloc(9) uma zone separation -- sub-page buffer overruns in one
  * malloc type will affect only a subset of other malloc types.
@@ -694,11 +684,19 @@
 }
 
 /*
- * Initialize the kernel memory allocator
+ * Initialize the kernel memory allocator, taking the following
+ * parameters into account:
+ *
+ * VM_KMEM_SIZE		default start size of kmem_map if SCALE is
+ *			not defined
+ * VM_KMEM_SIZE_MIN	hard floor on the kmem_map size
+ * VM_KMEM_SIZE_MAX	hard ceiling on the kmem_map size
+ * VM_KMEM_SIZE_SCALE	fraction of the available real memory to
+ *			be used for the kmem_map, limited by the
+ *			MIN and MAX parameters.
  */
-/* ARGSUSED*/
 static void
-kmeminit(void *dummy)
+kmem_map_init(void *dummy)
 {
 	uint8_t indx;
 	u_long mem_size, tmp;
@@ -715,7 +713,7 @@
 	 * Note that the kmem_map is also used by the zone allocator,
 	 * so make sure that there is enough space.
 	 */
-	vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
+	vm_kmem_size = VM_KMEM_SIZE;
 	mem_size = cnt.v_page_count;
 
 #if defined(VM_KMEM_SIZE_SCALE)
@@ -738,7 +736,7 @@
 	vm_kmem_size_max = VM_KMEM_SIZE_MAX;
 #endif
 	TUNABLE_ULONG_FETCH("vm.kmem_size_max", &vm_kmem_size_max);
-	if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
+	if (vm_kmem_size_max > 0 && vm_kmem_size > vm_kmem_size_max)
 		vm_kmem_size = vm_kmem_size_max;
 
 	/* Allow final override from the kernel environment */
@@ -753,6 +751,8 @@
 	if (vm_kmem_size / 2 / PAGE_SIZE > mem_size)
 		vm_kmem_size = 2 * mem_size * PAGE_SIZE;
 
+	kmem_real = ulmin(vm_kmem_size, mem_size * PAGE_SIZE);
+
 #ifdef DEBUG_MEMGUARD
 	tmp = memguard_fudge(vm_kmem_size, kernel_map);
 #else
@@ -770,7 +770,6 @@
 	 */
 	memguard_init(kmem_map);
 #endif
-	uma_startup2();
 
 	mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
@@ -780,6 +779,7 @@
 	    NULL, NULL, NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
+
 	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
 		int size = kmemzones[indx].kz_size;
 		char *name = kmemzones[indx].kz_name;
@@ -800,6 +800,7 @@
 	}
 }
+SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmem_map_init, NULL);
 
 void
 malloc_init(void *data)
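For a feel of the resulting numbers: assume 4 GB of RAM, 4 KB pages, and a VM_KMEM_SIZE_SCALE of 3. The scale step itself ((mem_size / scale) * PAGE_SIZE) is not visible in the hunks above and is assumed from the stock kmeminit(); the overcommit clamp and the kmem_real computation are taken directly from the patch:

#include <stdio.h>

int
main(void)
{
	unsigned long page_size = 4096;
	unsigned long mem_size = (4UL << 30) / page_size;	/* pages */
	unsigned long scale = 3;		/* assumed VM_KMEM_SIZE_SCALE */
	unsigned long vm_kmem_size, kmem_real;

	/* Scale step (assumed): one third of physical memory, in bytes. */
	vm_kmem_size = (mem_size / scale) * page_size;
	/* Overcommit clamp from the patch: at most 2x physical memory. */
	if (vm_kmem_size / 2 / page_size > mem_size)
		vm_kmem_size = 2 * mem_size * page_size;
	/* kmem_real: the part of kmem_map that real memory can back. */
	kmem_real = vm_kmem_size < mem_size * page_size ?
	    vm_kmem_size : mem_size * page_size;

	printf("vm_kmem_size = %lu MB, kmem_real = %lu MB\n",
	    vm_kmem_size >> 20, kmem_real >> 20);
	return (0);
}

Under these assumptions vm_kmem_size and kmem_real both come out at roughly 1365 MB; kmem_real only diverges when the map is allowed to exceed physical memory.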
Index: kern/subr_param.c
===================================================================
--- kern/subr_param.c	(revision 246206)
+++ kern/subr_param.c	(working copy)
@@ -72,9 +72,6 @@
 # endif
 #endif
 #define	NPROC (20 + 16 * maxusers)
-#ifndef NBUF
-#define NBUF 0
-#endif
 #ifndef MAXFILES
 #define	MAXFILES (maxproc * 2)
 #endif
@@ -89,14 +86,8 @@
 int	maxfiles;			/* sys. wide open files limit */
 int	maxfilesperproc;		/* per-proc open files limit */
 int	msgbufsize;			/* size of kernel message buffer */
-int	ncallout;			/* maximum # of timer events */
-int	nbuf;
 int	ngroups_max;			/* max # groups per process */
-int	nswbuf;
 pid_t	pid_max = PID_MAX;
-long	maxswzone;			/* max swmeta KVA storage */
-long	maxbcache;			/* max buffer cache KVA storage */
-long	maxpipekva;			/* Limit on pipe KVA */
 int	vm_guest;			/* Running as virtual machine guest? */
 u_long	maxtsiz;			/* max text size */
 u_long	dfldsiz;			/* initial data size limit */
@@ -107,18 +98,8 @@
 SYSCTL_INT(_kern, OID_AUTO, hz, CTLFLAG_RDTUN, &hz, 0,
     "Number of clock ticks per second");
-SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0,
-    "Number of pre-allocated timer events");
-SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RDTUN, &nbuf, 0,
-    "Number of buffers in the buffer cache");
-SYSCTL_INT(_kern, OID_AUTO, nswbuf, CTLFLAG_RDTUN, &nswbuf, 0,
-    "Number of swap buffers");
 SYSCTL_INT(_kern, OID_AUTO, msgbufsize, CTLFLAG_RDTUN, &msgbufsize, 0,
     "Size of the kernel message buffer");
-SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN, &maxswzone, 0,
-    "Maximum memory for swap metadata");
-SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN, &maxbcache, 0,
-    "Maximum value of vfs.maxbufspace");
 SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RW | CTLFLAG_TUN, &maxtsiz, 0,
     "Maximum text size");
 SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RW | CTLFLAG_TUN, &dfldsiz, 0,
@@ -136,13 +117,6 @@
     "Virtual machine guest detected? (none|generic|xen)");
 
 /*
- * These have to be allocated somewhere; allocating
- * them here forces loader errors if this file is omitted
- * (if they've been externed everywhere else; hah!).
- */
-struct buf *swbuf;
-
-/*
  * The elements of this array are ordered based upon the values of the
  * corresponding enum VM_GUEST members.
  */
@@ -222,14 +196,6 @@
 	hz = vm_guest > VM_GUEST_NO ? HZ_VM : HZ;
 	tick = 1000000 / hz;
 
-#ifdef VM_SWZONE_SIZE_MAX
-	maxswzone = VM_SWZONE_SIZE_MAX;
-#endif
-	TUNABLE_LONG_FETCH("kern.maxswzone", &maxswzone);
-#ifdef VM_BCACHE_SIZE_MAX
-	maxbcache = VM_BCACHE_SIZE_MAX;
-#endif
-	TUNABLE_LONG_FETCH("kern.maxbcache", &maxbcache);
 	msgbufsize = MSGBUF_SIZE;
 	TUNABLE_INT_FETCH("kern.msgbufsize", &msgbufsize);
@@ -274,26 +240,17 @@
 init_param2(long physpages)
 {
 
-	/* Base parameters */
-	maxusers = MAXUSERS;
-	TUNABLE_INT_FETCH("kern.maxusers", &maxusers);
-	if (maxusers == 0) {
-		maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE);
-		if (maxusers < 32)
-			maxusers = 32;
-#ifdef VM_MAX_AUTOTUNE_MAXUSERS
-		if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS)
-			maxusers = VM_MAX_AUTOTUNE_MAXUSERS;
-#endif
-		/*
-		 * Scales down the function in which maxusers grows once
-		 * we hit 384.
-		 */
-		if (maxusers > 384)
-			maxusers = 384 + ((maxusers - 384) / 8);
-	}
+	/* XXX: Only used for NSFBUFS (sendfile buffers). */
+	maxusers = 384;
 
 	/*
+	 * NB: The number of physical pages is irrelevant here.
+	 * Most things should be scaled off the available KVM,
+	 * which itself depends on the physical pages present.
+	 */
+
+	/*
 	 * The following can be overridden after boot via sysctl.  Note:
 	 * unless overriden, these macros are ultimately based on maxusers.
 	 * Limit maxproc so that kmap entries cannot be exhausted by
@@ -307,7 +264,7 @@
 
 	/*
 	 * The default limit for maxfiles is 1/12 of the number of
-	 * physical page but not less than 16 times maxusers.
+	 * physical pages.
 	 * At most it can be 1/6 the number of physical pages.
 	 */
 	maxfiles = imax(MAXFILES, physpages / 8);
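The retained maxfiles logic now scales purely off physical pages. A worked example with an assumed 8 GB of RAM and 4 KB pages (the MAXFILES floor is omitted for brevity, since it only matters on tiny configurations):

#include <stdio.h>

int
main(void)
{
	long physpages = (8L << 30) / 4096;	/* assumed: 8 GB RAM, 4 KB pages */
	long maxfiles, maxfilesperproc;

	maxfiles = physpages / 8;		/* default */
	if (maxfiles > physpages / 4)		/* hard ceiling */
		maxfiles = physpages / 4;
	maxfilesperproc = (maxfiles / 10) * 9;	/* 90% of the system limit */
	printf("maxfiles=%ld maxfilesperproc=%ld\n", maxfiles, maxfilesperproc);
	return (0);
}

This yields maxfiles=262144 and maxfilesperproc=235926 for the assumed machine.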
@@ -315,33 +272,6 @@
 	if (maxfiles > (physpages / 4))
 		maxfiles = physpages / 4;
 	maxfilesperproc = (maxfiles / 10) * 9;
-
-	/*
-	 * Cannot be changed after boot.
-	 */
-	nbuf = NBUF;
-	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
-
-	/*
-	 * XXX: Does the callout wheel have to be so big?
-	 *
-	 * Clip callout to result of previous function of maxusers maximum
-	 * 384.  This is still huge, but acceptable.
-	 */
-	ncallout = imin(16 + maxproc + maxfiles, 18508);
-	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
-
-	/*
-	 * The default for maxpipekva is min(1/64 of the kernel address space,
-	 * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
-	 */
-	maxpipekva = (physpages / 64) * PAGE_SIZE;
-	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
-	if (maxpipekva < 512 * 1024)
-		maxpipekva = 512 * 1024;
-	if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64)
-		maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) /
-		    64;
 }
 
 /*
Index: kern/sys_pipe.c
===================================================================
--- kern/sys_pipe.c	(revision 246206)
+++ kern/sys_pipe.c	(working copy)
@@ -201,7 +201,10 @@
 #define MINPIPESIZE (PIPE_SIZE/3)
 #define MAXPIPESIZE (2*PIPE_SIZE/3)
 
+static vm_map_t pipe_map;
+
 static long amountpipekva;
+static long maxpipekva;
 static int pipefragretry;
 static int pipeallocfail;
 static int piperesizefail;
@@ -220,7 +223,33 @@
 SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
     &piperesizeallowed, 0, "Pipe resizing allowed");
 
-static void pipeinit(void *dummy __unused);
+SYSCTL_NODE(_vm_kvm, OID_AUTO, pipe_map, CTLFLAG_RW, 0, "pipe_map");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, maxsize,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_MAXSIZE, sysctl_vm_map, "LU",
+    "");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, cursize,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_CURSIZE, sysctl_vm_map, "LU",
+    "");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, freesize,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_FREESIZE, sysctl_vm_map, "LU",
+    "");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, maxfree,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_MAXFREE, sysctl_vm_map, "LU",
+    "");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, startaddr,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_START, sysctl_vm_map, "LU",
+    "");
+SYSCTL_PROC(_vm_kvm_pipe_map, OID_AUTO, endaddr,
+    CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, &pipe_map,
+    SYSCTL_VM_MAP_END, sysctl_vm_map, "LU",
+    "");
+
+
 static void pipeclose(struct pipe *cpipe);
 static void pipe_free_kmem(struct pipe *cpipe);
 static int pipe_create(struct pipe *pipe, int backing);
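These per-map nodes make each submap observable at run time. Assuming the tree ends up rooted at vm.kvm (suggested by the _vm_kvm parent, whose declaration is not part of this excerpt), a userland consumer only needs sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_long val;
	size_t len = sizeof(val);

	/* "vm.kvm.pipe_map.cursize" is assumed from the _vm_kvm_pipe_map
	 * node above; adjust the name if the parent node differs. */
	if (sysctlbyname("vm.kvm.pipe_map.cursize", &val, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("pipe_map cursize: %lu bytes\n", val);
	return (0);
}

The same pattern works for the kmem_map and pager_map nodes defined elsewhere in this patch.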
@@ -244,21 +273,41 @@
 static struct unrhdr *pipeino_unr;
 static dev_t pipedev_ino;
 
-SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
-
+/*
+ * Set up the kmem suballocation and UMA zone for the pipe memory.
+ */
 static void
 pipeinit(void *dummy __unused)
 {
+	vm_offset_t minaddr, maxaddr;
 
+	maxpipekva = kmem_real / 64;
+	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
+	if (maxpipekva > kmem_real / 64)
+		maxpipekva = kmem_real / 64;
+	if (maxpipekva < 512 * 1024)
+		maxpipekva = 512 * 1024;
+
+	pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
+	    FALSE);
+}
+SYSINIT(pipeinit, SI_SUB_KMEM, SI_ORDER_ANY, pipeinit, NULL);
+
+static void
+pipeinit2(void *dummy)
+{
+
 	pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair),
 	    pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini,
 	    UMA_ALIGN_PTR, 0);
+
 	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
 	pipeino_unr = new_unrhdr(1, INT32_MAX, NULL);
 	KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
 	pipedev_ino = devfs_alloc_cdp_inode();
 	KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
 }
+SYSINIT(pipeinit2, SI_SUB_VFS, SI_ORDER_ANY, pipeinit2, NULL);
 
 static int
 pipe_zone_ctor(void *mem, int size, void *arg, int flags)
Index: mips/mips/machdep.c
===================================================================
--- mips/mips/machdep.c	(revision 246206)
+++ mips/mips/machdep.c	(working copy)
@@ -148,8 +148,6 @@
 static void cpu_startup(void *);
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
-struct kva_md_info kmi;
-
 int cpucfg;			/* Value of processor config register */
 int num_tlbentries = 64;	/* Size of the CPU tlb */
 int cputype;
@@ -207,18 +205,10 @@
 		}
 	}
 
-	vm_ksubmap_init(&kmi);
-
 	printf("avail memory = %ju (%juMB)\n",
 	    ptoa((uintmax_t)cnt.v_free_count),
 	    ptoa((uintmax_t)cnt.v_free_count) / 1048576);
 
 	cpu_init_interrupts();
-
-	/*
-	 * Set up buffers, so they can be used to read disk labels.
-	 */
-	bufinit();
-	vm_pager_bufferinit();
 }
 
 /*
Index: mips/mips/pmap.c
===================================================================
--- mips/mips/pmap.c	(revision 246206)
+++ mips/mips/pmap.c	(working copy)
@@ -2011,9 +2011,6 @@
 	va &= ~PAGE_MASK;
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
-	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
-	    va >= kmi.clean_eva,
-	    ("pmap_enter: managed mapping within the clean submap"));
 	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
 	    ("pmap_enter: page %p is not busy", m));
 	pa = VM_PAGE_TO_PHYS(m);
@@ -2204,9 +2201,6 @@
 	pt_entry_t *pte;
 	vm_paddr_t pa;
 
-	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
-	    (m->oflags & VPO_UNMANAGED) != 0,
-	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
Index: mips/include/pmap.h
===================================================================
--- mips/include/pmap.h	(revision 246206)
+++ mips/include/pmap.h	(working copy)
@@ -157,9 +157,6 @@
 extern vm_paddr_t phys_avail[PHYS_AVAIL_ENTRIES + 2];
 extern vm_paddr_t physmem_desc[PHYS_AVAIL_ENTRIES + 2];
 
-extern vm_offset_t virtual_avail;
-extern vm_offset_t virtual_end;
-
 extern vm_paddr_t dump_avail[PHYS_AVAIL_ENTRIES + 2];
 
 #define	pmap_page_get_memattr(m)	VM_MEMATTR_DEFAULT
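The pipeinit() hunk above now sizes pipe_map itself, from kmem_real rather than from physical pages as the removed init_param2() code did. Its clamp arithmetic, demonstrated standalone with an assumed 512 MB of backed kernel map (illustrative values; the tunable fetch is only indicated by a comment):

#include <stdio.h>

int
main(void)
{
	unsigned long kmem_real = 512UL << 20;	/* assumed: 512 MB backed KVM */
	long maxpipekva;

	maxpipekva = kmem_real / 64;		/* default: 1/64 of backed KVM */
	/* A TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", ...) lands here;
	 * values above kmem_real / 64 are clamped back down. */
	if (maxpipekva > (long)(kmem_real / 64))
		maxpipekva = kmem_real / 64;
	if (maxpipekva < 512 * 1024)		/* absolute floor: 512 KB */
		maxpipekva = 512 * 1024;
	printf("maxpipekva = %ld KB\n", maxpipekva >> 10);
	return (0);
}

For the assumed 512 MB map this gives an 8 MB pipe_map; the 512 KB floor only engages on very small kernel maps.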