Index: i386/include/vmparam.h
===================================================================
--- i386/include/vmparam.h	(revision 253035)
+++ i386/include/vmparam.h	(working copy)
@@ -202,4 +202,8 @@
 #define ZERO_REGION_SIZE	(64 * 1024)	/* 64KB */
 
+#ifndef VM_MAX_AUTOTUNE_MAXUSERS
+#define VM_MAX_AUTOTUNE_MAXUSERS	384
+#endif
+
 #endif /* _MACHINE_VMPARAM_H_ */
Index: kern/kern_mbuf.c
===================================================================
--- kern/kern_mbuf.c	(revision 253035)
+++ kern/kern_mbuf.c	(working copy)
@@ -1,6 +1,6 @@
 /*-
  * Copyright (c) 2004, 2005,
- *	Bosko Milekic . All rights reserved.
+ *	Bosko Milekic . All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -76,7 +77,7 @@
  * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
  * | \________ |
  * [ Cluster Keg ] \ /
- * | [ Mbuf Keg ]
+ * | [ Mbuf Keg ]
  * [ Cluster Slabs ] |
  * | [ Mbuf Slabs ]
  * \____________(VM)_________________/
@@ -96,6 +97,7 @@
  *
  */
+int nmbufs;			/* limits number of mbufs */
 int nmbclusters;		/* limits number of mbuf clusters */
 int nmbjumbop;			/* limits number of page size jumbo clusters */
 int nmbjumbo9;			/* limits number of 9k jumbo clusters */
@@ -103,32 +105,51 @@
 struct mbstat mbstat;
 
 /*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
  */
 static void
 tunable_mbinit(void *dummy)
 {
+	quad_t realmem, maxmbufmem;
 
-	/* This has to be done before VM init. */
+	/*
+	 * The default limit for all mbuf related memory is 1/2 of all
+	 * available kernel memory (physical or kmem).
+	 * At most it can be 3/4 of available kernel memory.
+	 */
+	realmem = qmin((quad_t)physmem * PAGE_SIZE,
+	    vm_map_max(kmem_map) - vm_map_min(kmem_map));
+	maxmbufmem = realmem / 2;
+	TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
+	if (maxmbufmem > realmem / 4 * 3)
+		maxmbufmem = realmem / 4 * 3;
+
 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
 	if (nmbclusters == 0)
-		nmbclusters = 1024 + maxusers * 64;
+		nmbclusters = maxmbufmem / MCLBYTES / 4;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
 	if (nmbjumbop == 0)
-		nmbjumbop = nmbclusters / 2;
+		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
 	if (nmbjumbo9 == 0)
-		nmbjumbo9 = nmbclusters / 4;
+		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
 
 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
 	if (nmbjumbo16 == 0)
-		nmbjumbo16 = nmbclusters / 8;
+		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
+
+	/*
+	 * We need at least as many mbufs as we have clusters of
+	 * the various types added together.
+	 */
+	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
+	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
+		nmbufs = lmax(maxmbufmem / MSIZE / 5,
+		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
 }
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
 static int
 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
@@ -136,11 +157,13 @@
 	int error, newnmbclusters;
 
 	newnmbclusters = nmbclusters;
-	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
+	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
 	if (error == 0 && req->newptr) {
-		if (newnmbclusters > nmbclusters) {
+		if (newnmbclusters > nmbclusters &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbclusters = newnmbclusters;
 			uma_zone_set_max(zone_clust, nmbclusters);
+			nmbclusters = uma_zone_get_max(zone_clust);
 			EVENTHANDLER_INVOKE(nmbclusters_change);
 		} else
 			error = EINVAL;
 	}
@@ -157,11 +180,13 @@
 	int error, newnmbjumbop;
 
 	newnmbjumbop = nmbjumbop;
-	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
+	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbop> nmbjumbop) {
+		if (newnmbjumbop > nmbjumbop &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbop = newnmbjumbop;
 			uma_zone_set_max(zone_jumbop, nmbjumbop);
+			nmbjumbop = uma_zone_get_max(zone_jumbop);
 		} else
 			error = EINVAL;
 	}
@@ -169,9 +194,8 @@
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
     &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
-    "Maximum number of mbuf page size jumbo clusters allowed");
+    "Maximum number of mbuf page size jumbo clusters allowed");
-
 static int
 sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
 {
@@ -178,11 +202,13 @@
 	int error, newnmbjumbo9;
 
 	newnmbjumbo9 = nmbjumbo9;
-	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
+	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbo9> nmbjumbo9) {
+		if (newnmbjumbo9 > nmbjumbo9 &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbo9 = newnmbjumbo9;
 			uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+			nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
 		} else
 			error = EINVAL;
 	}
@@ -190,7 +216,7 @@
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
     &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
-    "Maximum number of mbuf 9k jumbo clusters allowed");
+    "Maximum number of mbuf 9k jumbo clusters allowed");
 
 static int
 sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
 {
@@ -198,11 +224,13 @@
 	int error, newnmbjumbo16;
 
 	newnmbjumbo16 = nmbjumbo16;
-	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
+	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
 	if (error == 0 && req->newptr) {
-		if (newnmbjumbo16> nmbjumbo16) {
+		if (newnmbjumbo16 > nmbjumbo16 &&
+		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
 			nmbjumbo16 = newnmbjumbo16;
 			uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+			nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
 		} else
 			error = EINVAL;
 	}
@@ -212,7 +240,27 @@
     &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
     "Maximum number of mbuf 16k jumbo clusters allowed");
 
+static int
+sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
+{
+	int error, newnmbufs;
+	newnmbufs = nmbufs;
+	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
+	if (error == 0 && req->newptr) {
+		if (newnmbufs > nmbufs) {
+			nmbufs = newnmbufs;
+			uma_zone_set_max(zone_mbuf, nmbufs);
+			nmbufs = uma_zone_get_max(zone_mbuf);
+			EVENTHANDLER_INVOKE(nmbufs_change);
+		} else
+			error = EINVAL;
+	}
+	return (error);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
+    &nmbufs, 0, sysctl_nmbufs, "IU",
+    "Maximum number of mbufs allowed");
 
 SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
     "Mbuf general information and statistics");
@@ -241,7 +289,6 @@
 static void	 mb_zfini_pack(void *, int);
 
 static void	 mb_reclaim(void *);
-static void	 mbuf_init(void *);
 static void	*mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
 
 /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
@@ -250,7 +297,6 @@
 /*
  * Initialize FreeBSD Network buffer allocation.
  */
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 static void
 mbuf_init(void *dummy)
 {
@@ -266,6 +312,8 @@
 	    NULL, NULL,
 #endif
 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
+	if (nmbufs > 0)
+		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
 
 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -276,7 +324,7 @@
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
 	if (nmbclusters > 0)
-		uma_zone_set_max(zone_clust, nmbclusters);
+		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
 
 	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -291,7 +339,7 @@
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
 	if (nmbjumbop > 0)
-		uma_zone_set_max(zone_jumbop, nmbjumbop);
+		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
 
 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -301,9 +349,9 @@
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
 	if (nmbjumbo9 > 0)
-		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
-	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
+		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
 
 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -313,9 +361,9 @@
 	    NULL, NULL,
 #endif
 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
 	if (nmbjumbo16 > 0)
-		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
-	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
+		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 
 	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
 	    NULL, NULL,
@@ -351,6 +399,7 @@
 	mbstat.sf_iocnt = 0;
 	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
 }
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 
 /*
  * UMA backend page allocator for the jumbo frame zones.
@@ -435,7 +484,7 @@
 mb_dtor_mbuf(void *mem, int size, void *arg)
 {
 	struct mbuf *m;
-	unsigned long flags;
+	unsigned long flags;
 
 	m = (struct mbuf *)mem;
 	flags = (unsigned long)arg;
Index: kern/subr_param.c
===================================================================
--- kern/subr_param.c	(revision 253035)
+++ kern/subr_param.c	(working copy)
@@ -286,26 +286,40 @@
 		maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE);
 		if (maxusers < 32)
 			maxusers = 32;
-		if (maxusers > 384)
-			maxusers = 384;
-	}
+#ifdef VM_MAX_AUTOTUNE_MAXUSERS
+		if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS)
+			maxusers = VM_MAX_AUTOTUNE_MAXUSERS;
+#endif
+		/*
+		 * Scale down the rate at which maxusers grows once
+		 * we pass 384.
+		 */
+		if (maxusers > 384)
+			maxusers = 384 + ((maxusers - 384) / 8);
+	}
 
 	/*
 	 * The following can be overridden after boot via sysctl.  Note:
 	 * unless overriden, these macros are ultimately based on maxusers.
+	 * Limit maxproc so that kmap entries cannot be exhausted by
+	 * processes.
 	 */
 	maxproc = NPROC;
 	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
+	if (maxproc > (physpages / 12))
+		maxproc = physpages / 12;
+	maxprocperuid = (maxproc * 9) / 10;
+
 	/*
-	 * Limit maxproc so that kmap entries cannot be exhausted by
-	 * processes.
+	 * The default limit for maxfiles is 1/8 of the number of
+	 * physical pages but not less than MAXFILES.
+	 * At most it can be 1/4 of the number of physical pages.
 	 */
-	if (maxproc > (physpages / 12))
-		maxproc = physpages / 12;
-	maxfiles = MAXFILES;
+	maxfiles = imax(MAXFILES, physpages / 8);
 	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
-	maxprocperuid = (maxproc * 9) / 10;
-	maxfilesperproc = (maxfiles * 9) / 10;
+	if (maxfiles > (physpages / 4))
+		maxfiles = physpages / 4;
+	maxfilesperproc = (maxfiles / 10) * 9;
 
 	/*
 	 * Cannot be changed after boot.
@@ -314,7 +328,13 @@
 	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
 	TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
 
-	ncallout = 16 + maxproc + maxfiles;
+	/*
+	 * XXX: Does the callout wheel have to be so big?
+	 *
+	 * Clip ncallout to the value it would have with maxusers capped
+	 * at 384.  This is still huge, but acceptable.
+	 */
+	ncallout = imin(16 + maxproc + maxfiles, 18508);
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
@@ -322,12 +342,12 @@
 	 * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
 	 */
 	maxpipekva = (physpages / 64) * PAGE_SIZE;
+	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
 	if (maxpipekva < 512 * 1024)
 		maxpipekva = 512 * 1024;
 	if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64)
 		maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64;
-	TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
 }
Index: kern/uipc_socket.c
===================================================================
--- kern/uipc_socket.c	(revision 253035)
+++ kern/uipc_socket.c	(working copy)
@@ -282,7 +282,7 @@
 {
 
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-	maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
+	maxsockets = imax(maxsockets, maxfiles);
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
@@ -298,12 +298,9 @@
 	newmaxsockets = maxsockets;
 	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
 	if (error == 0 && req->newptr) {
-		if (newmaxsockets > maxsockets) {
+		if (newmaxsockets > maxsockets &&
+		    newmaxsockets <= maxfiles) {
 			maxsockets = newmaxsockets;
-			if (maxsockets > ((maxfiles / 4) * 3)) {
-				maxfiles = (maxsockets * 5) / 4;
-				maxfilesperproc = (maxfiles * 9) / 10;
-			}
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;
Index: sys/eventhandler.h
===================================================================
--- sys/eventhandler.h	(revision 253035)
+++ sys/eventhandler.h	(working copy)
@@ -253,6 +253,7 @@
 typedef void (*uma_zone_chfn)(void *);
 EVENTHANDLER_DECLARE(nmbclusters_change, uma_zone_chfn);
+EVENTHANDLER_DECLARE(nmbufs_change, uma_zone_chfn);
 EVENTHANDLER_DECLARE(maxsockets_change, uma_zone_chfn);
 
 #endif /* SYS_EVENTHANDLER_H */
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 253035)
+++ sys/mbuf.h	(working copy)
@@ -396,7 +396,6 @@
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
-
 extern uma_zone_t zone_mbuf;
 extern uma_zone_t zone_clust;
 extern uma_zone_t zone_pack;
Index: sys
===================================================================
--- sys	(revision 253035)
+++ sys	(working copy)

Property changes on: sys
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys/sys:r243631,243639,243668,245575

Index: .
===================================================================
--- .	(revision 253035)
+++ .	(working copy)

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys:r242029,242847,243631,243639,243668,243995-243997,244080,245469,245575,246207,249843
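
Note (illustration only, not part of the patch): the reworked tunable_mbinit() derives every mbuf zone limit from a single kern.maxmbufmem budget instead of chaining them off nmbclusters. The stand-alone C sketch below mirrors that arithmetic so the resulting defaults can be eyeballed for a given memory size. The constants are the usual i386/amd64 values (PAGE_SIZE 4096, MSIZE 256, MCLBYTES 2048, MJUMPAGESIZE == PAGE_SIZE, 9k/16k jumbos), the kmem map is assumed to be at least as large as physical memory, and the variable names and the 4 GB sample are invented for the example.

/*
 * Sketch of the kern.maxmbufmem autotuning from the patched
 * tunable_mbinit().  Constants are typical i386/amd64 values; the
 * 4 GB figure and all variable names are made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
#define MSIZE		256
#define MCLBYTES	2048
#define MJUMPAGESIZE	PAGE_SIZE
#define MJUM9BYTES	(9 * 1024)
#define MJUM16BYTES	(16 * 1024)

static int64_t
qmin(int64_t a, int64_t b)
{
	return (a < b ? a : b);
}

static int64_t
lmax(int64_t a, int64_t b)
{
	return (a > b ? a : b);
}

int
main(void)
{
	int64_t physmem_bytes = 4LL * 1024 * 1024 * 1024;	/* 4 GB RAM */
	int64_t kmem_bytes = physmem_bytes;	/* assume kmem_map >= RAM */
	int64_t realmem, maxmbufmem;
	int64_t nmbclusters, nmbjumbop, nmbjumbo9, nmbjumbo16, nmbufs;

	/* Budget: 1/2 of the smaller of physical and kernel memory ... */
	realmem = qmin(physmem_bytes, kmem_bytes);
	maxmbufmem = realmem / 2;
	/* ... and even a loader override may not exceed 3/4 of it. */
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	/* Same divisors as the patch uses for the per-zone defaults. */
	nmbclusters = maxmbufmem / MCLBYTES / 4;
	nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
	nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
	nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/* At least one plain mbuf for every cluster of any size. */
	nmbufs = 0;			/* as if kern.ipc.nmbufs were unset */
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);

	printf("maxmbufmem  %jd bytes\n", (intmax_t)maxmbufmem);
	printf("nmbclusters %jd\n", (intmax_t)nmbclusters);
	printf("nmbjumbop   %jd\n", (intmax_t)nmbjumbop);
	printf("nmbjumbo9   %jd\n", (intmax_t)nmbjumbo9);
	printf("nmbjumbo16  %jd\n", (intmax_t)nmbjumbo16);
	printf("nmbufs      %jd\n", (intmax_t)nmbufs);
	return (0);
}

For the 4 GB sample the budget comes out to 2 GB and roughly 256k standard clusters; doubling RAM doubles every limit, which is the point of keying everything off maxmbufmem rather than off a clipped maxusers.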
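A second sketch, again not part of the patch, for the subr_param.c change: instead of clamping maxusers at 384, the merged code lets it keep growing at 1/8 of the previous rate above 384, while the new VM_MAX_AUTOTUNE_MAXUSERS (384 for i386) still imposes a hard cap on address-space-constrained platforms. The 2 MB-per-"user" divisor comes straight from the patched code; the driver loop and sample memory sizes are invented for illustration.

/*
 * Sketch of the maxusers auto-scaling from the patched subr_param.c.
 * Define VM_MAX_AUTOTUNE_MAXUSERS (384 on i386 in this patch) to model
 * a hard-capped platform; leave it undefined for amd64-like behaviour.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
/* #define VM_MAX_AUTOTUNE_MAXUSERS	384 */

static int
autotune_maxusers(int64_t physpages)
{
	int maxusers;

	/* One "user" per 2 MB of physical memory, never fewer than 32. */
	maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE);
	if (maxusers < 32)
		maxusers = 32;
#ifdef VM_MAX_AUTOTUNE_MAXUSERS
	if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS)
		maxusers = VM_MAX_AUTOTUNE_MAXUSERS;
#endif
	/* Past 384, grow at 1/8 of the old rate instead of stopping. */
	if (maxusers > 384)
		maxusers = 384 + ((maxusers - 384) / 8);
	return (maxusers);
}

int
main(void)
{
	int64_t gb;

	for (gb = 1; gb <= 128; gb *= 2) {
		int64_t physpages = gb * 1024 * 1024 * 1024 / PAGE_SIZE;

		printf("%4jd GB RAM -> maxusers %d\n", (intmax_t)gb,
		    autotune_maxusers(physpages));
	}
	return (0);
}

Without a platform cap, 16 GB of RAM now yields maxusers 1360 instead of the old hard 384, and that larger value is what drives the bigger maxproc, maxfiles and ncallout defaults later in the same function.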