Index: src/sys/conf/NOTES
===================================================================
RCS file: /home/ncvs/src/sys/conf/NOTES,v
retrieving revision 1.1061
diff -u -r1.1061 NOTES
--- src/sys/conf/NOTES	9 Aug 2002 15:30:47 -0000	1.1061
+++ src/sys/conf/NOTES	9 Aug 2002 19:05:39 -0000
@@ -2252,6 +2252,7 @@
 
 options 	NBUF=512	# Number of buffer headers
 options 	NMBCLUSTERS=1024	# Number of mbuf clusters
+options 	NMBJUMBOBUFS=128	# Number of jumbo clusters
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
Index: src/sys/conf/options
===================================================================
RCS file: /home/ncvs/src/sys/conf/options,v
retrieving revision 1.341
diff -u -r1.341 options
--- src/sys/conf/options	3 Aug 2002 00:19:58 -0000	1.341
+++ src/sys/conf/options	9 Aug 2002 19:05:45 -0000
@@ -197,6 +197,7 @@
 MAXFILES	opt_param.h
 NBUF	opt_param.h
 NMBCLUSTERS	opt_param.h
+NMBJUMBOBUFS	opt_param.h
 NSFBUFS	opt_param.h
 VM_BCACHE_SIZE_MAX	opt_param.h
 VM_SWZONE_SIZE_MAX	opt_param.h
Index: src/sys/kern/kern_malloc.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_malloc.c,v
retrieving revision 1.111
diff -u -r1.111 kern_malloc.c
--- src/sys/kern/kern_malloc.c	31 May 2002 09:41:09 -0000	1.111
+++ src/sys/kern/kern_malloc.c	9 Aug 2002 19:05:52 -0000
@@ -335,6 +335,7 @@
 	u_long mem_size;
 	void *hashmem;
 	u_long hashsize;
+	u_int mb_size;
 	int highbit;
 	int bits;
 	int i;
@@ -385,9 +386,12 @@
 	 * amount to slightly more address space than we need for the submaps,
 	 * but it never hurts to have an extra page in kmem_map.
 	 */
-	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
-	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;
-
+	mb_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
+	    sizeof(u_int);
+#ifdef NMBJUMBOBUFS
+	mb_size += nmbjumbobufs * MJUMBOSIZE;
+#endif
+	npg = (mb_size + vm_kmem_size) / PAGE_SIZE;
 	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
 	    (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
 	kmem_map->system_map = 1;
Index: src/sys/kern/subr_mbuf.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_mbuf.c,v
retrieving revision 1.29
diff -u -r1.29 subr_mbuf.c
--- src/sys/kern/subr_mbuf.c	8 Aug 2002 13:31:57 -0000	1.29
+++ src/sys/kern/subr_mbuf.c	9 Aug 2002 19:05:59 -0000
@@ -151,6 +151,9 @@
 int nmbclusters;
 int nmbcnt;
 int nsfbufs;
+#ifdef NMBJUMBOBUFS
+int nmbjumbobufs;
+#endif
 
 /*
  * Perform sanity checks of tunables declared above.
@@ -170,6 +173,10 @@
 	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
 	nmbcnt = NMBCNTS;
 	TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt);
+#ifdef NMBJUMBOBUFS
+	nmbjumbobufs = NMBJUMBOBUFS;
+	TUNABLE_INT_FETCH("kern.ipc.nmbjumbobufs", &nmbjumbobufs);
+#endif
 	/* Sanity checks */
 	if (nmbufs < nmbclusters * 2)
 		nmbufs = nmbclusters * 2;
@@ -197,11 +204,15 @@
 	vm_offset_t	ml_maptop;
 	int		ml_mapfull;
 	u_int		ml_objsize;
+	u_int		ml_bucksize;
 	u_int		*ml_wmhigh;
 };
 static struct mb_lstmngr mb_list_mbuf, mb_list_clust;
 static struct mtx mbuf_gen, mbuf_pcpu[NCPU];
 u_int *cl_refcntmap;
+#ifdef NMBJUMBOBUFS
+static struct mb_lstmngr mb_list_jumbo;
+#endif
 
 /*
  * Local macros for internal allocator structure manipulations.
@@ -221,8 +232,8 @@
 #define	MB_GET_PCPU_LIST_NUM(mb_lst, num)				\
 	(mb_lst)->ml_cntlst[(num)]
 
-#define	MB_BUCKET_INDX(mb_obj, mb_lst)					\
-	(int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / PAGE_SIZE)
+#define	MB_BUCKET_INDX(mb_obj, mb_lst, mb_div)				\
+	(int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / (mb_div))
 
 #define	MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst)				\
 {									\
@@ -271,6 +282,9 @@
 
 static u_int mbuf_limit = 512;	/* Upper limit on # of mbufs per CPU. */
 static u_int clust_limit = 128;	/* Upper limit on # of clusters per CPU. */
+#ifdef NMBJUMBOBUFS
+static u_int jumbo_limit = 32;	/* Upper limit on # of jumboclusts per CPU. */
+#endif
 
 /*
  * Objects exported by sysctl(8).
@@ -294,6 +308,12 @@
     "Mbuf general information and statistics");
 SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
     sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics");
+#ifdef NMBJUMBOBUFS
+SYSCTL_INT(_kern_ipc, OID_AUTO, nmbjumbobufs, CTLFLAG_RD, &nmbjumbobufs, 0,
+    "Maximum number of jumbo clusters available");
+SYSCTL_UINT(_kern_ipc, OID_AUTO, jumbo_limit, CTLFLAG_RW, &jumbo_limit, 0,
+    "Upper limit on number of jumbo clusters allowed on each PCPU list");
+#endif
 
 /*
  * Prototypes of local allocator routines.
@@ -311,6 +331,32 @@
  */
#define	NMB_MBUF_INIT	4
 #define	NMB_CLUST_INIT	16
+#ifdef NMBJUMBOBUFS
+#define	NMB_JUMBO_INIT	1
+
+/*
+ * Do not change this unless you know EXACTLY what you're doing.  This is
+ * the pre-calculated number of pages of jumbo clusters to allocate per
+ * "bucket."  Here's how it works:
+ *
+ * - MJUMBOSIZE is a constant, and we picked it to be 9216 bytes.  This should
+ *   be enough to accommodate large 9K frames and a reference counter for them.
+ * - 'n' is the number of jumbo clusters per bucket.
+ *
+ * For minimum space wastage to occur, we need:
+ *	(MJUMBOSIZE * n) % PAGE_SIZE == 0.
+ * We want to pick the smallest possible 'n' so that our buckets don't span
+ * too much space.  For the smallest PAGE_SIZE of 4K (e.g., on i386),
+ * 'n' is 4, and this means that we will need:
+ *	(MJUMBOSIZE * 4 / PAGE_SIZE) = JMB_PG_BUCKET = 9 pages per bucket.
+ * This corresponds to the smallest 'n' that we can find for a 4K page size.
+ * For a larger page size (e.g., 8K), we just have PAGE_SIZE = 2 * 4096,
+ * so n = 2 * 4 = 8.  We can calculate 'n' at runtime for our page size,
+ * as long as PAGE_SIZE is a power of 2, and as long as we define
+ * JMB_PG_BUCKET here appropriately.
+ */
+#define	JMB_PG_BUCKET	9
+#endif
 
 /*
  * Internal flags that allow for cache locks to remain "persistent" across
@@ -357,6 +403,7 @@
 	mb_list_mbuf.ml_mapfull = 0;
 	mb_list_mbuf.ml_objsize = MSIZE;
 	mb_list_mbuf.ml_wmhigh = &mbuf_limit;
+	mb_list_mbuf.ml_bucksize = PAGE_SIZE;
 
 	mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES);
 	mb_map_size = rounddown(mb_map_size, PAGE_SIZE);
@@ -371,6 +418,7 @@
 	mb_list_clust.ml_mapfull = 0;
 	mb_list_clust.ml_objsize = MCLBYTES;
 	mb_list_clust.ml_wmhigh = &clust_limit;
+	mb_list_clust.ml_bucksize = PAGE_SIZE;
 
 	/*
 	 * Allocate required general (global) containers for each object type.
@@ -428,11 +476,45 @@
 	 */
 	mbstat.m_msize = MSIZE;
 	mbstat.m_mclbytes = MCLBYTES;
+	mbstat.m_mjumbobytes = MJUMBOBYTES;
 	mbstat.m_minclsize = MINCLSIZE;
 	mbstat.m_mlen = MLEN;
 	mbstat.m_mhlen = MHLEN;
 	mbstat.m_numtypes = MT_NTYPES;
 
+#ifdef NMBJUMBOBUFS
+	mb_map_size = (vm_size_t)(nmbjumbobufs * MJUMBOSIZE);
+	mb_map_size = roundup(mb_map_size, JMB_PG_BUCKET * PAGE_SIZE);
+	mb_list_jumbo.ml_btable = malloc((unsigned long)mb_map_size /
+	    (JMB_PG_BUCKET * PAGE_SIZE) * sizeof(struct mb_bucket *),
+	    M_MBUF, M_NOWAIT);
+	if (mb_list_jumbo.ml_btable == NULL)
+		goto bad;
+	mb_list_jumbo.ml_map = kmem_suballoc(kmem_map,
+	    &(mb_list_jumbo.ml_mapbase), &(mb_list_jumbo.ml_maptop),
+	    mb_map_size);
+	mb_list_jumbo.ml_map->system_map = 1;
+	mb_list_jumbo.ml_mapfull = 0;
+	mb_list_jumbo.ml_objsize = MJUMBOSIZE;
+	mb_list_jumbo.ml_wmhigh = &jumbo_limit;
+	mb_list_jumbo.ml_bucksize = JMB_PG_BUCKET * PAGE_SIZE;
+	mb_list_jumbo.ml_genlist = malloc(sizeof(struct mb_gen_list),
+	    M_MBUF, M_NOWAIT);
+	if (mb_list_jumbo.ml_genlist == NULL)
+		goto bad;
+	cv_init(&(mb_list_jumbo.ml_genlist->mgl_mstarved),
+	    "jumbo cluster pool starved");
+	mb_list_jumbo.ml_genlist->mb_cont.mc_lock = &mbuf_gen;
+	mb_list_jumbo.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER;
+	mb_list_jumbo.ml_genlist->mb_cont.mc_starved = 0;
+	mb_list_jumbo.ml_genlist->mb_cont.mc_objcount =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_jbfree);
+	mb_list_jumbo.ml_genlist->mb_cont.mc_numpgs =
+	    &(mb_statpcpu[MB_GENLIST_OWNER].mb_jbpgs);
+	mb_list_jumbo.ml_genlist->mb_cont.mc_types = NULL;
+	SLIST_INIT(&(mb_list_jumbo.ml_genlist->mb_cont.mc_bhead));
+#endif
+
 	/*
 	 * Allocate and initialize PCPU containers.
 	 */
@@ -492,6 +574,30 @@
 				goto bad;
 		}
 		MB_UNLOCK_CONT(pcpu_cnt);
+
+#ifdef NMBJUMBOBUFS
+		mb_list_jumbo.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list),
+		    M_MBUF, M_NOWAIT);
+		if (mb_list_jumbo.ml_cntlst[i] == NULL)
+			goto bad;
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i];
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_numowner = i;
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_starved = 0;
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_objcount =
+		    &(mb_statpcpu[i].mb_jbfree);
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_numpgs =
+		    &(mb_statpcpu[i].mb_jbpgs);
+		mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_types = NULL;
+		SLIST_INIT(&(mb_list_jumbo.ml_cntlst[i]->mb_cont.mc_bhead));
+		pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_jumbo, i);
+		MB_LOCK_CONT(pcpu_cnt);
+		for (j = 0; j < NMB_JUMBO_INIT; j++) {
+			if (mb_pop_cont(&mb_list_jumbo, M_DONTWAIT, pcpu_cnt)
+			    == NULL)
+				goto bad;
+		}
+		MB_UNLOCK_CONT(pcpu_cnt);
+#endif
 	}
 
 	return;
@@ -527,12 +633,12 @@
 		return (NULL);
 
 	bucket = malloc(sizeof(struct mb_bucket) +
-	    PAGE_SIZE / mb_list->ml_objsize * sizeof(void *), M_MBUF,
+	    mb_list->ml_bucksize / mb_list->ml_objsize * sizeof(void *), M_MBUF,
 	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
 	if (bucket == NULL)
 		return (NULL);
 
-	p = (caddr_t)kmem_malloc(mb_list->ml_map, PAGE_SIZE,
+	p = (caddr_t)kmem_malloc(mb_list->ml_map, mb_list->ml_bucksize,
 	    how == M_TRYWAIT ? M_WAITOK : M_NOWAIT);
 	if (p == NULL) {
 		free(bucket, M_MBUF);
@@ -542,8 +648,9 @@
 	}
 
 	bucket->mb_numfree = 0;
-	mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket;
-	for (i = 0; i < (PAGE_SIZE / mb_list->ml_objsize); i++) {
+	mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list,
+	    mb_list->ml_bucksize)] = bucket;
+	for (i = 0; i < (mb_list->ml_bucksize / mb_list->ml_objsize); i++) {
 		bucket->mb_free[i] = p;
 		bucket->mb_numfree++;
 		p += mb_list->ml_objsize;
@@ -805,7 +912,8 @@
 	struct mb_bucket *bucket;
 	u_int owner;
 
-	bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)];
+	bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list,
+	    mb_list->ml_bucksize)];
 
 	/*
 	 * Make sure that if after we lock the bucket's present container the
@@ -957,9 +1065,9 @@
 			 * being freed in an effort to keep the mbtypes
 			 * counters approximately balanced across all lists.
 			 */
-			MB_MBTYPES_DEC(cnt_lst, type, (PAGE_SIZE /
+			MB_MBTYPES_DEC(cnt_lst, type, (mb_list->ml_bucksize /
 			    mb_list->ml_objsize) - bucket->mb_numfree);
-			MB_MBTYPES_INC(gen_list, type, (PAGE_SIZE /
+			MB_MBTYPES_INC(gen_list, type, (mb_list->ml_bucksize /
 			    mb_list->ml_objsize) - bucket->mb_numfree);
 
 			MB_UNLOCK_CONT(gen_list);
Index: src/sys/sys/mbuf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/mbuf.h,v
retrieving revision 1.98
diff -u -r1.98 mbuf.h
--- src/sys/sys/mbuf.h	30 Jul 2002 22:03:57 -0000	1.98
+++ src/sys/sys/mbuf.h	9 Aug 2002 19:06:05 -0000
@@ -44,9 +44,9 @@
 #include <sys/queue.h>
 
 /*
- * Mbufs are of a single size, MSIZE (machine/param.h), which
+ * Mbufs are of a single size, MSIZE (sys/param.h), which
  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
- * MCLBYTES (also in machine/param.h), which has no additional overhead
+ * MCLBYTES (also in sys/param.h), which has no additional overhead
  * and is used instead of the internal data area; this is done when
 * at least MINCLSIZE of data must be stored.  Additionally, it is possible
 * to allocate a separate buffer externally and attach it to the mbuf in
@@ -57,6 +57,16 @@
 #define	MINCLSIZE	(MHLEN + 1)	/* smallest amount to put in cluster */
 #define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
 
+/*
+ * Jumbo clusters/buffers are MJUMBOSIZE (9216) bytes in size; the trailing
+ * u_int is used as a reference count.  They are _virtually_ contiguous
+ * data regions that can be attached to mbufs, typically used for large
+ * 9K frames with devices that can do scatter/gather.  MJUMBOBYTES is the
+ * size of the actual data region.
+ */
+#define	MJUMBOSIZE	9216
+#define	MJUMBOBYTES	(MJUMBOSIZE - sizeof(u_int))
+
 #ifdef _KERNEL
 /*-
  * Macros for type conversion:
@@ -225,6 +235,8 @@
 	u_long	mb_mbpgs;
 	u_long	mb_clfree;
 	u_long	mb_clpgs;
+	u_long	mb_jbfree;
+	u_long	mb_jbpgs;
 	long	mb_mbtypes[MT_NTYPES];
 	short	mb_active;
 };
@@ -245,6 +257,7 @@
 	u_long	m_mpfail;	/* XXX: times m_pullup failed */
 	u_long	m_msize;	/* length of an mbuf */
 	u_long	m_mclbytes;	/* length of an mbuf cluster */
+	u_long	m_mjumbobytes;	/* length of a jumbo cluster */
 	u_long	m_minclsize;	/* min length of data to allocate a cluster */
 	u_long	m_mlen;		/* length of data in an mbuf */
 	u_long	m_mhlen;	/* length of data in a header mbuf */
@@ -462,6 +475,7 @@
 extern	int nmbclusters;	/* Maximum number of clusters */
 extern	int nmbcnt;		/* Scale kmem_map for counter space */
 extern	int nmbufs;		/* Maximum number of mbufs */
+extern	int nmbjumbobufs;	/* Maximum number of jumbo clusters */
 extern	int nsfbufs;		/* Number of sendfile(2) bufs */
 
 void	_mext_free(struct mbuf *);
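
A few supplementary notes for reviewers follow; none of this is part of
the patch itself.

First, a standalone sanity check of the JMB_PG_BUCKET arithmetic described
in the subr_mbuf.c comment above.  The little userland program below (mine,
not from the tree) finds the smallest number of jumbo clusters 'n' for
which a bucket wastes no space, i.e. (MJUMBOSIZE * n) % PAGE_SIZE == 0, and
prints the resulting pages-per-bucket figure:

#include <stdio.h>

#define	MJUMBOSIZE	9216

int
main(void)
{
	unsigned long pgsizes[] = { 4096, 8192, 16384 };
	unsigned long pg, n;
	size_t i;

	for (i = 0; i < sizeof(pgsizes) / sizeof(pgsizes[0]); i++) {
		pg = pgsizes[i];
		/* Smallest 'n' with no per-bucket wastage. */
		for (n = 1; (MJUMBOSIZE * n) % pg != 0; n++)
			;
		printf("PAGE_SIZE %5lu: n = %2lu clusters/bucket, "
		    "%lu pages/bucket\n", pg, n, MJUMBOSIZE * n / pg);
	}
	return (0);
}

For a 4K page size this prints n = 4 and 9 pages/bucket; for 8K, n = 8 and
again 9 pages/bucket, which is why JMB_PG_BUCKET can stay pinned at 9 for
any power-of-2 page size.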
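
Second, a sketch of the buffer layout implied by the new MJUMBOSIZE and
MJUMBOBYTES constants in mbuf.h: the first MJUMBOBYTES bytes of a jumbo
cluster carry frame data and the trailing u_int holds the reference count.
The jumbo_refcnt() helper here is hypothetical and exists only to make the
layout concrete:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned int u_int;	/* stand-in for the kernel type */

#define	MJUMBOSIZE	9216
#define	MJUMBOBYTES	(MJUMBOSIZE - sizeof(u_int))

/*
 * Hypothetical helper: given the base of an MJUMBOSIZE-byte jumbo
 * cluster, return a pointer to its trailing reference count.
 */
static u_int *
jumbo_refcnt(char *p)
{
	return ((u_int *)(void *)(p + MJUMBOBYTES));
}

int
main(void)
{
	char *buf = malloc(MJUMBOSIZE);

	if (buf == NULL)
		return (1);
	*jumbo_refcnt(buf) = 1;		/* fresh cluster: one reference */
	printf("data bytes: %zu, refcnt: %u\n",
	    MJUMBOBYTES, *jumbo_refcnt(buf));
	free(buf);
	return (0);
}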
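
Finally, the reason MB_BUCKET_INDX() grows an mb_div argument: jumbo
buckets span JMB_PG_BUCKET pages rather than one, so an object's ml_btable
slot has to be computed by dividing its offset from the submap base by the
per-list bucket size (ml_bucksize) instead of by PAGE_SIZE.  A minimal
sketch of that computation, with made-up numbers for a 4K-page machine:

#include <stdio.h>

#define	PAGE_SIZE	4096		/* assumed i386-style page size */
#define	JMB_PG_BUCKET	9
#define	BUCKSIZE	(JMB_PG_BUCKET * PAGE_SIZE)

/* Mirrors MB_BUCKET_INDX(): offset from the map base over bucket size. */
static int
bucket_index(char *base, char *obj, unsigned long bucksize)
{
	return ((int)((obj - base) / bucksize));
}

int
main(void)
{
	static char map[2 * BUCKSIZE];	/* pretend jumbo submap: 2 buckets */

	/* An object one bucket and a bit into the map lands in bucket 1. */
	printf("index = %d\n",
	    bucket_index(map, map + BUCKSIZE + 100, BUCKSIZE));
	return (0);
}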