Index: pmap.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/pmap.c,v
retrieving revision 1.551
diff -u -r1.551 pmap.c
--- pmap.c	27 Apr 2006 05:02:21 -0000	1.551
+++ pmap.c	28 Apr 2006 20:00:18 -0000
@@ -216,6 +216,10 @@
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static int shpgperproc = PMAP_SHPGPERPROC;
 
+struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
+int pv_maxchunks;			/* How many chunks we have KVA for */
+vm_offset_t pv_vafree;			/* freelist stored in the PTE */
+
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
@@ -483,6 +487,65 @@
 #endif
 
 /*
+ * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
+ * Requirements:
+ *  - Must deal with pages in order to ensure that none of the PG_* bits
+ *    are ever set, PG_V in particular.
+ *  - Assumes we can write to ptes without pte_store() atomic ops, even
+ *    on PAE systems.  This should be ok.
+ *  - Assumes nothing will ever test these addresses for 0 to indicate
+ *    no mapping instead of correctly checking PG_V.
+ *  - Assumes a vm_offset_t will fit in a pte (true for i386).
+ * Because PG_V is never set, there can be no mappings to invalidate.
+ */
+static vm_offset_t
+pmap_ptelist_alloc(vm_offset_t *head)
+{
+	pt_entry_t *pte;
+	vm_offset_t va;
+
+	va = *head;
+	if (va == 0)
+		return (va);
+	pte = vtopte(va);
+	if (pte == NULL)
+		panic("pmap_ptelist_alloc: no pte for first page kva!");
+	*head = *pte;
+	if (*head & PG_V)
+		panic("pmap_ptelist_alloc: va with PG_V set!");
+	*pte = 0;
+	return (va);
+}
+
+static void
+pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
+{
+	pt_entry_t *pte;
+
+	if (va & PG_V)
+		panic("pmap_ptelist_free: freeing va with PG_V set!");
+	pte = vtopte(va);
+	if (pte == NULL)
+		panic("pmap_ptelist_free: no pte for freed page kva!");
+	*pte = *head;		/* virtual! PG_V is 0 though */
+	*head = va;
+}
+
+static void
+pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
+{
+	int i;
+	vm_offset_t va;
+
+	*head = 0;
+	for (i = npages - 1; i >= 0; i--) {
+		va = (vm_offset_t)base + i * PAGE_SIZE;
+		pmap_ptelist_free(head, va);
+	}
+}
+
+
+/*
  * Initialize the pmap module.
  * Called by vm_init, to initialize any structures that the pmap
  * system needs to map virtual memory.
@@ -499,8 +562,15 @@
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
+	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
+	pv_maxchunks = pv_entry_max / _NPCPV;
+	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
+	    PAGE_SIZE * pv_maxchunks);
+	if (pv_chunkbase == NULL)
+		panic("pmap_init: not enough kvm for pv chunks");
+	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 #ifdef PAE
 	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
 	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
@@ -511,35 +581,10 @@
 
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
 
-static int
-pmap_pventry_proc(SYSCTL_HANDLER_ARGS)
-{
-	int error;
-
-	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
-	if (error == 0 && req->newptr) {
-		shpgperproc = (pv_entry_max - cnt.v_page_count) / maxproc;
-		pv_entry_high_water = 9 * (pv_entry_max / 10);
-	}
-	return (error);
-}
-SYSCTL_PROC(_vm_pmap, OID_AUTO, pv_entry_max, CTLTYPE_INT|CTLFLAG_RW,
-    &pv_entry_max, 0, pmap_pventry_proc, "IU", "Max number of PV entries");
-
-static int
-pmap_shpgperproc_proc(SYSCTL_HANDLER_ARGS)
-{
-	int error;
-
-	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
-	if (error == 0 && req->newptr) {
-		pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
-		pv_entry_high_water = 9 * (pv_entry_max / 10);
-	}
-	return (error);
-}
-SYSCTL_PROC(_vm_pmap, OID_AUTO, shpgperproc, CTLTYPE_INT|CTLFLAG_RW,
-    &shpgperproc, 0, pmap_shpgperproc_proc, "IU", "Page share factor per proc");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
+	"Max number of PV entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
+	"Page share factor per proc");
 
 /***************************************************
  * Low level helper routines.....
@@ -1606,7 +1651,7 @@
 	vm_page_unwire(m, 0);
 	vm_page_free(m);
 	vm_page_unlock_queues();
-	kmem_free(kernel_map, (vm_offset_t)pc, PAGE_SIZE);
+	pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 }
 
 /*
@@ -1653,43 +1698,50 @@
 			return (pv);
 		}
 	}
-	/* No free items, allocate another chunk */
-	pc = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, PAGE_SIZE);
-	if (pc == NULL) {
+	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
+	m = vm_page_alloc(NULL, colour, VM_ALLOC_SYSTEM |
+	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+	if (m == NULL || pc == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
-			return (NULL);
-		}
-		panic("get_pv_entry: out of kvm for pv entry chunk!");
-	}
-	m = vm_page_alloc(NULL, colour, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
-	if (m == NULL) {
-		if (try) {
-			pv_entry_count--;
-			PV_STAT(pc_chunk_tryfail++);
-			kmem_free(kernel_map, (vm_offset_t)pc, PAGE_SIZE);
+			if (m) {
+				vm_page_lock_queues();
+				vm_page_unwire(m, 0);
+				vm_page_free(m);
+				vm_page_unlock_queues();
+			}
+			if (pc)
+				pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
			return (NULL);
 		}
 		/*
-		 * Reclaim pv entries: At first, destroy mappings to inactive
-		 * pages.  After that, if a pv chunk entry is still needed,
-		 * destroy mappings to active pages.
+		 * Reclaim pv entries: At first, destroy mappings to
+		 * inactive pages.  After that, if a pv chunk entry
+		 * is still needed, destroy mappings to active pages.
 		 */
 		if (ratecheck(&lastprint, &printinterval))
-			printf("Approaching the limit on PV entries, consider"
-			    "increasing sysctl vm.pmap.shpgperproc or "
+			printf("Approaching the limit on PV entries, "
+			    "consider increasing tunables "
+			    "vm.pmap.shpgperproc or "
 			    "vm.pmap.pv_entry_max\n");
 		PV_STAT(pmap_collect_inactive++);
 		pmap_collect(pmap, &vm_page_queues[PQ_INACTIVE]);
-		m = vm_page_alloc(NULL, colour,
-		    VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ);
-		if (m == NULL) {
-			PV_STAT(pmap_collect_active++);
-			pmap_collect(pmap, &vm_page_queues[PQ_ACTIVE]);
+		if (m == NULL)
 			m = vm_page_alloc(NULL, colour,
 			    VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ);
+		if (pc == NULL)
+			pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
+		if (m == NULL || pc == NULL) {
+			PV_STAT(pmap_collect_active++);
+			pmap_collect(pmap, &vm_page_queues[PQ_ACTIVE]);
 			if (m == NULL)
+				m = vm_page_alloc(NULL, colour,
+				    VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ);
+			if (pc == NULL)
+				pc = (struct pv_chunk *)
+				    pmap_ptelist_alloc(&pv_vafree);
+			if (m == NULL || pc == NULL)
 				panic("get_pv_entry: increase vm.pmap.shpgperproc");
 		}
 	}
@@ -2855,7 +2901,7 @@
 			vm_page_unwire(m, 0);
 			vm_page_free(m);
 			vm_page_unlock_queues();
-			kmem_free(kernel_map, (vm_offset_t)pc, PAGE_SIZE);
+			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 		}
 	}
 	sched_unpin();
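
[Editor's note, not part of the patch.] The pmap_ptelist_* routines above thread the free list of pv-chunk KVA pages through the page-table slots of the unmapped pages themselves: a free page's PTE holds the virtual address of the next free page, and because a page-aligned address never has PG_V set, the hardware never mistakes a freelist link for a live mapping and no TLB invalidation is ever needed. The standalone userland sketch below models that idea with a fake page-table array; names such as fake_ptes, FAKE_PG_V, NSLOTS and kva_base are invented for the illustration and do not appear in the kernel code.

/*
 * Userland model of a freelist threaded through invalid "PTE" slots.
 * Each free page's slot stores the address of the next free page.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	FAKE_PAGE_SIZE	4096u
#define	FAKE_PG_V	0x001u		/* stand-in for the i386 PG_V bit */
#define	NSLOTS		8

/* Fake page table: one "PTE" per page of the managed KVA window. */
static uintptr_t fake_ptes[NSLOTS];
static uintptr_t kva_base = 0x10000000u;	/* page aligned, so PG_V is clear */

static uintptr_t *
fake_vtopte(uintptr_t va)
{

	return (&fake_ptes[(va - kva_base) / FAKE_PAGE_SIZE]);
}

/* Pop one page of KVA off the list; 0 means the list is empty. */
static uintptr_t
ptelist_alloc(uintptr_t *head)
{
	uintptr_t *pte;
	uintptr_t va;

	va = *head;
	if (va == 0)
		return (0);
	pte = fake_vtopte(va);
	assert((*pte & FAKE_PG_V) == 0);	/* never looks like a mapping */
	*head = *pte;		/* next free va was stashed in the pte slot */
	*pte = 0;
	return (va);
}

/* Push a page of KVA back: store the old head in its pte slot. */
static void
ptelist_free(uintptr_t *head, uintptr_t va)
{

	assert((va & FAKE_PG_V) == 0);
	*fake_vtopte(va) = *head;
	*head = va;
}

int
main(void)
{
	uintptr_t a, b, head;
	int i;

	/* Seed the list with every page in the window, as pmap_ptelist_init() does. */
	head = 0;
	for (i = NSLOTS - 1; i >= 0; i--)
		ptelist_free(&head, kva_base + (uintptr_t)i * FAKE_PAGE_SIZE);

	a = ptelist_alloc(&head);
	b = ptelist_alloc(&head);
	printf("allocated 0x%lx and 0x%lx\n", (unsigned long)a, (unsigned long)b);
	ptelist_free(&head, a);
	ptelist_free(&head, b);
	return (0);
}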