diff --git a/sys/mips/conf/MALTA b/sys/mips/conf/MALTA
index fc082ad..b78043c 100644
--- a/sys/mips/conf/MALTA
+++ b/sys/mips/conf/MALTA
@@ -53,6 +53,9 @@
 options 	UFS_ACL			#Support for access control lists
 options 	UFS_DIRHASH		#Improve performance on big directories
 options 	ROOTDEVNAME=\"ufs:ada0\"
+options 	KTR
+options 	KTR_VERBOSE
+options 	KTR_COMPILE=(KTR_PMAP)
 
 # Debugging for use in -current
 #options 	DEADLKRES		#Enable the deadlock resolver
diff --git a/sys/mips/include/param.h b/sys/mips/include/param.h
index 2d1d7f1..c2cdf4d 100644
--- a/sys/mips/include/param.h
+++ b/sys/mips/include/param.h
@@ -157,7 +157,7 @@
 #define	NBPDR		(1 << PDRSHIFT)	/* bytes/pagedir */
 #define	SEGMASK		(NBSEG - 1)	/* byte offset into segment */
 
-#define	MAXPAGESIZES	1		/* max supported pagesizes */
+#define	MAXPAGESIZES	2		/* max supported pagesizes */
 
 #define	MAXDUMPPGS	1		/* xxx: why is this only one? */
@@ -172,6 +172,8 @@
  */
 #define	round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)
 #define	trunc_page(x)	((x) & ~PAGE_MASK)
+#define	trunc_1mpage(x)	((unsigned)(x) & ~PDRMASK)
+#define	round_1mpage(x)	((((unsigned)(x)) + PDRMASK) & ~PDRMASK)
 #define	atop(x)		((x) >> PAGE_SHIFT)
 #define	ptoa(x)		((x) << PAGE_SHIFT)
diff --git a/sys/mips/include/pte.h b/sys/mips/include/pte.h
index 2f2f995..5951a65 100644
--- a/sys/mips/include/pte.h
+++ b/sys/mips/include/pte.h
@@ -50,11 +50,24 @@ typedef pt_entry_t *pd_entry_t;
 #define	TLB_PAGE_MASK	(TLB_PAGE_SIZE - 1)
 
 /*
- * TLB PageMask register.  Has mask bits set above the default, 4K, page mask.
+ * TLB PageMask register.  It lets a TLB entry map a page larger than the
+ * default 4KB.  No MIPS CPU permits arbitrary bit patterns in PageMask;
+ * most allow page sizes between 4KB and 16MB in steps of 4x:
+ *
+ *	0000 0000 0000 0000	4KB
+ *	0000 0000 0000 0011	16KB
+ *	0000 0000 0000 1111	64KB
+ *	[...]
+ *	0000 1111 1111 1111	16MB
+ *
+ * The mask bits sit above the default, 4KB, page mask.
 */
 #define	TLBMASK_SHIFT	(13)
 #define	TLBMASK_MASK	((PAGE_MASK >> TLBMASK_SHIFT) << TLBMASK_SHIFT)
 
+/* XXXDAVIDE: This is just horrible.  But it's enough for now. */
+#define	PAGE_MASK1MB	((1 << 8) - 1)
+#define	TLB_MASK1MB	(PAGE_MASK1MB << TLBMASK_SHIFT)
+
 /*
  * FreeBSD/mips page-table entries take a near-identical format to MIPS TLB
  * entries, each consisting of two 32-bit or 64-bit values ("EntryHi" and
diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h
index 756a8a6..bb50a41 100644
--- a/sys/mips/include/vmparam.h
+++ b/sys/mips/include/vmparam.h
@@ -101,11 +101,11 @@
 #endif
 
 /*
- * Disable superpage reservations. (not sure if this is right
- * I copied it from ARM)
+ * Enable one level of superpage reservations: 2^VM_LEVEL_0_ORDER
+ * base pages, i.e. a 1MB reservation.
 */
 #ifndef VM_NRESERVLEVEL
-#define	VM_NRESERVLEVEL		0
+#define	VM_NRESERVLEVEL		1
+#define	VM_LEVEL_0_ORDER	8
 #endif
 
 /*
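For reference, the PageMask encoding described in the pte.h comment above can be sanity-checked in userland. The sketch below is illustrative only and is not part of the patch; the EX_* names are made up for the example. A TLB entry maps a pair of pages, so the mask covers the offset bits of twice the page size above the 4KB base page, and the 1MB result is expected to equal TLB_MASK1MB (PAGE_MASK1MB << TLBMASK_SHIFT). VM_LEVEL_0_ORDER 8 reflects the same geometry: 2^8 base pages of 4KB make up one 1MB reservation.

/*
 * Illustrative sketch, not part of the patch: derive the MIPS PageMask
 * value for a power-of-four page size.
 */
#include <assert.h>
#include <stdint.h>

#define	EX_TLBMASK_SHIFT	13	/* same value as TLBMASK_SHIFT */

static uint32_t
ex_pagemask(uint32_t pagesize)
{
	/* Mask the offset bits of a page pair, above the 4KB base page. */
	return ((2 * pagesize - 1) & ~((1u << EX_TLBMASK_SHIFT) - 1));
}

int
main(void)
{
	assert(ex_pagemask(4 * 1024) == 0);			/* 4KB: no mask bits */
	assert(ex_pagemask(16 * 1024) == (0x3u << 13));		/* 16KB */
	assert(ex_pagemask(64 * 1024) == (0xfu << 13));		/* 64KB */
	assert(ex_pagemask(1024 * 1024) == (0xffu << 13));	/* 1MB == TLB_MASK1MB */
	assert(ex_pagemask(16u * 1024 * 1024) == (0xfffu << 13)); /* 16MB */
	return (0);
}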
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 2ca8ed5..5cf906c 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -95,10 +96,12 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #undef PMAP_DEBUG
@@ -115,6 +118,9 @@ __FBSDID("$FreeBSD$");
 #define	PV_STAT(x)	do { } while (0)
 #endif
 
+#define	pa_index(pa)	((pa) >> PDRSHIFT)
+#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
+
 /*
  * Get PDEs and PTEs for user/kernel address space
  */
@@ -154,6 +160,7 @@ static struct rwlock_padalign pvh_global_lock;
 * Data for the pv entry allocation mechanism
 */
 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static struct md_page *pv_table;
 static int pv_entry_count;
 
 static void free_pv_chunk(struct pv_chunk *pc);
@@ -164,8 +171,11 @@ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
     vm_offset_t va);
 static vm_page_t pmap_alloc_direct_page(unsigned int index, int req);
+static boolean_t pmap_enter_pte(pmap_t pmap, vm_offset_t va, vm_page_t m,
+    vm_prot_t prot);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_promote_pde(pmap_t pmap, pt_entry_t *pde, vm_offset_t va);
 static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
     pd_entry_t pde);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
@@ -186,6 +196,38 @@ static void pmap_invalidate_page_action(void *arg);
 static void pmap_invalidate_range_action(void *arg);
 static void pmap_update_page_action(void *arg);
 
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+    "Current number of pv entries");
+
+#ifdef PV_STATS
+static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
+    "Current number of pv entry chunks");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
+    "Current number of pv entry chunks allocated");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
+    "Current number of pv entry chunks frees");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
+    "Number of times tried to get a chunk page but failed.");
+
+static long pv_entry_frees, pv_entry_allocs;
+static int pv_entry_spare;
+
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
+    "Current number of pv entry frees");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
+    "Current number of pv entry allocs");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
+    "Current number of spare pv entries");
+#endif
+
+static int pg_ps_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RD, &pg_ps_enabled, 1,
+    "Are large page mappings enabled?");
+
 #ifndef __mips_n64
 /*
  * This structure is for high memory (memory above 512Meg in 32 bit) support.
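The pa_index()/pa_to_pvh() macros added above give every 4KB page inside the same 1MB frame the same pv_table slot, which is what lets a promoted mapping keep a single pv list. A minimal sketch of that grouping follows; it is not part of the patch, and EX_PDRSHIFT = 20 is an assumption that simply matches the 1MB superpage size used here.

/*
 * Illustrative sketch, not part of the patch: all 256 4KB pages of a 1MB
 * frame share one pv_table index.
 */
#include <assert.h>
#include <stdint.h>

#define	EX_PDRSHIFT	20
#define	ex_pa_index(pa)	((uint64_t)(pa) >> EX_PDRSHIFT)

int
main(void)
{
	uint64_t base = 0x00700000;	/* some 1MB-aligned physical frame */
	int i;

	for (i = 0; i < 256; i++)
		assert(ex_pa_index(base + i * 4096) == ex_pa_index(base));
	/* The next 1MB frame selects the next pv header. */
	assert(ex_pa_index(base + (1 << EX_PDRSHIFT)) == ex_pa_index(base) + 1);
	return (0);
}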
@@ -605,8 +647,53 @@ pmap_page_init(vm_page_t m)
 void
 pmap_init(void)
 {
+	vm_size_t s;
+	int i, pv_npg;
+
+	/*
+	 * Are large page mappings enabled?
+	 */
+	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
+	if (pg_ps_enabled)
+		KASSERT(MAXPAGESIZES > 1,
+		    ("Max page sizes supported should be > 1"));
+
+	/*
+	 * Calculate the size of the pv head table for superpages.
+	 */
+	for (i = 0; i < phys_avail[i + 1]; i += 2);
+	pv_npg = round_1mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+
+	/*
+	 * Allocate memory for the pv head table for superpages.
+	 */
+	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+	s = round_page(s);
+	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < pv_npg; i++)
+		TAILQ_INIT(&pv_table[i].pv_list);
 }
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte, CTLFLAG_RD, 0,
+    "Superpages mapping counters");
+
+static u_long pmap_pte_demotions;
+SYSCTL_ULONG(_vm_pmap_pte, OID_AUTO, demotions, CTLFLAG_RD,
+    &pmap_pte_demotions, 0, "Superpages demotions");
+
+static u_long pmap_pte_mappings;
+SYSCTL_ULONG(_vm_pmap_pte, OID_AUTO, mappings, CTLFLAG_RD,
+    &pmap_pte_mappings, 0, "Superpages mappings");
+
+static u_long pmap_pte_p_failures;
+SYSCTL_ULONG(_vm_pmap_pte, OID_AUTO, p_failures, CTLFLAG_RD,
+    &pmap_pte_p_failures, 0, "Superpages promotion failures");
+
+static u_long pmap_pte_promotions;
+SYSCTL_ULONG(_vm_pmap_pte, OID_AUTO, promotions, CTLFLAG_RD,
+    &pmap_pte_promotions, 0, "Superpages promotions");
+
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
@@ -1329,33 +1416,6 @@ static const u_long pc_freemask[_NPCM] = {
 #endif
 };
 
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
-    "Current number of pv entries");
-
-#ifdef PV_STATS
-static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
-    "Current number of pv entry chunks");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
-    "Current number of pv entry chunks allocated");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
-    "Current number of pv entry chunks frees");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
-    "Number of times tried to get a chunk page but failed.");
-
-static long pv_entry_frees, pv_entry_allocs;
-static int pv_entry_spare;
-
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
-    "Current number of pv entry frees");
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
-    "Current number of pv entry allocs");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
-    "Current number of spare pv entries");
-#endif
 
 /*
  * We are in a serious low memory condition. Resort to
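The pv head table sizing in pmap_init() above walks phys_avail[] to find the top of physical memory and then allocates one struct md_page per 1MB frame. A standalone sketch of the same arithmetic follows; it is not part of the patch, and the phys_avail[] contents, EX_NBPDR and the printed value are examples only.

/*
 * Illustrative sketch, not part of the patch: size the superpage pv table
 * from a phys_avail[]-style array of (start, end) pairs terminated by
 * zeroes.  EX_NBPDR plays the role of NBPDR (1MB).
 */
#include <stdio.h>

#define	EX_NBPDR	(1024 * 1024)

int
main(void)
{
	unsigned long phys_avail[] = {
		0x00100000, 0x07f00000,		/* 1MB .. 127MB usable */
		0, 0
	};
	unsigned long top;
	int i, pv_npg;

	/* Find the last non-empty pair, as pmap_init() does. */
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		continue;
	top = phys_avail[i - 1];

	/* Round the top address up and count 1MB frames. */
	pv_npg = (top + EX_NBPDR - 1) / EX_NBPDR;
	printf("pv_npg = %d\n", pv_npg);	/* 127 for this layout */
	return (0);
}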
@@ -2168,6 +2228,13 @@ validate:
 		mips_icache_sync_range(va, PAGE_SIZE);
 		mips_dcache_wbinv_range(va, PAGE_SIZE);
 	}
+
+	/*
+	 * If both the page table page and the reservation are fully
+	 * populated, then attempt promotion.
+	 */
+	if (mpte == NULL && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
+		pmap_promote_pde(pmap, pte, va);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (KERN_SUCCESS);
@@ -2393,6 +2460,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_page_t m, mpte;
+	vm_offset_t va;
 	vm_pindex_t diff, psize;
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
@@ -2403,8 +2471,14 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
-		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
-		    prot, mpte);
+		va = start + ptoa(diff);
+		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
+		    m->psind == 1 && pg_ps_enabled &&
+		    pmap_enter_pte(pmap, va, m, prot))
+			m = &m[NBPDR / PAGE_SIZE - 1];
+		else
+			mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
+			    prot, mpte);
 		m = TAILQ_NEXT(m, listq);
 	}
 	rw_wunlock(&pvh_global_lock);
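pmap_enter_object() above falls back to 4KB mappings unless the candidate address is 1MB-aligned, the whole 1MB fits inside the requested range, the page heads a fully populated reservation (psind == 1) and superpages are enabled. A small sketch of just that predicate follows; it is not part of the patch, and EX_NBPDR/EX_PDRMASK stand in for NBPDR/PDRMASK with the 1MB superpage size assumed here.

/*
 * Illustrative sketch, not part of the patch: the eligibility test used by
 * pmap_enter_object() before trying pmap_enter_pte().
 */
#include <stdbool.h>
#include <stdio.h>

#define	EX_NBPDR	(1024 * 1024)
#define	EX_PDRMASK	(EX_NBPDR - 1)

static bool
ex_try_1m(unsigned long va, unsigned long end, int psind, int ps_enabled)
{
	return ((va & EX_PDRMASK) == 0 &&	/* 1MB-aligned start */
	    va + EX_NBPDR <= end &&		/* the mapping fits */
	    psind == 1 &&			/* fully populated reservation */
	    ps_enabled != 0);			/* vm.pmap.pg_ps_enabled */
}

int
main(void)
{
	printf("%d\n", ex_try_1m(0x00400000, 0x00800000, 1, 1));	/* 1 */
	printf("%d\n", ex_try_1m(0x00401000, 0x00800000, 1, 1));	/* 0: unaligned */
	printf("%d\n", ex_try_1m(0x00700000, 0x00780000, 1, 1));	/* 0: does not fit */
	return (0);
}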
+ */ + firstpte = pmap_pte(pmap, trunc_1mpage(va)); + newpte = *firstpte; + if ((newpte & ((~PAGE_MASK & PDRMASK) | PTE_V)) != PTE_V) { + pmap_pte_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#v" + " in pmap %p", va, pmap); + return; + } +#if 0 + if ((*firstpte & PTE_MANAGED) != 0 && pmap == kernel_pmap) { + pmap_pte_p_failures++; + CTR2(KTR_PMAP, "pmap_promot_pde: failure for va %#x" + " in pmap %p", va, pmap); + return; + } +#endif + /* + * Examine each of the other PTEs in the specified PTP. Abort if thi + * PTE maps an unexpected 4KB physica lpage or does not have identical + * characteristics to the first PTE. + */ + pa = (newpte & PTE_V) + NBPDR - PAGE_SIZE; + for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { + oldpte = *pte; + if ((oldpte & PTE_V) != pa) { + pmap_pte_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" + " in pmap %p", va, pmap); + return; + } +#if 0 + if ((oldpte & PTE_PROMOTE) != (newpte & PTE_PROMOTE)) { + pmap_pte_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" + " in pmap %p", va, pmap); + return; + } +#endif + pa -= PAGE_SIZE; + } + + /* + * Save the page table page in its current state until the PDE mapping + * the superpage is demeoted by pmap_demote_pde() or destroyed by + * pmap_destroy_pde(). + * XXX: vm_page_array? Do we need it? + */ +#if 0 + if (pmap_insert_pt_page(pmap, mpte)) { + pmap_pte_p_failures++; + CTR2(KTR_PMAP, + "pmap_promote_pde: failure for va %#x in pmap %p", va, + pmap); + return; + } +#endif + + /* + * Promote the pv entries. + */ + if ((newpte & PTE_MANAGED) != 0) + pmap_pv_promote_pde(pmap, va, newpte); /* XXX: PG_PS_FRAME */ + + /* XXX:map the superpage?? */ + pmap_pte_promotions++; + CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" + " in pmap %p", va, pmap); +} + +/* + * Tries to create a 1MB page mapping. Returns TRUE if successful and + * FALSE otherwise. Fails if (1) a page table page cannot be allocated without + * blocking, (2) a mapping already exists at the specified virtual address, or + * (3) a pv entry cannot be allocated without reclaiming another pv entry. + */ +static boolean_t +pmap_enter_pte(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + pd_entry_t *pde; + pt_entry_t *pte; + vm_paddr_t pa; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pde = pmap_pde(pmap, va); + if (*pde != 0) { + CTR2(KTR_PMAP, "pmap_enter_pte: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + pte = pmap_pte(pmap, va); + pa = VM_PAGE_TO_PHYS(m); + *pte = TLBLO_PA_TO_PFN(pa); + + /* Mark this page as read-only */ + *pte |= PTE_RO; + + /* Mark this page as valid. */ + *pte |= PTE_V; + + /* XXX: kernel_pmap? */ + if (is_kernel_pmap(pmap)) { + *pte |= PTE_G; + } + + /* Set the right superpage size. */ + *pte |= TLB_MASK1MB; + + /* Take care of cache bits. */ + if (is_cacheable_mem(pa)) + *pte |= PTE_C_CACHE; + else + *pte |= PTE_C_UNCACHED; + if ((m->oflags & VPO_UNMANAGED) == 0) { + *pte |= PTE_MANAGED; + + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { + CTR2(KTR_PMAP, "pmap_enter_pte: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + } + + /* Sync I & D caches. Do this only if the target pmap belongs + * to the current process. Otherwise, an unresolvable TLB miss + * may occur. + */ + if (pmap == &curproc->p_vmspace->vm_pmap) { + /* XXXDAVIDE: Fix for superpages? 
+/*
+ * Tries to create a 1MB page mapping.  Returns TRUE if successful and
+ * FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
+ * blocking, (2) a mapping already exists at the specified virtual address, or
+ * (3) a pv entry cannot be allocated without reclaiming another pv entry.
+ */
+static boolean_t
+pmap_enter_pte(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+{
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	vm_paddr_t pa;
+
+	rw_assert(&pvh_global_lock, RA_WLOCKED);
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	pde = pmap_pde(pmap, va);
+	if (*pde != 0) {
+		CTR2(KTR_PMAP, "pmap_enter_pte: failure for va %#lx"
+		    " in pmap %p", va, pmap);
+		return (FALSE);
+	}
+	pte = pmap_pte(pmap, va);
+	pa = VM_PAGE_TO_PHYS(m);
+	*pte = TLBLO_PA_TO_PFN(pa);
+
+	/* Mark this page as read-only. */
+	*pte |= PTE_RO;
+
+	/* Mark this page as valid. */
+	*pte |= PTE_V;
+
+	/* XXX: kernel_pmap? */
+	if (is_kernel_pmap(pmap)) {
+		*pte |= PTE_G;
+	}
+
+	/* Set the right superpage size. */
+	*pte |= TLB_MASK1MB;
+
+	/* Take care of cache bits. */
+	if (is_cacheable_mem(pa))
+		*pte |= PTE_C_CACHE;
+	else
+		*pte |= PTE_C_UNCACHED;
+	if ((m->oflags & VPO_UNMANAGED) == 0) {
+		*pte |= PTE_MANAGED;
+
+		/*
+		 * Abort this mapping if its PV entry could not be created.
+		 */
+		if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
+			CTR2(KTR_PMAP, "pmap_enter_pte: failure for va %#lx"
+			    " in pmap %p", va, pmap);
+			return (FALSE);
+		}
+	}
+
+	/* Sync I & D caches.  Do this only if the target pmap belongs
+	 * to the current process.  Otherwise, an unresolvable TLB miss
+	 * may occur.
+	 */
+	if (pmap == &curproc->p_vmspace->vm_pmap) {
+		/* XXXDAVIDE: Fix for superpages? */
+		va &= ~PAGE_MASK;
+		mips_icache_sync_range(va, PAGE_SIZE);
+		mips_dcache_wbinv_range(va, PAGE_SIZE);
+	}
+
+	/*
+	 * Increment counters.
+	 */
+	pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
+
+	/* pmap_pte_mappings++; XXX:FIXME */
+
+	CTR2(KTR_PMAP, "pmap_enter_pte: success for va %#lx"
+	    " in pmap %p", va, pmap);
+	return (TRUE);
+}
+
+#if 0
+static void
+pmap_remove_pde(pmap_t pmap, vm_offset_t sva)
+{
+}
+#endif
+
+/*
  * Miscellaneous support routines follow
  */
diff --git a/sys/mips/mips/tlb.c b/sys/mips/mips/tlb.c
index 1ad8a11..0bab777 100644
--- a/sys/mips/mips/tlb.c
+++ b/sys/mips/mips/tlb.c
@@ -101,6 +101,7 @@ void
 tlb_insert_wired(unsigned i, vm_offset_t va, pt_entry_t pte0, pt_entry_t pte1)
 {
 	register_t asid;
+	register_t page_mask;
 	register_t s;
 
 	va &= ~PAGE_MASK;
@@ -109,7 +110,8 @@ tlb_insert_wired(unsigned i, vm_offset_t va, pt_entry_t pte0, pt_entry_t pte1)
 
 	asid = mips_rd_entryhi() & TLBHI_ASID_MASK;
 	mips_wr_index(i);
-	mips_wr_pagemask(0);
+	page_mask = (pte0 << 13) & ((1 << 13) - 1);
+	mips_wr_pagemask(page_mask);
 	mips_wr_entryhi(TLBHI_ENTRY(va, 0));
 	mips_wr_entrylo0(pte0);
 	mips_wr_entrylo1(pte1);
@@ -296,6 +298,7 @@ void
 tlb_update(struct pmap *pmap, vm_offset_t va, pt_entry_t pte)
 {
 	register_t asid;
+	register_t page_mask;
 	register_t s;
 	int i;
 
@@ -305,7 +308,13 @@ tlb_update(struct pmap *pmap, vm_offset_t va, pt_entry_t pte)
 
 	s = intr_disable();
 	asid = mips_rd_entryhi() & TLBHI_ASID_MASK;
-	mips_wr_pagemask(0);
+	/*
+	 * Take superpages into account.  Do something admittedly ugly:
+	 * take the page-size bits for this mapping from the PTE and
+	 * write them into the PageMask register.
+	 */
+	page_mask = (pte << 13) & ((1 << 13) - 1);
+	mips_wr_pagemask(page_mask);
 	mips_wr_entryhi(TLBHI_ENTRY(va, pmap_asid(pmap)));
 	tlb_probe();
 	i = mips_rd_index();
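The tlb.c hunks above want to derive the PageMask value from the PTE itself instead of always writing 0. Under the assumption that a superpage PTE carries its mask bits at the TLB_MASK1MB position, as pmap_enter_pte() arranges, the extraction reduces to masking the PTE with TLB_MASK1MB. The sketch below is illustrative only, is not part of the patch, and uses EX_* stand-ins for the kernel definitions; it checks that this reading recovers the architectural 1MB PageMask while leaving 4KB mappings at 0.

/*
 * Illustrative sketch, not part of the patch: extracting the PageMask bits
 * from a PTE that stores them at the TLB_MASK1MB position.
 */
#include <assert.h>
#include <stdint.h>

#define	EX_TLBMASK_SHIFT	13
#define	EX_TLB_MASK1MB		(((1u << 8) - 1) << EX_TLBMASK_SHIFT)

int
main(void)
{
	uint32_t pte_4k = 0x1;				/* no mask bits: 4KB page */
	uint32_t pte_1m = 0x1 | EX_TLB_MASK1MB;		/* 1MB superpage */

	assert((pte_4k & EX_TLB_MASK1MB) == 0);		/* PageMask = 0 */
	assert((pte_1m & EX_TLB_MASK1MB) == (0xffu << 13));
	return (0);
}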