diff --git a/sys/conf/options.mips b/sys/conf/options.mips
index f7432aa..c755b9c 100644
--- a/sys/conf/options.mips
+++ b/sys/conf/options.mips
@@ -86,6 +86,13 @@ OCTEON_BOARD_CAPK_0100ND	opt_cvmx.h
 BERI_LARGE_TLB		opt_global.h
 
 #
+# Options for hardware with PageMask register support
+#
+
+# Use one large page (currently 16K) for the kernel thread stack
+KSTACK_LARGE_PAGE	opt_global.h
+
+#
 # Options that control the Atheros SoC peripherals
 #
 ARGE_DEBUG		opt_arge.h
diff --git a/sys/mips/include/cpuinfo.h b/sys/mips/include/cpuinfo.h
index 05b5a0f..da6e88c 100644
--- a/sys/mips/include/cpuinfo.h
+++ b/sys/mips/include/cpuinfo.h
@@ -54,6 +54,7 @@ struct mips_cpuinfo {
 	u_int8_t	cpu_rev;
 	u_int8_t	cpu_impl;
 	u_int8_t	tlb_type;
+	u_int32_t	tlb_pgmask;
 	u_int16_t	tlb_nentries;
 	u_int8_t	icache_virtual;
 	boolean_t	cache_coherent_dma;
diff --git a/sys/mips/include/param.h b/sys/mips/include/param.h
index 264a847..39a3d71 100644
--- a/sys/mips/include/param.h
+++ b/sys/mips/include/param.h
@@ -161,11 +161,30 @@
 
 #define	MAXDUMPPGS	1		/* xxx: why is this only one? */
 
+#ifdef KSTACK_LARGE_PAGE
+/*
+ * For a large kernel stack page, KSTACK_SIZE must be a page size
+ * supported by the hardware (e.g., 16K).
+ */
+#define	KSTACK_SIZE		(1 << 14)	/* Single 16K page */
+#define	KSTACK_PAGE_SIZE	KSTACK_SIZE
+#define	KSTACK_PAGE_MASK	(KSTACK_PAGE_SIZE - 1)
+#define	KSTACK_PAGES		(KSTACK_SIZE / PAGE_SIZE)
+#define	KSTACK_TLBMASK_MASK	((KSTACK_PAGE_MASK >> (TLBMASK_SHIFT - 1)) \
+				    << TLBMASK_SHIFT)
+#define	KSTACK_GUARD_PAGES	((KSTACK_PAGE_SIZE * 2) / PAGE_SIZE)
+
+#else /* ! KSTACK_LARGE_PAGE */
+
 /*
  * The kernel stack needs to be aligned on a (PAGE_SIZE * 2) boundary.
  */
 #define	KSTACK_PAGES		2	/* kernel stack */
+#define	KSTACK_SIZE		(KSTACK_PAGES * PAGE_SIZE)
+#define	KSTACK_PAGE_SIZE	PAGE_SIZE
+#define	KSTACK_PAGE_MASK	(PAGE_SIZE - 1)
 #define	KSTACK_GUARD_PAGES	2	/* pages of kstack guard; 0 disables */
+#endif /* ! KSTACK_LARGE_PAGE */
 
 /*
  * Mach derived conversion macros
diff --git a/sys/mips/include/pte.h b/sys/mips/include/pte.h
index 9e9c87b..9c42e17 100644
--- a/sys/mips/include/pte.h
+++ b/sys/mips/include/pte.h
@@ -188,4 +188,17 @@ typedef pt_entry_t *pd_entry_t;
 #endif
 #endif /* LOCORE */
 
+
+/* PageMask Register (CP0 Register 5, Select 0) Values */
+#define	MIPS3_PGMASK_MASKX	0x00001800
+#define	MIPS3_PGMASK_4K		0x00000000
+#define	MIPS3_PGMASK_16K	0x00006000
+#define	MIPS3_PGMASK_64K	0x0001e000
+#define	MIPS3_PGMASK_256K	0x0007e000
+#define	MIPS3_PGMASK_1M		0x001fe000
+#define	MIPS3_PGMASK_4M		0x007fe000
+#define	MIPS3_PGMASK_16M	0x01ffe000
+#define	MIPS3_PGMASK_64M	0x07ffe000
+#define	MIPS3_PGMASK_256M	0x1fffe000
+
 #endif /* !_MACHINE_PTE_H_ */
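A quick sanity check of the param.h arithmetic above: assuming TLBMASK_SHIFT is 13 (its value in the FreeBSD/MIPS cpuregs.h, which this diff does not touch), KSTACK_TLBMASK_MASK reduces to exactly the MIPS3_PGMASK_16K encoding defined in pte.h. A standalone sketch, for verification only and not part of the patch:

	/* Verify KSTACK_TLBMASK_MASK == MIPS3_PGMASK_16K for a 16K stack. */
	#include <assert.h>
	#include <stdio.h>

	#define	TLBMASK_SHIFT		13		/* assumed, from cpuregs.h */
	#define	KSTACK_SIZE		(1 << 14)	/* single 16K page */
	#define	KSTACK_PAGE_MASK	(KSTACK_SIZE - 1)
	#define	KSTACK_TLBMASK_MASK	((KSTACK_PAGE_MASK >> (TLBMASK_SHIFT - 1)) \
					    << TLBMASK_SHIFT)
	#define	MIPS3_PGMASK_16K	0x00006000

	int
	main(void)
	{
		/* (0x3fff >> 12) << 13 == 3 << 13 == 0x6000. */
		printf("KSTACK_TLBMASK_MASK = %#x\n", KSTACK_TLBMASK_MASK);
		assert(KSTACK_TLBMASK_MASK == MIPS3_PGMASK_16K);
		return (0);
	}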
diff --git a/sys/mips/mips/cpu.c b/sys/mips/mips/cpu.c
index 0473711..9197264 100644
--- a/sys/mips/mips/cpu.c
+++ b/sys/mips/mips/cpu.c
@@ -178,6 +178,20 @@ mips_get_identity(struct mips_cpuinfo *cpuinfo)
 	    * cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_nways;
 	cpuinfo->l1.dc_size = cpuinfo->l1.dc_linesize
 	    * cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_nways;
+
+	/*
+	 * Probe the PageMask register to see which page sizes are supported
+	 * by writing all ones and then reading the value back.
+	 */
+	mips_wr_pagemask(~0);
+	cpuinfo->tlb_pgmask = mips_rd_pagemask();
+	mips_wr_pagemask(MIPS3_PGMASK_4K);
+
+#ifdef KSTACK_LARGE_PAGE
+	if ((cpuinfo->tlb_pgmask & MIPS3_PGMASK_16K) == 0)
+		panic("%s: 16K pages are not supported by this CPU.",
+		    __func__);
+#endif
 }
 
 void
@@ -253,7 +267,32 @@ cpu_identify(void)
 		} else if (cpuinfo.tlb_type == MIPS_MMU_FIXED) {
 			printf("Fixed mapping");
 		}
-		printf(", %d entries\n", cpuinfo.tlb_nentries);
+		printf(", %d entries ", cpuinfo.tlb_nentries);
+
+		if (cpuinfo.tlb_pgmask) {
+			printf("(");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_MASKX)
+				printf("1K ");
+			printf("4K ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_16K)
+				printf("16K ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_64K)
+				printf("64K ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_256K)
+				printf("256K ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_1M)
+				printf("1M ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_4M)
+				printf("4M ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_16M)
+				printf("16M ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_64M)
+				printf("64M ");
+			if (cpuinfo.tlb_pgmask & MIPS3_PGMASK_256M)
+				printf("256M ");
+			printf("pg sizes)");
+		}
+		printf("\n");
 	}
 
 	printf("  L1 i-cache: ");
diff --git a/sys/mips/mips/genassym.c b/sys/mips/mips/genassym.c
index 5df4564..95f2b42 100644
--- a/sys/mips/mips/genassym.c
+++ b/sys/mips/mips/genassym.c
@@ -99,6 +99,9 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(MDTD_COP2USED, MDTD_COP2USED);
+#ifdef KSTACK_LARGE_PAGE
+ASSYM(KSTACK_TLBMASK_MASK, KSTACK_TLBMASK_MASK);
+#endif
 
 ASSYM(MIPS_KSEG0_START, MIPS_KSEG0_START);
 ASSYM(MIPS_KSEG1_START, MIPS_KSEG1_START);
diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c
index 140f3f4..45089a9 100644
--- a/sys/mips/mips/machdep.c
+++ b/sys/mips/mips/machdep.c
@@ -286,9 +286,9 @@ mips_proc0_init(void)
 #endif
 	proc_linkup0(&proc0, &thread0);
 
-	KASSERT((kstack0 & PAGE_MASK) == 0,
-	    ("kstack0 is not aligned on a page boundary: 0x%0lx",
-	    (long)kstack0));
+	KASSERT((kstack0 & ((KSTACK_PAGE_SIZE * 2) - 1)) == 0,
+	    ("kstack0 is not aligned on a 0x%0lx boundary: 0x%0lx",
+	    (long)(KSTACK_PAGE_SIZE * 2), (long)kstack0));
 	thread0.td_kstack = kstack0;
 	thread0.td_kstack_pages = KSTACK_PAGES;
 	/*
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 35898b4..5a29792 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -546,9 +546,15 @@ again:
 	msgbufinit(msgbufp, msgbufsize);
 
 	/*
-	 * Steal thread0 kstack.
+	 * Steal thread0 kstack.  This must be aligned to
+	 * (KSTACK_PAGE_SIZE * 2) so it can be mapped by a single TLB entry.
+	 *
+	 * XXX There should be a better way of getting aligned memory
+	 * with pmap_steal_memory().
 	 */
-	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
+	kstack0 = pmap_steal_memory((KSTACK_PAGES + KSTACK_GUARD_PAGES) <<
+	    PAGE_SHIFT);
+	kstack0 = roundup2(kstack0, (KSTACK_PAGE_SIZE * 2));
 
 	virtual_avail = VM_MIN_KERNEL_ADDRESS;
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
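The over-allocation in the pmap.c hunk above works because the guard pages supply the slack that roundup2() may consume: with 4K pages and a 16K stack, 12 pages (48K) are stolen, and rounding the base up to a 32K boundary advances it by at most 28K, leaving at least 20K for the 16K stack. A standalone sketch of that arithmetic (constants assumed as in param.h; not part of the patch):

	#include <assert.h>

	#define	PAGE_SIZE		(1UL << 12)
	#define	KSTACK_SIZE		(1UL << 14)
	#define	KSTACK_PAGE_SIZE	KSTACK_SIZE
	#define	KSTACK_PAGES		(KSTACK_SIZE / PAGE_SIZE)
	#define	KSTACK_GUARD_PAGES	((KSTACK_PAGE_SIZE * 2) / PAGE_SIZE)
	#define	roundup2(x, y)		(((x) + ((y) - 1)) & ~((y) - 1))

	int
	main(void)
	{
		unsigned long base, kstack0, stolen;

		stolen = (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;
		/* Worst case: the stolen region starts just past a boundary. */
		base = (1UL << 20) + PAGE_SIZE;
		kstack0 = roundup2(base, KSTACK_PAGE_SIZE * 2);
		assert(kstack0 + KSTACK_SIZE <= base + stolen);
		return (0);
	}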
diff --git a/sys/mips/mips/swtch.S b/sys/mips/mips/swtch.S
index ae64883..cc75547 100644
--- a/sys/mips/mips/swtch.S
+++ b/sys/mips/mips/swtch.S
@@ -340,36 +340,91 @@ blocked_loop:
 	 * NOTE: This is hard coded to UPAGES == 2.
 	 * Also, there should be no TLB faults at this point.
 	 */
-	MTC0	v0, MIPS_COP_0_TLB_HI		# VPN = va
+	MTC0	v0, MIPS_COP_0_TLB_HI		# VPN = va
 	HAZARD_DELAY
 	tlbp					# probe VPN
 	HAZARD_DELAY
-	mfc0	s0, MIPS_COP_0_TLB_INDEX
+	mfc0	s0, MIPS_COP_0_TLB_INDEX
 	HAZARD_DELAY
-	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
-	bltz	s0, entry0set
+	# MIPS_KSEG0_START + (2 * index * PAGE_SIZE) -> MIPS_COP_0_TLB_HI
+	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
+#ifdef KSTACK_LARGE_PAGE
+	bltz	s0, inval_nxt1
+#else
+	bltz	s0, entry0set
+#endif
+	nop
+	sll	s0, PAGE_SHIFT + 1
+	PTR_ADDU t1, s0
+	MTC0	t1, MIPS_COP_0_TLB_HI
+	PTE_MTC0 zero, MIPS_COP_0_TLB_LO0
+	PTE_MTC0 zero, MIPS_COP_0_TLB_LO1
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+	tlbwi
+	HAZARD_DELAY
+
+#ifdef KSTACK_LARGE_PAGE
+/*
+ * With a KSTACK_PAGE_SIZE of 16K and a PAGE_SIZE of 4K, it is possible that
+ * a second TLB entry is currently mapping the kernel thread stack as
+ * regular 4K-sized pages.  Check for this case and, if so, invalidate
+ * that TLB entry as well.
+ */
+#if (PAGE_SIZE != 4096) || (KSTACK_PAGE_SIZE != 16384)
+#error PAGE_SIZE is not 4K or KSTACK_PAGE_SIZE is not 16K.
+#endif
+inval_nxt1:
+	move	v1, v0
+	PTR_ADDU v1, PAGE_SIZE * 2
+	MTC0	v1, MIPS_COP_0_TLB_HI		# VPN = va
+	HAZARD_DELAY
+	tlbp					# probe VPN
+	HAZARD_DELAY
+	mfc0	s0, MIPS_COP_0_TLB_INDEX
+	HAZARD_DELAY
+
+	# MIPS_KSEG0_START + (2 * index * PAGE_SIZE) -> MIPS_COP_0_TLB_HI
+	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
+	bltz	s0, entry0set
 	nop
-	sll	s0, PAGE_SHIFT + 1
-	addu	t1, s0
-	MTC0	t1, MIPS_COP_0_TLB_HI
+	sll	s0, PAGE_SHIFT + 1
+	PTR_ADDU t1, s0
+	MTC0	t1, MIPS_COP_0_TLB_HI
 	PTE_MTC0 zero, MIPS_COP_0_TLB_LO0
 	PTE_MTC0 zero, MIPS_COP_0_TLB_LO1
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
 	HAZARD_DELAY
 	tlbwi
 	HAZARD_DELAY
-	MTC0	v0, MIPS_COP_0_TLB_HI		# set VPN again
+#endif /* KSTACK_LARGE_PAGE */
 
 entry0set:
+	MTC0	v0, MIPS_COP_0_TLB_HI		# set VPN again
+	HAZARD_DELAY
 	/* SMP!! - Works only for unshared TLB case - i.e. no v-cpus */
-	mtc0	zero, MIPS_COP_0_TLB_INDEX	# TLB entry #0
+	mtc0	zero, MIPS_COP_0_TLB_INDEX	# TLB entry #0
 	HAZARD_DELAY
 	PTE_MTC0 a1, MIPS_COP_0_TLB_LO0		# upte[0]
 	HAZARD_DELAY
 	PTE_MTC0 a2, MIPS_COP_0_TLB_LO1		# upte[1]
+#ifdef KSTACK_LARGE_PAGE
+	HAZARD_DELAY
+	li	t1, KSTACK_TLBMASK_MASK
+	MTC0	t1, MIPS_COP_0_TLB_PG_MASK
 	HAZARD_DELAY
+#else
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+#endif
 	tlbwi					# set TLB entry #0
 	HAZARD_DELAY
+
+#ifdef KSTACK_LARGE_PAGE
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+#endif
 
 	/*
 	 * Now running on new u struct.
 	 */
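The MIPS_KSEG0_START + (2 * index * PAGE_SIZE) value written to EntryHi above is the usual FreeBSD/MIPS invalidation idiom: each TLB entry maps an even/odd pair of pages, so parking slot i's VPN2 at a distinct KSEG0 address (which is never TLB-translated) guarantees no two entries ever alias. The same idiom exists in C as tlb_invalidate_one() in sys/mips/mips/tlb.c; roughly (a paraphrased sketch, kernel context assumed):

	static void
	tlb_invalidate_one(unsigned i)
	{
		/* Unique, never-translated VPN2 for slot i (a pair of pages). */
		mips_wr_entryhi(MIPS_KSEG0_START + (2 * i * PAGE_SIZE));
		mips_wr_entrylo0(0);
		mips_wr_entrylo1(0);
		mips_wr_pagemask(0);	/* back to 4K pages, as the patch now does */
		mips_wr_index(i);
		tlb_write_indexed();
	}

Zeroing PageMask on every invalidation is the new wrinkle here: once TLB entry #0 can be a 16K page, a stale mask must never leak into a neighboring tlbwi.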
diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c
index c39bbe6..e32b38b 100644
--- a/sys/mips/mips/vm_machdep.c
+++ b/sys/mips/mips/vm_machdep.c
@@ -282,7 +282,6 @@ void
 cpu_thread_swapin(struct thread *td)
 {
 	pt_entry_t *pte;
-	int i;
 
 	/*
 	 * The kstack may be at a different physical address now.
@@ -290,10 +289,21 @@ cpu_thread_swapin(struct thread *td)
 	 * part of the thread struct so cpu_switch() can quickly map in
 	 * the pcb struct and kernel stack.
 	 */
+#ifdef KSTACK_LARGE_PAGE
+	/* Just one entry for one large kernel page. */
+	pte = pmap_pte(kernel_pmap, td->td_kstack);
+	td->td_md.md_upte[0] = *pte & ~TLBLO_SWBITS_MASK;
+	td->td_md.md_upte[1] = 1;
+
+#else
+
+	int i;
+
 	for (i = 0; i < KSTACK_PAGES; i++) {
 		pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
 		td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
 	}
+#endif /* ! KSTACK_LARGE_PAGE */
 }
 
 void
@@ -305,17 +315,31 @@ void
 cpu_thread_alloc(struct thread *td)
 {
 	pt_entry_t *pte;
-	int i;
 
-	KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0, ("kernel stack must be aligned."));
+	KASSERT((td->td_kstack & ((KSTACK_PAGE_SIZE * 2) - 1)) == 0,
+	    ("kernel stack must be aligned."));
 	td->td_pcb = (struct pcb *)(td->td_kstack +
 	    td->td_kstack_pages * PAGE_SIZE) - 1;
 	td->td_frame = &td->td_pcb->pcb_regs;
 
-	for (i = 0; i < KSTACK_PAGES; i++) {
-		pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
-		td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
+#ifdef KSTACK_LARGE_PAGE
+	/* Just one entry for one large kernel page. */
+	pte = pmap_pte(kernel_pmap, td->td_kstack);
+	td->td_md.md_upte[0] = *pte & ~TLBLO_SWBITS_MASK;
+	td->td_md.md_upte[1] = 1;
+
+#else
+
+	{
+		int i;
+
+		for (i = 0; i < KSTACK_PAGES; i++) {
+			pte = pmap_pte(kernel_pmap, td->td_kstack + i *
+			    PAGE_SIZE);
+			td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
+		}
 	}
+#endif /* ! KSTACK_LARGE_PAGE */
 }
 
 void
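One detail worth spelling out: the md_upte[1] = 1 in the large-page paths above appears to be just PTE_G (bit 0 of EntryLo). A TLB entry's effective global bit is the AND of EntryLo0.G and EntryLo1.G, so the second half of the 32K pair is left invalid but still global, keeping the kernel-stack entry ASID-independent while only the first 16K maps anything. The new KASSERT, rendered standalone (constants assumed as in param.h; not part of the patch):

	#include <assert.h>

	#define	KSTACK_PAGE_SIZE	(1UL << 14)

	static int
	kstack_aligned(unsigned long kstack)
	{
		/* Low 15 bits clear: a (KSTACK_PAGE_SIZE * 2) = 32K boundary. */
		return ((kstack & ((KSTACK_PAGE_SIZE * 2) - 1)) == 0);
	}

	int
	main(void)
	{
		assert(kstack_aligned(0xc0008000UL));
		assert(!kstack_aligned(0xc0004000UL));
		return (0);
	}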
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index ecd5d20..918d353 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -311,6 +311,161 @@ SYSCTL_INT(_vm, OID_AUTO, kstacks, CTLFLAG_RD, &kstacks, 0,
 #define KSTACK_MAX_PAGES 32
 #endif
 
+#if defined(__mips__)
+
+static vm_offset_t
+vm_kstack_valloc(int pages)
+{
+	vm_offset_t ks;
+
+	/*
+	 * We need to align the kstack's mapped address to fit within
+	 * a single TLB entry.
+	 */
+	if (vmem_xalloc(kernel_arena,
+	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE,
+	    KSTACK_PAGE_SIZE * 2, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
+	    M_BESTFIT | M_NOWAIT, &ks)) {
+		return (0);
+	}
+
+	return (ks);
+}
+
+#ifdef KSTACK_LARGE_PAGE
+
+#define KSTACK_OBJT	OBJT_PHYS
+
+static int
+vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
+    vm_page_t ma[])
+{
+	vm_page_t m, end_m;
+	int i;
+
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
+	VM_OBJECT_ASSERT_WLOCKED(ksobj);
+
+	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
+
+	for (i = 0; i < pages; i++) {
+retrylookup:
+		if ((m = vm_page_lookup(ksobj, i)) == NULL)
+			break;
+		if (vm_page_busied(m)) {
+			/*
+			 * Reference the page before unlocking and
+			 * sleeping so that the page daemon is less
+			 * likely to reclaim it.
+			 */
+			vm_page_aflag_set(m, PGA_REFERENCED);
+			vm_page_lock(m);
+			VM_OBJECT_WUNLOCK(ksobj);
+			vm_page_busy_sleep(m, "pgrbwt");
+			VM_OBJECT_WLOCK(ksobj);
+			goto retrylookup;
+		} else {
+			if ((allocflags & VM_ALLOC_WIRED) != 0) {
+				vm_page_lock(m);
+				vm_page_wire(m);
+				vm_page_unlock(m);
+			}
+			ma[i] = m;
+		}
+	}
+	if (i == pages)
+		return (i);
+
+	KASSERT((i == 0), ("vm_kstack_palloc: ksobj already has kstack pages"));
+
+	for (;;) {
+		m = vm_page_alloc_contig(ksobj, 0, allocflags,
+		    atop(KSTACK_PAGE_SIZE), 0ul, ~0ul, KSTACK_PAGE_SIZE * 2,
+		    0, VM_MEMATTR_DEFAULT);
+		if (m != NULL)
+			break;
+		VM_OBJECT_WUNLOCK(ksobj);
+		VM_WAIT;
+		VM_OBJECT_WLOCK(ksobj);
+	}
+	end_m = m + atop(KSTACK_PAGE_SIZE);
+	for (i = 0; m < end_m; m++) {
+		m->pindex = (vm_pindex_t)i;
+		if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+			m->valid = VM_PAGE_BITS_ALL;
+		ma[i] = m;
+		i++;
+	}
+	return (i);
+}
+
+#else /* ! KSTACK_LARGE_PAGE */
+
+#define KSTACK_OBJT	OBJT_DEFAULT
+
+static int
+vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
+    vm_page_t ma[])
+{
+	int i;
+
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
+	VM_OBJECT_ASSERT_WLOCKED(ksobj);
+
+	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
+
+	for (i = 0; i < pages; i++) {
+		/*
+		 * Get a kernel stack page.
+		 */
+		ma[i] = vm_page_grab(ksobj, i, allocflags);
+		if (allocflags & VM_ALLOC_NOBUSY)
+			ma[i]->valid = VM_PAGE_BITS_ALL;
+	}
+
+	return (i);
+}
+#endif /* ! KSTACK_LARGE_PAGE */
+
+#else /* ! __mips__ */
+
+#define KSTACK_OBJT	OBJT_DEFAULT
+
+static vm_offset_t
+vm_kstack_valloc(int pages)
+{
+	vm_offset_t ks;
+
+	ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
+
+	return (ks);
+}
+
+static int
+vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
+    vm_page_t ma[])
+{
+	int i;
+
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
+	VM_OBJECT_ASSERT_WLOCKED(ksobj);
+
+	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
+
+	for (i = 0; i < pages; i++) {
+		/*
+		 * Get a kernel stack page.
+		 */
+		ma[i] = vm_page_grab(ksobj, i, allocflags);
+		if (allocflags & VM_ALLOC_NOBUSY)
+			ma[i]->valid = VM_PAGE_BITS_ALL;
+	}
+
+	return (i);
+}
+#endif /* ! __mips__ */
+
+
 /*
  * Create the kernel stack (including pcb for i386) for a new thread.
  * This routine directly affects the fork perf for a process and
@@ -321,9 +476,8 @@ vm_thread_new(struct thread *td, int pages)
 {
 	vm_object_t ksobj;
 	vm_offset_t ks;
-	vm_page_t m, ma[KSTACK_MAX_PAGES];
+	vm_page_t ma[KSTACK_MAX_PAGES];
 	struct kstack_cache_entry *ks_ce;
-	int i;
 
 	/* Bounds check */
 	if (pages <= 1)
@@ -349,24 +503,12 @@ vm_thread_new(struct thread *td, int pages)
 	/*
 	 * Allocate an object for the kstack.
 	 */
-	ksobj = vm_object_allocate(OBJT_DEFAULT, pages);
-
+	ksobj = vm_object_allocate(KSTACK_OBJT, pages);
+
 	/*
 	 * Get a kernel virtual address for this thread's kstack.
 	 */
-#if defined(__mips__)
-	/*
-	 * We need to align the kstack's mapped address to fit within
-	 * a single TLB entry.
-	 */
-	if (vmem_xalloc(kernel_arena, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE,
-	    PAGE_SIZE * 2, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
-	    M_BESTFIT | M_NOWAIT, &ks)) {
-		ks = 0;
-	}
-#else
-	ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
-#endif
+	ks = vm_kstack_valloc(pages);
 	if (ks == 0) {
 		printf("vm_thread_new: kstack allocation failed\n");
 		vm_object_deallocate(ksobj);
@@ -385,21 +527,15 @@ vm_thread_new(struct thread *td, int pages)
 	 * want to deallocate them.
 	 */
 	td->td_kstack_pages = pages;
-	/*
-	 * For the length of the stack, link in a real page of ram for each
-	 * page of stack.
-	 */
+
 	VM_OBJECT_WLOCK(ksobj);
-	for (i = 0; i < pages; i++) {
-		/*
-		 * Get a kernel stack page.
-		 */
-		m = vm_page_grab(ksobj, i, VM_ALLOC_NOBUSY |
-		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
-		ma[i] = m;
-		m->valid = VM_PAGE_BITS_ALL;
-	}
+	pages = vm_kstack_palloc(ksobj, ks, (VM_ALLOC_NOBUSY | VM_ALLOC_WIRED),
+	    pages, ma);
 	VM_OBJECT_WUNLOCK(ksobj);
+	if (pages == 0) {
+		printf("vm_thread_new: vm_kstack_palloc() failed\n");
+		return (0);
+	}
 	pmap_qenter(ks, ma, pages);
 	return (1);
 }
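Two notes on the large-page vm_kstack_palloc() above. The kstack object is now created as KSTACK_OBJT, which is OBJT_PHYS in the large-page case: its pages are unmanaged and never paged out, unlike the OBJT_DEFAULT objects used otherwise. And vm_page_alloc_contig() is asked for atop(KSTACK_PAGE_SIZE) physically contiguous pages at (KSTACK_PAGE_SIZE * 2) alignment, mirroring the virtual alignment from vm_kstack_valloc() so the run can back a single 16K EntryLo slot. A standalone sketch of that geometry (not part of the patch):

	#include <assert.h>

	#define	PAGE_SHIFT		12
	#define	KSTACK_PAGE_SIZE	(1UL << 14)
	#define	atop(x)			((x) >> PAGE_SHIFT)

	int
	main(void)
	{
		unsigned long pa = 0x01000000UL;	/* hypothetical run start */

		assert(atop(KSTACK_PAGE_SIZE) == 4);		/* npages */
		assert((pa & (KSTACK_PAGE_SIZE * 2 - 1)) == 0);	/* alignment */
		return (0);
	}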
- */ - m = vm_page_grab(ksobj, i, VM_ALLOC_NOBUSY | - VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - ma[i] = m; - m->valid = VM_PAGE_BITS_ALL; - } + pages = vm_kstack_palloc(ksobj, ks, (VM_ALLOC_NOBUSY | VM_ALLOC_WIRED), + pages, ma); VM_OBJECT_WUNLOCK(ksobj); + if (pages == 0) { + printf("vm_thread_new: vm_kstack_palloc() failed\n"); + return (0); + } pmap_qenter(ks, ma, pages); return (1); } @@ -526,9 +662,9 @@ vm_thread_swapin(struct thread *td) pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) - ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | - VM_ALLOC_WIRED); + rv = vm_kstack_palloc(ksobj, td->td_kstack, (VM_ALLOC_NORMAL | + VM_ALLOC_WIRED), pages, ma); + KASSERT(rv != 0, ("vm_thread_swapin: vm_kstack_palloc() failed")); for (i = 0; i < pages; i++) { if (ma[i]->valid != VM_PAGE_BITS_ALL) { vm_page_assert_xbusied(ma[i]);