commit ce2a9f5a3f8ba48d65bd3ed20b6beded0e6a2659
Author: Stacey Son
Date:   Mon Feb 17 04:57:50 2014 -0600

    Add an option to double the kernel thread stack size by using a larger
    page size.

    This change adds the kernel option KSTACK_LARGE_PAGE, which uses a
    single 16K page for the kernel thread stack instead of multiple 4K
    pages.  This doubles the kernel thread stack size without using
    additional wired TLB entries.

    To use this, "options KSTACK_LARGE_PAGE" must be added to the kernel
    configuration file.
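    For example, a kernel configuration enabling the option might look
    like the following (a minimal sketch; the BERI_DE4_MDROOT base
    configuration and the KSTACK16K ident are illustrative names only,
    not part of this change):

        include         BERI_DE4_MDROOT
        ident           KSTACK16K
        # Use one 16K page for the kernel thread stack
        options         KSTACK_LARGE_PAGE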
diff --git a/sys/conf/options.mips b/sys/conf/options.mips
index f7432aa..c755b9c 100644
--- a/sys/conf/options.mips
+++ b/sys/conf/options.mips
@@ -86,6 +86,13 @@ OCTEON_BOARD_CAPK_0100ND	opt_cvmx.h
 BERI_LARGE_TLB		opt_global.h
 
 #
+# Options for hardware with PageMask register support
+#
+
+# Use one large page (currently 16K) for the kernel thread stack
+KSTACK_LARGE_PAGE	opt_global.h
+
+#
 # Options that control the Atheros SoC peripherals
 #
 ARGE_DEBUG		opt_arge.h
diff --git a/sys/mips/include/param.h b/sys/mips/include/param.h
index 0d6ffae..39a3d71 100644
--- a/sys/mips/include/param.h
+++ b/sys/mips/include/param.h
@@ -161,6 +161,21 @@
 
 #define	MAXDUMPPGS	1		/* xxx: why is this only one? */
 
+#ifdef KSTACK_LARGE_PAGE
+/*
+ * For a large kernel stack page the KSTACK_SIZE needs to be a page size
+ * supported by the hardware (e.g. 16K).
+ */
+#define	KSTACK_SIZE		(1 << 14)	/* Single 16K page */
+#define	KSTACK_PAGE_SIZE	KSTACK_SIZE
+#define	KSTACK_PAGE_MASK	(KSTACK_PAGE_SIZE - 1)
+#define	KSTACK_PAGES		(KSTACK_SIZE / PAGE_SIZE)
+#define	KSTACK_TLBMASK_MASK	((KSTACK_PAGE_MASK >> (TLBMASK_SHIFT - 1)) \
+				    << TLBMASK_SHIFT)
+#define	KSTACK_GUARD_PAGES	((KSTACK_PAGE_SIZE * 2) / PAGE_SIZE)
+
+#else /* ! KSTACK_LARGE_PAGE */
+
 /*
  * The kernel stack needs to be aligned on a (PAGE_SIZE * 2) boundary.
  */
@@ -169,6 +184,7 @@
 #define	KSTACK_PAGE_SIZE	PAGE_SIZE
 #define	KSTACK_PAGE_MASK	(PAGE_SIZE - 1)
 #define	KSTACK_GUARD_PAGES	2	/* pages of kstack guard; 0 disables */
+#endif /* ! KSTACK_LARGE_PAGE */
 
 /*
  * Mach derived conversion macros
diff --git a/sys/mips/mips/cpu.c b/sys/mips/mips/cpu.c
index 530e2f9..9197264 100644
--- a/sys/mips/mips/cpu.c
+++ b/sys/mips/mips/cpu.c
@@ -186,6 +186,12 @@ mips_get_identity(struct mips_cpuinfo *cpuinfo)
 	mips_wr_pagemask(~0);
 	cpuinfo->tlb_pgmask = mips_rd_pagemask();
 	mips_wr_pagemask(MIPS3_PGMASK_4K);
+
+#ifdef KSTACK_LARGE_PAGE
+	if ((cpuinfo->tlb_pgmask & MIPS3_PGMASK_16K) == 0)
+		panic("%s: 16K sized pages are not supported by this CPU.",
+		    __func__);
+#endif
 }
 
 void
diff --git a/sys/mips/mips/genassym.c b/sys/mips/mips/genassym.c
index 5df4564..95f2b42 100644
--- a/sys/mips/mips/genassym.c
+++ b/sys/mips/mips/genassym.c
@@ -99,6 +99,9 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
 ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
 ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(MDTD_COP2USED, MDTD_COP2USED);
+#ifdef KSTACK_LARGE_PAGE
+ASSYM(KSTACK_TLBMASK_MASK, KSTACK_TLBMASK_MASK);
+#endif
 
 ASSYM(MIPS_KSEG0_START, MIPS_KSEG0_START);
 ASSYM(MIPS_KSEG1_START, MIPS_KSEG1_START);
diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c
index 140f3f4..45089a9 100644
--- a/sys/mips/mips/machdep.c
+++ b/sys/mips/mips/machdep.c
@@ -286,9 +286,9 @@ mips_proc0_init(void)
 #endif
 	proc_linkup0(&proc0, &thread0);
 
-	KASSERT((kstack0 & PAGE_MASK) == 0,
-	    ("kstack0 is not aligned on a page boundary: 0x%0lx",
-	    (long)kstack0));
+	KASSERT((kstack0 & ((KSTACK_PAGE_SIZE * 2) - 1)) == 0,
+	    ("kstack0 is not aligned on a page (0x%0lx) boundary: 0x%0lx",
+	    (long)(KSTACK_PAGE_SIZE * 2), (long)kstack0));
 	thread0.td_kstack = kstack0;
 	thread0.td_kstack_pages = KSTACK_PAGES;
 	/*
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 35898b4..5a29792 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -546,9 +546,15 @@ again:
 	msgbufinit(msgbufp, msgbufsize);
 
 	/*
-	 * Steal thread0 kstack.
+	 * Steal thread0 kstack.  This must be aligned to
+	 * (KSTACK_PAGE_SIZE * 2) so it can be mapped to a single TLB entry.
+	 *
+	 * XXX There should be a better way of getting aligned memory
+	 * with pmap_steal_memory().
 	 */
-	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
+	kstack0 = pmap_steal_memory((KSTACK_PAGES + KSTACK_GUARD_PAGES) \
+	    << PAGE_SHIFT);
+	kstack0 = roundup2(kstack0, (KSTACK_PAGE_SIZE * 2));
 
 	virtual_avail = VM_MIN_KERNEL_ADDRESS;
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
diff --git a/sys/mips/mips/swtch.S b/sys/mips/mips/swtch.S
index ae64883..cc75547 100644
--- a/sys/mips/mips/swtch.S
+++ b/sys/mips/mips/swtch.S
@@ -340,36 +340,91 @@ blocked_loop:
 	 * NOTE: This is hard coded to UPAGES == 2.
 	 * Also, there should be no TLB faults at this point.
	 */
-	MTC0	v0, MIPS_COP_0_TLB_HI		# VPN = va
+	MTC0	v0, MIPS_COP_0_TLB_HI		# VPN = va
 	HAZARD_DELAY
 	tlbp					# probe VPN
 	HAZARD_DELAY
-	mfc0	s0, MIPS_COP_0_TLB_INDEX
+	mfc0	s0, MIPS_COP_0_TLB_INDEX
 	HAZARD_DELAY
-	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
-	bltz	s0, entry0set
+	# MIPS_KSEG0_START + (2 * index * PAGE_SIZE) -> MIPS_COP_0_TLB_HI
+	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
+#ifdef KSTACK_LARGE_PAGE
+	bltz	s0, inval_nxt1
+#else
+	bltz	s0, entry0set
+#endif
+	nop
+	sll	s0, PAGE_SHIFT + 1
+	PTR_ADDU t1, s0
+	MTC0	t1, MIPS_COP_0_TLB_HI
+	PTE_MTC0 zero, MIPS_COP_0_TLB_LO0
+	PTE_MTC0 zero, MIPS_COP_0_TLB_LO1
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+	tlbwi
+	HAZARD_DELAY
+
+#ifdef KSTACK_LARGE_PAGE
+/*
+ * With a KSTACK_PAGE_SIZE of 16K and PAGE_SIZE of 4K it is possible that
+ * a second TLB entry is currently mapping the kernel thread stack as
+ * regular 4K sized page(s).  Check for this case and, if so, invalidate
+ * that TLB entry as well.
+ */
+#if (PAGE_SIZE != 4096) || (KSTACK_PAGE_SIZE != 16384)
+#error PAGE_SIZE is not 4K or KSTACK_PAGE_SIZE is not 16K.
+#endif
+inval_nxt1:
+	move	v1, v0
+	PTR_ADDU v1, PAGE_SIZE * 2
+	MTC0	v1, MIPS_COP_0_TLB_HI		# VPN = va
+	HAZARD_DELAY
+	tlbp					# probe VPN
+	HAZARD_DELAY
+	mfc0	s0, MIPS_COP_0_TLB_INDEX
+	HAZARD_DELAY
+
+	# MIPS_KSEG0_START + (2 * index * PAGE_SIZE) -> MIPS_COP_0_TLB_HI
+	PTR_LI	t1, MIPS_KSEG0_START		# invalidate tlb entry
+	bltz	s0, entry0set
 	nop
-	sll	s0, PAGE_SHIFT + 1
-	addu	t1, s0
-	MTC0	t1, MIPS_COP_0_TLB_HI
+	sll	s0, PAGE_SHIFT + 1
+	PTR_ADDU t1, s0
+	MTC0	t1, MIPS_COP_0_TLB_HI
 	PTE_MTC0 zero, MIPS_COP_0_TLB_LO0
 	PTE_MTC0 zero, MIPS_COP_0_TLB_LO1
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
 	HAZARD_DELAY
 	tlbwi
 	HAZARD_DELAY
-	MTC0	v0, MIPS_COP_0_TLB_HI		# set VPN again
+#endif /* KSTACK_LARGE_PAGE */
 entry0set:
+	MTC0	v0, MIPS_COP_0_TLB_HI		# set VPN again
+	HAZARD_DELAY
 /* SMP!! - Works only for unshared TLB case - i.e. no v-cpus */
-	mtc0	zero, MIPS_COP_0_TLB_INDEX	# TLB entry #0
+	mtc0	zero, MIPS_COP_0_TLB_INDEX	# TLB entry #0
 	HAZARD_DELAY
 	PTE_MTC0 a1, MIPS_COP_0_TLB_LO0		# upte[0]
 	HAZARD_DELAY
 	PTE_MTC0 a2, MIPS_COP_0_TLB_LO1		# upte[1]
+#ifdef KSTACK_LARGE_PAGE
+	HAZARD_DELAY
+	li	t1, KSTACK_TLBMASK_MASK
+	MTC0	t1, MIPS_COP_0_TLB_PG_MASK
 	HAZARD_DELAY
+#else
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+#endif
 	tlbwi					# set TLB entry #0
 	HAZARD_DELAY
+
+#ifdef KSTACK_LARGE_PAGE
+	MTC0	zero, MIPS_COP_0_TLB_PG_MASK
+	HAZARD_DELAY
+#endif
 	/*
 	 * Now running on new u struct.
 	 */
diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c
index c39bbe6..e32b38b 100644
--- a/sys/mips/mips/vm_machdep.c
+++ b/sys/mips/mips/vm_machdep.c
@@ -282,7 +282,6 @@ void
 cpu_thread_swapin(struct thread *td)
 {
 	pt_entry_t *pte;
-	int i;
 
 	/*
 	 * The kstack may be at a different physical address now.
@@ -290,10 +289,21 @@ cpu_thread_swapin(struct thread *td)
 	 * part of the thread struct so cpu_switch() can quickly map in
 	 * the pcb struct and kernel stack.
 	 */
+#ifdef KSTACK_LARGE_PAGE
+	/* Just one entry for one large kernel page. */
+	pte = pmap_pte(kernel_pmap, td->td_kstack);
+	td->td_md.md_upte[0] = *pte & ~TLBLO_SWBITS_MASK;
+	td->td_md.md_upte[1] = 1;
+
+#else
+
+	int i;
+
 	for (i = 0; i < KSTACK_PAGES; i++) {
 		pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
 		td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
 	}
+#endif /* ! KSTACK_LARGE_PAGE */
 }
 
 void
@@ -305,17 +315,31 @@ void
 cpu_thread_alloc(struct thread *td)
 {
 	pt_entry_t *pte;
-	int i;
 
-	KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0,
-	    ("kernel stack must be aligned."));
+	KASSERT((td->td_kstack & ((KSTACK_PAGE_SIZE * 2) - 1)) == 0,
+	    ("kernel stack must be aligned."));
 	td->td_pcb = (struct pcb *)(td->td_kstack +
 	    td->td_kstack_pages * PAGE_SIZE) - 1;
 	td->td_frame = &td->td_pcb->pcb_regs;
-	for (i = 0; i < KSTACK_PAGES; i++) {
-		pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
-		td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
+
+#ifdef KSTACK_LARGE_PAGE
+	/* Just one entry for one large kernel page. */
+	pte = pmap_pte(kernel_pmap, td->td_kstack);
+	td->td_md.md_upte[0] = *pte & ~TLBLO_SWBITS_MASK;
+	td->td_md.md_upte[1] = 1;
+
+#else
+
+	{
+		int i;
+
+		for (i = 0; i < KSTACK_PAGES; i++) {
+			pte = pmap_pte(kernel_pmap, td->td_kstack + i *
+			    PAGE_SIZE);
+			td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
+		}
 	}
+#endif /* ! KSTACK_LARGE_PAGE */
 }
 
 void
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 265e8c4..918d353 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -332,6 +332,75 @@ vm_kstack_valloc(int pages)
 	return (ks);
 }
 
+#ifdef KSTACK_LARGE_PAGE
+
+#define KSTACK_OBJT	OBJT_PHYS
+
+static int
+vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
+    vm_page_t ma[])
+{
+	vm_page_t m, end_m;
+	int i;
+
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
+	VM_OBJECT_ASSERT_WLOCKED(ksobj);
+
+	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
+
+	for (i = 0; i < pages; i++) {
+retrylookup:
+		if ((m = vm_page_lookup(ksobj, i)) == NULL)
+			break;
+		if (vm_page_busied(m)) {
+			/*
+			 * Reference the page before unlocking and
+			 * sleeping so that the page daemon is less
+			 * likely to reclaim it.
+			 */
+			vm_page_aflag_set(m, PGA_REFERENCED);
+			vm_page_lock(m);
+			VM_OBJECT_WUNLOCK(ksobj);
+			vm_page_busy_sleep(m, "pgrbwt");
+			VM_OBJECT_WLOCK(ksobj);
+			goto retrylookup;
+		} else {
+			if ((allocflags & VM_ALLOC_WIRED) != 0) {
+				vm_page_lock(m);
+				vm_page_wire(m);
+				vm_page_unlock(m);
+			}
+			ma[i] = m;
+		}
+	}
+	if (i == pages)
+		return (i);
+
+	KASSERT((i == 0), ("vm_kstack_palloc: ksobj already has kstack pages"));
+
+	for (;;) {
+		m = vm_page_alloc_contig(ksobj, 0, allocflags,
+		    atop(KSTACK_PAGE_SIZE), 0ul, ~0ul, KSTACK_PAGE_SIZE * 2, 0,
+		    VM_MEMATTR_DEFAULT);
+		if (m != NULL)
+			break;
+		VM_OBJECT_WUNLOCK(ksobj);
+		VM_WAIT;
+		VM_OBJECT_WLOCK(ksobj);
+	}
+	end_m = m + atop(KSTACK_PAGE_SIZE);
+	for (i = 0; m < end_m; m++) {
+		m->pindex = (vm_pindex_t)i;
+		if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+			m->valid = VM_PAGE_BITS_ALL;
+		ma[i] = m;
+		i++;
+	}
+	return (i);
+}
+
+#else /* ! KSTACK_LARGE_PAGE */
+
 #define KSTACK_OBJT	OBJT_DEFAULT
 
 static int
@@ -340,6 +409,7 @@ vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
 {
 	int i;
 
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
 	VM_OBJECT_ASSERT_WLOCKED(ksobj);
 
 	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
@@ -355,6 +425,7 @@ vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
 
 	return (i);
 }
+#endif /* ! KSTACK_LARGE_PAGE */
 
 #else /* ! __mips__ */
 
@@ -376,6 +447,7 @@ vm_kstack_palloc(vm_object_t ksobj, vm_offset_t ks, int allocflags, int pages,
 {
 	int i;
 
+	KASSERT((ksobj != NULL), ("vm_kstack_palloc: invalid VM object"));
 	VM_OBJECT_ASSERT_WLOCKED(ksobj);
 
 	allocflags = (allocflags & ~VM_ALLOC_CLASS_MASK) | VM_ALLOC_NORMAL;
@@ -590,7 +662,8 @@ vm_thread_swapin(struct thread *td)
 	pages = td->td_kstack_pages;
 	ksobj = td->td_kstack_obj;
 	VM_OBJECT_WLOCK(ksobj);
-	rv = vm_kstack_palloc(ksobj, td->td_kstack, VM_ALLOC_WIRED, pages, ma);
+	rv = vm_kstack_palloc(ksobj, td->td_kstack, (VM_ALLOC_NORMAL |
+	    VM_ALLOC_WIRED), pages, ma);
 	KASSERT(rv != 0, ("vm_thread_swapin: vm_kstack_palloc() failed"));
 	for (i = 0; i < pages; i++) {
 		if (ma[i]->valid != VM_PAGE_BITS_ALL) {