==== //depot/projects/sparc64/sys/kern/kern_fork.c#19 - /usr/p4/sparc64/sys/kern/kern_fork.c ====
@@ -138,7 +138,7 @@
 	struct proc *p2;
 
 	mtx_lock(&Giant);
-	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2);
+	error = fork1(td, RFFDG | RFPROC | RFPPWAIT /*| RFMEM*/, &p2);
 	if (error == 0) {
 		td->td_retval[0] = p2->p_pid;
 		td->td_retval[1] = 0;

==== //depot/projects/sparc64/sys/sparc64/include/pcpu.h#4 - /usr/p4/sparc64/sys/sparc64/include/pcpu.h ====
@@ -45,6 +45,9 @@
 #define	PCPU_MD_FIELDS						\
 	struct	intr_queue pc_iq;	/* interrupt queue */	\
 	u_int	pc_mid;						\
+	u_int	pc_tlb_ctx;					\
+	u_int	pc_tlb_ctx_max;					\
+	u_int	pc_tlb_ctx_min;					\
 	u_int	pc_wp_insn;	/* watch point support */	\
 	u_long	pc_wp_pstate;					\
 	u_long	pc_wp_va;					\

==== //depot/projects/sparc64/sys/sparc64/include/pmap.h#25 - /usr/p4/sparc64/sys/sparc64/include/pmap.h ====
@@ -88,6 +88,7 @@
 };
 
 void	pmap_bootstrap(vm_offset_t ekva);
+void	pmap_context_rollover(void);
 vm_offset_t pmap_kextract(vm_offset_t va);
 void	pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags);
 void	pmap_qenter_flags(vm_offset_t va, vm_page_t *m, int count, u_long fl);

==== //depot/projects/sparc64/sys/sparc64/include/tlb.h#23 - /usr/p4/sparc64/sys/sparc64/include/tlb.h ====
@@ -105,14 +105,10 @@
 		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
 		    ASI_DMMU_DEMAP, 0);
 		membar(Sync);
-	} else {
-		stxa(AA_DMMU_SCXR, ASI_DMMU, ctx);
-		membar(Sync);
-		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE,
+	} else if (ctx != -1) {
+		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
 		    ASI_DMMU_DEMAP, 0);
 		membar(Sync);
-		stxa(AA_DMMU_SCXR, ASI_DMMU, 0);
-		membar(Sync);
 	}
 }
 
@@ -155,15 +151,10 @@
 		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
 		    ASI_IMMU_DEMAP, 0);
 		flush(KERNBASE);
-	} else {
-		stxa(AA_DMMU_SCXR, ASI_DMMU, ctx);
-		membar(Sync);
-		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_SECONDARY | TLB_DEMAP_PAGE,
+	} else if (ctx != -1) {
+		stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE,
 		    ASI_IMMU_DEMAP, 0);
 		membar(Sync);
-		stxa(AA_DMMU_SCXR, ASI_DMMU, 0);
-		/* flush probably not needed. */
-		membar(Sync);
 	}
 }
 
@@ -188,10 +179,12 @@
 }
 
 static __inline void
-tlb_context_demap(u_int context)
+tlb_context_demap(u_int ctx)
 {
-	tlb_dtlb_context_primary_demap();
-	tlb_itlb_context_primary_demap();
+	if (ctx != -1) {
+		tlb_dtlb_context_primary_demap();
+		tlb_itlb_context_primary_demap();
+	}
 }
 
 static __inline void
@@ -231,6 +224,7 @@
 static __inline void
 tlb_store(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte)
 {
+	KASSERT(ctx != -1, ("tlb_store: invalid context"));
 	if (tlb & TLB_DTLB)
 		tlb_dtlb_store(va, ctx, tte);
 	if (tlb & TLB_ITLB)
@@ -240,6 +234,7 @@
 static __inline void
 tlb_store_slot(u_int tlb, vm_offset_t va, u_long ctx, struct tte tte, int slot)
 {
+	KASSERT(ctx != -1, ("tlb_store_slot: invalid context"));
 	if (tlb & TLB_DTLB)
 		tlb_dtlb_store_slot(va, ctx, tte, slot);
 	if (tlb & TLB_ITLB)

==== //depot/projects/sparc64/sys/sparc64/sparc64/genassym.c#53 - /usr/p4/sparc64/sys/sparc64/sparc64/genassym.c ====
@@ -34,6 +34,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -158,6 +159,9 @@
 ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask));
 ASSYM(PC_IQ, offsetof(struct pcpu, pc_iq));
 ASSYM(PC_MID, offsetof(struct pcpu, pc_mid));
+ASSYM(PC_TLB_CTX, offsetof(struct pcpu, pc_tlb_ctx));
+ASSYM(PC_TLB_CTX_MAX, offsetof(struct pcpu, pc_tlb_ctx_max));
+ASSYM(PC_TLB_CTX_MIN, offsetof(struct pcpu, pc_tlb_ctx_min));
 ASSYM(PC_SIZEOF, sizeof(struct pcpu));
 
 ASSYM(IH_SHIFT, IH_SHIFT);
@@ -193,6 +197,8 @@
 ASSYM(MD_UTRAP, offsetof(struct mdproc, md_utrap));
 
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+
 ASSYM(P_COMM, offsetof(struct proc, p_comm));
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(P_PID, offsetof(struct proc, p_pid));

==== //depot/projects/sparc64/sys/sparc64/sparc64/machdep.c#62 - /usr/p4/sparc64/sys/sparc64/sparc64/machdep.c ====
@@ -296,6 +296,9 @@
 	pc->pc_curthread = &thread0;
 	pc->pc_curpcb = thread0.td_pcb;
 	pc->pc_mid = mid;
+	pc->pc_tlb_ctx = 1;
+	pc->pc_tlb_ctx_min = 1;
+	pc->pc_tlb_ctx_max = 8192;
 
 	/*
 	 * Initialize global registers.

==== //depot/projects/sparc64/sys/sparc64/sparc64/pmap.c#66 - /usr/p4/sparc64/sys/sparc64/sparc64/pmap.c ====
@@ -155,13 +155,6 @@
  */
 struct pmap kernel_pmap_store;
 
-/*
- * Map of free and in use hardware contexts and index of first potentially
- * free context.
- */
-static char pmap_context_map[PMAP_CONTEXT_MAX];
-static u_int pmap_context_base;
-
 static boolean_t pmap_initialized = FALSE;
 
 /* Convert a tte data field into a page mask */
@@ -175,12 +168,6 @@
 #define	PMAP_TD_GET_MASK(d)	pmap_page_masks[TD_GET_SIZE((d))]
 
 /*
- * Allocate and free hardware context numbers.
- */
-static u_int pmap_context_alloc(void);
-static void pmap_context_destroy(u_int i);
-
-/*
  * Allocate physical memory for use in pmap_bootstrap.
  */
 static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
@@ -478,6 +465,43 @@
 	panic("pmap_bootstrap_alloc");
 }
 
+void
+pmap_context_rollover(void)
+{
+	u_long data;
+	int i;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	CTR0(KTR_PMAP, "pmap_context_rollover");
+	for (i = 0; i < 64; i++) {
+		data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
+		if ((data & TD_V) != 0 && (data & TD_P) == 0) {
+			stxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0);
+			membar(Sync);
+		}
+		data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
+		if ((data & TD_V) != 0 && (data & TD_P) == 0) {
+			stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0);
+			membar(Sync);
+		}
+	}
+	PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min));
+}
+
+static __inline u_int
+pmap_context_alloc(void)
+{
+	u_int context;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	context = PCPU_GET(tlb_ctx);
+	if (context + 1 == PCPU_GET(tlb_ctx_max))
+		pmap_context_rollover();
+	else
+		PCPU_SET(tlb_ctx, context + 1);
+	return (context);
+}
+
 /*
  * Initialize the pmap module.
  */
@@ -1054,8 +1078,10 @@
 void
 pmap_pinit0(pmap_t pm)
 {
+	int i;
 
-	pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
+	for (i = 0; i < MAXCPU; i++)
+		pm->pm_context[i] = 0;
 	pm->pm_active = 0;
 	pm->pm_count = 1;
 	pm->pm_tsb = NULL;
@@ -1105,8 +1131,9 @@
 	}
 	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
 
+	for (i = 0; i < MAXCPU; i++)
+		pm->pm_context[i] = -1;
 	pm->pm_active = 0;
-	pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
 	pm->pm_count = 1;
 	TAILQ_INIT(&pm->pm_pvlist);
 	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
@@ -1138,7 +1165,6 @@
 	KASSERT(pmap_resident_count(pm) == 0,
 	    ("pmap_release: resident pages %ld != 0",
 	    pmap_resident_count(pm)));
-	pmap_context_destroy(pm->pm_context[PCPU_GET(cpuid)]);
 	TAILQ_FOREACH(m, &obj->memq, listq) {
 		if (vm_page_sleep_busy(m, FALSE, "pmaprl"))
 			continue;
@@ -1760,6 +1786,7 @@
 void
 pmap_activate(struct thread *td)
 {
+	struct vmspace *vm;
 	vm_offset_t tsb;
 	u_long context;
 	pmap_t pm;
@@ -1768,21 +1795,24 @@
 	 * Load all the data we need up front to encourage the compiler to
 	 * not issue any loads while we have interrupts disabled below.
 	 */
-	pm = &td->td_proc->p_vmspace->vm_pmap;
-	context = pm->pm_context[PCPU_GET(cpuid)];
+	vm = td->td_proc->p_vmspace;
+	pm = &vm->vm_pmap;
 	tsb = (vm_offset_t)pm->pm_tsb;
 
-	KASSERT(context != 0, ("pmap_activate: activating nucleus context"));
-	KASSERT(context != -1, ("pmap_activate: steal context"));
 	KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
+	KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0,
+	    ("pmap_activate: activating nucleus context?"));
+
+	mtx_lock_spin(&sched_lock);
 	wrpr(pstate, 0, PSTATE_MMU);
 	mov(tsb, TSB_REG);
-	wrpr(pstate, 0, PSTATE_NORMAL);
-	pm->pm_active |= 1 << PCPU_GET(cpuid);
+	wrpr(pstate, 0, PSTATE_KERNEL);
+	context = pmap_context_alloc();
+	pm->pm_context[PCPU_GET(cpuid)] = context;
+	pm->pm_active |= PCPU_GET(cpumask);
 	stxa(AA_DMMU_PCXR, ASI_DMMU, context);
 	membar(Sync);
-	wrpr(pstate, 0, PSTATE_KERNEL);
+	mtx_unlock_spin(&sched_lock);
 }
 
 vm_offset_t
@@ -1816,32 +1846,3 @@
 	    DCACHE_COLOR_MASK;
 	return (color << PAGE_SHIFT);
 }
-
-/*
- * Allocate a hardware context number from the context map.
- */
-static u_int
-pmap_context_alloc(void)
-{
-	u_int i;
-
-	i = pmap_context_base;
-	do {
-		if (pmap_context_map[i] == 0) {
-			pmap_context_map[i] = 1;
-			pmap_context_base = (i + 1) & (PMAP_CONTEXT_MAX - 1);
-			return (i);
-		}
-	} while ((i = (i + 1) & (PMAP_CONTEXT_MAX - 1)) != pmap_context_base);
-	panic("pmap_context_alloc");
-}
-
-/*
- * Free a hardware context number back to the context map.
- */
-static void
-pmap_context_destroy(u_int i)
-{
-
-	pmap_context_map[i] = 0;
-}

==== //depot/projects/sparc64/sys/sparc64/sparc64/swtch.s#35 - /usr/p4/sparc64/sys/sparc64/sparc64/swtch.s ====
@@ -23,7 +23,7 @@
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
- * $FreeBSD: src/sys/sparc64/sparc64/swtch.s,v 1.16 2002/02/27 00:27:05 jake Exp $
+ * $FreeBSD: src/sys/sparc64/sparc64/swtch.s,v 1.13 2002/02/25 22:58:41 jake Exp $
 */

@@ -31,6 +31,9 @@
 #include
 #include
 
+	.register %g2, #ignore
+	.register %g3, #ignore
+
 #include "assym.s"
 
 ENTRY(cpu_throw)
@@ -91,18 +94,14 @@
 	 */
 .Lsw1:
 #if KTR_COMPILE & KTR_PROC
-	CATR(KTR_PROC, "cpu_switch: td=%d (%s) pc=%#lx fp=%#lx"
-	    , %l3, %l4, %l5, 7, 8, 9)
-	ldx	[%o0 + TD_PROC], %l4
-	lduw	[%l4 + P_PID], %l5
-	stx	%l5, [%l3 + KTR_PARM1]
-	add	%l4, P_COMM, %l5
-	stx	%l5, [%l3 + KTR_PARM2]
-	ldx	[%o0 + TD_PCB], %l4
-	ldx	[%l4 + PCB_PC], %l5
-	stx	%l5, [%l3 + KTR_PARM3]
-	ldx	[%l4 + PCB_FP], %l5
-	stx	%l5, [%l3 + KTR_PARM4]
+	CATR(KTR_PROC, "cpu_switch: new td=%p pc=%#lx fp=%#lx"
+	    , %g1, %g2, %g3, 7, 8, 9)
+	stx	%o0, [%g1 + KTR_PARM1]
+	ldx	[%o0 + TD_PCB], %g2
+	ldx	[%g2 + PCB_PC], %g3
+	stx	%g3, [%g1 + KTR_PARM2]
+	ldx	[%g2 + PCB_FP], %g3
+	stx	%g3, [%g1 + KTR_PARM3]
9:
#endif
 	ldx	[%o0 + TD_PCB], %o1
@@ -112,6 +111,9 @@
 	stx	%o0, [PCPU(CURTHREAD)]
 	stx	%o1, [PCPU(CURPCB)]
 
+	SET(sched_lock, %o3, %o2)
+	stx	%o0, [%o2 + MTX_LOCK]
+
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 	mov	%o1, PCB_REG
 	wrpr	%g0, PSTATE_ALT, %pstate
@@ -126,31 +128,36 @@
 	ldx	[%l2 + P_VMSPACE], %l2
 	ldx	[%o2 + P_VMSPACE], %o2
 
+#if KTR_COMPILE & KTR_PROC
+	CATR(KTR_PROC, "cpu_switch: new vm=%p old vm=%p"
+	    , %g1, %g2, %g3, 7, 8, 9)
+	stx	%o2, [%g1 + KTR_PARM1]
+	stx	%l2, [%g1 + KTR_PARM2]
+9:
+#endif
+
 	/*
-	 * If they're the same we are done.
+	 * If they are the same we are done.
 	 */
 	cmp	%l2, %o2
 	be,a,pn	%xcc, 4f
 	 nop
 
-	/*
-	 * If the old process has nucleus context we can skip demapping the
-	 * tsb.
-	 */
+	lduw	[PCPU(CPUID)], %o3
+	sllx	%o3, INT_SHIFT, %o3
+	add	%o2, VM_PMAP + PM_CONTEXT, %o4
+
 	lduw	[PCPU(CPUID)], %l3
 	sllx	%l3, INT_SHIFT, %l3
 	add	%l2, VM_PMAP + PM_CONTEXT, %l4
-	lduw	[%l3 + %l4], %l5
-	brz,a,pn %l5, 3f
-	 nop
 
 	/*
-	 * Demap the old process's tsb.
+	 * If the old process has nucleus context we don't want to deactivate
+	 * its pmap on this cpu.
 	 */
-	ldx	[%l2 + VM_PMAP + PM_TSB], %l3
-	or	%l3, TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %l3
-	stxa	%g0, [%l3] ASI_DMMU_DEMAP
-	membar	#Sync
+	lduw	[%l3 + %l4], %l5
+	brz,a	%l5, 2f
+	 nop
 
 	/*
 	 * Mark the pmap no longer active on this cpu.
@@ -161,43 +168,104 @@
 	stw	%l3, [%l2 + VM_PMAP + PM_ACTIVE]
 
 	/*
+	 * Take away its context.
+	 */
+	lduw	[PCPU(CPUID)], %l3
+	sllx	%l3, INT_SHIFT, %l3
+	add	%l2, VM_PMAP + PM_CONTEXT, %l4
+	mov	-1, %l5
+	stw	%l5, [%l3 + %l4]
+
+	/*
 	 * If the new process has nucleus context we are done.
 	 */
-3:	lduw	[PCPU(CPUID)], %o3
-	sllx	%o3, INT_SHIFT, %o3
-	add	%o2, VM_PMAP + PM_CONTEXT, %o4
-	lduw	[%o3 + %o4], %o5
+2:	lduw	[%o3 + %o4], %o5
+
+#if KTR_COMPILE & KTR_PROC
+	CATR(KTR_PROC, "cpu_switch: ctx=%#lx"
+	    , %g1, %g2, %g3, 7, 8, 9)
+	stx	%o5, [%g1 + KTR_PARM1]
+9:
+#endif
+
 	brz,a,pn %o5, 4f
 	 nop
 
 	/*
-	 * Install the new primary context.
+	 * Find the current free tlb context for this cpu and install it as
+	 * the new primary context.
 	 */
+	lduw	[PCPU(TLB_CTX)], %o5
+	stw	%o5, [%o3 + %o4]
 	mov	AA_DMMU_PCXR, %o4
 	stxa	%o5, [%o4] ASI_DMMU
-	flush	%o0
+	membar	#Sync
+
+	/*
+	 * See if we have run out of free contexts.
+	 */
+	lduw	[PCPU(TLB_CTX_MAX)], %o3
+
+#if KTR_COMPILE & KTR_PROC
+	CATR(KTR_PROC, "cpu_switch: ctx=%#lx next=%#lx max=%#lx"
+	    , %g1, %g2, %g3, 7, 8, 9)
+	stx	%o5, [%g1 + KTR_PARM1]
+	add	%o5, 1, %g2
+	stx	%g2, [%g1 + KTR_PARM2]
+	stx	%o3, [%g1 + KTR_PARM3]
+9:
+#endif
+
+	add	%o5, 1, %o5
+	cmp	%o3, %o5
+	bne,a,pt %xcc, 3f
+	 stw	%o5, [PCPU(TLB_CTX)]
+
+#if KTR_COMPILE & KTR_PROC
+	CATR(KTR_PROC, "cpu_switch: context rollover"
+	    , %g1, %g2, %g3, 7, 8, 9)
+9:
+#endif
+
+	/*
+	 * We will start re-using contexts on the next switch. Flush all
+	 * non-nucleus mappings from the tlb, and reset the next free context.
+	 */
+	call	pmap_context_rollover
+	 nop
+	ldx	[PCPU(CURTHREAD)], %o0
+	ldx	[%o0 + TD_PROC], %o2
+	ldx	[%o2 + P_VMSPACE], %o2
 
 	/*
 	 * Mark the pmap as active on this cpu.
 	 */
-	lduw	[%o2 + VM_PMAP + PM_ACTIVE], %o3
+3:	lduw	[%o2 + VM_PMAP + PM_ACTIVE], %o3
 	lduw	[PCPU(CPUMASK)], %o4
 	or	%o3, %o4, %o3
 	stw	%o3, [%o2 + VM_PMAP + PM_ACTIVE]
 
 	/*
-	 * Switch to mmu globals and install the preloaded tsb pointer.
+	 * Load the address of the tsb, switch to mmu globals, and install
+	 * the preloaded tsb pointer.
 	 */
 	ldx	[%o2 + VM_PMAP + PM_TSB], %o3
 	wrpr	%g0, PSTATE_MMU, %pstate
 	mov	%o3, TSB_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
+4:
+#if KTR_COMPILE & KTR_PROC
+	CATR(KTR_PROC, "cpu_switch: return"
+	    , %g1, %g2, %g3, 7, 8, 9)
+9:
+#endif
+
 	/*
 	 * Done. Return and load the new process's window from the stack.
 	 */
 	MEXITCOUNT
-4:	ret
+	ret
 	 restore
END(cpu_switch)
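
Notes on the context allocation scheme:

The change replaces the global context bitmap with a per-cpu allocator: each
switch to a new vmspace installs the cpu's current free context number as the
primary context and bumps the counter, and when the counter reaches
pc_tlb_ctx_max the non-locked user tlb entries are flushed and the counter
wraps back to pc_tlb_ctx_min. The sketch below is a minimal userland model of
just that counter logic, for illustration only: the PCPU field is a stand-in
global, the tlb flush in the rollover is reduced to a comment, and the
constants simply mirror the machdep.c initialization above.

/*
 * Userland model of the per-cpu tlb context allocator; not kernel code.
 */
#include <stdio.h>

#define	TLB_CTX_MIN	1	/* context 0 is reserved for the nucleus */
#define	TLB_CTX_MAX	8192	/* 13-bit context field on UltraSPARC */

/* Stand-in for the pc_tlb_ctx PCPU field: the next free context. */
static unsigned int tlb_ctx = TLB_CTX_MIN;

static void
pmap_context_rollover(void)
{

	/* The kernel flushes all non-locked user tlb entries here. */
	tlb_ctx = TLB_CTX_MIN;
}

static unsigned int
pmap_context_alloc(void)
{
	unsigned int context;

	/* Hand out the current free context and advance the counter. */
	context = tlb_ctx;
	if (context + 1 == TLB_CTX_MAX)
		pmap_context_rollover();
	else
		tlb_ctx = context + 1;
	return (context);
}

int
main(void)
{
	unsigned int ctx;
	int i;

	/* Exhaust the context space once; the counter wraps back to 1. */
	ctx = 0;
	for (i = 0; i < TLB_CTX_MAX - 1; i++)
		ctx = pmap_context_alloc();
	printf("last context %u, next free context %u\n", ctx, tlb_ctx);
	return (0);
}

The payoff is that allocation on the cpu_switch path is a load, a compare and
a store done under sched_lock; the cost is one full user tlb flush every
tlb_ctx_max - tlb_ctx_min switches per cpu, which the rollover keeps cheap by
only zapping tlb entries whose privileged (TD_P) bit is clear.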