Index: src/sys/arch/i386/i386/cpu.c diff -u src/sys/arch/i386/i386/cpu.c:1.17 src/sys/arch/i386/i386/cpu.c:1.18 --- src/sys/arch/i386/i386/cpu.c:1.17 Fri Feb 13 06:36:13 2004 +++ src/sys/arch/i386/i386/cpu.c Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.17 2004/02/13 11:36:13 wiz Exp $ */ +/* $NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.17 2004/02/13 11:36:13 wiz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -314,10 +314,12 @@ kstack + USPACE - 16 - sizeof (struct trapframe); pcb->pcb_tss.tss_esp = kstack + USPACE - 16 - sizeof (struct trapframe); - pcb->pcb_pmap = pmap_kernel(); pcb->pcb_cr0 = rcr0(); - pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; + pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa; #endif + pmap_reference(pmap_kernel()); + ci->ci_pmap = pmap_kernel(); + ci->ci_tlbstate = TLBSTATE_STALE; /* further PCB init done later. */ Index: src/sys/arch/i386/i386/genassym.cf diff -u src/sys/arch/i386/i386/genassym.cf:1.39 src/sys/arch/i386/i386/genassym.cf:1.40 --- src/sys/arch/i386/i386/genassym.cf:1.39 Tue Nov 4 05:33:15 2003 +++ src/sys/arch/i386/i386/genassym.cf Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.39 2003/11/04 10:33:15 dsl Exp $ +# $NetBSD: genassym.cf,v 1.40 2004/02/20 17:35:01 yamt Exp $ # # Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -258,6 +258,9 @@ define CPU_INFO_SELF offsetof(struct cpu_info, ci_self) define CPU_INFO_RESCHED offsetof(struct cpu_info, ci_want_resched) +define CPU_INFO_WANT_PMAPLOAD offsetof(struct cpu_info, ci_want_pmapload) +define CPU_INFO_TLBSTATE offsetof(struct cpu_info, ci_tlbstate) +define TLBSTATE_VALID TLBSTATE_VALID define CPU_INFO_CURLWP offsetof(struct cpu_info, ci_curlwp) define CPU_INFO_CURPCB offsetof(struct cpu_info, ci_curpcb) define CPU_INFO_IDLE_PCB offsetof(struct cpu_info, ci_idle_pcb) Index: src/sys/arch/i386/i386/locore.S diff -u src/sys/arch/i386/i386/locore.S:1.23 src/sys/arch/i386/i386/locore.S:1.24 --- src/sys/arch/i386/i386/locore.S:1.23 Mon Feb 16 12:11:27 2004 +++ src/sys/arch/i386/i386/locore.S Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.23 2004/02/16 17:11:27 wiz Exp $ */ +/* $NetBSD: locore.S,v 1.24 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -125,7 +125,7 @@ #define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg #define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB) - + #define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED) /* XXX temporary kluge; these should not be here */ @@ -701,6 +701,7 @@ pushl %ebx call *%esi addl $4,%esp + DO_DEFERRED_SWITCH(%eax) INTRFASTEXIT /* NOTREACHED */ @@ -778,7 +779,7 @@ pushl %edi GET_CURPCB(%eax) # load curpcb into eax and set on-fault pushl PCB_ONFAULT(%eax) - movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) + movl $_C_LABEL(kcopy_fault), PCB_ONFAULT(%eax) movl 16(%esp),%esi movl 20(%esp),%edi @@ -871,6 +872,7 @@ */ /* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */ ENTRY(copyout) + DO_DEFERRED_SWITCH(%eax) jmp *_C_LABEL(copyout_func) #if defined(I386_CPU) @@ -1012,6 +1014,7 @@ */ /* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */ ENTRY(copyin) + DO_DEFERRED_SWITCH(%eax) jmp *_C_LABEL(copyin_func) #if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \ @@ -1062,6 +1065,19 @@ NENTRY(copy_efault) movl $EFAULT,%eax +/* + * kcopy_fault is used by kcopy and copy_fault is used by copyin/out. + * + * they're distinguished for lazy pmap switching. see trap(). + */ +/* LINTSTUB: Ignore */ +NENTRY(kcopy_fault) + GET_CURPCB(%edx) + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + ret + /* LINTSTUB: Ignore */ NENTRY(copy_fault) GET_CURPCB(%edx) @@ -1083,6 +1099,8 @@ pushl %esi pushl %edi + DO_DEFERRED_SWITCH(%eax) + movl 12(%esp),%esi # esi = from movl 16(%esp),%edi # edi = to movl 20(%esp),%edx # edx = maxlen @@ -1200,6 +1218,9 @@ ENTRY(copyinstr) pushl %esi pushl %edi + + DO_DEFERRED_SWITCH(%eax) + GET_CURPCB(%ecx) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) @@ -1311,6 +1332,7 @@ */ /* LINTSTUB: Func: long fuword(const void *base) */ ENTRY(fuword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx ja _C_LABEL(fusuaddrfault) @@ -1327,6 +1349,7 @@ */ /* LINTSTUB: Func: int fusword(const void *base) */ ENTRY(fusword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1344,6 +1367,8 @@ */ /* LINTSTUB: Func: int fuswintr(const void *base) */ ENTRY(fuswintr) + cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE) + jnz _C_LABEL(fusuaddrfault) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1361,6 +1386,7 @@ */ /* LINTSTUB: Func: int fubyte(const void *base) */ ENTRY(fubyte) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja _C_LABEL(fusuaddrfault) @@ -1405,6 +1431,7 @@ */ /* LINTSTUB: Func: int suword(void *base, long c) */ ENTRY(suword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx ja _C_LABEL(fusuaddrfault) @@ -1452,6 +1479,7 @@ */ /* LINTSTUB: Func: int susword(void *base, short c) */ ENTRY(susword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1500,6 +1528,8 @@ */ /* LINTSTUB: Func: int suswintr(void *base, short c) */ ENTRY(suswintr) + cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE) + jnz _C_LABEL(fusuaddrfault) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1537,6 +1567,7 @@ */ /* LINTSTUB: Func: int subyte(void *base, int c) */ ENTRY(subyte) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja _C_LABEL(fusuaddrfault) @@ -1722,7 +1753,7 @@ */ pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc) addl $4,%esp movl L_ADDR(%esi),%esi @@ -1749,11 +1780,6 @@ movl PCB_ESP(%edi),%esp movl PCB_EBP(%edi),%ebp - - /* Switch address space. */ - movl PCB_CR3(%edi),%ecx - movl %ecx,%cr3 - /* Switch TSS. Reset "task busy" flag before loading. */ #ifdef MULTIPROCESSOR movl CPUVAR(GDT),%eax @@ -1872,7 +1898,7 @@ */ pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc) addl $4,%esp movl L_ADDR(%esi),%esi @@ -2066,10 +2092,6 @@ movl _C_LABEL(gdt),%eax #endif - /* Switch address space. */ - movl PCB_CR3(%esi),%ecx - movl %ecx,%cr3 - /* Switch TSS. */ andl $~0x0200,4-SEL_KPL(%eax,%edx,1) ltr %dx @@ -2134,6 +2156,12 @@ INTRENTRY #ifdef DIAGNOSTIC + cmpl $0, CPUVAR(WANT_PMAPLOAD) + jz 1f + pushl $6f + call _C_LABEL(printf) + addl $4, %esp +1: movl CPUVAR(ILEVEL),%ebx testl %ebx,%ebx jz 1f @@ -2151,7 +2179,8 @@ pushl %esp call *P_MD_SYSCALL(%edx) # get pointer to syscall() function addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ +syscall_checkast: + /* Check for ASTs on exit to user mode. */ cli CHECK_ASTPENDING(%eax) je 1f @@ -2162,11 +2191,13 @@ pushl %esp call _C_LABEL(trap) addl $4,%esp - jmp 2b + jmp syscall_checkast /* re-check ASTs */ +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f #ifndef DIAGNOSTIC -1: INTRFASTEXIT + INTRFASTEXIT #else /* DIAGNOSTIC */ -1: cmpl $IPL_NONE,CPUVAR(ILEVEL) + cmpl $IPL_NONE,CPUVAR(ILEVEL) jne 3f INTRFASTEXIT 3: sti @@ -2180,7 +2211,11 @@ jmp 2b 4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n" 5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n" +6: .asciz "WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n" #endif /* DIAGNOSTIC */ +9: sti + call _C_LABEL(pmap_load) + jmp syscall_checkast /* re-check ASTs */ #if NNPX > 0 /* Index: src/sys/arch/i386/i386/mach_sigcode.S diff -u src/sys/arch/i386/i386/mach_sigcode.S:1.5 src/sys/arch/i386/i386/mach_sigcode.S:1.6 --- src/sys/arch/i386/i386/mach_sigcode.S:1.5 Wed Aug 20 17:48:37 2003 +++ src/sys/arch/i386/i386/mach_sigcode.S Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: mach_sigcode.S,v 1.5 2003/08/20 21:48:37 fvdl Exp $ */ +/* $NetBSD: mach_sigcode.S,v 1.6 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -125,4 +125,10 @@ call _C_LABEL(trap) addl $4,%esp jmp 2b -1: INTRFASTEXIT +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f + INTRFASTEXIT +9: sti + call _C_LABEL(pmap_load) + cli + jmp 2b Index: src/sys/arch/i386/i386/pmap.c diff -u src/sys/arch/i386/i386/pmap.c:1.170 src/sys/arch/i386/i386/pmap.c:1.171 --- src/sys/arch/i386/i386/pmap.c:1.170 Fri Feb 13 06:36:14 2004 +++ src/sys/arch/i386/i386/pmap.c Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.170 2004/02/13 11:36:14 wiz Exp $ */ +/* $NetBSD: pmap.c,v 1.171 2004/02/20 17:35:01 yamt Exp $ */ /* * @@ -60,7 +60,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.170 2004/02/13 11:36:14 wiz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.171 2004/02/20 17:35:01 yamt Exp $"); #include "opt_cputype.h" #include "opt_user_ldt.h" @@ -501,6 +501,8 @@ static void pmap_tmpunmap_pvepte(struct pv_entry *); static void pmap_unmap_ptes(struct pmap *); +static boolean_t pmap_reactivate(struct pmap *); + /* * p m a p i n l i n e h e l p e r f u n c t i o n s */ @@ -514,8 +516,9 @@ pmap_is_curpmap(pmap) struct pmap *pmap; { + return((pmap == pmap_kernel()) || - (pmap->pm_pdirpa == (paddr_t) rcr3())); + (pmap == curcpu()->ci_pmap)); } /* @@ -663,24 +666,33 @@ struct pmap *pmap; { pd_entry_t opde; + struct pmap *ourpmap; + struct cpu_info *ci; /* the kernel's pmap is always accessible */ if (pmap == pmap_kernel()) { return(PTE_BASE); } + ci = curcpu(); + if (ci->ci_want_pmapload && + vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap) + pmap_load(); + /* if curpmap then we are always mapped */ if (pmap_is_curpmap(pmap)) { simple_lock(&pmap->pm_obj.vmobjlock); return(PTE_BASE); } + ourpmap = ci->ci_pmap; + /* need to lock both curpmap and pmap: use ordered locking */ - if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + if ((unsigned) pmap < (unsigned) ourpmap) { simple_lock(&pmap->pm_obj.vmobjlock); - simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&ourpmap->pm_obj.vmobjlock); } else { - simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&ourpmap->pm_obj.vmobjlock); simple_lock(&pmap->pm_obj.vmobjlock); } @@ -690,7 +702,7 @@ if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); if (pmap_valid_entry(opde)) - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(ourpmap); } return(APTE_BASE); } @@ -703,19 +715,22 @@ pmap_unmap_ptes(pmap) struct pmap *pmap; { + if (pmap == pmap_kernel()) { return; } if (pmap_is_curpmap(pmap)) { simple_unlock(&pmap->pm_obj.vmobjlock); } else { + struct pmap *ourpmap = curcpu()->ci_pmap; + #if defined(MULTIPROCESSOR) *APDP_PDE = 0; - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(ourpmap); #endif COUNT(apdp_pde_unmap); simple_unlock(&pmap->pm_obj.vmobjlock); - simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_unlock(&ourpmap->pm_obj.vmobjlock); } } @@ -952,8 +967,6 @@ * operation of the system. */ - curpcb->pcb_pmap = kpm; /* proc0's pcb */ - /* * Begin to enable global TLB entries if they are supported. * The G bit has no effect until the CR4_PGE bit is set in CR4, @@ -1764,6 +1777,10 @@ struct pmap *pmap; { int refs; +#ifdef DIAGNOSTIC + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; +#endif /* DIAGNOSTIC */ /* * drop reference count @@ -1776,6 +1793,12 @@ return; } +#ifdef DIAGNOSTIC + for (CPU_INFO_FOREACH(cii, ci)) + if (ci->ci_pmap == pmap) + panic("destroying pmap being used"); +#endif /* DIAGNOSTIC */ + /* * reference count is zero, free pmap resources and then free pmap. */ @@ -1904,31 +1927,26 @@ #endif /* USER_LDT */ /* - * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info) + * pmap_activate: activate a process' pmap * * => called from cpu_switch() - * => if proc is the curlwp, then load it into the MMU + * => if lwp is the curlwp, then set ci_want_pmapload so that + * actual MMU context switch will be done by pmap_load() later */ void pmap_activate(l) struct lwp *l; { + struct cpu_info *ci = curcpu(); struct pcb *pcb = &l->l_addr->u_pcb; - struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; + struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - pcb->pcb_pmap = pmap; pcb->pcb_ldt_sel = pmap->pm_ldt_sel; pcb->pcb_cr3 = pmap->pm_pdirpa; - if (l == curlwp) { - lcr3(pcb->pcb_cr3); - lldt(pcb->pcb_ldt_sel); - - /* - * mark the pmap in use by this processor. - */ - x86_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number())); - + if (l == ci->ci_curlwp) { + KASSERT(ci->ci_want_pmapload == 0); + KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); #ifdef KSTACK_CHECK_DR0 /* * setup breakpoint on the top of stack @@ -1938,7 +1956,128 @@ else dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1); #endif + + /* + * no need to switch to kernel vmspace because + * it's a subset of any vmspace. + */ + + if (pmap == pmap_kernel()) { + ci->ci_want_pmapload = 0; + return; + } + + ci->ci_want_pmapload = 1; + } +} + +/* + * pmap_reactivate: try to regain reference to the pmap. + */ + +static boolean_t +pmap_reactivate(struct pmap *pmap) +{ + struct cpu_info *ci = curcpu(); + u_int32_t cpumask = 1U << ci->ci_cpuid; + int s; + boolean_t result; + u_int32_t oldcpus; + + KASSERT(pmap->pm_pdirpa == rcr3()); + + /* + * if we still have a lazy reference to this pmap, + * we can assume that there was no tlb shootdown + * for this pmap in the meantime. + */ + + s = splipi(); /* protect from tlb shootdown ipis. */ + oldcpus = pmap->pm_cpus; + x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); + if (oldcpus & cpumask) { + KASSERT(ci->ci_tlbstate == TLBSTATE_LAZY); + /* got it */ + result = TRUE; + } else { + KASSERT(ci->ci_tlbstate == TLBSTATE_STALE); + result = FALSE; } + ci->ci_tlbstate = TLBSTATE_VALID; + splx(s); + + return result; +} + +/* + * pmap_load: actually switch pmap. (fill in %cr3 and LDT info) + */ + +void +pmap_load() +{ + struct cpu_info *ci = curcpu(); + u_int32_t cpumask = 1U << ci->ci_cpuid; + struct pmap *pmap; + struct pmap *oldpmap; + struct lwp *l; + int s; + + KASSERT(ci->ci_want_pmapload); + + l = ci->ci_curlwp; + KASSERT(l != NULL); + pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); + KASSERT(pmap != pmap_kernel()); + oldpmap = ci->ci_pmap; + + KASSERT(pmap->pm_ldt_sel == l->l_addr->u_pcb.pcb_ldt_sel); + lldt(pmap->pm_ldt_sel); + + if (pmap == oldpmap) { + if (!pmap_reactivate(pmap)) { + + /* + * pmap has been changed during deactivated. + * our tlb may be stale. + */ + + tlbflush(); + } + + ci->ci_want_pmapload = 0; + return; + } + + /* + * actually switch pmap. + */ + + x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask); + + KASSERT(oldpmap->pm_pdirpa == rcr3()); + KASSERT((pmap->pm_cpus & cpumask) == 0); + + KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); + pmap_reference(pmap); + KERNEL_UNLOCK(); + + /* + * mark the pmap in use by this processor. + */ + + s = splipi(); + x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); + ci->ci_pmap = pmap; + ci->ci_tlbstate = TLBSTATE_VALID; + splx(s); + lcr3(pmap->pm_pdirpa); + + ci->ci_want_pmapload = 0; + + KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); + pmap_destroy(oldpmap); + KERNEL_UNLOCK(); } /* @@ -1949,12 +2088,49 @@ pmap_deactivate(l) struct lwp *l; { - struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; - /* - * mark the pmap no longer in use by this processor. - */ - x86_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number())); + if (l == curlwp) + pmap_deactivate2(l); +} + +/* + * pmap_deactivate2: context switch version of pmap_deactivate. + * always treat l as curlwp. + */ + +void +pmap_deactivate2(l) + struct lwp *l; +{ + struct pmap *pmap; + struct cpu_info *ci = curcpu(); + + if (ci->ci_want_pmapload) { + KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) + != pmap_kernel()); + KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) + != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID); + + /* + * userspace has not been touched. + * nothing to do here. + */ + + ci->ci_want_pmapload = 0; + return; + } + + pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); + + if (pmap == pmap_kernel()) { + return; + } + + KASSERT(pmap->pm_pdirpa == rcr3()); + KASSERT(ci->ci_pmap == pmap); + + KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); + ci->ci_tlbstate = TLBSTATE_LAZY; } /* @@ -2380,6 +2556,8 @@ struct vm_page *ptp; int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; + struct cpu_info *ci; + struct pmap *curpmap; /* * we lock in the pmap => pv_head direction @@ -2388,8 +2566,12 @@ TAILQ_INIT(&empty_ptps); PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + ci = curcpu(); + curpmap = ci->ci_pmap; + /* * removing one page? take shortcut function. */ @@ -2438,7 +2620,7 @@ * here if we're using APTE space. */ #endif - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) @@ -2446,8 +2628,7 @@ * Always shoot down the pmap's self-mapping * of the PTP. * XXXthorpej Redundant shootdown can happen - * here if pmap == curpcb->pcb_pmap (not APTE - * space). + * here if pmap == curpmap (not APTE space). */ pmap_tlb_shootdown(pmap, ((vaddr_t)PTE_BASE) + ptp->offset, opte, @@ -2537,14 +2718,14 @@ * if we're using APTE space. */ #endif - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) /* * Always shoot down the pmap's self-mapping * of the PTP. * XXXthorpej Redundant shootdown can happen here - * if pmap == curpcb->pcb_pmap (not APTE space). + * if pmap == curpmap (not APTE space). */ pmap_tlb_shootdown(pmap, ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); @@ -2585,6 +2766,8 @@ int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; struct vm_page *ptp; + struct cpu_info *ci; + struct pmap *curpmap; #ifdef DIAGNOSTIC int bank, off; @@ -2604,6 +2787,9 @@ /* set pv_head => pmap locking */ PMAP_HEAD_TO_MAP_LOCK(); + ci = curcpu(); + curpmap = ci->ci_pmap; + /* XXX: needed if we hold head->map lock? */ simple_lock(&pvh->pvh_lock); @@ -2657,7 +2843,7 @@ opte = x86_atomic_testset_ul( &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)], 0); - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + pve->pv_ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) @@ -3427,8 +3613,9 @@ void pmap_tlb_shootnow(int32_t cpumask) { + struct cpu_info *self; #ifdef MULTIPROCESSOR - struct cpu_info *ci, *self; + struct cpu_info *ci; CPU_INFO_ITERATOR cii; int s; #ifdef DIAGNOSTIC @@ -3439,13 +3626,13 @@ if (cpumask == 0) return; -#ifdef MULTIPROCESSOR self = curcpu(); +#ifdef MULTIPROCESSOR s = splipi(); self->ci_tlb_ipi_mask = cpumask; #endif - pmap_do_tlb_shootdown(0); /* do *our* work. */ + pmap_do_tlb_shootdown(self); /* do *our* work. */ #ifdef MULTIPROCESSOR splx(s); @@ -3585,6 +3772,40 @@ } /* + * pmap_do_tlb_shootdown_checktlbstate: check and update ci_tlbstate. + * + * => called at splipi. + * => return TRUE if we need to maintain user tlbs. + */ +static __inline boolean_t +pmap_do_tlb_shootdown_checktlbstate(struct cpu_info *ci) +{ + + KASSERT(ci == curcpu()); + + if (ci->ci_tlbstate == TLBSTATE_LAZY) { + KASSERT(ci->ci_pmap != pmap_kernel()); + /* + * mostly KASSERT(ci->ci_pmap->pm_cpus & (1U << ci->ci_cpuid)); + */ + + /* + * we no longer want tlb shootdown ipis for this pmap. + * mark the pmap no longer in use by this processor. + */ + + x86_atomic_clearbits_l(&ci->ci_pmap->pm_cpus, + 1U << ci->ci_cpuid); + ci->ci_tlbstate = TLBSTATE_STALE; + } + + if (ci->ci_tlbstate == TLBSTATE_STALE) + return FALSE; + + return TRUE; +} + +/* * pmap_do_tlb_shootdown: * * Process pending TLB shootdown operations for this processor. @@ -3592,7 +3813,7 @@ void pmap_do_tlb_shootdown(struct cpu_info *self) { - u_long cpu_id = cpu_number(); + u_long cpu_id = self->ci_cpuid; struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; struct pmap_tlb_shootdown_job *pj; int s; @@ -3600,6 +3821,7 @@ struct cpu_info *ci; CPU_INFO_ITERATOR cii; #endif + KASSERT(self == curcpu()); s = splipi(); @@ -3607,6 +3829,7 @@ if (pq->pq_flushg) { COUNT(flushg); + pmap_do_tlb_shootdown_checktlbstate(self); tlbflushg(); pq->pq_flushg = 0; pq->pq_flushu = 0; @@ -3618,14 +3841,20 @@ */ if (pq->pq_flushu) { COUNT(flushu); + pmap_do_tlb_shootdown_checktlbstate(self); tlbflush(); } while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { TAILQ_REMOVE(&pq->pq_head, pj, pj_list); - if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) || - (pj->pj_pte & pmap_pg_g)) + if ((pj->pj_pte & pmap_pg_g) || + pj->pj_pmap == pmap_kernel()) { pmap_update_pg(pj->pj_va); + } else if (!pq->pq_flushu && + pj->pj_pmap == self->ci_pmap) { + if (pmap_do_tlb_shootdown_checktlbstate(self)) + pmap_update_pg(pj->pj_va); + } pmap_tlb_shootdown_job_put(pq, pj); } Index: src/sys/arch/i386/i386/spl.S diff -u src/sys/arch/i386/i386/spl.S:1.7 src/sys/arch/i386/i386/spl.S:1.8 --- src/sys/arch/i386/i386/spl.S:1.7 Wed Aug 20 17:48:41 2003 +++ src/sys/arch/i386/i386/spl.S Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: spl.S,v 1.7 2003/08/20 21:48:41 fvdl Exp $ */ +/* $NetBSD: spl.S,v 1.8 2004/02/20 17:35:01 yamt Exp $ */ /* * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -150,13 +150,17 @@ jmp *IS_RESUME(%eax) 2: /* Check for ASTs on exit to user mode. */ movl %ebx,CPUVAR(ILEVEL) -5: CHECK_ASTPENDING(%eax) - je 3f +5: testb $SEL_RPL,TF_CS(%esp) + jnz doreti_checkast #ifdef VM86 - jnz 4f testl $PSL_VM,TF_EFLAGS(%esp) + jz 6f +#else + jmp 6f #endif +doreti_checkast: + CHECK_ASTPENDING(%eax) jz 3f 4: CLEAR_ASTPENDING(%eax) sti @@ -168,4 +172,12 @@ cli jmp 5b 3: + CHECK_DEFERRED_SWITCH(%eax) + jnz 9f +6: INTRFASTEXIT +9: + sti + call _C_LABEL(pmap_load) + cli + jmp doreti_checkast /* recheck ASTs */ Index: src/sys/arch/i386/i386/svr4_sigcode.S diff -u src/sys/arch/i386/i386/svr4_sigcode.S:1.6 src/sys/arch/i386/i386/svr4_sigcode.S:1.7 --- src/sys/arch/i386/i386/svr4_sigcode.S:1.6 Wed Aug 20 17:48:42 2003 +++ src/sys/arch/i386/i386/svr4_sigcode.S Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: svr4_sigcode.S,v 1.6 2003/08/20 21:48:42 fvdl Exp $ */ +/* $NetBSD: svr4_sigcode.S,v 1.7 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -119,4 +119,10 @@ call _C_LABEL(trap) addl $4,%esp jmp 2b -1: INTRFASTEXIT +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f + INTRFASTEXIT +9: sti + call _C_LABEL(pmap_load) + cli + jmp 2b Index: src/sys/arch/i386/i386/trap.c diff -u src/sys/arch/i386/i386/trap.c:1.195 src/sys/arch/i386/i386/trap.c:1.196 --- src/sys/arch/i386/i386/trap.c:1.195 Thu Feb 19 12:02:44 2004 +++ src/sys/arch/i386/i386/trap.c Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.195 2004/02/19 17:02:44 drochner Exp $ */ +/* $NetBSD: trap.c,v 1.196 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.195 2004/02/19 17:02:44 drochner Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.196 2004/02/20 17:35:01 yamt Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -216,7 +216,7 @@ struct proc *p = l ? l->l_proc : 0; int type = frame->tf_trapno; struct pcb *pcb; - extern char fusubail[], + extern char fusubail[], kcopy_fault[], resume_iret[], resume_pop_ds[], resume_pop_es[], resume_pop_fs[], resume_pop_gs[], IDTVEC(osyscall)[]; @@ -616,6 +616,18 @@ if (type == T_PAGEFLT) { KERNEL_UNLOCK(); + + /* + * we need to switch pmap now if we're in + * the middle of copyin/out. + * + * but we don't need to do so for kcopy as + * it never touch userspace. + */ + + if (onfault != kcopy_fault && + curcpu()->ci_want_pmapload) + pmap_load(); return; } l->l_flag &= ~L_SA_PAGEFAULT; Index: src/sys/arch/i386/i386/vector.S diff -u src/sys/arch/i386/i386/vector.S:1.11 src/sys/arch/i386/i386/vector.S:1.12 --- src/sys/arch/i386/i386/vector.S:1.11 Fri Dec 12 15:17:53 2003 +++ src/sys/arch/i386/i386/vector.S Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: vector.S,v 1.11 2003/12/12 20:17:53 nathanw Exp $ */ +/* $NetBSD: vector.S,v 1.12 2004/02/20 17:35:01 yamt Exp $ */ /* * Copyright 2002 (c) Wasabi Systems, Inc. @@ -860,27 +860,32 @@ pushl %esp call _C_LABEL(trap) addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ - cli - CHECK_ASTPENDING(%eax) - je 1f testb $SEL_RPL,TF_CS(%esp) + jnz alltraps_checkast #ifdef VM86 - jnz 5f testl $PSL_VM,TF_EFLAGS(%esp) + jz 6f +#else + jmp 6f #endif - jz 1f +alltraps_checkast: + /* Check for ASTs on exit to user mode. */ + cli + CHECK_ASTPENDING(%eax) + jz 3f 5: CLEAR_ASTPENDING(%eax) sti movl $T_ASTFLT,TF_TRAPNO(%esp) pushl %esp call _C_LABEL(trap) addl $4,%esp - jmp 2b + jmp alltraps_checkast /* re-check ASTs */ +3: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f #ifndef DIAGNOSTIC -1: INTRFASTEXIT +6: INTRFASTEXIT #else -1: cmpl CPUVAR(ILEVEL),%ebx +6: cmpl CPUVAR(ILEVEL),%ebx jne 3f INTRFASTEXIT 3: sti @@ -891,9 +896,12 @@ int $3 #endif /* DDB */ movl %ebx,CPUVAR(ILEVEL) - jmp 2b + jmp alltraps_checkast /* re-check ASTs */ 4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" #endif /* DIAGNOSTIC */ +9: sti + call _C_LABEL(pmap_load) + jmp alltraps_checkast /* re-check ASTs */ #ifdef IPKDB /* LINTSTUB: Ignore */ Index: src/sys/arch/i386/include/cpu.h diff -u src/sys/arch/i386/include/cpu.h:1.112 src/sys/arch/i386/include/cpu.h:1.113 --- src/sys/arch/i386/include/cpu.h:1.112 Sun Jan 4 06:44:52 2004 +++ src/sys/arch/i386/include/cpu.h Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.112 2004/01/04 11:44:52 jdolecek Exp $ */ +/* $NetBSD: cpu.h,v 1.113 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -61,6 +61,7 @@ #include /* offsetof */ struct intrsource; +struct pmap; /* * a bunch of this belongs in cpuvar.h; move it later.. @@ -91,6 +92,13 @@ int ci_fpsaving; /* save in progress */ volatile u_int32_t ci_tlb_ipi_mask; + + struct pmap *ci_pmap; /* current pmap */ + int ci_want_pmapload; /* pmap_load() is needed */ + int ci_tlbstate; /* one of TLBSTATE_ states. see below */ +#define TLBSTATE_VALID 0 /* all user tlbs are valid */ +#define TLBSTATE_LAZY 1 /* tlbs are valid but won't be kept uptodate */ +#define TLBSTATE_STALE 2 /* we might have stale user tlbs */ struct pcb *ci_curpcb; /* VA of current HW PCB */ struct pcb *ci_idle_pcb; /* VA of current PCB */ Index: src/sys/arch/i386/include/frameasm.h diff -u src/sys/arch/i386/include/frameasm.h:1.3 src/sys/arch/i386/include/frameasm.h:1.4 --- src/sys/arch/i386/include/frameasm.h:1.3 Sat Oct 4 01:57:51 2003 +++ src/sys/arch/i386/include/frameasm.h Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.3 2003/10/04 05:57:51 junyoung Exp $ */ +/* $NetBSD: frameasm.h,v 1.4 2004/02/20 17:35:01 yamt Exp $ */ #ifndef _I386_FRAMEASM_H_ #define _I386_FRAMEASM_H_ @@ -79,6 +79,15 @@ movl TF_EAX(%esp),%eax ; \ addl $(TF_PUSHSIZE+8),%esp ; \ iret + +#define DO_DEFERRED_SWITCH(reg) \ + cmpl $0, CPUVAR(WANT_PMAPLOAD) ; \ + jz 1f ; \ + call _C_LABEL(pmap_load) ; \ + 1: + +#define CHECK_DEFERRED_SWITCH(reg) \ + cmpl $0, CPUVAR(WANT_PMAPLOAD) #define CHECK_ASTPENDING(reg) movl CPUVAR(CURLWP),reg ; \ cmpl $0, reg ; \ Index: src/sys/arch/i386/include/pcb.h diff -u src/sys/arch/i386/include/pcb.h:1.36 src/sys/arch/i386/include/pcb.h:1.37 --- src/sys/arch/i386/include/pcb.h:1.36 Sun Nov 9 00:29:59 2003 +++ src/sys/arch/i386/include/pcb.h Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.36 2003/11/09 05:29:59 tsutsui Exp $ */ +/* $NetBSD: pcb.h,v 1.37 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -108,7 +108,6 @@ int vm86_eflags; /* virtual eflags for vm86 mode */ int vm86_flagmask; /* flag mask for vm86 mode */ void *vm86_userp; /* XXX performance hack */ - struct pmap *pcb_pmap; /* back pointer to our pmap */ struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */ u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */ }; Index: src/sys/arch/i386/include/pmap.h diff -u src/sys/arch/i386/include/pmap.h:1.78 src/sys/arch/i386/include/pmap.h:1.79 --- src/sys/arch/i386/include/pmap.h:1.78 Mon Oct 27 08:44:20 2003 +++ src/sys/arch/i386/include/pmap.h Fri Feb 20 12:35:01 2004 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.78 2003/10/27 13:44:20 junyoung Exp $ */ +/* $NetBSD: pmap.h,v 1.79 2004/02/20 17:35:01 yamt Exp $ */ /* * @@ -342,12 +342,14 @@ void pmap_bootstrap(vaddr_t); boolean_t pmap_clear_attrs(struct vm_page *, int); void pmap_deactivate(struct lwp *); +void pmap_deactivate2(struct lwp *); void pmap_page_remove (struct vm_page *); void pmap_remove(struct pmap *, vaddr_t, vaddr_t); boolean_t pmap_test_attrs(struct vm_page *, int); void pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *); +void pmap_load(void); vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */