Index: amd64/amd64/cpu_switch.S =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/cpu_switch.S,v retrieving revision 1.160 diff -u -p -r1.160 cpu_switch.S --- amd64/amd64/cpu_switch.S 22 Aug 2007 05:06:14 -0000 1.160 +++ amd64/amd64/cpu_switch.S 11 Mar 2008 23:01:30 -0000 @@ -45,59 +45,18 @@ .text -#ifdef SMP -#define LK lock ; -#else -#define LK -#endif - #if defined(SCHED_ULE) && defined(SMP) #define SETLK xchgq #else #define SETLK movq #endif -/* - * cpu_throw() - * - * This is the second half of cpu_switch(). It is used when the current - * thread is either a dummy or slated to die, and we no longer care - * about its state. This is only a slight optimization and is probably - * not worth it anymore. Note that we need to clear the pm_active bits so - * we do need the old proc if it still exists. - * %rdi = oldtd - * %rsi = newtd - */ -ENTRY(cpu_throw) - movl PCPU(CPUID), %eax - testq %rdi,%rdi /* no thread? */ - jz 1f - /* release bit from old pm_active */ - movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ - movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ - LK btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ -1: - movq TD_PCB(%rsi),%rdx /* newtd->td_proc */ - movq PCB_CR3(%rdx),%rdx - movq %rdx,%cr3 /* new address space */ - jmp swact -END(cpu_throw) - -/* - * cpu_switch(old, new, mtx) - * - * Save the current thread state, then select the next thread to run - * and load its state. - * %rdi = oldtd - * %rsi = newtd - * %rdx = mtx - */ -ENTRY(cpu_switch) - /* Switch to new thread. First, save context. */ +ENTRY(cpu_switchout) movq TD_PCB(%rdi),%r8 - - movq (%rsp),%rax /* Hardware registers */ + /* Fetch and store the return address. */ + movq (%rsp),%rax movq %rax,PCB_RIP(%r8) + /* Save callee saved registers. 
*/ movq %rbx,PCB_RBX(%r8) movq %rsp,PCB_RSP(%r8) movq %rbp,PCB_RBP(%r8) @@ -105,159 +64,51 @@ ENTRY(cpu_switch) movq %r13,PCB_R13(%r8) movq %r14,PCB_R14(%r8) movq %r15,PCB_R15(%r8) + xorq %rax,%rax + ret +END(cpu_switchout) - testl $PCB_32BIT,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ - - /* Save userland %gs */ - movl %gs,PCB_GS(%r8) - movq PCB_GS32P(%r8),%rax - movq (%rax),%rax - movq %rax,PCB_GS32SD(%r8) - -1: - /* Test if debug registers should be saved. */ - testl $PCB_DBREGS,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ - movq %dr7,%rax /* yes, do the save */ - movq %rax,PCB_DR7(%r8) - andq $0x0000fc00, %rax /* disable all watchpoints */ - movq %rax,%dr7 - movq %dr6,%rax - movq %rax,PCB_DR6(%r8) - movq %dr3,%rax - movq %rax,PCB_DR3(%r8) - movq %dr2,%rax - movq %rax,PCB_DR2(%r8) - movq %dr1,%rax - movq %rax,PCB_DR1(%r8) - movq %dr0,%rax - movq %rax,PCB_DR0(%r8) -1: - - /* have we used fp, and need a save? */ - cmpq %rdi,PCPU(FPCURTHREAD) - jne 1f - addq $PCB_SAVEFPU,%r8 - clts - fxsave (%r8) - smsw %ax - orb $CR0_TS,%al - lmsw %ax - xorl %eax,%eax - movq %rax,PCPU(FPCURTHREAD) +/* + * cpu_switchin(old, new, mtx) + * + * Release the old thread by resetting the lock pointer and then set up the + * stack for the new thread to return + * from. + * %rdi = oldtd + * %rsi = newtd + * %rdx = mtx + */ +ENTRY(cpu_switchin) + testq %rdx,%rdx + jz 1f + SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */ 1: - - /* Save is done. Now fire up new thread. Leave old vmspace. */ - movq TD_PCB(%rsi),%r8 - - /* switch address space */ - movq PCB_CR3(%r8),%rcx - movq %cr3,%rax - cmpq %rcx,%rax /* Same address space? 
*/ - jne swinact - SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ - jmp sw1 -swinact: - movq %rcx,%cr3 /* new address space */ - movl PCPU(CPUID), %eax - /* Release bit from old pmap->pm_active */ - movq TD_PROC(%rdi), %rcx /* oldproc */ - movq P_VMSPACE(%rcx), %rcx - LK btrl %eax, VM_PMAP+PM_ACTIVE(%rcx) /* clear old */ - SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ -swact: - /* Set bit in new pmap->pm_active */ - movq TD_PROC(%rsi),%rdx /* newproc */ - movq P_VMSPACE(%rdx), %rdx - LK btsl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ - -sw1: #if defined(SCHED_ULE) && defined(SMP) /* Wait for the new thread to become unblocked */ - movq $blocked_lock, %rdx + movq $blocked_lock,%rdx 1: movq TD_LOCK(%rsi),%rcx - cmpq %rcx, %rdx + cmpq %rcx,%rdx pause je 1b - lfence #endif - /* - * At this point, we've switched address spaces and are ready - * to load up the rest of the next context. - */ - movq TD_PCB(%rsi),%r8 - - /* Restore userland %fs */ - movl $MSR_FSBASE,%ecx - movl PCB_FSBASE(%r8),%eax - movl PCB_FSBASE+4(%r8),%edx - wrmsr - - /* Restore userland %gs */ - movl $MSR_KGSBASE,%ecx - movl PCB_GSBASE(%r8),%eax - movl PCB_GSBASE+4(%r8),%edx - wrmsr - - /* Update the TSS_RSP0 pointer for the next interrupt */ - movq PCPU(TSSP), %rax - addq $COMMON_TSS_RSP0, %rax - leaq -16(%r8), %rbx - movq %rbx, (%rax) - movq %rbx, PCPU(RSP0) - - movq %r8, PCPU(CURPCB) - movq %rsi, PCPU(CURTHREAD) /* into next thread */ - - testl $PCB_32BIT,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ - - /* Restore userland %gs while preserving kernel gsbase */ - movq PCB_GS32P(%r8),%rax - movq PCB_GS32SD(%r8),%rbx - movq %rbx,(%rax) - movl $MSR_GSBASE,%ecx - rdmsr - movl PCB_GS(%r8),%gs - wrmsr - -1: /* Restore context. */ + movq TD_PCB(%rsi),%r8 + /* Load callee saved registers. 
*/ movq PCB_RBX(%r8),%rbx - movq PCB_RSP(%r8),%rsp movq PCB_RBP(%r8),%rbp + movq PCB_RSP(%r8),%rsp movq PCB_R12(%r8),%r12 movq PCB_R13(%r8),%r13 movq PCB_R14(%r8),%r14 movq PCB_R15(%r8),%r15 + /* Restore return address and return. */ movq PCB_RIP(%r8),%rax movq %rax,(%rsp) - - /* Test if debug registers should be restored. */ - testl $PCB_DBREGS,PCB_FLAGS(%r8) - jz 1f - movq PCB_DR6(%r8),%rax - movq %rax,%dr6 - movq PCB_DR3(%r8),%rax - movq %rax,%dr3 - movq PCB_DR2(%r8),%rax - movq %rax,%dr2 - movq PCB_DR1(%r8),%rax - movq %rax,%dr1 - movq PCB_DR0(%r8),%rax - movq %rax,%dr0 - /* But preserve reserved bits in %dr7 */ - movq %dr7,%rax - andq $0x0000fc00,%rax - movq PCB_DR7(%r8),%rcx - andq $~0x0000fc00,%rcx - orq %rcx,%rax - movq %rax,%dr7 -1: + movq $1, %rax ret -END(cpu_switch) + +END(cpu_switchin) /* * savectx(pcb) Index: amd64/amd64/machdep.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/machdep.c,v retrieving revision 1.680 diff -u -p -r1.680 machdep.c --- amd64/amd64/machdep.c 25 Dec 2007 17:51:55 -0000 1.680 +++ amd64/amd64/machdep.c 11 Mar 2008 23:01:30 -0000 @@ -1890,6 +1890,160 @@ user_dbreg_trap(void) return 0; } +int cpu_switchout(struct thread *); +void cpu_switchin(struct thread *, struct thread *, struct mtx *) __dead2; +void cpu_switchout_pcb(struct pcb *pcb); +void cpu_switchin_pcb(struct pcb *pcb); + +static void +cpu_store_gs(struct pcb *pcb) +{ + pcb->pcb_gs = rgs(); + pcb->pcb_gs32sd = *pcb->pcb_gs32p; +} + +static void +cpu_load_gs(struct pcb *pcb) +{ + register u_int32_t gsbase __asm("ecx"); + + *pcb->pcb_gs32p = pcb->pcb_gs32sd; + /* + * Restore userland %gs while preserving kernel gsbase + */ + gsbase = MSR_GSBASE; + __asm __volatile("rdmsr; movl %0,%%gs; wrmsr" + : : "rm" (pcb->pcb_gs), "c" (gsbase) : "eax", "edx"); +} + +static void +cpu_store_dbregs(struct pcb *pcb) +{ + pcb->pcb_dr7 = rdr7(); + /* disable all watchpoints */ + load_dr7(pcb->pcb_dr7 & 0x0000fc00); + 
pcb->pcb_dr6 = rdr6(); + pcb->pcb_dr3 = rdr3(); + pcb->pcb_dr2 = rdr2(); + pcb->pcb_dr1 = rdr1(); + pcb->pcb_dr0 = rdr0(); +} + +static void +cpu_load_dbregs(struct pcb *pcb) +{ + load_dr6(pcb->pcb_dr6); + load_dr3(pcb->pcb_dr3); + load_dr2(pcb->pcb_dr2); + load_dr1(pcb->pcb_dr1); + load_dr0(pcb->pcb_dr0); + /* Preserve reserved bits in dr7 */ + load_dr7((rdr7() & 0x0000fc00) | (pcb->pcb_dr7 & ~0x0000fc00)); +} + +void +cpu_switchout_pcb(struct pcb *pcb) +{ + + if (pcb->pcb_flags & PCB_32BIT) + cpu_store_gs(pcb); + if (pcb->pcb_flags & PCB_DBREGS) + cpu_store_dbregs(pcb); +} + +void +cpu_switchin_pcb(struct pcb *pcb) +{ + + if (pcb->pcb_flags & PCB_32BIT) + cpu_load_gs(pcb); + if (pcb->pcb_flags & PCB_DBREGS) + cpu_load_dbregs(pcb); +} + +void +cpu_throw(struct thread *old, struct thread *new) +{ + cpu_switch(old, new, NULL); + for (;;); +} + +void +cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) +{ + struct pcpu *pcpu; + struct pcb *pcb; + + /* + * Save old register state. Return value is 1 if we're coming + * back via cpu_switchin. + */ + pcpu = PCPU_GET(prvspace); + if (__predict_true(old != NULL)) { + pcb = old->td_pcb; + if (__predict_false(old == pcpu->pc_fpcurthread)) + fpuexit(old); + if (__predict_false(pcb->pcb_flags & (PCB_32BIT | PCB_DBREGS))) + cpu_switchout_pcb(pcb); + if (cpu_switchout(old)) + return; + } + pcb = new->td_pcb; + /* + * Leave old vmspace and enter the new one. + */ + if (__predict_true(rcr3() != pcb->pcb_cr3)) { + /* + * Release the old pmap. We're safe from the root page + * table being freed because the thread is still + * locked. + */ + if (__predict_true(old != NULL)) + atomic_clear_rel_int((volatile int *) + &old->td_proc->p_vmspace->vm_pmap.pm_active, + pcpu->pc_cpumask); + /* + * Switch to the new page tables and mark this pmap as in + * use here. 
+ */ + load_cr3(pcb->pcb_cr3); + atomic_set_rel_int((volatile int *) + &new->td_proc->p_vmspace->vm_pmap.pm_active, + pcpu->pc_cpumask); + } + /* + * Restore segment registers for userland if they've changed. + */ + if (__predict_true((new->td_pflags & TDP_KTHREAD) == 0)) { + /* Restore userland fs */ + if (__predict_false(old == NULL || + old->td_pcb->pcb_fsbase != pcb->pcb_fsbase)) + wrmsr(MSR_FSBASE, pcb->pcb_fsbase); + /* Restore userland %gs */ + if (__predict_false(old == NULL || + old->td_pcb->pcb_gsbase != pcb->pcb_gsbase)) + wrmsr(MSR_KGSBASE, pcb->pcb_gsbase); + } + /* + * Update the TSS_RSP0 pointer for the next interrupt + */ + pcpu->pc_rsp0 = pcpu->pc_tssp->tss_rsp0 = (uintptr_t)pcb - 16; + /* + * At this point, we've switched address spaces and are ready + * to load up the rest of the next context. + */ + pcpu->pc_curpcb = pcb; + pcpu->pc_curthread = new; + /* + * Call cpu_switchin to release the old thread, lock the new thread, + * and load the context. + */ + if (__predict_false(pcb->pcb_flags & (PCB_32BIT | PCB_DBREGS))) + cpu_switchin_pcb(pcb); + cpu_switchin(old, new, mtx); + /* Returns via cpu_switchout() */ +} + #ifdef KDB /*