Index: amd64/amd64/cpu_switch.S =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/cpu_switch.S,v retrieving revision 1.160 diff -u -r1.160 cpu_switch.S --- amd64/amd64/cpu_switch.S 22 Aug 2007 05:06:14 -0000 1.160 +++ amd64/amd64/cpu_switch.S 15 Mar 2008 01:16:39 -0000 @@ -69,16 +69,19 @@ * %rsi = newtd */ ENTRY(cpu_throw) + movq TD_PCB(%rdi),%r8 /* Old pcb */ movl PCPU(CPUID), %eax - testq %rdi,%rdi /* no thread? */ - jz 1f + movq PCB_FSBASE(%r8),%r9 + movq PCB_GSBASE(%r8),%r10 /* release bit from old pm_active */ movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ LK btrl %eax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ -1: - movq TD_PCB(%rsi),%rdx /* newtd->td_proc */ - movq PCB_CR3(%rdx),%rdx + movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */ + prefetch (%r8) + prefetch 32(%r8) + prefetch 64(%r8) + movq PCB_CR3(%r8),%rdx movq %rdx,%cr3 /* new address space */ jmp swact END(cpu_throw) @@ -96,44 +99,29 @@ /* Switch to new thread. First, save context. */ movq TD_PCB(%rdi),%r8 + prefetchw (%r8) + prefetchw 32(%r8) + prefetchw 64(%r8) + movq (%rsp),%rax /* Hardware registers */ - movq %rax,PCB_RIP(%r8) - movq %rbx,PCB_RBX(%r8) - movq %rsp,PCB_RSP(%r8) - movq %rbp,PCB_RBP(%r8) - movq %r12,PCB_R12(%r8) - movq %r13,PCB_R13(%r8) - movq %r14,PCB_R14(%r8) movq %r15,PCB_R15(%r8) + movq %r14,PCB_R14(%r8) + movq %r13,PCB_R13(%r8) + movq %r12,PCB_R12(%r8) + movq %rbp,PCB_RBP(%r8) + movq %rsp,PCB_RSP(%r8) + movq %rbx,PCB_RBX(%r8) + movq %rax,PCB_RIP(%r8) + movq PCB_FSBASE(%r8),%r9 + movq PCB_GSBASE(%r8),%r10 testl $PCB_32BIT,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ - - /* Save userland %gs */ - movl %gs,PCB_GS(%r8) - movq PCB_GS32P(%r8),%rax - movq (%rax),%rax - movq %rax,PCB_GS32SD(%r8) + jnz store_gs /* static predict not taken */ +done_store_gs: -1: - /* Test if debug registers should be saved. 
*/ testl $PCB_DBREGS,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ - movq %dr7,%rax /* yes, do the save */ - movq %rax,PCB_DR7(%r8) - andq $0x0000fc00, %rax /* disable all watchpoints */ - movq %rax,%dr7 - movq %dr6,%rax - movq %rax,PCB_DR6(%r8) - movq %dr3,%rax - movq %rax,PCB_DR3(%r8) - movq %dr2,%rax - movq %rax,PCB_DR2(%r8) - movq %dr1,%rax - movq %rax,PCB_DR1(%r8) - movq %dr0,%rax - movq %rax,PCB_DR0(%r8) -1: + jnz store_dr /* static predict not taken */ +done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) @@ -150,6 +138,9 @@ /* Save is done. Now fire up new thread. Leave old vmspace. */ movq TD_PCB(%rsi),%r8 + prefetch (%r8) + prefetch 32(%r8) + prefetch 64(%r8) /* switch address space */ movq PCB_CR3(%r8),%rcx @@ -181,82 +172,150 @@ cmpq %rcx, %rdx pause je 1b - lfence #endif /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ - movq TD_PCB(%rsi),%r8 + /* Skip loading user fsbase/gsbase for kthreads */ + testl $TDP_KTHREAD,TD_PFLAGS(%rsi) + jz normal_proc + + /* Copy old fs/gsbase to new kthread pcb for future switches */ + movq %r9,PCB_FSBASE(%r8) + movq %r10,PCB_GSBASE(%r8) + jmp do_tss + +normal_proc: + cmpq PCB_FSBASE(%r8),%r9 + jz 1f /* Restore userland %fs */ movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax movl PCB_FSBASE+4(%r8),%edx wrmsr +1: + cmpq PCB_GSBASE(%r8),%r10 + jz 2f /* Restore userland %gs */ movl $MSR_KGSBASE,%ecx movl PCB_GSBASE(%r8),%eax movl PCB_GSBASE+4(%r8),%edx wrmsr +2: +do_tss: /* Update the TSS_RSP0 pointer for the next interrupt */ movq PCPU(TSSP), %rax - addq $COMMON_TSS_RSP0, %rax - leaq -16(%r8), %rbx - movq %rbx, (%rax) - movq %rbx, PCPU(RSP0) - + movq %r8, PCPU(RSP0) movq %r8, PCPU(CURPCB) + addq $COMMON_TSS_RSP0, %rax movq %rsi, PCPU(CURTHREAD) /* into next thread */ + movq %r8, (%rax) + + /* Test if debug registers should be restored. 
*/ + testl $PCB_DBREGS,PCB_FLAGS(%r8) + jnz load_dr /* static predict not taken */ +done_load_dr: testl $PCB_32BIT,PCB_FLAGS(%r8) - jz 1f /* no, skip over */ + jnz load_gs /* static predict not taken */ +done_load_gs: + /* Restore context. */ + movq PCB_R15(%r8),%r15 + movq PCB_R14(%r8),%r14 + movq PCB_R13(%r8),%r13 + movq PCB_R12(%r8),%r12 + movq PCB_RBP(%r8),%rbp + movq PCB_RSP(%r8),%rsp + movq PCB_RBX(%r8),%rbx + movq PCB_RIP(%r8),%rax + movq %rax,(%rsp) + ret + + /* + * We order these strangely for several reasons. + * 1: I wanted to use static branch prediction hints + * 2: Most athlon64/opteron cpus don't have them. They define + * a forward branch as 'predict not taken'. Intel cores have + * the 'rep' prefix to invert this. + * So, to make it work on both forms of cpu we do the detour. + * We use jumps rather than call in order to avoid the stack. + */ + +#if 0 +do_kthread: + /* + * Copy old fsbase/gsbase to kthread to maintain the invariant + * that curpcb->pcb_[fg]sbase is a cache of the current cpu settings. + */ + movq %r9,PCB_FSBASE(%r8) + movq %r10,PCB_GSBASE(%r8) + jmp do_tss +#endif + + .p2align 4,0x90 +store_gs: + movl %gs,PCB_GS(%r8) + movq PCB_GS32P(%r8),%rax + movq (%rax),%rax + movq %rax,PCB_GS32SD(%r8) + jmp done_store_gs + + .p2align 4,0x90 +load_gs: /* Restore userland %gs while preserving kernel gsbase */ movq PCB_GS32P(%r8),%rax - movq PCB_GS32SD(%r8),%rbx - movq %rbx,(%rax) + movq PCB_GS32SD(%r8),%rcx + movq %rcx,(%rax) movl $MSR_GSBASE,%ecx rdmsr movl PCB_GS(%r8),%gs wrmsr + jmp done_load_gs -1: - /* Restore context. 
*/ - movq PCB_RBX(%r8),%rbx - movq PCB_RSP(%r8),%rsp - movq PCB_RBP(%r8),%rbp - movq PCB_R12(%r8),%r12 - movq PCB_R13(%r8),%r13 - movq PCB_R14(%r8),%r14 - movq PCB_R15(%r8),%r15 - movq PCB_RIP(%r8),%rax - movq %rax,(%rsp) + .p2align 4,0x90 +store_dr: + movq %dr7,%rax /* yes, do the save */ + movq %dr0,%r15 + movq %dr1,%r14 + movq %dr2,%r13 + movq %dr3,%r12 + movq %dr6,%r11 + movq %r15,PCB_DR0(%r8) + movq %r14,PCB_DR1(%r8) + movq %r13,PCB_DR2(%r8) + movq %r12,PCB_DR3(%r8) + movq %r11,PCB_DR6(%r8) + movq %rax,PCB_DR7(%r8) + andq $0x0000fc00, %rax /* disable all watchpoints */ + movq %rax,%dr7 + jmp done_store_dr - /* Test if debug registers should be restored. */ - testl $PCB_DBREGS,PCB_FLAGS(%r8) - jz 1f - movq PCB_DR6(%r8),%rax - movq %rax,%dr6 - movq PCB_DR3(%r8),%rax - movq %rax,%dr3 - movq PCB_DR2(%r8),%rax - movq %rax,%dr2 - movq PCB_DR1(%r8),%rax - movq %rax,%dr1 - movq PCB_DR0(%r8),%rax - movq %rax,%dr0 - /* But preserve reserved bits in %dr7 */ + .p2align 4,0x90 +load_dr: movq %dr7,%rax - andq $0x0000fc00,%rax + movq PCB_DR0(%r8),%r15 + movq PCB_DR1(%r8),%r14 + movq PCB_DR2(%r8),%r13 + movq PCB_DR3(%r8),%r12 + movq PCB_DR6(%r8),%r11 movq PCB_DR7(%r8),%rcx + movq %r15,%dr0 + movq %r14,%dr1 + /* Preserve reserved bits in %dr7 */ + andq $0x0000fc00,%rax andq $~0x0000fc00,%rcx + movq %r13,%dr2 + movq %r12,%dr3 orq %rcx,%rax + movq %r11,%dr6 movq %rax,%dr7 -1: - ret + jmp done_load_dr + END(cpu_switch) /* Index: amd64/amd64/genassym.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/amd64/genassym.c,v retrieving revision 1.166 diff -u -r1.166 genassym.c --- amd64/amd64/genassym.c 23 Nov 2007 03:03:29 -0000 1.166 +++ amd64/amd64/genassym.c 15 Mar 2008 01:16:39 -0000 @@ -86,6 +86,7 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN); +ASSYM(TDP_KTHREAD, TDP_KTHREAD); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); Index: 
amd64/ia32/ia32_signal.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/ia32/ia32_signal.c,v retrieving revision 1.16 diff -u -r1.16 ia32_signal.c --- amd64/ia32/ia32_signal.c 13 Mar 2008 10:54:37 -0000 1.16 +++ amd64/ia32/ia32_signal.c 15 Mar 2008 01:16:39 -0000 @@ -715,10 +715,12 @@ struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; + critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; + critical_exit(); load_ds(_udatasel); load_es(_udatasel); load_fs(_udatasel); Index: amd64/include/pcb.h =================================================================== RCS file: /home/ncvs/src/sys/amd64/include/pcb.h,v retrieving revision 1.63 diff -u -r1.63 pcb.h --- amd64/include/pcb.h 30 Mar 2007 00:06:21 -0000 1.63 +++ amd64/include/pcb.h 15 Mar 2008 01:16:39 -0000 @@ -44,17 +44,21 @@ #include struct pcb { - register_t pcb_cr3; + register_t pcb_cr3; /* 0 */ register_t pcb_r15; register_t pcb_r14; register_t pcb_r13; - register_t pcb_r12; + + register_t pcb_r12; /* 32 */ register_t pcb_rbp; register_t pcb_rsp; register_t pcb_rbx; - register_t pcb_rip; + + register_t pcb_rip; /* 64 */ register_t pcb_fsbase; register_t pcb_gsbase; + u_long pcb_flags; + u_int32_t pcb_ds; u_int32_t pcb_es; u_int32_t pcb_fs; @@ -67,7 +71,6 @@ u_int64_t pcb_dr7; struct savefpu pcb_save; - u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ Index: kern/sched_4bsd.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v retrieving revision 1.117 diff -u -r1.117 sched_4bsd.c --- kern/sched_4bsd.c 12 Mar 2008 10:11:59 -0000 1.117 +++ kern/sched_4bsd.c 15 Mar 2008 01:16:40 -0000 @@ -1400,6 +1400,8 @@ 
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); + if (td == NULL) + td = PCPU_GET(idlethread); cpu_throw(td, choosethread()); /* doesn't return */ } Index: kern/sched_ule.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v retrieving revision 1.233 diff -u -r1.233 sched_ule.c --- kern/sched_ule.c 14 Mar 2008 15:22:38 -0000 1.233 +++ kern/sched_ule.c 15 Mar 2008 01:16:40 -0000 @@ -2511,6 +2511,8 @@ TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd; PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); + if (td == NULL) + td = PCPU_GET(idlethread); cpu_throw(td, newtd); /* doesn't return */ }