diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index d53d8bb..937b914 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -245,7 +245,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) cr3 = rcr3(); load_cr3(KPML4phys); - stopfpu = &stopxpcbs[0]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[0]->xpcb_pcb.pcb_save; if (acpi_savecpu(stopxpcbs[0])) { fpugetregs(curthread, stopfpu); diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 364875e..c71bcd0 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -116,7 +116,7 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addq $PCB_SAVEFPU,%r8 + movq PCB_SAVEFPU(%r8),%r8 clts fxsave (%r8) smsw %ax @@ -341,7 +341,7 @@ ENTRY(savectx) je 1f movq TD_PCB(%rax),%rdi - leaq PCB_SAVEFPU(%rdi),%rdi + movq PCB_SAVEFPU(%rdi),%rdi clts fxsave (%rdi) smsw %ax @@ -349,7 +349,7 @@ ENTRY(savectx) lmsw %ax movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ - leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ /* arg 1 (%rdi) already loaded */ call bcopy 1: diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 10cb6c2..3e99b08 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -91,8 +91,8 @@ void stop_emulating(void); #endif /* __GNUCLIKE_ASM && !lint */ -#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) -#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) typedef u_char bool_t; @@ -146,7 +146,7 @@ fpuexit(struct thread *td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(&PCPU_GET(curpcb)->pcb_save); + fxsave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -424,8 +424,10 @@ fpudna(void) if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(&pcb->pcb_initial_fpucw); pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } else - fxrstor(&pcb->pcb_save); + fxrstor(pcb->pcb_save); intr_restore(s); } @@ -449,13 +451,39 @@ fpudrop() * It returns the FPU ownership status. */ int +fpugetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { + bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxsave(addr); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int fpugetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; - if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = td->td_pcb->pcb_initial_fpucw; + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -465,7 +493,7 @@ fpugetregs(struct thread *td, struct savefpu *addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -474,19 +502,42 @@ fpugetregs(struct thread *td, struct savefpu *addr) * Set the state of the FPU. */ void +fpusetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxrstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_FPUINITDONE | PCB_USERFPUINITDONE; + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + pcb->pcb_flags |= PCB_USERFPUINITDONE; + } +} + +void fpusetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxrstor(addr); intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, td->td_pcb->pcb_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } - curthread->td_pcb->pcb_flags |= PCB_FPUINITDONE; + pcb->pcb_flags |= PCB_FPUINITDONE; } /* @@ -575,3 +626,22 @@ static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx) +{ + + fpuexit(td); + ctx->prev = td->td_pcb->pcb_save; + td->td_pcb->pcb_save = &ctx->hwstate; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + + fpudrop(); + td->td_pcb->pcb_save = ctx->prev; + return (0); +} diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 2318975..62e0be5 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1962,7 +1962,7 @@ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { - fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); + fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); return (0); } @@ -1971,7 +1971,7 @@ int set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); + set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); return (0); } @@ -2086,7 +2086,8 @@ static void get_fpcontext(struct thread *td, mcontext_t *mcp) { - mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetuserregs(td, + (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } @@ -2111,7 +2112,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + fpusetuserregs(td, fpstate); } else return (EINVAL); return (0); diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 337c028..325ffe5 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1250,7 +1250,7 @@ cpususpend_handler(void) rf = intr_disable(); cr3 = rcr3(); - stopfpu = &stopxpcbs[cpu]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[cpu]->xpcb_pcb.pcb_save; if (savectx2(stopxpcbs[cpu])) { fpugetregs(curthread, stopfpu); wbinvd(); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index bd7ee63..1c42fe3 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -476,6 +476,8 @@ trap(struct trapframe *frame) case T_DNA: /* transparent fault (due to context switch "late") */ + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); fpudna(); goto userout; @@ -500,16 +502,19 @@ trap(struct trapframe *frame) goto out; case T_DNA: + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); + fpudna(); + goto out; + + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ /* - * The kernel is apparently using fpu for copying. - * XXX this should be fatal unless the kernel has - * registered such use. + * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill */ - printf("fpudna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif - fpudna(); + trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 3f7d76a..41d496d 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -122,7 +122,7 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. */ fpuexit(td1); /* Point the pcb to the top of the stack */ @@ -133,6 +133,9 @@ cpu_fork(td1, p2, td2, flags) /* Copy p1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -381,7 +384,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0) * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~PCB_FPUINITDONE; + pcb2->pcb_flags &= ~(PCB_FPUINITDONE | PCB_USERFPUINITDONE); pcb2->pcb_full_iret = 1; /* diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index da5190f..30fcffb 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -147,7 +147,7 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; @@ -182,7 +182,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 272f94a..3ce8e9a 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -73,6 +73,15 @@ struct savefpu { u_char sv_pad[96]; } __aligned(16); +#ifdef _KERNEL +struct fpu_kern_ctx { + struct savefpu hwstate; + struct savefpu *prev; +}; + +#define PCB_USER_FPU(pcb) ((pcb)->pcb_save == &(pcb)->pcb_user_save) +#endif + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -102,9 +111,13 @@ void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td, struct savefpu *addr); +int fpugetuserregs(struct thread *td, struct savefpu *addr); void fpuinit(void); void fpusetregs(struct thread *td, struct savefpu *addr); +void fpusetuserregs(struct thread *td, struct savefpu *addr); int fputrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); #endif #endif /* !_MACHINE_FPU_H_ */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index b26188a..344d56b 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -58,6 +58,7 @@ struct pcb { u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ +#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_GS32BIT 0x20 /* linux gs switch */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ @@ -69,7 +70,7 @@ struct pcb { u_int64_t pcb_dr6; u_int64_t pcb_dr7; - struct savefpu pcb_save; + struct savefpu pcb_user_save; uint16_t pcb_initial_fpucw; caddr_t pcb_onfault; /* copyin/out fault recovery */ @@ -78,6 +79,7 @@ struct pcb { struct user_segment_descriptor pcb_gs32sd; /* local tss, with i/o bitmap; NULL for common */ struct amd64tss *pcb_tssp; + struct savefpu *pcb_save; char pcb_full_iret; };