diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index d53d8bb..937b914 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -245,7 +245,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) cr3 = rcr3(); load_cr3(KPML4phys); - stopfpu = &stopxpcbs[0]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[0]->xpcb_pcb.pcb_save; if (acpi_savecpu(stopxpcbs[0])) { fpugetregs(curthread, stopfpu); diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 364875e..c71bcd0 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -116,7 +116,7 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addq $PCB_SAVEFPU,%r8 + movq PCB_SAVEFPU(%r8),%r8 clts fxsave (%r8) smsw %ax @@ -341,7 +341,7 @@ ENTRY(savectx) je 1f movq TD_PCB(%rax),%rdi - leaq PCB_SAVEFPU(%rdi),%rdi + movq PCB_SAVEFPU(%rdi),%rdi clts fxsave (%rdi) smsw %ax @@ -349,7 +349,7 @@ ENTRY(savectx) lmsw %ax movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ - leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ /* arg 1 (%rdi) already loaded */ call bcopy 1: diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 10cb6c2..0ba234c 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -91,8 +91,8 @@ void stop_emulating(void); #endif /* __GNUCLIKE_ASM && !lint */ -#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) -#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) typedef u_char bool_t; @@ -146,7 +146,7 @@ fpuexit(struct thread *td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(&PCPU_GET(curpcb)->pcb_save); + fxsave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -424,8 +424,10 @@ fpudna(void) 
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(&pcb->pcb_initial_fpucw); pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } else - fxrstor(&pcb->pcb_save); + fxrstor(pcb->pcb_save); intr_restore(s); } @@ -449,13 +451,39 @@ fpudrop() * It returns the FPU ownership status. */ int +fpugetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { + bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxsave(addr); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int fpugetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; - if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = td->td_pcb->pcb_initial_fpucw; + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -465,7 +493,7 @@ fpugetregs(struct thread *td, struct savefpu *addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -474,19 +502,42 @@ fpugetregs(struct thread *td, struct savefpu *addr) * Set the state of the FPU. 
*/ void +fpusetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxrstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_FPUINITDONE | PCB_USERFPUINITDONE; + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + pcb->pcb_flags |= PCB_USERFPUINITDONE; + } +} + +void fpusetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxrstor(addr); intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, td->td_pcb->pcb_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } - curthread->td_pcb->pcb_flags |= PCB_FPUINITDONE; + pcb->pcb_flags |= PCB_FPUINITDONE; } /* @@ -575,3 +626,64 @@ static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, + ("mangled pcb_save")); + ctx->flags = 0; + if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) + ctx->flags |= FPU_KERN_CTX_FPUINITDONE; + fpuexit(td); + ctx->prev = pcb->pcb_save; + pcb->pcb_save = &ctx->hwstate; + pcb->pcb_flags |= PCB_KERNFPU; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + register_t savecrit; + + pcb = td->td_pcb; + savecrit = intr_disable(); + if (curthread == PCPU_GET(fpcurthread)) + fpudrop(); + intr_restore(savecrit); + pcb->pcb_save = ctx->prev; + if (pcb->pcb_save == &pcb->pcb_user_save) { + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + pcb->pcb_flags &= 
~PCB_KERNFPU; + } else { + if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); + } + return (0); +} + +int +fpu_kern_thread(void) +{ + struct pcb *pcb; + + pcb = PCPU_GET(curpcb); + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(PCB_USER_FPU(pcb), ("recursive call")); + + pcb->pcb_flags |= PCB_KERNFPU; + return (0); +} diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 1155eaa..fc9c303 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1958,7 +1958,7 @@ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { - fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); + fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); return (0); } @@ -1967,7 +1967,7 @@ int set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); + set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); return (0); } @@ -2082,7 +2082,8 @@ static void get_fpcontext(struct thread *td, mcontext_t *mcp) { - mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetuserregs(td, + (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } @@ -2107,7 +2108,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + fpusetuserregs(td, fpstate); } else return (EINVAL); return (0); @@ -2118,6 +2119,7 @@ fpstate_drop(struct thread *td) { register_t s; + KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); @@ -2131,7 +2133,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of fpugetregs()... 
perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_FPUINITDONE | + PCB_USERFPUINITDONE); intr_restore(s); } diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 337c028..325ffe5 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1250,7 +1250,7 @@ cpususpend_handler(void) rf = intr_disable(); cr3 = rcr3(); - stopfpu = &stopxpcbs[cpu]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[cpu]->xpcb_pcb.pcb_save; if (savectx2(stopxpcbs[cpu])) { fpugetregs(curthread, stopfpu); wbinvd(); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index bd7ee63..e564f89 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -476,6 +476,8 @@ trap(struct trapframe *frame) case T_DNA: /* transparent fault (due to context switch "late") */ + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); fpudna(); goto userout; @@ -500,16 +502,19 @@ trap(struct trapframe *frame) goto out; case T_DNA: + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); + fpudna(); + goto out; + + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ /* - * The kernel is apparently using fpu for copying. - * XXX this should be fatal unless the kernel has - * registered such use. 
+ * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill */ - printf("fpudna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif - fpudna(); + trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ @@ -686,6 +691,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -1069,6 +1076,10 @@ syscall(struct trapframe *frame) ("System call %s returning with %d locks held", (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ? syscallnames[sa.code] : "???", td->td_locks)); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returning with kernel FPU ctx leaked", + (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ? + syscallnames[sa.code] : "???")); /* * Handle reschedule and other end-of-syscall issues diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 3f7d76a..eefccfc 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -122,7 +122,7 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. 
*/ fpuexit(td1); /* Point the pcb to the top of the stack */ @@ -130,9 +130,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -308,6 +311,7 @@ cpu_thread_alloc(struct thread *td) td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)td->td_pcb - 1; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -381,7 +385,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0) * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~PCB_FPUINITDONE; + pcb2->pcb_flags &= ~(PCB_FPUINITDONE | PCB_USERFPUINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; pcb2->pcb_full_iret = 1; /* diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index da5190f..30fcffb 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -147,7 +147,7 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; @@ -182,7 +182,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 272f94a..74aac4b 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -73,6 +73,17 @@ struct savefpu { u_char 
sv_pad[96]; } __aligned(16); +#ifdef _KERNEL +struct fpu_kern_ctx { + struct savefpu hwstate; + struct savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_FPUINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) +#endif + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -102,9 +113,14 @@ void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td, struct savefpu *addr); +int fpugetuserregs(struct thread *td, struct savefpu *addr); void fpuinit(void); void fpusetregs(struct thread *td, struct savefpu *addr); +void fpusetuserregs(struct thread *td, struct savefpu *addr); int fputrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(void); #endif #endif /* !_MACHINE_FPU_H_ */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index b26188a..aead8b7 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -57,7 +57,9 @@ struct pcb { register_t pcb_gsbase; u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ +#define PCB_KERNFPU 0x04 /* kernel uses fpu */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ +#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_GS32BIT 0x20 /* linux gs switch */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ @@ -69,7 +71,7 @@ struct pcb { u_int64_t pcb_dr6; u_int64_t pcb_dr7; - struct savefpu pcb_save; + struct savefpu pcb_user_save; uint16_t pcb_initial_fpucw; caddr_t pcb_onfault; /* copyin/out fault recovery */ @@ -78,6 +80,7 @@ struct pcb { struct user_segment_descriptor pcb_gs32sd; /* local tss, with i/o bitmap; NULL for common */ struct amd64tss *pcb_tssp; + struct savefpu *pcb_save; char pcb_full_iret; }; diff --git 
a/sys/crypto/via/padlock.c b/sys/crypto/via/padlock.c index ccb0595..df09acb 100644 --- a/sys/crypto/via/padlock.c +++ b/sys/crypto/via/padlock.c @@ -169,6 +169,7 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses = NULL; struct cryptoini *encini, *macini; + struct thread *td; int error; if (sidp == NULL || cri == NULL) @@ -236,7 +237,13 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) } if (macini != NULL) { + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx); + if (error != 0) + goto out; error = padlock_hash_setup(ses, macini); + fpu_kern_leave(td, &ses->ses_fpu_ctx); + out: if (error != 0) { padlock_freesession_one(sc, ses, 0); return (error); diff --git a/sys/crypto/via/padlock.h b/sys/crypto/via/padlock.h index 7d928ba..c8ee9bd 100644 --- a/sys/crypto/via/padlock.h +++ b/sys/crypto/via/padlock.h @@ -32,6 +32,12 @@ #include #include +#if defined(__i386__) +#include +#elif defined(__amd64__) +#include +#endif + union padlock_cw { uint64_t raw; struct { @@ -70,6 +76,7 @@ struct padlock_session { int ses_used; uint32_t ses_id; TAILQ_ENTRY(padlock_session) ses_next; + struct fpu_kern_ctx ses_fpu_ctx; }; #define PADLOCK_ALIGN(p) (void *)(roundup2((uintptr_t)(p), 16)) diff --git a/sys/crypto/via/padlock_cipher.c b/sys/crypto/via/padlock_cipher.c index 8195584..333cdda 100644 --- a/sys/crypto/via/padlock_cipher.c +++ b/sys/crypto/via/padlock_cipher.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -201,9 +202,10 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, struct cryptop *crp) { union padlock_cw *cw; + struct thread *td; u_char *buf, *abuf; uint32_t *key; - int allocated; + int allocated, error; buf = padlock_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) @@ -247,9 +249,16 @@ padlock_cipher_process(struct padlock_session *ses, 
struct cryptodesc *enccrd, enccrd->crd_len, abuf); } + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx); + if (error != 0) + goto out; + padlock_cbc(abuf, abuf, enccrd->crd_len / AES_BLOCK_LEN, key, cw, ses->ses_iv); + fpu_kern_leave(td, &ses->ses_fpu_ctx); + if (allocated) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, abuf); @@ -262,9 +271,10 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, AES_BLOCK_LEN, ses->ses_iv); } + out: if (allocated) { bzero(buf, enccrd->crd_len + 16); free(buf, M_PADLOCK); } - return (0); + return (error); } diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 71f956e..acb3e2c 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -34,12 +34,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #if defined(__amd64__) || (defined(__i386__) && !defined(PC98)) #include #include #include #include #endif +#include #include #include /* for hmac_ipad_buffer and hmac_opad_buffer */ @@ -363,12 +365,18 @@ int padlock_hash_process(struct padlock_session *ses, struct cryptodesc *maccrd, struct cryptop *crp) { + struct thread *td; int error; + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx); + if (error != 0) + return (error); if ((maccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) padlock_hash_key_setup(ses, maccrd->crd_key, maccrd->crd_klen); error = padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags); + fpu_kern_leave(td, &ses->ses_fpu_ctx); return (error); } diff --git a/sys/dev/random/nehemiah.c b/sys/dev/random/nehemiah.c index e34cdfa..7ecdfbf 100644 --- a/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include #define RANDOM_BLOCK_SIZE 256 @@ -82,6 +84,8 @@ static uint8_t out[RANDOM_BLOCK_SIZE+7] __aligned(16); static union VIA_ACE_CW acw __aligned(16); +static struct fpu_kern_ctx 
fpu_ctx_save; + static struct mtx random_nehemiah_mtx; /* ARGSUSED */ @@ -147,6 +151,7 @@ random_nehemiah_read(void *buf, int c) uint8_t *p; mtx_lock(&random_nehemiah_mtx); + fpu_kern_enter(curthread, &fpu_ctx_save); /* Get a random AES key */ count = 0; @@ -187,6 +192,7 @@ random_nehemiah_read(void *buf, int c) c = MIN(RANDOM_BLOCK_SIZE, c); memcpy(buf, out, (size_t)c); + fpu_kern_leave(curthread, &fpu_ctx_save); mtx_unlock(&random_nehemiah_mtx); return (c); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 695b656..aa6147f 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -3175,12 +3175,12 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, + fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof *fpregs); return (0); } @@ -3190,11 +3190,11 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); + &td->td_pcb->pcb_user_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof *fpregs); return (0); } @@ -3314,7 +3314,7 @@ get_fpcontext(struct thread *td, mcontext_t *mcp) addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } - mcp->mc_ownedfp = npxgetregs(td, addr); + mcp->mc_ownedfp = npxgetuserregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); @@ -3359,7 +3359,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) * XXX we violate the dubious requirement that npxsetregs() * be called 
with interrupts disabled. */ - npxsetregs(td, addr); + npxsetuserregs(td, addr); #endif /* * Don't bother putting things back where they were in the @@ -3376,6 +3376,7 @@ fpstate_drop(struct thread *td) { register_t s; + KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) @@ -3391,7 +3392,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | + PCB_NPXUSERINITDONE); intr_restore(s); } diff --git a/sys/i386/i386/ptrace_machdep.c b/sys/i386/i386/ptrace_machdep.c index 409db16..4608c9b 100644 --- a/sys/i386/i386/ptrace_machdep.c +++ b/sys/i386/i386/ptrace_machdep.c @@ -51,7 +51,7 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) if (!cpu_fxsr) return (EINVAL); - fpstate = &td->td_pcb->pcb_save.sv_xmm; + fpstate = &td->td_pcb->pcb_user_save.sv_xmm; switch (req) { case PT_GETXMMREGS: error = copyout(fpstate, addr, sizeof(*fpstate)); diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 0c07871..1dee5f2 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -156,8 +156,7 @@ ENTRY(cpu_switch) /* have we used fp, and need a save? 
*/ cmpl %ecx,PCPU(FPCURTHREAD) jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx + pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */ call npxsave /* do it in a big C function */ popl %eax 1: @@ -408,7 +407,7 @@ ENTRY(savectx) pushl %ecx movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax + movl PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call npxsave @@ -417,7 +416,7 @@ ENTRY(savectx) popl %ecx pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx + movl PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call bcopy diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index a2274e2..fadd66b 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -503,6 +503,8 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; @@ -535,20 +537,23 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX - /* - * The kernel is apparently using npx for copying. - * XXX this should be fatal unless the kernel has - * registered such use. 
- */ - printf("npxdna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); if (npxdna()) goto out; #endif break; + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ + /* + * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill + */ + trap_fatal(frame, 0); + goto out; + /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle @@ -754,6 +759,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -1156,6 +1163,10 @@ syscall(struct trapframe *frame) ("System call %s returning with %d locks held", (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ? syscallnames[sa.code] : "???", td->td_locks)); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returning with kernel FPU ctx leaked", + (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ? + syscallnames[sa.code] : "???")); /* * Handle reschedule and other end-of-syscall issues diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index d2c13b8..01e7245 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -176,13 +176,13 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. 
*/ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); #ifdef DEV_NPX savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) - npxsave(&td1->td_pcb->pcb_save); + npxsave(td1->td_pcb->pcb_save); intr_restore(savecrit); #endif @@ -191,9 +191,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -372,6 +375,7 @@ cpu_thread_alloc(struct thread *td) td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; td->td_pcb->pcb_ext = NULL; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -437,7 +441,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0) * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE); + pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE|PCB_NPXUSERINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; /* * Create a new fresh stack for the new thread. 
diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h index e9811b3..e1cd317 100644 --- a/sys/i386/include/npx.h +++ b/sys/i386/include/npx.h @@ -143,6 +143,15 @@ union savefpu { #define IRQ_NPX 13 +struct fpu_kern_ctx { + union savefpu hwstate; + union savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) + /* full reset on some systems, NOP on others */ #define npx_full_reset() outb(IO_NPX + 1, 0) @@ -151,10 +160,15 @@ void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); int npxgetregs(struct thread *td, union savefpu *addr); +int npxgetuserregs(struct thread *td, union savefpu *addr); void npxinit(void); void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); +void npxsetuserregs(struct thread *td, union savefpu *addr); int npxtrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(void); #endif diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h index 17c8486..465f497 100644 --- a/sys/i386/include/pcb.h +++ b/sys/i386/include/pcb.h @@ -60,7 +60,7 @@ struct pcb { int pcb_dr6; int pcb_dr7; - union savefpu pcb_save; + union savefpu pcb_user_save; uint16_t pcb_initial_npxcw; u_int pcb_flags; #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ @@ -68,6 +68,8 @@ struct pcb { #define PCB_NPXTRAP 0x04 /* npx trap pending */ #define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ #define PCB_VM86CALL 0x10 /* in vm86 call */ +#define PCB_NPXUSERINITDONE 0x20 /* user fpu state is initialized */ +#define PCB_KERNNPX 0x40 /* kernel uses npx */ caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; @@ -76,6 +78,7 @@ struct pcb { struct pcb_ext *pcb_ext; /* optional pcb extension */ int pcb_psl; /* process status long */ u_long pcb_vm86[2]; /* vm86bios scratch space */ + 
union savefpu *pcb_save; }; #ifdef _KERNEL diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 6da4b4c..0c03fd2 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -135,12 +135,12 @@ void stop_emulating(void); #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ @@ -149,9 +149,9 @@ void stop_emulating(void); } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ @@ -502,7 +502,7 @@ npxexit(td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) - npxsave(&PCPU_GET(curpcb)->pcb_save); + npxsave(PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); #ifdef NPX_DEBUG if (npx_exists) { @@ -809,6 +809,8 @@ npxdna(void) if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(&pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the @@ -824,7 +826,7 @@ npxdna(void) * fnclex if it is the first FPU instruction after a context * switch. 
*/ - fpurstor(&pcb->pcb_save); + fpurstor(pcb->pcb_save); } intr_restore(s); @@ -895,18 +897,18 @@ npxdrop() * It returns the FPU ownership status. */ int -npxgetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxgetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return (_MC_FPOWNED_NONE); - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); - SET_FPU_CW(addr, td->td_pcb->pcb_initial_npxcw); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -925,7 +927,43 @@ npxgetregs(td, addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int +npxgetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return (_MC_FPOWNED_NONE); + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { + bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fpusave(addr); +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + /* + * fnsave initializes the FPU and destroys whatever + * context it contains. Make sure the FPU owner + * starts with a clean state next time. + */ + npxdrop(); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -934,15 +972,15 @@ npxgetregs(td, addr) * Set the state of the FPU. 
*/ void -npxsetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxsetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { #ifdef CPU_ENABLE_SSE @@ -953,9 +991,37 @@ npxsetregs(td, addr) intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, pcb->pcb_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; + } + pcb->pcb_flags |= PCB_NPXINITDONE; +} + +void +npxsetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + fnclex(); /* As in npxdrop(). */ + fpurstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_NPXINITDONE | PCB_NPXUSERINITDONE; + } else { + intr_restore(s); + bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } - curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; } static void @@ -1124,3 +1190,64 @@ DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, + ("mangled pcb_save")); + ctx->flags = 0; + if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) + ctx->flags |= FPU_KERN_CTX_NPXINITDONE; + npxexit(td); + ctx->prev = pcb->pcb_save; + pcb->pcb_save = &ctx->hwstate; + pcb->pcb_flags |= PCB_KERNNPX; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + register_t savecrit; + + pcb = td->td_pcb; + savecrit = intr_disable(); + if (curthread == 
PCPU_GET(fpcurthread)) + npxdrop(); + intr_restore(savecrit); + pcb->pcb_save = ctx->prev; + if (pcb->pcb_save == &pcb->pcb_user_save) { + if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) + pcb->pcb_flags |= PCB_NPXINITDONE; + else + pcb->pcb_flags &= ~PCB_NPXINITDONE; + pcb->pcb_flags &= ~PCB_KERNNPX; + } else { + if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) + pcb->pcb_flags |= PCB_NPXINITDONE; + else + pcb->pcb_flags &= ~PCB_NPXINITDONE; + KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); + } + return (0); +} + +int +fpu_kern_thread(void) +{ + struct pcb *pcb; + + pcb = PCPU_GET(curpcb); + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(PCB_USER_FPU(pcb), ("recursive call")); + + pcb->pcb_flags |= PCB_KERNNPX; + return (0); +} diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c index daee9e5..e9559f8 100644 --- a/sys/i386/linux/linux_ptrace.c +++ b/sys/i386/linux/linux_ptrace.c @@ -224,7 +224,7 @@ linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); - bcopy(&td->td_pcb->pcb_save.sv_xmm, fpxregs, sizeof(*fpxregs)); + bcopy(&td->td_pcb->pcb_user_save.sv_xmm, fpxregs, sizeof(*fpxregs)); return (0); } @@ -235,7 +235,7 @@ linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); - bcopy(fpxregs, &td->td_pcb->pcb_save.sv_xmm, sizeof(*fpxregs)); + bcopy(fpxregs, &td->td_pcb->pcb_user_save.sv_xmm, sizeof(*fpxregs)); return (0); } #endif diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index 5810780..bb9601d 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -82,6 +82,10 @@ __FBSDID("$FreeBSD$"); #include 
#include "cryptodev_if.h" +#if defined(__i386__) || defined(__amd64__) +#include +#endif + SDT_PROVIDER_DEFINE(opencrypto); /* @@ -1241,6 +1245,10 @@ crypto_proc(void) u_int32_t hid; int result, hint; +#if defined(__i386__) || defined(__amd64__) + fpu_kern_thread(); +#endif + CRYPTO_Q_LOCK(); for (;;) { /*