diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index d53d8bb..937b914 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -245,7 +245,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) cr3 = rcr3(); load_cr3(KPML4phys); - stopfpu = &stopxpcbs[0]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[0]->xpcb_pcb.pcb_save; if (acpi_savecpu(stopxpcbs[0])) { fpugetregs(curthread, stopfpu); diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 364875e..c71bcd0 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -116,7 +116,7 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addq $PCB_SAVEFPU,%r8 + movq PCB_SAVEFPU(%r8),%r8 clts fxsave (%r8) smsw %ax @@ -341,7 +341,7 @@ ENTRY(savectx) je 1f movq TD_PCB(%rax),%rdi - leaq PCB_SAVEFPU(%rdi),%rdi + movq PCB_SAVEFPU(%rdi),%rdi clts fxsave (%rdi) smsw %ax @@ -349,7 +349,7 @@ ENTRY(savectx) lmsw %ax movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ - leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ /* arg 1 (%rdi) already loaded */ call bcopy 1: diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 10cb6c2..27baaea 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -91,8 +91,8 @@ void stop_emulating(void); #endif /* __GNUCLIKE_ASM && !lint */ -#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) -#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) typedef u_char bool_t; @@ -146,7 +146,7 @@ fpuexit(struct thread *td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(&PCPU_GET(curpcb)->pcb_save); + fxsave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -424,8 +424,10 @@ fpudna(void) 
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(&pcb->pcb_initial_fpucw); pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } else - fxrstor(&pcb->pcb_save); + fxrstor(pcb->pcb_save); intr_restore(s); } @@ -449,13 +451,39 @@ fpudrop() * It returns the FPU ownership status. */ int +fpugetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { + bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxsave(addr); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int fpugetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; - if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = td->td_pcb->pcb_initial_fpucw; + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -465,7 +493,7 @@ fpugetregs(struct thread *td, struct savefpu *addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -474,19 +502,44 @@ fpugetregs(struct thread *td, struct savefpu *addr) * Set the state of the FPU. 
*/ void +fpusetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxrstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_FPUINITDONE | PCB_USERFPUINITDONE; + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_FPUINITDONE; + pcb->pcb_flags |= PCB_USERFPUINITDONE; + } +} + +void fpusetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxrstor(addr); intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, td->td_pcb->pcb_save, sizeof(*addr)); } - curthread->td_pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; + pcb->pcb_flags |= PCB_FPUINITDONE; } /* @@ -575,3 +628,73 @@ static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, + ("mangled pcb_save")); + ctx->flags = 0; + if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) + ctx->flags |= FPU_KERN_CTX_FPUINITDONE; + fpuexit(td); + ctx->prev = pcb->pcb_save; + pcb->pcb_save = &ctx->hwstate; + pcb->pcb_flags |= PCB_KERNFPU; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + register_t savecrit; + + pcb = td->td_pcb; + savecrit = intr_disable(); + if (curthread == PCPU_GET(fpcurthread)) + fpudrop(); + intr_restore(savecrit); + pcb->pcb_save = ctx->prev; + if (pcb->pcb_save == &pcb->pcb_user_save) { + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + 
else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + pcb->pcb_flags &= ~PCB_KERNFPU; + } else { + if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); + } + return (0); +} + +int +fpu_kern_thread(u_int flags) +{ + struct pcb *pcb; + + pcb = PCPU_GET(curpcb); + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(PCB_USER_FPU(pcb), ("recursive call")); + + pcb->pcb_flags |= PCB_KERNFPU; + return (0); +} + +int +is_fpu_kern_thread(u_int flags) +{ + + if ((curthread->td_pflags & TDP_KTHREAD) == 0) + return (0); + return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0); +} diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 4e146c2..880fcd6 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1960,7 +1960,7 @@ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { - fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); + fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); return (0); } @@ -1969,7 +1969,7 @@ int set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); + set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); return (0); } @@ -2084,7 +2084,8 @@ static void get_fpcontext(struct thread *td, mcontext_t *mcp) { - mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetuserregs(td, + (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } @@ -2109,7 +2110,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + fpusetuserregs(td, fpstate); } else return (EINVAL); return (0); @@ -2120,6 +2121,7 @@ fpstate_drop(struct thread *td) { register_t s; + 
KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); @@ -2133,7 +2135,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of fpugetregs()... perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_FPUINITDONE | + PCB_USERFPUINITDONE); intr_restore(s); } diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 4a44aba..5430b03 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1247,7 +1247,7 @@ cpususpend_handler(void) rf = intr_disable(); cr3 = rcr3(); - stopfpu = &stopxpcbs[cpu]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[cpu]->xpcb_pcb.pcb_save; if (savectx2(stopxpcbs[cpu])) { fpugetregs(curthread, stopfpu); wbinvd(); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 2deb931..b91698f 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -425,6 +425,8 @@ trap(struct trapframe *frame) case T_DNA: /* transparent fault (due to context switch "late") */ + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); fpudna(); goto userout; @@ -449,16 +451,19 @@ trap(struct trapframe *frame) goto out; case T_DNA: + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); + fpudna(); + goto out; + + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ /* - * The kernel is apparently using fpu for copying. - * XXX this should be fatal unless the kernel has - * registered such use. 
+ * XXXKIB: for now, treat any FPU trap in the kernel as fatal; + * handler registration seems to be overkill. + */ - printf("fpudna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif - fpudna(); + trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ @@ -603,6 +608,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -891,5 +898,12 @@ syscall(struct trapframe *frame) trapsignal(td, &ksi); } + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returning with kernel FPU ctx leaked", + syscallname(td->td_proc, sa.code))); + KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + ("System call %s returning with mangled pcb_save", + syscallname(td->td_proc, sa.code))); + syscallret(td, error, &sa); } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 3f7d76a..eefccfc 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -122,7 +122,7 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. */ fpuexit(td1); /* Point the pcb to the top of the stack */ @@ -130,9 +130,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -308,6 +311,7 @@ cpu_thread_alloc(struct thread *td) td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)td->td_pcb - 1; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -381,7 +385,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0) * values here. 
*/ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~PCB_FPUINITDONE; + pcb2->pcb_flags &= ~(PCB_FPUINITDONE | PCB_USERFPUINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; pcb2->pcb_full_iret = 1; /* diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index da5190f..30fcffb 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -147,7 +147,7 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; @@ -182,7 +182,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 272f94a..ca0ac8f 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -73,6 +73,17 @@ struct savefpu { u_char sv_pad[96]; } __aligned(16); +#ifdef _KERNEL +struct fpu_kern_ctx { + struct savefpu hwstate; + struct savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_FPUINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) +#endif + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -102,9 +113,22 @@ void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td, struct savefpu *addr); +int fpugetuserregs(struct thread *td, struct savefpu *addr); void fpuinit(void); void fpusetregs(struct thread *td, struct savefpu *addr); +void fpusetuserregs(struct thread *td, struct savefpu *addr); int fputrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, + 
u_int flags); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(u_int flags); +int is_fpu_kern_thread(u_int flags); + +/* + * Flags for fpu_kern_enter() and fpu_kern_thread(). + */ +#define FPU_KERN_NORMAL 0x0000 + #endif #endif /* !_MACHINE_FPU_H_ */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index b26188a..aead8b7 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -57,7 +57,9 @@ struct pcb { register_t pcb_gsbase; u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ +#define PCB_KERNFPU 0x04 /* kernel uses fpu */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ +#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_GS32BIT 0x20 /* linux gs switch */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ @@ -69,7 +71,7 @@ struct pcb { u_int64_t pcb_dr6; u_int64_t pcb_dr7; - struct savefpu pcb_save; + struct savefpu pcb_user_save; uint16_t pcb_initial_fpucw; caddr_t pcb_onfault; /* copyin/out fault recovery */ @@ -78,6 +80,7 @@ struct pcb { struct user_segment_descriptor pcb_gs32sd; /* local tss, with i/o bitmap; NULL for common */ struct amd64tss *pcb_tssp; + struct savefpu *pcb_save; char pcb_full_iret; }; diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index c0d5d70..56ea598 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -133,6 +133,10 @@ amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_bus.c optional pci amd64/pci/pci_cfgreg.c optional pci +crypto/aesni/aesencdec_amd64.S optional aesni +crypto/aesni/aeskeys_amd64.S optional aesni +crypto/aesni/aesni.c optional aesni +crypto/aesni/aesni_wrap.c optional aesni crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock diff --git a/sys/conf/files.i386 
b/sys/conf/files.i386 index 6b13071..3bd62eb 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -112,6 +112,10 @@ bf_enc.o optional crypto | ipsec \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule +crypto/aesni/aesencdec_i386.S optional aesni +crypto/aesni/aeskeys_i386.S optional aesni +crypto/aesni/aesni.c optional aesni +crypto/aesni/aesni_wrap.c optional aesni crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock diff --git a/sys/crypto/aesni/aesencdec_amd64.S b/sys/crypto/aesni/aesencdec_amd64.S new file mode 100644 index 0000000..8060d00 --- /dev/null +++ b/sys/crypto/aesni/aesencdec_amd64.S @@ -0,0 +1,142 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + .text + + .align 0x10,0x90 + .globl aesni_enc + .type aesni_enc,@function +aesni_enc: + .cfi_startproc + movdqu (%rdx),%xmm0 + cmpq $0,%r8 + je 1f + movdqu (%r8),%xmm1 /* unaligned load into reg */ + pxor %xmm1,%xmm0 /* pxor otherwise can fault on iv */ +1: + pxor (%rsi),%xmm0 +2: + addq $0x10,%rsi +// aesenc (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdc,0x06 + decl %edi + jne 2b + addq $0x10,%rsi +// aesenclast (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdd,0x06 + movdqu %xmm0,(%rcx) + retq + .cfi_endproc + .size aesni_enc,. - aesni_enc + + .align 0x10,0x90 + .globl aesni_dec + .type aesni_dec,@function +aesni_dec: + .cfi_startproc + movdqu (%rdx),%xmm0 + pxor (%rsi),%xmm0 +1: + addq $0x10,%rsi +// aesdec (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x06 + decl %edi + jne 1b + addq $0x10,%rsi +// aesdeclast (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x06 + cmpq $0,%r8 + je 2f + movdqu (%r8),%xmm1 + pxor %xmm1,%xmm0 +2: + movdqu %xmm0,(%rcx) + retq + .cfi_endproc + .size aesni_dec,. 
- aesni_dec + + .align 0x10,0x90 + .globl aesni_decrypt_cbc + .type aesni_decrypt_cbc,@function +aesni_decrypt_cbc: + .cfi_startproc + shrq $4,%rdx + movdqu (%r8),%xmm1 +1: + movdqu (%rcx),%xmm0 + movdqa %xmm0,%xmm2 + pxor (%rsi),%xmm0 + cmpl $12,%edi +// aesdec 0x10(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x10 +// aesdec 0x20(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x20 +// aesdec 0x30(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x30 +// aesdec 0x40(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x40 +// aesdec 0x50(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x50 +// aesdec 0x60(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x60 +// aesdec 0x70(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x70 +// aesdec 0x80(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0x80,0x00,0x00,0x00 +// aesdec 0x90(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0x90,0x00,0x00,0x00 + jge 2f +// aesdeclast 0xa0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xa0,0x00,0x00,0x00 + jmp 4f +2: +// aesdec 0xa0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xa0,0x00,0x00,0x00 +// aesdec 0xb0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xb0,0x00,0x00,0x00 + jg 3f +// aesdeclast 0xc0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xc0,0x00,0x00,0x00 + jmp 4f +3: +// aesdec 0xc0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xc0,0x00,0x00,0x00 +// aesdec 0xd0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xd0,0x00,0x00,0x00 +// aesdeclast 0xe0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xe0,0x00,0x00,0x00 +4: + pxor %xmm1,%xmm0 + movdqu %xmm0,(%rcx) + movdqa %xmm2,%xmm1 // iv + addq $0x10,%rcx + decq %rdx + jne 1b + retq + .cfi_endproc + .size aesni_decrypt_cbc,. - aesni_decrypt_cbc + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aesencdec_i386.S b/sys/crypto/aesni/aesencdec_i386.S new file mode 100644 index 0000000..28213d5 --- /dev/null +++ b/sys/crypto/aesni/aesencdec_i386.S @@ -0,0 +1,174 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + .text + .align 0x10,0x90 + .globl aesni_enc + .type aesni_enc,@function +aesni_enc: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + movl 8(%ebp),%ecx /* rounds */ + movl 16(%ebp),%edx + movdqu (%edx),%xmm0 /* from */ + movl 24(%ebp),%eax /* iv */ + cmpl $0,%eax + je 1f + movdqu (%eax),%xmm1 + pxor %xmm1,%xmm0 +1: + movl 12(%ebp),%eax /* key */ + pxor (%eax),%xmm0 +2: + addl $0x10,%eax +// aesenc (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdc,0x00 + loopne 2b + addl $0x10,%eax +// aesenclast (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdd,0x00 + movl 20(%ebp),%eax + movdqu %xmm0,(%eax) /* to */ + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_enc,. - aesni_enc + + .align 0x10,0x90 + .globl aesni_dec + .type aesni_dec,@function +aesni_dec: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + movl 8(%ebp),%ecx /* rounds */ + movl 16(%ebp),%edx + movdqu (%edx),%xmm0 /* from */ + movl 12(%ebp),%eax /* key */ + pxor (%eax),%xmm0 +1: + addl $0x10,%eax +// aesdec (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x00 + loopne 1b + addl $0x10,%eax +// aesdeclast (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x00 + movl 24(%ebp),%eax + cmpl $0,%eax /* iv */ + je 2f + movdqu (%eax),%xmm1 + pxor %xmm1,%xmm0 +2: + movl 20(%ebp),%eax + movdqu %xmm0,(%eax) /* to */ + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_dec,. 
- aesni_dec + + .align 0x10,0x90 + .globl aesni_decrypt_cbc + .type aesni_decrypt_cbc,@function +aesni_decrypt_cbc: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + pushl %ebx + pushl %esi + movl 12(%ebp),%eax /* key */ + movl 16(%ebp),%ecx /* length */ + shrl $4,%ecx + movl 20(%ebp),%ebx /* buf */ + movl 24(%ebp),%esi + movdqu (%esi),%xmm1 /* iv */ + movl 8(%ebp),%esi /* rounds */ +1: + movdqu (%ebx),%xmm0 + movdqa %xmm0,%xmm2 + pxor (%eax),%xmm0 + cmpl $12,%esi +// aesdec 0x10(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x10 +// aesdec 0x20(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x20 +// aesdec 0x30(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x30 +// aesdec 0x40(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x40 +// aesdec 0x50(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x50 +// aesdec 0x60(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x60 +// aesdec 0x70(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x70 +// aesdec 0x80(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0x80,0x00,0x00,0x00 +// aesdec 0x90(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0x90,0x00,0x00,0x00 + jge 2f +// aesdeclast 0xa0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xa0,0x00,0x00,0x00 + jmp 4f +2: +// aesdec 0xa0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xa0,0x00,0x00,0x00 +// aesdec 0xb0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xb0,0x00,0x00,0x00 + jg 3f +// aesdeclast 0xc0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xc0,0x00,0x00,0x00 + jmp 4f +3: +// aesdec 0xc0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xc0,0x00,0x00,0x00 +// aesdec 0xd0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xd0,0x00,0x00,0x00 +// aesdeclast 0xe0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xe0,0x00,0x00,0x00 +4: + pxor %xmm1,%xmm0 + movdqu %xmm0,(%ebx) + movdqa %xmm2,%xmm1 + addl $0x10,%ebx + decl %ecx + jne 1b + + popl %esi + popl %ebx + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_decrypt_cbc,. 
- aesni_decrypt_cbc + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aeskeys_amd64.S b/sys/crypto/aesni/aeskeys_amd64.S new file mode 100644 index 0000000..4e058e3 --- /dev/null +++ b/sys/crypto/aesni/aeskeys_amd64.S @@ -0,0 +1,348 @@ +/*- + * XXX INTEL COPYRIGHT IS MISSING HERE. + * The code in this file was taken from the whitepaper + * Intel Advanced Encryption Standard (AES) Instructions Set + * January 2010 (26/1/2010) Rev. 3.0 + * by Intel Corporation. + */ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + .text + + .align 0x10,0x90 + .globl aesni_key_expansion_decrypt + .type aesni_key_expansion_decrypt,@function +aesni_key_expansion_decrypt: + .cfi_startproc + movslq %edx,%rdx + movq %rdx,%rax + shlq $4,%rax + cmpq $10,%rdx + movdqa (%rax,%rdi),%xmm0 + movdqa %xmm0,(%rsi) +// aesimc -16(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x38,0xf0 +// aesimc -32(%rax,%rdi),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x54,0x38,0xe0 +// aesimc -48(%rax,%rdi),%xmm3 + .byte 0x66,0x0f,0x38,0xdb,0x5c,0x38,0xd0 +// aesimc -64(%rax,%rdi),%xmm4 + .byte 0x66,0x0f,0x38,0xdb,0x64,0x38,0xc0 + movdqa %xmm1,16(%rsi) + movdqa %xmm2,32(%rsi) + movdqa %xmm3,48(%rsi) + movdqa %xmm4,64(%rsi) +// aesimc -80(%rax,%rdi),%xmm5 + .byte 0x66,0x0f,0x38,0xdb,0x6c,0x38,0xb0 +// aesimc -96(%rax,%rdi),%xmm6 + .byte 0x66,0x0f,0x38,0xdb,0x74,0x38,0xa0 +// aesimc -112(%rax,%rdi),%xmm7 + .byte 0x66,0x0f,0x38,0xdb,0x7c,0x38,0x90 +// aesimc -128(%rax,%rdi),%xmm8 + .byte 0x66,0x44,0x0f,0x38,0xdb,0x44,0x38,0x80 + movdqa %xmm5,80(%rsi) + movdqa %xmm6,96(%rsi) + movdqa %xmm7,112(%rsi) + movdqa %xmm8,128(%rsi) +// aesimc -144(%rax,%rdi),%xmm9 + .byte 0x66,0x44,0x0f,0x38,0xdb,0x8c,0x38,0x70,0xff,0xff,0xff + movdqa %xmm9, 144(%rsi) + jle 1f + cmpq $12,%rdx +// aesimc -160(%rax,%rdi),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x38,0x60,0xff,0xff,0xff +// aesimc -176(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x38,0x50,0xff,0xff,0xff + movdqa %xmm0,160(%rsi) + movdqa %xmm1,176(%rsi) + jle 1f +// aesimc -192(%rax,%rdi),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x38,0x40,0xff,0xff,0xff +// aesimc -208(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x38,0x30,0xff,0xff,0xff + movdqa %xmm0,192(%rsi) + movdqa %xmm1,208(%rsi) +1: + movdqa (%rdi),%xmm0 + movdqa %xmm0,(%rax,%rsi) + retq + .cfi_endproc + .size aesni_key_expansion_decrypt,. 
- aesni_key_expansion_decrypt + + .align 0x10,0x90 + .globl aesni_128_key_expansion + .type aesni_128_key_expansion,@function +aesni_128_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqa %xmm1,(%rsi) +// aeskeygenassist $1,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 + call prepkey_128 + movdqa %xmm1,16(%rsi) +// aeskeygenassist $2,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 + call prepkey_128 + movdqa %xmm1,32(%rsi) +// aeskeygenassist $4,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 + call prepkey_128 + movdqa %xmm1,48(%rsi) +// aeskeygenassist $8,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 + call prepkey_128 + movdqa %xmm1,64(%rsi) +// aeskeygenassist $16,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 + call prepkey_128 + movdqa %xmm1,80(%rsi) +// aeskeygenassist $32,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 + call prepkey_128 + movdqa %xmm1,96(%rsi) +// aeskeygenassist $64,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 + call prepkey_128 + movdqa %xmm1,112(%rsi) +// aeskeygenassist $0x80,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 + call prepkey_128 + movdqa %xmm1,128(%rsi) +// aeskeygenassist $0x1b, %xmm1, %xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b + call prepkey_128 + movdqa %xmm1,144(%rsi) +// aeskeygenassist $0x36,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 + call prepkey_128 + movdqa %xmm1,160(%rsi) + retq + .cfi_endproc + .size aesni_128_key_expansion,. - aesni_128_key_expansion + + .align 0x10,0x90 + .type prepkey_128,@function +prepkey_128: + .cfi_startproc + pshufd $255,%xmm2,%xmm2 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pxor %xmm2,%xmm1 + retq + .cfi_endproc + .size prepkey_128,. 
- prepkey_128 + + .align 0x10,0x90 + .globl aesni_192_key_expansion + .type aesni_192_key_expansion,@function +aesni_192_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm3 + movdqa %xmm1,(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,16(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,32(%rsi) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call prepkey_192 + movdqa %xmm1,48(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,64(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,80(%rsi) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call prepkey_192 + movdqa %xmm1,96(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,112(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,128(%rsi) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call prepkey_192 + movdqa %xmm1,144(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,160(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,176(%rsi) +// aeskeygenassist $0x80,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 + call prepkey_192 + movdqa %xmm1,192(%rsi) + retq + .cfi_endproc + .size aesni_192_key_expansion,. 
- aesni_192_key_expansion + + .align 0x10,0x90 + .type prepkey_192,@function +prepkey_192: + .cfi_startproc + pshufd $0x55,%xmm2,%xmm2 + movdqu %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + pshufd $0xff,%xmm1,%xmm2 + movdqu %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retq + .cfi_endproc + .size prepkey_192,. - prepkey_192 + + .align 0x10,0x90 + .globl aesni_256_key_expansion + .type aesni_256_key_expansion,@function +aesni_256_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm3 + movdqa %xmm1,(%rsi) + movdqa %xmm3,16(%rsi) +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call rk256_a + movdqa %xmm1,32(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,48(%rsi) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call rk256_a + movdqa %xmm1,64(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,80(%rsi) +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call rk256_a + movdqa %xmm1,96(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,112(%rsi) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call rk256_a + movdqa %xmm1,128(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,144(%rsi) +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call rk256_a + movdqa %xmm1,160(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,176(%rsi) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call rk256_a + movdqa %xmm1,192(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 
0x66,0x0f,0x3a,0xdf,0xd1,0x00
+	call	rk256_b
+	movdqa	%xmm3,208(%rsi)
+//	aeskeygenassist $0x40,%xmm3,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x40
+	call	rk256_a
+	movdqa	%xmm1,224(%rsi)
+	retq
+	.cfi_endproc
+	/* The size must be attached to the symbol actually defined above. */
+	.size	aesni_256_key_expansion,. - aesni_256_key_expansion
+
+/*
+ * rk256_a: local helper for aesni_256_key_expansion.
+ *
+ * In:   %xmm1 = round key being updated, %xmm2 = aeskeygenassist result.
+ * Out:  %xmm1 = next round key derived from it.
+ *
+ * Broadcasts dword 3 of %xmm2 (pshufd $0xff), XORs %xmm1 with copies of
+ * itself shifted left by 4, 8 and 12 bytes, then XORs in the broadcast.
+ * Clobbers %xmm2 and %xmm4.
+ */
+	.align	0x10,0x90
+	.type	rk256_a,@function
+rk256_a:
+	.cfi_startproc
+	pshufd	$0xff,%xmm2,%xmm2
+	movdqa	%xmm1,%xmm4
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pxor	%xmm2,%xmm1
+	retq
+	.cfi_endproc
+	.size	rk256_a,. - rk256_a
+
+/*
+ * rk256_b: same folding as rk256_a, but applied to %xmm3 and using
+ * dword 2 of the aeskeygenassist result (pshufd $0xaa).
+ * Clobbers %xmm2 and %xmm4.
+ */
+	.align	0x10,0x90
+	.type	rk256_b,@function
+rk256_b:
+	.cfi_startproc
+	pshufd	$0xaa,%xmm2,%xmm2
+	movdqa	%xmm3,%xmm4
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pxor	%xmm2,%xmm3
+	retq
+	.cfi_endproc
+	.size	rk256_b,. - rk256_b
+
+	.ident	"$FreeBSD$"
diff --git a/sys/crypto/aesni/aeskeys_i386.S b/sys/crypto/aesni/aeskeys_i386.S
new file mode 100644
index 0000000..8588d43
--- /dev/null
+++ b/sys/crypto/aesni/aeskeys_i386.S
@@ -0,0 +1,368 @@
+/*-
+ * XXX INTEL COPYRIGHT MISSED THERE.
+ * The code in the file was taken from the whitepaper
+ * Intel Advanced Encryption Standard (AES) Instructions Set
+ * January 2010 (26/1/2010) Rev. 3.0
+ * by Intel Corporation.
+ */
+/*-
+ * Copyright (c) 2010 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + .text + + .align 0x10,0x90 + .globl aesni_key_expansion_decrypt + .type aesni_key_expansion_decrypt,@function +aesni_key_expansion_decrypt: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 16(%ebp),%eax /* rounds */ + movl 8(%ebp),%ecx /* encrypt_schedule */ + movl 12(%ebp),%edx /* decrypt_schedule */ + shll $4,%eax + cmpl $(10<<4),%eax + movdqa (%eax,%ecx),%xmm0 + movdqa %xmm0,(%edx) +// aesimc -16(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x08,0xf0 +// aesimc -32(%eax,%ecx),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x54,0x08,0xe0 +// aesimc -48(%eax,%ecx),%xmm3 + .byte 0x66,0x0f,0x38,0xdb,0x5c,0x08,0xd0 +// aesimc -64(%eax,%ecx),%xmm4 + .byte 0x66,0x0f,0x38,0xdb,0x64,0x08,0xc0 + movdqa %xmm1,16(%edx) + movdqa %xmm2,32(%edx) + movdqa %xmm3,48(%edx) + movdqa %xmm4,64(%edx) +// aesimc -80(%eax,%ecx),%xmm5 + .byte 0x66,0x0f,0x38,0xdb,0x6c,0x08,0xb0 +// aesimc -96(%eax,%ecx),%xmm6 + .byte 0x66,0x0f,0x38,0xdb,0x74,0x08,0xa0 +// aesimc -112(%eax,%ecx),%xmm7 + .byte 0x66,0x0f,0x38,0xdb,0x7c,0x08,0x90 +// aesimc -128(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x08,0x80 + movdqa %xmm5,80(%edx) + movdqa %xmm6,96(%edx) + movdqa %xmm7,112(%edx) + movdqa %xmm1,128(%edx) +// aesimc 
-144(%eax,%ecx),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x94,0x08,0x70,0xff,0xff,0xff + movdqa %xmm2, 144(%edx) + jle 1f + cmpl $(12<<4),%eax +// aesimc -160(%eax,%ecx),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x08,0x60,0xff,0xff,0xff +// aesimc -176(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x08,0x50,0xff,0xff,0xff + movdqa %xmm0,160(%edx) + movdqa %xmm1,176(%edx) + jle 1f +// aesimc -192(%eax,%ecx),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x08,0x40,0xff,0xff,0xff +// aesimc -208(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x08,0x30,0xff,0xff,0xff + movdqa %xmm0,192(%edx) + movdqa %xmm1,208(%edx) +1: + movdqa (%ecx),%xmm0 + movdqa %xmm0,(%eax,%edx) + leave + retl + .cfi_endproc + .size aesni_key_expansion_decrypt,. - aesni_key_expansion_decrypt + + .align 0x10,0x90 + .globl aesni_128_key_expansion + .type aesni_128_key_expansion,@function +aesni_128_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqa %xmm1,(%edx) +// aeskeygenassist $1,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 + call prepkey_128 + movdqa %xmm1,16(%edx) +// aeskeygenassist $2,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 + call prepkey_128 + movdqa %xmm1,32(%edx) +// aeskeygenassist $4,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 + call prepkey_128 + movdqa %xmm1,48(%edx) +// aeskeygenassist $8,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 + call prepkey_128 + movdqa %xmm1,64(%edx) +// aeskeygenassist $16,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 + call prepkey_128 + movdqa %xmm1,80(%edx) +// aeskeygenassist $32,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 + call prepkey_128 + movdqa %xmm1,96(%edx) +// aeskeygenassist $64,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 + call prepkey_128 + movdqa %xmm1,112(%edx) +// aeskeygenassist $0x80,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 + call prepkey_128 + movdqa %xmm1,128(%edx) +// 
aeskeygenassist $0x1b, %xmm1, %xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b + call prepkey_128 + movdqa %xmm1,144(%edx) +// aeskeygenassist $0x36,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 + call prepkey_128 + movdqa %xmm1,160(%edx) + leave + retl + .cfi_endproc + .size aesni_128_key_expansion,. - aesni_128_key_expansion + + .align 0x10,0x90 + .type prepkey_128,@function +prepkey_128: + .cfi_startproc + pshufd $255,%xmm2,%xmm2 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pxor %xmm2,%xmm1 + retl + .cfi_endproc + .size prepkey_128,. - prepkey_128 + + .align 0x10,0x90 + .globl aesni_192_key_expansion + .type aesni_192_key_expansion,@function +aesni_192_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqu 16(%ecx),%xmm3 + movdqa %xmm1,(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,16(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,32(%edx) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call prepkey_192 + movdqa %xmm1,48(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,64(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,80(%edx) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call prepkey_192 + movdqa %xmm1,96(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,112(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,128(%edx) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call prepkey_192 + movdqa 
%xmm1,144(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,160(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,176(%edx) +// aeskeygenassist $0x80,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 + call prepkey_192 + movdqa %xmm1,192(%edx) + leave + retl + .cfi_endproc + .size aesni_192_key_expansion,. - aesni_192_key_expansion + + .align 0x10,0x90 + .type prepkey_192,@function +prepkey_192: + .cfi_startproc + pshufd $0x55,%xmm2,%xmm2 + movdqu %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + pshufd $0xff,%xmm1,%xmm2 + movdqu %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retl + .cfi_endproc + .size prepkey_192,. - prepkey_192 + + .align 0x10,0x90 + .globl aesni_256_key_expansion + .type aesni_256_key_expansion,@function +aesni_256_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqu 16(%ecx),%xmm3 + movdqa %xmm1,(%edx) + movdqa %xmm3,16(%edx) +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call rk256_a + movdqa %xmm1,32(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,48(%edx) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call rk256_a + movdqa %xmm1,64(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,80(%edx) +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call rk256_a + movdqa %xmm1,96(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,112(%edx) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call rk256_a + 
movdqa %xmm1,128(%edx)
+//	aeskeygenassist $0x0,%xmm1,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd1,0x00
+	call	rk256_b
+	movdqa	%xmm3,144(%edx)
+//	aeskeygenassist $0x10,%xmm3,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x10
+	call	rk256_a
+	movdqa	%xmm1,160(%edx)
+//	aeskeygenassist $0x0,%xmm1,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd1,0x00
+	call	rk256_b
+	movdqa	%xmm3,176(%edx)
+//	aeskeygenassist $0x20,%xmm3,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x20
+	call	rk256_a
+	movdqa	%xmm1,192(%edx)
+//	aeskeygenassist $0x0,%xmm1,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd1,0x00
+	call	rk256_b
+	movdqa	%xmm3,208(%edx)
+//	aeskeygenassist $0x40,%xmm3,%xmm2
+	.byte	0x66,0x0f,0x3a,0xdf,0xd3,0x40
+	call	rk256_a
+	movdqa	%xmm1,224(%edx)
+	leave
+	retl
+	.cfi_endproc
+	/* The size must be attached to the symbol actually defined above. */
+	.size	aesni_256_key_expansion,. - aesni_256_key_expansion
+
+/*
+ * rk256_a: local helper for aesni_256_key_expansion.
+ *
+ * In:   %xmm1 = round key being updated, %xmm2 = aeskeygenassist result.
+ * Out:  %xmm1 = next round key derived from it.
+ *
+ * Broadcasts dword 3 of %xmm2 (pshufd $0xff), XORs %xmm1 with copies of
+ * itself shifted left by 4, 8 and 12 bytes, then XORs in the broadcast.
+ * Clobbers %xmm2 and %xmm4; touches no general-purpose register.
+ */
+	.align	0x10,0x90
+	.type	rk256_a,@function
+rk256_a:
+	.cfi_startproc
+	pshufd	$0xff,%xmm2,%xmm2
+	movdqa	%xmm1,%xmm4
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm1
+	pxor	%xmm2,%xmm1
+	retl
+	.cfi_endproc
+	.size	rk256_a,. - rk256_a
+
+/*
+ * rk256_b: same folding as rk256_a, but applied to %xmm3 and using
+ * dword 2 of the aeskeygenassist result (pshufd $0xaa).
+ * Clobbers %xmm2 and %xmm4.
+ */
+	.align	0x10,0x90
+	.type	rk256_b,@function
+rk256_b:
+	.cfi_startproc
+	pshufd	$0xaa,%xmm2,%xmm2
+	movdqa	%xmm3,%xmm4
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pslldq	$4,%xmm4
+	pxor	%xmm4,%xmm3
+	pxor	%xmm2,%xmm3
+	retl
+	.cfi_endproc
+	.size	rk256_b,. - rk256_b
+
+	.ident	"$FreeBSD$"
diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c
new file mode 100644
index 0000000..93ee042
--- /dev/null
+++ b/sys/crypto/aesni/aesni.c
@@ -0,0 +1,338 @@
+/*-
+ * Copyright (c) 2005-2008 Pawel Jakub Dawidek
+ * Copyright (c) 2010 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cryptodev_if.h" + +struct aesni_softc { + int32_t cid; + uint32_t sid; + TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions; + struct rwlock lock; +}; + +static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); +static int aesni_freesession(device_t, uint64_t tid); +static void aesni_freesession_locked(struct aesni_softc *sc, + struct aesni_session *ses); + +MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); + +static void +aesni_identify(driver_t *drv, device_t parent) +{ + + /* NB: order 10 is so we get attached after h/w devices */ + if (device_find_child(parent, "aesni", -1) == NULL && + BUS_ADD_CHILD(parent, 10, "aesni", -1) == 0) + panic("aesni: could not attach"); +} + +static int +aesni_probe(device_t dev) +{ + char capp[32]; + + if ((cpu_feature2 & CPUID2_AESNI) == 0) { + device_printf(dev, "No AESNI support.\n"); + return (EINVAL); + } + strlcpy(capp, "AES-CBC", sizeof(capp)); + device_set_desc_copy(dev, capp); + return (0); +} + +static int +aesni_attach(device_t dev) +{ + struct aesni_softc *sc; + + sc = device_get_softc(dev); + TAILQ_INIT(&sc->sessions); + sc->sid = 1; + sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE); + if (sc->cid < 0) { + device_printf(dev, "Could not get crypto driver id.\n"); + return (ENOMEM); + } + + rw_init(&sc->lock, "aesni_lock"); + crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + return (0); +} + +static int +aesni_detach(device_t dev) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + + sc = device_get_softc(dev); + rw_wlock(&sc->lock); + TAILQ_FOREACH(ses, &sc->sessions, next) { + if (ses->used) { + rw_wunlock(&sc->lock); + device_printf(dev, + "Cannot detach, sessions still active.\n"); + return (EBUSY); + } + } + while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { + TAILQ_REMOVE(&sc->sessions, ses, 
next); + free(ses, M_AESNI); + } + rw_wunlock(&sc->lock); + rw_destroy(&sc->lock); + crypto_unregister_all(sc->cid); + return (0); +} + +static int +aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + struct cryptoini *encini; + int error; + + if (sidp == NULL || cri == NULL) + return (EINVAL); + + sc = device_get_softc(dev); + ses = NULL; + encini = NULL; + for (; cri != NULL; cri = cri->cri_next) { + switch (cri->cri_alg) { + case CRYPTO_AES_CBC: + if (encini != NULL) + return (EINVAL); + encini = cri; + break; + default: + return (EINVAL); + } + } + if (encini == NULL) + return (EINVAL); + + rw_wlock(&sc->lock); + /* + * Free sessions goes first, so if first session is used, we need to + * allocate one. + */ + ses = TAILQ_FIRST(&sc->sessions); + if (ses == NULL || ses->used) { + ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO); + if (ses == NULL) { + rw_wunlock(&sc->lock); + return (ENOMEM); + } + KASSERT(((uintptr_t)ses) % 0x10 == 0, + ("malloc returned unaligned pointer")); + ses->id = sc->sid++; + } else { + TAILQ_REMOVE(&sc->sessions, ses, next); + } + ses->used = 1; + TAILQ_INSERT_TAIL(&sc->sessions, ses, next); + rw_wunlock(&sc->lock); + + error = aesni_cipher_setup(ses, encini); + if (error != 0) { + rw_wlock(&sc->lock); + aesni_freesession_locked(sc, ses); + rw_wunlock(&sc->lock); + return (error); + } + + *sidp = ses->id; + return (0); +} + +static void +aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) +{ + uint32_t sid; + + sid = ses->id; + TAILQ_REMOVE(&sc->sessions, ses, next); + bzero(ses, sizeof(*ses)); + ses->id = sid; + TAILQ_INSERT_HEAD(&sc->sessions, ses, next); +} + +static int +aesni_freesession(device_t dev, uint64_t tid) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + uint32_t sid; + + sc = device_get_softc(dev); + sid = ((uint32_t)tid) & 0xffffffff; + rw_wlock(&sc->lock); + TAILQ_FOREACH_REVERSE(ses, &sc->sessions, 
aesni_sessions_head, next) {
+		if (ses->id == sid)
+			break;
+	}
+	if (ses == NULL) {
+		rw_wunlock(&sc->lock);
+		return (EINVAL);
+	}
+	aesni_freesession_locked(sc, ses);
+	rw_wunlock(&sc->lock);
+	return (0);
+}
+
+/*
+ * cryptodev_process method: run one AES-CBC request.
+ *
+ * Validates the request, selects the single CRYPTO_AES_CBC descriptor,
+ * looks the session up by the low 32 bits of crp_sid and hands the work to
+ * aesni_cipher_process().
+ *
+ * Except for a NULL crp, every outcome (success or failure) is reported
+ * through crp_etype and crypto_done(); the same code is also returned.
+ */
+static int
+aesni_process(device_t dev, struct cryptop *crp, int hint __unused)
+{
+	struct aesni_softc *sc = device_get_softc(dev);
+	struct aesni_session *ses = NULL;
+	struct cryptodesc *crd, *enccrd;
+	int error;
+
+	error = 0;
+	enccrd = NULL;
+
+	/* Sanity check. */
+	if (crp == NULL)
+		return (EINVAL);
+
+	if (crp->crp_callback == NULL || crp->crp_desc == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) {
+		switch (crd->crd_alg) {
+		case CRYPTO_AES_CBC:
+			/* Only one cipher descriptor per request. */
+			if (enccrd != NULL) {
+				error = EINVAL;
+				goto out;
+			}
+			enccrd = crd;
+			break;
+		default:
+			/*
+			 * Complete the request like every other failure
+			 * instead of returning with it still outstanding.
+			 */
+			error = EINVAL;
+			goto out;
+		}
+	}
+	/* CBC works on whole blocks only. */
+	if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rw_rlock(&sc->lock);
+	TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) {
+		if (ses->id == (crp->crp_sid & 0xffffffff))
+			break;
+	}
+	rw_runlock(&sc->lock);
+	if (ses == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = aesni_cipher_process(ses, enccrd, crp);
+
+out:
+	crp->crp_etype = error;
+	crypto_done(crp);
+	return (error);
+}
+
+/*
+ * Return a pointer to the crd_len bytes that are to be processed.
+ *
+ * A flat buffer or a single-segment uio is used in place and *allocated is
+ * set to 0.  An mbuf chain or a multi-segment uio is copied into an
+ * M_AESNI buffer obtained with M_NOWAIT and *allocated is set to 1; NULL is
+ * returned (with *allocated = 0) when that allocation fails.
+ *
+ * NOTE(review): the single-segment uio case adds crd_skip to the base
+ * address but the flat-buffer case does not -- confirm that flat-buffer
+ * callers always use crd_skip == 0.
+ */
+uint8_t *
+aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
+    int *allocated)
+{
+	struct uio *uio;
+	struct iovec *iov;
+	uint8_t *addr;
+
+	if (crp->crp_flags & CRYPTO_F_IMBUF)
+		goto alloc;
+	else if (crp->crp_flags & CRYPTO_F_IOV) {
+		uio = (struct uio *)crp->crp_buf;
+		if (uio->uio_iovcnt != 1)
+			goto alloc;
+		iov = uio->uio_iov;
+		addr = (u_char *)iov->iov_base + enccrd->crd_skip;
+	} else
+		addr = (u_char *)crp->crp_buf;
+	*allocated = 0;
+	return (addr);
+
+alloc:
+	addr = malloc(enccrd->crd_len, M_AESNI, M_NOWAIT);
+	if (addr != NULL) {
+		*allocated = 1;
+		crypto_copydata(crp->crp_flags, crp->crp_buf,
enccrd->crd_skip, + enccrd->crd_len, addr); + } else + *allocated = 0; + return (addr); +} + +static device_method_t aesni_methods[] = { + DEVMETHOD(device_identify, aesni_identify), + DEVMETHOD(device_probe, aesni_probe), + DEVMETHOD(device_attach, aesni_attach), + DEVMETHOD(device_detach, aesni_detach), + + DEVMETHOD(cryptodev_newsession, aesni_newsession), + DEVMETHOD(cryptodev_freesession, aesni_freesession), + DEVMETHOD(cryptodev_process, aesni_process), + + {0, 0}, +}; + +static driver_t aesni_driver = { + "aesni", + aesni_methods, + sizeof(struct aesni_softc), +}; +static devclass_t aesni_devclass; + +DRIVER_MODULE(aesni, nexus, aesni_driver, aesni_devclass, 0, 0); +MODULE_VERSION(aesni, 1); +MODULE_DEPEND(aesni, crypto, 1, 1, 1); diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h new file mode 100644 index 0000000..0790f1e --- /dev/null +++ b/sys/crypto/aesni/aesni.h @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _AESNI_H_ +#define _AESNI_H_ + +#include +#include +#include + +#include + +#if defined(__amd64__) || (defined(__i386__) && !defined(PC98)) +#include +#include +#include +#include +#endif +#if defined(__i386__) +#include +#elif defined(__amd64__) +#include +#endif + +#define AES128_ROUNDS 10 +#define AES192_ROUNDS 12 +#define AES256_ROUNDS 14 +#define AES_SCHED_LEN ((AES256_ROUNDS + 1) * AES_BLOCK_LEN) + +struct aesni_session { + uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16); + uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16); + uint8_t iv[AES_BLOCK_LEN]; + int rounds; + /* uint8_t *ses_ictx; */ + /* uint8_t *ses_octx; */ + /* int ses_mlen; */ + int used; + uint32_t id; + TAILQ_ENTRY(aesni_session) next; + struct fpu_kern_ctx fpu_ctx; +}; + +/* + * Internal functions, implemented in assembler. 
+ */ +void aesni_enc(int rounds, const uint8_t *key_schedule, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], + const uint8_t iv[AES_BLOCK_LEN]); +void aesni_dec(int rounds, const uint8_t *key_schedule, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], + const uint8_t iv[AES_BLOCK_LEN]); + +void aesni_key_expansion_decrypt(const uint8_t *encrypt_schedule, + uint8_t *decrypt_schedule, int number_of_rounds); +void aesni_128_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); +void aesni_192_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); +void aesni_256_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); + +/* + * Slightly more public interfaces. + */ +void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); +void aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, const uint8_t iv[AES_BLOCK_LEN]); +void aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); +void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); + +int aesni_cipher_setup(struct aesni_session *ses, + struct cryptoini *encini); +int aesni_cipher_process(struct aesni_session *ses, + struct cryptodesc *enccrd, struct cryptop *crp); + +uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, + int *allocated); + +#endif diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c new file mode 100644 index 0000000..f9c980b --- /dev/null +++ b/sys/crypto/aesni/aesni_wrap.c @@ -0,0 +1,198 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+MALLOC_DECLARE(M_AESNI);
+
+#ifdef DEBUG
+/*
+ * Debug helper: print "string[0x<hex bytes>]", inserting a '+' between
+ * successive AES_BLOCK_LEN-sized groups of bytes.
+ */
+static void
+ps_len(const char *string, const uint8_t *data, int length)
+{
+	int i;
+
+	printf("%-12s[0x", string);
+	for (i = 0; i < length; i++) {
+		if (i % AES_BLOCK_LEN == 0 && i > 0)
+			printf("+");
+		printf("%02x", data[i]);
+	}
+	printf("]\n");
+}
+#endif
+
+/*
+ * CBC-encrypt len / AES_BLOCK_LEN whole blocks from 'from' into 'to'.
+ *
+ * The first block is chained with iv; every later block is chained with
+ * the ciphertext block just written.  Any trailing partial block is
+ * ignored.  aesni_enc() is called with rounds - 1.
+ */
+void
+aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
+    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
+{
+	const uint8_t *ivp;
+	size_t i;
+
+#ifdef DEBUG
+	ps_len("AES CBC encrypt iv:", iv, AES_BLOCK_LEN);
+	ps_len("from:", from, len);
+#endif
+
+	len /= AES_BLOCK_LEN;
+	ivp = iv;
+	for (i = 0; i < len; i++) {
+		aesni_enc(rounds - 1, key_schedule, from, to, ivp);
+		ivp = to;
+		from += AES_BLOCK_LEN;
+		to += AES_BLOCK_LEN;
+	}
+#ifdef DEBUG
+	ps_len("to:", to - len * AES_BLOCK_LEN, len * AES_BLOCK_LEN);
+#endif
+}
+
+/*
+ * ECB-encrypt len / AES_BLOCK_LEN whole blocks: each block is passed to
+ * aesni_enc() on its own with a NULL iv.
+ */
+void
+aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
+    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
+{
+	size_t i;
+
+	len /= AES_BLOCK_LEN;
+	for (i = 0; i < len; i++) {
+		aesni_enc(rounds - 1, key_schedule, from, to, NULL);
+		from += AES_BLOCK_LEN;
+		to += AES_BLOCK_LEN;
+	}
+}
+
+/*
+ * ECB-decrypt len / AES_BLOCK_LEN whole blocks: each block is passed to
+ * aesni_dec() on its own with a NULL iv.
+ */
+void
+aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
+    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
+{
+	size_t i;
+
+	len /= AES_BLOCK_LEN;
+	for (i = 0; i < len; i++) {
+		aesni_dec(rounds - 1, key_schedule, from, to, NULL);
+		from += AES_BLOCK_LEN;
+		to += AES_BLOCK_LEN;
+	}
+}
+
+/*
+ * Prepare a session for use: pick the round count from the key length
+ * (cri_klen of 128, 192 or 256), expand the encryption schedule, derive
+ * the decryption schedule from it and seed ses->iv from arc4rand().
+ *
+ * The key expansion runs between fpu_kern_enter() and fpu_kern_leave().
+ *
+ * Returns 0 on success, the error from fpu_kern_enter(), or EINVAL for an
+ * unsupported key length (in which case no schedule is produced).
+ */
+int
+aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
+{
+	struct thread *td;
+	int error;
+
+	td = curthread;
+	error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL);
+	if (error != 0)
+		goto out1;
+
+	switch (encini->cri_klen) {
+	case 128:
+		ses->rounds = AES128_ROUNDS;
+		aesni_128_key_expansion(encini->cri_key, ses->enc_schedule);
+		break;
+	case 192:
+		ses->rounds = AES192_ROUNDS;
+		aesni_192_key_expansion(encini->cri_key, ses->enc_schedule);
+		break;
+	case 256:
+		ses->rounds = AES256_ROUNDS;
+		aesni_256_key_expansion(encini->cri_key, ses->enc_schedule);
+		break;
+	default:
+		error = EINVAL;
+		goto out;
+	}
+	aesni_key_expansion_decrypt(ses->enc_schedule, ses->dec_schedule,
+	    ses->rounds);
+	arc4rand(ses->iv, sizeof(ses->iv), 0);
+ out:
+	fpu_kern_leave(td, &ses->fpu_ctx);
+ out1:
+	/* Propagate failures so the caller can discard the session. */
+	return (error);
+}
+
+/*
+ * Encrypt or decrypt (per CRD_F_ENCRYPT) the data described by enccrd.
+ *
+ * The data is processed in the buffer returned by aesni_cipher_alloc();
+ * when that buffer is a private copy it is written back to the request
+ * afterwards, then zeroed and freed.
+ *
+ * IV handling: an explicit IV (CRD_F_IV_EXPLICIT) replaces ses->iv.  When
+ * encrypting without CRD_F_IV_PRESENT the IV is written into the request at
+ * crd_inject; when decrypting without an explicit IV it is read from
+ * crd_inject.  After encryption the last ciphertext block is saved in
+ * ses->iv.
+ *
+ * Returns 0 on success, ENOMEM if no working buffer could be obtained, or
+ * the error from fpu_kern_enter().
+ */
+int
+aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
+    struct cryptop *crp)
+{
+	struct thread *td;
+	uint8_t *buf;
+	int error, allocated;
+
+	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
+	if (buf == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	td = curthread;
+	error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL);
+	if (error != 0)
+		goto out1;
+
+	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
+		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
+			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+
+		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
+			crypto_copyback(crp->crp_flags, crp->crp_buf,
+			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
+
+		aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
+		    enccrd->crd_len, buf, buf, ses->iv);
+	} else {
+		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
+			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
+		else
+			crypto_copydata(crp->crp_flags, crp->crp_buf,
+			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
+		aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
+		    enccrd->crd_len, buf, ses->iv);
+	}
+	fpu_kern_leave(td, &ses->fpu_ctx);
+	if (allocated)
+		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
+		    enccrd->crd_len, buf);
+	/* Keep the last ciphertext block as the IV for the next request. */
+	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
+		crypto_copydata(crp->crp_flags, crp->crp_buf,
+		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
+		    AES_BLOCK_LEN, ses->iv);
+ out1:
+	if (allocated) {
+		/* Do not leave plaintext or ciphertext behind in freed memory. */
+		bzero(buf, enccrd->crd_len);
+		free(buf, M_AESNI);
+	}
+ out:
+	return (error);
+}
diff
--git a/sys/crypto/via/padlock.c b/sys/crypto/via/padlock.c index ccb0595..77e059b 100644 --- a/sys/crypto/via/padlock.c +++ b/sys/crypto/via/padlock.c @@ -169,6 +169,7 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses = NULL; struct cryptoini *encini, *macini; + struct thread *td; int error; if (sidp == NULL || cri == NULL) @@ -236,7 +237,12 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) } if (macini != NULL) { - error = padlock_hash_setup(ses, macini); + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + if (error == 0) { + error = padlock_hash_setup(ses, macini); + fpu_kern_leave(td, &ses->ses_fpu_ctx); + } if (error != 0) { padlock_freesession_one(sc, ses, 0); return (error); diff --git a/sys/crypto/via/padlock.h b/sys/crypto/via/padlock.h index 7d928ba..c8ee9bd 100644 --- a/sys/crypto/via/padlock.h +++ b/sys/crypto/via/padlock.h @@ -32,6 +32,12 @@ #include #include +#if defined(__i386__) +#include +#elif defined(__amd64__) +#include +#endif + union padlock_cw { uint64_t raw; struct { @@ -70,6 +76,7 @@ struct padlock_session { int ses_used; uint32_t ses_id; TAILQ_ENTRY(padlock_session) ses_next; + struct fpu_kern_ctx ses_fpu_ctx; }; #define PADLOCK_ALIGN(p) (void *)(roundup2((uintptr_t)(p), 16)) diff --git a/sys/crypto/via/padlock_cipher.c b/sys/crypto/via/padlock_cipher.c index 8195584..0ae26c8 100644 --- a/sys/crypto/via/padlock_cipher.c +++ b/sys/crypto/via/padlock_cipher.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -201,9 +202,10 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, struct cryptop *crp) { union padlock_cw *cw; + struct thread *td; u_char *buf, *abuf; uint32_t *key; - int allocated; + int allocated, error; buf = padlock_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) @@ -247,9 +249,16 @@ 
padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, enccrd->crd_len, abuf); } + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + if (error != 0) + goto out; + padlock_cbc(abuf, abuf, enccrd->crd_len / AES_BLOCK_LEN, key, cw, ses->ses_iv); + fpu_kern_leave(td, &ses->ses_fpu_ctx); + if (allocated) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, abuf); @@ -262,9 +271,10 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, AES_BLOCK_LEN, ses->ses_iv); } + out: if (allocated) { bzero(buf, enccrd->crd_len + 16); free(buf, M_PADLOCK); } - return (0); + return (error); } diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 71f956e..58c58b2 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -34,12 +34,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #if defined(__amd64__) || (defined(__i386__) && !defined(PC98)) #include #include #include #include #endif +#include #include #include /* for hmac_ipad_buffer and hmac_opad_buffer */ @@ -363,12 +365,18 @@ int padlock_hash_process(struct padlock_session *ses, struct cryptodesc *maccrd, struct cryptop *crp) { + struct thread *td; int error; + td = curthread; + error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + if (error != 0) + return (error); if ((maccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) padlock_hash_key_setup(ses, maccrd->crd_key, maccrd->crd_klen); error = padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags); + fpu_kern_leave(td, &ses->ses_fpu_ctx); return (error); } diff --git a/sys/dev/random/nehemiah.c b/sys/dev/random/nehemiah.c index e34cdfa..f91e228 100644 --- a/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include #define RANDOM_BLOCK_SIZE 256 @@ -82,6 +84,8 @@ static uint8_t out[RANDOM_BLOCK_SIZE+7] 
__aligned(16); static union VIA_ACE_CW acw __aligned(16); +static struct fpu_kern_ctx fpu_ctx_save; + static struct mtx random_nehemiah_mtx; /* ARGSUSED */ @@ -142,11 +146,16 @@ random_nehemiah_deinit(void) static int random_nehemiah_read(void *buf, int c) { - int i; + int i, error; size_t count, ret; uint8_t *p; mtx_lock(&random_nehemiah_mtx); + error = fpu_kern_enter(curthread, &fpu_ctx_save, FPU_KERN_NORMAL); + if (error != 0) { + mtx_unlock(&random_nehemiah_mtx); + return (0); + } /* Get a random AES key */ count = 0; @@ -187,6 +196,7 @@ random_nehemiah_read(void *buf, int c) c = MIN(RANDOM_BLOCK_SIZE, c); memcpy(buf, out, (size_t)c); + fpu_kern_leave(curthread, &fpu_ctx_save); mtx_unlock(&random_nehemiah_mtx); return (c); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 0263321..989b325 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -3185,12 +3185,12 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, + fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof *fpregs); return (0); } @@ -3200,11 +3200,11 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); + &td->td_pcb->pcb_user_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof *fpregs); return (0); } @@ -3331,7 +3331,7 @@ get_fpcontext(struct thread *td, mcontext_t *mcp) addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } - mcp->mc_ownedfp = npxgetregs(td, addr); + mcp->mc_ownedfp = npxgetuserregs(td, addr); if (addr != (union savefpu 
*)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); @@ -3376,7 +3376,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ - npxsetregs(td, addr); + npxsetuserregs(td, addr); #endif /* * Don't bother putting things back where they were in the @@ -3393,6 +3393,7 @@ fpstate_drop(struct thread *td) { register_t s; + KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) @@ -3408,7 +3409,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | + PCB_NPXUSERINITDONE); intr_restore(s); } diff --git a/sys/i386/i386/ptrace_machdep.c b/sys/i386/i386/ptrace_machdep.c index 409db16..4608c9b 100644 --- a/sys/i386/i386/ptrace_machdep.c +++ b/sys/i386/i386/ptrace_machdep.c @@ -51,7 +51,7 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) if (!cpu_fxsr) return (EINVAL); - fpstate = &td->td_pcb->pcb_save.sv_xmm; + fpstate = &td->td_pcb->pcb_user_save.sv_xmm; switch (req) { case PT_GETXMMREGS: error = copyout(fpstate, addr, sizeof(*fpstate)); diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 0c07871..1dee5f2 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -156,8 +156,7 @@ ENTRY(cpu_switch) /* have we used fp, and need a save? 
*/ cmpl %ecx,PCPU(FPCURTHREAD) jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx + pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */ call npxsave /* do it in a big C function */ popl %eax 1: @@ -408,7 +407,7 @@ ENTRY(savectx) pushl %ecx movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax + movl PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call npxsave @@ -417,7 +416,7 @@ ENTRY(savectx) popl %ecx pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx + movl PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call bcopy diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 644bb47..9c93319 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -501,6 +501,8 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; @@ -533,20 +535,23 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX - /* - * The kernel is apparently using npx for copying. - * XXX this should be fatal unless the kernel has - * registered such use. 
- */ - printf("npxdna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); if (npxdna()) goto out; #endif break; + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ + /* + * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill + */ + trap_fatal(frame, 0); + goto out; + /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle @@ -752,6 +757,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -1064,5 +1071,12 @@ syscall(struct trapframe *frame) trapsignal(td, &ksi); } + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returning with kernel FPU ctx leaked", + syscallname(td->td_proc, sa.code))); + KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + ("System call %s returning with mangled pcb_save", + syscallname(td->td_proc, sa.code))); + syscallret(td, error, &sa); } diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index d2c13b8..01e7245 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -176,13 +176,13 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. 
*/ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); #ifdef DEV_NPX savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) - npxsave(&td1->td_pcb->pcb_save); + npxsave(td1->td_pcb->pcb_save); intr_restore(savecrit); #endif @@ -191,9 +191,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -372,6 +375,7 @@ cpu_thread_alloc(struct thread *td) td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; td->td_pcb->pcb_ext = NULL; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -437,7 +441,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0) * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE); + pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE|PCB_NPXUSERINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; /* * Create a new fresh stack for the new thread. 
diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h index e9811b3..59615fa 100644 --- a/sys/i386/include/npx.h +++ b/sys/i386/include/npx.h @@ -143,6 +143,15 @@ union savefpu { #define IRQ_NPX 13 +struct fpu_kern_ctx { + union savefpu hwstate; + union savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) + /* full reset on some systems, NOP on others */ #define npx_full_reset() outb(IO_NPX + 1, 0) @@ -151,10 +160,22 @@ void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); int npxgetregs(struct thread *td, union savefpu *addr); +int npxgetuserregs(struct thread *td, union savefpu *addr); void npxinit(void); void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); +void npxsetuserregs(struct thread *td, union savefpu *addr); int npxtrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, + u_int flags); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(u_int flags); +int is_fpu_kern_thread(u_int flags); + +/* + * Flags for fpu_kern_enter() and fpu_kern_thread(). 
+ */ +#define FPU_KERN_NORMAL 0x0000 #endif diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h index 17c8486..465f497 100644 --- a/sys/i386/include/pcb.h +++ b/sys/i386/include/pcb.h @@ -60,7 +60,7 @@ struct pcb { int pcb_dr6; int pcb_dr7; - union savefpu pcb_save; + union savefpu pcb_user_save; uint16_t pcb_initial_npxcw; u_int pcb_flags; #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ @@ -68,6 +68,8 @@ struct pcb { #define PCB_NPXTRAP 0x04 /* npx trap pending */ #define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ #define PCB_VM86CALL 0x10 /* in vm86 call */ +#define PCB_NPXUSERINITDONE 0x20 /* user fpu state is initialized */ +#define PCB_KERNNPX 0x40 /* kernel uses npx */ caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; @@ -76,6 +78,7 @@ struct pcb { struct pcb_ext *pcb_ext; /* optional pcb extension */ int pcb_psl; /* process status long */ u_long pcb_vm86[2]; /* vm86bios scratch space */ + union savefpu *pcb_save; }; #ifdef _KERNEL diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 6da4b4c..209cc4a 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -135,12 +135,12 @@ void stop_emulating(void); #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? 
\ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ @@ -149,9 +149,9 @@ void stop_emulating(void); } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ @@ -502,7 +502,7 @@ npxexit(td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) - npxsave(&PCPU_GET(curpcb)->pcb_save); + npxsave(PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); #ifdef NPX_DEBUG if (npx_exists) { @@ -809,6 +809,8 @@ npxdna(void) if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(&pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the @@ -824,7 +826,7 @@ npxdna(void) * fnclex if it is the first FPU instruction after a context * switch. */ - fpurstor(&pcb->pcb_save); + fpurstor(pcb->pcb_save); } intr_restore(s); @@ -895,18 +897,18 @@ npxdrop() * It returns the FPU ownership status. 
*/ int -npxgetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxgetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return (_MC_FPOWNED_NONE); - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); - SET_FPU_CW(addr, td->td_pcb->pcb_initial_npxcw); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -925,7 +927,43 @@ npxgetregs(td, addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int +npxgetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return (_MC_FPOWNED_NONE); + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { + bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fpusave(addr); +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + /* + * fnsave initializes the FPU and destroys whatever + * context it contains. Make sure the FPU owner + * starts with a clean state next time. + */ + npxdrop(); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -934,15 +972,15 @@ npxgetregs(td, addr) * Set the state of the FPU. 
*/ void -npxsetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxsetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { #ifdef CPU_ENABLE_SSE @@ -953,9 +991,39 @@ npxsetregs(td, addr) intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, pcb->pcb_save, sizeof(*addr)); + } + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; + pcb->pcb_flags |= PCB_NPXINITDONE; +} + +void +npxsetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + fnclex(); /* As in npxdrop(). */ + fpurstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; + } else { + intr_restore(s); + bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXINITDONE; + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } - curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; } static void @@ -1124,3 +1192,73 @@ DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, + ("mangled pcb_save")); + ctx->flags = 0; + if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) + ctx->flags |= FPU_KERN_CTX_NPXINITDONE; + npxexit(td); + ctx->prev = pcb->pcb_save; + pcb->pcb_save = &ctx->hwstate; + pcb->pcb_flags |= PCB_KERNNPX; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + register_t savecrit; + + pcb 
= td->td_pcb; + savecrit = intr_disable(); + if (curthread == PCPU_GET(fpcurthread)) + npxdrop(); + intr_restore(savecrit); + pcb->pcb_save = ctx->prev; + if (pcb->pcb_save == &pcb->pcb_user_save) { + if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) + pcb->pcb_flags |= PCB_NPXINITDONE; + else + pcb->pcb_flags &= ~PCB_NPXINITDONE; + pcb->pcb_flags &= ~PCB_KERNNPX; + } else { + if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) + pcb->pcb_flags |= PCB_NPXINITDONE; + else + pcb->pcb_flags &= ~PCB_NPXINITDONE; + KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); + } + return (0); +} + +int +fpu_kern_thread(u_int flags) +{ + struct pcb *pcb; + + pcb = PCPU_GET(curpcb); + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(PCB_USER_FPU(pcb), ("recursive call")); + + pcb->pcb_flags |= PCB_KERNNPX; + return (0); +} + +int +is_fpu_kern_thread(u_int flags) +{ + + if ((curthread->td_pflags & TDP_KTHREAD) == 0) + return (0); + return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0); +} diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c index daee9e5..e9559f8 100644 --- a/sys/i386/linux/linux_ptrace.c +++ b/sys/i386/linux/linux_ptrace.c @@ -224,7 +224,7 @@ linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); - bcopy(&td->td_pcb->pcb_save.sv_xmm, fpxregs, sizeof(*fpxregs)); + bcopy(&td->td_pcb->pcb_user_save.sv_xmm, fpxregs, sizeof(*fpxregs)); return (0); } @@ -235,7 +235,7 @@ linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); - bcopy(fpxregs, &td->td_pcb->pcb_save.sv_xmm, sizeof(*fpxregs)); + bcopy(fpxregs, &td->td_pcb->pcb_user_save.sv_xmm, 
sizeof(*fpxregs)); return (0); } #endif diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 8899140..93110bd 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -10,6 +10,7 @@ SUBDIR= ${_3dfx} \ accf_http \ ${_acpi} \ ae \ + ${_aesni} \ age \ ${_agp} \ aha \ @@ -438,6 +439,9 @@ _zfs= zfs .if ${MACHINE} == "i386" _aac= aac _acpi= acpi +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_aesni= aesni +.endif _ahb= ahb _amdsbwd= amdsbwd _amdtemp= amdtemp @@ -493,6 +497,9 @@ _snc= snc .if ${MACHINE_ARCH} == "amd64" _aac= aac _acpi= acpi +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_aesni= aesni +.endif _agp= agp _an= an _amdsbwd= amdsbwd diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile new file mode 100644 index 0000000..3f8c9a8 --- /dev/null +++ b/sys/modules/aesni/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../crypto/aesni + +KMOD= aesni +SRCS= aesni.c aesni_wrap.c +SRCS+= aesencdec_$(MACHINE_ARCH).S aeskeys_$(MACHINE_ARCH).S +SRCS+= device_if.h bus_if.h opt_bus.h cryptodev_if.h + +.include diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index 5810780..53c11e6 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -82,6 +82,10 @@ __FBSDID("$FreeBSD$"); #include #include "cryptodev_if.h" +#if defined(__i386__) || defined(__amd64__) +#include +#endif + SDT_PROVIDER_DEFINE(opencrypto); /* @@ -1241,6 +1245,10 @@ crypto_proc(void) u_int32_t hid; int result, hint; +#if defined(__i386__) || defined(__amd64__) + fpu_kern_thread(FPU_KERN_NORMAL); +#endif + CRYPTO_Q_LOCK(); for (;;) { /* diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 3b07790..c851c12 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -2513,12 +2513,12 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, + fill_fpregs_xmm(&td->td_pcb->pcb_save->sv_xmm, (struct save87 *)fpregs); return 
(0); } #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_save->sv_87, fpregs, sizeof *fpregs); return (0); } @@ -2528,11 +2528,11 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); + &td->td_pcb->pcb_save->sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_save->sv_87, sizeof *fpregs); return (0); }