diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index cd273ce..b541c48 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -129,6 +129,10 @@ amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_bus.c optional pci amd64/pci/pci_cfgreg.c optional pci +crypto/aesni/aesencdec_amd64.S optional aesni +crypto/aesni/aeskeys_amd64.S optional aesni +crypto/aesni/aesni.c optional aesni +crypto/aesni/aesni_wrap.c optional aesni crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 2382fb1..223ca6c 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -112,6 +112,10 @@ bf_enc.o optional crypto | ipsec \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule +crypto/aesni/aesencdec_i386.S optional aesni +crypto/aesni/aeskeys_i386.S optional aesni +crypto/aesni/aesni.c optional aesni +crypto/aesni/aesni_wrap.c optional aesni crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock diff --git a/sys/crypto/aesni/aesencdec_amd64.S b/sys/crypto/aesni/aesencdec_amd64.S new file mode 100644 index 0000000..8060d00 --- /dev/null +++ b/sys/crypto/aesni/aesencdec_amd64.S @@ -0,0 +1,142 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + .text + + .align 0x10,0x90 + .globl aesni_enc + .type aesni_enc,@function +aesni_enc: + .cfi_startproc + movdqu (%rdx),%xmm0 + cmpq $0,%r8 + je 1f + movdqu (%r8),%xmm1 /* unaligned load into reg */ + pxor %xmm1,%xmm0 /* pxor otherwise can fault on iv */ +1: + pxor (%rsi),%xmm0 +2: + addq $0x10,%rsi +// aesenc (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdc,0x06 + decl %edi + jne 2b + addq $0x10,%rsi +// aesenclast (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdd,0x06 + movdqu %xmm0,(%rcx) + retq + .cfi_endproc + .size aesni_enc,. - aesni_enc + + .align 0x10,0x90 + .globl aesni_dec + .type aesni_dec,@function +aesni_dec: + .cfi_startproc + movdqu (%rdx),%xmm0 + pxor (%rsi),%xmm0 +1: + addq $0x10,%rsi +// aesdec (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x06 + decl %edi + jne 1b + addq $0x10,%rsi +// aesdeclast (%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x06 + cmpq $0,%r8 + je 2f + movdqu (%r8),%xmm1 + pxor %xmm1,%xmm0 +2: + movdqu %xmm0,(%rcx) + retq + .cfi_endproc + .size aesni_dec,. 
- aesni_dec + + .align 0x10,0x90 + .globl aesni_decrypt_cbc + .type aesni_decrypt_cbc,@function +aesni_decrypt_cbc: + .cfi_startproc + shrq $4,%rdx + movdqu (%r8),%xmm1 +1: + movdqu (%rcx),%xmm0 + movdqa %xmm0,%xmm2 + pxor (%rsi),%xmm0 + cmpl $12,%edi +// aesdec 0x10(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x10 +// aesdec 0x20(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x20 +// aesdec 0x30(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x30 +// aesdec 0x40(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x40 +// aesdec 0x50(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x50 +// aesdec 0x60(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x60 +// aesdec 0x70(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x46,0x70 +// aesdec 0x80(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0x80,0x00,0x00,0x00 +// aesdec 0x90(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0x90,0x00,0x00,0x00 + jge 2f +// aesdeclast 0xa0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xa0,0x00,0x00,0x00 + jmp 4f +2: +// aesdec 0xa0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xa0,0x00,0x00,0x00 +// aesdec 0xb0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xb0,0x00,0x00,0x00 + jg 3f +// aesdeclast 0xc0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xc0,0x00,0x00,0x00 + jmp 4f +3: +// aesdec 0xc0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xc0,0x00,0x00,0x00 +// aesdec 0xd0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x86,0xd0,0x00,0x00,0x00 +// aesdeclast 0xe0(%rsi),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x86,0xe0,0x00,0x00,0x00 +4: + pxor %xmm1,%xmm0 + movdqu %xmm0,(%rcx) + movdqa %xmm2,%xmm1 // iv + addq $0x10,%rcx + decq %rdx + jne 1b + retq + .cfi_endproc + .size aesni_decrypt_cbc,. - aesni_decrypt_cbc + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aesencdec_i386.S b/sys/crypto/aesni/aesencdec_i386.S new file mode 100644 index 0000000..28213d5 --- /dev/null +++ b/sys/crypto/aesni/aesencdec_i386.S @@ -0,0 +1,174 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + .text + .align 0x10,0x90 + .globl aesni_enc + .type aesni_enc,@function +aesni_enc: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + movl 8(%ebp),%ecx /* rounds */ + movl 16(%ebp),%edx + movdqu (%edx),%xmm0 /* from */ + movl 24(%ebp),%eax /* iv */ + cmpl $0,%eax + je 1f + movdqu (%eax),%xmm1 + pxor %xmm1,%xmm0 +1: + movl 12(%ebp),%eax /* key */ + pxor (%eax),%xmm0 +2: + addl $0x10,%eax +// aesenc (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdc,0x00 + loopne 2b + addl $0x10,%eax +// aesenclast (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdd,0x00 + movl 20(%ebp),%eax + movdqu %xmm0,(%eax) /* to */ + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_enc,. - aesni_enc + + .align 0x10,0x90 + .globl aesni_dec + .type aesni_dec,@function +aesni_dec: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + movl 8(%ebp),%ecx /* rounds */ + movl 16(%ebp),%edx + movdqu (%edx),%xmm0 /* from */ + movl 12(%ebp),%eax /* key */ + pxor (%eax),%xmm0 +1: + addl $0x10,%eax +// aesdec (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x00 + loopne 1b + addl $0x10,%eax +// aesdeclast (%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x00 + movl 24(%ebp),%eax + cmpl $0,%eax /* iv */ + je 2f + movdqu (%eax),%xmm1 + pxor %xmm1,%xmm0 +2: + movl 20(%ebp),%eax + movdqu %xmm0,(%eax) /* to */ + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_dec,. 
- aesni_dec + + .align 0x10,0x90 + .globl aesni_decrypt_cbc + .type aesni_decrypt_cbc,@function +aesni_decrypt_cbc: + .cfi_startproc + pushl %ebp + .cfi_adjust_cfa_offset 4 + movl %esp,%ebp + pushl %ebx + pushl %esi + movl 12(%ebp),%eax /* key */ + movl 16(%ebp),%ecx /* length */ + shrl $4,%ecx + movl 20(%ebp),%ebx /* buf */ + movl 24(%ebp),%esi + movdqu (%esi),%xmm1 /* iv */ + movl 8(%ebp),%esi /* rounds */ +1: + movdqu (%ebx),%xmm0 + movdqa %xmm0,%xmm2 + pxor (%eax),%xmm0 + cmpl $12,%esi +// aesdec 0x10(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x10 +// aesdec 0x20(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x20 +// aesdec 0x30(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x30 +// aesdec 0x40(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x40 +// aesdec 0x50(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x50 +// aesdec 0x60(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x60 +// aesdec 0x70(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x40,0x70 +// aesdec 0x80(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0x80,0x00,0x00,0x00 +// aesdec 0x90(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0x90,0x00,0x00,0x00 + jge 2f +// aesdeclast 0xa0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xa0,0x00,0x00,0x00 + jmp 4f +2: +// aesdec 0xa0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xa0,0x00,0x00,0x00 +// aesdec 0xb0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xb0,0x00,0x00,0x00 + jg 3f +// aesdeclast 0xc0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xc0,0x00,0x00,0x00 + jmp 4f +3: +// aesdec 0xc0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xc0,0x00,0x00,0x00 +// aesdec 0xd0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xde,0x80,0xd0,0x00,0x00,0x00 +// aesdeclast 0xe0(%eax),%xmm0 + .byte 0x66,0x0f,0x38,0xdf,0x80,0xe0,0x00,0x00,0x00 +4: + pxor %xmm1,%xmm0 + movdqu %xmm0,(%ebx) + movdqa %xmm2,%xmm1 + addl $0x10,%ebx + decl %ecx + jne 1b + + popl %esi + popl %ebx + leave + .cfi_adjust_cfa_offset -4 + retl + .cfi_endproc + .size aesni_decrypt_cbc,. 
- aesni_decrypt_cbc + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aeskeys_amd64.S b/sys/crypto/aesni/aeskeys_amd64.S new file mode 100644 index 0000000..4e058e3 --- /dev/null +++ b/sys/crypto/aesni/aeskeys_amd64.S @@ -0,0 +1,348 @@ +/*- + * XXX INTEL COPYRIGHT MISSED THERE. + * The code in the file was taken from the whitepaper + * Intel Advanced Encryption Standard (AES) Instructions Set + * January 2010 (26/1/2010) Rev. 3.0 + * by Intel Corporation. + */ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + .text + + .align 0x10,0x90 + .globl aesni_key_expansion_decrypt + .type aesni_key_expansion_decrypt,@function +aesni_key_expansion_decrypt: + .cfi_startproc + movslq %edx,%rdx + movq %rdx,%rax + shlq $4,%rax + cmpq $10,%rdx + movdqa (%rax,%rdi),%xmm0 + movdqa %xmm0,(%rsi) +// aesimc -16(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x38,0xf0 +// aesimc -32(%rax,%rdi),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x54,0x38,0xe0 +// aesimc -48(%rax,%rdi),%xmm3 + .byte 0x66,0x0f,0x38,0xdb,0x5c,0x38,0xd0 +// aesimc -64(%rax,%rdi),%xmm4 + .byte 0x66,0x0f,0x38,0xdb,0x64,0x38,0xc0 + movdqa %xmm1,16(%rsi) + movdqa %xmm2,32(%rsi) + movdqa %xmm3,48(%rsi) + movdqa %xmm4,64(%rsi) +// aesimc -80(%rax,%rdi),%xmm5 + .byte 0x66,0x0f,0x38,0xdb,0x6c,0x38,0xb0 +// aesimc -96(%rax,%rdi),%xmm6 + .byte 0x66,0x0f,0x38,0xdb,0x74,0x38,0xa0 +// aesimc -112(%rax,%rdi),%xmm7 + .byte 0x66,0x0f,0x38,0xdb,0x7c,0x38,0x90 +// aesimc -128(%rax,%rdi),%xmm8 + .byte 0x66,0x44,0x0f,0x38,0xdb,0x44,0x38,0x80 + movdqa %xmm5,80(%rsi) + movdqa %xmm6,96(%rsi) + movdqa %xmm7,112(%rsi) + movdqa %xmm8,128(%rsi) +// aesimc -144(%rax,%rdi),%xmm9 + .byte 0x66,0x44,0x0f,0x38,0xdb,0x8c,0x38,0x70,0xff,0xff,0xff + movdqa %xmm9, 144(%rsi) + jle 1f + cmpq $12,%rdx +// aesimc -160(%rax,%rdi),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x38,0x60,0xff,0xff,0xff +// aesimc -176(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x38,0x50,0xff,0xff,0xff + movdqa %xmm0,160(%rsi) + movdqa %xmm1,176(%rsi) + jle 1f +// aesimc -192(%rax,%rdi),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x38,0x40,0xff,0xff,0xff +// aesimc -208(%rax,%rdi),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x38,0x30,0xff,0xff,0xff + movdqa %xmm0,192(%rsi) + movdqa %xmm1,208(%rsi) +1: + movdqa (%rdi),%xmm0 + movdqa %xmm0,(%rax,%rsi) + retq + .cfi_endproc + .size aesni_key_expansion_decrypt,. 
- aesni_key_expansion_decrypt + + .align 0x10,0x90 + .globl aesni_128_key_expansion + .type aesni_128_key_expansion,@function +aesni_128_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqa %xmm1,(%rsi) +// aeskeygenassist $1,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 + call prepkey_128 + movdqa %xmm1,16(%rsi) +// aeskeygenassist $2,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 + call prepkey_128 + movdqa %xmm1,32(%rsi) +// aeskeygenassist $4,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 + call prepkey_128 + movdqa %xmm1,48(%rsi) +// aeskeygenassist $8,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 + call prepkey_128 + movdqa %xmm1,64(%rsi) +// aeskeygenassist $16,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 + call prepkey_128 + movdqa %xmm1,80(%rsi) +// aeskeygenassist $32,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 + call prepkey_128 + movdqa %xmm1,96(%rsi) +// aeskeygenassist $64,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 + call prepkey_128 + movdqa %xmm1,112(%rsi) +// aeskeygenassist $0x80,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 + call prepkey_128 + movdqa %xmm1,128(%rsi) +// aeskeygenassist $0x1b, %xmm1, %xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b + call prepkey_128 + movdqa %xmm1,144(%rsi) +// aeskeygenassist $0x36,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 + call prepkey_128 + movdqa %xmm1,160(%rsi) + retq + .cfi_endproc + .size aesni_128_key_expansion,. - aesni_128_key_expansion + + .align 0x10,0x90 + .type prepkey_128,@function +prepkey_128: + .cfi_startproc + pshufd $255,%xmm2,%xmm2 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pxor %xmm2,%xmm1 + retq + .cfi_endproc + .size prepkey_128,.
- prepkey_128 + + .align 0x10,0x90 + .globl aesni_192_key_expansion + .type aesni_192_key_expansion,@function +aesni_192_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm3 + movdqa %xmm1,(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,16(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,32(%rsi) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call prepkey_192 + movdqa %xmm1,48(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,64(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,80(%rsi) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call prepkey_192 + movdqa %xmm1,96(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,112(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,128(%rsi) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call prepkey_192 + movdqa %xmm1,144(%rsi) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,160(%rsi) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,176(%rsi) +// aeskeygenassist $0x80,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 + call prepkey_192 + movdqa %xmm1,192(%rsi) + retq + .cfi_endproc + .size aesni_192_key_expansion,. 
- aesni_192_key_expansion + + .align 0x10,0x90 + .type prepkey_192,@function +prepkey_192: + .cfi_startproc + pshufd $0x55,%xmm2,%xmm2 + movdqu %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + pshufd $0xff,%xmm1,%xmm2 + movdqu %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retq + .cfi_endproc + .size prepkey_192,. - prepkey_192 + + .align 0x10,0x90 + .globl aesni_256_key_expansion + .type aesni_256_key_expansion,@function +aesni_256_key_expansion: + .cfi_startproc + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm3 + movdqa %xmm1,(%rsi) + movdqa %xmm3,16(%rsi) +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call rk256_a + movdqa %xmm1,32(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,48(%rsi) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call rk256_a + movdqa %xmm1,64(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,80(%rsi) +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call rk256_a + movdqa %xmm1,96(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,112(%rsi) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call rk256_a + movdqa %xmm1,128(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,144(%rsi) +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call rk256_a + movdqa %xmm1,160(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,176(%rsi) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call rk256_a + movdqa %xmm1,192(%rsi) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 
0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,208(%rsi) +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call rk256_a + movdqa %xmm1,224(%rsi) + retq + .cfi_endproc + .size aesni_256_key_expansion,. - aesni_256_key_expansion + + .align 0x10,0x90 + .type rk256_a,@function +rk256_a: + .cfi_startproc + pshufd $0xff,%xmm2,%xmm2 + movdqa %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + retq + .cfi_endproc + .size rk256_a,. - rk256_a + + .align 0x10,0x90 + .type rk256_b,@function +rk256_b: + .cfi_startproc + pshufd $0xaa,%xmm2,%xmm2 + movdqa %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retq + .cfi_endproc + .size rk256_b,. - rk256_b + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aeskeys_i386.S b/sys/crypto/aesni/aeskeys_i386.S new file mode 100644 index 0000000..8588d43 --- /dev/null +++ b/sys/crypto/aesni/aeskeys_i386.S @@ -0,0 +1,368 @@ +/*- + * XXX INTEL COPYRIGHT MISSED THERE. + * The code in the file was taken from the whitepaper + * Intel Advanced Encryption Standard (AES) Instructions Set + * January 2010 (26/1/2010) Rev. 3.0 + * by Intel Corporation. + */ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + .text + + .align 0x10,0x90 + .globl aesni_key_expansion_decrypt + .type aesni_key_expansion_decrypt,@function +aesni_key_expansion_decrypt: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 16(%ebp),%eax /* rounds */ + movl 8(%ebp),%ecx /* encrypt_schedule */ + movl 12(%ebp),%edx /* decrypt_schedule */ + shll $4,%eax + cmpl $(10<<4),%eax + movdqa (%eax,%ecx),%xmm0 + movdqa %xmm0,(%edx) +// aesimc -16(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x08,0xf0 +// aesimc -32(%eax,%ecx),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x54,0x08,0xe0 +// aesimc -48(%eax,%ecx),%xmm3 + .byte 0x66,0x0f,0x38,0xdb,0x5c,0x08,0xd0 +// aesimc -64(%eax,%ecx),%xmm4 + .byte 0x66,0x0f,0x38,0xdb,0x64,0x08,0xc0 + movdqa %xmm1,16(%edx) + movdqa %xmm2,32(%edx) + movdqa %xmm3,48(%edx) + movdqa %xmm4,64(%edx) +// aesimc -80(%eax,%ecx),%xmm5 + .byte 0x66,0x0f,0x38,0xdb,0x6c,0x08,0xb0 +// aesimc -96(%eax,%ecx),%xmm6 + .byte 0x66,0x0f,0x38,0xdb,0x74,0x08,0xa0 +// aesimc -112(%eax,%ecx),%xmm7 + .byte 0x66,0x0f,0x38,0xdb,0x7c,0x08,0x90 +// aesimc -128(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x4c,0x08,0x80 + movdqa %xmm5,80(%edx) + movdqa %xmm6,96(%edx) + movdqa %xmm7,112(%edx) + movdqa %xmm1,128(%edx) +// aesimc 
-144(%eax,%ecx),%xmm2 + .byte 0x66,0x0f,0x38,0xdb,0x94,0x08,0x70,0xff,0xff,0xff + movdqa %xmm2, 144(%edx) + jle 1f + cmpl $(12<<4),%eax +// aesimc -160(%eax,%ecx),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x08,0x60,0xff,0xff,0xff +// aesimc -176(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x08,0x50,0xff,0xff,0xff + movdqa %xmm0,160(%edx) + movdqa %xmm1,176(%edx) + jle 1f +// aesimc -192(%eax,%ecx),%xmm0 + .byte 0x66,0x0f,0x38,0xdb,0x84,0x08,0x40,0xff,0xff,0xff +// aesimc -208(%eax,%ecx),%xmm1 + .byte 0x66,0x0f,0x38,0xdb,0x8c,0x08,0x30,0xff,0xff,0xff + movdqa %xmm0,192(%edx) + movdqa %xmm1,208(%edx) +1: + movdqa (%ecx),%xmm0 + movdqa %xmm0,(%eax,%edx) + leave + retl + .cfi_endproc + .size aesni_key_expansion_decrypt,. - aesni_key_expansion_decrypt + + .align 0x10,0x90 + .globl aesni_128_key_expansion + .type aesni_128_key_expansion,@function +aesni_128_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqa %xmm1,(%edx) +// aeskeygenassist $1,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 + call prepkey_128 + movdqa %xmm1,16(%edx) +// aeskeygenassist $2,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 + call prepkey_128 + movdqa %xmm1,32(%edx) +// aeskeygenassist $4,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 + call prepkey_128 + movdqa %xmm1,48(%edx) +// aeskeygenassist $8,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 + call prepkey_128 + movdqa %xmm1,64(%edx) +// aeskeygenassist $16,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 + call prepkey_128 + movdqa %xmm1,80(%edx) +// aeskeygenassist $32,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 + call prepkey_128 + movdqa %xmm1,96(%edx) +// aeskeygenassist $64,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 + call prepkey_128 + movdqa %xmm1,112(%edx) +// aeskeygenassist $0x80,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 + call prepkey_128 + movdqa %xmm1,128(%edx) +// 
aeskeygenassist $0x1b, %xmm1, %xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b + call prepkey_128 + movdqa %xmm1,144(%edx) +// aeskeygenassist $0x36,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 + call prepkey_128 + movdqa %xmm1,160(%edx) + leave + retl + .cfi_endproc + .size aesni_128_key_expansion,. - aesni_128_key_expansion + + .align 0x10,0x90 + .type prepkey_128,@function +prepkey_128: + .cfi_startproc + pshufd $255,%xmm2,%xmm2 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pslldq $4,%xmm3 + pxor %xmm3,%xmm1 + pxor %xmm2,%xmm1 + retl + .cfi_endproc + .size prepkey_128,. - prepkey_128 + + .align 0x10,0x90 + .globl aesni_192_key_expansion + .type aesni_192_key_expansion,@function +aesni_192_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqu 16(%ecx),%xmm3 + movdqa %xmm1,(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,16(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,32(%edx) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call prepkey_192 + movdqa %xmm1,48(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,64(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,80(%edx) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call prepkey_192 + movdqa %xmm1,96(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,112(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,128(%edx) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call prepkey_192 + movdqa 
%xmm1,144(%edx) + movdqa %xmm3,%xmm5 +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call prepkey_192 + shufpd $0,%xmm1,%xmm5 + movdqa %xmm5,160(%edx) + movdqa %xmm1,%xmm6 + shufpd $1,%xmm3,%xmm6 + movdqa %xmm6,176(%edx) +// aeskeygenassist $0x80,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 + call prepkey_192 + movdqa %xmm1,192(%edx) + leave + retl + .cfi_endproc + .size aesni_192_key_expansion,. - aesni_192_key_expansion + + .align 0x10,0x90 + .type prepkey_192,@function +prepkey_192: + .cfi_startproc + pshufd $0x55,%xmm2,%xmm2 + movdqu %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + pshufd $0xff,%xmm1,%xmm2 + movdqu %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retl + .cfi_endproc + .size prepkey_192,. - prepkey_192 + + .align 0x10,0x90 + .globl aesni_256_key_expansion + .type aesni_256_key_expansion,@function +aesni_256_key_expansion: + .cfi_startproc + pushl %ebp + movl %esp,%ebp + movl 8(%ebp),%ecx /* userkey */ + movl 12(%ebp),%edx /* key_schedule */ + movdqu (%ecx),%xmm1 + movdqu 16(%ecx),%xmm3 + movdqa %xmm1,(%edx) + movdqa %xmm3,16(%edx) +// aeskeygenassist $0x1,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 + call rk256_a + movdqa %xmm1,32(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,48(%edx) +// aeskeygenassist $0x2,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 + call rk256_a + movdqa %xmm1,64(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,80(%edx) +// aeskeygenassist $0x4,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 + call rk256_a + movdqa %xmm1,96(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,112(%edx) +// aeskeygenassist $0x8,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 + call rk256_a + 
movdqa %xmm1,128(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,144(%edx) +// aeskeygenassist $0x10,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 + call rk256_a + movdqa %xmm1,160(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,176(%edx) +// aeskeygenassist $0x20,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 + call rk256_a + movdqa %xmm1,192(%edx) +// aeskeygenassist $0x0,%xmm1,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x00 + call rk256_b + movdqa %xmm3,208(%edx) +// aeskeygenassist $0x40,%xmm3,%xmm2 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 + call rk256_a + movdqa %xmm1,224(%edx) + leave + retl + .cfi_endproc + .size aesni_256_key_expansion,. - aesni_256_key_expansion + + .align 0x10,0x90 + .type rk256_a,@function +rk256_a: + .cfi_startproc + pshufd $0xff,%xmm2,%xmm2 + movdqa %xmm1,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pslldq $4,%xmm4 + pxor %xmm4,%xmm1 + pxor %xmm2,%xmm1 + retl + .cfi_endproc + .size rk256_a,. - rk256_a + + .align 0x10,0x90 + .type rk256_b,@function +rk256_b: + .cfi_startproc + pshufd $0xaa,%xmm2,%xmm2 + movdqa %xmm3,%xmm4 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pslldq $4,%xmm4 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm3 + retl + .cfi_endproc + .size rk256_b,. - rk256_b + + .ident "$FreeBSD$" diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c new file mode 100644 index 0000000..93ee042 --- /dev/null +++ b/sys/crypto/aesni/aesni.c @@ -0,0 +1,338 @@ +/*- + * Copyright (c) 2005-2008 Pawel Jakub Dawidek + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cryptodev_if.h" + +struct aesni_softc { + int32_t cid; + uint32_t sid; + TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions; + struct rwlock lock; +}; + +static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); +static int aesni_freesession(device_t, uint64_t tid); +static void aesni_freesession_locked(struct aesni_softc *sc, + struct aesni_session *ses); + +MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); + +static void +aesni_identify(driver_t *drv, device_t parent) +{ + + /* NB: order 10 is so we get attached after h/w devices */ + if (device_find_child(parent, "aesni", -1) == NULL && + BUS_ADD_CHILD(parent, 10, "aesni", -1) == 0) + panic("aesni: could not attach"); +} + +static int +aesni_probe(device_t dev) +{ + char capp[32]; + + if ((cpu_feature2 & CPUID2_AESNI) == 0) { + device_printf(dev, "No AESNI support.\n"); + return (EINVAL); + } + strlcpy(capp, "AES-CBC", sizeof(capp)); + device_set_desc_copy(dev, capp); + return (0); +} + +static int +aesni_attach(device_t dev) +{ + struct aesni_softc *sc; + + sc = device_get_softc(dev); + TAILQ_INIT(&sc->sessions); + sc->sid = 1; + sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE); + if (sc->cid < 0) { + device_printf(dev, "Could not get crypto driver id.\n"); + return (ENOMEM); + } + + rw_init(&sc->lock, "aesni_lock"); + crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + return (0); +} + +static int +aesni_detach(device_t dev) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + + sc = device_get_softc(dev); + rw_wlock(&sc->lock); + TAILQ_FOREACH(ses, &sc->sessions, next) { + if (ses->used) { + rw_wunlock(&sc->lock); + device_printf(dev, + "Cannot detach, sessions still active.\n"); + return (EBUSY); + } + } + while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { + TAILQ_REMOVE(&sc->sessions, ses, 
next); + free(ses, M_AESNI); + } + rw_wunlock(&sc->lock); + rw_destroy(&sc->lock); + crypto_unregister_all(sc->cid); + return (0); +} + +static int +aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + struct cryptoini *encini; + int error; + + if (sidp == NULL || cri == NULL) + return (EINVAL); + + sc = device_get_softc(dev); + ses = NULL; + encini = NULL; + for (; cri != NULL; cri = cri->cri_next) { + switch (cri->cri_alg) { + case CRYPTO_AES_CBC: + if (encini != NULL) + return (EINVAL); + encini = cri; + break; + default: + return (EINVAL); + } + } + if (encini == NULL) + return (EINVAL); + + rw_wlock(&sc->lock); + /* + * Free sessions goes first, so if first session is used, we need to + * allocate one. + */ + ses = TAILQ_FIRST(&sc->sessions); + if (ses == NULL || ses->used) { + ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO); + if (ses == NULL) { + rw_wunlock(&sc->lock); + return (ENOMEM); + } + KASSERT(((uintptr_t)ses) % 0x10 == 0, + ("malloc returned unaligned pointer")); + ses->id = sc->sid++; + } else { + TAILQ_REMOVE(&sc->sessions, ses, next); + } + ses->used = 1; + TAILQ_INSERT_TAIL(&sc->sessions, ses, next); + rw_wunlock(&sc->lock); + + error = aesni_cipher_setup(ses, encini); + if (error != 0) { + rw_wlock(&sc->lock); + aesni_freesession_locked(sc, ses); + rw_wunlock(&sc->lock); + return (error); + } + + *sidp = ses->id; + return (0); +} + +static void +aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) +{ + uint32_t sid; + + sid = ses->id; + TAILQ_REMOVE(&sc->sessions, ses, next); + bzero(ses, sizeof(*ses)); + ses->id = sid; + TAILQ_INSERT_HEAD(&sc->sessions, ses, next); +} + +static int +aesni_freesession(device_t dev, uint64_t tid) +{ + struct aesni_softc *sc; + struct aesni_session *ses; + uint32_t sid; + + sc = device_get_softc(dev); + sid = ((uint32_t)tid) & 0xffffffff; + rw_wlock(&sc->lock); + TAILQ_FOREACH_REVERSE(ses, &sc->sessions, 
aesni_sessions_head, next) { + if (ses->id == sid) + break; + } + if (ses == NULL) { + rw_wunlock(&sc->lock); + return (EINVAL); + } + aesni_freesession_locked(sc, ses); + rw_wunlock(&sc->lock); + return (0); +} + +static int +aesni_process(device_t dev, struct cryptop *crp, int hint __unused) +{ + struct aesni_softc *sc = device_get_softc(dev); + struct aesni_session *ses = NULL; + struct cryptodesc *crd, *enccrd; + int error; + + error = 0; + enccrd = NULL; + + /* Sanity check. */ + if (crp == NULL) + return (EINVAL); + + if (crp->crp_callback == NULL || crp->crp_desc == NULL) { + error = EINVAL; + goto out; + } + + for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { + switch (crd->crd_alg) { + case CRYPTO_AES_CBC: + if (enccrd != NULL) { + error = EINVAL; + goto out; + } + enccrd = crd; + break; + default: + return (EINVAL); + } + } + if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) { + error = EINVAL; + goto out; + } + + rw_rlock(&sc->lock); + TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) { + if (ses->id == (crp->crp_sid & 0xffffffff)) + break; + } + rw_runlock(&sc->lock); + if (ses == NULL) { + error = EINVAL; + goto out; + } + + error = aesni_cipher_process(ses, enccrd, crp); + if (error != 0) + goto out; + +out: + crp->crp_etype = error; + crypto_done(crp); + return (error); +} + +uint8_t * +aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, + int *allocated) +{ + struct uio *uio; + struct iovec *iov; + uint8_t *addr; + + if (crp->crp_flags & CRYPTO_F_IMBUF) + goto alloc; + else if (crp->crp_flags & CRYPTO_F_IOV) { + uio = (struct uio *)crp->crp_buf; + if (uio->uio_iovcnt != 1) + goto alloc; + iov = uio->uio_iov; + addr = (u_char *)iov->iov_base + enccrd->crd_skip; + } else + addr = (u_char *)crp->crp_buf; + *allocated = 0; + return (addr); + +alloc: + addr = malloc(enccrd->crd_len, M_AESNI, M_NOWAIT); + if (addr != NULL) { + *allocated = 1; + crypto_copydata(crp->crp_flags, crp->crp_buf, 
enccrd->crd_skip, + enccrd->crd_len, addr); + } else + *allocated = 0; + return (addr); +} + +static device_method_t aesni_methods[] = { + DEVMETHOD(device_identify, aesni_identify), + DEVMETHOD(device_probe, aesni_probe), + DEVMETHOD(device_attach, aesni_attach), + DEVMETHOD(device_detach, aesni_detach), + + DEVMETHOD(cryptodev_newsession, aesni_newsession), + DEVMETHOD(cryptodev_freesession, aesni_freesession), + DEVMETHOD(cryptodev_process, aesni_process), + + {0, 0}, +}; + +static driver_t aesni_driver = { + "aesni", + aesni_methods, + sizeof(struct aesni_softc), +}; +static devclass_t aesni_devclass; + +DRIVER_MODULE(aesni, nexus, aesni_driver, aesni_devclass, 0, 0); +MODULE_VERSION(aesni, 1); +MODULE_DEPEND(aesni, crypto, 1, 1, 1); diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h new file mode 100644 index 0000000..0790f1e --- /dev/null +++ b/sys/crypto/aesni/aesni.h @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _AESNI_H_ +#define _AESNI_H_ + +#include +#include +#include + +#include + +#if defined(__amd64__) || (defined(__i386__) && !defined(PC98)) +#include +#include +#include +#include +#endif +#if defined(__i386__) +#include +#elif defined(__amd64__) +#include +#endif + +#define AES128_ROUNDS 10 +#define AES192_ROUNDS 12 +#define AES256_ROUNDS 14 +#define AES_SCHED_LEN ((AES256_ROUNDS + 1) * AES_BLOCK_LEN) + +struct aesni_session { + uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16); + uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16); + uint8_t iv[AES_BLOCK_LEN]; + int rounds; + /* uint8_t *ses_ictx; */ + /* uint8_t *ses_octx; */ + /* int ses_mlen; */ + int used; + uint32_t id; + TAILQ_ENTRY(aesni_session) next; + struct fpu_kern_ctx fpu_ctx; +}; + +/* + * Internal functions, implemented in assembler. 
+ */ +void aesni_enc(int rounds, const uint8_t *key_schedule, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], + const uint8_t iv[AES_BLOCK_LEN]); +void aesni_dec(int rounds, const uint8_t *key_schedule, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN], + const uint8_t iv[AES_BLOCK_LEN]); + +void aesni_key_expansion_decrypt(const uint8_t *encrypt_schedule, + uint8_t *decrypt_schedule, int number_of_rounds); +void aesni_128_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); +void aesni_192_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); +void aesni_256_key_expansion(const uint8_t *userkey, uint8_t *key_schedule); + +/* + * Slightly more public interfaces. + */ +void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); +void aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, const uint8_t iv[AES_BLOCK_LEN]); +void aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); +void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]); + +int aesni_cipher_setup(struct aesni_session *ses, + struct cryptoini *encini); +int aesni_cipher_process(struct aesni_session *ses, + struct cryptodesc *enccrd, struct cryptop *crp); + +uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, + int *allocated); + +#endif diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c new file mode 100644 index 0000000..f9c980b --- /dev/null +++ b/sys/crypto/aesni/aesni_wrap.c @@ -0,0 +1,198 @@ +/*- + * Copyright (c) 2010 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +MALLOC_DECLARE(M_AESNI); + +#ifdef DEBUG +static void +ps_len(const char *string, const uint8_t *data, int length) +{ + int i; + + printf("%-12s[0x", string); + for(i = 0; i < length; i++) { + if (i % AES_BLOCK_LEN == 0 && i > 0) + printf("+"); + printf("%02x", data[i]); + } + printf("]\n"); +} +#endif + +void +aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) +{ + const uint8_t *ivp; + size_t i; + +#ifdef DEBUG + ps_len("AES CBC encrypt iv:", iv, AES_BLOCK_LEN); + ps_len("from:", from, len); +#endif + + len /= AES_BLOCK_LEN; + ivp = iv; + for (i = 0; i < len; i++) { + aesni_enc(rounds - 1, key_schedule, from, to, ivp); + ivp = to; + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } +#ifdef DEBUG + ps_len("to:", to - len * AES_BLOCK_LEN, len * AES_BLOCK_LEN); +#endif +} + +void +aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) +{ + size_t i; + + len /= AES_BLOCK_LEN; + for (i = 0; i < len; i++) { + aesni_enc(rounds - 1, key_schedule, from, to, NULL); + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } +} + +void +aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, + const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) +{ + size_t i; + + len /= AES_BLOCK_LEN; + for (i = 0; i < len; i++) { + aesni_dec(rounds - 1, key_schedule, from, to, NULL); + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } +} + +int +aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) +{ + struct thread *td; + int error; + + td = curthread; + error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + if (error != 0) + goto out1; + + switch (encini->cri_klen) { + case 128: + ses->rounds = AES128_ROUNDS; + aesni_128_key_expansion(encini->cri_key, ses->enc_schedule); + break; + 
case 192: + ses->rounds = AES192_ROUNDS; + aesni_192_key_expansion(encini->cri_key, ses->enc_schedule); + break; + case 256: + ses->rounds = AES256_ROUNDS; + aesni_256_key_expansion(encini->cri_key, ses->enc_schedule); + break; + default: + error = EINVAL; + goto out; + } + aesni_key_expansion_decrypt(ses->enc_schedule, ses->dec_schedule, + ses->rounds); + arc4rand(ses->iv, sizeof(ses->iv), 0); + out: + fpu_kern_leave(td, &ses->fpu_ctx); + out1: + return (0); +} + +int +aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, + struct cryptop *crp) +{ + struct thread *td; + uint8_t *buf; + int error, allocated; + + buf = aesni_cipher_alloc(enccrd, crp, &allocated); + if (buf == NULL) { + error = ENOMEM; + goto out; + } + + td = curthread; + error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + if (error != 0) + goto out1; + + if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { + if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) + bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + + if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) + crypto_copyback(crp->crp_flags, crp->crp_buf, + enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); + + aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, ses->iv); + } else { + if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) + bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + else + crypto_copydata(crp->crp_flags, crp->crp_buf, + enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); + aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, + enccrd->crd_len, buf, ses->iv); + } + fpu_kern_leave(td, &ses->fpu_ctx); + if (allocated) + crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, + enccrd->crd_len, buf); + if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) + crypto_copydata(crp->crp_flags, crp->crp_buf, + enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, + AES_BLOCK_LEN, ses->iv); + out1: + if (allocated) { + bzero(buf, enccrd->crd_len); + free(buf, M_AESNI); + } + out: + return (error); +} diff 
--git a/sys/modules/Makefile b/sys/modules/Makefile index 8899140..93110bd 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -10,6 +10,7 @@ SUBDIR= ${_3dfx} \ accf_http \ ${_acpi} \ ae \ + ${_aesni} \ age \ ${_agp} \ aha \ @@ -438,6 +439,9 @@ _zfs= zfs .if ${MACHINE} == "i386" _aac= aac _acpi= acpi +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_aesni= aesni +.endif _ahb= ahb _amdsbwd= amdsbwd _amdtemp= amdtemp @@ -493,6 +497,9 @@ _snc= snc .if ${MACHINE_ARCH} == "amd64" _aac= aac _acpi= acpi +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_aesni= aesni +.endif _agp= agp _an= an _amdsbwd= amdsbwd diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile new file mode 100644 index 0000000..3f8c9a8 --- /dev/null +++ b/sys/modules/aesni/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../crypto/aesni + +KMOD= aesni +SRCS= aesni.c aesni_wrap.c +SRCS+= aesencdec_$(MACHINE_ARCH).S aeskeys_$(MACHINE_ARCH).S +SRCS+= device_if.h bus_if.h opt_bus.h cryptodev_if.h + +.include