diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c index b12d07c..d4187ae 100644 --- a/gnu/usr.bin/gdb/kgdb/kthr.c +++ b/gnu/usr.bin/gdb/kgdb/kthr.c @@ -99,10 +99,16 @@ kgdb_thr_add_procs(uintptr_t paddr) if (td.td_tid == dumptid) kt->pcb = dumppcb; else if (td.td_state == TDS_RUNNING && stoppcbs != 0 && - CPU_ISSET(td.td_oncpu, &stopped_cpus)) + CPU_ISSET(td.td_oncpu, &stopped_cpus)) { +#ifdef __amd64__ + kvm_read(kvm, (uintptr_t)stoppcbs + + sizeof(void *) * td.td_oncpu, &kt->pcb, + sizeof(void *)); +#else kt->pcb = (uintptr_t)stoppcbs + sizeof(struct pcb) * td.td_oncpu; - else +#endif + } else kt->pcb = (uintptr_t)td.td_pcb; kt->kstack = td.td_kstack; kt->tid = td.td_tid; diff --git a/gnu/usr.bin/gdb/kgdb/trgt_amd64.c b/gnu/usr.bin/gdb/kgdb/trgt_amd64.c index dab41f9..cdab775 100644 --- a/gnu/usr.bin/gdb/kgdb/trgt_amd64.c +++ b/gnu/usr.bin/gdb/kgdb/trgt_amd64.c @@ -66,7 +66,7 @@ kgdb_trgt_fetch_registers(int regno __unused) supply_register(AMD64_R8_REGNUM + 6, (char *)&pcb.pcb_r14); supply_register(AMD64_R15_REGNUM, (char *)&pcb.pcb_r15); supply_register(AMD64_RIP_REGNUM, (char *)&pcb.pcb_rip); - amd64_supply_fxsave(current_regcache, -1, &pcb.pcb_user_save); + amd64_supply_fxsave(current_regcache, -1, (struct fpusave *)(&pcb + 1)); } void diff --git a/lib/libc/amd64/gen/Makefile.inc b/lib/libc/amd64/gen/Makefile.inc index 30fb05f..fb4f7f4 100644 --- a/lib/libc/amd64/gen/Makefile.inc +++ b/lib/libc/amd64/gen/Makefile.inc @@ -2,7 +2,7 @@ # $FreeBSD$ SRCS+= _setjmp.S _set_tp.c rfork_thread.S setjmp.S sigsetjmp.S \ - fabs.S \ + fabs.S getcontextx.c \ infinity.c ldexp.c makecontext.c signalcontext.c \ flt_rounds.c fpgetmask.c fpsetmask.c fpgetprec.c fpsetprec.c \ fpgetround.c fpsetround.c fpgetsticky.c diff --git a/lib/libc/amd64/gen/getcontextx.c b/lib/libc/amd64/gen/getcontextx.c new file mode 100644 index 0000000..24361bc --- /dev/null +++ b/lib/libc/amd64/gen/getcontextx.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int xstate_sz = -1; + +size_t +__getcontextx_size(void) +{ + u_int p[4]; + + if (xstate_sz == -1) { + do_cpuid(1, p); + if ((p[2] & CPUID2_OSXSAVE) != 0) { + cpuid_count(0xd, 0x0, p); + xstate_sz = p[1] - sizeof(struct savefpu); + } else + xstate_sz = 0; + } + + return (sizeof(ucontext_t) + xstate_sz); +} + +int +__fillcontextx(char *ctx) +{ + struct amd64_get_xfpustate xfpu; + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + if (getcontext(ucp) == -1) + return (-1); + if (xstate_sz != 0) { + xfpu.addr = (char *)(ucp + 1); + xfpu.len = xstate_sz; + if (sysarch(AMD64_GET_XFPUSTATE, &xfpu) == -1) + return (-1); + ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; + ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; + ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; + } else { + ucp->uc_mcontext.mc_xfpustate = 0; + ucp->uc_mcontext.mc_xfpustate_len = 0; + } + return (0); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/arm/gen/Makefile.inc b/lib/libc/arm/gen/Makefile.inc index fb9d885..03c9151 100644 --- a/lib/libc/arm/gen/Makefile.inc +++ b/lib/libc/arm/gen/Makefile.inc @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= _ctx_start.S _setjmp.S _set_tp.c alloca.S fabs.c \ - infinity.c ldexp.c makecontext.c \ + getcontextx.c infinity.c ldexp.c makecontext.c \ setjmp.S signalcontext.c sigsetjmp.S divsi3.S diff --git a/lib/libc/arm/gen/getcontextx.c b/lib/libc/arm/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/arm/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index e00e746..d794c0a 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -384,6 +384,7 @@ FBSD_1.2 { FBSD_1.3 { fdlopen; __FreeBSD_libc_enter_restricted_mode; + getcontextx; }; FBSDprivate_1.0 { @@ -507,4 +508,6 @@ FBSDprivate_1.0 { __elf_aux_vector; __pthread_map_stacks_exec; + __fillcontextx; + __getcontextx_size; }; diff --git a/lib/libc/gen/getcontext.3 b/lib/libc/gen/getcontext.3 index 2fda6b8..5b801fd 100644 --- a/lib/libc/gen/getcontext.3 +++ b/lib/libc/gen/getcontext.3 @@ -35,11 +35,11 @@ .\" .\" $FreeBSD$ .\" -.Dd September 10, 2002 +.Dd December 26, 2011 .Dt GETCONTEXT 3 .Os .Sh NAME -.Nm getcontext , setcontext +.Nm getcontext , getcontextx , setcontext .Nd get and set user thread context .Sh LIBRARY .Lb libc @@ -59,6 +59,20 @@ This saved context may then later be restored by calling .Fn setcontext . .Pp The +.Fn getcontextx +function saves the current execution context in the newly allocated structure +.Vt ucontext_t , +which is returned on success. +If architecture defines additional CPU states that can be stored in extended +blocks referenced from the +.Vt ucontext_t , +the memory for them may be allocated and their context also stored. +Memory returned by +.Fn getcontextx +function shall be freed using +.Fn free 3 . +.Pp +The .Fn setcontext function makes a previously saved thread context the current thread context, i.e., @@ -109,11 +123,24 @@ If successful, returns zero and .Fn setcontext does not return; otherwise \-1 is returned. +The +.Fn getcontextx +returns pointer to the allocated and initialized context on success, and +.Va NULL +on failure. .Sh ERRORS No errors are defined for .Fn getcontext or .Fn setcontext . +The +.Fn getcontextx +may return the following errors in +.Va errno : +.Bl -tag -width Er +.It Bq Er ENOMEM +No memory was available to allocate for the context or some extended state. 
+.El .Sh SEE ALSO .Xr sigaction 2 , .Xr sigaltstack 2 , diff --git a/lib/libc/gen/ucontext.3 b/lib/libc/gen/ucontext.3 index 0574322..69ecbc1 100644 --- a/lib/libc/gen/ucontext.3 +++ b/lib/libc/gen/ucontext.3 @@ -92,6 +92,9 @@ structures: .Ft int .Fn getcontext "ucontext_t *" ; .It +.Ft "ucontext_t *" +.Fn getcontextx "void" ; +.It .Ft int .Fn setcontext "const ucontext_t *" ; .It @@ -104,4 +107,5 @@ structures: .Sh SEE ALSO .Xr sigaltstack 2 , .Xr getcontext 3 , +.Xr getcontextx 3 , .Xr makecontext 3 diff --git a/lib/libc/i386/gen/Makefile.inc b/lib/libc/i386/gen/Makefile.inc index 45e69ca..73dcabb 100644 --- a/lib/libc/i386/gen/Makefile.inc +++ b/lib/libc/i386/gen/Makefile.inc @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= _ctx_start.S _setjmp.S _set_tp.c fabs.S \ - flt_rounds.c infinity.c ldexp.c makecontext.c \ + flt_rounds.c getcontextx.c infinity.c ldexp.c makecontext.c \ rfork_thread.S setjmp.S signalcontext.c sigsetjmp.S diff --git a/lib/libc/i386/gen/getcontextx.c b/lib/libc/i386/gen/getcontextx.c new file mode 100644 index 0000000..bb8e996 --- /dev/null +++ b/lib/libc/i386/gen/getcontextx.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +static int xstate_sz = -1; + +size_t +__getcontextx_size(void) +{ + u_int p[4]; + int cpuid_supported; + + if (xstate_sz == -1) { + __asm __volatile( + " pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " xorl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + " jmp 2f\n" + "1: movl $0,%0\n" + "2:\n" + : "=r" (cpuid_supported) : : "eax", "ecx"); + if (cpuid_supported) { + __asm __volatile( + " pushl %%ebx\n" + " cpuid\n" + " movl %%ebx,%1\n" + " popl %%ebx\n" + : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "1" (0x0)); + if ((p[2] & CPUID2_OSXSAVE) != 0) { + __asm __volatile( + " pushl %%ebx\n" + " cpuid\n" + " movl %%ebx,%1\n" + " popl %%ebx\n" + : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), + "=d" (p[3]) + : "0" (0xd), "2" (0x0)); + xstate_sz = p[1] - sizeof(struct savexmm); + } else + xstate_sz = 0; + } else + xstate_sz = 0; + } + + return (sizeof(ucontext_t) + xstate_sz); +} + +int +__fillcontextx(char *ctx) +{ + struct i386_get_xfpustate xfpu; + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + if (getcontext(ucp) == -1) + return (-1); + if (xstate_sz != 0) { + xfpu.addr = (char *)(ucp + 1); + xfpu.len = xstate_sz; + if (sysarch(I386_GET_XFPUSTATE, &xfpu) == -1) + return (-1); + ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; + ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; + ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; + } else { + ucp->uc_mcontext.mc_xfpustate = 0; + ucp->uc_mcontext.mc_xfpustate_len = 0; + } + return (0); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/ia64/gen/Makefile.inc b/lib/libc/ia64/gen/Makefile.inc index 685ab86..1e3373a 100644 --- a/lib/libc/ia64/gen/Makefile.inc +++ b/lib/libc/ia64/gen/Makefile.inc @@ -3,7 +3,7 @@ SRCS+= __divdf3.S __divdi3.S __divsf3.S __divsi3.S __moddi3.S __modsi3.S \ __udivdi3.S __udivsi3.S __umoddi3.S __umodsi3.S _mcount.S _set_tp.c \ _setjmp.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c fpsetmask.c \ - fpsetround.c infinity.c ldexp.c makecontext.c setjmp.S \ + fpsetround.c getcontextx.c infinity.c ldexp.c makecontext.c setjmp.S \ signalcontext.c sigsetjmp.S # The following may go away if function _Unwind_FindTableEntry() diff --git a/lib/libc/ia64/gen/getcontextx.c b/lib/libc/ia64/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/ia64/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/mips/gen/Makefile.inc b/lib/libc/mips/gen/Makefile.inc index 8ee69d0..9d9cc7a 100644 --- a/lib/libc/mips/gen/Makefile.inc +++ b/lib/libc/mips/gen/Makefile.inc @@ -6,4 +6,5 @@ SRCS+= infinity.c fabs.c ldexp.c # SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \ # fpsetround.c fpsetsticky.c -SRCS+= _ctx_start.S _set_tp.c _setjmp.S makecontext.c setjmp.S signalcontext.c sigsetjmp.S +SRCS+= _ctx_start.S _set_tp.c _setjmp.S getcontextx.c makecontext.c \ + setjmp.S signalcontext.c sigsetjmp.S diff --git a/lib/libc/mips/gen/getcontextx.c b/lib/libc/mips/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/mips/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/powerpc/gen/Makefile.inc b/lib/libc/powerpc/gen/Makefile.inc index 3bfabda..79a2746 100644 --- a/lib/libc/powerpc/gen/Makefile.inc +++ b/lib/libc/powerpc/gen/Makefile.inc @@ -1,7 +1,7 @@ # $FreeBSD$ SRCS += _ctx_start.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c \ - fpgetsticky.c fpsetmask.c fpsetround.c \ + fpgetsticky.c fpsetmask.c fpsetround.c getcontextx.c \ infinity.c ldexp.c makecontext.c _setjmp.S \ setjmp.S sigsetjmp.S signalcontext.c syncicache.c \ _set_tp.c diff --git a/lib/libc/powerpc/gen/getcontextx.c b/lib/libc/powerpc/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/powerpc/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/powerpc64/gen/Makefile.inc b/lib/libc/powerpc64/gen/Makefile.inc index 3bfabda..79a2746 100644 --- a/lib/libc/powerpc64/gen/Makefile.inc +++ b/lib/libc/powerpc64/gen/Makefile.inc @@ -1,7 +1,7 @@ # $FreeBSD$ SRCS += _ctx_start.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c \ - fpgetsticky.c fpsetmask.c fpsetround.c \ + fpgetsticky.c fpsetmask.c fpsetround.c getcontextx.c \ infinity.c ldexp.c makecontext.c _setjmp.S \ setjmp.S sigsetjmp.S signalcontext.c syncicache.c \ _set_tp.c diff --git a/lib/libc/powerpc64/gen/getcontextx.c b/lib/libc/powerpc64/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/powerpc64/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libc/sparc64/gen/Makefile.inc b/lib/libc/sparc64/gen/Makefile.inc index f82f225..d3fbfe4 100644 --- a/lib/libc/sparc64/gen/Makefile.inc +++ b/lib/libc/sparc64/gen/Makefile.inc @@ -2,5 +2,5 @@ SRCS+= _ctx_start.S _setjmp.S fabs.S fixunsdfsi.S flt_rounds.c fpgetmask.c \ fpgetround.c fpgetsticky.c fpsetmask.c fpsetround.c \ - infinity.c ldexp.c makecontext.c \ + getcontextx.c infinity.c ldexp.c makecontext.c \ signalcontext.c setjmp.S sigsetjmp.S _set_tp.c diff --git a/lib/libc/sparc64/gen/getcontextx.c b/lib/libc/sparc64/gen/getcontextx.c new file mode 100644 index 0000000..b701c18 --- /dev/null +++ b/lib/libc/sparc64/gen/getcontextx.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +__getcontextx_size(void) +{ + + return (sizeof(ucontext_t)); +} + +int +__fillcontextx(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + return (getcontext(ucp)); +} + +__weak_reference(__getcontextx, getcontextx); + +ucontext_t * +__getcontextx(void) +{ + char *ctx; + int error; + + ctx = malloc(__getcontextx_size()); + if (ctx == NULL) + return (NULL); + if (__fillcontextx(ctx) == -1) { + error = errno; + free(ctx); + errno = error; + return (NULL); + } + return ((ucontext_t *)ctx); +} diff --git a/lib/libthr/thread/thr_sig.c b/lib/libthr/thread/thr_sig.c index 66358db..46cbd82 100644 --- a/lib/libthr/thread/thr_sig.c +++ b/lib/libthr/thread/thr_sig.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include "un-namespace.h" @@ -314,16 +315,24 @@ check_cancel(struct pthread *curthread, ucontext_t *ucp) static void check_deferred_signal(struct pthread *curthread) { - ucontext_t uc; + ucontext_t *uc; struct sigaction act; siginfo_t info; if (__predict_true(curthread->deferred_siginfo.si_signo == 0)) return; - getcontext(&uc); + +#if defined(__amd64__) || defined(__i386__) + uc = alloca(__getcontextx_size()); + __fillcontextx((char *)uc); +#else + ucontext_t ucv; + uc = &ucv; + getcontext(uc); +#endif if (curthread->deferred_siginfo.si_signo != 0) { act = curthread->deferred_sigact; - uc.uc_sigmask = curthread->deferred_sigmask; + uc->uc_sigmask = curthread->deferred_sigmask; memcpy(&info, &curthread->deferred_siginfo, sizeof(siginfo_t)); /* remove signal */ curthread->deferred_siginfo.si_signo = 0; @@ -334,7 +343,7 @@ check_deferred_signal(struct pthread *curthread) tact.sa_handler = SIG_DFL; _sigaction(info.si_signo, &tact, NULL); } - handle_signal(&act, info.si_signo, &info, &uc); + handle_signal(&act, info.si_signo, &info, uc); } } diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 43aeec3..d20599a 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef SMP #include @@ -65,11 +66,7 @@ CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); extern int acpi_resume_beep; extern int acpi_reset_video; -#ifdef SMP -extern struct pcb **susppcbs; -#else static struct pcb **susppcbs; -#endif int acpi_restorecpu(vm_offset_t, struct pcb *); @@ -334,7 +331,7 @@ acpi_alloc_wakeup_handler(void) } susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); for (i = 0; i < mp_ncpus; i++) - susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); + susppcbs[i] = alloc_pcb(M_WAITOK); return (wakeaddr); } diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index efb01b1..9631da2 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -112,16 +112,25 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) - jne 1f + jne 3f movq PCB_SAVEFPU(%r8),%r8 clts + cmpl $0,use_xsave + jne 1f fxsave (%r8) - smsw %ax + jmp 2f +1: movq %rdx,%rcx + movl xsave_mask,%eax + movl xsave_mask+4,%edx +/* xsave (%r8) */ + .byte 0x41,0x0f,0xae,0x20 + movq %rcx,%rdx +2: smsw %ax orb $CR0_TS,%al lmsw %ax xorl %eax,%eax movq %rax,PCPU(FPCURTHREAD) -1: +3: /* Save is done. Now fire up new thread. Leave old vmspace. 
*/ movq TD_PCB(%rsi),%r8 @@ -355,8 +364,16 @@ ENTRY(savectx) str PCB_TR(%rdi) clts + cmpl $0,use_xsave + jne 1f fxsave PCB_USERFPU(%rdi) - movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */ + jmp 2f +1: movl xsave_mask,%eax + movl xsave_mask+4,%edx +/* xsave PCB_USERFPU(%rdi) */ + .byte 0x0f,0xae,0xa7 + .long PCB_USERFPU +2: movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */ movl $1,%eax ret diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 08e5e57..bceca74 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -96,19 +96,97 @@ void stop_emulating(void); #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) -typedef u_char bool_t; +CTASSERT(sizeof(struct savefpu) == 512); +CTASSERT(sizeof(struct xstate_hdr) == 64); +CTASSERT(sizeof(struct savefpu_ymm) == 832); + +/* + * This requirement is to make it easier for asm code to calculate + * offset of the fpu save area from the pcb address. FPU save area + * must by 64-bytes aligned. + */ +CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, NULL, 1, "Floating point instructions executed in hardware"); -static struct savefpu fpu_initialstate; +int use_xsave; /* non-static for cpu_switch.S */ +uint64_t xsave_mask; /* the same */ +static struct savefpu *fpu_initialstate; + +static void +fpusave(void *addr) +{ + + if (use_xsave) + xsave((char *)addr, xsave_mask); + else + fxsave((char *)addr); +} + +static void +fpurestore(void *addr) +{ + + if (use_xsave) + xrstor((char *)addr, xsave_mask); + else + fxrstor((char *)addr); +} + +/* + * Enable XSAVE if supported and allowed by user. + * Calculate the xsave_mask. + */ +static void +fpuinit_bsp1(void) +{ + u_int cp[4]; + uint64_t xsave_mask_user; + + if ((cpu_feature2 & CPUID2_XSAVE) != 0) { + use_xsave = 1; + TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); + } + if (!use_xsave) + return; + + cpuid_count(0xd, 0x0, cp); + xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + if ((cp[0] & xsave_mask) != xsave_mask) + panic("CPU0 does not support X87 or SSE: %x", cp[0]); + xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; + xsave_mask_user = xsave_mask; + TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); + xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + xsave_mask &= xsave_mask_user; +} /* - * Initialize the floating point unit. On the boot CPU we generate a - * clean state that is used to initialize the floating point unit when - * it is first used by a process. + * Calculate the fpu save area size. + */ +static void +fpuinit_bsp2(void) +{ + u_int cp[4]; + + if (use_xsave) { + cpuid_count(0xd, 0x0, cp); + cpu_max_ext_state_size = cp[1]; + + /* + * Reload the cpu_feature2, since we enabled OSXSAVE. + */ + do_cpuid(1, cp); + cpu_feature2 = cp[2]; + } else + cpu_max_ext_state_size = sizeof(struct savefpu); +} + +/* + * Initialize the floating point unit. */ void fpuinit(void) @@ -117,6 +195,20 @@ fpuinit(void) u_int mxcsr; u_short control; + if (IS_BSP()) + fpuinit_bsp1(); + + if (use_xsave) { + load_cr4(rcr4() | CR4_XSAVE); + xsetbv(XCR0, xsave_mask); + } + + /* + * XCR0 shall be set up before CPU can report the save area size. + */ + if (IS_BSP()) + fpuinit_bsp2(); + /* * It is too early for critical_enter() to work on AP. 
*/ @@ -127,20 +219,46 @@ fpuinit(void) fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); - if (PCPU_GET(cpuid) == 0) { - fxsave(&fpu_initialstate); - if (fpu_initialstate.sv_env.en_mxcsr_mask) - cpu_mxcsr_mask = fpu_initialstate.sv_env.en_mxcsr_mask; - else - cpu_mxcsr_mask = 0xFFBF; - bzero(fpu_initialstate.sv_fp, sizeof(fpu_initialstate.sv_fp)); - bzero(fpu_initialstate.sv_xmm, sizeof(fpu_initialstate.sv_xmm)); - } start_emulating(); intr_restore(saveintr); } /* + * On the boot CPU we generate a clean state that is used to + * initialize the floating point unit when it is first used by a + * process. + */ +static void +fpuinitstate(void *arg __unused) +{ + register_t saveintr; + + fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, + M_WAITOK | M_ZERO); + saveintr = intr_disable(); + stop_emulating(); + + fpusave(fpu_initialstate); + if (fpu_initialstate->sv_env.en_mxcsr_mask) + cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; + else + cpu_mxcsr_mask = 0xFFBF; + + /* + * The fninit instruction does not modify XMM registers. The + * fpusave call dumped the garbage contained in the registers + * after reset to the initial state saved. Clear XMM + * registers file image to make the startup program state and + * signal handler XMM register content predictable. + */ + bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); + + start_emulating(); + intr_restore(saveintr); +} +SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); + +/* * Free coprocessor (if we have it). */ void @@ -150,7 +268,7 @@ fpuexit(struct thread *td) critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(PCPU_GET(curpcb)->pcb_save); + fpusave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -423,7 +541,7 @@ fpudna(void) * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. */ - fxrstor(&fpu_initialstate); + fpurestore(fpu_initialstate); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) @@ -432,7 +550,7 @@ fpudna(void) else set_pcb_flags(pcb, PCB_FPUINITDONE); } else - fxrstor(pcb->pcb_save); + fpurestore(pcb->pcb_save); critical_exit(); } @@ -461,15 +579,16 @@ fpugetregs(struct thread *td) pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { - bcopy(&fpu_initialstate, &pcb->pcb_user_save, - sizeof(fpu_initialstate)); - pcb->pcb_user_save.sv_env.en_cw = pcb->pcb_initial_fpucw; + bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), + cpu_max_ext_state_size); + get_pcb_user_save_pcb(pcb)->sv_env.en_cw = + pcb->pcb_initial_fpucw; fpuuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fxsave(&pcb->pcb_user_save); + fpusave(get_pcb_user_save_pcb(pcb)); critical_exit(); return (_MC_FPOWNED_FPU); } else { @@ -491,25 +610,78 @@ fpuuserinited(struct thread *td) set_pcb_flags(pcb, PCB_FPUINITDONE); } +int +fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) +{ + struct xstate_hdr *hdr, *ehdr; + size_t len, max_len; + uint64_t bv; + + /* XXXKIB should we clear all extended state in xstate_bv instead ? 
*/ + if (xfpustate == NULL) + return (0); + if (!use_xsave) + return (EOPNOTSUPP); + + len = xfpustate_size; + if (len < sizeof(struct xstate_hdr)) + return (EINVAL); + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + if (len > max_len) + return (EINVAL); + + ehdr = (struct xstate_hdr *)xfpustate; + bv = ehdr->xstate_bv; + + /* + * Avoid #gp. + */ + if (bv & ~xsave_mask) + return (EINVAL); + if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) != + (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) + return (EINVAL); + + hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); + + hdr->xstate_bv = bv; + bcopy(xfpustate + sizeof(struct xstate_hdr), + (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); + + return (0); +} + /* * Set the state of the FPU. */ -void -fpusetregs(struct thread *td, struct savefpu *addr) +int +fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, + size_t xfpustate_size) { struct pcb *pcb; + int error; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fxrstor(addr); + error = fpusetxstate(td, xfpustate, xfpustate_size); + if (error != 0) { + critical_exit(); + return (error); + } + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); + fpurestore(get_pcb_user_save_td(td)); critical_exit(); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } else { critical_exit(); - bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + error = fpusetxstate(td, xfpustate, xfpustate_size); + if (error != 0) + return (error); + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } + return (0); } /* @@ -599,20 +771,62 @@ static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ +static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", + "Kernel contexts for FPU state"); + +#define FPU_KERN_CTX_FPUINITDONE 0x01 + +struct fpu_kern_ctx { + struct savefpu *prev; + uint32_t flags; + char hwstate1[]; +}; + +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + + cpu_max_ext_state_size; + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? + M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + /* XXXKIB clear the memory ? 
*/ + free(ctx, M_FPUKERN_CTX); +} + +static struct savefpu * +fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) +{ + vm_offset_t p; + + p = (vm_offset_t)&ctx->hwstate1; + p = roundup2(p, XSAVE_AREA_ALIGN); + return ((struct savefpu *)p); +} + int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; - KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, - ("mangled pcb_save")); + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == + get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; - pcb->pcb_save = &ctx->hwstate; + pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); return (0); @@ -629,7 +843,7 @@ fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; - if (pcb->pcb_save == &pcb->pcb_user_save) { + if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); clear_pcb_flags(pcb, PCB_KERNFPU); @@ -653,7 +867,8 @@ fpu_kern_thread(u_int flags) pcb = PCPU_GET(curpcb); KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); - KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb), + ("mangled pcb_save")); KASSERT(PCB_USER_FPU(pcb), ("recursive call")); set_pcb_flags(pcb, PCB_KERNFPU); diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index d133223..3796aa8 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -156,7 +156,7 @@ ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd)); ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu)); -ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save)); +ASSYM(PCB_USERFPU, sizeof(struct pcb)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_FULL_IRET, PCB_FULL_IRET); ASSYM(PCB_DBREGS, PCB_DBREGS); diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index 5a832a6..02588af 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -72,6 +72,7 @@ u_int cpu_vendor_id; /* CPU vendor ID */ u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; +u_int cpu_max_ext_state_size; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA RNG feature available in CPU"); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e9d160f..bc14745 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -154,8 +154,10 @@ extern void panicifcpuunsupported(void); #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); -static void get_fpcontext(struct thread *td, mcontext_t *mcp); -static int set_fpcontext(struct thread *td, const mcontext_t *mcp); +static void get_fpcontext(struct thread *td, mcontext_t *mcp, + char *xfpusave, size_t xfpusave_len); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp, + char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* @@ -315,6 +317,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) struct sigacts *psp; char *sp; struct trapframe *regs; + 
char *xfpusave; + size_t xfpusave_len; int sig; int oonstack; @@ -328,6 +332,14 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); + if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { + xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); + xfpusave = __builtin_alloca(xfpusave_len); + } else { + xfpusave_len = 0; + xfpusave = NULL; + } + /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -337,7 +349,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - get_fpcontext(td, &sf.sf_uc.uc_mcontext); + get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; @@ -348,13 +360,18 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(struct sigframe); + sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else - sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128; + sp = (char *)regs->tf_rsp - 128; + if (xfpusave != NULL) { + sp -= xfpusave_len; + sp = (char *)((unsigned long)sp & ~0x3Ful); + sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; + } + sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); @@ -387,7 +404,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) /* * Copy the sigframe out to the user's stack. 
*/ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { + if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || + (xfpusave != NULL && copyout(xfpusave, + (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) + != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif @@ -432,6 +452,8 @@ sys_sigreturn(td, uap) struct proc *p; struct trapframe *regs; ucontext_t *ucp; + char *xfpustate; + size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; @@ -490,7 +512,28 @@ sys_sigreturn(td, uap) return (EINVAL); } - ret = set_fpcontext(td, &ucp->uc_mcontext); + if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { + xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; + if (xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) { + uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", + p->p_pid, td->td_name, xfpustate_len); + return (EINVAL); + } + xfpustate = __builtin_alloca(xfpustate_len); + error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, + xfpustate, xfpustate_len); + if (error != 0) { + uprintf( + "pid %d (%s): sigreturn copying xfpustate failed\n", + p->p_pid, td->td_name); + return (error); + } + } else { + xfpustate = NULL; + xfpustate_len = 0; + } + ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); @@ -1592,6 +1635,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; + struct xstate_hdr *xhdr; u_int64_t msr; char *env; size_t kstack0_sz; @@ -1601,7 +1645,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; - thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level @@ -1650,7 +1693,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); - PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); @@ -1742,13 +1784,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) initializecpu(); /* Initialize CPU registers */ initializecpucache(); - /* make an initial tss so cpu can get interrupt stack on syscall! */ - common_tss[0].tss_rsp0 = thread0.td_kstack + - kstack0_sz - sizeof(struct pcb); - /* Ensure the stack is aligned to 16 bytes */ - common_tss[0].tss_rsp0 &= ~0xFul; - PCPU_SET(rsp0, common_tss[0].tss_rsp0); - /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; @@ -1785,6 +1820,25 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) msgbufinit(msgbufp, msgbufsize); fpuinit(); + /* + * Set up thread0 pcb after fpuinit calculated pcb + fpu save + * area size. Zero out the extended state header in fpu save + * area. + */ + thread0.td_pcb = get_pcb_td(&thread0); + bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); + if (use_xsave) { + xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + + 1); + xhdr->xstate_bv = xsave_mask; + } + /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ + common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; + /* Ensure the stack is aligned to 16 bytes */ + common_tss[0].tss_rsp0 &= ~0xFul; + PCPU_SET(rsp0, common_tss[0].tss_rsp0); + PCPU_SET(curpcb, thread0.td_pcb); + /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); @@ -2054,7 +2108,7 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs) P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); - fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); + fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } @@ -2063,7 +2117,7 @@ int set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); + set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } @@ -2114,9 +2168,11 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); - get_fpcontext(td, mcp); + get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; + mcp->mc_xfpustate = 0; + mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } @@ -2132,6 +2188,7 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; + char *xfpustate; long rflags; int ret; @@ -2142,7 +2199,18 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); - ret = set_fpcontext(td, mcp); + if (mcp->mc_flags & _MC_HASFPXSTATE) { + if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + ret = copyin((void *)mcp->mc_xfpustate, xfpustate, + mcp->mc_xfpustate_len); + if (ret != 0) + return (ret); + } else + xfpustate = NULL; + ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; @@ -2180,35 +2248,51 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) } static void -get_fpcontext(struct thread *td, mcontext_t *mcp) +get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, + size_t xfpusave_len) { + size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); - bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); + if (!use_xsave || xfpusave_len == 0) + return; + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + len = xfpusave_len; + if (len > max_len) { + len = max_len; + bzero(xfpusave + max_len, len - max_len); + } + mcp->mc_flags |= _MC_HASFPXSTATE; + mcp->mc_xfpustate_len = len; + bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int -set_fpcontext(struct thread *td, const mcontext_t *mcp) +set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, + size_t xfpustate_len) { struct savefpu *fpstate; + int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); - else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. 
*/ fpstate_drop(td); - else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + error = 0; + } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); - return (0); + return (error); } void diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 50b52c8..e79f939 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -98,8 +98,8 @@ char *doublefault_stack; char *nmi_stack; void *dpcpu; -struct pcb stoppcbs[MAXCPU]; -struct pcb **susppcbs = NULL; +struct pcb **stoppcbs; +struct pcb **susppcbs; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; @@ -1388,7 +1388,7 @@ cpustop_handler(void) cpu = PCPU_GET(cpuid); - savectx(&stoppcbs[cpu]); + savectx(stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); @@ -1451,7 +1451,11 @@ cpususpend_handler(void) static void release_aps(void *dummy __unused) { + int i; + stoppcbs = malloc(MAXCPU * sizeof(struct pcb *), M_DEVBUF, M_WAITOK); + for (i = 0; i < MAXCPU; i++) + stoppcbs[i] = alloc_pcb(M_WAITOK); if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c new file mode 100644 index 0000000..8eea132 --- /dev/null +++ b/sys/amd64/amd64/ptrace_machdep.c @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 2011 Konstantin Belousov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_compat.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static int +cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data) +{ + char *savefpu; + int error; + + if (!use_xsave) + return (EOPNOTSUPP); + + switch (req) { + case PT_GETXSTATE: + savefpu = (char *)(get_pcb_user_save_td(td) + 1); + error = copyout(savefpu, addr, + cpu_max_ext_state_size - sizeof(struct savefpu)); + break; + + case PT_SETXSTATE: + if (data > cpu_max_ext_state_size - sizeof(struct savefpu)) { + error = EINVAL; + break; + } + savefpu = malloc(data, M_TEMP, M_WAITOK); + error = copyin(addr, savefpu, data); + if (error == 0) + error = fpusetxstate(td, savefpu, data); + free(savefpu, M_TEMP); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} + +#ifdef COMPAT_FREEBSD32 +#define PT_I386_GETXMMREGS (PT_FIRSTMACH + 0) +#define PT_I386_SETXMMREGS (PT_FIRSTMACH + 1) +#define PT_I386_GETXSTATE (PT_FIRSTMACH + 2) +#define PT_I386_SETXSTATE (PT_FIRSTMACH + 3) + +static int +cpu32_ptrace(struct thread *td, int req, void *addr, int data) +{ + struct savefpu *fpstate; + int error; + + switch (req) { + case PT_I386_GETXMMREGS: + error = copyout(get_pcb_user_save_td(td), addr, + sizeof(*fpstate)); + break; + + case PT_I386_SETXMMREGS: + fpstate = get_pcb_user_save_td(td); + error = copyin(addr, fpstate, sizeof(*fpstate)); + fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; + break; + + case PT_I386_GETXSTATE: + error = cpu_ptrace_xstate(td, PT_GETXSTATE, addr, data); + break; + + case PT_I386_SETXSTATE: + error = cpu_ptrace_xstate(td, PT_SETXSTATE, addr, data); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} +#endif + +int +cpu_ptrace(struct thread *td, int req, void *addr, int data) +{ + int error; + +#ifdef COMPAT_FREEBSD32 + if (SV_CURPROC_FLAG(SV_ILP32)) + return (cpu32_ptrace(td, req, addr, data)); +#endif + + switch (req) { + case PT_GETXSTATE: + case PT_SETXSTATE: + error = cpu_ptrace_xstate(td, req, addr, data); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index bcd7fa3..3d0f87e 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -179,6 +179,8 @@ sysarch(td, uap) uint32_t i386base; uint64_t a64base; struct i386_ioperm_args iargs; + struct i386_get_xfpustate i386xfpu; + struct amd64_get_xfpustate a64xfpu; #ifdef CAPABILITY_MODE /* @@ -195,10 +197,12 @@ sysarch(td, uap) case I386_SET_FSBASE: case I386_GET_GSBASE: case I386_SET_GSBASE: + case I386_GET_XFPUSTATE: case AMD64_GET_FSBASE: case AMD64_SET_FSBASE: case AMD64_GET_GSBASE: case AMD64_SET_GSBASE: + case AMD64_GET_XFPUSTATE: break; case I386_SET_IOPERM: @@ -226,6 +230,18 @@ sysarch(td, uap) sizeof(struct i386_ioperm_args))) != 0) return (error); break; + case I386_GET_XFPUSTATE: + if ((error = copyin(uap->parms, &i386xfpu, + sizeof(struct i386_ioperm_args))) != 0) + return (error); + a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr; + a64xfpu.len = i386xfpu.len; + break; + case AMD64_GET_XFPUSTATE: + if ((error = copyin(uap->parms, &a64xfpu, + sizeof(struct amd64_get_xfpustate))) != 0) + return (error); + break; default: break; } @@ -296,6 +312,16 @@ sysarch(td, uap) } break; + case I386_GET_XFPUSTATE: + case AMD64_GET_XFPUSTATE: + if (a64xfpu.len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + fpugetregs(td); + error = copyout((char 
*)(get_pcb_user_save_td(td) + 1), + a64xfpu.addr, a64xfpu.len); + return (error); + default: error = EINVAL; break; diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 16b9c1a..a2363db 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -934,7 +934,7 @@ amd64_syscall(struct thread *td, int traced) KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returing with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); - KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index d05880f..cbb6bc5 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -90,6 +90,52 @@ static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif +struct savefpu * +get_pcb_user_save_td(struct thread *td) +{ + vm_offset_t p; + + p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + cpu_max_ext_state_size; + KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area")); + return ((struct savefpu *)p); +} + +struct savefpu * +get_pcb_user_save_pcb(struct pcb *pcb) +{ + vm_offset_t p; + + p = (vm_offset_t)(pcb + 1); + return ((struct savefpu *)p); +} + +struct pcb * +get_pcb_td(struct thread *td) +{ + vm_offset_t p; + + p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + cpu_max_ext_state_size - sizeof(struct pcb); + return ((struct pcb *)p); +} + +struct pcb * +alloc_pcb(int flags) +{ + struct pcb *res; + struct savefpu_ymm *sf; + + res = malloc(sizeof(struct pcb) + cpu_max_ext_state_size, M_DEVBUF, + flags); + if (use_xsave) { + sf = (struct savefpu_ymm *)get_pcb_user_save_pcb(res); + bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); + sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; + } + return (res); +} + /* * Finish a fork operation, with process p2 nearly set up. 
* Copy and update the pcb, set up the stack so that the child @@ -127,15 +173,16 @@ cpu_fork(td1, p2, td2, flags) fpuexit(td1); /* Point the pcb to the top of the stack */ - pcb2 = (struct pcb *)(td2->td_kstack + - td2->td_kstack_pages * PAGE_SIZE) - 1; + pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ - pcb2->pcb_save = &pcb2->pcb_user_save; + pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); + bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), + cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; @@ -310,11 +357,17 @@ cpu_thread_swapout(struct thread *td) void cpu_thread_alloc(struct thread *td) { - - td->td_pcb = (struct pcb *)(td->td_kstack + - td->td_kstack_pages * PAGE_SIZE) - 1; - td->td_frame = (struct trapframe *)td->td_pcb - 1; - td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; + struct pcb *pcb; + struct xstate_hdr *xhdr; + + td->td_pcb = pcb = get_pcb_td(td); + td->td_frame = (struct trapframe *)pcb - 1; + pcb->pcb_save = get_pcb_user_save_pcb(pcb); + if (use_xsave) { + xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); + bzero(xhdr, sizeof(*xhdr)); + xhdr->xstate_bv = xsave_mask; + } } void @@ -387,7 +440,9 @@ cpu_set_upcall(struct thread *td, struct thread *td0) */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE); - pcb2->pcb_save = &pcb2->pcb_user_save; + pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); + bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, + cpu_max_ext_state_size); set_pcb_flags(pcb2, PCB_FULL_IRET); /* diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index 279df9a..71eed5e 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -155,7 +155,7 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs) sv_87 = (struct save87 *)regs; penv_87 = &sv_87->sv_env; fpugetregs(td); - sv_fpu = &td->td_pcb->pcb_user_save; + sv_fpu = get_pcb_user_save_td(td); penv_xmm = &sv_fpu->sv_env; /* FPU control/status */ @@ -187,7 +187,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs) { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; + struct savefpu *sv_fpu = get_pcb_user_save_td(td); struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index 2f41870..44d7d70 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -83,15 +84,15 @@ __FBSDID("$FreeBSD$"); #ifdef COMPAT_FREEBSD4 static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *); #endif -static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp); -static int ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void -ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp) +ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, + char *xfpusave, size_t xfpusave_len) { + size_t max_len, len; /* * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE @@ -100,28 +101,43 @@ ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp) * for now, it should be irrelevant for most applications. 
*/ mcp->mc_ownedfp = fpugetregs(td); - bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); + if (!use_xsave || xfpusave_len == 0) + return; + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + len = xfpusave_len; + if (len > max_len) { + len = max_len; + bzero(xfpusave + max_len, len - max_len); + } + mcp->mc_flags |= _MC_HASFPXSTATE; + mcp->mc_xfpustate_len = len; + bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int -ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp) +ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp, + char *xfpustate, size_t xfpustate_len) { + int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); - else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); - else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + error = 0; + } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate); + error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, + xfpustate, xfpustate_len); } else return (EINVAL); - return (0); + return (error); } /* @@ -164,10 +180,11 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) mcp->mc_esp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); - ia32_get_fpcontext(td, mcp); + ia32_get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_xfpustate = 0; + mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); set_pcb_flags(pcb, PCB_FULL_IRET); return (0); @@ -183,6 +200,7 @@ static int ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) { struct trapframe *tp; + char *xfpustate; long rflags; int ret; @@ -191,7 +209,18 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) return (EINVAL); rflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); - ret = ia32_set_fpcontext(td, mcp); + if (mcp->mc_flags & _MC_IA32_HASFPXSTATE) { + if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, + mcp->mc_xfpustate_len); + if (ret != 0) + return (ret); + } else + xfpustate = NULL; + ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_gs = mcp->mc_gs; @@ -529,6 +558,8 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) struct sigacts *psp; char *sp; struct trapframe *regs; + char *xfpusave; + size_t xfpusave_len; int oonstack; int sig; @@ -554,6 +585,14 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); + if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { + xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); + xfpusave = __builtin_alloca(xfpusave_len); + } else { + xfpusave_len = 0; + xfpusave = NULL; + } + /* Save user context. 
*/ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -582,7 +621,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext); + ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; @@ -590,11 +629,16 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(sf); - } else - sp = (char *)regs->tf_rsp - sizeof(sf); + SIGISMEMBER(psp->ps_sigonstack, sig)) + sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; + else + sp = (char *)regs->tf_rsp; + if (xfpusave != NULL) { + sp -= xfpusave_len; + sp = (char *)((unsigned long)sp & ~0x3Ful); + sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; + } + sp -= sizeof(sf); /* Align to 16 bytes. */ sfp = (struct ia32_sigframe *)((uintptr_t)sp & ~0xF); PROC_UNLOCK(p); @@ -626,7 +670,10 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) /* * Copy the sigframe out to the user's stack. */ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { + if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || + (xfpusave != NULL && copyout(xfpusave, + PTRIN(sf.sf_uc.uc_mcontext.mc_xfpustate), xfpusave_len) + != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif @@ -812,6 +859,8 @@ freebsd32_sigreturn(td, uap) struct ia32_ucontext uc; struct trapframe *regs; struct ia32_ucontext *ucp; + char *xfpustate; + size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; @@ -858,9 +907,34 @@ freebsd32_sigreturn(td, uap) return (EINVAL); } - ret = ia32_set_fpcontext(td, &ucp->uc_mcontext); - if (ret != 0) + if ((ucp->uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { + xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; + if (xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) { + uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", + td->td_proc->p_pid, td->td_name, xfpustate_len); + return (EINVAL); + } + xfpustate = __builtin_alloca(xfpustate_len); + error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), + xfpustate, xfpustate_len); + if (error != 0) { + uprintf( + "pid %d (%s): sigreturn copying xfpustate failed\n", + td->td_proc->p_pid, td->td_name); + return (error); + } + } else { + xfpustate = NULL; + xfpustate_len = 0; + } + ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate, + xfpustate_len); + if (ret != 0) { + uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", + td->td_proc->p_pid, td->td_name, ret); return (ret); + } regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index c07e09b..9b147c7 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -669,6 +669,41 @@ intr_restore(register_t rflags) write_rflags(rflags); } +static __inline void +xsetbv(uint32_t reg, uint64_t val) +{ + uint32_t low, hi; + + low = val; + hi = val >> 32; + __asm __volatile(".byte 0x0f,0x01,0xd1" : : + "c" (reg), "a" (low), "d" (hi)); +} + +static __inline void +xsave(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = 
mask >> 32; + /* xsave (%rdi) */ + __asm __volatile(".byte 0x0f,0xae,0x27" : : + "a" (low), "d" (hi), "D" (addr) : "memory"); +} + +static __inline void +xrstor(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + /* xrstor (%rdi) */ + __asm __volatile(".byte 0x0f,0xae,0x2f" : : + "a" (low), "d" (hi), "D" (addr)); +} + #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ int breakpoint(void); @@ -733,6 +768,9 @@ u_int rgs(void); void wbinvd(void); void write_rflags(u_int rf); void wrmsr(u_int msr, uint64_t newval); +void xsetbv(uint32_t reg, uint64_t val); +void xsave(char *addr, uint64_t mask); +void xrstor(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 50b3819..581e26b 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -43,12 +43,17 @@ /* Contents of each x87 floating point accumulator */ struct fpacc87 { - u_char fp_bytes[10]; + uint8_t fp_bytes[10]; }; /* Contents of each SSE extended accumulator */ struct xmmacc { - u_char xmm_bytes[16]; + uint8_t xmm_bytes[16]; +}; + +/* Contents of the upper 16 bytes of each AVX extended accumulator */ +struct ymmacc { + uint8_t ymm_bytes[16]; }; struct envxmm { @@ -67,21 +72,42 @@ struct savefpu { struct envxmm sv_env; struct { struct fpacc87 fp_acc; - u_char fp_pad[6]; /* padding */ + uint8_t fp_pad[6]; /* padding */ } sv_fp[8]; struct xmmacc sv_xmm[16]; - u_char sv_pad[96]; + uint8_t sv_pad[96]; } __aligned(16); -#ifdef _KERNEL -struct fpu_kern_ctx { - struct savefpu hwstate; - struct savefpu *prev; - uint32_t flags; +struct xstate_hdr { + uint64_t xstate_bv; + uint8_t xstate_rsrv0[16]; + uint8_t xstate_rsrv[40]; +}; + +struct savefpu_xstate { + struct xstate_hdr sx_hd; + struct ymmacc sx_ymm[16]; }; -#define FPU_KERN_CTX_FPUINITDONE 0x01 + +struct savefpu_ymm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + int8_t fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[16]; + uint8_t sv_pad[96]; + struct savefpu_xstate sv_xstate; +} __aligned(64); + +#ifdef _KERNEL + +struct fpu_kern_ctx; #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) + +#define XSAVE_AREA_ALIGN 64 + #endif /* @@ -114,9 +140,14 @@ void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td); void fpuinit(void); -void fpusetregs(struct thread *td, struct savefpu *addr); +int fpusetregs(struct thread *td, struct savefpu *addr, + char *xfpustate, size_t xfpustate_size); +int fpusetxstate(struct thread *td, char *xfpustate, + size_t xfpustate_size); int fputrap(void); void fpuuserinited(struct thread *td); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); @@ -124,9 +155,10 @@ int fpu_kern_thread(u_int flags); int is_fpu_kern_thread(u_int flags); /* - * Flags for fpu_kern_enter() and fpu_kern_thread(). + * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). 
*/ #define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 #endif diff --git a/sys/amd64/include/frame.h b/sys/amd64/include/frame.h index 12722a4..e171407 100644 --- a/sys/amd64/include/frame.h +++ b/sys/amd64/include/frame.h @@ -81,6 +81,7 @@ struct trapframe { }; #define TF_HASSEGS 0x1 -/* #define _MC_HASBASES 0x2 */ +#define TF_HASBASES 0x2 +#define TF_HASFPXSTATE 0x4 #endif /* _MACHINE_FRAME_H_ */ diff --git a/sys/amd64/include/kdb.h b/sys/amd64/include/kdb.h index 56d2018..8e2d722 100644 --- a/sys/amd64/include/kdb.h +++ b/sys/amd64/include/kdb.h @@ -32,7 +32,7 @@ #include #include -#define KDB_STOPPEDPCB(pc) &stoppcbs[pc->pc_cpuid] +#define KDB_STOPPEDPCB(pc) stoppcbs[pc->pc_cpuid] static __inline void kdb_cpu_clear_singlestep(void) diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 479c84e..fe30974 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -51,6 +51,7 @@ extern u_int cpu_clflush_line_size; extern u_int cpu_fxsr; extern u_int cpu_high; extern u_int cpu_id; +extern u_int cpu_max_ext_state_size; extern u_int cpu_mxcsr_mask; extern u_int cpu_procinfo; extern u_int cpu_procinfo2; @@ -67,14 +68,19 @@ extern int _ucodesel; extern int _ucode32sel; extern int _ufssel; extern int _ugssel; +extern int use_xsave; +extern uint64_t xsave_mask; typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +struct pcb; +struct savefpu; struct thread; struct reg; struct fpreg; struct dbreg; struct dumperinfo; +struct pcb *alloc_pcb(int flags); void amd64_syscall(struct thread *td, int traced); void busdma_swi(void); void cpu_setregs(void); @@ -105,5 +111,8 @@ void pagezero(void *addr); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); int user_dbreg_trap(void); void minidumpsys(struct dumperinfo *); +struct savefpu *get_pcb_user_save_td(struct thread *td); +struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); +struct pcb *get_pcb_td(struct thread *td); #endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 1af8f6d..61f651bb 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -92,7 +92,8 @@ struct pcb { struct amd64tss *pcb_tssp; struct savefpu *pcb_save; - struct savefpu pcb_user_save; + + uint64_t pcb_pad[2]; }; #ifdef _KERNEL @@ -130,6 +131,7 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags) void makectx(struct trapframe *, struct pcb *); int savectx(struct pcb *); + #endif #endif /* _AMD64_PCB_H_ */ diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index c4b0c44..d07dbac 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -226,6 +226,8 @@ __curthread(void) } #define curthread (__curthread()) +#define IS_BSP() (PCPU_GET(cpuid) == 0) + #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" diff --git a/sys/amd64/include/ptrace.h b/sys/amd64/include/ptrace.h index eef24f8..28f94f7 100644 --- a/sys/amd64/include/ptrace.h +++ b/sys/amd64/include/ptrace.h @@ -33,4 +33,9 @@ #ifndef _MACHINE_PTRACE_H_ #define _MACHINE_PTRACE_H_ +#define __HAVE_PTRACE_MACHDEP + +#define PT_GETXSTATE (PT_FIRSTMACH + 0) +#define PT_SETXSTATE (PT_FIRSTMACH + 1) + #endif diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index de686b7..7180aa6 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -33,7 +33,7 @@ extern u_int32_t mptramp_pagetables; /* global data in mp_machdep.c */ extern int mp_naps; extern int 
boot_cpu_id; -extern struct pcb stoppcbs[]; +extern struct pcb **stoppcbs; extern int cpu_apic_ids[]; #ifdef COUNT_IPIS extern u_long *ipi_invltlb_counts[MAXCPU]; diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h index 4c50166..7ba5f9f 100644 --- a/sys/amd64/include/specialreg.h +++ b/sys/amd64/include/specialreg.h @@ -66,6 +66,7 @@ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. @@ -76,6 +77,18 @@ #define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ /* + * Intel Extended Features registers + */ +#define XCR0 0 /* XFEATURE_ENABLED_MASK register */ + +#define XFEATURE_ENABLED_X87 0x00000001 +#define XFEATURE_ENABLED_SSE 0x00000002 +#define XFEATURE_ENABLED_AVX 0x00000004 + +#define XFEATURE_AVX \ + (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX) + +/* * CPUID instruction features register */ #define CPUID_FPU 0x00000001 diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h index 7b95a8b..773bb99 100644 --- a/sys/amd64/include/sysarch.h +++ b/sys/amd64/include/sysarch.h @@ -50,12 +50,14 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define I386_GET_XFPUSTATE 11 /* Leave space for 0-127 for to avoid translating syscalls */ #define AMD64_GET_FSBASE 128 #define AMD64_SET_FSBASE 129 #define AMD64_GET_GSBASE 130 #define AMD64_SET_GSBASE 131 +#define AMD64_GET_XFPUSTATE 132 struct i386_ldt_args { unsigned int start; @@ -69,6 +71,16 @@ struct i386_ioperm_args { int enable; }; +struct i386_get_xfpustate { + int addr; + int len; +}; + +struct amd64_get_xfpustate { + void *addr; + int len; +}; + #ifndef _KERNEL __BEGIN_DECLS int amd64_get_fsbase(void **); diff --git a/sys/amd64/include/ucontext.h b/sys/amd64/include/ucontext.h index 75b7bd2..5ab841e 100644 --- a/sys/amd64/include/ucontext.h +++ b/sys/amd64/include/ucontext.h @@ -37,7 +37,8 @@ */ #define _MC_HASSEGS 0x1 #define _MC_HASBASES 0x2 -#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES) +#define _MC_HASFPXSTATE 0x4 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) typedef struct __mcontext { /* @@ -93,7 +94,10 @@ typedef struct __mcontext { __register_t mc_fsbase; __register_t mc_gsbase; - long mc_spare[6]; + __register_t mc_xfpustate; + __register_t mc_xfpustate_len; + + long mc_spare[4]; } mcontext_t; #endif /* !_MACHINE_UCONTEXT_H_ */ diff --git a/sys/compat/ia32/ia32_signal.h b/sys/compat/ia32/ia32_signal.h index 0c54caf..d6f1620 100644 --- a/sys/compat/ia32/ia32_signal.h +++ b/sys/compat/ia32/ia32_signal.h @@ -32,6 +32,12 @@ #ifndef _COMPAT_IA32_IA32_SIGNAL_H #define _COMPAT_IA32_IA32_SIGNAL_H +#define _MC_IA32_HASSEGS 0x1 +#define _MC_IA32_HASBASES 0x2 +#define _MC_IA32_HASFPXSTATE 0x4 +#define _MC_IA32_FLAG_MASK \ + (_MC_IA32_HASSEGS | _MC_IA32_HASBASES | _MC_IA32_HASFPXSTATE) + struct ia32_mcontext { u_int32_t mc_onstack; /* XXX - sigcontext compat. */ u_int32_t mc_gs; /* machine state (struct trapframe) */ @@ -57,14 +63,16 @@ struct ia32_mcontext { /* We use the same values for fpformat and ownedfp */ u_int32_t mc_fpformat; u_int32_t mc_ownedfp; - u_int32_t mc_spare1[1]; /* align next field to 16 bytes */ + u_int32_t mc_flags; /* * See for the internals of mc_fpstate[]. 
*/ u_int32_t mc_fpstate[128] __aligned(16); u_int32_t mc_fsbase; u_int32_t mc_gsbase; - u_int32_t mc_spare2[6]; + u_int32_t mc_xfpustate; + u_int32_t mc_xfpustate_len; + u_int32_t mc_spare2[4]; }; struct ia32_ucontext { @@ -112,6 +120,52 @@ struct ia32_ucontext4 { }; #endif +#ifdef COMPAT_FREEBSD9 +struct ia32_mcontext9 { + u_int32_t mc_onstack; /* XXX - sigcontext compat. */ + u_int32_t mc_gs; /* machine state (struct trapframe) */ + u_int32_t mc_fs; + u_int32_t mc_es; + u_int32_t mc_ds; + u_int32_t mc_edi; + u_int32_t mc_esi; + u_int32_t mc_ebp; + u_int32_t mc_isp; + u_int32_t mc_ebx; + u_int32_t mc_edx; + u_int32_t mc_ecx; + u_int32_t mc_eax; + u_int32_t mc_trapno; + u_int32_t mc_err; + u_int32_t mc_eip; + u_int32_t mc_cs; + u_int32_t mc_eflags; + u_int32_t mc_esp; + u_int32_t mc_ss; + u_int32_t mc_len; /* sizeof(struct ia32_mcontext) */ + /* We use the same values for fpformat and ownedfp */ + u_int32_t mc_fpformat; + u_int32_t mc_ownedfp; + u_int32_t mc_spare1[1]; /* align next field to 16 bytes */ + /* + * See for the internals of mc_fpstate[]. + */ + u_int32_t mc_fpstate[128] __aligned(16); + u_int32_t mc_fsbase; + u_int32_t mc_gsbase; + u_int32_t mc_spare2[6]; +}; + +struct ia32_ucontext9 { + sigset_t uc_sigmask; + struct ia32_mcontext9 uc_mcontext; + u_int32_t uc_link; + struct sigaltstack32 uc_stack; + u_int32_t uc_flags; + u_int32_t __spare__[4]; +}; +#endif + #ifdef COMPAT_43 struct ia32_sigcontext3 { u_int32_t sc_onstack; diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index b396600..ff60776 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -124,6 +124,7 @@ amd64/amd64/mp_watchdog.c optional mp_watchdog smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine +amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c index ed12f28..ca00a57 100644 --- a/sys/crypto/aesni/aesni.c +++ b/sys/crypto/aesni/aesni.c @@ -116,6 +116,7 @@ aesni_detach(device_t dev) } while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); + fpu_kern_free_ctx(ses->fpu_ctx); free(ses, M_AESNI); } rw_wunlock(&sc->lock); @@ -165,8 +166,13 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) rw_wunlock(&sc->lock); return (ENOMEM); } - KASSERT(((uintptr_t)ses) % 0x10 == 0, - ("malloc returned unaligned pointer")); + ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | + FPU_KERN_NOWAIT); + if (ses->fpu_ctx == NULL) { + free(ses, M_AESNI); + rw_wunlock(&sc->lock); + return (ENOMEM); + } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); @@ -191,12 +197,15 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) { + struct fpu_kern_ctx *ctx; uint32_t sid; sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); + ctx = ses->fpu_ctx; bzero(ses, sizeof(*ses)); ses->id = sid; + ses->fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h index b07ec8b..78255b7 100644 --- a/sys/crypto/aesni/aesni.h +++ b/sys/crypto/aesni/aesni.h @@ -65,7 +65,7 @@ struct aesni_session { int used; uint32_t id; TAILQ_ENTRY(aesni_session) next; - struct fpu_kern_ctx fpu_ctx; + struct fpu_kern_ctx *fpu_ctx; }; /* diff --git a/sys/crypto/aesni/aesni_wrap.c 
b/sys/crypto/aesni/aesni_wrap.c index 1492640..787c1a9 100644 --- a/sys/crypto/aesni/aesni_wrap.c +++ b/sys/crypto/aesni/aesni_wrap.c @@ -227,7 +227,7 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -237,7 +237,7 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) error = aesni_cipher_setup_common(ses, encini->cri_key, encini->cri_klen); if (saved_ctx) - fpu_kern_leave(td, &ses->fpu_ctx); + fpu_kern_leave(td, ses->fpu_ctx); } return (error); } @@ -256,7 +256,7 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); if (error != 0) goto out; saved_ctx = 1; @@ -302,7 +302,7 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, } } if (saved_ctx) - fpu_kern_leave(td, &ses->fpu_ctx); + fpu_kern_leave(td, ses->fpu_ctx); if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); diff --git a/sys/crypto/via/padlock.c b/sys/crypto/via/padlock.c index ba63093..f601d2a 100644 --- a/sys/crypto/via/padlock.c +++ b/sys/crypto/via/padlock.c @@ -156,6 +156,7 @@ padlock_detach(device_t dev) } while ((ses = TAILQ_FIRST(&sc->sc_sessions)) != NULL) { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); + fpu_kern_free_ctx(ses->ses_fpu_ctx); free(ses, M_PADLOCK); } rw_destroy(&sc->sc_sessions_lock); @@ -222,6 +223,13 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) rw_wunlock(&sc->sc_sessions_lock); return (ENOMEM); } + ses->ses_fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | + FPU_KERN_NOWAIT); + if (ses->ses_fpu_ctx == NULL) { + free(ses, M_PADLOCK); + rw_wunlock(&sc->sc_sessions_lock); + return (ENOMEM); + } ses->ses_id = sc->sc_sid++; } else { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); @@ -239,7 +247,7 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) if (macini != NULL) { td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, + error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { @@ -249,7 +257,7 @@ padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) if (error == 0) { error = padlock_hash_setup(ses, macini); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); } if (error != 0) { padlock_freesession_one(sc, ses, 0); @@ -265,15 +273,18 @@ static void padlock_freesession_one(struct padlock_softc *sc, struct padlock_session *ses, int locked) { + struct fpu_kern_ctx *ctx; uint32_t sid = ses->ses_id; if (!locked) rw_wlock(&sc->sc_sessions_lock); TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); padlock_hash_free(ses); + ctx = ses->ses_fpu_ctx; bzero(ses, sizeof(*ses)); ses->ses_used = 0; ses->ses_id = sid; + ses->ses_fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sc_sessions, ses, ses_next); if (!locked) rw_wunlock(&sc->sc_sessions_lock); diff --git a/sys/crypto/via/padlock.h b/sys/crypto/via/padlock.h index c8ee9bd..77a4673 100644 --- a/sys/crypto/via/padlock.h +++ b/sys/crypto/via/padlock.h @@ -76,7 +76,7 @@ struct padlock_session { int ses_used; uint32_t ses_id; TAILQ_ENTRY(padlock_session) ses_next; - struct fpu_kern_ctx 
ses_fpu_ctx; + struct fpu_kern_ctx *ses_fpu_ctx; }; #define PADLOCK_ALIGN(p) (void *)(roundup2((uintptr_t)(p), 16)) diff --git a/sys/crypto/via/padlock_cipher.c b/sys/crypto/via/padlock_cipher.c index 1456ddf..7170211 100644 --- a/sys/crypto/via/padlock_cipher.c +++ b/sys/crypto/via/padlock_cipher.c @@ -251,7 +251,7 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -264,7 +264,7 @@ padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, ses->ses_iv); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); if (allocated) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 0fe182b..924a9ec 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -370,7 +370,7 @@ padlock_hash_process(struct padlock_session *ses, struct cryptodesc *maccrd, td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -383,7 +383,7 @@ padlock_hash_process(struct padlock_session *ses, struct cryptodesc *maccrd, error = padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); return (error); } diff --git a/sys/dev/random/nehemiah.c b/sys/dev/random/nehemiah.c index f91e228..b9f52e7 100644 --- a/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -84,7 +84,7 @@ static uint8_t out[RANDOM_BLOCK_SIZE+7] __aligned(16); static union VIA_ACE_CW acw __aligned(16); -static struct fpu_kern_ctx fpu_ctx_save; +static struct fpu_kern_ctx *fpu_ctx_save; static struct mtx random_nehemiah_mtx; @@ -135,11 +135,14 @@ random_nehemiah_init(void) acw.field.round_count = 12; mtx_init(&random_nehemiah_mtx, "random nehemiah", NULL, MTX_DEF); + fpu_ctx_save = fpu_kern_alloc_ctx(FPU_KERN_NORMAL); } void random_nehemiah_deinit(void) { + + fpu_kern_free_ctx(fpu_ctx_save); mtx_destroy(&random_nehemiah_mtx); } @@ -151,7 +154,7 @@ random_nehemiah_read(void *buf, int c) uint8_t *p; mtx_lock(&random_nehemiah_mtx); - error = fpu_kern_enter(curthread, &fpu_ctx_save, FPU_KERN_NORMAL); + error = fpu_kern_enter(curthread, fpu_ctx_save, FPU_KERN_NORMAL); if (error != 0) { mtx_unlock(&random_nehemiah_mtx); return (0); @@ -196,7 +199,7 @@ random_nehemiah_read(void *buf, int c) c = MIN(RANDOM_BLOCK_SIZE, c); memcpy(buf, out, (size_t)c); - fpu_kern_leave(curthread, &fpu_ctx_save); + fpu_kern_leave(curthread, fpu_ctx_save); mtx_unlock(&random_nehemiah_mtx); return (c); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index e481e22..126b618 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -653,8 +653,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(sf.sf_uc.uc_mcontext.mc_spare1, - sizeof(sf.sf_uc.uc_mcontext.mc_spare1)); + sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); @@ -3382,7 +3381,7 @@ 
get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h index 6b56bb4..932ba67 100644 --- a/sys/i386/include/npx.h +++ b/sys/i386/include/npx.h @@ -101,6 +101,11 @@ struct xmmacc { u_char xmm_bytes[16]; }; +/* Contents of the upper 16 bytes of each AVX extended accumulator */ +struct ymmacc { + uint8_t ymm_bytes[16]; +}; + struct savexmm { struct envxmm sv_env; struct { @@ -116,6 +121,28 @@ union savefpu { struct savexmm sv_xmm; }; +struct xstate_hdr { + uint64_t xstate_bv; + uint8_t xstate_rsrv0[16]; + uint8_t xstate_rsrv[40]; +}; + +struct savexmm_xstate { + struct xstate_hdr sx_hd; + struct ymmacc sx_ymm[16]; +}; + +struct savexmm_ymm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + int8_t fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[16]; + uint8_t sv_pad[96]; + struct savexmm_xstate sv_xstate; +} __aligned(64); + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -138,13 +165,6 @@ union savefpu { #ifdef _KERNEL -struct fpu_kern_ctx { - union savefpu hwstate; - union savefpu *prev; - uint32_t flags; -}; -#define FPU_KERN_CTX_NPXINITDONE 0x01 - #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) int npxdna(void); @@ -157,6 +177,8 @@ void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); int npxtrap(void); void npxuserinited(struct thread *); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); @@ -167,6 +189,7 @@ int is_fpu_kern_thread(u_int flags); * Flags for fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 #endif diff --git a/sys/i386/include/ptrace.h b/sys/i386/include/ptrace.h index 24eb411..c64d3e8 100644 --- a/sys/i386/include/ptrace.h +++ b/sys/i386/include/ptrace.h @@ -37,5 +37,7 @@ #define PT_GETXMMREGS (PT_FIRSTMACH + 0) #define PT_SETXMMREGS (PT_FIRSTMACH + 1) +#define PT_GETXSTATE (PT_FIRSTMACH + 2) +#define PT_SETXSTATE (PT_FIRSTMACH + 3) #endif diff --git a/sys/i386/include/specialreg.h b/sys/i386/include/specialreg.h index e3199f7..601b3e9 100644 --- a/sys/i386/include/specialreg.h +++ b/sys/i386/include/specialreg.h @@ -66,6 +66,7 @@ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. 
diff --git a/sys/i386/include/sysarch.h b/sys/i386/include/sysarch.h index 454d64c..316a5ab 100644 --- a/sys/i386/include/sysarch.h +++ b/sys/i386/include/sysarch.h @@ -47,6 +47,7 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define I386_GET_XFPUSTATE 11 /* These four only exist when running an i386 binary on amd64 */ #define _AMD64_GET_FSBASE 128 @@ -71,6 +72,11 @@ struct i386_vm86_args { char *sub_args; /* args */ }; +struct i386_get_xfpustate { + void *addr; + int len; +}; + #ifndef _KERNEL #include diff --git a/sys/i386/include/ucontext.h b/sys/i386/include/ucontext.h index d8ffc4b..79aabd7 100644 --- a/sys/i386/include/ucontext.h +++ b/sys/i386/include/ucontext.h @@ -31,6 +31,12 @@ #ifndef _MACHINE_UCONTEXT_H_ #define _MACHINE_UCONTEXT_H_ +/* Keep _MC_* values similar to amd64 */ +#define _MC_HASSEGS 0x1 +#define _MC_HASBASES 0x2 +#define _MC_HASFPXSTATE 0x4 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) + typedef struct __mcontext { /* * The definition of mcontext_t must match the layout of @@ -68,7 +74,7 @@ typedef struct __mcontext { #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ int mc_ownedfp; - int mc_spare1[1]; /* align next field to 16 bytes */ + __register_t mc_flags; /* * See for the internals of mc_fpstate[]. */ @@ -77,11 +83,13 @@ typedef struct __mcontext { __register_t mc_fsbase; __register_t mc_gsbase; - int mc_spare2[6]; + __register_t mc_xfpustate; + __register_t mc_xfpustate_len; + + int mc_spare2[4]; } mcontext_t; #if defined(_KERNEL) && defined(COMPAT_FREEBSD4) - struct mcontext4 { __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_gs; /* machine state (struct trapframe) */ diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index f314e44..0da580f 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -985,6 +985,50 @@ DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ +static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", + "Kernel contexts for FPU state"); + +#define XSAVE_AREA_ALIGN 64 + +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +struct fpu_kern_ctx { + union savefpu *prev; + uint32_t flags; + char hwstate1[]; +}; + +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + + sizeof(union savefpu); + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? + M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + /* XXXKIB clear the memory ? 
*/
+	free(ctx, M_FPUKERN_CTX);
+}
+
+static union savefpu *
+fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
+{
+	vm_offset_t p;
+
+	p = (vm_offset_t)&ctx->hwstate1;
+	p = roundup2(p, XSAVE_AREA_ALIGN);
+	return ((union savefpu *)p);
+}
+
 int
 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
 {
@@ -998,7 +1042,7 @@ fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
 	ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
 	npxexit(td);
 	ctx->prev = pcb->pcb_save;
-	pcb->pcb_save = &ctx->hwstate;
+	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
 	pcb->pcb_flags |= PCB_KERNNPX;
 	pcb->pcb_flags &= ~PCB_NPXINITDONE;
 	return (0);
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index c9e1a91..9fdefaf 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -588,8 +588,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 	sdp = &td->td_pcb->pcb_gsd;
 	sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
 	    sdp->sd_lobase;
-	bzero(sf.sf_uc.uc_mcontext.mc_spare1,
-	    sizeof(sf.sf_uc.uc_mcontext.mc_spare1));
+	sf.sf_uc.uc_mcontext.mc_flags = 0;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare2,
 	    sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
 	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
@@ -2694,7 +2693,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 	mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
 	sdp = &td->td_pcb->pcb_gsd;
 	mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
-	bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1));
+	mcp->mc_flags = 0;
 	bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
 	return (0);
 }
diff --git a/sys/sys/ucontext.h b/sys/sys/ucontext.h
index baa179e..87a2fb1 100644
--- a/sys/sys/ucontext.h
+++ b/sys/sys/ucontext.h
@@ -72,11 +72,17 @@ struct ucontext4 {
 __BEGIN_DECLS
 
 int	getcontext(ucontext_t *);
+ucontext_t *getcontextx(void);
 int	setcontext(const ucontext_t *);
 void	makecontext(ucontext_t *, void (*)(void), int, ...);
 int	signalcontext(ucontext_t *, int, __sighandler_t *);
 int	swapcontext(ucontext_t *, const ucontext_t *);
+
+#if __BSD_VISIBLE
+size_t	__getcontextx_size(void);
+int	__fillcontextx(char *ctx);
+#endif
+
 __END_DECLS
 
 #else /* _KERNEL */
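
The sys/sys/ucontext.h hunk above is the userland-visible surface of the patch: getcontextx() returns a context that also carries the XSAVE extension area described by mc_xfpustate and mc_xfpustate_len. The sketch below is an editor's illustration, not part of the patch; it assumes an amd64 FreeBSD build with these headers installed and that the returned context is heap-allocated and owned by the caller.

/* Editor's illustration only -- not part of the patch. */
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

int
main(void)
{
	ucontext_t *ucxp;

	ucxp = getcontextx();	/* like getcontext(), plus extended FPU state */
	if (ucxp == NULL) {
		perror("getcontextx");
		return (1);
	}
	if (ucxp->uc_mcontext.mc_flags & _MC_HASFPXSTATE)
		printf("xfpustate: %lu bytes at %#lx\n",
		    (unsigned long)ucxp->uc_mcontext.mc_xfpustate_len,
		    (unsigned long)ucxp->uc_mcontext.mc_xfpustate);
	else
		printf("no extended FPU state reported\n");
	free(ucxp);		/* assumed: caller frees the returned buffer */
	return (0);
}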