Property changes on: lib/libc ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc:r230429,230864 Index: lib/libc/powerpc/gen/Makefile.inc =================================================================== --- lib/libc/powerpc/gen/Makefile.inc (revision 235453) +++ lib/libc/powerpc/gen/Makefile.inc (working copy) @@ -1,7 +1,7 @@ # $FreeBSD$ SRCS += _ctx_start.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c \ - fpgetsticky.c fpsetmask.c fpsetround.c \ + fpgetsticky.c fpsetmask.c fpsetround.c getcontextx.c \ infinity.c ldexp.c makecontext.c modf.c _setjmp.S \ setjmp.S sigsetjmp.S signalcontext.c syncicache.c \ _set_tp.c Index: lib/libc/powerpc/gen/getcontextx.c =================================================================== --- lib/libc/powerpc/gen/getcontextx.c (revision 235443) +++ lib/libc/powerpc/gen/getcontextx.c (working copy) @@ -32,7 +32,7 @@ #include #include -size_t +int __getcontextx_size(void) { Index: lib/libc/arm/gen/Makefile.inc =================================================================== --- lib/libc/arm/gen/Makefile.inc (revision 235453) +++ lib/libc/arm/gen/Makefile.inc (working copy) @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= _ctx_start.S _setjmp.S _set_tp.c alloca.S fabs.c \ - infinity.c ldexp.c makecontext.c modf.c \ + getcontextx.c infinity.c ldexp.c makecontext.c modf.c \ setjmp.S signalcontext.c sigsetjmp.S divsi3.S Index: lib/libc/arm/gen/getcontextx.c =================================================================== --- lib/libc/arm/gen/getcontextx.c (revision 235443) +++ lib/libc/arm/gen/getcontextx.c (working copy) @@ -32,7 +32,7 @@ #include #include -size_t +int __getcontextx_size(void) { Index: lib/libc/sparc64/gen/Makefile.inc =================================================================== --- lib/libc/sparc64/gen/Makefile.inc (revision 235453) +++ lib/libc/sparc64/gen/Makefile.inc (working copy) @@ -2,5 +2,5 @@ SRCS+= _ctx_start.S _setjmp.S fabs.S fixunsdfsi.S 
flt_rounds.c fpgetmask.c \ fpgetround.c fpgetsticky.c fpsetmask.c fpsetround.c \ - infinity.c ldexp.c makecontext.c modf.S \ + getcontextx.c infinity.c ldexp.c makecontext.c modf.S \ signalcontext.c setjmp.S sigsetjmp.S _set_tp.c Index: lib/libc/sparc64/gen/getcontextx.c =================================================================== --- lib/libc/sparc64/gen/getcontextx.c (revision 235443) +++ lib/libc/sparc64/gen/getcontextx.c (working copy) @@ -32,7 +32,7 @@ #include #include -size_t +int __getcontextx_size(void) { Index: lib/libc/ia64/gen/Makefile.inc =================================================================== --- lib/libc/ia64/gen/Makefile.inc (revision 235453) +++ lib/libc/ia64/gen/Makefile.inc (working copy) @@ -3,8 +3,8 @@ SRCS+= __divdf3.S __divdi3.S __divsf3.S __divsi3.S __moddi3.S __modsi3.S \ __udivdi3.S __udivsi3.S __umoddi3.S __umodsi3.S _mcount.S _set_tp.c \ _setjmp.S fabs.S flt_rounds.c fpgetmask.c fpgetround.c fpsetmask.c \ - fpsetround.c infinity.c ldexp.c makecontext.c modf.c setjmp.S \ - signalcontext.c sigsetjmp.S + fpsetround.c getcontextx.c infinity.c ldexp.c makecontext.c modf.c \ + setjmp.S signalcontext.c sigsetjmp.S # The following may go away if function _Unwind_FindTableEntry() # will be part of GCC. 
Index: lib/libc/ia64/gen/getcontextx.c =================================================================== --- lib/libc/ia64/gen/getcontextx.c (revision 235443) +++ lib/libc/ia64/gen/getcontextx.c (working copy) @@ -32,7 +32,7 @@ #include #include -size_t +int __getcontextx_size(void) { Index: lib/libc/mips/gen/Makefile.inc =================================================================== --- lib/libc/mips/gen/Makefile.inc (revision 235453) +++ lib/libc/mips/gen/Makefile.inc (working copy) @@ -6,4 +6,5 @@ # SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \ # fpsetround.c fpsetsticky.c -SRCS+= _ctx_start.S _set_tp.c _setjmp.S makecontext.c setjmp.S signalcontext.c sigsetjmp.S +SRCS+= _ctx_start.S _set_tp.c _setjmp.S getcontextx.c makecontext.c \ + setjmp.S signalcontext.c sigsetjmp.S Index: lib/libc/mips/gen/getcontextx.c =================================================================== --- lib/libc/mips/gen/getcontextx.c (revision 235443) +++ lib/libc/mips/gen/getcontextx.c (working copy) @@ -32,7 +32,7 @@ #include #include -size_t +int __getcontextx_size(void) { Index: lib/libc/gen/getcontext.3 =================================================================== --- lib/libc/gen/getcontext.3 (revision 235453) +++ lib/libc/gen/getcontext.3 (working copy) @@ -35,11 +35,11 @@ .\" .\" $FreeBSD$ .\" -.Dd September 10, 2002 +.Dd December 26, 2011 .Dt GETCONTEXT 3 .Os .Sh NAME -.Nm getcontext , setcontext +.Nm getcontext , getcontextx , setcontext .Nd get and set user thread context .Sh LIBRARY .Lb libc @@ -59,6 +59,20 @@ .Fn setcontext . .Pp The +.Fn getcontextx +function saves the current execution context in the newly allocated structure +.Vt ucontext_t , +which is returned on success. +If architecture defines additional CPU states that can be stored in extended +blocks referenced from the +.Vt ucontext_t , +the memory for them may be allocated and their context also stored. 
+Memory returned by +.Fn getcontextx +function shall be freed using +.Fn free 3 . +.Pp +The .Fn setcontext function makes a previously saved thread context the current thread context, i.e., @@ -109,11 +123,24 @@ returns zero and .Fn setcontext does not return; otherwise \-1 is returned. +The +.Fn getcontextx +returns pointer to the allocated and initialized context on success, and +.Va NULL +on failure. .Sh ERRORS No errors are defined for .Fn getcontext or .Fn setcontext . +The +.Fn getcontextx +may return the following errors in +.Va errno : +.Bl -tag -width Er +.It Bq Er ENOMEM +No memory was available to allocate for the context or some extended state. +.El .Sh SEE ALSO .Xr sigaction 2 , .Xr sigaltstack 2 , Index: lib/libc/gen/ucontext.3 =================================================================== --- lib/libc/gen/ucontext.3 (revision 235453) +++ lib/libc/gen/ucontext.3 (working copy) @@ -92,6 +92,9 @@ .Ft int .Fn getcontext "ucontext_t *" ; .It +.Ft "ucontext_t *" +.Fn getcontextx "void" ; +.It .Ft int .Fn setcontext "const ucontext_t *" ; .It @@ -104,4 +107,5 @@ .Sh SEE ALSO .Xr sigaltstack 2 , .Xr getcontext 3 , +.Xr getcontextx 3 , .Xr makecontext 3 Index: lib/libc/gen/Symbol.map =================================================================== --- lib/libc/gen/Symbol.map (revision 235453) +++ lib/libc/gen/Symbol.map (working copy) @@ -371,6 +371,7 @@ FBSD_1.3 { __FreeBSD_libc_enter_restricted_mode; + getcontextx; }; FBSDprivate_1.0 { @@ -487,4 +488,6 @@ _wait; __waitpid; _waitpid; + __fillcontextx; + __getcontextx_size; }; Property changes on: lib/libc/stdtime ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc/stdtime:r230429,230864 Index: lib/libc/i386/gen/Makefile.inc =================================================================== --- lib/libc/i386/gen/Makefile.inc (revision 235453) +++ lib/libc/i386/gen/Makefile.inc (working copy) @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= _ctx_start.S 
_setjmp.S _set_tp.c fabs.S \ - flt_rounds.c infinity.c ldexp.c makecontext.c modf.S \ + flt_rounds.c getcontextx.c infinity.c ldexp.c makecontext.c modf.S \ rfork_thread.S setjmp.S signalcontext.c sigsetjmp.S Index: lib/libc/i386/gen/getcontextx.c =================================================================== --- lib/libc/i386/gen/getcontextx.c (revision 235443) +++ lib/libc/i386/gen/getcontextx.c (working copy) @@ -38,7 +38,7 @@ static int xstate_sz = -1; -size_t +int __getcontextx_size(void) { u_int p[4]; Property changes on: lib/libc/uuid ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc/uuid:r230429,230864 Index: lib/libc/amd64/gen/Makefile.inc =================================================================== --- lib/libc/amd64/gen/Makefile.inc (revision 235453) +++ lib/libc/amd64/gen/Makefile.inc (working copy) @@ -2,7 +2,7 @@ # $FreeBSD$ SRCS+= _setjmp.S _set_tp.c rfork_thread.S setjmp.S sigsetjmp.S \ - fabs.S modf.S \ + fabs.S getcontextx.c modf.S \ infinity.c ldexp.c makecontext.c signalcontext.c \ flt_rounds.c fpgetmask.c fpsetmask.c fpgetprec.c fpsetprec.c \ fpgetround.c fpsetround.c fpgetsticky.c Index: lib/libc/amd64/gen/getcontextx.c =================================================================== --- lib/libc/amd64/gen/getcontextx.c (revision 235443) +++ lib/libc/amd64/gen/getcontextx.c (working copy) @@ -39,7 +39,7 @@ static int xstate_sz = -1; -size_t +int __getcontextx_size(void) { u_int p[4]; Property changes on: lib/libc/sys ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/lib/libc/sys:r230429,230864 Property changes on: sys ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Index: sys/conf/files.amd64 =================================================================== --- 
sys/conf/files.amd64 (revision 235453) +++ sys/conf/files.amd64 (working copy) @@ -129,6 +129,7 @@ amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine +amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard Property changes on: sys/boot ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/boot:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Property changes on: sys/dev/e1000 ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/dev/e1000:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Index: sys/dev/random/nehemiah.c =================================================================== --- sys/dev/random/nehemiah.c (revision 235453) +++ sys/dev/random/nehemiah.c (working copy) @@ -84,7 +84,7 @@ static union VIA_ACE_CW acw __aligned(16); -static struct fpu_kern_ctx fpu_ctx_save; +static struct fpu_kern_ctx *fpu_ctx_save; static struct mtx random_nehemiah_mtx; @@ -135,11 +135,14 @@ acw.field.round_count = 12; mtx_init(&random_nehemiah_mtx, "random nehemiah", NULL, MTX_DEF); + fpu_ctx_save = fpu_kern_alloc_ctx(FPU_KERN_NORMAL); } void random_nehemiah_deinit(void) { + + fpu_kern_free_ctx(fpu_ctx_save); mtx_destroy(&random_nehemiah_mtx); } @@ -151,7 +154,7 @@ uint8_t *p; mtx_lock(&random_nehemiah_mtx); - error = fpu_kern_enter(curthread, &fpu_ctx_save, FPU_KERN_NORMAL); + error = fpu_kern_enter(curthread, fpu_ctx_save, FPU_KERN_NORMAL); if (error != 0) { mtx_unlock(&random_nehemiah_mtx); return (0); @@ -196,7 +199,7 @@ c = MIN(RANDOM_BLOCK_SIZE, c); memcpy(buf, out, (size_t)c); - fpu_kern_leave(curthread, &fpu_ctx_save); + fpu_kern_leave(curthread, fpu_ctx_save); mtx_unlock(&random_nehemiah_mtx); return (c); } Index: 
sys/crypto/aesni/aesni.c =================================================================== --- sys/crypto/aesni/aesni.c (revision 235453) +++ sys/crypto/aesni/aesni.c (working copy) @@ -116,6 +116,7 @@ } while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); + fpu_kern_free_ctx(ses->fpu_ctx); free(ses, M_AESNI); } rw_wunlock(&sc->lock); @@ -165,8 +166,13 @@ rw_wunlock(&sc->lock); return (ENOMEM); } - KASSERT(((uintptr_t)ses) % 0x10 == 0, - ("malloc returned unaligned pointer")); + ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | + FPU_KERN_NOWAIT); + if (ses->fpu_ctx == NULL) { + free(ses, M_AESNI); + rw_wunlock(&sc->lock); + return (ENOMEM); + } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); @@ -191,12 +197,15 @@ static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) { + struct fpu_kern_ctx *ctx; uint32_t sid; sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); + ctx = ses->fpu_ctx; bzero(ses, sizeof(*ses)); ses->id = sid; + ses->fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } Index: sys/crypto/aesni/aesni.h =================================================================== --- sys/crypto/aesni/aesni.h (revision 235453) +++ sys/crypto/aesni/aesni.h (working copy) @@ -65,7 +65,7 @@ int used; uint32_t id; TAILQ_ENTRY(aesni_session) next; - struct fpu_kern_ctx fpu_ctx; + struct fpu_kern_ctx *fpu_ctx; }; /* Index: sys/crypto/aesni/aesni_wrap.c =================================================================== --- sys/crypto/aesni/aesni_wrap.c (revision 235453) +++ sys/crypto/aesni/aesni_wrap.c (working copy) @@ -226,7 +226,7 @@ td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -236,7 +236,7 @@ error = aesni_cipher_setup_common(ses, encini->cri_key, encini->cri_klen); if (saved_ctx) - 
fpu_kern_leave(td, &ses->fpu_ctx); + fpu_kern_leave(td, ses->fpu_ctx); } return (error); } @@ -255,7 +255,7 @@ td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); if (error != 0) goto out; saved_ctx = 1; @@ -301,7 +301,7 @@ } } if (saved_ctx) - fpu_kern_leave(td, &ses->fpu_ctx); + fpu_kern_leave(td, ses->fpu_ctx); if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); Index: sys/crypto/via/padlock_hash.c =================================================================== --- sys/crypto/via/padlock_hash.c (revision 235453) +++ sys/crypto/via/padlock_hash.c (working copy) @@ -370,7 +370,7 @@ td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -383,7 +383,7 @@ error = padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); return (error); } Index: sys/crypto/via/padlock.h =================================================================== --- sys/crypto/via/padlock.h (revision 235453) +++ sys/crypto/via/padlock.h (working copy) @@ -76,7 +76,7 @@ int ses_used; uint32_t ses_id; TAILQ_ENTRY(padlock_session) ses_next; - struct fpu_kern_ctx ses_fpu_ctx; + struct fpu_kern_ctx *ses_fpu_ctx; }; #define PADLOCK_ALIGN(p) (void *)(roundup2((uintptr_t)(p), 16)) Index: sys/crypto/via/padlock_cipher.c =================================================================== --- sys/crypto/via/padlock_cipher.c (revision 235453) +++ sys/crypto/via/padlock_cipher.c (working copy) @@ -251,7 +251,7 @@ td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, FPU_KERN_NORMAL); + error = fpu_kern_enter(td, ses->ses_fpu_ctx, 
FPU_KERN_NORMAL); saved_ctx = 1; } else { error = 0; @@ -264,7 +264,7 @@ ses->ses_iv); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); if (allocated) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, Index: sys/crypto/via/padlock.c =================================================================== --- sys/crypto/via/padlock.c (revision 235453) +++ sys/crypto/via/padlock.c (working copy) @@ -156,6 +156,7 @@ } while ((ses = TAILQ_FIRST(&sc->sc_sessions)) != NULL) { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); + fpu_kern_free_ctx(ses->ses_fpu_ctx); free(ses, M_PADLOCK); } rw_destroy(&sc->sc_sessions_lock); @@ -222,6 +223,13 @@ rw_wunlock(&sc->sc_sessions_lock); return (ENOMEM); } + ses->ses_fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | + FPU_KERN_NOWAIT); + if (ses->ses_fpu_ctx == NULL) { + free(ses, M_PADLOCK); + rw_wunlock(&sc->sc_sessions_lock); + return (ENOMEM); + } ses->ses_id = sc->sc_sid++; } else { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); @@ -239,7 +247,7 @@ if (macini != NULL) { td = curthread; if (!is_fpu_kern_thread(0)) { - error = fpu_kern_enter(td, &ses->ses_fpu_ctx, + error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL); saved_ctx = 1; } else { @@ -249,7 +257,7 @@ if (error == 0) { error = padlock_hash_setup(ses, macini); if (saved_ctx) - fpu_kern_leave(td, &ses->ses_fpu_ctx); + fpu_kern_leave(td, ses->ses_fpu_ctx); } if (error != 0) { padlock_freesession_one(sc, ses, 0); @@ -265,15 +273,18 @@ padlock_freesession_one(struct padlock_softc *sc, struct padlock_session *ses, int locked) { + struct fpu_kern_ctx *ctx; uint32_t sid = ses->ses_id; if (!locked) rw_wlock(&sc->sc_sessions_lock); TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); padlock_hash_free(ses); + ctx = ses->ses_fpu_ctx; bzero(ses, sizeof(*ses)); ses->ses_used = 0; ses->ses_id = sid; + ses->ses_fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sc_sessions, ses, ses_next); if (!locked) rw_wunlock(&sc->sc_sessions_lock); 
Index: sys/compat/ia32/ia32_signal.h =================================================================== --- sys/compat/ia32/ia32_signal.h (revision 235453) +++ sys/compat/ia32/ia32_signal.h (working copy) @@ -32,6 +32,12 @@ #ifndef _COMPAT_IA32_IA32_SIGNAL_H #define _COMPAT_IA32_IA32_SIGNAL_H +#define _MC_IA32_HASSEGS 0x1 +#define _MC_IA32_HASBASES 0x2 +#define _MC_IA32_HASFPXSTATE 0x4 +#define _MC_IA32_FLAG_MASK \ + (_MC_IA32_HASSEGS | _MC_IA32_HASBASES | _MC_IA32_HASFPXSTATE) + struct ia32_mcontext { u_int32_t mc_onstack; /* XXX - sigcontext compat. */ u_int32_t mc_gs; /* machine state (struct trapframe) */ @@ -57,14 +63,16 @@ /* We use the same values for fpformat and ownedfp */ u_int32_t mc_fpformat; u_int32_t mc_ownedfp; - u_int32_t mc_spare1[1]; /* align next field to 16 bytes */ + u_int32_t mc_flags; /* * See for the internals of mc_fpstate[]. */ u_int32_t mc_fpstate[128] __aligned(16); u_int32_t mc_fsbase; u_int32_t mc_gsbase; - u_int32_t mc_spare2[6]; + u_int32_t mc_xfpustate; + u_int32_t mc_xfpustate_len; + u_int32_t mc_spare2[4]; }; struct ia32_ucontext { Index: sys/pc98/pc98/machdep.c =================================================================== --- sys/pc98/pc98/machdep.c (revision 235453) +++ sys/pc98/pc98/machdep.c (working copy) @@ -583,8 +583,7 @@ sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(sf.sf_uc.uc_mcontext.mc_spare1, - sizeof(sf.sf_uc.uc_mcontext.mc_spare1)); + sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); @@ -2088,11 +2087,13 @@ { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; + size_t kstack0_sz; struct pcpu *pc; thread0.td_kstack = proc0kstack; - thread0.td_pcb = (struct pcb *) - (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + thread0.td_kstack_pages = KSTACK_PAGES; + kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; + 
thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level @@ -2262,7 +2263,7 @@ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); @@ -2632,7 +2633,7 @@ mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } Index: sys/i386/include/ucontext.h =================================================================== --- sys/i386/include/ucontext.h (revision 235453) +++ sys/i386/include/ucontext.h (working copy) @@ -31,6 +31,12 @@ #ifndef _MACHINE_UCONTEXT_H_ #define _MACHINE_UCONTEXT_H_ +/* Keep _MC_* values similar to amd64 */ +#define _MC_HASSEGS 0x1 +#define _MC_HASBASES 0x2 +#define _MC_HASFPXSTATE 0x4 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) + typedef struct __mcontext { /* * The first 20 fields must match the definition of @@ -67,7 +73,7 @@ #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ int mc_ownedfp; - int mc_spare1[1]; /* align next field to 16 bytes */ + __register_t mc_flags; /* * See for the internals of mc_fpstate[]. 
*/ @@ -76,11 +82,13 @@ __register_t mc_fsbase; __register_t mc_gsbase; - int mc_spare2[6]; + __register_t mc_xfpustate; + __register_t mc_xfpustate_len; + + int mc_spare2[4]; } mcontext_t; #if defined(_KERNEL) && defined(COMPAT_FREEBSD4) - struct mcontext4 { __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_gs; /* machine state (struct trapframe) */ Index: sys/i386/include/npx.h =================================================================== --- sys/i386/include/npx.h (revision 235453) +++ sys/i386/include/npx.h (working copy) @@ -101,6 +101,11 @@ u_char xmm_bytes[16]; }; +/* Contents of the upper 16 bytes of each AVX extended accumulator */ +struct ymmacc { + uint8_t ymm_bytes[16]; +}; + struct savexmm { struct envxmm sv_env; struct { @@ -116,6 +121,28 @@ struct savexmm sv_xmm; }; +struct xstate_hdr { + uint64_t xstate_bv; + uint8_t xstate_rsrv0[16]; + uint8_t xstate_rsrv[40]; +}; + +struct savexmm_xstate { + struct xstate_hdr sx_hd; + struct ymmacc sx_ymm[16]; +}; + +struct savexmm_ymm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + int8_t fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[16]; + uint8_t sv_pad[96]; + struct savexmm_xstate sv_xstate; +} __aligned(64); + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -138,13 +165,6 @@ #ifdef _KERNEL -struct fpu_kern_ctx { - union savefpu hwstate; - union savefpu *prev; - uint32_t flags; -}; -#define FPU_KERN_CTX_NPXINITDONE 0x01 - #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) int npxdna(void); @@ -157,6 +177,8 @@ void npxsetregs(struct thread *td, union savefpu *addr); int npxtrap(void); void npxuserinited(struct thread *); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); @@ -167,6 +189,7 @@ * 
Flags for fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 #endif Index: sys/i386/include/sysarch.h =================================================================== --- sys/i386/include/sysarch.h (revision 235453) +++ sys/i386/include/sysarch.h (working copy) @@ -47,6 +47,7 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define I386_GET_XFPUSTATE 11 /* These four only exist when running an i386 binary on amd64 */ #define _AMD64_GET_FSBASE 128 @@ -71,6 +72,11 @@ char *sub_args; /* args */ }; +struct i386_get_xfpustate { + void *addr; + int len; +}; + #ifndef _KERNEL #include Index: sys/i386/include/signal.h =================================================================== --- sys/i386/include/signal.h (revision 235453) +++ sys/i386/include/signal.h (working copy) @@ -114,13 +114,16 @@ */ int sc_fpformat; int sc_ownedfp; - int sc_spare1[1]; + int sc_flags; int sc_fpstate[128] __aligned(16); int sc_fsbase; int sc_gsbase; - int sc_spare2[6]; + int sc_xfpustate; + int sc_xfpustate_len; + + int sc_spare2[4]; }; #define sc_sp sc_esp Index: sys/i386/include/ptrace.h =================================================================== --- sys/i386/include/ptrace.h (revision 235453) +++ sys/i386/include/ptrace.h (working copy) @@ -37,5 +37,7 @@ #define PT_GETXMMREGS (PT_FIRSTMACH + 0) #define PT_SETXMMREGS (PT_FIRSTMACH + 1) +#define PT_GETXSTATE (PT_FIRSTMACH + 2) +#define PT_SETXSTATE (PT_FIRSTMACH + 3) #endif Index: sys/i386/include/specialreg.h =================================================================== --- sys/i386/include/specialreg.h (revision 235453) +++ sys/i386/include/specialreg.h (working copy) @@ -66,6 +66,7 @@ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV 
*/ /* * Bits in AMD64 special registers. EFER is 64 bits wide. Property changes on: sys/i386/conf/XENHVM ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/i386/conf/XENHVM:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Index: sys/i386/i386/machdep.c =================================================================== --- sys/i386/i386/machdep.c (revision 235453) +++ sys/i386/i386/machdep.c (working copy) @@ -649,8 +649,7 @@ sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(sf.sf_uc.uc_mcontext.mc_spare1, - sizeof(sf.sf_uc.uc_mcontext.mc_spare1)); + sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); @@ -2508,6 +2507,7 @@ { unsigned long gdtmachpfn; int error, gsel_tss, metadata_missing, x, pa; + size_t kstack0_sz; struct pcpu *pc; struct callback_register event = { .type = CALLBACKTYPE_event, @@ -2519,8 +2519,9 @@ }; thread0.td_kstack = proc0kstack; - thread0.td_pcb = (struct pcb *) - (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + thread0.td_kstack_pages = KSTACK_PAGES; + kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; + thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level @@ -2671,7 +2672,7 @@ /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), @@ -2734,11 +2735,13 @@ { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; + size_t kstack0_sz; struct pcpu *pc; thread0.td_kstack = proc0kstack; - thread0.td_pcb = (struct pcb *) - (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + thread0.td_kstack_pages = KSTACK_PAGES; + kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; + thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level @@ -2930,7 +2933,7 @@ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); @@ -3314,7 +3317,7 @@ mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } Index: sys/i386/isa/npx.c =================================================================== --- sys/i386/isa/npx.c (revision 235453) +++ sys/i386/isa/npx.c (working copy) @@ -985,6 +985,50 @@ #endif #endif /* DEV_ISA */ +static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", + "Kernel contexts for FPU state"); + +#define XSAVE_AREA_ALIGN 64 + +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +struct fpu_kern_ctx { + union savefpu 
*prev; + uint32_t flags; + char hwstate1[]; +}; + +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + + sizeof(union savefpu); + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? + M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + /* XXXKIB clear the memory ? */ + free(ctx, M_FPUKERN_CTX); +} + +static union savefpu * +fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) +{ + vm_offset_t p; + + p = (vm_offset_t)&ctx->hwstate1; + p = roundup2(p, XSAVE_AREA_ALIGN); + return ((union savefpu *)p); +} + int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { @@ -998,7 +1042,7 @@ ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; - pcb->pcb_save = &ctx->hwstate; + pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); Property changes on: sys/contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/pf:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Property changes on: sys/contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/contrib/dev/acpica:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Property changes on: sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/cddl/contrib/opensolaris:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Index: sys/amd64/acpica/acpi_wakecode.S =================================================================== --- sys/amd64/acpica/acpi_wakecode.S (revision 235453) +++ sys/amd64/acpica/acpi_wakecode.S (working copy) @@ 
-270,6 +270,8 @@ wakeup_gdt: .word 0 .quad 0 +wakeup_fpusave: + .quad 0 ALIGN_DATA wakeup_efer: Index: sys/amd64/acpica/acpi_wakeup.c =================================================================== --- sys/amd64/acpica/acpi_wakeup.c (revision 235453) +++ sys/amd64/acpica/acpi_wakeup.c (working copy) @@ -45,6 +45,7 @@ #include #include #include +#include #ifdef SMP #include @@ -67,8 +68,10 @@ #ifdef SMP extern struct pcb **susppcbs; +extern void **suspfpusave; #else static struct pcb **susppcbs; +static void **suspfpusave; #endif int acpi_restorecpu(vm_offset_t, struct pcb *); @@ -105,6 +108,7 @@ int ms; WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]); + WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[cpu]); WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, susppcbs[cpu]->pcb_gdt.rd_base); @@ -243,6 +247,7 @@ load_cr3(KPML4phys); if (savectx(susppcbs[0])) { + ctx_fpusave(suspfpusave[0]); #ifdef SMP if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { device_printf(sc->acpi_dev, @@ -256,6 +261,7 @@ WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0)); WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]); + WAKECODE_FIXUP(wakeup_fpusave, void *, suspfpusave[0]); WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[0]->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, @@ -333,8 +339,11 @@ return (NULL); } susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); - for (i = 0; i < mp_ncpus; i++) + suspfpusave = malloc(mp_ncpus * sizeof(void *), M_DEVBUF, M_WAITOK); + for (i = 0; i < mp_ncpus; i++) { susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); + suspfpusave[i] = alloc_fpusave(M_WAITOK); + } return (wakeaddr); } Index: sys/amd64/acpica/acpi_switch.S =================================================================== --- sys/amd64/acpica/acpi_switch.S (revision 235453) +++ sys/amd64/acpica/acpi_switch.S (working copy) @@ -146,11 +146,22 @@ /* Restore 
FPU state. */ fninit - fxrstor PCB_USERFPU(%rdi) + movq WAKEUP_CTX(fpusave),%rdi + cmpl $0,use_xsave + jne 1f + fxrstor (%rdi) + jmp 2f +1: movl xsave_mask,%eax + movl xsave_mask+4,%edx +/* xrstor (%rdi) */ + .byte 0x0f,0xae,0x2f +2: /* Reload CR0. */ movq %rcx, %cr0 + movq WAKEUP_CTX(pcb),%rdi + /* Restore return address. */ movq PCB_RIP(%rdi), %rax movq %rax, (%rsp) Index: sys/amd64/include/ucontext.h =================================================================== --- sys/amd64/include/ucontext.h (revision 235453) +++ sys/amd64/include/ucontext.h (working copy) @@ -37,7 +37,8 @@ */ #define _MC_HASSEGS 0x1 #define _MC_HASBASES 0x2 -#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES) +#define _MC_HASFPXSTATE 0x4 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) typedef struct __mcontext { /* @@ -92,7 +93,10 @@ __register_t mc_fsbase; __register_t mc_gsbase; - long mc_spare[6]; + __register_t mc_xfpustate; + __register_t mc_xfpustate_len; + + long mc_spare[4]; } mcontext_t; #endif /* !_MACHINE_UCONTEXT_H_ */ Index: sys/amd64/include/pcb.h =================================================================== --- sys/amd64/include/pcb.h (revision 235453) +++ sys/amd64/include/pcb.h (working copy) @@ -92,7 +92,8 @@ struct amd64tss *pcb_tssp; struct savefpu *pcb_save; - struct savefpu pcb_user_save; + + uint64_t pcb_pad[2]; }; #ifdef _KERNEL @@ -130,6 +131,7 @@ void makectx(struct trapframe *, struct pcb *); int savectx(struct pcb *); + #endif #endif /* _AMD64_PCB_H_ */ Index: sys/amd64/include/sysarch.h =================================================================== --- sys/amd64/include/sysarch.h (revision 235453) +++ sys/amd64/include/sysarch.h (working copy) @@ -48,12 +48,14 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define I386_GET_XFPUSTATE 11 /* Leave space for 0-127 for to avoid translating syscalls */ #define AMD64_GET_FSBASE 128 #define AMD64_SET_FSBASE 129 #define AMD64_GET_GSBASE 130 #define 
AMD64_SET_GSBASE 131 +#define AMD64_GET_XFPUSTATE 132 struct i386_ldt_args { unsigned int start; @@ -67,6 +69,16 @@ int enable; }; +struct i386_get_xfpustate { + unsigned int addr; + int len; +}; + +struct amd64_get_xfpustate { + void *addr; + int len; +}; + #ifndef _KERNEL #include Index: sys/amd64/include/frame.h =================================================================== --- sys/amd64/include/frame.h (revision 235453) +++ sys/amd64/include/frame.h (working copy) @@ -81,6 +81,7 @@ }; #define TF_HASSEGS 0x1 -/* #define _MC_HASBASES 0x2 */ +#define TF_HASBASES 0x2 +#define TF_HASFPXSTATE 0x4 #endif /* _MACHINE_FRAME_H_ */ Index: sys/amd64/include/pcpu.h =================================================================== --- sys/amd64/include/pcpu.h (revision 235453) +++ sys/amd64/include/pcpu.h (working copy) @@ -226,6 +226,8 @@ } #define curthread (__curthread()) +#define IS_BSP() (PCPU_GET(cpuid) == 0) + #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" Index: sys/amd64/include/fpu.h =================================================================== --- sys/amd64/include/fpu.h (revision 235453) +++ sys/amd64/include/fpu.h (working copy) @@ -43,45 +43,71 @@ /* Contents of each x87 floating point accumulator */ struct fpacc87 { - u_char fp_bytes[10]; + uint8_t fp_bytes[10]; }; /* Contents of each SSE extended accumulator */ struct xmmacc { - u_char xmm_bytes[16]; + uint8_t xmm_bytes[16]; }; +/* Contents of the upper 16 bytes of each AVX extended accumulator */ +struct ymmacc { + uint8_t ymm_bytes[16]; +}; + struct envxmm { - u_int16_t en_cw; /* control word (16bits) */ - u_int16_t en_sw; /* status word (16bits) */ - u_int8_t en_tw; /* tag word (8bits) */ - u_int8_t en_zero; - u_int16_t en_opcode; /* opcode last executed (11 bits ) */ - u_int64_t en_rip; /* floating point instruction pointer */ - u_int64_t en_rdp; /* floating operand pointer */ - u_int32_t en_mxcsr; /* SSE 
sontorol/status register */ - u_int32_t en_mxcsr_mask; /* valid bits in mxcsr */ + uint16_t en_cw; /* control word (16bits) */ + uint16_t en_sw; /* status word (16bits) */ + uint8_t en_tw; /* tag word (8bits) */ + uint8_t en_zero; + uint16_t en_opcode; /* opcode last executed (11 bits ) */ + uint64_t en_rip; /* floating point instruction pointer */ + uint64_t en_rdp; /* floating operand pointer */ + uint32_t en_mxcsr; /* SSE control/status register */ + uint32_t en_mxcsr_mask; /* valid bits in mxcsr */ }; struct savefpu { struct envxmm sv_env; struct { struct fpacc87 fp_acc; - u_char fp_pad[6]; /* padding */ + uint8_t fp_pad[6]; /* padding */ } sv_fp[8]; struct xmmacc sv_xmm[16]; - u_char sv_pad[96]; + uint8_t sv_pad[96]; } __aligned(16); -#ifdef _KERNEL -struct fpu_kern_ctx { - struct savefpu hwstate; - struct savefpu *prev; - uint32_t flags; +struct xstate_hdr { + uint64_t xstate_bv; + uint8_t xstate_rsrv0[16]; + uint8_t xstate_rsrv[40]; }; -#define FPU_KERN_CTX_FPUINITDONE 0x01 +struct savefpu_xstate { + struct xstate_hdr sx_hd; + struct ymmacc sx_ymm[16]; +}; + +struct savefpu_ymm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + int8_t fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[16]; + uint8_t sv_pad[96]; + struct savefpu_xstate sv_xstate; +} __aligned(64); + +#ifdef _KERNEL + +struct fpu_kern_ctx; + #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) + +#define XSAVE_AREA_ALIGN 64 + #endif /* @@ -114,9 +140,15 @@ int fpuformat(void); int fpugetregs(struct thread *td); void fpuinit(void); -void fpusetregs(struct thread *td, struct savefpu *addr); +void fpusave(void *addr); +int fpusetregs(struct thread *td, struct savefpu *addr, + char *xfpustate, size_t xfpustate_size); +int fpusetxstate(struct thread *td, char *xfpustate, + size_t xfpustate_size); int fputrap(void); void fpuuserinited(struct thread *td); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); @@ -124,9 +156,10 @@ int is_fpu_kern_thread(u_int flags); /* - * Flags for fpu_kern_enter() and fpu_kern_thread(). + * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 #endif Property changes on: sys/amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo Merged /head/sys/amd64/include/xen:r217886,218389,230260-230262,230269-230270,230426,230429,230538,230765-230766,230864 Index: sys/amd64/include/md_var.h =================================================================== --- sys/amd64/include/md_var.h (revision 235453) +++ sys/amd64/include/md_var.h (working copy) @@ -51,6 +51,7 @@ extern u_int cpu_fxsr; extern u_int cpu_high; extern u_int cpu_id; +extern u_int cpu_max_ext_state_size; extern u_int cpu_mxcsr_mask; extern u_int cpu_procinfo; extern u_int cpu_procinfo2; @@ -67,17 +68,23 @@ extern int _ucode32sel; extern int _ufssel; extern int _ugssel; +extern int use_xsave; +extern uint64_t xsave_mask; typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +struct pcb; +struct savefpu; struct thread; struct reg; struct fpreg; struct dbreg; struct dumperinfo; +void *alloc_fpusave(int flags); void amd64_syscall(struct thread *td, int traced); void busdma_swi(void); void cpu_setregs(void); +void ctx_fpusave(void *); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); void ld_ds(void) __asm(__STRING(ld_ds)); @@ -105,5 +112,8 @@ void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); int user_dbreg_trap(void); void minidumpsys(struct dumperinfo *); +struct savefpu *get_pcb_user_save_td(struct thread *td); +struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); +struct pcb 
*get_pcb_td(struct thread *td); #endif /* !_MACHINE_MD_VAR_H_ */ Index: sys/amd64/include/signal.h =================================================================== --- sys/amd64/include/signal.h (revision 235453) +++ sys/amd64/include/signal.h (working copy) @@ -103,7 +103,10 @@ long sc_fsbase; long sc_gsbase; - long sc_spare[6]; + long sc_xfpustate; + long sc_xfpustate_len; + + long sc_spare[4]; }; #endif /* __BSD_VISIBLE */ Index: sys/amd64/include/ptrace.h =================================================================== --- sys/amd64/include/ptrace.h (revision 235453) +++ sys/amd64/include/ptrace.h (working copy) @@ -33,4 +33,9 @@ #ifndef _MACHINE_PTRACE_H_ #define _MACHINE_PTRACE_H_ +#define __HAVE_PTRACE_MACHDEP + +#define PT_GETXSTATE (PT_FIRSTMACH + 0) +#define PT_SETXSTATE (PT_FIRSTMACH + 1) + #endif Index: sys/amd64/include/specialreg.h =================================================================== --- sys/amd64/include/specialreg.h (revision 235453) +++ sys/amd64/include/specialreg.h (working copy) @@ -66,6 +66,7 @@ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. 
@@ -76,6 +77,18 @@ #define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ /* + * Intel Extended Features registers + */ +#define XCR0 0 /* XFEATURE_ENABLED_MASK register */ + +#define XFEATURE_ENABLED_X87 0x00000001 +#define XFEATURE_ENABLED_SSE 0x00000002 +#define XFEATURE_ENABLED_AVX 0x00000004 + +#define XFEATURE_AVX \ + (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX) + +/* * CPUID instruction features register */ #define CPUID_FPU 0x00000001 Index: sys/amd64/amd64/vm_machdep.c =================================================================== --- sys/amd64/amd64/vm_machdep.c (revision 235453) +++ sys/amd64/amd64/vm_machdep.c (working copy) @@ -89,6 +89,51 @@ static volatile u_int cpu_reset_proxy_active; #endif +struct savefpu * +get_pcb_user_save_td(struct thread *td) +{ + vm_offset_t p; + + p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + cpu_max_ext_state_size; + KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area")); + return ((struct savefpu *)p); +} + +struct savefpu * +get_pcb_user_save_pcb(struct pcb *pcb) +{ + vm_offset_t p; + + p = (vm_offset_t)(pcb + 1); + return ((struct savefpu *)p); +} + +struct pcb * +get_pcb_td(struct thread *td) +{ + vm_offset_t p; + + p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + cpu_max_ext_state_size - sizeof(struct pcb); + return ((struct pcb *)p); +} + +void * +alloc_fpusave(int flags) +{ + struct pcb *res; + struct savefpu_ymm *sf; + + res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); + if (use_xsave) { + sf = (struct savefpu_ymm *)res; + bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); + sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; + } + return (res); +} + /* * Finish a fork operation, with process p2 nearly set up. 
* Copy and update the pcb, set up the stack so that the child @@ -126,15 +171,16 @@ fpuexit(td1); /* Point the pcb to the top of the stack */ - pcb2 = (struct pcb *)(td2->td_kstack + - td2->td_kstack_pages * PAGE_SIZE) - 1; + pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ - pcb2->pcb_save = &pcb2->pcb_user_save; + pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); + bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), + cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; @@ -309,11 +355,17 @@ void cpu_thread_alloc(struct thread *td) { + struct pcb *pcb; + struct xstate_hdr *xhdr; - td->td_pcb = (struct pcb *)(td->td_kstack + - td->td_kstack_pages * PAGE_SIZE) - 1; - td->td_frame = (struct trapframe *)td->td_pcb - 1; - td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; + td->td_pcb = pcb = get_pcb_td(td); + td->td_frame = (struct trapframe *)pcb - 1; + pcb->pcb_save = get_pcb_user_save_pcb(pcb); + if (use_xsave) { + xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); + bzero(xhdr, sizeof(*xhdr)); + xhdr->xstate_bv = xsave_mask; + } } void @@ -386,7 +438,9 @@ */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE); - pcb2->pcb_save = &pcb2->pcb_user_save; + pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); + bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, + cpu_max_ext_state_size); set_pcb_flags(pcb2, PCB_FULL_IRET); /* Index: sys/amd64/amd64/initcpu.c =================================================================== --- sys/amd64/amd64/initcpu.c (revision 235453) +++ sys/amd64/amd64/initcpu.c (working copy) @@ -72,6 +72,7 @@ u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; +u_int cpu_max_ext_state_size; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA C3/C7 RNG feature available in 
CPU"); Index: sys/amd64/amd64/genassym.c =================================================================== --- sys/amd64/amd64/genassym.c (revision 235453) +++ sys/amd64/amd64/genassym.c (working copy) @@ -156,7 +156,7 @@ ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu)); -ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save)); +ASSYM(PCB_USERFPU, sizeof(struct pcb)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_FULL_IRET, PCB_FULL_IRET); ASSYM(PCB_DBREGS, PCB_DBREGS); Index: sys/amd64/amd64/cpu_switch.S =================================================================== --- sys/amd64/amd64/cpu_switch.S (revision 235453) +++ sys/amd64/amd64/cpu_switch.S (working copy) @@ -112,16 +112,25 @@ /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) - jne 1f + jne 3f movq PCB_SAVEFPU(%r8),%r8 clts + cmpl $0,use_xsave + jne 1f fxsave (%r8) - smsw %ax + jmp 2f +1: movq %rdx,%rcx + movl xsave_mask,%eax + movl xsave_mask+4,%edx +/* xsave (%r8) */ + .byte 0x41,0x0f,0xae,0x20 + movq %rcx,%rdx +2: smsw %ax orb $CR0_TS,%al lmsw %ax xorl %eax,%eax movq %rax,PCPU(FPCURTHREAD) -1: +3: /* Save is done. Now fire up new thread. Leave old vmspace. */ movq TD_PCB(%rsi),%r8 @@ -354,10 +363,19 @@ sldt PCB_LDT(%rdi) str PCB_TR(%rdi) - clts - fxsave PCB_USERFPU(%rdi) - movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */ +2: movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */ movl $1,%eax ret END(savectx) + +/* + * Wrapper around fpusave to take care of CR0_TS.
+ */ +ENTRY(ctx_fpusave) + movq %cr0,%rsi + clts + call fpusave + movq %rsi,%cr0 + ret +END(ctx_fpusave) Index: sys/amd64/amd64/fpu.c =================================================================== --- sys/amd64/amd64/fpu.c (revision 235453) +++ sys/amd64/amd64/fpu.c (working copy) @@ -78,6 +78,41 @@ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm __volatile("clts") +static __inline void +xrstor(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + /* xrstor (%rdi) */ + __asm __volatile(".byte 0x0f,0xae,0x2f" : : + "a" (low), "d" (hi), "D" (addr)); +} + +static __inline void +xsave(char *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + /* xsave (%rdi) */ + __asm __volatile(".byte 0x0f,0xae,0x27" : : + "a" (low), "d" (hi), "D" (addr) : "memory"); +} + +static __inline void +xsetbv(uint32_t reg, uint64_t val) +{ + uint32_t low, hi; + + low = val; + hi = val >> 32; + __asm __volatile(".byte 0x0f,0x01,0xd1" : : + "c" (reg), "a" (low), "d" (hi)); +} + #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); @@ -90,26 +125,107 @@ void ldmxcsr(u_int csr); void start_emulating(void); void stop_emulating(void); +void xrstor(char *addr, uint64_t mask); +void xsave(char *addr, uint64_t mask); +void xsetbv(uint32_t reg, uint64_t val); #endif /* __GNUCLIKE_ASM && !lint */ #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) -typedef u_char bool_t; +CTASSERT(sizeof(struct savefpu) == 512); +CTASSERT(sizeof(struct xstate_hdr) == 64); +CTASSERT(sizeof(struct savefpu_ymm) == 832); +/* + * This requirement is to make it easier for asm code to calculate + * offset of the fpu save area from the pcb address. FPU save area + * must be 64-byte aligned.
+ */ +CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); + static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, NULL, 1, "Floating point instructions executed in hardware"); -static struct savefpu fpu_initialstate; +int use_xsave; /* non-static for cpu_switch.S */ +uint64_t xsave_mask; /* the same */ +static struct savefpu *fpu_initialstate; +void +fpusave(void *addr) +{ + + if (use_xsave) + xsave((char *)addr, xsave_mask); + else + fxsave((char *)addr); +} + +static void +fpurestore(void *addr) +{ + + if (use_xsave) + xrstor((char *)addr, xsave_mask); + else + fxrstor((char *)addr); +} + /* - * Initialize the floating point unit. On the boot CPU we generate a - * clean state that is used to initialize the floating point unit when - * it is first used by a process. + * Enable XSAVE if supported and allowed by user. + * Calculate the xsave_mask. */ +static void +fpuinit_bsp1(void) +{ + u_int cp[4]; + uint64_t xsave_mask_user; + + if ((cpu_feature2 & CPUID2_XSAVE) != 0) { + use_xsave = 1; + TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); + } + if (!use_xsave) + return; + + cpuid_count(0xd, 0x0, cp); + xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + if ((cp[0] & xsave_mask) != xsave_mask) + panic("CPU0 does not support X87 or SSE: %x", cp[0]); + xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; + xsave_mask_user = xsave_mask; + TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); + xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; + xsave_mask &= xsave_mask_user; +} + +/* + * Calculate the fpu save area size. + */ +static void +fpuinit_bsp2(void) +{ + u_int cp[4]; + + if (use_xsave) { + cpuid_count(0xd, 0x0, cp); + cpu_max_ext_state_size = cp[1]; + + /* + * Reload the cpu_feature2, since we enabled OSXSAVE. + */ + do_cpuid(1, cp); + cpu_feature2 = cp[2]; + } else + cpu_max_ext_state_size = sizeof(struct savefpu); +} + +/* + * Initialize the floating point unit. 
+ */ void fpuinit(void) { @@ -117,7 +233,21 @@ u_int mxcsr; u_short control; + if (IS_BSP()) + fpuinit_bsp1(); + + if (use_xsave) { + load_cr4(rcr4() | CR4_XSAVE); + xsetbv(XCR0, xsave_mask); + } + /* + * XCR0 shall be set up before CPU can report the save area size. + */ + if (IS_BSP()) + fpuinit_bsp2(); + + /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); @@ -127,20 +257,46 @@ fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); - if (PCPU_GET(cpuid) == 0) { - fxsave(&fpu_initialstate); - if (fpu_initialstate.sv_env.en_mxcsr_mask) - cpu_mxcsr_mask = fpu_initialstate.sv_env.en_mxcsr_mask; - else - cpu_mxcsr_mask = 0xFFBF; - bzero(fpu_initialstate.sv_fp, sizeof(fpu_initialstate.sv_fp)); - bzero(fpu_initialstate.sv_xmm, sizeof(fpu_initialstate.sv_xmm)); - } start_emulating(); intr_restore(saveintr); } /* + * On the boot CPU we generate a clean state that is used to + * initialize the floating point unit when it is first used by a + * process. + */ +static void +fpuinitstate(void *arg __unused) +{ + register_t saveintr; + + fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, + M_WAITOK | M_ZERO); + saveintr = intr_disable(); + stop_emulating(); + + fpusave(fpu_initialstate); + if (fpu_initialstate->sv_env.en_mxcsr_mask) + cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; + else + cpu_mxcsr_mask = 0xFFBF; + + /* + * The fninit instruction does not modify XMM registers. The + * fpusave call dumped the garbage contained in the registers + * after reset to the initial state saved. Clear XMM + * registers file image to make the startup program state and + * signal handler XMM register content predictable. + */ + bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); + + start_emulating(); + intr_restore(saveintr); +} +SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); + +/* * Free coprocessor (if we have it). 
*/ void @@ -150,7 +306,7 @@ critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(PCPU_GET(curpcb)->pcb_save); + fpusave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -423,7 +579,7 @@ * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. */ - fxrstor(&fpu_initialstate); + fpurestore(fpu_initialstate); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) @@ -432,7 +588,7 @@ else set_pcb_flags(pcb, PCB_FPUINITDONE); } else - fxrstor(pcb->pcb_save); + fpurestore(pcb->pcb_save); critical_exit(); } @@ -461,15 +617,16 @@ pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { - bcopy(&fpu_initialstate, &pcb->pcb_user_save, - sizeof(fpu_initialstate)); - pcb->pcb_user_save.sv_env.en_cw = pcb->pcb_initial_fpucw; + bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), + cpu_max_ext_state_size); + get_pcb_user_save_pcb(pcb)->sv_env.en_cw = + pcb->pcb_initial_fpucw; fpuuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fxsave(&pcb->pcb_user_save); + fpusave(get_pcb_user_save_pcb(pcb)); critical_exit(); return (_MC_FPOWNED_FPU); } else { @@ -491,25 +648,78 @@ set_pcb_flags(pcb, PCB_FPUINITDONE); } +int +fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) +{ + struct xstate_hdr *hdr, *ehdr; + size_t len, max_len; + uint64_t bv; + + /* XXXKIB should we clear all extended state in xstate_bv instead ? */ + if (xfpustate == NULL) + return (0); + if (!use_xsave) + return (EOPNOTSUPP); + + len = xfpustate_size; + if (len < sizeof(struct xstate_hdr)) + return (EINVAL); + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + if (len > max_len) + return (EINVAL); + + ehdr = (struct xstate_hdr *)xfpustate; + bv = ehdr->xstate_bv; + + /* + * Avoid #gp. 
+ */ + if (bv & ~xsave_mask) + return (EINVAL); + if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) != + (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) + return (EINVAL); + + hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); + + hdr->xstate_bv = bv; + bcopy(xfpustate + sizeof(struct xstate_hdr), + (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); + + return (0); +} + /* * Set the state of the FPU. */ -void -fpusetregs(struct thread *td, struct savefpu *addr) +int +fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, + size_t xfpustate_size) { struct pcb *pcb; + int error; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { - fxrstor(addr); + error = fpusetxstate(td, xfpustate, xfpustate_size); + if (error != 0) { + critical_exit(); + return (error); + } + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); + fpurestore(get_pcb_user_save_td(td)); critical_exit(); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } else { critical_exit(); - bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + error = fpusetxstate(td, xfpustate, xfpustate_size); + if (error != 0) + return (error); + bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } + return (0); } /* @@ -599,20 +809,62 @@ DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ +static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", + "Kernel contexts for FPU state"); + +#define FPU_KERN_CTX_FPUINITDONE 0x01 + +struct fpu_kern_ctx { + struct savefpu *prev; + uint32_t flags; + char hwstate1[]; +}; + +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + + cpu_max_ext_state_size; + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? 
+ M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + /* XXXKIB clear the memory ? */ + free(ctx, M_FPUKERN_CTX); +} + +static struct savefpu * +fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) +{ + vm_offset_t p; + + p = (vm_offset_t)&ctx->hwstate1; + p = roundup2(p, XSAVE_AREA_ALIGN); + return ((struct savefpu *)p); +} + int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; - KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, - ("mangled pcb_save")); + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == + get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; - pcb->pcb_save = &ctx->hwstate; + pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); return (0); @@ -629,7 +881,7 @@ fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; - if (pcb->pcb_save == &pcb->pcb_user_save) { + if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); clear_pcb_flags(pcb, PCB_KERNFPU); @@ -653,7 +905,8 @@ pcb = PCPU_GET(curpcb); KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); - KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb), + ("mangled pcb_save")); KASSERT(PCB_USER_FPU(pcb), ("recursive call")); set_pcb_flags(pcb, PCB_KERNFPU); Index: sys/amd64/amd64/mp_machdep.c =================================================================== --- sys/amd64/amd64/mp_machdep.c (revision 235453) +++ sys/amd64/amd64/mp_machdep.c (working copy) @@ -105,7 +105,8 @@ extern pt_entry_t *SMPpt; struct pcb stoppcbs[MAXCPU]; -struct pcb **susppcbs = NULL; +struct pcb **susppcbs; 
+void **suspfpusave; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; @@ -1351,6 +1352,7 @@ cr3 = rcr3(); if (savectx(susppcbs[cpu])) { + ctx_fpusave(suspfpusave[cpu]); wbinvd(); atomic_set_int(&stopped_cpus, cpumask); } else { Index: sys/amd64/amd64/sys_machdep.c =================================================================== --- sys/amd64/amd64/sys_machdep.c (revision 235453) +++ sys/amd64/amd64/sys_machdep.c (working copy) @@ -176,6 +176,8 @@ uint32_t i386base; uint64_t a64base; struct i386_ioperm_args iargs; + struct i386_get_xfpustate i386xfpu; + struct amd64_get_xfpustate a64xfpu; if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT) return (sysarch_ldt(td, uap, UIO_USERSPACE)); @@ -191,6 +193,18 @@ sizeof(struct i386_ioperm_args))) != 0) return (error); break; + case I386_GET_XFPUSTATE: + if ((error = copyin(uap->parms, &i386xfpu, + sizeof(struct i386_get_xfpustate))) != 0) + return (error); + a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr; + a64xfpu.len = i386xfpu.len; + break; + case AMD64_GET_XFPUSTATE: + if ((error = copyin(uap->parms, &a64xfpu, + sizeof(struct amd64_get_xfpustate))) != 0) + return (error); + break; default: break; } @@ -261,6 +275,16 @@ } break; + case I386_GET_XFPUSTATE: + case AMD64_GET_XFPUSTATE: + if (a64xfpu.len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + fpugetregs(td); + error = copyout((char *)(get_pcb_user_save_td(td) + 1), + a64xfpu.addr, a64xfpu.len); + return (error); + default: error = EINVAL; break; Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c (revision 235453) +++ sys/amd64/amd64/machdep.c (working copy) @@ -149,8 +149,10 @@ #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); -static void get_fpcontext(struct thread *td, mcontext_t *mcp); -static int set_fpcontext(struct thread *td, const mcontext_t *mcp); +static void 
get_fpcontext(struct thread *td, mcontext_t *mcp, + char *xfpusave, size_t xfpusave_len); +static int set_fpcontext(struct thread *td, const mcontext_t *mcp, + char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); #ifdef DDB @@ -305,6 +307,8 @@ struct sigacts *psp; char *sp; struct trapframe *regs; + char *xfpusave; + size_t xfpusave_len; int sig; int oonstack; @@ -318,6 +322,14 @@ regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); + if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { + xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); + xfpusave = __builtin_alloca(xfpusave_len); + } else { + xfpusave_len = 0; + xfpusave = NULL; + } + /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -327,7 +339,7 @@ sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - get_fpcontext(td, &sf.sf_uc.uc_mcontext); + get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; @@ -338,13 +350,18 @@ /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(struct sigframe); + sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else - sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128; + sp = (char *)regs->tf_rsp - 128; + if (xfpusave != NULL) { + sp -= xfpusave_len; + sp = (char *)((unsigned long)sp & ~0x3Ful); + sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; + } + sp -= sizeof(struct sigframe); /* Align to 16 bytes. 
*/ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); @@ -377,7 +394,10 @@ /* * Copy the sigframe out to the user's stack. */ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { + if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || + (xfpusave != NULL && copyout(xfpusave, + (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) + != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif @@ -422,6 +442,8 @@ struct proc *p; struct trapframe *regs; ucontext_t *ucp; + char *xfpustate; + size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; @@ -480,7 +502,28 @@ return (EINVAL); } - ret = set_fpcontext(td, &ucp->uc_mcontext); + if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { + xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; + if (xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) { + uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", + p->p_pid, td->td_name, xfpustate_len); + return (EINVAL); + } + xfpustate = __builtin_alloca(xfpustate_len); + error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, + xfpustate, xfpustate_len); + if (error != 0) { + uprintf( + "pid %d (%s): sigreturn copying xfpustate failed\n", + p->p_pid, td->td_name); + return (error); + } + } else { + xfpustate = NULL; + xfpustate_len = 0; + } + ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); @@ -1543,14 +1586,16 @@ int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; + struct xstate_hdr *xhdr; u_int64_t msr; char *env; + size_t kstack0_sz; thread0.td_kstack = physfree + KERNBASE; - bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE); - physfree += KSTACK_PAGES * PAGE_SIZE; - thread0.td_pcb = (struct pcb *) - (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + thread0.td_kstack_pages = KSTACK_PAGES; + kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; + bzero((void *)thread0.td_kstack, 
kstack0_sz); + physfree += kstack0_sz; /* * This may be done better later if it gets more high level @@ -1599,7 +1644,6 @@ physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); - PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); @@ -1691,13 +1735,6 @@ initializecpu(); /* Initialize CPU registers */ initializecpucache(); - /* make an initial tss so cpu can get interrupt stack on syscall! */ - common_tss[0].tss_rsp0 = thread0.td_kstack + \ - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); - /* Ensure the stack is aligned to 16 bytes */ - common_tss[0].tss_rsp0 &= ~0xFul; - PCPU_SET(rsp0, common_tss[0].tss_rsp0); - /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; @@ -1734,6 +1771,25 @@ msgbufinit(msgbufp, msgbufsize); fpuinit(); + /* + * Set up thread0 pcb after fpuinit calculated pcb + fpu save + * area size. Zero out the extended state header in fpu save + * area. + */ + thread0.td_pcb = get_pcb_td(&thread0); + bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); + if (use_xsave) { + xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + + 1); + xhdr->xstate_bv = xsave_mask; + } + /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ + common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; + /* Ensure the stack is aligned to 16 bytes */ + common_tss[0].tss_rsp0 &= ~0xFul; + PCPU_SET(rsp0, common_tss[0].tss_rsp0); + PCPU_SET(curpcb, thread0.td_pcb); + /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); @@ -2004,7 +2060,7 @@ P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); - fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); + fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } @@ -2013,7 +2069,7 @@ set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); + set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } @@ -2064,9 +2120,11 @@ mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); - get_fpcontext(td, mcp); + get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; + mcp->mc_xfpustate = 0; + mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } @@ -2082,6 +2140,7 @@ { struct pcb *pcb; struct trapframe *tp; + char *xfpustate; long rflags; int ret; @@ -2092,7 +2151,18 @@ return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); - ret = set_fpcontext(td, mcp); + if (mcp->mc_flags & _MC_HASFPXSTATE) { + if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + ret = copyin((void *)mcp->mc_xfpustate, xfpustate, + mcp->mc_xfpustate_len); + if (ret != 0) + return (ret); + } else + xfpustate = NULL; + ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; @@ -2130,35 +2200,51 @@ } static void -get_fpcontext(struct thread *td, mcontext_t *mcp) +get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, + size_t xfpusave_len) { + size_t max_len, len; 
mcp->mc_ownedfp = fpugetregs(td); - bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); + if (!use_xsave || xfpusave_len == 0) + return; + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + len = xfpusave_len; + if (len > max_len) { + len = max_len; + bzero(xfpusave + max_len, len - max_len); + } + mcp->mc_flags |= _MC_HASFPXSTATE; + mcp->mc_xfpustate_len = len; + bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int -set_fpcontext(struct thread *td, const mcontext_t *mcp) +set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, + size_t xfpustate_len) { struct savefpu *fpstate; + int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); - else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. 
*/ fpstate_drop(td); - else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + error = 0; + } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); - return (0); + return (error); } void Index: sys/amd64/amd64/trap.c =================================================================== --- sys/amd64/amd64/trap.c (revision 235453) +++ sys/amd64/amd64/trap.c (working copy) @@ -964,7 +964,7 @@ KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returing with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); - KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); Index: sys/amd64/ia32/ia32_reg.c =================================================================== --- sys/amd64/ia32/ia32_reg.c (revision 235453) +++ sys/amd64/ia32/ia32_reg.c (working copy) @@ -155,7 +155,7 @@ sv_87 = (struct save87 *)regs; penv_87 = &sv_87->sv_env; fpugetregs(td); - sv_fpu = &td->td_pcb->pcb_user_save; + sv_fpu = get_pcb_user_save_td(td); penv_xmm = &sv_fpu->sv_env; /* FPU control/status */ @@ -187,7 +187,7 @@ { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; + struct savefpu *sv_fpu = get_pcb_user_save_td(td); struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; Index: sys/amd64/ia32/ia32_signal.c =================================================================== --- sys/amd64/ia32/ia32_signal.c (revision 235453) +++ sys/amd64/ia32/ia32_signal.c (working copy) @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -83,15 +84,15 @@ #ifdef COMPAT_FREEBSD4 static void 
freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *); #endif -static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp); -static int ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void -ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp) +ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, + char *xfpusave, size_t xfpusave_len) { + size_t max_len, len; /* * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE @@ -100,28 +101,43 @@ * for now, it should be irrelevant for most applications. */ mcp->mc_ownedfp = fpugetregs(td); - bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate, + bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); + if (!use_xsave || xfpusave_len == 0) + return; + max_len = cpu_max_ext_state_size - sizeof(struct savefpu); + len = xfpusave_len; + if (len > max_len) { + len = max_len; + bzero(xfpusave + max_len, len - max_len); + } + mcp->mc_flags |= _MC_HASFPXSTATE; + mcp->mc_xfpustate_len = len; + bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int -ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp) +ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp, + char *xfpustate, size_t xfpustate_len) { + int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); - else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) + else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. 
*/ fpstate_drop(td); - else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || + error = 0; + } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate); + error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, + xfpustate, xfpustate_len); } else return (EINVAL); - return (0); + return (error); } /* @@ -164,10 +180,12 @@ mcp->mc_esp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); - ia32_get_fpcontext(td, mcp); + mcp->mc_flags = tp->tf_flags; + ia32_get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_xfpustate = 0; + mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); set_pcb_flags(pcb, PCB_FULL_IRET); return (0); @@ -183,6 +201,7 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) { struct trapframe *tp; + char *xfpustate; long rflags; int ret; @@ -191,7 +210,18 @@ return (EINVAL); rflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); - ret = ia32_set_fpcontext(td, mcp); + if (mcp->mc_flags & _MC_IA32_HASFPXSTATE) { + if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) + return (EINVAL); + xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); + ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, + mcp->mc_xfpustate_len); + if (ret != 0) + return (ret); + } else + xfpustate = NULL; + ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_gs = mcp->mc_gs; @@ -425,6 +455,8 @@ struct sigacts *psp; char *sp; struct trapframe *regs; + char *xfpusave; + size_t xfpusave_len; int oonstack; int sig; @@ -444,6 +476,14 @@ regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); + if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { + xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); + xfpusave = 
__builtin_alloca(xfpusave_len); + } else { + xfpusave_len = 0; + xfpusave = NULL; + } + /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; @@ -472,7 +512,7 @@ sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext); + ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; @@ -480,11 +520,16 @@ /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(sf); - } else - sp = (char *)regs->tf_rsp - sizeof(sf); + SIGISMEMBER(psp->ps_sigonstack, sig)) + sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; + else + sp = (char *)regs->tf_rsp; + if (xfpusave != NULL) { + sp -= xfpusave_len; + sp = (char *)((unsigned long)sp & ~0x3Ful); + sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; + } + sp -= sizeof(sf); /* Align to 16 bytes. */ sfp = (struct ia32_sigframe *)((uintptr_t)sp & ~0xF); PROC_UNLOCK(p); @@ -516,7 +561,10 @@ /* * Copy the sigframe out to the user's stack. 
*/ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { + if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || + (xfpusave != NULL && copyout(xfpusave, + PTRIN(sf.sf_uc.uc_mcontext.mc_xfpustate), xfpusave_len) + != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif @@ -644,6 +692,8 @@ struct ia32_ucontext uc; struct trapframe *regs; struct ia32_ucontext *ucp; + char *xfpustate; + size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; @@ -690,9 +740,34 @@ return (EINVAL); } - ret = ia32_set_fpcontext(td, &ucp->uc_mcontext); - if (ret != 0) + if ((ucp->uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { + xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; + if (xfpustate_len > cpu_max_ext_state_size - + sizeof(struct savefpu)) { + uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", + td->td_proc->p_pid, td->td_name, xfpustate_len); + return (EINVAL); + } + xfpustate = __builtin_alloca(xfpustate_len); + error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), + xfpustate, xfpustate_len); + if (error != 0) { + uprintf( + "pid %d (%s): sigreturn copying xfpustate failed\n", + td->td_proc->p_pid, td->td_name); + return (error); + } + } else { + xfpustate = NULL; + xfpustate_len = 0; + } + ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate, + xfpustate_len); + if (ret != 0) { + uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", + td->td_proc->p_pid, td->td_name, ret); return (ret); + } regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; Index: sys/sys/ucontext.h =================================================================== --- sys/sys/ucontext.h (revision 235453) +++ sys/sys/ucontext.h (working copy) @@ -72,11 +72,17 @@ __BEGIN_DECLS int getcontext(ucontext_t *); +ucontext_t *getcontextx(void); int setcontext(const ucontext_t *); void makecontext(ucontext_t *, void (*)(void), int, ...); int signalcontext(ucontext_t *, int, __sighandler_t *); int swapcontext(ucontext_t *, const ucontext_t *); +#if 
__BSD_VISIBLE +int __getcontextx_size(void); +int __fillcontextx(char *ctx); +#endif + __END_DECLS #else /* _KERNEL */