diff --git a/lib/libc/i386/sys/Makefile.inc b/lib/libc/i386/sys/Makefile.inc index 593956b..98a9c9e 100644 --- a/lib/libc/i386/sys/Makefile.inc +++ b/lib/libc/i386/sys/Makefile.inc @@ -2,11 +2,10 @@ # $FreeBSD$ .if !defined(COMPAT_32BIT) -SRCS+= i386_clr_watch.c i386_get_ioperm.c \ - i386_set_ioperm.c i386_set_watch.c i386_vm86.c +SRCS+= i386_clr_watch.c i386_set_watch.c i386_vm86.c .endif -SRCS+= i386_get_fsbase.c i386_get_gsbase.c i386_get_ldt.c \ - i386_set_fsbase.c i386_set_gsbase.c i386_set_ldt.c +SRCS+= i386_get_fsbase.c i386_get_gsbase.c i386_get_ioperm.c i386_get_ldt.c \ + i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S pipe.S ptrace.S \ reboot.S sbrk.S setlogin.S sigreturn.S syscall.S diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 14a6f87..cffb021 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -219,9 +219,7 @@ IDTVEC(cpustop) movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ call cpustop_handler - - POP_FRAME - iretq + jmp doreti /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. @@ -235,6 +233,5 @@ IDTVEC(rendezvous) call smp_rendezvous_action movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ - POP_FRAME /* Why not doreti? */ - iretq + jmp doreti #endif /* SMP */ diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 99d6716..c751031 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -75,8 +75,6 @@ ENTRY(cpu_throw) 1: movq TD_PCB(%rdi),%r8 /* Old pcb */ movl PCPU(CPUID), %eax - movq PCB_FSBASE(%r8),%r9 - movq PCB_GSBASE(%r8),%r10 /* release bit from old pm_active */ movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ @@ -110,28 +108,6 @@ ENTRY(cpu_switch) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) - /* - * Reread fs and gs bases. 
Explicit fs segment register load - * by the usermode code may change actual fs base without - * updating pcb_{fs,gs}base. - * - * %rdx still contains the mtx, save %rdx around rdmsr. - */ - movq %rdx,%r11 - movl $MSR_FSBASE,%ecx - rdmsr - shlq $32,%rdx - leaq (%rax,%rdx),%r9 - movl $MSR_KGSBASE,%ecx - rdmsr - shlq $32,%rdx - leaq (%rax,%rdx),%r10 - movq %r11,%rdx - - testl $PCB_32BIT,PCB_FLAGS(%r8) - jnz store_seg -done_store_seg: - testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: @@ -192,35 +168,47 @@ sw1: testl $TDP_KTHREAD,TD_PFLAGS(%rsi) jnz do_kthread - testl $PCB_32BIT,PCB_FLAGS(%r8) - jnz load_seg -done_load_seg: + /* + * Load ldt register + */ + movq TD_PROC(%rsi),%rcx + cmpq $0, P_MD+MD_LDT(%rcx) + jne do_ldt + xorl %eax,%eax +ld_ldt: lldt %ax - cmpq PCB_FSBASE(%r8),%r9 - jz 1f - /* Restore userland %fs */ - movl $MSR_FSBASE,%ecx + /* Restore fs base in GDT */ movl PCB_FSBASE(%r8),%eax - movl PCB_FSBASE+4(%r8),%edx - wrmsr -1: - cmpq PCB_GSBASE(%r8),%r10 - jz 2f - /* Restore userland %gs */ - movl $MSR_KGSBASE,%ecx + movq PCPU(FS32P),%rdx + movw %ax,2(%rdx) + shrl $16,%eax + movb %al,4(%rdx) + shrl $8,%eax + movb %al,7(%rdx) + + /* Restore gs base in GDT */ movl PCB_GSBASE(%r8),%eax - movl PCB_GSBASE+4(%r8),%edx - wrmsr -2: + movq PCPU(GS32P),%rdx + movw %ax,2(%rdx) + shrl $16,%eax + movb %al,4(%rdx) + shrl $8,%eax + movb %al,7(%rdx) -do_tss: +do_kthread: + /* Do we need to reload tss ? 
*/ + movq PCPU(TSSP),%rax + movq PCB_TSSP(%r8),%rdx + testq %rdx,%rdx + cmovzq PCPU(COMMONTSSP),%rdx + cmpq %rax,%rdx + jne do_tss +done_tss: + movq %r8,PCPU(RSP0) + movq %r8,PCPU(CURPCB) /* Update the TSS_RSP0 pointer for the next interrupt */ - movq PCPU(TSSP), %rax - movq %r8, PCPU(RSP0) - movq %r8, PCPU(CURPCB) - addq $COMMON_TSS_RSP0, %rax - movq %rsi, PCPU(CURTHREAD) /* into next thread */ - movq %r8, (%rax) + movq %r8,COMMON_TSS_RSP0(%rdx) + movq %rsi,PCPU(CURTHREAD) /* into next thread */ /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%r8) @@ -249,45 +237,6 @@ done_load_dr: * We use jumps rather than call in order to avoid the stack. */ -do_kthread: - /* - * Copy old fs/gsbase to new kthread pcb for future switches - * This maintains curpcb->pcb_[fg]sbase as caches of the MSR - */ - movq %r9,PCB_FSBASE(%r8) - movq %r10,PCB_GSBASE(%r8) - jmp do_tss - -store_seg: - movl %gs,PCB_GS(%r8) - testl $PCB_GS32BIT,PCB_FLAGS(%r8) - jnz 2f -1: movl %ds,PCB_DS(%r8) - movl %es,PCB_ES(%r8) - movl %fs,PCB_FS(%r8) - jmp done_store_seg -2: movq PCPU(GS32P),%rax - movq (%rax),%rax - movq %rax,PCB_GS32SD(%r8) - jmp 1b - -load_seg: - testl $PCB_GS32BIT,PCB_FLAGS(%r8) - jnz 2f -1: movl $MSR_GSBASE,%ecx - rdmsr - movl PCB_GS(%r8),%gs - wrmsr - movl PCB_DS(%r8),%ds - movl PCB_ES(%r8),%es - movl PCB_FS(%r8),%fs - jmp done_load_seg - /* Restore userland %gs while preserving kernel gsbase */ -2: movq PCPU(GS32P),%rax - movq PCB_GS32SD(%r8),%rcx - movq %rcx,(%rax) - jmp 1b - store_dr: movq %dr7,%rax /* yes, do the save */ movq %dr0,%r15 @@ -325,6 +274,29 @@ load_dr: movq %rax,%dr7 jmp done_load_dr +do_tss: movq %rdx,PCPU(TSSP) + movq %rdx,%rcx + movq PCPU(TSS),%rax + movw %cx,2(%rax) /* low 16 bits of the new TSS address */ + shrq $16,%rcx + movb %cl,4(%rax) + shrq $8,%rcx + movb %cl,7(%rax) + shrq $8,%rcx + movl %ecx,8(%rax) + movb $0x89,5(%rax) /* unset busy */ + movl $TSSSEL,%eax + ltr %ax + jmp done_tss + +do_ldt: movq PCPU(LDT),%rax + movq P_MD+MD_LDT_SD(%rcx),%rdx + movq %rdx,(%rax) +
movq P_MD+MD_LDT_SD+8(%rcx),%rdx + movq %rdx,8(%rax) + movl $LDTSEL,%eax + jmp ld_ldt + END(cpu_switch) /* diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c index b297616..287c236 100644 --- a/sys/amd64/amd64/db_interface.c +++ b/sys/amd64/amd64/db_interface.c @@ -139,7 +139,11 @@ void db_show_mdpcpu(struct pcpu *pc) { -#if 0 - db_printf("currentldt = 0x%x\n", pc->pc_currentldt); -#endif + db_printf("curpmap = %p\n", pc->pc_curpmap); + db_printf("tssp = %p\n", pc->pc_tssp); + db_printf("commontssp = %p\n", pc->pc_commontssp); + db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0); + db_printf("gs32p = %p\n", pc->pc_gs32p); + db_printf("ldt = %p\n", pc->pc_ldt); + db_printf("tss = %p\n", pc->pc_tss); } diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c index 9676963..9753b04 100644 --- a/sys/amd64/amd64/db_trace.c +++ b/sys/amd64/amd64/db_trace.c @@ -69,12 +69,10 @@ static db_varfcn_t db_ss; #define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) struct db_variable db_regs[] = { { "cs", DB_OFFSET(tf_cs), db_frame }, -#if 0 { "ds", DB_OFFSET(tf_ds), db_frame }, { "es", DB_OFFSET(tf_es), db_frame }, { "fs", DB_OFFSET(tf_fs), db_frame }, { "gs", DB_OFFSET(tf_gs), db_frame }, -#endif { "ss", NULL, db_ss }, { "rax", DB_OFFSET(tf_rax), db_frame }, { "rcx", DB_OFFSET(tf_rcx), db_frame }, @@ -94,7 +92,7 @@ struct db_variable db_regs[] = { { "r15", DB_OFFSET(tf_r15), db_frame }, { "rip", DB_OFFSET(tf_rip), db_frame }, { "rflags", DB_OFFSET(tf_rflags), db_frame }, -#define DB_N_SHOW_REGS 20 /* Don't show registers after here. */ +#define DB_N_SHOW_REGS 24 /* Don't show registers after here. 
*/ { "dr0", NULL, db_dr0 }, { "dr1", NULL, db_dr1 }, { "dr2", NULL, db_dr2 }, @@ -355,7 +353,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td) rbp = tf->tf_rbp; switch (frame_type) { case TRAP: - db_printf("--- trap %#lr", tf->tf_trapno); + db_printf("--- trap %#r", tf->tf_trapno); break; case SYSCALL: db_printf("--- syscall"); diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 1c098e4..356a415 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -42,6 +42,7 @@ #include #include #include +#include #include "assym.s" @@ -99,7 +100,7 @@ MCOUNT_LABEL(btrap) /* Traps that we leave interrupts disabled for.. */ #define TRAP_NOEN(a) \ subq $TF_RIP,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ movq $0,TF_ERR(%rsp) ; \ jmp alltraps_noen @@ -111,7 +112,7 @@ IDTVEC(bpt) /* Regular traps; The cpu does not supply tf_err for these. */ #define TRAP(a) \ subq $TF_RIP,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ movq $0,TF_ERR(%rsp) ; \ jmp alltraps @@ -139,7 +140,7 @@ IDTVEC(xmm) /* This group of traps have tf_err already pushed by the cpu */ #define TRAP_ERR(a) \ subq $TF_ERR,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ jmp alltraps IDTVEC(tss) @@ -185,6 +186,11 @@ alltraps_pushregs_no_rdi: movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) #ifdef KDTRACE_HOOKS /* @@ -193,7 +199,7 @@ alltraps_pushregs_no_rdi: * interrupt. For all other trap types, just handle them in * the usual way. */ - cmpq $T_BPTFLT,TF_TRAPNO(%rsp) + cmpl $T_BPTFLT,TF_TRAPNO(%rsp) jne calltrap /* Check if there is no DTrace hook registered. 
*/ @@ -234,7 +240,7 @@ alltraps_noen: IDTVEC(dblfault) subq $TF_ERR,%rsp - movq $T_DOUBLEFLT,TF_TRAPNO(%rsp) + movl $T_DOUBLEFLT,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) @@ -252,6 +258,11 @@ IDTVEC(dblfault) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs @@ -262,7 +273,7 @@ IDTVEC(dblfault) IDTVEC(page) subq $TF_ERR,%rsp - movq $T_PAGEFLT,TF_TRAPNO(%rsp) + movl $T_PAGEFLT,TF_TRAPNO(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs @@ -283,18 +294,16 @@ IDTVEC(page) */ IDTVEC(prot) subq $TF_ERR,%rsp - movq $T_PROTFLT,TF_TRAPNO(%rsp) + movl $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ leaq doreti_iret(%rip),%rdi cmpq %rdi,TF_RIP(%rsp) - je 2f /* kernel but with user gsbase!! */ + je 1f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ - jz 1f /* already running with kernel GS.base */ -2: - swapgs -1: - testl $PSL_I,TF_RFLAGS(%rsp) + jz 2f /* already running with kernel GS.base */ +1: swapgs +2: testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs_no_rdi sti jmp alltraps_pushregs_no_rdi @@ -333,40 +342,15 @@ IDTVEC(fast_syscall) movq %r13,TF_R13(%rsp) /* C preserved */ movq %r14,TF_R14(%rsp) /* C preserved */ movq %r15,TF_R15(%rsp) /* C preserved */ + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) /* full 32-bit store, as in the other entry paths */ FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call syscall movq PCPU(CURPCB),%rax - testq $PCB_FULLCTX,PCB_FLAGS(%rax) - jne 3f -1: /* Check for and handle AST's on return to userland */ - cli - movq PCPU(CURTHREAD),%rax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) - je 2f - sti - movq %rsp, %rdi - call ast - jmp 1b -2: /* restore preserved registers */ - MEXITCOUNT - movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ - movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ - movq TF_RDX(%rsp),%rdx /* return value 2 */ - movq TF_RAX(%rsp),%rax /* return value 1 */ - movq TF_RBX(%rsp),%rbx /* C preserved */ - movq TF_RBP(%rsp),%rbp /* C preserved */ - movq TF_R12(%rsp),%r12 /* C preserved */ - movq TF_R13(%rsp),%r13 /* C preserved */ - movq TF_R14(%rsp),%r14 /* C preserved */ - movq TF_R15(%rsp),%r15 /* C preserved */ - movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ - movq TF_RIP(%rsp),%rcx /* original %rip */ - movq TF_RSP(%rsp),%r9 /* user stack pointer */ - movq %r9,%rsp /* original %rsp */ - swapgs - sysretq -3: /* Requested full context restore, use doreti for that */ andq $~PCB_FULLCTX,PCB_FLAGS(%rax) MEXITCOUNT jmp doreti @@ -403,7 +387,7 @@ IDTVEC(fast_syscall32) IDTVEC(nmi) subq $TF_RIP,%rsp - movq $(T_NMI),TF_TRAPNO(%rsp) + movl $(T_NMI),TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) @@ -421,6 +405,11 @@ IDTVEC(nmi) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq
%r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_needswapgs /* we came from userland */ @@ -606,6 +595,44 @@ doreti_ast: */ doreti_exit: MEXITCOUNT + movq PCPU(CURTHREAD),%r8 + movq TD_PCB(%r8),%r8 + + testl $TF_HASSEGS,TF_FLAGS(%rsp) + je set_segs + +do_segs: + /* Restore %fs and fsbase */ + movw TF_FS(%rsp),%ax + .globl ld_fs +ld_fs: movw %ax,%fs + cmpw $KUF32SEL,%ax + jne 1f + movl $MSR_FSBASE,%ecx + movl PCB_FSBASE(%r8),%eax + movl PCB_FSBASE+4(%r8),%edx + wrmsr +1: + /* Restore %gs and gsbase */ + movw TF_GS(%rsp),%si + pushfq + cli + movl $MSR_GSBASE,%ecx + rdmsr + .globl ld_gs +ld_gs: movw %si,%gs + wrmsr + popfq + cmpw $KUG32SEL,%si + jne 1f + movl $MSR_KGSBASE,%ecx + movl PCB_GSBASE(%r8),%eax + movl PCB_GSBASE+4(%r8),%edx + wrmsr +1: .globl ld_es +ld_es: movw TF_ES(%rsp),%es + .globl ld_ds +ld_ds: movw TF_DS(%rsp),%ds movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_RDX(%rsp),%rdx @@ -630,6 +657,14 @@ doreti_exit: doreti_iret: iretq +set_segs: + movw $KUDSEL,%ax + movw %ax,TF_DS(%rsp) + movw %ax,TF_ES(%rsp) + movw $KUF32SEL,TF_FS(%rsp) + movw $KUG32SEL,TF_GS(%rsp) + jmp do_segs + /* * doreti_iret_fault. 
Alternative return code for * the case where we get a fault in the doreti_exit code @@ -644,7 +679,12 @@ doreti_iret_fault: testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti -1: movq %rdi,TF_RDI(%rsp) +1: movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) + movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) @@ -659,11 +699,48 @@ doreti_iret_fault: movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) - movq $T_PROTFLT,TF_TRAPNO(%rsp) + movl $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ERR(%rsp) /* XXX should be the error code */ movq $0,TF_ADDR(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) jmp calltrap + + ALIGN_TEXT + .globl ds_load_fault +ds_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_DS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUDSEL,TF_DS(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl es_load_fault +es_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_ES(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUDSEL,TF_ES(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl fs_load_fault +fs_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_FS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUF32SEL,TF_FS(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl gs_load_fault +gs_load_fault: + popfq + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_GS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUG32SEL,TF_GS(%rsp) + jmp calltrap #ifdef HWPMC_HOOKS ENTRY(end_exceptions) #endif diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 1924be7..25cc243 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -79,6 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); +ASSYM(P_MD, offsetof(struct proc, p_md)); +ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); +ASSYM(MD_LDT_SD, offsetof(struct mdproc, md_ldt_sd)); + ASSYM(TD_LOCK, 
offsetof(struct thread, td_lock)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); @@ -132,16 +136,13 @@ ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx)); ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip)); ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); -ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds)); -ASSYM(PCB_ES, offsetof(struct pcb, pcb_es)); -ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs)); -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_32BIT, PCB_32BIT); ASSYM(PCB_GS32BIT, PCB_GS32BIT); @@ -180,7 +181,13 @@ ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags)); ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp)); ASSYM(TF_SS, offsetof(struct trapframe, tf_ss)); +ASSYM(TF_DS, offsetof(struct trapframe, tf_ds)); +ASSYM(TF_ES, offsetof(struct trapframe, tf_es)); +ASSYM(TF_FS, offsetof(struct trapframe, tf_fs)); +ASSYM(TF_GS, offsetof(struct trapframe, tf_gs)); +ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags)); ASSYM(TF_SIZE, sizeof(struct trapframe)); +ASSYM(TF_HASSEGS, TF_HASSEGS); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); @@ -202,7 +209,11 @@ ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp)); ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp)); ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0)); +ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p)); ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p)); +ASSYM(PC_LDT, 
offsetof(struct pcpu, pc_ldt)); +ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp)); +ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss)); ASSYM(LA_VER, offsetof(struct LAPIC, version)); ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); @@ -217,6 +228,10 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL)); +ASSYM(KUF32SEL, GSEL(GUFS32_SEL, SEL_UPL)); +ASSYM(KUG32SEL, GSEL(GUGS32_SEL, SEL_UPL)); +ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL)); +ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL)); ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); ASSYM(MSR_GSBASE, MSR_GSBASE); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 988b039..9aac8ec 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -159,7 +159,7 @@ extern vm_offset_t ksym_start, ksym_end; #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 -int _udatasel, _ucodesel, _ucode32sel; +int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; @@ -192,6 +192,8 @@ struct mtx icu_lock; struct mem_range_softc mem_range_softc; +struct mtx dt_lock; /* lock for GDT and LDT */ + static void cpu_startup(dummy) void *dummy; @@ -267,7 +269,7 @@ cpu_startup(dummy) * Send an interrupt to process. * * Stack is set up to allow sigcode stored - * at top to call routine, followed by kcall + * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user @@ -305,6 +307,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); + sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; + sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; /* Allocate space for the signal handler context. 
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && @@ -359,6 +363,11 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -393,6 +402,8 @@ sigreturn(td, uap) if (error != 0) return (error); ucp = &uc; + if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) + return (EINVAL); regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* @@ -434,6 +445,8 @@ sigreturn(td, uap) if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + td->td_pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; + td->td_pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; PROC_LOCK(p); #if defined(COMPAT_43) @@ -727,22 +740,16 @@ exec_setregs(td, entry, stack, ps_strings) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; + + mtx_lock(&dt_lock); + if (td->td_proc->p_md.md_ldt != NULL) + user_ldt_free(td); + else + mtx_unlock(&dt_lock); - critical_enter(); - wrmsr(MSR_FSBASE, 0); - wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; - critical_exit(); pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT); - load_ds(_udatasel); - load_es(_udatasel); - load_fs(_udatasel); - load_gs(_udatasel); - pcb->pcb_ds = _udatasel; - pcb->pcb_es = _udatasel; - pcb->pcb_fs = _udatasel; - pcb->pcb_gs = _udatasel; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; @@ -751,6 +758,11 @@ exec_setregs(td, entry, stack, ps_strings) regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers 
if they were in use. @@ -814,87 +826,114 @@ struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ -{ 0x0, /* segment base address */ - 0x0, /* length */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, /* long */ - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0x0, + .ssd_type = 0, + .ssd_dpl = 0, + .ssd_p = 0, + .ssd_long = 0, + .ssd_def32 = 0, + .ssd_gran = 0 }, /* GCODE_SEL 1 Code Descriptor for kernel */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMERA, /* segment type */ - SEL_KPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 1, /* long */ - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMERA, + .ssd_dpl = SEL_KPL, + .ssd_p = 1, + .ssd_long = 1, + .ssd_def32 = 0, + .ssd_gran = 1 }, /* GDATA_SEL 2 Data Descriptor for kernel */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_KPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 1, /* long */ - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMRWA, + .ssd_dpl = SEL_KPL, + .ssd_p = 1, + .ssd_long = 1, + .ssd_def32 = 0, + .ssd_gran = 1 }, /* GUCODE32_SEL 3 32 bit Code Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMERA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, /* long */ - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 
0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMERA, + .ssd_dpl = SEL_UPL, + .ssd_p = 1, + .ssd_long = 0, + .ssd_def32 = 1, + .ssd_gran = 1 }, /* GUDATA_SEL 4 32/64 bit Data Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, /* long */ - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMRWA, + .ssd_dpl = SEL_UPL, + .ssd_p = 1, + .ssd_long = 0, + .ssd_def32 = 1, + .ssd_gran = 1 }, /* GUCODE_SEL 5 64 bit Code Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMERA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 1, /* long */ - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMERA, + .ssd_dpl = SEL_UPL, + .ssd_p = 1, + .ssd_long = 1, + .ssd_def32 = 0, + .ssd_gran = 1 }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { - 0x0, /* segment base address */ - sizeof(struct amd64tss)-1,/* length */ - SDT_SYSTSS, /* segment type */ - SEL_KPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, /* long */ - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, + .ssd_base = 0x0, + .ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1, + .ssd_type = SDT_SYSTSS, + .ssd_dpl = SEL_KPL, + .ssd_p = 1, + .ssd_long = 0, + .ssd_def32 = 0, + .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ -{ 0x0, /* segment base address */ - 0x0, /* length */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, /* long */ - 0, /* default 32 vs 16 bit 
size */ - 0 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0x0, + .ssd_type = 0, + .ssd_dpl = 0, + .ssd_p = 0, + .ssd_long = 0, + .ssd_def32 = 0, + .ssd_gran = 0 }, /* GUGS32_SEL 8 32 bit GS Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, /* long */ - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMRWA, + .ssd_dpl = SEL_UPL, + .ssd_p = 1, + .ssd_long = 0, + .ssd_def32 = 1, + .ssd_gran = 1 }, +/* GUFS32_SEL 9 32 bit FS Descriptor for user */ +{ .ssd_base = 0x0, + .ssd_limit = 0xfffff, + .ssd_type = SDT_MEMRWA, + .ssd_dpl = SEL_UPL, + .ssd_p = 1, + .ssd_long = 0, + .ssd_def32 = 1, + .ssd_gran = 1 }, +/* GUSERLDT_SEL 10 LDT Descriptor */ +{ .ssd_base = 0x0, + .ssd_limit = 0x0, + .ssd_type = 0, + .ssd_dpl = 0, + .ssd_p = 0, + .ssd_long = 0, + .ssd_def32 = 0, + .ssd_gran = 0 }, +/* GUSERLDT_SEL 11 LDT Descriptor, double size */ +{ .ssd_base = 0x0, + .ssd_limit = 0x0, + .ssd_type = 0, + .ssd_dpl = 0, + .ssd_p = 0, + .ssd_long = 0, + .ssd_def32 = 0, + .ssd_gran = 0 }, }; void @@ -1324,12 +1363,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) /* * make gdt memory segments */ - gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; - for (x = 0; x < NGDT; x++) { - if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && + x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); @@ -1347,6 +1386,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); + 
PCPU_SET(commontssp, &common_tss[0]); + PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); + PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); + PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* @@ -1359,6 +1402,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); + mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) @@ -1439,7 +1483,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* Set the IO permission bitmap (empty due to tss seg limit) */ - common_tss[0].tss_iobase = sizeof(struct amd64tss); + common_tss[0].tss_iobase = sizeof(struct amd64tss) + + IOPAGES * PAGE_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); @@ -1467,10 +1512,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); + _ufssel = GSEL(GUFS32_SEL, SEL_UPL); + _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); - load_fs(_udatasel); + load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; @@ -1736,8 +1783,15 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; + mcp->mc_ds = tp->tf_ds; + mcp->mc_es = tp->tf_es; + mcp->mc_fs = tp->tf_fs; + mcp->mc_gs = tp->tf_gs; + mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); + mcp->mc_fsbase = td->td_pcb->pcb_fsbase; + mcp->mc_gsbase = td->td_pcb->pcb_gsbase; return (0); } @@ -1755,7 +1809,8 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) int ret; tp = td->td_frame; - if (mcp->mc_len != sizeof(*mcp)) + if (mcp->mc_len != sizeof(*mcp) || + (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); 
rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); @@ -1781,6 +1836,17 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; + tp->tf_flags = mcp->mc_flags; + if (tp->tf_flags & TF_HASSEGS) { + tp->tf_ds = mcp->mc_ds; + tp->tf_es = mcp->mc_es; + tp->tf_fs = mcp->mc_fs; + tp->tf_gs = mcp->mc_gs; + } + if (mcp->mc_flags & _MC_HASBASES) { + td->td_pcb->pcb_fsbase = mcp->mc_fsbase; + td->td_pcb->pcb_gsbase = mcp->mc_gsbase; + } td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 6578b20..1bf223f 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -99,8 +99,6 @@ extern pt_entry_t *KPTphys; /* SMP page table page */ extern pt_entry_t *SMPpt; -extern int _udatasel; - struct pcb stoppcbs[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ @@ -445,17 +443,19 @@ init_secondary(void) /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ - common_tss[cpu].tss_iobase = sizeof(struct amd64tss); + common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + + IOPAGES * PAGE_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; - ssdtosyssd(&gdt_segs[GPROC0_SEL], - (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); for (x = 0; x < NGDT; x++) { - if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && + x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } + ssdtosyssd(&gdt_segs[GPROC0_SEL], + (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ @@ -469,8 +469,14 @@ 
init_secondary(void) pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; + pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; + pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + + GPROC0_SEL]; + pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; + pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + + GUSERLDT_SEL]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); @@ -576,7 +582,7 @@ init_secondary(void) load_cr4(rcr4() | CR4_PGE); load_ds(_udatasel); load_es(_udatasel); - load_fs(_udatasel); + load_fs(_ufssel); mtx_unlock_spin(&ap_boot_mtx); /* wait until all the AP's are up */ diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 7f022d0..2fdf39c 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -36,16 +36,39 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include +#include #include #include -#include -#include -#include +#include #include #include +#include /* for kernel_map */ +#include + +#include +#include +#include +#include +#include +#include #include +#include + +int max_ldt_segment = 1024; +#define LD_PER_PAGE 512 +#define NULL_LDT_BASE ((caddr_t)NULL) + +#ifdef notyet +#ifdef SMP +static void set_user_ldt_rv(struct vmspace *vmsp); +#endif +#endif +static void user_ldt_derefl(struct proc_ldt *pldt); + #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { int op; @@ -54,6 +77,55 @@ struct sysarch_args { #endif int +sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space) +{ + struct i386_ldt_args *largs, la; + struct user_segment_descriptor *lp; + int error = 0; + + /* + * XXXKIB check that the BSM generation code knows to encode + * the op argument. 
+ */ + AUDIT_ARG(cmd, uap->op); + if (uap_space == UIO_USERSPACE) { + error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args)); + if (error != 0) + return (error); + largs = &la; + } else + largs = (struct i386_ldt_args *)uap->parms; + if (largs->num > max_ldt_segment || largs->num <= 0) + return (EINVAL); + + switch (uap->op) { + case I386_GET_LDT: + error = amd64_get_ldt(td, largs); + break; + case I386_SET_LDT: + if (largs->descs != NULL) { + lp = (struct user_segment_descriptor *) + kmem_alloc(kernel_map, largs->num * + sizeof(struct user_segment_descriptor)); + if (lp == NULL) { + error = ENOMEM; + break; + } + error = copyin(largs->descs, lp, largs->num * + sizeof(struct user_segment_descriptor)); + if (error == 0) + error = amd64_set_ldt(td, largs, lp); + kmem_free(kernel_map, (vm_offset_t)lp, largs->num * + sizeof(struct user_segment_descriptor)); + } else { + error = amd64_set_ldt(td, largs, NULL); + } + break; + } + return (error); +} + +int sysarch(td, uap) struct thread *td; register struct sysarch_args *uap; @@ -62,8 +134,36 @@ sysarch(td, uap) struct pcb *pcb = curthread->td_pcb; uint32_t i386base; uint64_t a64base; + struct i386_ioperm_args iargs; - switch(uap->op) { + if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT) + return (sysarch_ldt(td, uap, UIO_USERSPACE)); + /* + * XXXKIB check that the BSM generation code knows to encode + * the op argument. 
+ */ + AUDIT_ARG(cmd, uap->op); + switch (uap->op) { + case I386_GET_IOPERM: + case I386_SET_IOPERM: + if ((error = copyin(uap->parms, &iargs, + sizeof(struct i386_ioperm_args))) != 0) + return (error); + break; + default: + break; + } + + switch (uap->op) { + case I386_GET_IOPERM: + error = amd64_get_ioperm(td, &iargs); + if (error == 0) + error = copyout(&iargs, uap->parms, + sizeof(struct i386_ioperm_args)); + break; + case I386_SET_IOPERM: + error = amd64_set_ioperm(td, &iargs); + break; case I386_GET_FSBASE: i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); @@ -71,10 +171,8 @@ sysarch(td, uap) case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { - critical_enter(); - wrmsr(MSR_FSBASE, i386base); pcb->pcb_fsbase = i386base; - critical_exit(); + td->td_frame->tf_fs = _ufssel; } break; case I386_GET_GSBASE: @@ -84,10 +182,8 @@ sysarch(td, uap) case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { - critical_enter(); - wrmsr(MSR_KGSBASE, i386base); pcb->pcb_gsbase = i386base; - critical_exit(); + td->td_frame->tf_gs = _ugssel; } break; case AMD64_GET_FSBASE: @@ -98,13 +194,10 @@ sysarch(td, uap) error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - critical_enter(); - wrmsr(MSR_FSBASE, a64base); pcb->pcb_fsbase = a64base; - critical_exit(); - } else { + td->td_frame->tf_fs = _ufssel; + } else error = EINVAL; - } } break; @@ -116,13 +209,10 @@ sysarch(td, uap) error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - critical_enter(); - wrmsr(MSR_KGSBASE, a64base); pcb->pcb_gsbase = a64base; - critical_exit(); - } else { + td->td_frame->tf_gs = _ugssel; + } else error = EINVAL; - } } break; @@ -132,3 +222,424 @@ sysarch(td, uap) } return (error); } + +int +amd64_set_ioperm(td, uap) + struct thread *td; + struct i386_ioperm_args *uap; +{ + int i, error; 
+ char *iomap; + struct amd64tss *tssp; + struct system_segment_descriptor *tss_sd; + u_long *addr; + struct pcb *pcb; + + if ((error = priv_check(td, PRIV_IO)) != 0) + return (error); + if ((error = securelevel_gt(td->td_ucred, 0)) != 0) + return (error); + if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) + return (EINVAL); + + /* + * XXX + * While this is restricted to root, we should probably figure out + * whether any other driver is using this i/o address, as so not to + * cause confusion. This probably requires a global 'usage registry'. + */ + pcb = td->td_pcb; + if (pcb->pcb_tssp == NULL) { + tssp = (struct amd64tss *)kmem_alloc(kernel_map, + ctob(IOPAGES+1)); + if (tssp == NULL) + return (ENOMEM); + iomap = (char *)&tssp[1]; + addr = (u_long *)iomap; + for (i = 0; i < (ctob(IOPAGES) + 1) / sizeof(u_long); i++) + *addr++ = ~0; + critical_enter(); + /* Takes care of tss_rsp0. */ + memcpy(tssp, &common_tss[PCPU_GET(cpuid)], + sizeof(struct amd64tss)); + tssp->tss_iobase = sizeof(*tssp); + pcb->pcb_tssp = tssp; + tss_sd = PCPU_GET(tss); + tss_sd->sd_lobase = (u_long)tssp & 0xffffff; + tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful; + tss_sd->sd_type = SDT_SYSTSS; + ltr(GSEL(GPROC0_SEL, SEL_KPL)); + PCPU_SET(tssp, tssp); + critical_exit(); + } else + iomap = (char *)&pcb->pcb_tssp[1]; + for (i = uap->start; i < uap->start + uap->length; i++) { + if (uap->enable) + iomap[i >> 3] &= ~(1 << (i & 7)); + else + iomap[i >> 3] |= (1 << (i & 7)); + } + return (error); +} + +int +amd64_get_ioperm(td, uap) + struct thread *td; + struct i386_ioperm_args *uap; +{ + int i, state; + char *iomap; + + if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) + return (EINVAL); + if (td->td_pcb->pcb_tssp == NULL) { + uap->length = 0; + goto done; + } + + iomap = (char *)&td->td_pcb->pcb_tssp[1]; + + i = uap->start; + state = (iomap[i >> 3] >> (i & 7)) & 1; + uap->enable = !state; + uap->length = 1; + + for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { + 
if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) + break; + uap->length++; + } + +done: + return (0); +} + +/* + * Update the GDT entry pointing to the LDT to point to the LDT of the + * current process. + */ +void +set_user_ldt(struct mdproc *mdp) +{ + + critical_enter(); + *PCPU_GET(ldt) = mdp->md_ldt_sd; + lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); + critical_exit(); +} + +#ifdef notyet +#ifdef SMP +static void +set_user_ldt_rv(struct vmspace *vmsp) +{ + struct thread *td; + + td = curthread; + if (vmsp != td->td_proc->p_vmspace) + return; + + set_user_ldt(&td->td_proc->p_md); +} +#endif +#endif + +/* + * Return the LDT of process p, allocating one if needed. + * + * Must be called with dt_lock held. The lock is dropped around the + * allocations and re-taken, so it is held again on return, including + * the NULL (kmem_alloc failure) return. + * + * With force == 0 an already existing LDT is returned unchanged. With + * force != 0 a fresh LDT always replaces the current one; the old + * contents are copied over and one reference on the old LDT is dropped. + */ +struct proc_ldt * +user_ldt_alloc(struct proc *p, int force) +{ + struct proc_ldt *pldt, *new_ldt; + struct mdproc *mdp; + struct soft_segment_descriptor sldt; + + mtx_assert(&dt_lock, MA_OWNED); + mdp = &p->p_md; + if (!force && mdp->md_ldt != NULL) + return (mdp->md_ldt); + mtx_unlock(&dt_lock); + new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); + new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, + max_ldt_segment * sizeof(struct user_segment_descriptor)); + if (new_ldt->ldt_base == NULL) { + free(new_ldt, M_SUBPROC); + mtx_lock(&dt_lock); + return (NULL); + } + new_ldt->ldt_refcnt = 1; + /* Describe the new table as a present, kernel-DPL LDT segment. */ + sldt.ssd_base = (uint64_t)new_ldt->ldt_base; + sldt.ssd_limit = max_ldt_segment * + sizeof(struct user_segment_descriptor) - 1; + sldt.ssd_type = SDT_SYSLDT; + sldt.ssd_dpl = SEL_KPL; + sldt.ssd_p = 1; + sldt.ssd_long = 0; + sldt.ssd_def32 = 0; + sldt.ssd_gran = 0; + mtx_lock(&dt_lock); + /* The lock was dropped above: somebody may have installed an LDT. */ + pldt = mdp->md_ldt; + if (pldt != NULL && !force) { + kmem_free(kernel_map, (vm_offset_t)new_ldt->ldt_base, + max_ldt_segment * sizeof(struct user_segment_descriptor)); + free(new_ldt, M_SUBPROC); + return (pldt); + } + + mdp->md_ldt = new_ldt; + if (pldt != NULL) { + bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment * + sizeof(struct user_segment_descriptor)); + user_ldt_derefl(pldt); + } + ssdtosyssd(&sldt, &p->p_md.md_ldt_sd); + if (p == curproc) + set_user_ldt(mdp); + + return
(mdp->md_ldt); +} + +void +user_ldt_free(struct thread *td) +{ + struct proc *p = td->td_proc; + struct mdproc *mdp = &p->p_md; + struct proc_ldt *pldt; + + mtx_assert(&dt_lock, MA_OWNED); + if ((pldt = mdp->md_ldt) == NULL) { + mtx_unlock(&dt_lock); + return; + } + + mdp->md_ldt = NULL; + bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd)); + if (td == curthread) + lldt(GSEL(GNULL_SEL, SEL_KPL)); + user_ldt_deref(pldt); +} + +static void +user_ldt_derefl(struct proc_ldt *pldt) +{ + + if (--pldt->ldt_refcnt == 0) { + kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base, + max_ldt_segment * sizeof(struct user_segment_descriptor)); + free(pldt, M_SUBPROC); + } +} + +void +user_ldt_deref(struct proc_ldt *pldt) +{ + + mtx_assert(&dt_lock, MA_OWNED); + user_ldt_derefl(pldt); + mtx_unlock(&dt_lock); +} + +/* + * Note for the authors of compat layers (linux, etc): copyout() in + * the function below is not a problem since it presents data in + * arch-specific format (i.e. i386-specific in this case), not in + * the OS-specific one. 
+ */ +int +amd64_get_ldt(td, uap) + struct thread *td; + struct i386_ldt_args *uap; +{ + int error = 0; + struct proc_ldt *pldt; + int num; + struct user_segment_descriptor *lp; + +#ifdef DEBUG + printf("amd64_get_ldt: start=%d num=%d descs=%p\n", + uap->start, uap->num, (void *)uap->descs); +#endif + + if ((pldt = td->td_proc->p_md.md_ldt) != NULL) { + lp = &((struct user_segment_descriptor *)(pldt->ldt_base)) + [uap->start]; + num = min(uap->num, max_ldt_segment); + } else + return (EINVAL); + + if ((uap->start > (unsigned int)max_ldt_segment) || + ((unsigned int)num > (unsigned int)max_ldt_segment) || + ((unsigned int)(uap->start + num) > (unsigned int)max_ldt_segment)) + return(EINVAL); + + error = copyout(lp, uap->descs, num * + sizeof(struct user_segment_descriptor)); + if (!error) + td->td_retval[0] = num; + + return(error); +} + +int +amd64_set_ldt(td, uap, descs) + struct thread *td; + struct i386_ldt_args *uap; + struct user_segment_descriptor *descs; +{ + int error = 0, i; + int largest_ld; + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt; + struct user_segment_descriptor *dp; + struct proc *p; + +#ifdef DEBUG + printf("amd64_set_ldt: start=%d num=%d descs=%p\n", + uap->start, uap->num, (void *)uap->descs); +#endif + + p = td->td_proc; + if (descs == NULL) { + /* Free descriptors */ + if (uap->start == 0 && uap->num == 0) + uap->num = max_ldt_segment; + if (uap->num <= 0) + return (EINVAL); + if ((pldt = mdp->md_ldt) == NULL || + uap->start >= max_ldt_segment) + return (0); + largest_ld = uap->start + uap->num; + if (largest_ld > max_ldt_segment) + largest_ld = max_ldt_segment; + i = largest_ld - uap->start; + mtx_lock(&dt_lock); + bzero(&((struct user_segment_descriptor *)(pldt->ldt_base)) + [uap->start], sizeof(struct user_segment_descriptor) * i); + mtx_unlock(&dt_lock); + return (0); + } + + if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { + /* verify range of descriptors to modify */ + largest_ld = uap->start + uap->num; + 
if (uap->start >= max_ldt_segment || + uap->num < 0 || largest_ld > max_ldt_segment) + return (EINVAL); + } + + /* Check descriptors for access violations */ + for (i = 0; i < uap->num; i++) { + dp = &descs[i]; + + switch (dp->sd_type) { + case SDT_SYSNULL: /* system null */ + dp->sd_p = 0; + break; + case SDT_SYS286TSS: + case SDT_SYSLDT: + case SDT_SYS286BSY: + case SDT_SYS286CGT: + case SDT_SYSTASKGT: + case SDT_SYS286IGT: + case SDT_SYS286TGT: + case SDT_SYSNULL2: + case SDT_SYSTSS: + case SDT_SYSNULL3: + case SDT_SYSBSY: + case SDT_SYSCGT: + case SDT_SYSNULL4: + case SDT_SYSIGT: + case SDT_SYSTGT: + /* I can't think of any reason to allow a user proc + * to create a segment of these types. They are + * for OS use only. + */ + return (EACCES); + /*NOTREACHED*/ + + /* memory segment types */ + case SDT_MEMEC: /* memory execute only conforming */ + case SDT_MEMEAC: /* memory execute only accessed conforming */ + case SDT_MEMERC: /* memory execute read conforming */ + case SDT_MEMERAC: /* memory execute read accessed conforming */ + /* Must be "present" if executable and conforming. */ + if (dp->sd_p == 0) + return (EACCES); + break; + case SDT_MEMRO: /* memory read only */ + case SDT_MEMROA: /* memory read only accessed */ + case SDT_MEMRW: /* memory read write */ + case SDT_MEMRWA: /* memory read write accessed */ + case SDT_MEMROD: /* memory read only expand dwn limit */ + case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ + case SDT_MEMRWD: /* memory read write expand dwn limit */ + case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ + case SDT_MEME: /* memory execute only */ + case SDT_MEMEA: /* memory execute only accessed */ + case SDT_MEMER: /* memory execute read */ + case SDT_MEMERA: /* memory execute read accessed */ + break; + default: + return(EINVAL); + /*NOTREACHED*/ + } + + /* Only user (ring-3) descriptors may be present. 
*/ + if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL)) + return (EACCES); + } + + if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { + /* Allocate a free slot */ + mtx_lock(&dt_lock); + pldt = user_ldt_alloc(p, 0); + if (pldt == NULL) { + mtx_unlock(&dt_lock); + return (ENOMEM); + } + + /* + * start scanning a bit up to leave room for NVidia and + * Wine, which still user the "Blat" method of allocation. + */ + i = 16; + dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i]; + for (; i < max_ldt_segment; ++i, ++dp) { + if (dp->sd_type == SDT_SYSNULL) + break; + } + if (i >= max_ldt_segment) { + mtx_unlock(&dt_lock); + return (ENOSPC); + } + uap->start = i; + error = amd64_set_ldt_data(td, i, 1, descs); + mtx_unlock(&dt_lock); + } else { + largest_ld = uap->start + uap->num; + if (largest_ld > max_ldt_segment) + return (EINVAL); + mtx_lock(&dt_lock); + if (user_ldt_alloc(p, 0) != NULL) { + error = amd64_set_ldt_data(td, uap->start, uap->num, + descs); + } + mtx_unlock(&dt_lock); + } + if (error == 0) + td->td_retval[0] = uap->start; + return (error); +} + +int +amd64_set_ldt_data(struct thread *td, int start, int num, + struct user_segment_descriptor *descs) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + + mtx_assert(&dt_lock, MA_OWNED); + + /* Fill in range */ + bcopy(descs, + &((struct user_segment_descriptor *)(pldt->ldt_base))[start], + num * sizeof(struct user_segment_descriptor)); + return (0); +} diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 8d710cd..f36effd 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -171,6 +171,52 @@ SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, extern char *syscallnames[]; +/* #define DEBUG */ +#ifdef DEBUG +static void +report_seg_fault(const char *segn, struct trapframe *frame) +{ + struct proc_ldt *pldt; + struct trapframe *pf; + + pldt = curproc->p_md.md_ldt; + printf("%d: %s load fault %lx %p %d\n", + curproc->p_pid, segn, 
frame->tf_err, + pldt != NULL ? pldt->ldt_base : NULL, + pldt != NULL ? pldt->ldt_refcnt : 0); + kdb_backtrace(); + /* Dump the frame that frame->tf_rsp points at, one field per line. */ + pf = (struct trapframe *)frame->tf_rsp; + printf("rdi %lx\n", pf->tf_rdi); + printf("rsi %lx\n", pf->tf_rsi); + printf("rdx %lx\n", pf->tf_rdx); + printf("rcx %lx\n", pf->tf_rcx); + printf("r8 %lx\n", pf->tf_r8); + printf("r9 %lx\n", pf->tf_r9); + printf("rax %lx\n", pf->tf_rax); + printf("rbx %lx\n", pf->tf_rbx); + printf("rbp %lx\n", pf->tf_rbp); + printf("r10 %lx\n", pf->tf_r10); + printf("r11 %lx\n", pf->tf_r11); + printf("r12 %lx\n", pf->tf_r12); + printf("r13 %lx\n", pf->tf_r13); + printf("r14 %lx\n", pf->tf_r14); + printf("r15 %lx\n", pf->tf_r15); + /* The selectors are 16 bits wide and promote to int: use %x, not %lx. */ + printf("fs %x\n", pf->tf_fs); + printf("gs %x\n", pf->tf_gs); + printf("es %x\n", pf->tf_es); + printf("ds %x\n", pf->tf_ds); + /* tf_trapno and tf_flags are 32 bits wide, so they take %x as well. */ + printf("tno %x\n", pf->tf_trapno); + printf("adr %lx\n", pf->tf_addr); + printf("flg %x\n", pf->tf_flags); + printf("err %lx\n", pf->tf_err); + printf("rip %lx\n", pf->tf_rip); + printf("cs %lx\n", pf->tf_cs); + printf("rfl %lx\n", pf->tf_rflags); + printf("rsp %lx\n", pf->tf_rsp); + printf("ss %lx\n", pf->tf_ss); +} +#endif + /* * Exception, fault, and trap interface to the FreeBSD kernel. 
* This common code is called from assembly language IDT gate entry @@ -478,6 +524,38 @@ trap(struct trapframe *frame) frame->tf_rip = (long)doreti_iret_fault; goto out; } + if (frame->tf_rip == (long)ld_ds) { +#ifdef DEBUG + report_seg_fault("ds", frame); +#endif + frame->tf_rip = (long)ds_load_fault; + frame->tf_ds = _udatasel; + goto out; + } + if (frame->tf_rip == (long)ld_es) { +#ifdef DEBUG + report_seg_fault("es", frame); +#endif + frame->tf_rip = (long)es_load_fault; + frame->tf_es = _udatasel; + goto out; + } + if (frame->tf_rip == (long)ld_fs) { +#ifdef DEBUG + report_seg_fault("fs", frame); +#endif + frame->tf_rip = (long)fs_load_fault; + frame->tf_fs = _ufssel; + goto out; + } + if (frame->tf_rip == (long)ld_gs) { +#ifdef DEBUG + report_seg_fault("gs", frame); +#endif + frame->tf_rip = (long)gs_load_fault; + frame->tf_gs = _ugssel; + goto out; + } if (PCPU_GET(curpcb)->pcb_onfault != NULL) { frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault; @@ -573,6 +651,9 @@ trap(struct trapframe *frame) trapsignal(td, &ksi); #ifdef DEBUG +{ + register_t rg,rgk, rf; + if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); @@ -580,6 +661,17 @@ trap(struct trapframe *frame) uprintf(", fault VA = 0x%lx", frame->tf_addr); uprintf("\n"); } + rf = rdmsr(0xc0000100); + rg = rdmsr(0xc0000101); + rgk = rdmsr(0xc0000102); + uprintf("pid %d TRAP %d rip %lx err %lx addr %lx cs %lx ss %lx ds %lx " + "es %lx fs %lx fsbase %lx %lx gs %lx gsbase %lx %lx %lx\n", + curproc->p_pid, type, frame->tf_rip, frame->tf_err, + frame->tf_addr, + frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, + frame->tf_fs, td->td_pcb->pcb_fsbase, rf, + frame->tf_gs, td->td_pcb->pcb_gsbase, rg, rgk); +} #endif user: diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 0cc9b03..72c4917 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include 
#include @@ -107,11 +108,23 @@ cpu_fork(td1, p2, td2, flags) { register struct proc *p1; struct pcb *pcb2; - struct mdproc *mdp2; + struct mdproc *mdp1, *mdp2; + struct proc_ldt *pldt; p1 = td1->td_proc; - if ((flags & RFPROC) == 0) + if ((flags & RFPROC) == 0) { + if ((flags & RFMEM) == 0) { + /* unshare user LDT */ + mdp1 = &p1->p_md; + mtx_lock(&dt_lock); + if ((pldt = mdp1->md_ldt) != NULL && + pldt->ldt_refcnt > 1 && + user_ldt_alloc(p1, 1) == NULL) + panic("could not copy LDT"); + mtx_unlock(&dt_lock); + } return; + } /* Ensure that p1's pcb is up to date. */ fpuexit(td1); @@ -173,6 +186,32 @@ cpu_fork(td1, p2, td2, flags) td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + /* As an i386, do not copy io permission bitmap. */ + pcb2->pcb_tssp = NULL; + + /* Copy the LDT, if necessary. */ + mdp1 = &td1->td_proc->p_md; + mdp2 = &p2->p_md; + mtx_lock(&dt_lock); + if (mdp1->md_ldt != NULL) { + if (flags & RFMEM) { + mdp1->md_ldt->ldt_refcnt++; + mdp2->md_ldt = mdp1->md_ldt; + bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct + system_segment_descriptor)); + } else { + mdp2->md_ldt = NULL; + mdp2->md_ldt = user_ldt_alloc(p2, 0); + if (mdp2->md_ldt == NULL) + panic("could not copy LDT"); + amd64_set_ldt_data(td2, 0, max_ldt_segment, + (struct user_segment_descriptor *) + mdp1->md_ldt->ldt_base); + } + } else + mdp2->md_ldt = NULL; + mtx_unlock(&dt_lock); + /* * Now, cpu_switch() can schedule the new process. * pcb_rsp is loaded pointing to the cpu_switch() stack frame @@ -207,25 +246,49 @@ cpu_set_fork_handler(td, func, arg) void cpu_exit(struct thread *td) { + + /* + * If this process has a custom LDT, release it. + */ + mtx_lock(&dt_lock); + if (td->td_proc->p_md.md_ldt != 0) + user_ldt_free(td); + else + mtx_unlock(&dt_lock); } void cpu_thread_exit(struct thread *td) { + struct pcb *pcb; if (td == PCPU_GET(fpcurthread)) fpudrop(); + pcb = td->td_pcb; + /* Disable any hardware breakpoints. 
*/ - if (td->td_pcb->pcb_flags & PCB_DBREGS) { + if (pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); - td->td_pcb->pcb_flags &= ~PCB_DBREGS; + pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { + struct pcb *pcb; + + pcb = td->td_pcb; + + /* + * Clean TSS/iomap + */ + if (pcb->pcb_tssp != NULL) { + kmem_free(kernel_map, (vm_offset_t)pcb->pcb_tssp, + ctob(IOPAGES + 1)); + pcb->pcb_tssp = NULL; + } } void @@ -250,6 +313,8 @@ cpu_thread_alloc(struct thread *td) void cpu_thread_free(struct thread *td) { + + cpu_thread_clean(td); } /* @@ -361,6 +426,11 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f; td->td_frame->tf_rsp -= 8; td->td_frame->tf_rip = (register_t)entry; + td->td_frame->tf_ds = _udatasel; + td->td_frame->tf_es = _udatasel; + td->td_frame->tf_fs = _ufssel; + td->td_frame->tf_gs = _ugssel; + td->td_frame->tf_flags = TF_HASSEGS; /* * Pass the address of the mailbox for this kse to the uts @@ -378,25 +448,11 @@ cpu_set_user_tls(struct thread *td, void *tls_base) #ifdef COMPAT_IA32 if (td->td_proc->p_sysent == &ia32_freebsd_sysvec) { - if (td == curthread) { - critical_enter(); - td->td_pcb->pcb_gsbase = (register_t)tls_base; - wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase); - critical_exit(); - } else { - td->td_pcb->pcb_gsbase = (register_t)tls_base; - } + td->td_pcb->pcb_gsbase = (register_t)tls_base; return (0); } #endif - if (td == curthread) { - critical_enter(); - td->td_pcb->pcb_fsbase = (register_t)tls_base; - wrmsr(MSR_FSBASE, td->td_pcb->pcb_fsbase); - critical_exit(); - } else { - td->td_pcb->pcb_fsbase = (register_t)tls_base; - } + td->td_pcb->pcb_fsbase = (register_t)tls_base; return (0); } diff --git a/sys/amd64/ia32/ia32_exception.S b/sys/amd64/ia32/ia32_exception.S index 4820f53..76c5d5a 100644 --- a/sys/amd64/ia32/ia32_exception.S +++ b/sys/amd64/ia32/ia32_exception.S @@ -60,6 +60,11 @@ IDTVEC(int0x80_syscall) movq %r13,TF_R13(%rsp) movq 
%r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call ia32_syscall diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c index 8abc6fc..49dd4e2 100644 --- a/sys/amd64/ia32/ia32_reg.c +++ b/sys/amd64/ia32/ia32_reg.c @@ -85,9 +85,17 @@ fill_regs32(struct thread *td, struct reg32 *regs) tp = td->td_frame; pcb = td->td_pcb; - regs->r_fs = pcb->pcb_fs; - regs->r_es = pcb->pcb_es; - regs->r_ds = pcb->pcb_ds; + if (tp->tf_flags & TF_HASSEGS) { + regs->r_gs = tp->tf_gs; + regs->r_fs = tp->tf_fs; + regs->r_es = tp->tf_es; + regs->r_ds = tp->tf_ds; + } else { + regs->r_gs = _ugssel; + regs->r_fs = _ufssel; + regs->r_es = _udatasel; + regs->r_ds = _udatasel; + } regs->r_edi = tp->tf_rdi; regs->r_esi = tp->tf_rsi; regs->r_ebp = tp->tf_rbp; @@ -100,7 +108,6 @@ fill_regs32(struct thread *td, struct reg32 *regs) regs->r_eflags = tp->tf_rflags; regs->r_esp = tp->tf_rsp; regs->r_ss = tp->tf_ss; - regs->r_gs = pcb->pcb_gs; return (0); } @@ -114,14 +121,11 @@ set_regs32(struct thread *td, struct reg32 *regs) if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; -#if 0 - load_fs(regs->r_fs); - pcb->pcb_fs = regs->r_fs; - load_es(regs->r_es); - pcb->pcb_es = regs->r_es; - load_ds(regs->r_ds); - pcb->pcb_ds = regs->r_ds; -#endif + tp->tf_gs = regs->r_gs; + tp->tf_fs = regs->r_fs; + tp->tf_es = regs->r_es; + tp->tf_ds = regs->r_ds; + tp->tf_flags = TF_HASSEGS; tp->tf_rdi = regs->r_edi; tp->tf_rsi = regs->r_esi; tp->tf_rbp = regs->r_ebp; @@ -134,10 +138,6 @@ set_regs32(struct thread *td, struct reg32 *regs) tp->tf_rflags = regs->r_eflags; tp->tf_rsp = regs->r_esp; tp->tf_ss = regs->r_ss; -#if 0 - load_gs(regs->r_gs); - pcb->pcb_gs = regs->r_gs; -#endif return (0); } @@ -166,7 +166,8 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs) penv_87->en_fcs = 
td->td_frame->tf_cs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_rdp; - penv_87->en_fos = td->td_pcb->pcb_ds; + /* Entry into the kernel always sets TF_HASSEGS */ + penv_87->en_fos = td->td_frame->tf_ds; /* FPU registers */ for (i = 0; i < 8; ++i) diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index 162dcf9..594b5f4 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -85,8 +85,6 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *); static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp); static int ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp); -extern int _ucode32sel, _udatasel; - #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) @@ -134,10 +132,11 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); - mcp->mc_gs = td->td_pcb->pcb_gs; - mcp->mc_fs = td->td_pcb->pcb_fs; - mcp->mc_es = td->td_pcb->pcb_es; - mcp->mc_ds = td->td_pcb->pcb_ds; + /* Entry into kernel always sets TF_HASSEGS */ + mcp->mc_gs = tp->tf_gs; + mcp->mc_fs = tp->tf_fs; + mcp->mc_es = tp->tf_es; + mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_rdi; mcp->mc_esi = tp->tf_rsi; mcp->mc_ebp = tp->tf_rbp; @@ -158,6 +157,8 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); ia32_get_fpcontext(td, mcp); + mcp->mc_fsbase = td->td_pcb->pcb_fsbase; + mcp->mc_gsbase = td->td_pcb->pcb_gsbase; return (0); } @@ -182,11 +183,11 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) ret = ia32_set_fpcontext(td, mcp); if (ret != 0) return (ret); -#if 0 /* XXX deal with load_fs() and friends */ + tp->tf_gs = mcp->mc_gs; tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = 
mcp->mc_ds; -#endif + tp->tf_flags = TF_HASSEGS; tp->tf_rdi = mcp->mc_edi; tp->tf_rsi = mcp->mc_esi; tp->tf_rbp = mcp->mc_ebp; @@ -199,9 +200,6 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; -#if 0 /* XXX deal with load_gs() and friends */ - td->td_pcb->pcb_gs = mcp->mc_gs; -#endif td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } @@ -326,10 +324,6 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - sf.sf_uc.uc_mcontext.mc_fs = rfs(); - __asm __volatile("movl %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es)); - __asm __volatile("movl %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds)); sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi; sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi; sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp; @@ -345,6 +339,10 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags; sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp; sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss; + sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds; + sf.sf_uc.uc_mcontext.mc_es = regs->tf_es; + sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; + sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; /* Allocate space for the signal handler context. 
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && @@ -394,10 +392,8 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; - load_ds(_udatasel); - td->td_pcb->pcb_ds = _udatasel; - load_es(_udatasel); - td->td_pcb->pcb_es = _udatasel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; /* leave user %fs and %gs untouched */ PROC_LOCK(p); mtx_lock(&psp->ps_mtx); @@ -441,10 +437,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - sf.sf_uc.uc_mcontext.mc_fs = rfs(); - __asm __volatile("movl %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es)); - __asm __volatile("movl %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds)); sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi; sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi; sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp; @@ -460,9 +452,15 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags; sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp; sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss; + sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds; + sf.sf_uc.uc_mcontext.mc_es = regs->tf_es; + sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; + sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); + sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; + sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; /* Allocate space for the signal handler context. 
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && @@ -514,11 +512,9 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; - load_ds(_udatasel); - td->td_pcb->pcb_ds = _udatasel; - load_es(_udatasel); - td->td_pcb->pcb_es = _udatasel; - /* leave user %fs and %gs untouched */ + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + /* XXXKIB leave user %fs and %gs untouched */ PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -591,7 +587,6 @@ freebsd4_freebsd32_sigreturn(td, uap) return (EINVAL); } - /* Segment selectors restored by sigtramp.S */ regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; regs->tf_rbp = ucp->uc_mcontext.mc_ebp; @@ -606,6 +601,10 @@ freebsd4_freebsd32_sigreturn(td, uap) regs->tf_rflags = ucp->uc_mcontext.mc_eflags; regs->tf_rsp = ucp->uc_mcontext.mc_esp; regs->tf_ss = ucp->uc_mcontext.mc_ss; + regs->tf_ds = ucp->uc_mcontext.mc_ds; + regs->tf_es = ucp->uc_mcontext.mc_es; + regs->tf_fs = ucp->uc_mcontext.mc_fs; + regs->tf_gs = ucp->uc_mcontext.mc_gs; PROC_LOCK(p); td->td_sigmask = ucp->uc_sigmask; @@ -678,7 +677,6 @@ freebsd32_sigreturn(td, uap) if (ret != 0) return (ret); - /* Segment selectors restored by sigtramp.S */ regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; regs->tf_rbp = ucp->uc_mcontext.mc_ebp; @@ -693,6 +691,11 @@ freebsd32_sigreturn(td, uap) regs->tf_rflags = ucp->uc_mcontext.mc_eflags; regs->tf_rsp = ucp->uc_mcontext.mc_esp; regs->tf_ss = ucp->uc_mcontext.mc_ss; + regs->tf_ds = ucp->uc_mcontext.mc_ds; + regs->tf_es = ucp->uc_mcontext.mc_es; + regs->tf_fs = ucp->uc_mcontext.mc_fs; + regs->tf_gs = ucp->uc_mcontext.mc_gs; + regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); td->td_sigmask = ucp->uc_sigmask; @@ -715,20 +718,14 @@ ia32_setregs(td, entry, stack, ps_strings) struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; - critical_enter(); - wrmsr(MSR_FSBASE, 0); 
- wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ + mtx_lock(&dt_lock); + if (td->td_proc->p_md.md_ldt != NULL) + user_ldt_free(td); + else + mtx_unlock(&dt_lock); + pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; - critical_exit(); - load_ds(_udatasel); - load_es(_udatasel); - load_fs(_udatasel); - load_gs(_udatasel); - pcb->pcb_ds = _udatasel; - pcb->pcb_es = _udatasel; - pcb->pcb_fs = _udatasel; - pcb->pcb_gs = _udatasel; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; @@ -737,6 +734,12 @@ ia32_setregs(td, entry, stack, ps_strings) regs->tf_ss = _udatasel; regs->tf_cs = _ucode32sel; regs->tf_rbx = ps_strings; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; + load_cr0(rcr0() | CR0_MP | CR0_TS); fpstate_drop(td); diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S index 7b20bc4..9455169 100644 --- a/sys/amd64/ia32/ia32_sigtramp.S +++ b/sys/amd64/ia32/ia32_sigtramp.S @@ -45,8 +45,6 @@ ia32_sigcode: calll *IA32_SIGF_HANDLER(%esp) leal IA32_SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax - movl IA32_UC_ES(%eax),%es /* restore %es */ - movl IA32_UC_DS(%eax),%ds /* restore %ds */ movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ @@ -60,8 +58,6 @@ freebsd4_ia32_sigcode: calll *IA32_SIGF_HANDLER(%esp) leal IA32_SIGF_UC4(%esp),%eax/* get ucontext */ pushl %eax - movl IA32_UC4_ES(%eax),%es /* restore %es */ - movl IA32_UC4_DS(%eax),%ds /* restore %ds */ movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. 
*/ int $0x80 /* enter kernel with args */ diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index 788f39f..0bf0029 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -161,7 +161,12 @@ movq %r12,TF_R12(%rsp) ; \ movq %r13,TF_R13(%rsp) ; \ movq %r14,TF_R14(%rsp) ; \ - movq %r15,TF_R15(%rsp) + movq %r15,TF_R15(%rsp) ; \ + movw %fs,TF_FS(%rsp) ; \ + movw %gs,TF_GS(%rsp) ; \ + movw %es,TF_ES(%rsp) ; \ + movw %ds,TF_DS(%rsp) ; \ + movl $TF_HASSEGS,TF_FLAGS(%rsp) #define POP_FRAME \ movq TF_RDI(%rsp),%rdi ; \ diff --git a/sys/amd64/include/frame.h b/sys/amd64/include/frame.h index 26c9dd0..12722a4 100644 --- a/sys/amd64/include/frame.h +++ b/sys/amd64/include/frame.h @@ -64,9 +64,13 @@ struct trapframe { register_t tf_r13; register_t tf_r14; register_t tf_r15; - register_t tf_trapno; + uint32_t tf_trapno; + uint16_t tf_fs; + uint16_t tf_gs; register_t tf_addr; - register_t tf_flags; + uint32_t tf_flags; + uint16_t tf_es; + uint16_t tf_ds; /* below portion defined in hardware */ register_t tf_err; register_t tf_rip; @@ -76,4 +80,7 @@ struct trapframe { register_t tf_ss; }; +#define TF_HASSEGS 0x1 +/* #define _MC_HASBASES 0x2 */ + #endif /* _MACHINE_FRAME_H_ */ diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 2125b9f..892e19d 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -60,6 +60,11 @@ extern char sigcode[]; extern int szsigcode; extern uint64_t *vm_page_dump; extern int vm_page_dump_size; +extern int _udatasel; +extern int _ucodesel; +extern int _ucode32sel; +extern int _ufssel; +extern int _ugssel; typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); struct thread; @@ -72,6 +77,14 @@ void busdma_swi(void); void cpu_setregs(void); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); +void ld_ds(void) __asm(__STRING(ld_ds)); +void ld_es(void) __asm(__STRING(ld_es)); +void 
ld_fs(void) __asm(__STRING(ld_fs)); +void ld_gs(void) __asm(__STRING(ld_gs)); +void ds_load_fault(void) __asm(__STRING(ds_load_fault)); +void es_load_fault(void) __asm(__STRING(es_load_fault)); +void fs_load_fault(void) __asm(__STRING(fs_load_fault)); +void gs_load_fault(void) __asm(__STRING(gs_load_fault)); void dump_add_page(vm_paddr_t); void dump_drop_page(vm_paddr_t); void initializecpu(void); diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 43b59e5..8ce784d 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -56,10 +56,6 @@ struct pcb { register_t pcb_fsbase; register_t pcb_gsbase; u_long pcb_flags; - u_int32_t pcb_ds; - u_int32_t pcb_es; - u_int32_t pcb_fs; - u_int32_t pcb_gs; u_int64_t pcb_dr0; u_int64_t pcb_dr1; u_int64_t pcb_dr2; @@ -78,6 +74,8 @@ struct pcb { /* 32-bit segment descriptor */ struct user_segment_descriptor pcb_gs32sd; + /* local tss, with i/o bitmap; NULL for common */ + struct amd64tss *pcb_tssp; }; #ifdef _KERNEL diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index e9faf28..e3ab522 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -44,12 +44,20 @@ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ - struct amd64tss *pc_tssp; \ + struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ + struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ u_int pc_apic_id; \ u_int pc_acpi_id; /* ACPI CPU id */ \ - struct user_segment_descriptor *pc_gs32p + /* Pointer to the CPU %fs descriptor */ \ + struct user_segment_descriptor *pc_fs32p; \ + /* Pointer to the CPU %gs descriptor */ \ + struct user_segment_descriptor *pc_gs32p; \ + /* Pointer to the CPU LDT descriptor */ \ + struct system_segment_descriptor *pc_ldt; \ + /* Pointer to the CPU TSS descriptor */ \ + struct system_segment_descriptor *pc_tss 
#ifdef _KERNEL diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index a3ebd79..273ba5e 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -33,6 +33,13 @@ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ +#include + +struct proc_ldt { + caddr_t ldt_base; + int ldt_refcnt; +}; + /* * Machine-dependent part of the proc structure for AMD64. */ @@ -42,6 +49,8 @@ struct mdthread { }; struct mdproc { + struct proc_ldt *md_ldt; /* (t) per-process ldt */ + struct system_segment_descriptor md_ldt_sd; }; #ifdef _KERNEL @@ -55,6 +64,18 @@ struct mdproc { (char *)&td; \ } while (0) +void set_user_ldt(struct mdproc *); +struct proc_ldt *user_ldt_alloc(struct proc *, int); +void user_ldt_free(struct thread *); +void user_ldt_deref(struct proc_ldt *); +struct sysarch_args; +int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space); +int amd64_set_ldt_data(struct thread *td, int start, int num, + struct user_segment_descriptor *descs); + +extern struct mtx dt_lock; +extern int max_ldt_segment; + #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h index 1c83d1c..6c6cb40 100644 --- a/sys/amd64/include/segments.h +++ b/sys/amd64/include/segments.h @@ -108,12 +108,29 @@ struct gate_descriptor { u_int64_t sd_xx1:32; } __packed; +/* + * Generic descriptor + */ +union descriptor { + struct user_segment_descriptor sd; + struct gate_descriptor gd; +}; + /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ +#define SDT_SYS286TSS 1 /* system 286 TSS available */ #define SDT_SYSLDT 2 /* system 64 bit local descriptor table */ +#define SDT_SYS286BSY 3 /* system 286 TSS busy */ +#define SDT_SYS286CGT 4 /* system 286 call gate */ +#define SDT_SYSTASKGT 5 /* system task gate */ +#define SDT_SYS286IGT 6 /* system 286 interrupt gate */ +#define SDT_SYS286TGT 7 /* system 286 trap gate */ +#define SDT_SYSNULL2 8 /* system null again */ #define 
SDT_SYSTSS 9 /* system available 64 bit TSS */ +#define SDT_SYSNULL3 10 /* system null again */ #define SDT_SYSBSY 11 /* system busy 64 bit TSS */ #define SDT_SYSCGT 12 /* system 64 bit call gate */ +#define SDT_SYSNULL4 13 /* system null again */ #define SDT_SYSIGT 14 /* system 64 bit interrupt gate */ #define SDT_SYSTGT 15 /* system 64 bit trap gate */ @@ -203,7 +220,10 @@ struct region_descriptor { #define GPROC0_SEL 6 /* TSS for entering kernel etc */ /* slot 7 is second half of GPROC0_SEL */ #define GUGS32_SEL 8 /* User 32 bit GS Descriptor */ -#define NGDT 9 +#define GUFS32_SEL 9 /* User 32 bit FS Descriptor */ +#define GUSERLDT_SEL 10 /* LDT */ +/* slot 11 is second half of GUSERLDT_SEL */ +#define NGDT 12 #ifdef _KERNEL extern struct user_segment_descriptor gdt[]; diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h index 67c8a4a..6c3e6c9 100644 --- a/sys/amd64/include/sysarch.h +++ b/sys/amd64/include/sysarch.h @@ -35,6 +35,15 @@ #ifndef _MACHINE_SYSARCH_H_ #define _MACHINE_SYSARCH_H_ +#define I386_GET_LDT 0 +#define I386_SET_LDT 1 +#define LDT_AUTO_ALLOC 0xffffffff + /* I386_IOPL */ +#define I386_GET_IOPERM 3 +#define I386_SET_IOPERM 4 + +/* XXX Not implementable #define I386_VM86 6 */ + #define I386_GET_FSBASE 7 #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 @@ -46,6 +55,18 @@ #define AMD64_GET_GSBASE 130 #define AMD64_SET_GSBASE 131 +struct i386_ldt_args { + unsigned int start; + struct user_segment_descriptor *descs __packed; + unsigned int num; +}; + +struct i386_ioperm_args { + unsigned int start; + unsigned int length; + int enable; +}; + #ifndef _KERNEL #include @@ -56,6 +77,15 @@ int amd64_set_fsbase(void *); int amd64_set_gsbase(void *); int sysarch(int, void *); __END_DECLS +#else +struct thread; +union descriptor; + +int amd64_get_ldt(struct thread *, struct i386_ldt_args *); +int amd64_set_ldt(struct thread *, struct i386_ldt_args *, + struct user_segment_descriptor *); +int amd64_get_ioperm(struct thread *, struct 
i386_ioperm_args *); +int amd64_set_ioperm(struct thread *, struct i386_ioperm_args *); #endif #endif /* !_MACHINE_SYSARCH_H_ */ diff --git a/sys/amd64/include/ucontext.h b/sys/amd64/include/ucontext.h index 5c13803..c5bbd65 100644 --- a/sys/amd64/include/ucontext.h +++ b/sys/amd64/include/ucontext.h @@ -32,9 +32,16 @@ #ifndef _MACHINE_UCONTEXT_H_ #define _MACHINE_UCONTEXT_H_ +/* + * mc_flags bits. Shall be in sync with TF_XXX. + */ +#define _MC_HASSEGS 0x1 +#define _MC_HASBASES 0x2 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES) + typedef struct __mcontext { /* - * The first 20 fields must match the definition of + * The first 24 fields must match the definition of * sigcontext. So that we can support sigcontext * and ucontext_t at the same time. */ @@ -54,9 +61,13 @@ typedef struct __mcontext { __register_t mc_r13; __register_t mc_r14; __register_t mc_r15; - __register_t mc_trapno; + __uint32_t mc_trapno; + __uint16_t mc_fs; + __uint16_t mc_gs; __register_t mc_addr; - __register_t mc_flags; + __uint32_t mc_flags; + __uint16_t mc_es; + __uint16_t mc_ds; __register_t mc_err; __register_t mc_rip; __register_t mc_cs; @@ -65,6 +76,7 @@ typedef struct __mcontext { __register_t mc_ss; long mc_len; /* sizeof(mcontext_t) */ + #define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ #define _MC_FPFMT_XMM 0x10002 long mc_fpformat; @@ -76,7 +88,11 @@ typedef struct __mcontext { * See for the internals of mc_fpstate[].
*/ long mc_fpstate[64] __aligned(16); - long mc_spare[8]; + + __register_t mc_fsbase; + __register_t mc_gsbase; + + long mc_spare[6]; } mcontext_t; #endif /* !_MACHINE_UCONTEXT_H_ */ diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index c8869e2..0470230 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -716,8 +716,8 @@ linux_clone(struct thread *td, struct linux_clone_args *args) sd.sd_long, sd.sd_def32, sd.sd_gran); #endif td2->td_pcb->pcb_gsbase = (register_t)info.base_addr; - td2->td_pcb->pcb_gs32sd = sd; - td2->td_pcb->pcb_gs = GSEL(GUGS32_SEL, SEL_UPL); +/* XXXKIB td2->td_pcb->pcb_gs32sd = sd; */ + td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); td2->td_pcb->pcb_flags |= PCB_GS32BIT | PCB_32BIT; } } @@ -1361,7 +1361,7 @@ linux_set_thread_area(struct thread *td, critical_enter(); td->td_pcb->pcb_gsbase = (register_t)info.base_addr; - td->td_pcb->pcb_gs32sd = *PCPU_GET(gs32p) = sd; +/* XXXKIB td->td_pcb->pcb_gs32sd = *PCPU_GET(gs32p) = sd; */ td->td_pcb->pcb_flags |= PCB_32BIT | PCB_GS32BIT; wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase); critical_exit(); diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index aaa7458..b3dd558 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -278,7 +278,6 @@ elf_linux_fixup(register_t **stack_base, struct image_params *imgp) return 0; } -extern int _ucodesel, _ucode32sel, _udatasel; extern unsigned long linux_sznonrtsigcode; static void @@ -349,12 +348,6 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; - frame.sf_sc.uc_mcontext.sc_gs = rgs(); - frame.sf_sc.uc_mcontext.sc_fs = rfs(); - __asm __volatile("movl %%es,%0" : - "=rm" (frame.sf_sc.uc_mcontext.sc_es)); - __asm __volatile("movl %%ds,%0" : - "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 
frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; @@ -364,6 +357,10 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; + frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; + frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; + frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; + frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; @@ -401,11 +398,11 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; - load_ds(_udatasel); - td->td_pcb->pcb_ds = _udatasel; - load_es(_udatasel); - td->td_pcb->pcb_es = _udatasel; - /* leave user %fs and %gs untouched */ + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -483,10 +480,10 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) * Build the signal context to be used by sigreturn. 
*/ frame.sf_sc.sc_mask = lmask.__bits[0]; - frame.sf_sc.sc_gs = rgs(); - frame.sf_sc.sc_fs = rfs(); - __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); - __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); + frame.sf_sc.sc_gs = regs->tf_gs; + frame.sf_sc.sc_fs = regs->tf_fs; + frame.sf_sc.sc_es = regs->tf_es; + frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; @@ -523,11 +520,11 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; - load_ds(_udatasel); - td->td_pcb->pcb_ds = _udatasel; - load_es(_udatasel); - td->td_pcb->pcb_es = _udatasel; - /* leave user %fs and %gs untouched */ + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -612,7 +609,7 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) /* * Restore signal context. */ - /* Selectors were restored by the trampoline. */ + /* Selectors were restored by the trampoline. XXXKIB really ? */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; @@ -710,7 +707,10 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) /* * Restore signal context */ - /* Selectors were restored by the trampoline. 
*/ + regs->tf_gs = context->sc_gs; + regs->tf_fs = context->sc_fs; + regs->tf_es = context->sc_es; + regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; @@ -813,26 +813,29 @@ exec_linux_setregs(td, entry, stack, ps_strings) struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; + mtx_lock(&dt_lock); + if (td->td_proc->p_md.md_ldt != NULL) + user_ldt_free(td); + else + mtx_unlock(&dt_lock); + critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); - load_ds(_udatasel); - load_es(_udatasel); - load_fs(_udatasel); - load_gs(_udatasel); - pcb->pcb_ds = _udatasel; - pcb->pcb_es = _udatasel; - pcb->pcb_fs = _udatasel; - pcb->pcb_gs = _udatasel; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); + regs->tf_gs = _udatasel; + regs->tf_fs = _udatasel; + regs->tf_es = _udatasel; + regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; + regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = ps_strings; load_cr0(rcr0() | CR0_MP | CR0_TS); diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h index 08d6510..84e832c 100644 --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -191,4 +191,10 @@ struct thr_param32 { uint32_t spare[3]; }; +struct i386_ldt_args32 { + uint32_t start; + uint32_t descs; + uint32_t num; +}; + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 83fd67f..253a455 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -85,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include @@ -2631,6 +2632,31 @@ freebsd32_cpuset_setaffinity(struct thread *td, } int 
+freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) +{ + struct sysarch_args uap1; + struct i386_ldt_args uapl; + struct i386_ldt_args32 uapl32; + int error; + + if (uap->op == I386_SET_LDT || uap->op == I386_GET_LDT) { + if ((error = copyin(uap->parms, &uapl32, sizeof(uapl32))) != 0) + return (error); + uap1.op = uap->op; + uap1.parms = (char *)&uapl; + uapl.start = uapl32.start; + uapl.descs = (struct user_segment_descriptor *)(uintptr_t) + uapl32.descs; + uapl.num = uapl32.num; + return (sysarch_ldt(td, &uap1, UIO_SYSSPACE)); + } else { + uap1.op = uap->op; + uap1.parms = uap->parms; + return (sysarch(td, &uap1)); + } +} + +int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h index 625378f..cd8a395 100644 --- a/sys/compat/freebsd32/freebsd32_proto.h +++ b/sys/compat/freebsd32/freebsd32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 185878 2008-12-10 20:56:19Z jhb + * created from FreeBSD */ #ifndef _FREEBSD32_SYSPROTO_H_ @@ -119,6 +119,10 @@ struct freebsd32_adjtime_args { char delta_l_[PADL_(struct timeval32 *)]; struct timeval32 * delta; char delta_r_[PADR_(struct timeval32 *)]; char olddelta_l_[PADL_(struct timeval32 *)]; struct timeval32 * olddelta; char olddelta_r_[PADR_(struct timeval32 *)]; }; +struct freebsd32_sysarch_args { + char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)]; + char parms_l_[PADL_(char *)]; char * parms; char parms_r_[PADR_(char *)]; +}; struct freebsd32_semsys_args { char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)]; char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; @@ -456,6 +460,7 @@ int freebsd32_writev(struct thread *, struct freebsd32_writev_args *); int freebsd32_settimeofday(struct thread *, struct freebsd32_settimeofday_args *); int freebsd32_utimes(struct thread *, struct freebsd32_utimes_args *); int freebsd32_adjtime(struct thread *, struct freebsd32_adjtime_args *); +int freebsd32_sysarch(struct thread *, struct freebsd32_sysarch_args *); int freebsd32_semsys(struct thread *, struct freebsd32_semsys_args *); int freebsd32_msgsys(struct thread *, struct freebsd32_msgsys_args *); int freebsd32_shmsys(struct thread *, struct freebsd32_shmsys_args *); @@ -682,6 +687,7 @@ int freebsd6_freebsd32_ftruncate(struct thread *, struct freebsd6_freebsd32_ftru #define FREEBSD32_SYS_AUE_freebsd32_settimeofday AUE_SETTIMEOFDAY #define FREEBSD32_SYS_AUE_freebsd32_utimes AUE_UTIMES #define FREEBSD32_SYS_AUE_freebsd32_adjtime AUE_ADJTIME +#define FREEBSD32_SYS_AUE_freebsd32_sysarch AUE_SYSARCH #define FREEBSD32_SYS_AUE_freebsd32_semsys AUE_SEMSYS #define FREEBSD32_SYS_AUE_freebsd32_msgsys AUE_MSGSYS #define FREEBSD32_SYS_AUE_freebsd32_shmsys AUE_SHMSYS diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index d491d83..7900cee 
100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 185878 2008-12-10 20:56:19Z jhb + * created from FreeBSD */ #define FREEBSD32_SYS_syscall 0 @@ -159,7 +159,7 @@ /* 162 is obsolete getdomainname */ /* 163 is obsolete setdomainname */ /* 164 is obsolete uname */ -#define FREEBSD32_SYS_sysarch 165 +#define FREEBSD32_SYS_freebsd32_sysarch 165 #define FREEBSD32_SYS_rtprio 166 #define FREEBSD32_SYS_freebsd32_semsys 169 #define FREEBSD32_SYS_freebsd32_msgsys 170 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 88a5e83..a4e10ef 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 185878 2008-12-10 20:56:19Z jhb + * created from FreeBSD */ const char *freebsd32_syscallnames[] = { @@ -172,7 +172,7 @@ const char *freebsd32_syscallnames[] = { "obs_getdomainname", /* 162 = obsolete getdomainname */ "obs_setdomainname", /* 163 = obsolete setdomainname */ "obs_uname", /* 164 = obsolete uname */ - "sysarch", /* 165 = sysarch */ + "freebsd32_sysarch", /* 165 = freebsd32_sysarch */ "rtprio", /* 166 = rtprio */ "#167", /* 167 = nosys */ "#168", /* 168 = nosys */ diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index e0177c8..1043214 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 185878 2008-12-10 20:56:19Z jhb + * created from FreeBSD */ #include "opt_compat.h" @@ -203,7 +203,7 @@ struct sysent freebsd32_sysent[] = { { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 162 = obsolete getdomainname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 163 = obsolete setdomainname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 164 = obsolete uname */ - { AS(sysarch_args), (sy_call_t *)sysarch, AUE_SYSARCH, NULL, 0, 0 }, /* 165 = sysarch */ + { AS(freebsd32_sysarch_args), (sy_call_t *)freebsd32_sysarch, AUE_SYSARCH, NULL, 0, 0 }, /* 165 = freebsd32_sysarch */ { AS(rtprio_args), (sy_call_t *)rtprio, AUE_RTPRIO, NULL, 0, 0 }, /* 166 = rtprio */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 167 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 }, /* 168 = nosys */ diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index 25cb81c..4d571f2 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -298,7 +298,7 @@ 162 AUE_NULL OBSOL getdomainname 163 AUE_NULL OBSOL setdomainname 164 AUE_NULL OBSOL uname -165 AUE_SYSARCH NOPROTO { int sysarch(int op, char *parms); } +165 AUE_SYSARCH STD { int freebsd32_sysarch(int op, char *parms); } 166 AUE_RTPRIO NOPROTO { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys diff --git a/sys/compat/ia32/ia32_signal.h b/sys/compat/ia32/ia32_signal.h index f2be96d..6ebb0de 100644 --- a/sys/compat/ia32/ia32_signal.h +++ b/sys/compat/ia32/ia32_signal.h @@ -59,7 +59,9 @@ struct ia32_mcontext { * See for the internals of mc_fpstate[]. 
*/ u_int32_t mc_fpstate[128] __aligned(16); - u_int32_t mc_spare2[8]; + u_int32_t mc_fsbase; + u_int32_t mc_gsbase; + u_int32_t mc_spare2[6]; }; struct ia32_ucontext { diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 803d8d1..90195ee 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -578,6 +578,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) struct sigacts *psp; char *sp; struct trapframe *regs; + struct segment_descriptor *sdp; int sig; int oonstack; @@ -614,6 +615,15 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); + /* + * Unconditionally fill the fsbase and gsbase into the mcontext. + */ + sdp = &td->td_pcb->pcb_fsd; + sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | + sdp->sd_lobase; + sdp = &td->td_pcb->pcb_gsd; + sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | + sdp->sd_lobase; /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && @@ -3066,6 +3076,7 @@ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; + struct segment_descriptor *sdp; tp = td->td_frame; @@ -3097,6 +3108,11 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); + sdp = &td->td_pcb->pcb_fsd; + mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; + sdp = &td->td_pcb->pcb_gsd; + mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; + return (0); } diff --git a/sys/i386/include/ucontext.h b/sys/i386/include/ucontext.h index c992495..d8657d3 100644 --- a/sys/i386/include/ucontext.h +++ b/sys/i386/include/ucontext.h @@ -72,10 +72,15 @@ typedef struct __mcontext { * See for the internals of mc_fpstate[].
*/ int mc_fpstate[128] __aligned(16); - int mc_spare2[8]; + + __register_t mc_fsbase; + __register_t mc_gsbase; + + int mc_spare2[6]; } mcontext_t; #if defined(_KERNEL) && defined(COMPAT_FREEBSD4) + struct mcontext4 { __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_gs; /* machine state (struct trapframe) */