diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index c537a4f..b5b84ed 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -380,8 +380,11 @@ IDTVEC(fast_syscall) movl $TF_HASSEGS,TF_FLAGS(%rsp) cld FAKE_MCOUNT(TF_RIP(%rsp)) - movq %rsp,%rdi - call syscall + movq PCPU(CURTHREAD),%rdi + movq %rsp,TD_FRAME(%rdi) + movl TF_RFLAGS(%rsp),%esi + andl $PSL_T,%esi + call amd64_syscall 1: movq PCPU(CURPCB),%rax /* Disable interrupts before testing PCB_FULL_IRET. */ cli @@ -396,17 +399,12 @@ IDTVEC(fast_syscall) call ast jmp 1b 2: /* Restore preserved registers. */ + .align 16 MEXITCOUNT movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ movq TF_RDX(%rsp),%rdx /* return value 2 */ movq TF_RAX(%rsp),%rax /* return value 1 */ - movq TF_RBX(%rsp),%rbx /* C preserved */ - movq TF_RBP(%rsp),%rbp /* C preserved */ - movq TF_R12(%rsp),%r12 /* C preserved */ - movq TF_R13(%rsp),%r13 /* C preserved */ - movq TF_R14(%rsp),%r14 /* C preserved */ - movq TF_R15(%rsp),%r15 /* C preserved */ movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ movq TF_RIP(%rsp),%rcx /* original %rip */ movq TF_RSP(%rsp),%r9 /* user stack pointer */ diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 1c9abd5..d133223 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -87,6 +87,7 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_TID, offsetof(struct thread, td_tid)); +ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 4e5f8b8..3c2d785 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -68,11 +68,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #ifdef HWPMC_HOOKS #include #endif +#include + #include #include #include @@ -883,41 +886,38 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) return (error); } +#include "../../kern/subr_sysenter.c" + +void amd64_syscall(struct thread *td, int traced); /* * syscall - system call request C handler * * A system call is essentially treated as a trap. */ void -syscall(struct trapframe *frame) +amd64_syscall(struct thread *td, int traced) { - struct thread *td; struct syscall_args sa; - register_t orig_tf_rflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC - if (ISPL(frame->tf_cs) != SEL_UPL) { + if (ISPL(td->td_frame->tf_cs) != SEL_UPL) { panic("syscall"); /* NOT REACHED */ } #endif - orig_tf_rflags = frame->tf_rflags; - td = curthread; - td->td_frame = frame; - error = syscallenter(td, &sa); /* * Traced syscall. */ - if (orig_tf_rflags & PSL_T) { - frame->tf_rflags &= ~PSL_T; + if (__predict_false(traced)) { + td->td_frame->tf_rflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; - ksi.ksi_addr = (void *)frame->tf_rip; + ksi.ksi_addr = (void *)td->td_frame->tf_rip; trapsignal(td, &ksi); } diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c index 0b46dae..25a2954 100644 --- a/sys/amd64/ia32/ia32_syscall.c +++ b/sys/amd64/ia32/ia32_syscall.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -163,6 +164,8 @@ ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) return (error); } +#include "../../kern/subr_sysenter.c" + void ia32_syscall(struct trapframe *frame) { diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index 5ea5615..6aec18b 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -76,7 +76,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, - .sv_fetch_syscall_args = NULL, /* XXXKIB */ + .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, }; diff --git a/sys/arm/arm/trap.c b/sys/arm/arm/trap.c index c7c84d0..ca8a5c6 100644 --- a/sys/arm/arm/trap.c +++ b/sys/arm/arm/trap.c @@ -861,98 +861,66 @@ badaddr_read(void *addr, size_t size, void *rptr) return (rv); } -#define MAXARGS 8 +int +cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +{ + struct proc *p; + register_t *ap; + int error; + + sa->code = sa->insn & 0x000fffff; + ap = &td->td_frame->tf_r0; + if (sa->code == SYS_syscall) { + sa->code = *ap++; + sa->nap--; + } else if (sa->code == SYS___syscall) { + sa->code = ap[_QUAD_LOWWORD]; + sa->nap -= 2; + ap += 2; + } + p = td->td_proc; + if (p->p_sysent->sv_mask) + sa->code &= p->p_sysent->sv_mask; + if (sa->code >= p->p_sysent->sv_size) + sa->callp = &p->p_sysent->sv_table[0]; + else + sa->callp = &p->p_sysent->sv_table[sa->code]; + sa->narg = sa->callp->sy_narg; + error = 0; + memcpy(sa->args, ap, sa->nap * sizeof(register_t)); + if (sa->narg > sa->nap) { + error = copyin((void *)td->td_frame->tf_usr_sp, sa->args + + sa->nap, (sa->narg - sa->nap) * sizeof(register_t)); + } + if (error == 0) { + td->td_retval[0] = 0; + td->td_retval[1] = 0; + } + return (error); +} + static void syscall(struct thread *td, trapframe_t *frame, u_int32_t insn) { - struct proc *p = td->td_proc; - int code, error; - u_int nap, nargs; - register_t *ap, *args, copyargs[MAXARGS]; - struct sysent *callp; + struct syscall_args sa; + int error; - PCPU_INC(cnt.v_syscall); - td->td_pticks = 0; - if (td->td_ucred != td->td_proc->p_ucred) - cred_update_thread(td); + td->td_frame = frame; + sa.insn = insn; switch (insn & SWI_OS_MASK) { case 0: /* XXX: we need our own one. */ - nap = 4; + sa.nap = 4; break; default: call_trapsignal(td, SIGILL, 0); userret(td, frame); return; } - code = insn & 0x000fffff; - td->td_pticks = 0; - ap = &frame->tf_r0; - if (code == SYS_syscall) { - code = *ap++; - - nap--; - } else if (code == SYS___syscall) { - code = ap[_QUAD_LOWWORD]; - nap -= 2; - ap += 2; - } - if (p->p_sysent->sv_mask) - code &= p->p_sysent->sv_mask; - if (code >= p->p_sysent->sv_size) - callp = &p->p_sysent->sv_table[0]; - else - callp = &p->p_sysent->sv_table[code]; - nargs = callp->sy_narg; - memcpy(copyargs, ap, nap * sizeof(register_t)); - if (nargs > nap) { - error = copyin((void *)frame->tf_usr_sp, copyargs + nap, - (nargs - nap) * sizeof(register_t)); - if (error) - goto bad; - } - args = copyargs; - error = 0; -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(code, nargs, args); -#endif - - CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, - td->td_proc->p_pid, td->td_name, code); - if (error == 0) { - td->td_retval[0] = 0; - td->td_retval[1] = 0; - STOPEVENT(p, S_SCE, callp->sy_narg); - PTRACESTOP_SC(p, td, S_PT_SCE); - AUDIT_SYSCALL_ENTER(code, td); - error = (*callp->sy_call)(td, args); - AUDIT_SYSCALL_EXIT(error, td); - KASSERT(td->td_ar == NULL, - ("returning from syscall with td_ar set!")); - } -bad: - cpu_set_syscall_retval(td, error); - - WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", - (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); - KASSERT(td->td_critnest == 0, - ("System call %s returning in a critical section", - (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???")); - KASSERT(td->td_locks == 0, - ("System call %s returning with %d locks held", - (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???", - td->td_locks)); - - userret(td, frame); - CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td, - td->td_proc->p_pid, td->td_name, code); - - STOPEVENT(p, S_SCX, code); - PTRACESTOP_SC(p, td, S_PT_SCX); -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSRET)) - ktrsysret(code, error, td->td_retval[0]); -#endif + + error = syscallenter(td, &sa); + KASSERT(error != 0 || td->td_ar == NULL, + ("returning from syscall with td_ar set!")); + syscallret(td, error, &sa); } void diff --git a/sys/arm/include/proc.h b/sys/arm/include/proc.h index 7032be5..cf74d36 100644 --- a/sys/arm/include/proc.h +++ b/sys/arm/include/proc.h @@ -62,4 +62,15 @@ struct mdproc { #define KINFO_PROC_SIZE 792 +#define MAXARGS 8 +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[MAXARGS]; + int narg; + u_int nap; + u_int32_t insn; +}; +#define HAVE_SYSCALL_ARGS_DEF 1 + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 5a8016c..9183725 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #ifdef HWPMC_HOOKS #include @@ -1054,6 +1055,8 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) return (error); } +#include "../../kern/subr_sysenter.c" + /* * syscall - system call request C handler * diff --git a/sys/kern/subr_sysenter.c b/sys/kern/subr_sysenter.c new file mode 100644 index 0000000..fafe04f --- /dev/null +++ b/sys/kern/subr_sysenter.c @@ -0,0 +1,163 @@ +#include "opt_capsicum.h" +#include "opt_ktrace.h" +#include "opt_kdtrace.h" + +static inline int +syscallenter(struct thread *td, struct syscall_args *sa) +{ + struct proc *p; + int error, traced; + + PCPU_INC(cnt.v_syscall); + p = td->td_proc; + + td->td_pticks = 0; + if (td->td_ucred != p->p_ucred) + cred_update_thread(td); + if (p->p_flag & P_TRACED) { + traced = 1; + PROC_LOCK(p); + td->td_dbgflags &= ~TDB_USERWR; + td->td_dbgflags |= TDB_SCE; + PROC_UNLOCK(p); + } else + traced = 0; + error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) + ktrsyscall(sa->code, sa->narg, sa->args); +#endif + + CTR6(KTR_SYSC, +"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)", + td, td->td_proc->p_pid, syscallname(p, sa->code), + sa->args[0], sa->args[1], sa->args[2]); + + if (error == 0) { + STOPEVENT(p, S_SCE, sa->narg); + PTRACESTOP_SC(p, td, S_PT_SCE); + if (td->td_dbgflags & TDB_USERWR) { + /* + * Reread syscall number and arguments if + * debugger modified registers or memory. + */ + error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) + ktrsyscall(sa->code, sa->narg, sa->args); +#endif + if (error != 0) + goto retval; + } + +#ifdef CAPABILITY_MODE + /* + * In capability mode, we only allow access to system calls + * flagged with SYF_CAPENABLED. + */ + if (IN_CAPABILITY_MODE(td) && + !(sa->callp->sy_flags & SYF_CAPENABLED)) { + error = ECAPMODE; + goto retval; + } +#endif + + error = syscall_thread_enter(td, sa->callp); + if (error != 0) + goto retval; + +#ifdef KDTRACE_HOOKS + /* + * If the systrace module has registered it's probe + * callback and if there is a probe active for the + * syscall 'entry', process the probe. + */ + if (systrace_probe_func != NULL && sa->callp->sy_entry != 0) + (*systrace_probe_func)(sa->callp->sy_entry, sa->code, + sa->callp, sa->args, 0); +#endif + + AUDIT_SYSCALL_ENTER(sa->code, td); + error = (sa->callp->sy_call)(td, sa->args); + AUDIT_SYSCALL_EXIT(error, td); + + /* Save the latest error return value. */ + td->td_errno = error; + +#ifdef KDTRACE_HOOKS + /* + * If the systrace module has registered it's probe + * callback and if there is a probe active for the + * syscall 'return', process the probe. + */ + if (systrace_probe_func != NULL && sa->callp->sy_return != 0) + (*systrace_probe_func)(sa->callp->sy_return, sa->code, + sa->callp, NULL, (error) ? -1 : td->td_retval[0]); +#endif + syscall_thread_exit(td, sa->callp); + CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", + p, error, td->td_retval[0], td->td_retval[1]); + } + retval: + if (traced) { + PROC_LOCK(p); + td->td_dbgflags &= ~TDB_SCE; + PROC_UNLOCK(p); + } + (p->p_sysent->sv_set_syscall_retval)(td, error); + return (error); +} + +static inline void +syscallret(struct thread *td, int error, struct syscall_args *sa __unused) +{ + struct proc *p; + int traced; + + p = td->td_proc; + + /* + * Check for misbehavior. + */ + WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", + syscallname(p, sa->code)); + KASSERT(td->td_critnest == 0, + ("System call %s returning in a critical section", + syscallname(p, sa->code))); + KASSERT(td->td_locks == 0, + ("System call %s returning with %d locks held", + syscallname(p, sa->code), td->td_locks)); + + /* + * Handle reschedule and other end-of-syscall issues + */ + userret(td, td->td_frame); + + CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s", + syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) + ktrsysret(sa->code, error, td->td_retval[0]); +#endif + + if (p->p_flag & P_TRACED) { + traced = 1; + PROC_LOCK(p); + td->td_dbgflags |= TDB_SCX; + PROC_UNLOCK(p); + } else + traced = 0; + /* + * This works because errno is findable through the + * register set. If we ever support an emulation where this + * is not the case, this code will need to be revisited. + */ + STOPEVENT(p, S_SCX, sa->code); + PTRACESTOP_SC(p, td, S_PT_SCX); + if (traced || (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0) { + PROC_LOCK(p); + td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK); + PROC_UNLOCK(p); + } +} diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 3527ed1..42adf80 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -252,7 +252,6 @@ ast(struct trapframe *framep) mtx_assert(&Giant, MA_NOTOWNED); } -#ifdef HAVE_SYSCALL_ARGS_DEF const char * syscallname(struct proc *p, u_int code) { @@ -264,164 +263,3 @@ syscallname(struct proc *p, u_int code) return (unknown); return (sv->sv_syscallnames[code]); } - -int -syscallenter(struct thread *td, struct syscall_args *sa) -{ - struct proc *p; - int error, traced; - - PCPU_INC(cnt.v_syscall); - p = td->td_proc; - - td->td_pticks = 0; - if (td->td_ucred != p->p_ucred) - cred_update_thread(td); - if (p->p_flag & P_TRACED) { - traced = 1; - PROC_LOCK(p); - td->td_dbgflags &= ~TDB_USERWR; - td->td_dbgflags |= TDB_SCE; - PROC_UNLOCK(p); - } else - traced = 0; - error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(sa->code, sa->narg, sa->args); -#endif - - CTR6(KTR_SYSC, -"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)", - td, td->td_proc->p_pid, syscallname(p, sa->code), - sa->args[0], sa->args[1], sa->args[2]); - - if (error == 0) { - STOPEVENT(p, S_SCE, sa->narg); - PTRACESTOP_SC(p, td, S_PT_SCE); - if (td->td_dbgflags & TDB_USERWR) { - /* - * Reread syscall number and arguments if - * debugger modified registers or memory. - */ - error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(sa->code, sa->narg, sa->args); -#endif - if (error != 0) - goto retval; - } - -#ifdef CAPABILITY_MODE - /* - * In capability mode, we only allow access to system calls - * flagged with SYF_CAPENABLED. - */ - if (IN_CAPABILITY_MODE(td) && - !(sa->callp->sy_flags & SYF_CAPENABLED)) { - error = ECAPMODE; - goto retval; - } -#endif - - error = syscall_thread_enter(td, sa->callp); - if (error != 0) - goto retval; - -#ifdef KDTRACE_HOOKS - /* - * If the systrace module has registered it's probe - * callback and if there is a probe active for the - * syscall 'entry', process the probe. - */ - if (systrace_probe_func != NULL && sa->callp->sy_entry != 0) - (*systrace_probe_func)(sa->callp->sy_entry, sa->code, - sa->callp, sa->args, 0); -#endif - - AUDIT_SYSCALL_ENTER(sa->code, td); - error = (sa->callp->sy_call)(td, sa->args); - AUDIT_SYSCALL_EXIT(error, td); - - /* Save the latest error return value. */ - td->td_errno = error; - -#ifdef KDTRACE_HOOKS - /* - * If the systrace module has registered it's probe - * callback and if there is a probe active for the - * syscall 'return', process the probe. - */ - if (systrace_probe_func != NULL && sa->callp->sy_return != 0) - (*systrace_probe_func)(sa->callp->sy_return, sa->code, - sa->callp, NULL, (error) ? -1 : td->td_retval[0]); -#endif - syscall_thread_exit(td, sa->callp); - CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", - p, error, td->td_retval[0], td->td_retval[1]); - } - retval: - if (traced) { - PROC_LOCK(p); - td->td_dbgflags &= ~TDB_SCE; - PROC_UNLOCK(p); - } - (p->p_sysent->sv_set_syscall_retval)(td, error); - return (error); -} - -void -syscallret(struct thread *td, int error, struct syscall_args *sa __unused) -{ - struct proc *p; - int traced; - - p = td->td_proc; - - /* - * Check for misbehavior. - */ - WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", - syscallname(p, sa->code)); - KASSERT(td->td_critnest == 0, - ("System call %s returning in a critical section", - syscallname(p, sa->code))); - KASSERT(td->td_locks == 0, - ("System call %s returning with %d locks held", - syscallname(p, sa->code), td->td_locks)); - - /* - * Handle reschedule and other end-of-syscall issues - */ - userret(td, td->td_frame); - - CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s", - syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name); - -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSRET)) - ktrsysret(sa->code, error, td->td_retval[0]); -#endif - - if (p->p_flag & P_TRACED) { - traced = 1; - PROC_LOCK(p); - td->td_dbgflags |= TDB_SCX; - PROC_UNLOCK(p); - } else - traced = 0; - /* - * This works because errno is findable through the - * register set. If we ever support an emulation where this - * is not the case, this code will need to be revisited. - */ - STOPEVENT(p, S_SCX, sa->code); - PTRACESTOP_SC(p, td, S_PT_SCX); - if (traced || (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0) { - PROC_LOCK(p); - td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK); - PROC_UNLOCK(p); - } -} -#endif /* HAVE_SYSCALL_ARGS_DEF */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 67adbe5..fb97913 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -873,9 +873,6 @@ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); -struct syscall_args; -int syscallenter(struct thread *, struct syscall_args *); -void syscallret(struct thread *, int, struct syscall_args *); void cpu_exit(struct thread *); void exit1(struct thread *, int) __dead2;