diff --git a/sys/cddl/compat/opensolaris/sys/cpuvar_defs.h b/sys/cddl/compat/opensolaris/sys/cpuvar_defs.h index d99eaea7947e..db94a2da1f46 100644 --- a/sys/cddl/compat/opensolaris/sys/cpuvar_defs.h +++ b/sys/cddl/compat/opensolaris/sys/cpuvar_defs.h @@ -45,13 +45,17 @@ #endif #define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ #define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ +#ifdef __FreeBSD__ +#define CPU_DTRACE_TAILCALLDROP 0x2000 /* DTrace fault: return probe drop */ +#endif #define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ CPU_DTRACE_BADSTACK) -#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) +#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP | \ + CPU_DTRACE_TAILCALLDROP) #define PANICSTKSIZE 8192 #define REGSIZE 256 diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h b/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h index 403273a37fde..0c038fd205a8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h @@ -367,13 +367,17 @@ extern cpu_core_t cpu_core[]; #endif #define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ #define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ +#ifdef __FreeBSD__ +#define CPU_DTRACE_TAILCALLDROP 0x2000 /* DTrace fault: dropped return probe */ +#endif #define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ CPU_DTRACE_BADSTACK) -#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) +#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP | \ + CPU_DTRACE_TAILCALLDROP) /* * Dispatcher flags diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S 
b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S index 09eea5606e4c..a49063e9f11a 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S +++ b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S @@ -36,6 +36,7 @@ #include "assym.s" +/* XXX this is duplicated a lot! */ #define INTR_POP \ MEXITCOUNT; \ movq TF_RDI(%rsp),%rdi; \ @@ -73,8 +74,11 @@ movq TF_RAX(%rsp), %rdx call dtrace_invop ALTENTRY(dtrace_invop_callsite) - cmpl $DTRACE_INVOP_PUSHL_EBP, %eax + /* XXX make this a jump table. */ + cmpl $DTRACE_INVOP_PUSHQ_RBP, %eax je bp_push + cmpl $DTRACE_INVOP_POPQ_RBP, %eax + je bp_pop cmpl $DTRACE_INVOP_LEAVE, %eax je bp_leave cmpl $DTRACE_INVOP_NOP, %eax @@ -110,6 +114,17 @@ bp_push: iretq /* return from interrupt */ /*NOTREACHED*/ +bp_pop: + /* Emulate a "popq %rbp". */ + INTR_POP + pushq %rax /* push temp */ + movq 32(%rsp), %rax /* load %rsp */ + addq $8, 32(%rsp) /* adjust new %rsp */ + movq (%rax), %rbp /* load new %rbp */ + popq %rax /* pop off temp */ + iretq + /*NOTREACHED*/ + bp_leave: /* * We must emulate a "leave", which is the same as a "movq %rbp, %rsp" diff --git a/sys/cddl/dev/dtrace/dtrace_cddl.h b/sys/cddl/dev/dtrace/dtrace_cddl.h index b8ea17a23a54..552ef09c42f1 100644 --- a/sys/cddl/dev/dtrace/dtrace_cddl.h +++ b/sys/cddl/dev/dtrace/dtrace_cddl.h @@ -19,7 +19,6 @@ * CDDL HEADER END * * $FreeBSD$ - * */ #ifndef _DTRACE_CDDL_H_ @@ -34,34 +33,33 @@ */ typedef struct kdtrace_proc { int p_dtrace_probes; /* Are there probes for this proc? */ - u_int64_t p_dtrace_count; /* Number of DTrace tracepoints */ + uint64_t p_dtrace_count; /* Number of DTrace tracepoints */ void *p_dtrace_helpers; /* DTrace helpers, if any */ int p_dtrace_model; - } kdtrace_proc_t; /* * Kernel DTrace extension to 'struct thread' for FreeBSD. 
*/ typedef struct kdtrace_thread { - u_int8_t td_dtrace_stop; /* Indicates a DTrace-desired stop */ - u_int8_t td_dtrace_sig; /* Signal sent via DTrace's raise() */ + uint8_t td_dtrace_stop; /* Indicates a DTrace-desired stop */ + uint8_t td_dtrace_sig; /* Signal sent via DTrace's raise() */ u_int td_predcache; /* DTrace predicate cache */ - u_int64_t td_dtrace_vtime; /* DTrace virtual time */ - u_int64_t td_dtrace_start; /* DTrace slice start time */ + uint64_t td_dtrace_vtime; /* DTrace virtual time */ + uint64_t td_dtrace_start; /* DTrace slice start time */ union __tdu { struct __tds { - u_int8_t _td_dtrace_on; + uint8_t _td_dtrace_on; /* Hit a fasttrap tracepoint. */ - u_int8_t _td_dtrace_step; + uint8_t _td_dtrace_step; /* About to return to kernel. */ - u_int8_t _td_dtrace_ret; + uint8_t _td_dtrace_ret; /* Handling a return probe. */ - u_int8_t _td_dtrace_ast; + uint8_t _td_dtrace_ast; /* Saved ast flag. */ #ifdef __amd64__ - u_int8_t _td_dtrace_reg; + uint8_t _td_dtrace_reg; #endif } _tds; u_long _td_dtrace_ft; /* Bitwise or of these flags. */ @@ -82,11 +80,31 @@ typedef struct kdtrace_thread { #ifdef __amd64__ uintptr_t td_dtrace_regv; #endif - u_int64_t td_hrtime; /* Last time on cpu. */ + +#ifdef __FreeBSD__ void *td_dtrace_sscr; /* Saved scratch space location. */ void *td_systrace_args; /* syscall probe arguments. */ + +#ifdef __amd64__ + /* + * A structure for recording information needed to implement tail call + * return probes. + */ + struct { +#define DTRACE_TAIL_CALL_RECORDS 15 + union { + void *arg; + uintptr_t retaddr; + } stack[DTRACE_TAIL_CALL_RECORDS]; + uint16_t map; + uint8_t head; + } td_tail_call; +#endif +#endif } kdtrace_thread_t; +CTASSERT(sizeof(struct kdtrace_thread) < 256); + /* * Definitions to reference fields in the FreeBSD DTrace structures defined * above using the names of fields in similar structures in Solaris. 
Note @@ -111,6 +129,9 @@ typedef struct kdtrace_thread { #define t_dtrace_regv td_dtrace->td_dtrace_regv #define t_dtrace_sscr td_dtrace->td_dtrace_sscr #define t_dtrace_systrace_args td_dtrace->td_systrace_args +#define t_dtrace_tc_stack td_dtrace->td_tail_call.stack +#define t_dtrace_tc_map td_dtrace->td_tail_call.map +#define t_dtrace_tc_head td_dtrace->td_tail_call.head #define p_dtrace_helpers p_dtrace->p_dtrace_helpers #define p_dtrace_count p_dtrace->p_dtrace_count #define p_dtrace_probes p_dtrace->p_dtrace_probes diff --git a/sys/cddl/dev/dtrace/mips/dtrace_subr.c b/sys/cddl/dev/dtrace/mips/dtrace_subr.c index 1ed3cd02ae5e..672e10480e61 100644 --- a/sys/cddl/dev/dtrace/mips/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/mips/dtrace_subr.c @@ -59,13 +59,13 @@ typedef struct dtrace_invop_hdlr { dtrace_invop_hdlr_t *dtrace_invop_hdlr; int -dtrace_invop(uintptr_t addr, struct trapframe *stack, uintptr_t eax) +dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) { dtrace_invop_hdlr_t *hdlr; int rval; for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) - if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0) + if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0) return (rval); return (0); diff --git a/sys/cddl/dev/fbt/aarch64/fbt_isa.c b/sys/cddl/dev/fbt/aarch64/fbt_isa.c index 1231140a4253..78f78521cd41 100644 --- a/sys/cddl/dev/fbt/aarch64/fbt_isa.c +++ b/sys/cddl/dev/fbt/aarch64/fbt_isa.c @@ -22,6 +22,7 @@ * Portions Copyright 2013 Justin Hibbits jhibbits@freebsd.org * Portions Copyright 2013 Howard Su howardsu@freebsd.org * Portions Copyright 2015 Ruslan Bukin + * Portions Copyright 2016 Mark Johnston * * $FreeBSD$ */ @@ -45,6 +46,11 @@ #define FBT_ENTRY "entry" #define FBT_RETURN "return" +void +fbt_md_init(void) +{ +} + int fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) { diff --git a/sys/cddl/dev/fbt/amd64/fbt_asm.S b/sys/cddl/dev/fbt/amd64/fbt_asm.S new file mode 100644 index 000000000000..f00dfe7f15e8 --- 
/dev/null
+++ b/sys/cddl/dev/fbt/amd64/fbt_asm.S
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Portions Copyright 2016 Mark Johnston
+ *
+ * $FreeBSD$
+ */
+
+#define _ASM
+
+#include
+
+#include "assym.s"
+
+/*
+ * Tail call return trampoline.  fbt_invop() plants this address as the
+ * return address of a tail-calling function; the breakpoint below traps
+ * back into fbt_invop().  The symbol must be global because fbt_isa.c
+ * takes its address (FBT_TRAMPOLINE_ADDR).
+ */
+	.text
+	.globl	fbt_tail_ret_trampoline
+fbt_tail_ret_trampoline:
+	int	$3
diff --git a/sys/cddl/dev/fbt/arm/fbt_isa.c b/sys/cddl/dev/fbt/arm/fbt_isa.c
index 592e59802bbc..57dc74378171 100644
--- a/sys/cddl/dev/fbt/arm/fbt_isa.c
+++ b/sys/cddl/dev/fbt/arm/fbt_isa.c
@@ -48,6 +48,11 @@
 #define	FBT_ENTRY	"entry"
 #define	FBT_RETURN	"return"
 
+void
+fbt_md_init(void)
+{
+}
+
 int
 fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
 {
diff --git a/sys/cddl/dev/fbt/fbt.c b/sys/cddl/dev/fbt/fbt.c
index 19018f987050..e66469c08da3 100644
--- a/sys/cddl/dev/fbt/fbt.c
+++ b/sys/cddl/dev/fbt/fbt.c
@@ -19,9 +19,7 @@
  * CDDL HEADER END
  *
  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
- *
- * $FreeBSD$
- *
+ * Portions Copyright 2016 Mark Johnston
  */
 
 /*
@@ -30,6 +28,8 @@
  */
 
 #include
+__FBSDID("$FreeBSD$");
+
 #include
 #include
 #include
@@ -777,7 +777,8 @@ ctf_strptr(linker_ctf_t *lc, int name)
 	if (name < 0 || name >= hp->cth_strlen)
 		return(strp);
 
-	strp = (const char *)(lc->ctftab +
hp->cth_stroff + name + sizeof(ctf_header_t)); + strp = (const char *)(lc->ctftab + hp->cth_stroff + name + + sizeof(ctf_header_t)); return (strp); } @@ -976,7 +977,8 @@ fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len) } static void -fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc) +fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, + dtrace_argdesc_t *desc) { const ushort_t *dp; fbt_probe_t *fbt = parg; @@ -1057,10 +1059,9 @@ fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_a dp += ndx + 1; } - if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) + if (fbt_type_name(&lc, *dp, desc->dtargd_native, + sizeof(desc->dtargd_native)) > 0) desc->dtargd_ndx = ndx; - - return; } static int @@ -1087,8 +1088,8 @@ fbt_load(void *dummy) fbt_probetab_mask = fbt_probetab_size - 1; /* Allocate memory for the probe table. */ - fbt_probetab = - malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO); + fbt_probetab = malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, + M_WAITOK | M_ZERO); dtrace_doubletrap_func = fbt_doubletrap; dtrace_invop_add(fbt_invop); @@ -1097,12 +1098,15 @@ fbt_load(void *dummy) NULL, &fbt_pops, NULL, &fbt_id) != 0) return; + /* Give machine-dependent code an opportunity to initialize itself. */ + fbt_md_init(); + /* Create probes for the kernel and already-loaded modules. 
*/ linker_file_foreach(fbt_linker_file_cb, NULL); } static int -fbt_unload() +fbt_unload(void) { int error = 0; @@ -1128,29 +1132,20 @@ fbt_unload() static int fbt_modevent(module_t mod __unused, int type, void *data __unused) { - int error = 0; switch (type) { case MOD_LOAD: - break; - case MOD_UNLOAD: - break; - case MOD_SHUTDOWN: - break; - + return (0); default: - error = EOPNOTSUPP; - break; - + return (EOPNOTSUPP); } - - return (error); } static int -fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, + struct thread *td __unused) { return (0); } diff --git a/sys/cddl/dev/fbt/fbt.h b/sys/cddl/dev/fbt/fbt.h index f34025917b88..f4bf5971b907 100644 --- a/sys/cddl/dev/fbt/fbt.h +++ b/sys/cddl/dev/fbt/fbt.h @@ -19,9 +19,9 @@ * CDDL HEADER END * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * Portions Copyright 2016 Mark Johnston * * $FreeBSD$ - * */ /* @@ -38,6 +38,7 @@ typedef struct fbt_probe { struct fbt_probe *fbtp_hashnext; fbt_patchval_t *fbtp_patchpoint; int8_t fbtp_rval; + uint8_t fbtp_flags; fbt_patchval_t fbtp_patchval; fbt_patchval_t fbtp_savedval; uintptr_t fbtp_roffset; @@ -49,11 +50,15 @@ typedef struct fbt_probe { struct fbt_probe *fbtp_next; } fbt_probe_t; +#define FBTPF_TAIL_CALL 0x01 /* probe site is a tail call */ +#define FBTPF_TAIL_CALL_RET 0x02 /* return from tail call trampoline */ + struct linker_file; struct linker_symval; struct trapframe; int fbt_invop(uintptr_t, struct trapframe *, uintptr_t); +void fbt_md_init(void); void fbt_patch_tracepoint(fbt_probe_t *, fbt_patchval_t); int fbt_provide_module_function(struct linker_file *, int, struct linker_symval *, void *); diff --git a/sys/cddl/dev/fbt/powerpc/fbt_isa.c b/sys/cddl/dev/fbt/powerpc/fbt_isa.c index 0568e55046df..d20800cd0eb5 100644 --- a/sys/cddl/dev/fbt/powerpc/fbt_isa.c +++ b/sys/cddl/dev/fbt/powerpc/fbt_isa.c @@ -50,6 +50,11 @@ 
#define FBT_RETURN "return" #define FBT_AFRAMES 7 +void +fbt_md_init(void) +{ +} + int fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) { diff --git a/sys/cddl/dev/fbt/x86/fbt_isa.c b/sys/cddl/dev/fbt/x86/fbt_isa.c index 6639806d11af..5424bc1d54cb 100644 --- a/sys/cddl/dev/fbt/x86/fbt_isa.c +++ b/sys/cddl/dev/fbt/x86/fbt_isa.c @@ -19,9 +19,7 @@ * CDDL HEADER END * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org - * - * $FreeBSD$ - * + * Portions Copyright 2016 Mark Johnston */ /* @@ -30,13 +28,18 @@ */ #include -#include +__FBSDID("$FreeBSD$"); +#include #include +#include + +#include #include "fbt.h" #define FBT_PUSHL_EBP 0x55 +#define FBT_PUSHQ_RBP 0x55 #define FBT_MOVL_ESP_EBP0_V0 0x8b #define FBT_MOVL_ESP_EBP1_V0 0xec #define FBT_MOVL_ESP_EBP0_V1 0x89 @@ -44,9 +47,13 @@ #define FBT_REX_RSP_RBP 0x48 #define FBT_POPL_EBP 0x5d +#define FBT_POPQ_RBP 0x5d #define FBT_RET 0xc3 #define FBT_RET_IMM16 0xc2 #define FBT_LEAVE 0xc9 +#define FBT_JMP_SHORT 0xeb +#define FBT_JMP_REL32 0xe9 +#define FBT_JMP_ABS 0xff #ifdef __amd64__ #define FBT_PATCHVAL 0xcc @@ -57,16 +64,44 @@ #define FBT_ENTRY "entry" #define FBT_RETURN "return" +#define FBT_TRAMPOLINE_ADDR ((uintptr_t)&fbt_tail_ret_trampoline) + +static uintptr_t fbt_tail_call_return(uintptr_t); +static int fbt_tail_call_push(fbt_probe_t *, uintptr_t); + +void +fbt_md_init(void) +{ +#ifdef __amd64__ + fbt_probe_t *fbt; + uintptr_t instr; + + /* + * Create a probe for the tail call return trampoline. When it fires, we + * know we have returned from a tail call, and pop the last sequence of + * consecutive tail calls, causing return probes to fire. 
+	 */
+	instr = FBT_TRAMPOLINE_ADDR;
+
+	fbt = malloc(sizeof(*fbt), M_FBT, M_WAITOK | M_ZERO);
+	fbt->fbtp_patchpoint = (fbt_patchval_t *)instr;
+	fbt->fbtp_rval = DTRACE_INVOP_NOP;
+	fbt->fbtp_flags = FBTPF_TAIL_CALL_RET;
+
+	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
+	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
+#endif
+}
+
 int
 fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
 {
 	solaris_cpu_t *cpu;
-	uintptr_t *stack;
-	uintptr_t arg0, arg1, arg2, arg3, arg4;
+	uintptr_t *stack, arg0, arg1, arg2, arg3, arg4;
 	fbt_probe_t *fbt;
 
 #ifdef __amd64__
 	stack = (uintptr_t *)frame->tf_rsp;
 #else
 	/* Skip hardware-saved registers. */
 	stack = (uintptr_t *)frame->tf_isp + 3;
@@ -75,66 +110,89 @@ fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
 	cpu = &solaris_cpu[curcpu];
 	fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
 	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
-		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
-			if (fbt->fbtp_roffset == 0) {
-#ifdef __amd64__
-				/* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */
+		if ((uintptr_t)fbt->fbtp_patchpoint != addr)
+			continue;
+		if ((fbt->fbtp_flags & FBTPF_TAIL_CALL) != 0) {
+			/*
+			 * We instrument tail calls by overwriting a pop of the
+			 * frame pointer, so stack[0] holds the saved frame
+			 * pointer and stack[1] the caller's return address.
+			 */
+			/*
+			 * Record info needed to effect a tail call return
+			 * probe, and plant a return to our trampoline. It's
+			 * possible the current return address is already that
+			 * of the trampoline.
+ */ + if (fbt_tail_call_push(fbt, stack[1])) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[0]; + stack[1] = FBT_TRAMPOLINE_ADDR; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); - - arg0 = frame->tf_rdi; - arg1 = frame->tf_rsi; - arg2 = frame->tf_rdx; - arg3 = frame->tf_rcx; - arg4 = frame->tf_r8; + } + } else if ((fbt->fbtp_flags & FBTPF_TAIL_CALL_RET) != 0) { + /* + * Fire return probes for any intermediate tail calls. + * There should be at least one. + */ + frame->tf_rip = fbt_tail_call_return(rval); + MPASS(fbt->fbtp_rval == DTRACE_INVOP_NOP); + } else if (fbt->fbtp_roffset == 0) { +#ifdef __amd64__ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + arg0 = frame->tf_rdi; + arg1 = frame->tf_rsi; + arg2 = frame->tf_rdx; + arg3 = frame->tf_rcx; + arg4 = frame->tf_r8; #else - int i = 0; - - /* - * When accessing the arguments on the stack, - * we must protect against accessing beyond - * the stack. We can safely set NOFAULT here - * -- we know that interrupts are already - * disabled. - */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[i++]; - arg0 = stack[i++]; - arg1 = stack[i++]; - arg2 = stack[i++]; - arg3 = stack[i++]; - arg4 = stack[i++]; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | - CPU_DTRACE_BADADDR); + int i = 0; + + /* + * When accessing the arguments on the stack, + * we must protect against accessing beyond + * the stack. We can safely set NOFAULT here + * -- we know that interrupts are already + * disabled. 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[i++]; + arg0 = stack[i++]; + arg1 = stack[i++]; + arg2 = stack[i++]; + arg3 = stack[i++]; + arg4 = stack[i++]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); #endif - dtrace_probe(fbt->fbtp_id, arg0, arg1, - arg2, arg3, arg4); - - cpu->cpu_dtrace_caller = 0; - } else { + dtrace_probe(fbt->fbtp_id, arg0, arg1, arg2, arg3, + arg4); + cpu->cpu_dtrace_caller = 0; + } else { #ifdef __amd64__ - /* - * On amd64, we instrument the ret, not the - * leave. We therefore need to set the caller - * to ensure that the top frame of a stack() - * action is correct. - */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[0]; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | - CPU_DTRACE_BADADDR); + /* + * On amd64, we instrument the ret, not the + * leave. We therefore need to set the caller + * to assure that the top frame of a stack() + * action is correct. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); #endif - dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, - rval, 0, 0, 0); - cpu->cpu_dtrace_caller = 0; - } - - return (fbt->fbtp_rval); + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, + rval, 0, 0, 0); + cpu->cpu_dtrace_caller = 0; } + + return (fbt->fbtp_rval); } return (0); @@ -154,9 +212,9 @@ fbt_provide_module_function(linker_file_t lf, int symindx, char *modname = opaque; const char *name = symval->name; fbt_probe_t *fbt, *retfbt; - int j; - int size; - uint8_t *instr, *limit; + uint8_t *instr, *limit, *first; + int j, size; + uint8_t flags, next; if ((strncmp(name, "dtrace_", 7) == 0 && strncmp(name, "dtrace_safe_", 12) != 0) || @@ -177,14 +235,16 @@ fbt_provide_module_function(linker_file_t lf, int symindx, if (name[0] == '_' && name[1] == '_') return (0); + flags = 0; size = symval->size; instr = (uint8_t *) symval->value; limit = (uint8_t *) 
symval->value + symval->size; #ifdef __amd64__ + first = instr; while (instr < limit) { - if (*instr == FBT_PUSHL_EBP) + if (*instr == FBT_PUSHQ_RBP) break; if ((size = dtrace_instr_size(instr)) <= 0) @@ -193,7 +253,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, instr += size; } - if (instr >= limit || *instr != FBT_PUSHL_EBP) { + if (instr >= limit || *instr != FBT_PUSHQ_RBP) { /* * We either don't save the frame pointer in this * function, or we ran into some disassembly @@ -243,12 +303,40 @@ again: return (0); #ifdef __amd64__ + /* + * A pop of the frame pointer should be followed by a ret or an + * unconditional jmp depending on whether it's part of a normal return + * or a tail call respectively. + */ + if (size == 1 && *instr == FBT_POPQ_RBP && instr + 1 < limit) { + /* Verify that we can disassemble the next instruction. */ + if (dtrace_instr_size(instr + 1) <= 0) + return (0); + + next = *(instr + 1); + if (next == FBT_JMP_SHORT || + next == FBT_JMP_REL32 || + next == FBT_JMP_ABS) { + flags |= FBTPF_TAIL_CALL; + goto found; + } else if (next == FBT_RET && (dtrace_instr_size(first) != 1 || + *first != FBT_PUSHQ_RBP)) { + goto found; + } + + instr += size; + goto again; + } + /* * We only instrument "ret" on amd64 -- we don't yet instrument * ret imm16, largely because the compiler doesn't seem to * (yet) emit them in the kernel... 
*/ - if (*instr != FBT_RET) { + if (*instr == FBT_RET_IMM16) + printf("fbt: skipping ret immediate instruction\n"); + if (*instr != FBT_RET || dtrace_instr_size(first) != 1 || + *first != FBT_PUSHQ_RBP) { instr += size; goto again; } @@ -262,6 +350,7 @@ again: } #endif +found: /* * We (desperately) want to avoid erroneously instrumenting a * jump table, especially given that our markers are pretty @@ -306,12 +395,13 @@ again: } retfbt = fbt; + fbt->fbtp_flags = flags; fbt->fbtp_patchpoint = instr; fbt->fbtp_ctl = lf; fbt->fbtp_loadcnt = lf->loadcnt; fbt->fbtp_symindx = symindx; -#ifndef __amd64__ +#ifdef __i386__ if (*instr == FBT_POPL_EBP) { fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; } else { @@ -320,10 +410,13 @@ again: } fbt->fbtp_roffset = (uintptr_t)(instr - (uint8_t *) symval->value) + 1; - #else - ASSERT(*instr == FBT_RET); - fbt->fbtp_rval = DTRACE_INVOP_RET; + if (*instr == FBT_POPQ_RBP) { + fbt->fbtp_rval = DTRACE_INVOP_POPQ_RBP; + } else { + ASSERT(*instr == FBT_RET); + fbt->fbtp_rval = DTRACE_INVOP_RET; + } fbt->fbtp_roffset = (uintptr_t)(instr - (uint8_t *) symval->value); #endif @@ -338,3 +431,59 @@ again: instr += size; goto again; } + +static uintptr_t +fbt_tail_call_return(uintptr_t rval) +{ + fbt_probe_t *fbt; + struct thread *td; + int si; + uint16_t map; + + td = curthread; + si = --td->t_dtrace_tc_head; + map = td->t_dtrace_tc_map; + + MPASS(si >= 0 && si < nitems(td->t_dtrace_tc_stack)); + MPASS((map & (1 << si)) == 0); + do { + fbt = td->t_dtrace_tc_stack[si].arg; + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); + } while ((map & (1 << --si)) == 0); + MPASS(si >= 0 && si < DTRACE_TAIL_CALL_RECORDS); + td->t_dtrace_tc_map &= ~(1 << si); + td->t_dtrace_tc_head = si; + return (td->t_dtrace_tc_stack[si].retaddr); +} + +/* + * Record the information needed for a return probe to fire upon return from a + * tail call. Specifically, we keep a pointer to the probe metadata and the + * return address currently on the stack. 
If the return address is already that + * of our tail return trampoline, it doesn't need to be recorded. We use a map + * to keep track of sequences of consecutive tail calls, for which return + * addresses do not need to be saved. + */ +static int +fbt_tail_call_push(fbt_probe_t *fbt, uintptr_t retaddr) +{ + struct thread *td; + int si; + + td = curthread; + si = td->t_dtrace_tc_head; + if (retaddr != FBT_TRAMPOLINE_ADDR) { + if (si >= DTRACE_TAIL_CALL_RECORDS - 1) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_TAILCALLDROP); + return (0); + } + td->t_dtrace_tc_stack[si].retaddr = retaddr; + td->t_dtrace_tc_map |= (1 << si++); + } else if (si >= DTRACE_TAIL_CALL_RECORDS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_TAILCALLDROP); + return (0); + } + td->t_dtrace_tc_stack[si].arg = fbt; + td->t_dtrace_tc_head = ++si; + return (1); +} diff --git a/sys/cddl/dev/fbt/x86/fbt_isa.h b/sys/cddl/dev/fbt/x86/fbt_isa.h index 79190dbf9307..824ca4476d2f 100644 --- a/sys/cddl/dev/fbt/x86/fbt_isa.h +++ b/sys/cddl/dev/fbt/x86/fbt_isa.h @@ -19,12 +19,13 @@ * CDDL HEADER END * * $FreeBSD$ - * */ #ifndef _FBT_ISA_H_ -#define _FBT_ISA_H_ +#define _FBT_ISA_H_ typedef uint8_t fbt_patchval_t; +void fbt_tail_ret_trampoline(void); + #endif diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index f6488257245b..5a7b8cd4efdc 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -165,6 +165,7 @@ amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/amd64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/amd64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" +cddl/dev/fbt/amd64/fbt_asm.S optional dtrace_fbt | dtraceall compile-with "${DTRACE_S}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional 
dtrace_fbt | dtraceall	compile-with "${DTRACE_C}"
diff --git a/sys/modules/dtrace/fbt/Makefile b/sys/modules/dtrace/fbt/Makefile
index 969f07605ff8..15155cde44b4 100644
--- a/sys/modules/dtrace/fbt/Makefile
+++ b/sys/modules/dtrace/fbt/Makefile
@@ -3,17 +3,27 @@
 SYSDIR?=	${.CURDIR}/../../..
 
 .PATH:		${SYSDIR}/cddl/dev/fbt
+.PATH:		${SYSDIR}/cddl/dev/fbt/${MACHINE_CPUARCH}
 
 KMOD=		fbt
 SRCS=		fbt.c fbt_isa.c
 SRCS+=		vnode_if.h
 
+CFLAGS+=	-I${SYSDIR}/cddl/dev/fbt/${MACHINE_CPUARCH}
+
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
+.if ${MACHINE_CPUARCH} == "amd64"
+# fbt_asm.S (the tail call return trampoline) only exists under fbt/amd64.
+SRCS+=		fbt_asm.S
+
+SRCS+=		assym.s
+SRCS+=		opt_compat.h opt_kstack_pages.h opt_nfs.h opt_hwpmc_hooks.h
+
+fbt_asm.o:	assym.s
+.endif
+
 CFLAGS+=	-I${SYSDIR}/cddl/dev/fbt/x86
 .PATH:		${SYSDIR}/cddl/dev/fbt/x86
-.else
-CFLAGS+=	-I${SYSDIR}/cddl/dev/fbt/${MACHINE_CPUARCH}
-.PATH:		${SYSDIR}/cddl/dev/fbt/${MACHINE_CPUARCH}
 .endif
 
 CFLAGS+=	-I${SYSDIR}/cddl/dev/fbt \