Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC (revision 274898) +++ sys/amd64/conf/GENERIC (working copy) @@ -73,6 +73,8 @@ options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel +options KTR +options KTR_COMPILE=(KTR_LOCK|KTR_INTR|KTR_PROC) # Debugging support. Always need this: options KDB # Enable kernel debugger support. Index: sys/kern/kern_ktr.c =================================================================== --- sys/kern/kern_ktr.c (revision 274898) +++ sys/kern/kern_ktr.c (working copy) @@ -88,7 +88,7 @@ #endif #ifndef KTR_CPU -#define KTR_CPU PCPU_GET(cpuid) +#define KTR_CPU curcpu #endif static MALLOC_DEFINE(M_KTR, "KTR", "KTR"); @@ -95,13 +95,15 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); -volatile int ktr_idx = 0; int ktr_mask = KTR_MASK; int ktr_compile = KTR_COMPILE; int ktr_entries = KTR_BOOT_ENTRIES; int ktr_version = KTR_VERSION; -struct ktr_entry ktr_buf_init[KTR_BOOT_ENTRIES]; -struct ktr_entry *ktr_buf = ktr_buf_init; + +/* CPU0 gets a static buffer so that tracing can be used in early boot */ +struct ktr_entry ktr_buf_init[KTR_BOOT_ENTRIES]; +struct ktr_entry *ktr_pcpu_buf[MAXCPU]; +volatile int ktr_pcpu_idx[MAXCPU]; cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); @@ -138,7 +140,7 @@ static int sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS) { - int clear, error; + int clear, error, i; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); @@ -146,8 +148,11 @@ return (error); if (clear) { - bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries); - ktr_idx = 0; + CPU_FOREACH(i) { + bzero(ktr_pcpu_buf[i], + sizeof(*ktr_pcpu_buf[i]) * ktr_entries); + ktr_pcpu_idx[i] = 0; + } } return (error); @@ -176,7 +181,6 @@ sysctl_debug_ktr_mask, "IU", "Bitmask of KTR event classes for which logging is enabled"); -#if KTR_ENTRIES > KTR_BOOT_ENTRIES /* * A simplified version of sysctl_debug_ktr_entries. * No need to care about SMP, scheduling, etc. @@ -184,31 +188,51 @@ static void ktr_entries_initializer(void *dummy __unused) { - int mask; + struct ktr_entry *ktr_buf; + int i, idx, mask; - /* Temporarily disable ktr in case malloc() is being traced. */ + /* + * Temporarily disable KTR in case malloc() is being traced. + */ mask = ktr_mask; ktr_mask = 0; - ktr_buf = malloc(sizeof(*ktr_buf) * KTR_ENTRIES, M_KTR, - M_WAITOK | M_ZERO); - memcpy(ktr_buf, ktr_buf_init + ktr_idx, - (KTR_BOOT_ENTRIES - ktr_idx) * sizeof(*ktr_buf)); - if (ktr_idx != 0) { - memcpy(ktr_buf + KTR_BOOT_ENTRIES - ktr_idx, ktr_buf_init, - ktr_idx * sizeof(*ktr_buf)); - ktr_idx = KTR_BOOT_ENTRIES; + + /* + * Allocate the per-CPU KTR buffers. + */ + for (i = 0; i < mp_ncpus; ++i) { + printf("%d\n", i); + ktr_pcpu_buf[i] = malloc(sizeof(*ktr_buf) * KTR_ENTRIES, + M_KTR, M_WAITOK | M_ZERO); } + + /* + * For CPU0, we need to copy all the entries traced at early boot + * time in the new buffer. + */ + ktr_buf = ktr_pcpu_buf[0]; + idx = ktr_pcpu_idx[0]; + memcpy(ktr_buf, ktr_buf_init + idx, + (KTR_BOOT_ENTRIES - idx) * sizeof(*ktr_buf)); + if (idx != 0) { + memcpy(ktr_buf + KTR_BOOT_ENTRIES - idx, ktr_buf_init, + idx * sizeof(*ktr_buf)); + ktr_pcpu_idx[0] = KTR_BOOT_ENTRIES; + } + + /* + * Re-enable KTR. + */ ktr_entries = KTR_ENTRIES; ktr_mask = mask; } SYSINIT(ktr_entries_initializer, SI_SUB_KMEM, SI_ORDER_ANY, ktr_entries_initializer, NULL); -#endif static int sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS) { - int entries, error, mask; + int entries, error, i, mask; struct ktr_entry *buf, *oldbuf; entries = ktr_entries; @@ -225,20 +249,29 @@ ktr_mask = mask; return (error); } - if (ktr_buf != ktr_buf_init) - oldbuf = ktr_buf; - else - oldbuf = NULL; - /* Allocate a new buffer. */ - buf = malloc(sizeof(*buf) * entries, M_KTR, M_WAITOK | M_ZERO); - /* Install the new buffer and restart ktr. */ - ktr_buf = buf; - ktr_entries = entries; - ktr_idx = 0; + + for (i = 0; i < mp_ncpus; ++i) { + + /* + * Allocate a new buffer. Do this outside of the critical + * section. + */ + buf = malloc(sizeof(*buf) * entries, M_KTR, M_WAITOK | M_ZERO); + critical_enter(); + if (i > 0 || ktr_pcpu_buf[0] != ktr_buf_init) + oldbuf = ktr_pcpu_buf[i]; + else + oldbuf = NULL; + + /* Install the new buffer and restart KTR. */ + ktr_pcpu_buf[i] = buf; + ktr_pcpu_idx[i] = 0; + critical_exit(); + if (oldbuf != NULL) + free(oldbuf, M_KTR); + } atomic_store_rel_int(&ktr_mask, mask); - if (oldbuf != NULL) - free(oldbuf, M_KTR); - + ktr_entries = entries; /* XXXDAVIDE: atomic? */ return (error); } @@ -318,19 +351,15 @@ #ifdef KTR_ALQ struct ale *ale = NULL; #endif - int newindex, saveindex; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) struct thread *td; #endif - int cpu; + int cpu, idx; if (panicstr || kdb_active) return; - if ((ktr_mask & mask) == 0 || ktr_buf == NULL) + if ((ktr_mask & mask) == 0) return; - cpu = KTR_CPU; - if (!CPU_ISSET(cpu, &ktr_cpumask)) - return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; if (td->td_pflags & TDP_INKTR) @@ -355,13 +384,19 @@ } } else #endif - { - do { - saveindex = ktr_idx; - newindex = (saveindex + 1) % ktr_entries; - } while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0); - entry = &ktr_buf[saveindex]; + + /* XXXDAVIDE: critical_enter() in early boot is dangerous? */ + critical_enter(); + cpu = curcpu; + if (ktr_pcpu_buf[cpu] == NULL || + !CPU_ISSET(cpu, &ktr_cpumask)) { + critical_exit(); + return; } + idx = (ktr_pcpu_idx[cpu] + 1) % ktr_entries; + entry = &ktr_pcpu_buf[cpu][idx]; + ktr_pcpu_idx[cpu] = idx; + critical_exit(); entry->ktr_timestamp = KTR_TIME; entry->ktr_cpu = cpu; entry->ktr_thread = curthread; @@ -406,40 +441,47 @@ int cur; int first; }; -static struct tstate tstate; +static struct tstate tstate[MAXCPU]; static int db_ktr_verbose; -static int db_mach_vtrace(void); +static int db_mach_vtrace(int); +/* + * XXX: Make a variant of this, with a cpu argument? + */ DB_SHOW_COMMAND(ktr, db_ktr_all) { - - tstate.cur = (ktr_idx - 1) % ktr_entries; - tstate.first = -1; - db_ktr_verbose = 0; - db_ktr_verbose |= (strchr(modif, 'v') != NULL) ? 2 : 0; - db_ktr_verbose |= (strchr(modif, 'V') != NULL) ? 1 : 0; /* just timestap please */ - if (strchr(modif, 'a') != NULL) { - db_disable_pager(); - while (cncheckc() != -1) - if (db_mach_vtrace() == 0) - break; - } else { - while (!db_pager_quit) - if (db_mach_vtrace() == 0) - break; + int i; + + CPU_FOREACH(i) { + tstate[i].cur = (ktr_pcpu_idx[i] - 1) % ktr_entries; + tstate[i].first = -1; + db_ktr_verbose = 0; + db_ktr_verbose |= (strchr(modif, 'v') != NULL) ? 2 : 0; + /* just timestap please */ + db_ktr_verbose |= (strchr(modif, 'V') != NULL) ? 1 : 0; + if (strchr(modif, 'a') != NULL) { + db_disable_pager(); + while (cncheckc() != -1) + if (db_mach_vtrace(i) == 0) + break; + } else { + while (!db_pager_quit) + if (db_mach_vtrace(i) == 0) + break; + } } } static int -db_mach_vtrace(void) +db_mach_vtrace(int cpu) { - struct ktr_entry *kp; + struct ktr_entry *kp; - if (tstate.cur == tstate.first || ktr_buf == NULL) { + if (tstate[cpu].cur == tstate[cpu].first || ktr_pcpu_buf[cpu] == NULL) { db_printf("--- End of trace buffer ---\n"); return (0); } - kp = &ktr_buf[tstate.cur]; + kp = &ktr_pcpu_buf[cpu][tstate[cpu].cur]; /* Skip over unused entries. */ if (kp->ktr_desc == NULL) { @@ -446,7 +488,7 @@ db_printf("--- End of trace buffer ---\n"); return (0); } - db_printf("%d (%p", tstate.cur, kp->ktr_thread); + db_printf("%d (%p", tstate[cpu].cur, kp->ktr_thread); #ifdef SMP db_printf(":cpu%d", kp->ktr_cpu); #endif @@ -463,11 +505,11 @@ kp->ktr_parms[5]); db_printf("\n"); - if (tstate.first == -1) - tstate.first = tstate.cur; + if (tstate[cpu].first == -1) + tstate[cpu].first = tstate[cpu].cur; - if (--tstate.cur < 0) - tstate.cur = ktr_entries - 1; + if (--tstate[cpu].cur < 0) + tstate[cpu].cur = ktr_entries - 1; return (1); } Index: sys/sys/ktr.h =================================================================== --- sys/sys/ktr.h (revision 274898) +++ sys/sys/ktr.h (working copy) @@ -66,8 +66,8 @@ extern int ktr_entries; extern int ktr_verbose; -extern volatile int ktr_idx; -extern struct ktr_entry *ktr_buf; +extern volatile int ktr_pcpu_idx[MAXCPU]; +extern struct ktr_entry *ktr_pcpu_buf[MAXCPU]; #ifdef KTR Index: usr.bin/ktrdump/Makefile =================================================================== --- usr.bin/ktrdump/Makefile (revision 274898) +++ usr.bin/ktrdump/Makefile (working copy) @@ -5,6 +5,6 @@ LDADD= -lkvm MAN= ktrdump.8 -WARNS?= 2 +WARNS?= 1 .include Index: usr.bin/ktrdump/ktrdump.c =================================================================== --- usr.bin/ktrdump/ktrdump.c (revision 274898) +++ usr.bin/ktrdump/ktrdump.c (working copy) @@ -48,13 +48,19 @@ #define USAGE \ "usage: ktrdump [-cfqrtH] [-e execfile] [-i ktrfile] [-m corefile] [-o outfile]\n" +static void ktrdump_cpu_buf(FILE *, kvm_t *, int, int, int, int, + struct ktr_entry **); +static struct ktr_entry **ktrdump_get_buffers(kvm_t *, int, int); +static int ktrdump_get_ncpus(kvm_t *); +static void ktrdump_print_header(FILE *); static void usage(void); static struct nlist nl[] = { { "_ktr_version" }, { "_ktr_entries" }, - { "_ktr_idx" }, - { "_ktr_buf" }, + { "_ktr_pcpu_idx" }, + { "_ktr_pcpu_buf" }, + { "_mp_ncpus" }, { NULL } }; @@ -77,119 +83,86 @@ static char obuf[PATH_MAX]; static char sbuf[KTR_PARMS][SBUFLEN]; -/* - * Reads the ktr trace buffer from kernel memory and prints the trace entries. - */ -int -main(int ac, char **av) +static struct ktr_entry ** +ktrdump_get_buffers(kvm_t *kd, int ncpus, int entries) { - u_long parms[KTR_PARMS]; - struct ktr_entry *buf; - uintmax_t tlast, tnow; - unsigned long bufptr; - struct stat sb; - kvm_t *kd; - FILE *out; - char *p; - int version; - int entries; - int index, index2; - int parm; - int in; - int c; - int i = 0; + struct ktr_entry **buf; + int i; - /* - * Parse commandline arguments. - */ - out = stdout; - while ((c = getopt(ac, av, "cfqrtHe:i:m:o:")) != -1) - switch (c) { - case 'c': - cflag = 1; - break; - case 'e': - if (strlcpy(execfile, optarg, sizeof(execfile)) - >= sizeof(execfile)) - errx(1, "%s: File name too long", optarg); - eflag = 1; - break; - case 'f': - fflag = 1; - break; - case 'i': - iflag = 1; - if ((in = open(optarg, O_RDONLY)) == -1) - err(1, "%s", optarg); - break; - case 'm': - if (strlcpy(corefile, optarg, sizeof(corefile)) - >= sizeof(corefile)) - errx(1, "%s: File name too long", optarg); - mflag = 1; - break; - case 'o': - if ((out = fopen(optarg, "w")) == NULL) - err(1, "%s", optarg); - break; - case 'q': - qflag++; - break; - case 'r': - rflag = 1; - break; - case 't': - tflag = 1; - break; - case 'H': - hflag = 1; - break; - case '?': - default: - usage(); + buf = malloc(sizeof(*buf) * ncpus); + if (buf == NULL) + errx(1, "Can't allocate memory"); + if (kvm_read(kd, nl[3].n_value, buf, sizeof(*buf) * ncpus) == -1) + errx(1, "%s", kvm_geterr(kd)); + for (i = 0; i < ncpus; ++i) { + void *bufptr = buf[i]; + buf[i] = malloc(sizeof(**buf) * entries); + if (buf[i] == NULL) + errx(1, "Can't allocate memory"); + if (kvm_read(kd, (uintptr_t)bufptr, buf[i], + sizeof(**buf) * entries) == -1) { + errx(1, "%s", kvm_geterr(kd)); } - ac -= optind; - av += optind; - if (ac != 0) - usage(); + } + return (buf); +} +static int +ktrdump_get_entries(kvm_t *kd) +{ + int entries; + + if (kvm_read(kd, nl[1].n_value, &entries, sizeof(entries)) == -1) + errx(1, "%s", kvm_geterr(kd)); + return (entries); +} + +static int * +ktrdump_get_indices(kvm_t *kd, int ncpus) +{ + int *indices; + + indices = malloc(sizeof(*indices) * ncpus); + if (indices == NULL) + errx(1, "Can't allocate memory"); + if (kvm_read(kd, nl[2].n_value, indices, + sizeof(*indices) * ncpus) == -1) + errx(1, "%s", kvm_geterr(kd)); + return (indices); +} + +static int +ktrdump_get_ncpus(kvm_t *kd) +{ + int ncpus; + /* - * Open our execfile and corefile, resolve needed symbols and read in - * the trace buffer. + * Get the number of CPUS available in the system. + * XXXDAVIDE: Fix in the !SMP case? */ - if ((kd = kvm_openfiles(eflag ? execfile : NULL, - mflag ? corefile : NULL, NULL, O_RDONLY, errbuf)) == NULL) - errx(1, "%s", errbuf); - if (kvm_nlist(kd, nl) != 0 || - kvm_read(kd, nl[0].n_value, &version, sizeof(version)) == -1) + if (kvm_read(kd, nl[4].n_value, &ncpus, sizeof(ncpus)) == -1) errx(1, "%s", kvm_geterr(kd)); + return (ncpus); +} + +static int +ktrdump_get_version(kvm_t *kd) +{ + int version; + + if (kvm_nlist(kd, nl) != 0) + errx(1, "%s", kvm_geterr(kd)); + if (kvm_read(kd, nl[0].n_value, &version, sizeof(version)) == -1) + errx(1, "%s", kvm_geterr(kd)); if (version != KTR_VERSION) errx(1, "ktr version mismatch"); - if (iflag) { - if (fstat(in, &sb) == -1) - errx(1, "stat"); - entries = sb.st_size / sizeof(*buf); - index = 0; - buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, in, 0); - if (buf == MAP_FAILED) - errx(1, "mmap"); - } else { - if (kvm_read(kd, nl[1].n_value, &entries, sizeof(entries)) - == -1) - errx(1, "%s", kvm_geterr(kd)); - if ((buf = malloc(sizeof(*buf) * entries)) == NULL) - err(1, NULL); - if (kvm_read(kd, nl[2].n_value, &index, sizeof(index)) == -1 || - kvm_read(kd, nl[3].n_value, &bufptr, - sizeof(bufptr)) == -1 || - kvm_read(kd, bufptr, buf, sizeof(*buf) * entries) == -1 || - kvm_read(kd, nl[2].n_value, &index2, sizeof(index2)) == -1) - errx(1, "%s", kvm_geterr(kd)); - } + return (version); +} - /* - * Print a nice header. - */ +static void +ktrdump_print_header(FILE *out) +{ + if (!qflag) { fprintf(out, "%-6s ", "index"); if (cflag) @@ -216,7 +189,17 @@ fprintf(out, "----- "); fprintf(out, "\n"); } +} +static void +ktrdump_cpu_buf(FILE *out, kvm_t *kd, int index, int index2, int cpu, + int entries, struct ktr_entry **buf) +{ + u_long parms[KTR_PARMS]; + char *p; + uintmax_t tlast, tnow; + int c, i, parm; + /* * Now tear through the trace buffer. */ @@ -224,12 +207,13 @@ i = index - 1; if (i < 0) i = entries - 1; - } + } else + i = 0; tlast = -1; for (;;) { - if (buf[i].ktr_desc == NULL) + if (buf[cpu][i].ktr_desc == NULL) break; - if (kvm_read(kd, (u_long)buf[i].ktr_desc, desc, + if (kvm_read(kd, (u_long)buf[cpu][i].ktr_desc, desc, sizeof(desc)) == -1) errx(1, "%s", kvm_geterr(kd)); desc[sizeof(desc) - 1] = '\0'; @@ -249,7 +233,7 @@ case 'q': case 'L': case '.': goto next; case 's': - if (kvm_read(kd, (u_long)buf[i].ktr_parms[parm], + if (kvm_read(kd, (u_long)buf[cpu][i].ktr_parms[parm], sbuf[parm], sizeof(sbuf[parm])) == -1) strcpy(sbuf[parm], "(null)"); sbuf[parm][sizeof(sbuf[0]) - 1] = '\0'; @@ -257,7 +241,7 @@ parm++; break; default: - parms[parm] = buf[i].ktr_parms[parm]; + parms[parm] = buf[cpu][i].ktr_parms[parm]; parm++; break; } @@ -264,9 +248,9 @@ } fprintf(out, "%6d ", i); if (cflag) - fprintf(out, "%3d ", buf[i].ktr_cpu); + fprintf(out, "%3d ", buf[cpu][i].ktr_cpu); if (tflag) { - tnow = (uintmax_t)buf[i].ktr_timestamp; + tnow = (uintmax_t)buf[cpu][i].ktr_timestamp; if (rflag) { if (tlast == -1) tlast = tnow; @@ -277,15 +261,15 @@ fprintf(out, "%16ju ", tnow); } if (fflag) { - if (kvm_read(kd, (u_long)buf[i].ktr_file, fbuf, + if (kvm_read(kd, (u_long)buf[cpu][i].ktr_file, fbuf, sizeof(fbuf)) == -1) strcpy(fbuf, "(null)"); snprintf(obuf, sizeof(obuf), "%s:%d", fbuf, - buf[i].ktr_line); + buf[cpu][i].ktr_line); fprintf(out, "%-40s ", obuf); } if (hflag) - fprintf(out, "%p ", buf[i].ktr_thread); + fprintf(out, "%p ", buf[cpu][i].ktr_thread); fprintf(out, desc, parms[0], parms[1], parms[2], parms[3], parms[4], parms[5]); fprintf(out, "\n"); @@ -306,8 +290,6 @@ break; } } - - return (0); } static void @@ -317,3 +299,106 @@ fprintf(stderr, USAGE); exit(1); } + +int +main(int ac, char **av) +{ + struct ktr_entry **ktr_pcpu_buf; + struct stat sb; + kvm_t *kd; + FILE *out; + char *p; + int *indices, *indices2; + int c, entries, i, in, index, index2, j, ncpus, version; + + /* + * Parse commandline arguments. + */ + out = stdout; + while ((c = getopt(ac, av, "cfqrtHe:i:m:o:")) != -1) + switch (c) { + case 'c': + cflag = 1; + break; + case 'e': + if (strlcpy(execfile, optarg, sizeof(execfile)) + >= sizeof(execfile)) + errx(1, "%s: File name too long", optarg); + eflag = 1; + break; + case 'f': + fflag = 1; + break; + case 'i': + iflag = 1; + if ((in = open(optarg, O_RDONLY)) == -1) + err(1, "%s", optarg); + break; + case 'm': + if (strlcpy(corefile, optarg, sizeof(corefile)) + >= sizeof(corefile)) + errx(1, "%s: File name too long", optarg); + mflag = 1; + break; + case 'o': + if ((out = fopen(optarg, "w")) == NULL) + err(1, "%s", optarg); + break; + case 'q': + qflag++; + break; + case 'r': + rflag = 1; + break; + case 't': + tflag = 1; + break; + case 'H': + hflag = 1; + break; + case '?': + default: + usage(); + } + ac -= optind; + av += optind; + if (ac != 0) + usage(); + + /* + * Open our execfile and corefile, resolve needed symbols and read in + * the trace buffer. + */ + if ((kd = kvm_openfiles(eflag ? execfile : NULL, + mflag ? corefile : NULL, NULL, O_RDONLY, errbuf)) == NULL) + errx(1, "%s", errbuf); + + version = ktrdump_get_version(kd); + entries = ktrdump_get_entries(kd); + ncpus = ktrdump_get_ncpus(kd); + indices = ktrdump_get_indices(kd, ncpus); + ktr_pcpu_buf = ktrdump_get_buffers(kd, ncpus, entries); + indices2 = ktrdump_get_indices(kd, ncpus); + ktrdump_print_header(out); + for (i = 0; i < ncpus; ++i) { +#if 0 + if (iflag) { + if (fstat(in, &sb) == -1) + errx(1, "stat"); + entries = sb.st_size / sizeof(*buf); + index = 0; + buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, in, 0); + if (buf == MAP_FAILED) + errx(1, "mmap"); + } +#endif + ktrdump_cpu_buf(out, kd, indices[i], indices2[i], i, entries, + ktr_pcpu_buf); + } + + /* + * XXXDAVIDE: FREE resources. + */ + return (0); +} +