commit 4c1e9136db89c913ab7f8c81aa1e4643224e6708 Author: Jayachandran C Date: Mon Feb 27 20:58:04 2012 +0530 JProf: simple, accurate, NMI based profiler JProf is a profiler for XLP cpus. It uses uses the on-chip to generate NMI to a selected CPU, and the NMI handler records the PC and RA in a memory buffer. A python program jprof can interpret the data recorded and correlate it to functions/instructions level in the kernel image. diff --git a/sys/mips/nlm/dev/jprof/README b/sys/mips/nlm/dev/jprof/README new file mode 100644 index 0000000..c3a5f37 --- /dev/null +++ b/sys/mips/nlm/dev/jprof/README @@ -0,0 +1,79 @@ + How to use JProf + ================ + +JProf is a simple and accurate profiler for XLP cpus. + +JProf uses NMI interrupts generated by the XLP PIC to get accurate +profiling information of a CPU. + +Compiling JProf into the FreeBSD kernel +--------------------------------------- +Add 'device jprof' to your kernel conf file. Compile the kernel and boot +as usual. + +Profiling with JProf +-------------------- +Enable jprof on the cpu you need 'cpuset', e.g. for cpu 4: + +cpuset -l 4 sysctl debug.jprof=1 + +cpuset -l 4 sysctl debug.jprof=0 + +This sequence records the profiling data of CPU 4 into a kernel memory +buffer. + +Extract data stored for a profiling run +--------------------------------------- +The profiling data can be extracted from the kernel memory buffer using +the 'dd' command on /dev/mem. The following command does this: + +dd if=/dev/mem of=/tmp/jprof.dump bs=1048576 skip=192 count=16 + +This will create a /tmp/jprof.dump that is 16MB on the board. You have to +transfer it to your build server using scp to do further analysis. + +View profile output +------------------- +Change to the kernel build directory on the FreeBSD build machine. Make sure +your build environment is setup, and run this command. + +sys/mips/nlm/dev/jprof/jprof -i jprof.dump | less + +The -i is optional and show the instruction level profile output. + +The output is sorted based on the frequency of use. The first function would +be the one in which the program has spent most time on. The count field shows +how many samples point to a specific function or instruction. + +Count(%Time) Function [startaddr - endaddr] + 3475 (18.0%) smp_rendezvous_action [0x00000000 - 0x000002d8] + +The percentage shows the percentage of total program execution time spent in +the function alone (not including the functions called by this) + +This is followed by the frequent callers of the function + 1870 [0x8033a0e0] smp_rendezvous_action + 0x200 + 1605 [0x8033b040] smp_rendezvous_cpus + 0x368 + +NOTE: if you see that the caller is the same function, it is because +we don't have accurate return info. + +The next block shows how frequently each instruction of the function is +hit, this will show the part of the function which are taking most execution +time. + [smp_rendezvous_action+0x0000] 1 ( 0.0%) + [smp_rendezvous_action+0x0004] 2 ( 0.0%) + [smp_rendezvous_action+0x0008] 2 ( 0.0%) + [smp_rendezvous_action+0x000c] 1 ( 0.0%) + [smp_rendezvous_action+0x001c] 1 ( 0.0%) + [smp_rendezvous_action+0x0020] 2 ( 0.0%) + [smp_rendezvous_action+0x0028] 1 ( 0.0%) + [smp_rendezvous_action+0x002c] 1 ( 0.0%) + [smp_rendezvous_action+0x0034] 32 ( 9.0%) + [smp_rendezvous_action+0x0038] 32 ( 9.0%) + [smp_rendezvous_action+0x003c] 12 ( 3.0%) + [smp_rendezvous_action+0x0040] 10 ( 3.0%) + +The percentages here show the percent of the total function execution time +spent in the specific instuction. +-- diff --git a/sys/mips/nlm/dev/jprof/jprof b/sys/mips/nlm/dev/jprof/jprof new file mode 100644 index 0000000..1e8c45d --- /dev/null +++ b/sys/mips/nlm/dev/jprof/jprof @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +import sys +import os +import getopt +from array import array +from bisect import bisect +from subprocess import Popen,PIPE +from operator import itemgetter + +def usage(): + print "Usage: jprof [-i] dumpfile" + print "Arguments: " + print " -i Print instruction level profiling" + print " dumpfile Profile data dump file from target" + sys.exit(1) + +def conferr(str): + print "Configuration error:", str + print "Make sure that you are in you FreeBSD soruce dir" + print "you have a built kernel, and your environment variables" + print "MAKEOBJDIRPREFIX, TARGET, TARGET_ARCH. and KERNCONF" + print "are set." + sys.exit(1) + +addl = [] +detl = [] +offset = 0 + +# parse options - we have -i for instruction level reporting +try: + opts, args = getopt.getopt(sys.argv[1:], "i") +except getopt.GetoptError, err: + print str(err) + usage() + +instprint = False +for o, a in opts: + if o == "-i": + instprint = True + +if len(args) != 1: + usage() + +try: + objdir = os.environ['MAKEOBJDIRPREFIX'] + "/" + \ + os.environ['TARGET'] + "." + os.environ['TARGET_ARCH'] + \ + "/" + os.environ['PWD'] + + crossnm = objdir + "/tmp/usr/bin/nm" + kerndbg = objdir + "/sys/" + os.environ['KERNCONF'] + '/kernel.debug' +except KeyError: + conferr("could not find FreeBSD build env") + +if not os.path.isfile(crossnm): + conferr("could not find " + crossnm) + +if not os.path.isfile(kerndbg): + print "Cannot find kernel debug image at", kerndbg + sys.exit(1) + +# Run nm +try: +# print "Running ", crossnm, " -n ", kerndbg, " offset ", hex(offset) + p = Popen([crossnm, "-n" , kerndbg] , shell=False, stdout=PIPE) +except OSError,s: + print "os error", s + sys.exit(1) + +# create sym table +for l in p.stdout.readlines(): + a = l.split(' ') + try: + addr = long(a[0][-8:],16) + addl.append(addr) + sym = a[2].rstrip('\n') + detl.append(sym) + except ValueError: + pass + +print "Read", len(detl), "symbols from", kerndbg + +#read EPC dump +addrlist = [] +scount = 0 +try: + a = array('I'); + try: + epcf = open(args[0], "rb") + except IOError, ie: + print str(ie) + sys.exit(1) + try: + a.fromfile(epcf, 0x2000000) + except EOFError: + pass + + a.byteswap() + k = {} + for i in range(0, len(a), 2): + if (a[i] == 0xa5a5a5a5L): + continue + scount += 1 + t = (a[i], a[i+1]) + k[t] = k.setdefault(t, 0) + 1 + addrlist = k.items() + addrlist.sort() + epcf.close() +except OSError,s: + print "os error", s + sys.exit(1) + +print "Read", scount, "samples from", args[0] +print +cs = {} +cl = {} +al = {} +for ((addr, raddr), count) in addrlist: + try: + fn = rfn = None + off = roff = 0 + pos = bisect(addl, addr) + if (pos == 0): + fn = "Unknown" + else: + fn = detl[pos-1] + off = addr - addl[pos-1] + rpos = bisect(addl, raddr) + if (rpos == 0): + rfn = "UnknownC" + else: + rfn = detl[rpos-1] + roff = raddr - addl[rpos-1] + cs[fn] = cs.setdefault(fn, 0) + count + a = al.setdefault(fn, {}) + a[off] = a.setdefault(off, 0) + count + h = cl.setdefault(fn, {}) + callee = (rfn, roff, raddr) + h[callee] = h.setdefault(callee, 0) + count + except ValueError: + pass + +print "Count(Time %)\tFunction" +print "------------\t--------" +for (fn, n) in sorted(cs.items(), reverse=True, key=itemgetter(1)): + alist = sorted(al[fn].items()); + print "%5d (%2.1f%%)\t%s [0x%08x - 0x%08x]" % (n, 100.0*n/scount, fn, alist[0][0], alist[-1][0]) + for (c, cn) in sorted(cl[fn].items(), reverse=True, key=itemgetter(1)): + print "\t%5d\t[0x%08x] %s + 0x%x" % (cn, c[2], c[0], c[1]) + print + if instprint: + for (addr, c) in alist: + print "\t[%s+0x%04x] %5d (%4.1f%%)" % (fn, addr, c, 100.0*c/n) + print diff --git a/sys/mips/nlm/dev/jprof/jprof.c b/sys/mips/nlm/dev/jprof/jprof.c new file mode 100644 index 0000000..b716c9f --- /dev/null +++ b/sys/mips/nlm/dev/jprof/jprof.c @@ -0,0 +1,77 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char JProf[], JProfEnd[]; + +#define JPROF_LOG_START 0xffffffff8c000000ull +#define JPROF_LOG_SIZE 0x1000000 + +static int +sys_jprof(SYSCTL_HANDLER_ARGS) +{ + static char enabled; + vm_offset_t saveva; + int error, v, cpuid; + + saveva = (vm_offset_t)JPROF_LOG_START; + cpuid = nlm_cpuid(); + v = enabled; + error = sysctl_handle_int(oidp, &v, v, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (v == enabled) + return (0); + enabled = v; + if (v) { + printf("Start profile on %d\n", cpuid); + /* start profiling */ + memset((void *)saveva, 0xa5, JPROF_LOG_SIZE); + write_c0_register64(22, 2, JPROF_LOG_START); + bcopy(JProf, (void *)MIPS_RESET_EXC_VEC, + JProfEnd - JProf); + nlm_pic_set_timer(xlp_pic_base, 6, 509 * 1499ull, -3, cpuid); + } else { + /* stop the stuff */ + nlm_pic_set_timer(xlp_pic_base, 6, ~0ULL, 0, 0); + printf("Stopped\n"); + } + + return (0); +} + +SYSCTL_PROC(_debug, OID_AUTO, jprof, CTLTYPE_INT | CTLFLAG_RW, 0, 0, + sys_jprof, "A", "JC's profiler"); diff --git a/sys/mips/nlm/dev/jprof/jprof_nmi.S b/sys/mips/nlm/dev/jprof/jprof_nmi.S new file mode 100644 index 0000000..6856475 --- /dev/null +++ b/sys/mips/nlm/dev/jprof/jprof_nmi.S @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include "assym.s" + + .text + .set noat + .set mips64r2 + +VECTOR(JProf, unknown) + dmtc0 k0, $22, 6 + dmtc0 k1, $22, 7 + dmfc0 k0, $22, 2 + dmfc0 k1, MIPS_COP_0_ERROR_PC + sw k1, 0(k0) + sw ra, 4(k0) + daddi k0, 8 + dins k0, zero, 24, 1 /* wrap at 16 MB */ + dmtc0 k0, $22, 2 + li k0, 0xb8000000 + XLP_IO_PIC_OFFSET(0) + XLP_IO_PCI_HDRSZ + li k1, (1 << PIC_IRT_TIMER_6_INDEX) + sd k1, (PIC_STATUS * 4)(k0) + li k1, PIC_IRT_TIMER_6_INDEX + sd k1, (PIC_INT_ACK * 4)(k0) + li k1, 0x00480000 + mfc0 k0, MIPS_COP_0_STATUS + xor k0, k1 + mtc0 k0, MIPS_COP_0_STATUS + dmfc0 k0, $22, 6 + dmfc0 k1, $22, 7 + eret + nop +VECTOR_END(JProf)