/*
 * memtime: time individual memory loads with the x86 timestamp counter and
 * print a histogram of the observed access times, in nanoseconds.
 *
 * The file builds either as a kernel module (when _KERNEL is defined) or as
 * a userland program taking <memsize> <iters> [binning] on the command line.
 *
 * NOTE(review): the header names on the #include lines were missing from
 * the reviewed copy of this file.  The lists below are reconstructed from
 * the symbols the code uses -- confirm them against the real build.
 */
#ifdef _KERNEL
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/libkern.h>
#else
#include <sys/types.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#endif

void bucket_add(uint64_t);
void buckets_print(uint64_t);
long memtime(long *, long, int);

#define	BUCKETS	2000		/* Gives us 2us worth of time */
#define	BINVAL	5		/* Anything within 5ns is the same */
				/* (BINVAL is not referenced in this file) */

uintmax_t totalns;		/* Sum of every recorded sample, in ns */
int buckets[BUCKETS];		/* buckets[t] counts samples that took t ns */
int ticks;			/* TSC ticks per microsecond */
int binning = 1;		/* Histogram bin width, in ns */

/*
 * Read the 64-bit timestamp counter.  The instruction sequence issues an
 * mfence before rdtsc.  The "memory" clobber stops the compiler from moving
 * memory accesses across the read, so a load placed between two rdtsc()
 * calls in the source stays between them in the generated code.
 */
static __inline uint64_t
rdtsc(void)
{
	uint32_t low, high;

	__asm __volatile("mfence; rdtsc"
	    : "=a" (low), "=d" (high)
	    :
	    : "memory");
	return (low | ((uint64_t)high << 32));
}

/*
 * Record one sample in the histogram.
 *
 * time is a duration in TSC ticks.  It is converted to nanoseconds using the
 * global ticks (ticks per microsecond), rounded down to a multiple of
 * binning, added to totalns and counted in buckets[].  Samples of BUCKETS ns
 * or more are discarded.
 */
void
bucket_add(uint64_t time)
{
	/*
	 * Both globals are used as divisors below; refuse to divide by
	 * zero (or by a negative value converted to a huge unsigned one).
	 */
	if (ticks <= 0 || binning <= 0)
		return;

	/* This effectively yields ns */
	time = (time * 1000) / ticks;
	time = (time / binning) * binning;
	/*
	 * Times exceeding the bucket size are probably caused by context
	 * switch interference.
	 */
	if (time >= BUCKETS)
		return;
	totalns += time;
	buckets[time]++;
}

/*
 * Print every non-empty histogram bucket as "<ns>\t<count>".
 *
 * The ticks parameter is unused; it is kept so existing callers still
 * compile.
 */
void
buckets_print(uint64_t ticks)
{
	int i;

	(void)ticks;
	for (i = 0; i < BUCKETS; i++) {
		if (buckets[i])
			printf("%d\t%d\n", i, buckets[i]);
	}
}

/*
 * Time iters loads from random long-sized slots of the buffer mem, which is
 * memsize bytes long, and record each one with bucket_add().
 *
 * The buffer and the histogram are zeroed first.  The function then measures
 * the cost of two back-to-back rdtsc() calls (the minimum of 10 tries) and
 * subtracts it from every sample.  In userland it also calibrates the global
 * ticks by counting TSC ticks across sleep(1).
 *
 * Returns the sum of (loaded value + 1) over all iterations.  The value only
 * exists so the compiler cannot discard the loads.  Returns 0 without doing
 * anything if mem is NULL or memsize cannot hold a single long.
 */
long
memtime(long *mem, long memsize, int iters)
{
	uint64_t start;
	uint64_t end;
	uint64_t elapsed;
	unsigned long nslots;
	int tscticks;
	long total;
	long *loc;
	long val;
	int i;

	/* With no complete slot the modulo below would divide by zero. */
	if (mem == NULL || memsize < (long)sizeof(*mem))
		return (0);
	nslots = (unsigned long)memsize / sizeof(*mem);

	memset(mem, 0, memsize);
	memset(buckets, 0, sizeof(buckets));
	printf("mem %p, memsize %ld\n", (void *)mem, memsize);
	tscticks = 0;
	/*
	 * Fake return val to prevent gcc from optimizing away the load.
	 */
	total = 0;
	/* Smallest observed cost of two adjacent timestamp reads. */
	for (i = 0; i < 10; i++) {
		__asm("mfence; pause; pause;");
		start = rdtsc();
		end = rdtsc();
		elapsed = end - start;
		if (tscticks == 0)
			tscticks = elapsed;
		else if (tscticks > elapsed)
			tscticks = elapsed;
	}
#ifdef _KERNEL
	/* Hard-coded rate: 1800 ticks per microsecond. */
	ticks = 1800;
#else
	start = rdtsc();
	sleep(1);
	end = rdtsc();
	/*
	 * Convert down to microseconds.
	 */
	ticks = (end - start) / 1000000;
#endif
	printf("ticks per microsecond %d, tsc ticks = %d\n", ticks, tscticks);
	for (; iters > 0; iters--) {
		/*
		 * Fetch the next location and keep the address computation
		 * out of the timed region.
		 */
		val = random() % nslots;
		loc = mem + val;
		__asm("mfence; pause; pause;");
		start = rdtsc();
		val = *loc;
		end = rdtsc();
		val++;
		total += val;
		/*
		 * A sample shorter than the measurement overhead carries no
		 * information; drop it rather than let the unsigned
		 * subtraction wrap around.
		 */
		elapsed = end - start;
		if (elapsed >= (uint64_t)tscticks)
			bucket_add(elapsed - tscticks);
	}

	return (total);
}

#ifdef _KERNEL
#define	MEMSIZE	(4 * 1024 * 1024)
#define	ITERS	100000000

/*
 * Module event handler.  On MOD_LOAD it allocates a MEMSIZE-byte buffer,
 * runs memtime() twice with interrupts disabled and prints the histogram.
 * memtime() clears buckets[] on entry, so only the second run is reported.
 * MOD_UNLOAD is accepted as a no-op; any other command yields EOPNOTSUPP.
 */
static int
memtime_load(module_t mod, int cmd, void *arg)
{
	register_t s;
	long memsize;
	long *mem;
	int error;

	error = 0;
	switch (cmd) {
	case MOD_LOAD:
		memsize = MEMSIZE;
		mem = malloc(memsize, M_TEMP, M_WAITOK);
		s = intr_disable();
		memtime(mem, memsize, ITERS);
		memtime(mem, memsize, ITERS);
		intr_restore(s);
		buckets_print(ticks);
		free(mem, M_TEMP);
		break;
	case MOD_UNLOAD:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

DEV_MODULE(memtime, memtime_load, NULL);
#else
/* Print the command-line synopsis to stderr and exit with failure. */
static void
usage(const char *prog)
{
	fprintf(stderr, "usage:\n\t%s <memsize> <iters> [binning]\n", prog);
	exit(EXIT_FAILURE);
}

/*
 * Parse str as a decimal integer in the range 1..max.  Returns the value,
 * or 0 if str is not a number, has trailing characters, or is out of range.
 */
static long
parse_positive(const char *str, long max)
{
	char *endp;
	long val;

	errno = 0;
	val = strtol(str, &endp, 10);
	if (endp == str || *endp != '\0' || errno == ERANGE)
		return (0);
	if (val <= 0 || val > max)
		return (0);
	return (val);
}

/*
 * Userland entry point: memtime <memsize> <iters> [binning].
 *
 * memsize is the buffer size in bytes, iters the number of timed loads and
 * binning the optional histogram bin width in ns (default 1).  Prints the
 * average access time followed by the histogram.
 */
int
main(int argc, char **argv)
{
	long memsize;
	long iters;
	long bin;
	long *mem;

	if (argc != 3 && argc != 4)
		usage(argv[0]);
	memsize = parse_positive(argv[1], LONG_MAX);
	iters = parse_positive(argv[2], INT_MAX);
	/* The buffer must hold at least one long for memtime() to index. */
	if (memsize < (long)sizeof(*mem) || iters == 0)
		usage(argv[0]);
	if (argc == 4) {
		/* binning is a divisor in bucket_add(); it must be positive. */
		bin = parse_positive(argv[3], INT_MAX);
		if (bin == 0)
			usage(argv[0]);
		binning = (int)bin;
	}
	mem = malloc(memsize);
	if (mem == NULL) {
		perror("malloc");
		exit(EXIT_FAILURE);
	}
	totalns = 0;
	memtime(mem, memsize, (int)iters);
	/*
	 * NOTE(review): the divisor is the requested iteration count, which
	 * also counts samples that were discarded -- confirm that is the
	 * intended definition of the average.
	 */
	totalns /= iters;
	printf("Average access time: %ju\n", totalns);
	buckets_print(ticks);
	free(mem);

	exit(EXIT_SUCCESS);
}
#endif