/*
 * 64-bit atomic counter test.
 *
 * Compilation:
 * $ gcc -O2 counter_test.c -o counter_test -lpthread
 *
 * With atomic(9) (amd64 only):
 * $ gcc -O2 counter_test.c -o counter_test -lpthread -DATOMIC
 *
 * With libatomic_ops installed (see below):
 * $ gcc -O2 counter_test.c -o counter_test -lpthread -DLIBATOMIC_OPS \
 *       -I/usr/local/include -L/usr/local/lib -latomic_ops
 *
 * With gcc builtins (see below):
 * $ gcc -O2 counter_test.c -o counter_test -lpthread -DGCC_ATOMIC
 */

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/types.h>

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#ifdef ATOMIC
/* Internally supported atomic operations, see atomic(9).
 * Note that 64-bit atomics are only supported on 64-bit
 * platforms at the moment. */
#include <machine/atomic.h>
#endif

#ifdef LIBATOMIC_OPS
/* The atomic_ops library [1], installed from the sources of
 * devel/boehm-gc.  Note that this is not installed with the
 * port; it was done manually.
 *
 * [1] http://www.hpl.hp.com/research/linux/atomic_ops/ */
#include <atomic_ops.h>
#endif

/*
 * A type synonym to make the code more self-documenting.
 */
typedef uint64_t counter_t;

const counter_t count = 1000000ULL;

/* Start just below 2^32 (4294967296) so the counter crosses the 32-bit
 * boundary during the test and torn 32-bit updates become visible. */
const counter_t offset = 4294467296ULL;

counter_t global_counter;

struct test_config {
	char	*name;
	void	(*func)(counter_t *, counter_t);
	int	ncpu;
};

/* Pin the calling thread to the given CPU, see cpuset(2). */
int
set_affinity(int pnum)
{
	cpuset_t set;

	CPU_ZERO(&set);
	CPU_SET(pnum, &set);
	if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
	    sizeof(cpuset_t), &set) != 0) {
		perror("cpuset_setaffinity()");
		return -1;
	}
	return 0;
}

/*
 * A naive method: no locking, no atomicity, but fast.
 */
void
simple_add(counter_t *p, counter_t v)
{
	(*p) += v;
}

#ifdef FPU_ATOMIC
/*
 * Use the FPU to juggle 64-bit values, since they can be loaded and
 * stored atomically through it even on i386.  Note that only the final
 * 64-bit store is atomic; the read-modify-write as a whole is not.
 */
void
fpu_add(counter_t *p, counter_t v)
{
	counter_t r = (*p) + v;

	/* fildll/fistpll move the full 64-bit value through the FPU
	 * stack, so the store to *p cannot be torn into two 32-bit
	 * writes. */
	__asm __volatile(
	    "	fildll	%1 ;"
	    "	fistpll	%0 ;"
	    : "=m" (*p)
	    : "m" (r));
}
#endif

#ifdef ATOMIC
/*
 * Use atomic(9).
 */
void
atomic_add(counter_t *p, counter_t v)
{
	atomic_add_64(p, v);
}
#endif

#ifdef LIBATOMIC_OPS
void
libatomic_add(counter_t *p, counter_t v)
{
	/* AO_t is word-sized, which matches uint64_t on 64-bit
	 * platforms. */
	AO_fetch_and_add((volatile AO_t *)p, v);
}
#endif
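#ifdef C11_ATOMIC
/*
 * A minimal additional sketch, not part of the original set of methods:
 * with a recent compiler, C11's <stdatomic.h> provides the same
 * fetch-and-add portably.  The C11_ATOMIC macro and the c11_add() name
 * are introduced here for illustration only; hook it into main() like
 * the other methods to measure it.
 */
#include <stdatomic.h>

void
c11_add(counter_t *p, counter_t v)
{
	/* Casting a plain uint64_t to _Atomic uint64_t assumes the two
	 * representations match, which holds on amd64; strictly
	 * portable code would declare the counter _Atomic upfront. */
	atomic_fetch_add((_Atomic uint64_t *)p, v);
}
#endif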
#ifdef GCC_ATOMIC
/*
 * Ask GCC to generate the atomics for the given platform.  Note that
 * this works on amd64 without problems, but on i386 it requires
 * -march=i586, since it uses cmpxchg8b, which was introduced with the
 * Pentium.
 */
void
gccatomic_add(counter_t *p, counter_t v)
{
	__sync_fetch_and_add(p, v);
}
#endif

struct timespec
diff(struct timespec begin, struct timespec end)
{
	struct timespec tmp;

	if ((end.tv_nsec - begin.tv_nsec) < 0) {
		tmp.tv_sec = end.tv_sec - begin.tv_sec - 1;
		tmp.tv_nsec = 1000000000 + end.tv_nsec - begin.tv_nsec;
	} else {
		tmp.tv_sec = end.tv_sec - begin.tv_sec;
		tmp.tv_nsec = end.tv_nsec - begin.tv_nsec;
	}
	return tmp;
}

void *
concurrent_inc(void *arg)
{
	struct test_config *cfg;
	void (*f)(counter_t *, counter_t);
	counter_t i;

	cfg = (struct test_config *)arg;
	f = cfg->func;
	if (set_affinity(cfg->ncpu) != 0)
		return NULL;
	for (i = 0; i < count; i++)
		f(&global_counter, 1);
	return NULL;
}

int
run_concurrent_inc(int n, struct test_config *cfg)
{
	pthread_t *threads;
	struct timespec start, stop, runtime;
	struct test_config *cfgs;
	int i;
	const counter_t expected = offset + (n * count);

	threads = (pthread_t *)malloc(sizeof(pthread_t) * n);
	if (threads == NULL) {
		perror("malloc(threads)");
		return -1;
	}
	cfgs = (struct test_config *)malloc(sizeof(struct test_config) * n);
	if (cfgs == NULL) {
		perror("malloc(cfgs)");
		free(threads);
		return -1;
	}
	global_counter = offset;
	clock_gettime(CLOCK_REALTIME_PRECISE, &start);
	for (i = 0; i < n; i++) {
		cfgs[i] = *cfg;
		cfgs[i].ncpu = i;
		if (pthread_create(&threads[i], NULL, concurrent_inc,
		    &cfgs[i]) != 0) {
			perror("pthread_create()");
			n = i;
			break;
		}
	}
	for (i = 0; i < n; i++)
		pthread_join(threads[i], NULL);
	clock_gettime(CLOCK_REALTIME_PRECISE, &stop);
	runtime = diff(start, stop);
	if (cfg->name != NULL) {
		printf("[%s] global_counter = %" PRIu64 "\n", cfg->name,
		    global_counter);
		printf("[%s] expected: %" PRIu64 "\n", cfg->name, expected);
		printf("[%s] %scorrect.\n", cfg->name,
		    expected != global_counter ? "NOT " : "");
		printf("[%s] time: %ld ms\n", cfg->name,
		    runtime.tv_sec * 1000 + runtime.tv_nsec / 1000000L);
		printf("\n");
	}
	free(threads);
	free(cfgs);
	return 0;
}

int
main(void)
{
	int procs;
	size_t sprocs;
	struct test_config cfg;

	procs = 1;
	sprocs = sizeof(procs);
	if (sysctlbyname("hw.ncpu", &procs, &sprocs, NULL, 0) != 0) {
		perror("sysctlbyname()");
		return EXIT_FAILURE;
	}

#if defined(__amd64__)
	printf("Compiled for x86_64.\n");
#elif defined(__i386__)
	printf("Compiled for x86.\n");
#endif

	printf("%d CPU%s detected, working with %d thread%s.\n",
	    procs, procs > 1 ? "s" : "", procs, procs > 1 ? "s" : "");
	if (procs < 2) {
		printf("Warning: there is no \"real\" multiprocessing "
		    "present, hence the results will likely not be valid.\n");
	}
	printf("\n");

	/* Warm up. */
	cfg.name = NULL, cfg.func = simple_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;

	cfg.name = "naive", cfg.func = simple_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;

#ifdef ATOMIC
	cfg.name = "atomic.h", cfg.func = atomic_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;
#endif

#ifdef LIBATOMIC_OPS
	cfg.name = "atomic_ops.h", cfg.func = libatomic_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;
#endif

#ifdef GCC_ATOMIC
	cfg.name = "gcc atomic", cfg.func = gccatomic_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;
#endif

#ifdef FPU_ATOMIC
	cfg.name = "fpu", cfg.func = fpu_add;
	if (run_concurrent_inc(procs, &cfg) != 0)
		return EXIT_FAILURE;
#endif

	return EXIT_SUCCESS;
}
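/*
 * Illustrative appendix, not exercised by main() above: the classic
 * correct baseline is a mutex-protected add.  A minimal sketch,
 * assuming one global lock is acceptable for the measurement; the
 * mutex_add() name and counter_lock are introduced here for
 * illustration.  Adding another test_config entry in main() would let
 * its cost be compared against the lock-free methods.
 */
static pthread_mutex_t counter_lock = PTHREAD_MUTEX_INITIALIZER;

void
mutex_add(counter_t *p, counter_t v)
{
	pthread_mutex_lock(&counter_lock);
	(*p) += v;
	pthread_mutex_unlock(&counter_lock);
}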