#include #include #include #include #include #include #define COUNT(x) (sizeof(x) / sizeof(x[0])) static uint64_t nclock(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC_RAW, &ts); return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec; } int null(int i, int x, int y); int ref1(int i, int x, int y); int ref2(int i, int x, int y); int new(int i, int x, int y); int new32(int i, int x, int y); int new32v2(int i, int x, int y); int new32v3(int i, int x, int y); void bigtab_init(void); int bigtab(int i, int x, int y); struct solfunc { const char *name; int (*func)(int, int, int); double T, dT; double adj_T, adj_dT; }; static struct solfunc solfuncs[ ] = { {"null", null}, {"ref1", ref1}, {"ref2", ref2}, {"bigtab", bigtab}, {"new", new}, {"new32", new32}, {"new32v2", new32v2}, {"new32v3", new32v3} }; #define PASSES 5 static void benchmark(void) { const int ys[] = {243, 81, 27, 9, 3, 1}; const size_t size = 20*1000*1000; int seed; struct { int i, x, y, r; } *data; struct solfunc *fastest = NULL; getrandom(&seed, sizeof(seed), 0); srand(seed); data = malloc(sizeof(*data) * size); if (!data) { fprintf(stderr, "out of memory\n"); exit(2); } printf("benchmarking %zu solutions: ", COUNT(solfuncs)); for (size_t j = 0; j < size; j++) { data[j].i = rand() % 729; data[j].x = rand() % 3; data[j].y = ys[rand() % 6]; } for (size_t i = 0; i < COUNT(solfuncs); i++) { struct solfunc *sf = solfuncs + i; double ts[PASSES], var; printf("%zu", i + 1); for (size_t j = 0; j < PASSES; j++) { uint64_t t0; fflush(stdout); t0 = nclock(); for (size_t k = 0; k < size; k++) data[k].r = sf->func(data[k].i, data[k].x, data[k].y); ts[j] = (nclock() - t0) * 1.0e-9; ts[j] *= 1.0e9 / size; printf("."); } sf->T = 0.0f; for (size_t j = 0; j < PASSES; j++) sf->T += ts[j]; sf->T /= PASSES; var = 0.0f; for (size_t j = 0; j < PASSES; j++) var += pow(ts[j] - sf->T, 2.0); sf->dT = sqrt(var) / sqrt(PASSES); } printf("\n"); for (size_t i = 1; i < COUNT(solfuncs); i++) { struct solfunc *sf = solfuncs + i; sf->adj_T = sf->T - solfuncs[0].T; sf->adj_dT = sqrt(pow(sf->dT, 2) + pow(solfuncs[0].dT, 2)); if (!fastest || sf->T < fastest->T) { fastest = sf; fastest->T = sf->T; } } for (size_t i = 0; i < COUNT(solfuncs); i++) { struct solfunc *sf = solfuncs + i; printf("%7s %.2f ± %.2f", sf->name, sf->T, sf->dT); if (i > 0) { printf(" (x %.3f)", sf->T / fastest->T); printf(" %.2f ± %.2f", sf->adj_T, sf->adj_dT); printf(" (x %.3f)", sf->adj_T / fastest->adj_T); } puts(""); } } int main(void) { bigtab_init(); for (int j = 2; j < COUNT(solfuncs); j++) { struct solfunc *sf = solfuncs + j; for (int i = 0; i < 729; i++) for (int x = 0; x < 3; x++) for (int y = 243; y > 0; y /= 3) { int ref, res; ref = ref1(i, x, y); if ((res = sf->func(i, x, y)) != ref) { printf("\n%s fails for %i/%i/%i: got %i, expected %i\n", sf->name, i, x, y, res, ref); return 1; } } } benchmark(); return 0; }