diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 42 | ||||
-rw-r--r-- | src/bigtab.c | 15 | ||||
-rw-r--r-- | src/main.c | 151 | ||||
-rw-r--r-- | src/new.c | 24 | ||||
-rw-r--r-- | src/new32.c | 24 | ||||
-rw-r--r-- | src/null.c | 4 | ||||
-rw-r--r-- | src/ref.c | 12 |
8 files changed, 274 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91a5944 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +division-golf +obj/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f3400a6 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ +CC ?= gcc +CFLAGS += -O3 -Wall +CPPFLAGS += -MMD +LDFLAGS += -lm + +PP_BOLD := $(shell tput bold) +PP_RESET := $(shell tput sgr0) +PP_CC := $(PP_BOLD)$(shell tput setf 6)CC$(PP_RESET) +PP_LD := $(PP_BOLD)$(shell tput setf 2)LD$(PP_RESET) +PP_RM := $(PP_BOLD)$(shell tput setf 4)RM$(PP_RESET) + +SRC := src/bigtab.c \ + src/main.c \ + src/new.c \ + src/new32.c \ + src/null.c \ + src/ref.c + +OBJ := $(SRC:src/%.c=obj/%.o) +OUT := division-golf + +all: $(OUT) + +-include $(OBJ:.o=.d) + +obj/%.o: src/%.c + @echo "$(PP_CC) src/$*.c" + @mkdir -p $(@D) + @$(CC) $(CFLAGS) $(CPPFLAGS) -c src/$*.c -o obj/$*.o + +$(OUT): $(OBJ) + @echo "$(PP_LD) $(OUT)" + @$(CC) $(OBJ) -o $(OUT) $(LDFLAGS) + +clean: + @echo "${PP_RM} obj" + @rm -rf obj + @echo "${PP_RM} ${OUT}" + @rm -rf ${OUT} + +.PHONY: clean + diff --git a/src/bigtab.c b/src/bigtab.c new file mode 100644 index 0000000..483ded7 --- /dev/null +++ b/src/bigtab.c @@ -0,0 +1,15 @@ +int ref2(int i, int x, int y); +static short table[730][3][244]; + +void bigtab_init(void) +{ + for (int i = 0; i < 729; i++) + for (int x = 0; x < 3; x++) + for (int y = 243; y > 0; y /= 3) + table[i][x][y] = ref2(i, x, y); +} + +int bigtab(int i, int x, int y) +{ + return table[i][x][y]; +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..8d82af5 --- /dev/null +++ b/src/main.c @@ -0,0 +1,151 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <sys/random.h> +#include <time.h> +#include <math.h> + +#define COUNT(x) (sizeof(x) / sizeof(x[0])) + +static uint64_t nclock(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec; +} + +int null(int i, int x, int y); +int ref1(int i, int x, int y); +int ref2(int i, int x, int y); +int new(int i, int x, int y); +int new32(int i, int x, int y); +void bigtab_init(void); +int bigtab(int i, int x, int y); + +struct solfunc { + const char *name; + int (*func)(int, int, int); + + double T, dT; + double adj_T, adj_dT; +}; + +static struct solfunc solfuncs[ ] = { + {"null", null}, + {"ref1", ref1}, + {"ref2", ref2}, + {"bigtab", bigtab}, + {"new", new}, + {"new32", new32} +}; + +#define PASSES 5 + +static void benchmark(void) +{ + const int ys[] = {243, 81, 27, 9, 3, 1}; + const size_t size = 20*1000*1000; + int seed; + struct { int i, x, y, r; } *data; + struct solfunc *fastest = NULL; + + getrandom(&seed, sizeof(seed), 0); + srand(seed); + + data = malloc(sizeof(*data) * size); + if (!data) { + fprintf(stderr, "out of memory\n"); + exit(2); + } + + printf("benchmarking %zu solutions: ", COUNT(solfuncs)); + + for (size_t j = 0; j < size; j++) { + data[j].i = rand() % 729; + data[j].x = rand() % 3; + data[j].y = ys[rand() % 6]; + } + + for (size_t i = 0; i < COUNT(solfuncs); i++) { + struct solfunc *sf = solfuncs + i; + double ts[PASSES], var; + + printf("%zu", i + 1); + + for (size_t j = 0; j < PASSES; j++) { + uint64_t t0; + + fflush(stdout); + + t0 = nclock(); + for (size_t k = 0; k < size; k++) + data[k].r = sf->func(data[k].i, data[k].x, + data[k].y); + ts[j] = (nclock() - t0) * 1.0e-9; + ts[j] *= 1.0e9 / size; + + printf("."); + } + + sf->T = 0.0f; + for (size_t j = 0; j < PASSES; j++) + sf->T += ts[j]; + sf->T /= PASSES; + + var = 0.0f; + for (size_t j = 0; j < PASSES; j++) + var += pow(ts[j] - sf->T, 2.0); + sf->dT = sqrt(var) / sqrt(PASSES); + } + + printf("\n"); + + for (size_t i = 1; i < COUNT(solfuncs); i++) { + struct solfunc *sf = solfuncs + i; + + sf->adj_T = sf->T - solfuncs[0].T; + sf->adj_dT = sqrt(pow(sf->dT, 2) + pow(solfuncs[0].dT, 2)); + + if (!fastest || sf->T < fastest->T) { + fastest = sf; + fastest->T = sf->T; + } + } + + for (size_t i = 0; i < COUNT(solfuncs); i++) { + struct solfunc *sf = solfuncs + i; + + printf("%7s %.2f ± %.2f", sf->name, sf->T, sf->dT); + if (i > 0) { + printf(" %.2f ± %.2f", sf->adj_T, sf->adj_dT); + printf(" (x %.3f)", sf->T / fastest->T); + } + puts(""); + } +} + +int main(void) +{ + bigtab_init(); + + for (int j = 2; j < COUNT(solfuncs); j++) { + struct solfunc *sf = solfuncs + j; + + for (int i = 0; i < 729; i++) + for (int x = 0; x < 3; x++) + for (int y = 243; y > 0; y /= 3) { + int ref, res; + + ref = ref1(i, x, y); + + if ((res = sf->func(i, x, y)) != ref) { + printf("\n%s fails for %i/%i/%i: got %i, expected %i\n", + sf->name, i, x, y, res, ref); + return 1; + } + } + } + + benchmark(); + return 0; +}
\ No newline at end of file diff --git a/src/new.c b/src/new.c new file mode 100644 index 0000000..3bdd6d0 --- /dev/null +++ b/src/new.c @@ -0,0 +1,24 @@ +#include <stdint.h> + +static const uint64_t divtab[244] = { + [1] = 0xffffffff + 1l, + [3] = 0x55555555 + 1l, + [9] = 0x1c71c71c + 1l, + [27] = 0x097b425e + 1l, + [81] = 0x0329161f + 1l, + [243] = 0x010db20a + 1l +}; + +#define FASTDIV(n, C) (((n) * (C)) >> 32) +#define FASTMOD(n, d, C) ((n) - (d) * FASTDIV((n), (C))) + +int new(int i, int x, int y) +{ + uint64_t A, B, C; + + A = FASTDIV(i, divtab[y]); + B = FASTMOD(A, 3, divtab[3]); + C = FASTMOD(A + (uint64_t)x, 3, divtab[3]); + + return i + (C - B) * y; +} diff --git a/src/new32.c b/src/new32.c new file mode 100644 index 0000000..9136115 --- /dev/null +++ b/src/new32.c @@ -0,0 +1,24 @@ +#include <stdint.h> + +static const unsigned int divtab[244] = { + [1] = 0xffff + 1l, + [3] = 0x5555 + 1l, + [9] = 0x1c71 + 1l, + [27] = 0x097b + 1l, + [81] = 0x0329 + 1l, + [243] = 0x010d + 1l +}; + +#define FASTDIV(n, C) (((n) * (C)) >> 16) +#define FASTMOD(n, d, C) ((n) - (d) * FASTDIV((n), (C))) + +int new32(int i, int x, int y) +{ + int A, B, C; + + A = FASTDIV(i, divtab[y]); + B = FASTMOD(A, 3, divtab[3]); + C = FASTMOD(A + x, 3, divtab[3]); + + return i + (C - B) * y; +} diff --git a/src/null.c b/src/null.c new file mode 100644 index 0000000..4474c1a --- /dev/null +++ b/src/null.c @@ -0,0 +1,4 @@ +int null(int i, int x, int y) +{ + return 0; +}
\ No newline at end of file diff --git a/src/ref.c b/src/ref.c new file mode 100644 index 0000000..3589b13 --- /dev/null +++ b/src/ref.c @@ -0,0 +1,12 @@ +// This is the original code posted on IOTA's Discord. +int ref1(int i, int x, int y) +{ + return i % y + ((((i / y) + x) % 3) + (i / (y * 3)) * 3) * y; +} + +// A simplified version posted by someone on IOTA's Discord (not exactly sure +// who posted it first). +int ref2(int i, int x, int y) +{ + return i + (((i / y) + x) % 3 - ((i / y) % 3)) * y; +}
\ No newline at end of file |