summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--Makefile42
-rw-r--r--src/bigtab.c15
-rw-r--r--src/main.c151
-rw-r--r--src/new.c24
-rw-r--r--src/new32.c24
-rw-r--r--src/null.c4
-rw-r--r--src/ref.c12
8 files changed, 274 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..91a5944
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+division-golf
+obj/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f3400a6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,42 @@
+CC ?= gcc
+CFLAGS += -O3 -Wall
+CPPFLAGS += -MMD
+LDFLAGS += -lm
+
+PP_BOLD := $(shell tput bold)
+PP_RESET := $(shell tput sgr0)
+PP_CC := $(PP_BOLD)$(shell tput setf 6)CC$(PP_RESET)
+PP_LD := $(PP_BOLD)$(shell tput setf 2)LD$(PP_RESET)
+PP_RM := $(PP_BOLD)$(shell tput setf 4)RM$(PP_RESET)
+
+SRC := src/bigtab.c \
+ src/main.c \
+ src/new.c \
+ src/new32.c \
+ src/null.c \
+ src/ref.c
+
+OBJ := $(SRC:src/%.c=obj/%.o)
+OUT := division-golf
+
+all: $(OUT)
+
+-include $(OBJ:.o=.d)
+
+obj/%.o: src/%.c
+ @echo "$(PP_CC) src/$*.c"
+ @mkdir -p $(@D)
+ @$(CC) $(CFLAGS) $(CPPFLAGS) -c src/$*.c -o obj/$*.o
+
+$(OUT): $(OBJ)
+ @echo "$(PP_LD) $(OUT)"
+ @$(CC) $(OBJ) -o $(OUT) $(LDFLAGS)
+
+clean:
+ @echo "${PP_RM} obj"
+ @rm -rf obj
+ @echo "${PP_RM} ${OUT}"
+ @rm -rf ${OUT}
+
+.PHONY: clean
+
diff --git a/src/bigtab.c b/src/bigtab.c
new file mode 100644
index 0000000..483ded7
--- /dev/null
+++ b/src/bigtab.c
@@ -0,0 +1,15 @@
+int ref2(int i, int x, int y);
+static short table[730][3][244];
+
+void bigtab_init(void)
+{
+ for (int i = 0; i < 729; i++)
+ for (int x = 0; x < 3; x++)
+ for (int y = 243; y > 0; y /= 3)
+ table[i][x][y] = ref2(i, x, y);
+}
+
+int bigtab(int i, int x, int y)
+{
+ return table[i][x][y];
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..8d82af5
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,151 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/random.h>
+#include <time.h>
+#include <math.h>
+
+#define COUNT(x) (sizeof(x) / sizeof(x[0]))
+
+static uint64_t nclock(void)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+ return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
+}
+
+int null(int i, int x, int y);
+int ref1(int i, int x, int y);
+int ref2(int i, int x, int y);
+int new(int i, int x, int y);
+int new32(int i, int x, int y);
+void bigtab_init(void);
+int bigtab(int i, int x, int y);
+
+struct solfunc {
+ const char *name;
+ int (*func)(int, int, int);
+
+ double T, dT;
+ double adj_T, adj_dT;
+};
+
+static struct solfunc solfuncs[ ] = {
+ {"null", null},
+ {"ref1", ref1},
+ {"ref2", ref2},
+ {"bigtab", bigtab},
+ {"new", new},
+ {"new32", new32}
+};
+
+#define PASSES 5
+
+static void benchmark(void)
+{
+ const int ys[] = {243, 81, 27, 9, 3, 1};
+ const size_t size = 20*1000*1000;
+ int seed;
+ struct { int i, x, y, r; } *data;
+ struct solfunc *fastest = NULL;
+
+ getrandom(&seed, sizeof(seed), 0);
+ srand(seed);
+
+ data = malloc(sizeof(*data) * size);
+ if (!data) {
+ fprintf(stderr, "out of memory\n");
+ exit(2);
+ }
+
+ printf("benchmarking %zu solutions: ", COUNT(solfuncs));
+
+ for (size_t j = 0; j < size; j++) {
+ data[j].i = rand() % 729;
+ data[j].x = rand() % 3;
+ data[j].y = ys[rand() % 6];
+ }
+
+ for (size_t i = 0; i < COUNT(solfuncs); i++) {
+ struct solfunc *sf = solfuncs + i;
+ double ts[PASSES], var;
+
+ printf("%zu", i + 1);
+
+ for (size_t j = 0; j < PASSES; j++) {
+ uint64_t t0;
+
+ fflush(stdout);
+
+ t0 = nclock();
+ for (size_t k = 0; k < size; k++)
+ data[k].r = sf->func(data[k].i, data[k].x,
+ data[k].y);
+ ts[j] = (nclock() - t0) * 1.0e-9;
+ ts[j] *= 1.0e9 / size;
+
+ printf(".");
+ }
+
+ sf->T = 0.0f;
+ for (size_t j = 0; j < PASSES; j++)
+ sf->T += ts[j];
+ sf->T /= PASSES;
+
+ var = 0.0f;
+ for (size_t j = 0; j < PASSES; j++)
+ var += pow(ts[j] - sf->T, 2.0);
+ sf->dT = sqrt(var) / sqrt(PASSES);
+ }
+
+ printf("\n");
+
+ for (size_t i = 1; i < COUNT(solfuncs); i++) {
+ struct solfunc *sf = solfuncs + i;
+
+ sf->adj_T = sf->T - solfuncs[0].T;
+ sf->adj_dT = sqrt(pow(sf->dT, 2) + pow(solfuncs[0].dT, 2));
+
+ if (!fastest || sf->T < fastest->T) {
+ fastest = sf;
+ fastest->T = sf->T;
+ }
+ }
+
+ for (size_t i = 0; i < COUNT(solfuncs); i++) {
+ struct solfunc *sf = solfuncs + i;
+
+ printf("%7s %.2f ± %.2f", sf->name, sf->T, sf->dT);
+ if (i > 0) {
+ printf(" %.2f ± %.2f", sf->adj_T, sf->adj_dT);
+ printf(" (x %.3f)", sf->T / fastest->T);
+ }
+ puts("");
+ }
+}
+
+int main(void)
+{
+ bigtab_init();
+
+ for (int j = 2; j < COUNT(solfuncs); j++) {
+ struct solfunc *sf = solfuncs + j;
+
+ for (int i = 0; i < 729; i++)
+ for (int x = 0; x < 3; x++)
+ for (int y = 243; y > 0; y /= 3) {
+ int ref, res;
+
+ ref = ref1(i, x, y);
+
+ if ((res = sf->func(i, x, y)) != ref) {
+ printf("\n%s fails for %i/%i/%i: got %i, expected %i\n",
+ sf->name, i, x, y, res, ref);
+ return 1;
+ }
+ }
+ }
+
+ benchmark();
+ return 0;
+} \ No newline at end of file
diff --git a/src/new.c b/src/new.c
new file mode 100644
index 0000000..3bdd6d0
--- /dev/null
+++ b/src/new.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+
+static const uint64_t divtab[244] = {
+ [1] = 0xffffffff + 1l,
+ [3] = 0x55555555 + 1l,
+ [9] = 0x1c71c71c + 1l,
+ [27] = 0x097b425e + 1l,
+ [81] = 0x0329161f + 1l,
+ [243] = 0x010db20a + 1l
+};
+
+#define FASTDIV(n, C) (((n) * (C)) >> 32)
+#define FASTMOD(n, d, C) ((n) - (d) * FASTDIV((n), (C)))
+
+int new(int i, int x, int y)
+{
+ uint64_t A, B, C;
+
+ A = FASTDIV(i, divtab[y]);
+ B = FASTMOD(A, 3, divtab[3]);
+ C = FASTMOD(A + (uint64_t)x, 3, divtab[3]);
+
+ return i + (C - B) * y;
+}
diff --git a/src/new32.c b/src/new32.c
new file mode 100644
index 0000000..9136115
--- /dev/null
+++ b/src/new32.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+
+static const unsigned int divtab[244] = {
+ [1] = 0xffff + 1l,
+ [3] = 0x5555 + 1l,
+ [9] = 0x1c71 + 1l,
+ [27] = 0x097b + 1l,
+ [81] = 0x0329 + 1l,
+ [243] = 0x010d + 1l
+};
+
+#define FASTDIV(n, C) (((n) * (C)) >> 16)
+#define FASTMOD(n, d, C) ((n) - (d) * FASTDIV((n), (C)))
+
+int new32(int i, int x, int y)
+{
+ int A, B, C;
+
+ A = FASTDIV(i, divtab[y]);
+ B = FASTMOD(A, 3, divtab[3]);
+ C = FASTMOD(A + x, 3, divtab[3]);
+
+ return i + (C - B) * y;
+}
diff --git a/src/null.c b/src/null.c
new file mode 100644
index 0000000..4474c1a
--- /dev/null
+++ b/src/null.c
@@ -0,0 +1,4 @@
+int null(int i, int x, int y)
+{
+ return 0;
+} \ No newline at end of file
diff --git a/src/ref.c b/src/ref.c
new file mode 100644
index 0000000..3589b13
--- /dev/null
+++ b/src/ref.c
@@ -0,0 +1,12 @@
+// This is the original code posted on IOTA's Discord.
+int ref1(int i, int x, int y)
+{
+ return i % y + ((((i / y) + x) % 3) + (i / (y * 3)) * 3) * y;
+}
+
+// A simplified version posted by someone on IOTA's Discord (not exactly sure
+// who posted it first).
+int ref2(int i, int x, int y)
+{
+ return i + (((i / y) + x) % 3 - ((i / y) % 3)) * y;
+} \ No newline at end of file