summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaweł Redman <pawel.redman@gmail.com>2018-02-12 18:40:38 +0100
committerPaweł Redman <pawel.redman@gmail.com>2018-02-12 18:40:38 +0100
commit4f17bd29c61e449ba676c4c57b7b69e8abbb2160 (patch)
tree424dbd01775ee885f7d2e925f77923ac6f6d9506
parenta0b52cc85e7c8c59af394dddcea8b15b4f383f58 (diff)
zus: Initial commit.
-rw-r--r--tools/rules.txt105
-rwxr-xr-xtools/zittrigsubersetzer.py134
2 files changed, 239 insertions, 0 deletions
diff --git a/tools/rules.txt b/tools/rules.txt
new file mode 100644
index 0000000..67ef437
--- /dev/null
+++ b/tools/rules.txt
@@ -0,0 +1,105 @@
+# rules for Zittrigsubersetzer, revision 1
+# note: '//' isn't for comments, it separates the "from" from the "to" part
+# of the regexp
+
+# numbers
+ATE\b // 8 # special case lmfao
+\b(\d)(\d\d)\b // \1HUNDERT UND \2
+\b10\b // ZEHN
+\b11\b // ELF
+\b12\b // ZWÖLF
+\b(\d)(\d)\b // \2UND\1ZIG
+\b(\d)\b // \1ZEHN
+0 // NULL
+1 // EINS
+2 // ZWEI
+3 // DREI
+4 // VIER
+5 // FUNF
+6 // SECHS
+7 // SIEBEN
+8 // ACHT
+9 // NEUN
+
+# whole words
+\bi\b // IDEMPOTENT #yolo
+
+# suffixes
+[AEIOUY]S\b // U
+CE // SE
+IN\b // INÆ
+IST\b // ISTI
+EST\b // ESTO
+([OE])R\b // \1RЯ\1
+ING\b // EN
+T\b // TUM
+([AEIOUY])P\b // \1ILER
+ABLE\b // MÖGLICHKEIT #lmfao
+
+# punctuation
+/ // (VOLDEMORT)
+\. // (KEBAB)
+, // (GENTOO)
+: // (ZWEIKEBAB)
+; // (GENTOOKEBAB)
+\( // (CHUJ)
+\) // (CHUJEC)
+\[ // (OCHUJENIE)
+\] // (NIEOCHUJENIE)
+{ // (CHUJNIA)
+} // (ANTYCHUJNIA)
+_ // (UNTERMENSCH)
+\^ // (REISE)
+
+# longer substitutions
+([^J])ES // \1IES
+CK // CQUES
+[AEIOUY]D // ṎDE
+OUGH // SCHWANZ # i'm not even fucking trying anymore lmao
+MM // FIS
+EE // REICH
+NN // DÜRÜM
+OO // WEIL
+ACT // AB
+OVER // DEUTSCH # propaganda
+UNDER // JUDE # propaganda x10
+YOU // CASSINI
+([AEIOUY])I // γ\1\1
+([AEIOUY])M // ζ\1
+
+# [AEIOUY]N
+AN // ϟð
+EN // ϟΣ
+IN // ϟϟ
+ON // ϟʒ
+UN // ϟγ
+YN // DESGLEICHEN # analmangisch isn't very regular
+
+# reverse two-vowel-runs
+([AEIOUY])([AEIOUY]) // \2\1
+
+# single-letter substitutions
+[AEIOUY]NT // ∇
+ON // Ü
+X // KS
+V // B
+(SCH|SH) // ʃ
+(CH|KH|C|K) // X
+(B|SS|S) // ß
+(PH|F) // φ
+LL // Σ
+L // λ
+TH // Θ
+D // ð
+J // H
+EU // Œ
+AN // Ë
+W // Ł
+(GG|G) // ʒ
+M // м
+N // П
+U // µ
+Q // ξ
+OH // ∅
+([^AEIOUY])E\b // \1з
+R // Γ
diff --git a/tools/zittrigsubersetzer.py b/tools/zittrigsubersetzer.py
new file mode 100755
index 0000000..2cffa50
--- /dev/null
+++ b/tools/zittrigsubersetzer.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+import re, argparse
+from os import path
+
+
+# Program name and version string; printed together by the --version option.
+prog_name = "Zittrigsubersetzer (ZUS)"
+prog_version = "0.1.0"
+
+
+class Rule:
+    """A single regex substitution rule.
+
+    Attributes:
+        pre: regular expression to search for.
+        post: replacement template (may contain group backreferences).
+        lc: line number the rule came from; only used in error messages.
+    """
+
+    def __init__(self, pre, post, lc):
+        self.pre = pre
+        self.post = post
+        self.lc = lc
+
+    def sub(self, text):
+        """Return *text* with every match of ``pre`` replaced by ``post``.
+
+        Matching is case-insensitive.
+
+        Raises:
+            re.error: if the pattern or the replacement template is
+                invalid. The offending rule is printed before the
+                exception is re-raised.
+        """
+        try:
+            return re.sub(self.pre, self.post, text, flags=re.IGNORECASE)
+        except re.error:
+            # Only regex errors mean the rule itself is broken; anything
+            # else (e.g. KeyboardInterrupt) must not be blamed on the rule.
+            print(f"error: messed up rule on line {self.lc}")
+            print(f"\t pre: {self.pre}\n\tpost: {self.post}")
+            raise
+
+
+class RuleSet:
+    """An ordered collection of rules, applied one after another."""
+
+    def __init__(self):
+        self.rules = []
+
+    def add_rule(self, pre, post, lc):
+        """Append a rule built from *pre*, *post* and line number *lc*."""
+        self.rules.append(Rule(pre, post, lc))
+
+    def load(self, fd):
+        """Parse rules from *fd* (an iterable of lines) and append them.
+
+        Everything from a '#' to the end of the line is discarded and
+        blank lines are skipped. Each remaining line must have the form
+        ``pre // post``.
+
+        Returns:
+            This rule set.
+
+        Raises:
+            ValueError: if a line does not split into exactly two parts
+                on '//'. The line number is printed first.
+        """
+        for lc, line in enumerate(fd, start=1):
+            line = re.sub("#.*$", "", line)  # strip comments
+            line = line.strip()  # strip whitespace
+
+            if not line:
+                continue
+
+            try:
+                pre, post = line.split("//")
+            except ValueError:
+                print(f"error: shit on line {lc}")
+                raise
+
+            self.add_rule(pre.strip(), post.strip(), lc)
+
+        # Return this instance rather than relying on a module-level
+        # variable, so the class also works when imported.
+        return self
+
+    def translate(self, text):
+        """Upper-case *text* and run it through every rule in order."""
+        text = text.upper()
+        for rule in self.rules:
+            text = rule.sub(text)
+        return text
+
+    def translate_debug(self, text):
+        """Like translate(), but print each rule that changes the text.
+
+        The upper-cased input is printed first; then, for every rule that
+        alters the text, the rule's index in the list, the rule itself and
+        the intermediate result are printed.
+        """
+        text = text.upper()
+        print(text)
+        for i, rule in enumerate(self.rules):
+            after = rule.sub(text)
+            if after == text:
+                continue
+            print(f"(rule {i:2d}: s/{rule.pre}/{rule.post}/)")
+            print(after)
+            text = after
+        return text
+
+
+def mode_normal(args, ruleset):
+    """Translate lines from standard input until EOF or Ctrl-C.
+
+    Each line is read with input(), translated and printed. When
+    ``args.debug`` is set, ``ruleset.translate_debug`` is used instead of
+    ``ruleset.translate``.
+
+    Raises:
+        SystemExit: with status 0 on EOFError or KeyboardInterrupt.
+    """
+    translate = ruleset.translate_debug if args.debug else ruleset.translate
+    while True:
+        try:
+            print(translate(input()))
+        except (EOFError, KeyboardInterrupt):
+            # Raise SystemExit directly: the exit() helper comes from the
+            # site module and is not guaranteed to exist.
+            raise SystemExit(0) from None
+
+
+def mode_test(args, ruleset):
+    """Translate every phrase in the tests file and print a table.
+
+    The file at ``args.tests`` is read as UTF-8. Everything from a '#' to
+    the end of a line is discarded and blank lines are skipped. Each
+    remaining phrase is printed next to its translation, with the phrase
+    column padded to the longest phrase (at least three characters).
+    """
+    table = []
+
+    with open(args.tests, "r", encoding="UTF-8") as fd:
+        for line in fd:
+            phrase = re.sub("#.*$", "", line).strip()
+            if not phrase:
+                continue
+            table.append((phrase, ruleset.translate(phrase)))
+
+    # Never pad to fewer than three columns, even for very short phrases.
+    width = max([3] + [len(phrase) for phrase, _ in table])
+
+    for phrase, output in table:
+        print(f"{phrase.ljust(width, ' ')} {output}")
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse options, load the rules, then run
+    # either the tests table or the interactive stdin translator.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-v", "--version", action="store_true",
+                        help="print version")
+    parser.add_argument("-D", "--debug", action="store_true",
+                        help="be more verbose (for debugging)")
+    parser.add_argument("-R", "--ruleset",
+                        help="path to file with the rules")
+    parser.add_argument("-T", "--tests", help="path to the tests file")
+    args = parser.parse_args()
+
+    if args.version:
+        print(f"{prog_name} {prog_version}")
+        # exit() comes from the site module and may be missing; raise
+        # SystemExit directly instead.
+        raise SystemExit(0)
+
+    if not args.ruleset:
+        # Default to the rules.txt that sits next to this script.
+        dirname = path.dirname(path.realpath(__file__))
+        args.ruleset = path.join(dirname, "rules.txt")
+
+    ruleset = RuleSet()
+    with open(args.ruleset, "r", encoding="UTF-8") as fd:
+        ruleset.load(fd)
+
+    if args.tests:
+        mode_test(args, ruleset)
+    else:
+        mode_normal(args, ruleset)
+