From 4f17bd29c61e449ba676c4c57b7b69e8abbb2160 Mon Sep 17 00:00:00 2001 From: Paweł Redman Date: Mon, 12 Feb 2018 18:40:38 +0100 Subject: zus: Initial commit. --- tools/rules.txt | 105 ++++++++++++++++++++++++++++++++++ tools/zittrigsubersetzer.py | 134 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 tools/rules.txt create mode 100755 tools/zittrigsubersetzer.py diff --git a/tools/rules.txt b/tools/rules.txt new file mode 100644 index 0000000..67ef437 --- /dev/null +++ b/tools/rules.txt @@ -0,0 +1,105 @@ +# rules for Zittrigsubersetzer, revision 1 +# note: '//' isn't for comments, it separates the "from" from the "to" part +# of the regexp + +# numbers +ATE\b // 8 # special case lmfao +\b(\d)(\d\d)\b // \1HUNDERT UND \2 +\b10\b // ZEHN +\b11\b // ELF +\b12\b // ZWÖLF +\b(\d)(\d)\b // \2UND\1ZIG +\b(\d)\b // \1ZEHN +0 // NULL +1 // EINS +2 // ZWEI +3 // DREI +4 // VIER +5 // FUNF +6 // SECHS +7 // SIEBEN +8 // ACHT +9 // NEUN + +# whole words +\bi\b // IDEMPOTENT #yolo + +# suffixes +[AEIOUY]S\b // U +CE // SE +IN\b // INÆ +IST\b // ISTI +EST\b // ESTO +([OE])R\b // \1RЯ\1 +ING\b // EN +T\b // TUM +([AEIOUY])P\b // \1ILER +ABLE\b // MÖGLICHKEIT #lmfao + +# punctuation +/ // (VOLDEMORT) +\. 
#!/usr/bin/env python3
"""Zittrigsubersetzer (ZUS): rewrite text with an ordered list of regex rules.

Rules are read from a text file, one per line, written as
``PATTERN // REPLACEMENT``.  A ``#`` starts a comment that runs to the end of
the line.  Input text is upper-cased and each rule is then applied once, in
file order, with case-insensitive matching.
"""
import argparse
import re
import sys
from os import path
from typing import Iterable


prog_name = "Zittrigsubersetzer (ZUS)"
prog_version = "0.1.0"

# Token that separates the pattern from the replacement inside a rule line.
_RULE_SEPARATOR = "//"

# Matches a '#' comment up to the end of the line.
_COMMENT_RE = re.compile("#.*$")


def _strip_comment(line: str) -> str:
    """Return *line* without its ``#`` comment and surrounding whitespace."""
    return _COMMENT_RE.sub("", line).strip()


class Rule:
    """One substitution: regex ``pre`` is replaced by the template ``post``.

    ``lc`` is the line number the rule was read from; it is only used when
    reporting a broken rule.
    """

    def __init__(self, pre, post, lc):
        self.pre = pre
        self.post = post
        self.lc = lc

    def sub(self, text):
        """Apply the rule to *text* (case-insensitively) and return the result.

        Raises:
            re.error: if the pattern or the replacement template is invalid.
                The offending rule is reported on stderr before re-raising.
        """
        try:
            return re.sub(self.pre, self.post, text, flags=re.IGNORECASE)
        except re.error:
            # Only regex problems are the rule's fault; anything else
            # (e.g. Ctrl-C) must propagate without this misleading report.
            print(f"error: messed up rule on line {self.lc}", file=sys.stderr)
            print(f"\t pre: {self.pre}\n\tpost: {self.post}", file=sys.stderr)
            raise


class RuleSet:
    """An ordered collection of :class:`Rule` objects."""

    def __init__(self):
        self.rules = []

    def add_rule(self, pre, post, lc):
        """Append a rule built from *pre*, *post* and source line *lc*."""
        self.rules.append(Rule(pre, post, lc))

    def load(self, fd: Iterable[str]) -> "RuleSet":
        """Read rules from *fd*, an iterable of text lines.

        Blank lines and lines holding only a comment are skipped.  Line
        numbers recorded on the rules are 1-based.

        Returns:
            This rule set, so that calls can be chained.

        Raises:
            ValueError: if a line does not contain exactly one ``//``
                separator.  The line number is reported on stderr first.
        """
        for lineno, raw in enumerate(fd, start=1):
            line = _strip_comment(raw)
            if not line:
                continue

            try:
                pre, post = line.split(_RULE_SEPARATOR)
            except ValueError:
                print(f"error: shit on line {lineno}", file=sys.stderr)
                raise

            self.add_rule(pre.strip(), post.strip(), lineno)
        # The original returned the script-level global ``ruleset`` here,
        # which is undefined whenever this class is used from another module.
        return self

    def translate(self, text):
        """Upper-case *text*, run every rule over it in order, return it."""
        text = text.upper()
        for rule in self.rules:
            text = rule.sub(text)
        return text

    def translate_debug(self, text):
        """Like :meth:`translate`, but print a trace to stdout.

        The upper-cased input is printed first; after that, every rule that
        actually changes the text is printed (with its index in the rule
        list) followed by the text it produced.
        """
        text = text.upper()
        print(text)
        for i, rule in enumerate(self.rules):
            after = rule.sub(text)
            if after == text:
                continue
            print(f"(rule {i:2d}: s/{rule.pre}/{rule.post}/)")
            print(after)
            text = after
        return text


def mode_normal(args, ruleset):
    """Translate lines read with ``input()`` until EOF or Ctrl-C.

    Uses :meth:`RuleSet.translate_debug` when ``args.debug`` is set.  Exits
    the process with status 0 once input ends or the user interrupts.
    """
    translate = ruleset.translate_debug if args.debug else ruleset.translate
    try:
        while True:
            print(translate(input()))
    except (EOFError, KeyboardInterrupt):
        sys.exit(0)


def mode_test(args, ruleset):
    """Translate every line of the file ``args.tests`` and print a table.

    Comments and blank lines in the tests file are ignored.  Each output row
    is the input, left-justified to the longest input (at least 3 columns),
    followed by its translation.
    """
    table = []

    with open(args.tests, "r", encoding="UTF-8") as fd:
        for raw in fd:
            test = _strip_comment(raw)
            if test:
                table.append((test, ruleset.translate(test)))

    width = max([3, *(len(source) for source, _ in table)])

    for source, output in table:
        print(f"{source.ljust(width)} {output}")


def main(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is None) and run."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--version", action="store_true",
                        help="print version")
    parser.add_argument("-D", "--debug", action="store_true",
                        help="be more verbose (for debugging)")
    parser.add_argument("-R", "--ruleset",
                        help="path to file with the rules")
    parser.add_argument("-T", "--tests", help="path to the tests file")
    args = parser.parse_args(argv)

    if args.version:
        print(f"{prog_name} {prog_version}")
        sys.exit(0)

    if not args.ruleset:
        # Default to the rules.txt that sits next to this script.
        dirname = path.dirname(path.realpath(__file__))
        args.ruleset = path.join(dirname, "rules.txt")

    ruleset = RuleSet()
    with open(args.ruleset, "r", encoding="UTF-8") as fd:
        ruleset.load(fd)

    if args.tests:
        mode_test(args, ruleset)
    else:
        mode_normal(args, ruleset)


if __name__ == '__main__':
    main()