diff options
Diffstat (limited to 'crl/data.py')
-rw-r--r-- | crl/data.py | 121 |
1 files changed, 121 insertions, 0 deletions
import re
import numpy as np


class LoadError(Exception):
    """Raised by :func:`load` when a file cannot be decoded or parsed."""


def identify(contents):
    """Guess the file format from the raw bytes of a file.

    Returns ``"spectrasuite"`` when the content starts with the SpectraSuite
    header, ``"Plot Digitizer"`` when the Plot Digitizer signature occurs
    anywhere in the content, and ``"unknown"`` otherwise.
    """
    if contents.startswith(b"SpectraSuite Data File"):
        return "spectrasuite"
    if b"created by Plot Digitizer" in contents:
        return "Plot Digitizer"
    return "unknown"


def identify_csv(line):
    """Detect the numeric layout of a data row.

    The decimal point is "." if the line contains one, otherwise ",".
    Columns are comma-separated if the line contains a comma, otherwise
    whitespace-separated.  (A line that uses "," both as decimal point and
    as delimiter is therefore read as plain comma-separated integers.)

    Returns a ``(num_cols, regex)`` tuple.  ``regex`` has three groups per
    column: integer part (with optional sign), fractional digits (possibly
    empty) and exponent (``None`` when the number has no exponent).

    Raises ``ValueError`` if the line is empty or whitespace only.
    """
    line = line.strip()
    if not line:
        raise ValueError("Cannot detect the CSV layout from an empty line")

    point = "." if "." in line else ","
    comma_delimited = "," in line
    # Tolerate blanks around a comma delimiter ("1.0, 2.0").
    delim = r"\s*,\s*" if comma_delimited else r"\s+"

    num_cols = len(re.findall(delim, line)) + 1
    if comma_delimited and line.endswith(","):
        # A trailing delimiter does not start another column.
        num_cols -= 1

    re_int = r"([+-]?\d+)"
    # The point must be escaped: a bare "." would match any character and,
    # e.g., swallow the "e" of "2e3" as if it were a decimal point.
    re_frac = r"%s?(\d*)" % re.escape(point)
    # An exponent is only recognised when introduced by "e" or "E".
    re_exp = r"(?:[eE]([+-]?\d+))?"

    regex = delim.join([re_int + re_frac + re_exp] * num_cols)

    return num_cols, re.compile(regex)


def _to_float(str_int, str_frac, str_exp):
    """Combine the three regex groups of one number into a float."""
    # Rebuilding the literal and letting float() parse it keeps the sign of
    # values such as "-0.5" and copes with a missing fraction or exponent.
    text = str_int
    if str_frac:
        text += "." + str_frac
    if str_exp:
        text += "e" + str_exp
    return float(text)


def parse_csv(lines):
    """Parse rows of delimited numbers into a 2-D float array.

    Blank lines are ignored.  The layout (decimal point, delimiter, number
    of columns) is detected from the first non-blank line by
    :func:`identify_csv` and applied to every row.

    Returns an array of shape ``(number of non-blank lines, num_cols)``.

    Raises ``ValueError`` if there is no non-blank line or if a row does
    not match the detected layout.
    """
    rows = [(i, line.strip()) for i, line in enumerate(lines)]
    rows = [(i, line) for i, line in rows if line]
    if not rows:
        raise ValueError("No data rows found")

    num_cols, regex = identify_csv(rows[0][1])

    # Sized by the non-blank rows only, so no row is left uninitialised.
    data = np.empty((len(rows), num_cols), dtype="float")

    for row, (index, line) in enumerate(rows):
        rv = regex.match(line)
        if rv is None:
            raise ValueError(
                "Row %d is not a row of %d numbers: %r"
                % (index + 1, num_cols, line)
            )

        groups = rv.groups()
        for col in range(num_cols):
            data[row, col] = _to_float(*groups[3 * col:3 * col + 3])

    return data


re_spectrasuite_marker = re.compile("^>>>>>(.*)<<<<<")


def parse_spectrasuite(lines):
    """Parse the processed spectral data section of a SpectraSuite file.

    The data rows are the lines between the
    ``>>>>>Begin Processed Spectral Data<<<<<`` and
    ``>>>>>End Processed Spectral Data<<<<<`` markers.

    Raises ``ValueError`` if either marker is missing or the rows cannot be
    parsed.
    """
    data_start = None
    data_end = None

    for i, line in enumerate(lines):
        rv = re_spectrasuite_marker.match(line)
        if rv is None:
            continue

        marker = rv.group(1)
        if marker == "Begin Processed Spectral Data":
            data_start = i + 1
        elif marker == "End Processed Spectral Data":
            data_end = i

    if data_start is None:
        raise ValueError("Missing 'Begin Processed Spectral Data'")
    if data_end is None:
        raise ValueError("Missing 'End Processed Spectral Data'")

    return parse_csv(lines[data_start:data_end])


def parse_plot_digitizer(lines):
    """Parse a Plot Digitizer export, skipping its first six lines."""
    return parse_csv(lines[6:])


def load(path):
    """Load numeric data from the file at ``path`` as a 2-D float array.

    The format is detected with :func:`identify`; unrecognised files are
    tried as plain delimited numbers.

    Raises ``LoadError`` if the file is not ASCII or cannot be parsed; the
    underlying exception is attached as ``__cause__``.
    """
    with open(path, "rb") as fd:
        contents = fd.read()

    fmt = identify(contents)

    try:
        lines = [line.decode("ascii") for line in contents.split(b"\n")]
    except UnicodeDecodeError as exc:
        raise LoadError("This non-ASCII data format isn't supported") from exc

    if fmt == "spectrasuite":
        try:
            return parse_spectrasuite(lines)
        except Exception as exc:
            raise LoadError(
                "This SpectraSuite file couldn't be understood"
            ) from exc
    elif fmt == "Plot Digitizer":
        try:
            return parse_plot_digitizer(lines)
        except Exception as exc:
            raise LoadError(
                "This Plot Digitizer file couldn't be understood"
            ) from exc
    else:
        try:
            return parse_csv(lines)
        except Exception as exc:
            raise LoadError("This data format isn't supported") from exc