"""Load numeric tables from SpectraSuite, Plot Digitizer and plain CSV files."""

import re

import numpy as np


class LoadError(Exception):
    """Raised by :func:`load` when a file cannot be decoded or parsed."""


def identify(contents):
    """Guess the file format from the raw bytes of a file.

    Args:
        contents: The complete file contents as ``bytes``.

    Returns:
        ``"spectrasuite"`` if the contents start with the SpectraSuite
        banner, ``"Plot Digitizer"`` if the Plot Digitizer signature occurs
        anywhere in the contents, otherwise ``"unknown"``.
    """
    if contents.startswith(b"SpectraSuite Data File"):
        return "spectrasuite"
    if b"created by Plot Digitizer" in contents:
        return "Plot Digitizer"
    return "unknown"


# A single number written with a decimal comma (or a plain integer),
# optionally followed by an exponent, e.g. "1,5", "-12" or "3,25e-4".
_RE_DECIMAL_COMMA_TOKEN = re.compile(r"[+-]?\d+(?:,\d+)?(?:[eE][+-]?\d+)?")


def _uses_decimal_comma(line):
    """Return True if *line* is whitespace-separated decimal-comma numbers.

    Several whitespace-separated tokens, at least one comma, and every token
    being a well-formed number on its own (no leading/trailing comma, no
    dot) can only mean the comma is the decimal mark, not the delimiter.
    """
    tokens = line.split()
    return (
        len(tokens) > 1
        and "," in line
        and all(_RE_DECIMAL_COMMA_TOKEN.fullmatch(token) for token in tokens)
    )


def identify_csv(line):
    """Work out the column layout of a delimited text line.

    The delimiter is a comma (optionally surrounded by whitespace) when the
    line contains commas that are not decimal marks, otherwise a run of
    whitespace.  The decimal mark is ``"."`` except for whitespace-delimited
    lines without any dot, where ``","`` is accepted.

    Args:
        line: A representative data line (surrounding whitespace is ignored).

    Returns:
        A ``(num_cols, regex)`` tuple.  ``regex`` is a compiled pattern with
        three groups per column, in order: integer part (with optional
        sign), fractional digits (possibly empty) and exponent (possibly
        empty).  The pattern is anchored at the start of the line only.

    Raises:
        ValueError: If *line* is empty or contains only whitespace.
    """
    line = line.strip()
    if not line:
        raise ValueError("Cannot identify the layout of an empty line")

    if "," in line and not _uses_decimal_comma(line):
        delim = r"\s*,\s*"
        point = "."
        num_cols = line.count(",") + 1
        if line.endswith(","):
            # A trailing delimiter does not start another column.
            num_cols -= 1
    else:
        delim = r"\s+"
        point = "." if "." in line else ","
        num_cols = len(line.split())

    re_int = r"([\+-]?\d+)"
    # Escape the decimal mark: a bare "." would match any character.
    re_frac = r"%s?(\d*)" % re.escape(point)
    # The exponent marker is deliberately optional, as it always has been.
    re_exp = r"[eE]?([\+-]?\d*)"
    regex = delim.join([re_int + re_frac + re_exp] * num_cols)
    return num_cols, re.compile(regex)


def parse_csv(lines):
    """Parse delimited numeric text lines into a 2-D float array.

    The layout (delimiter, decimal mark, column count) is taken from the
    first non-blank line via :func:`identify_csv` and applied to every other
    line.  Blank lines are ignored and produce no row in the result.

    Args:
        lines: Iterable of text lines.

    Returns:
        A ``numpy.ndarray`` of dtype ``float`` with one row per non-blank
        line and one column per detected field.

    Raises:
        ValueError: If there are no non-blank lines, if a line does not
            match the detected layout, or if a field is not a valid number.
    """
    rows = [line.strip() for line in lines if line.strip()]
    if not rows:
        raise ValueError("No data lines to parse")

    num_cols, regex = identify_csv(rows[0])
    data = np.empty((len(rows), num_cols), dtype="float")
    for i, row in enumerate(rows):
        rv = regex.match(row)
        if rv is None:
            raise ValueError(
                "Data line %d doesn't match the detected layout: %r" % (i + 1, row)
            )
        for j in range(num_cols):
            str_int = rv.group(3 * j + 1)
            str_frac = rv.group(3 * j + 2)
            str_exp = rv.group(3 * j + 3)
            # Re-assemble a canonical literal and let float() convert it.
            # This keeps the sign of "-0.5", accepts values without
            # fractional digits and yields correctly rounded results.
            data[i, j] = float(
                "%s.%se%s" % (str_int, str_frac or "0", str_exp or "0")
            )
    return data


re_spectrasuite_marker = re.compile(r"^>>>>>(.*)<<<<<")


def parse_spectrasuite(lines):
    """Extract the processed spectral data table from a SpectraSuite file.

    The table is the run of lines between the
    ``>>>>>Begin Processed Spectral Data<<<<<`` and
    ``>>>>>End Processed Spectral Data<<<<<`` marker lines.

    Args:
        lines: The file's text lines.

    Returns:
        The table parsed by :func:`parse_csv`.

    Raises:
        ValueError: If either marker is missing or the table cannot be
            parsed.
    """
    data_start = None
    data_end = None
    for i, line in enumerate(lines):
        rv = re_spectrasuite_marker.match(line)
        if rv is None:
            continue
        marker = rv.group(1)
        if marker == "Begin Processed Spectral Data":
            data_start = i + 1
        elif marker == "End Processed Spectral Data":
            data_end = i

    if data_start is None:
        raise ValueError("Missing 'Begin Processed Spectral Data'")
    if data_end is None:
        raise ValueError("Missing 'End Processed Spectral Data'")
    return parse_csv(lines[data_start:data_end])


def parse_plot_digitizer(lines):
    """Parse the data table of a Plot Digitizer export.

    Args:
        lines: The file's text lines.

    Returns:
        The lines after the first six, parsed by :func:`parse_csv`.
    """
    # NOTE(review): the first six lines are presumably the header that
    # Plot Digitizer writes -- confirm against a sample export.
    return parse_csv(lines[6:])


def load(path):
    """Read a data file and return its numeric table.

    The format is detected with :func:`identify`; unrecognised files are
    parsed as plain delimited text.

    Args:
        path: Path of the file to read.

    Returns:
        A 2-D ``numpy.ndarray`` of floats.

    Raises:
        LoadError: If the file is not pure ASCII or its contents cannot be
            parsed.  The underlying exception is attached as ``__cause__``.
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as fd:
        contents = fd.read()

    fmt = identify(contents)
    try:
        lines = [line.decode("ascii") for line in contents.split(b"\n")]
    except UnicodeDecodeError as exc:
        raise LoadError("This non-ASCII data format isn't supported") from exc

    if fmt == "spectrasuite":
        parser = parse_spectrasuite
        message = "This SpectraSuite file couldn't be understood"
    elif fmt == "Plot Digitizer":
        parser = parse_plot_digitizer
        message = "This Plot Digitizer file couldn't be understood"
    else:
        parser = parse_csv
        message = "This data format isn't supported"

    # Any parser failure is reported uniformly as LoadError at this boundary.
    try:
        return parser(lines)
    except Exception as exc:
        raise LoadError(message) from exc