summary refs log tree commit diff
path: root/crl/data.py
diff options
context:
space:
mode:
Diffstat (limited to 'crl/data.py')
-rw-r--r-- crl/data.py 121
1 files changed, 121 insertions, 0 deletions
diff --git a/crl/data.py b/crl/data.py
new file mode 100644
index 0000000..69d69e2
--- /dev/null
+++ b/crl/data.py
@@ -0,0 +1,121 @@
+import re
+import numpy as np
+
class LoadError(Exception):
    """Raised when a data file cannot be decoded or parsed."""
+
+
+
+
def identify(contents):
    """Guess the file format from the raw bytes of a data file.

    Returns "spectrasuite" when *contents* begins with the SpectraSuite
    header, "Plot Digitizer" when the Plot Digitizer signature appears
    anywhere in *contents*, and "unknown" otherwise.
    """
    # The SpectraSuite signature must be the very first bytes of the file.
    if contents.startswith(b"SpectraSuite Data File"):
        return "spectrasuite"

    # The Plot Digitizer signature may appear anywhere in the file.
    if b"created by Plot Digitizer" in contents:
        return "Plot Digitizer"

    return "unknown"
+
# One whitespace-separated token written with "," as its decimal mark
# (the fractional part and the exponent are optional).
_RE_DECIMAL_COMMA_TOKEN = re.compile(r"[+-]?\d+(?:,\d+)?(?:[eE][+-]?\d+)?$")


def _is_decimal_comma_line(line):
    """Return True if *line* is several whitespace-separated decimal-comma numbers."""
    tokens = line.split()
    return len(tokens) > 1 and all(
        _RE_DECIMAL_COMMA_TOKEN.match(token) for token in tokens
    )


def identify_csv(line):
    """Work out the column layout of one CSV-like data line.

    The decimal mark is "." when the line contains one, otherwise ",".
    Columns are separated by "," when the line contains commas that are not
    decimal marks, otherwise by runs of whitespace. A line without any "."
    whose whitespace-separated tokens are all numbers such as "1,5" is read
    as whitespace-delimited with "," as the decimal mark.

    Parameters:
        line: a representative data line (surrounding whitespace is ignored).

    Returns:
        A tuple ``(num_cols, regex)``. ``regex`` is a compiled pattern that
        matches one data line and exposes three groups per column, in
        order: the signed integer part, the fractional digits and the
        signed exponent digits (the last two may be empty strings).

    Raises:
        ValueError: if *line* is empty or contains only whitespace.
    """
    line = line.strip()
    if not line:
        # Fail with a clear message instead of an IndexError from line[-1].
        raise ValueError("Cannot identify the CSV layout of an empty line")

    point = "." if "." in line else ","

    if "," not in line:
        delim = r"\s+"
    elif point == "," and _is_decimal_comma_line(line):
        # Commas are decimal marks here, so whitespace separates the columns.
        delim = r"\s+"
    else:
        delim = ","

    num_cols = len(re.findall(delim, line)) + 1
    if delim == "," and line.endswith(","):
        # A trailing delimiter does not start another column.
        num_cols -= 1

    re_int = r"([+-]?\d+)"
    # Escape the decimal mark: a bare "." would match any character.
    re_frac = r"%s?(\d*)" % re.escape(point)
    re_exp = r"[eE]?([+-]?\d*)"

    regex = delim.join([re_int + re_frac + re_exp] * num_cols)

    return num_cols, re.compile(regex)
+
def parse_csv(lines):
    """Parse CSV-like text lines into a 2-D float array.

    The column layout is detected from the first non-blank line with
    identify_csv(); every other non-blank line must match that layout.
    Blank lines are dropped, so the result has one row per non-blank line
    and never contains uninitialized rows.

    Parameters:
        lines: a sequence of text lines (surrounding whitespace is ignored).

    Returns:
        A numpy array of dtype float with shape (non-blank lines, columns).

    Raises:
        ValueError: if there is no non-blank line, or a line does not match
            the layout detected from the first one.
    """
    rows = [line.strip() for line in lines if line.strip()]
    if not rows:
        raise ValueError("No data lines to parse")

    num_cols, regex = identify_csv(rows[0])

    data = np.empty((len(rows), num_cols), dtype="float")

    for i, row in enumerate(rows):
        rv = regex.match(row)
        if rv is None:
            raise ValueError("Malformed data line: %r" % row)

        for j in range(num_cols):
            # identify_csv() yields three groups per column:
            # integer part, fractional digits, exponent digits.
            str_int, str_frac, str_exp = rv.group(3 * j + 1, 3 * j + 2, 3 * j + 3)

            # Rebuild a canonical literal and let float() parse it. This
            # accepts missing fraction/exponent parts, keeps the sign of
            # values such as "-0.5", and normalizes a "," decimal mark.
            data[i, j] = float(
                "%s.%se%s" % (str_int, str_frac or "0", str_exp or "0")
            )

    return data
+
# Matches SpectraSuite section markers of the form ">>>>>label<<<<<".
re_spectrasuite_marker = re.compile("^>>>>>(.*)<<<<<")


def parse_spectrasuite(lines):
    """Extract and parse the processed spectral data of a SpectraSuite file.

    Scans *lines* for the "Begin Processed Spectral Data" and
    "End Processed Spectral Data" markers and hands the lines between them
    to parse_csv(). If a marker occurs more than once, its last occurrence
    is the one used.

    Raises:
        ValueError: if either marker is missing.
    """
    first_row = None
    stop_row = None

    for index, text in enumerate(lines):
        found = re_spectrasuite_marker.match(text)
        if found is None:
            continue

        label = found.group(1)
        if label == "Begin Processed Spectral Data":
            # Data starts on the line after the opening marker.
            first_row = index + 1
        elif label == "End Processed Spectral Data":
            stop_row = index

    if first_row is None:
        raise ValueError("Missing 'Begin Processed Spectral Data'")
    if stop_row is None:
        raise ValueError("Missing 'End Processed Spectral Data'")

    return parse_csv(lines[first_row:stop_row])
+
def parse_plot_digitizer(lines):
    """Parse a Plot Digitizer export.

    The first six lines are skipped (presumably the file header; confirm
    against a sample file) and the rest is handed to parse_csv().
    """
    skipped = 6
    payload = lines[skipped:]
    return parse_csv(payload)
+
def load(path):
    """Read the data file at *path* and return its parsed contents.

    The format is detected with identify() on the raw bytes; the file is
    then decoded as ASCII, split on newlines and passed to the matching
    parser (parse_spectrasuite, parse_plot_digitizer, or parse_csv for
    anything unrecognized).

    Returns:
        Whatever the selected parser returns.

    Raises:
        LoadError: if the file is not ASCII or the parser fails for any
            reason. Errors from opening or reading the file are not caught
            here.
    """
    with open(path, "rb") as handle:
        raw = handle.read()

    kind = identify(raw)

    try:
        lines = [chunk.decode("ascii") for chunk in raw.split(b"\n")]
    except UnicodeDecodeError:
        raise LoadError("This non-ASCII data format isn't supported")

    # Choose the parser and the message reported if that parser fails.
    if kind == "spectrasuite":
        parser = parse_spectrasuite
        complaint = "This SpectraSuite file couldn't be understood"
    elif kind == "Plot Digitizer":
        parser = parse_plot_digitizer
        complaint = "This Plot Digitizer file couldn't be understood"
    else:
        parser = parse_csv
        complaint = "This data format isn't supported"

    try:
        return parser(lines)
    except Exception:
        raise LoadError(complaint)