1 files changed, 121 insertions, 0 deletions
diff --git a/crl/data.py b/crl/data.py
new file mode 100644
index 0000000..69d69e2
--- /dev/null
+++ b/crl/data.py
@@ -0,0 +1,121 @@
+import re
+import numpy as np
+
+class LoadError(Exception):
+	"""Raised by load() when a file cannot be decoded or parsed."""
+
+
+
+
+def identify(contents):
+	"""Guess which program produced a data file from its raw bytes.
+
+	contents is the complete file as a bytes object. Returns "spectrasuite"
+	when the file begins with the SpectraSuite banner, "Plot Digitizer" when
+	the Plot Digitizer credit appears anywhere in it, and "unknown" otherwise.
+	"""
+	# The SpectraSuite test runs first, so it wins if both signatures occur.
+	if contents.startswith(b"SpectraSuite Data File"):
+		return "spectrasuite"
+
+	if contents.find(b"created by Plot Digitizer") >= 0:
+		return "Plot Digitizer"
+
+	return "unknown"
+
+def identify_csv(line):
+	"""Detect the numeric layout of a delimited text row.
+
+	The decimal mark and the column delimiter are guessed from one sample
+	line: if the line contains ".", the decimal mark is "." and the
+	delimiter is "," when a comma is also present; otherwise the decimal
+	mark is "," and columns are separated by whitespace. Rows that differ
+	from the sample in these respects will not be recognised correctly.
+
+	Returns a (num_cols, regex) tuple. The compiled regex has three capture
+	groups per column, in order: the signed integer digits, the fractional
+	digits (possibly "") and the signed exponent digits (possibly "").
+
+	Raises ValueError when the line is empty or contains only whitespace.
+	"""
+	line = line.strip()
+	if not line:
+		raise ValueError("Cannot detect the CSV layout from an empty line")
+
+	point = ","
+	delim = r"\s+"
+
+	if "." in line:
+		point = "."
+		if "," in line:
+			delim = ","
+
+	num_cols = len(re.findall(delim, line)) + 1
+	# Only a comma can still be trailing here: surrounding whitespace was
+	# stripped above, so a whitespace delimiter never ends the line.
+	if delim == "," and line.endswith(","):
+		num_cols -= 1
+
+	re_int = r"([+-]?\d+)"
+	# Escape the decimal mark: a bare "." is a regex wildcard and would
+	# swallow the "e" of values such as "1e5", turning them into 1.5.
+	re_frac = re.escape(point) + r"?(\d*)"
+	re_exp = r"[eE]?([+-]?\d*)"
+
+	regex = delim.join([re_int + re_frac + re_exp] * num_cols)
+
+	return num_cols, re.compile(regex)
+
+def parse_csv(lines):
+	"""Convert delimited text lines into a 2-D float array.
+
+	lines is a sequence of strings. Blank or whitespace-only lines are
+	dropped, so the result has one row per remaining line. The column
+	layout is detected by identify_csv() from the first remaining line.
+
+	Returns a numpy array of shape (rows, num_cols) with dtype float.
+
+	Raises ValueError when no data rows are present, when a row does not
+	match the detected layout, or when a value cannot be converted.
+	"""
+	# Strip every line so "\r" and indentation cannot break matching, and
+	# drop blanks up front so no array row is left uninitialised.
+	rows = [line.strip() for line in lines]
+	rows = [row for row in rows if row]
+	if not rows:
+		raise ValueError("No data rows found")
+
+	num_cols, regex = identify_csv(rows[0])
+
+	data = np.empty((len(rows), num_cols), dtype="float")
+
+	for i, row in enumerate(rows):
+		rv = regex.match(row)
+		if rv is None:
+			raise ValueError("Unparseable data row %d: %r" % (i + 1, row))
+
+		groups = rv.groups()
+		for j in range(num_cols):
+			str_int, str_frac, str_exp = groups[3 * j:3 * j + 3]
+
+			# Rebuild a canonical literal and let float() convert it. That
+			# keeps the sign of values such as "-0.5" and accepts values
+			# with no fractional digits or no exponent.
+			literal = "%s.%se%s" % (str_int, str_frac or "0", str_exp or "0")
+			data[i, j] = float(literal)
+
+	return data
+
+# Matches a line that starts with a ">>>>>...<<<<<" marker; group 1 is the
+# text between the arrows.
+re_spectrasuite_marker = re.compile("^>>>>>(.*)<<<<<")
+
+def parse_spectrasuite(lines):
+	"""Extract and parse the processed-spectrum section of a SpectraSuite file.
+
+	Scans every line for ">>>>>...<<<<<" markers. The rows handed to
+	parse_csv() start on the line after the "Begin Processed Spectral Data"
+	marker and stop just before the "End Processed Spectral Data" marker.
+	If a marker occurs more than once, its last occurrence is used.
+
+	Raises ValueError when either marker is missing.
+	"""
+	first_row = None
+	last_row = None
+
+	for index, text in enumerate(lines):
+		found = re_spectrasuite_marker.match(text)
+		if found is None:
+			continue
+
+		label = found.group(1)
+		if label == "Begin Processed Spectral Data":
+			# Data begins on the line after the marker itself.
+			first_row = index + 1
+		elif label == "End Processed Spectral Data":
+			last_row = index
+
+	if first_row is None:
+		raise ValueError("Missing 'Begin Processed Spectral Data'")
+	if last_row is None:
+		raise ValueError("Missing 'End Processed Spectral Data'")
+
+	section = lines[first_row:last_row]
+	return parse_csv(section)
+
+def parse_plot_digitizer(lines):
+	"""Parse the data rows of a Plot Digitizer export.
+
+	The first six entries of lines are skipped (presumably the export's
+	header; confirm against a sample file) and the remainder is passed to
+	parse_csv(), whose result is returned unchanged.
+	"""
+	header_rows = 6
+	body = lines[header_rows:]
+	return parse_csv(body)
+
+def load(path):
+	"""Read a data file and return its numeric contents.
+
+	The file is read as bytes, its format is detected with identify(), the
+	bytes are decoded as ASCII and split on "\n", and the resulting lines
+	are passed to the matching parser: parse_spectrasuite(),
+	parse_plot_digitizer(), or parse_csv() for unrecognised formats.
+
+	Returns whatever the selected parser returns.
+
+	Raises LoadError when the file is not ASCII or the parser fails; the
+	original exception is attached as __cause__. Errors from opening or
+	reading the file are not caught here.
+	"""
+	with open(path, "rb") as fd:
+		contents = fd.read()
+
+	fmt = identify(contents)
+
+	try:
+		lines = contents.decode("ascii").split("\n")
+	except UnicodeDecodeError as exc:
+		raise LoadError("This non-ASCII data format isn't supported") from exc
+
+	if fmt == "spectrasuite":
+		parser = parse_spectrasuite
+		message = "This SpectraSuite file couldn't be understood"
+	elif fmt == "Plot Digitizer":
+		parser = parse_plot_digitizer
+		message = "This Plot Digitizer file couldn't be understood"
+	else:
+		parser = parse_csv
+		message = "This data format isn't supported"
+
+	# Any parser failure is reported as LoadError, so callers have a single
+	# exception type to handle; chaining keeps the root cause visible.
+	try:
+		return parser(lines)
+	except Exception as exc:
+		raise LoadError(message) from exc