1 files changed, 81 insertions, 30 deletions
diff --git a/stalinizer.py b/stalinizer.py
index 0cecb01..1c6503f 100755
--- a/stalinizer.py
+++ b/stalinizer.py
@@ -80,19 +80,71 @@ class StateTracker:
 
 class WeightedMean:
 	def __init__(self):
+		self.samples = list()  # every (sample, weight) pair passed to feed()
 		self.total = 0
+		self.total_weighted = 0  # running sum of sample * weight
 		self.weights = 0
 
 	def feed(self, sample, weight):
-		self.total += sample * weight;
+		self.samples.append((sample, weight))  # kept for the per-sample passes
+		self.total += sample  # unweighted running sum
+		self.total_weighted += sample * weight;
 		self.weights += weight
 
-	def read(self):
+	def mean(self):  # unweighted mean of the fed samples; 0 when the weights sum to 0
 		if self.weights != 0:
-			return self.total / self.weights
+			return self.total / len(self.samples)  # divide by sample count, not weight sum
 		else:
 			return 0
 
+	def wmean(self):
+		# weighted mean: sum(w_i * x_i) / sum(w_i); 0 when the weights sum to 0
+		if self.weights == 0:
+			return 0
+		return self.total_weighted / self.weights
+
+	# weighted standard deviation
+	# http://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weightsd.pdf
+	# Uses N' = number of samples with a non-zero weight, as the NIST formula
+	# requires; returns the sentinel 99999 when fewer than two such samples
+	# exist or the weights sum to 0 (the denominator would be zero).
+	def wsd(self):
+		# N' in the NIST formula: only non-zero weights count
+		N = sum(1 for sample in self.samples if sample[1] != 0)
+		if N <= 1 or self.weights == 0:
+			return 99999
+
+		wmean = self.wmean()
+
+		S = 0
+		for sample in self.samples:
+			S += sample[1] * (sample[0] - wmean) ** 2
+
+		# 1.0 forces true division even where "/" on two ints would truncate;
+		# nothing is printed here so callers' stdout rows stay clean.
+		return (S / ((N - 1.0) / N * self.weights)) ** 0.5
+
+	# Standard deviation of the weighted mean:
+	#   sqrt(sum(w_i^2) / sum(w_i)^2 * var)
+	# with var the population variance of the samples around self.mean().
+	# Returns the sentinel 99999 when the weights sum to 0.
+	def wmsd(self):
+		if self.weights == 0:
+			return 99999
+
+		count = len(self.samples)
+		centre = self.mean()
+
+		# accumulate both sums in one pass over the (sample, weight) pairs
+		weights_sq = 0
+		spread = 0
+		for value, weight in self.samples:
+			weights_sq += weight ** 2
+			spread += (value - centre) ** 2
+		variance = spread / count
+
+		return (weights_sq / self.weights ** 2 * variance) ** 0.5
+
 
 class Day:
 	def __init__(self, date):
@@ -100,7 +152,7 @@ class Day:
 		self.pcount_sum = 0
 		self.pcount_time = 0
 		self.pcount_peak = 0
-		self.pings = list()
+		self.pings = WeightedMean()
 
 	def avg_pcount(self):
 		return self.pcount_sum / self.pcount_time
@@ -108,37 +160,34 @@ class Day:
 	def peak_pcount(self):
 		return self.pcount_peak
 
-	def ping_stats(self):
-		mean = WeightedMean()
+	def ping_distrib(self):  # share of total ping weight above each threshold, as a string
 		above_60 = 0
 		above_110 = 0
 		above_160 = 0
 		above_210 = 0
 		above_260 = 0
 
-		for ping in self.pings:
-			mean.feed(ping[0], ping[1]);
+		for sample in self.pings.samples:  # sample = (ping, weight)
+			if sample[0] > 60:
+				above_60 += sample[1]
+			if sample[0] > 110:
+				above_110 += sample[1]
+			if sample[0] > 160:
+				above_160 += sample[1]
+			if sample[0] > 210:
+				above_210 += sample[1]
+			if sample[0] > 260:
+				above_260 += sample[1]
 
-			if ping[0] > 60:
-				above_60 += ping[1]
-			if ping[0] > 110:
-				above_110 += ping[1]
-			if ping[0] > 160:
-				above_160 += ping[1]
-			if ping[0] > 210:
-				above_210 += ping[1]
-			if ping[0] > 260:
-				above_260 += ping[1]
-
-		if len(self.pings):
-			above_60 /= mean.weights
-			above_110 /= mean.weights
-			above_160 /= mean.weights
-			above_210 /= mean.weights
-			above_260 /= mean.weights
+		if self.pings.weights:  # test the divisor itself: samples can exist with zero total weight
+			above_60 /= self.pings.weights
+			above_110 /= self.pings.weights
+			above_160 /= self.pings.weights
+			above_210 /= self.pings.weights
+			above_260 /= self.pings.weights
 
-		return "%f %f%% %f%% %f%% %f%% %f" % (mean.read(), above_60, \
-		       above_110, above_160, above_210, above_260)
+		return "%f %f %f %f %f" % (above_60, above_110, above_160, \
+		       above_210, above_260)
 
 
 
@@ -172,15 +221,17 @@ class Analyzer:
 		if date not in self.days:
 			self.days[date] = Day(date)
 
-		self.days[date].pings += pings
+		for ping in pings:
+			self.days[date].pings.feed(ping[0], ping[1])
 
 	def finish(self):
 		for date, day in self.days.items():
 			if day.pcount_time < 80000:
 				 continue
 
-			print("%s %f %s %d" % (date, day.avg_pcount(), \
-			      day.ping_stats(), day.peak_pcount()))
+			print("%s %f %f %f %f %s %d" % (date, day.avg_pcount(), \
+			      day.pings.wmean(), day.pings.wmsd(), day.pings.wsd(), \
+			      day.ping_distrib(), day.peak_pcount()))
 		pass