diff options
| -rwxr-xr-x | plot.sh | 28 | ||||
| -rwxr-xr-x | stalinizer.py | 111 | 
2 files changed, 99 insertions, 40 deletions
@@ -36,24 +36,32 @@ function gnuplot_conf {  		echo "set title \"Population over time\n$TS\""  		echo "set ylabel 'Players'"  		echo -n "plot '$DATA' " -		echo -n    "using 1:2 title 'Mean player count', " -		echo    "'' using 1:9 title 'Peak player count'" +		echo -n    "using 1:2  title 'Mean player count', " +		echo    "'' using 1:11 title 'Peak player count'"  	elif [ "$SELECT" == "ping" ]; then  		echo "set title \"Mean ping over time\n$TS\"" -		echo "set key off" -		echo "set ylabel \"Mean ping [ms]\"" -		echo -n "plot '$DATA' using 1:3 axes x1y1" +		echo "set ylabel \"Ping [ms]\"" +		echo "set yrange [0:240]" +		echo "set ytics 20" +		echo "set bars 0.5" +		echo -n "plot '$DATA' " +		echo -n    "using 1:3:5 title 'Standard deviation' " +		echo -n    "with yerrorbars pt 0 lc rgb '#C0C0C0'," +		echo -n "'' using 1:3:4 title 'Standard deviation of the mean' " +		echo -n    "with yerrorbars pt 0 lc rgb 'red'," +		echo -n "'' using 1:3 title 'Mean ping' " +		echo -n    "with points pt 2 lc rgb 'blue'"  	elif [ "$SELECT" == "ping-distrib" ]; then  		echo "set title \"Ping distribution over time\n$TS\""  		echo "set ylabel 'Fraction of players'"  		echo "set yrange [0:1]"  		echo "set ytics 0.1"  		echo -n "plot '$DATA'" -		echo -n    "using 1:4 title 'Above 60ms', " -		echo -n "'' using 1:5 title 'Above 110ms', " -		echo -n "'' using 1:6 title 'Above 160ms', " -		echo -n "'' using 1:7 title 'Above 210ms', " -		echo    "'' using 1:8 title 'Above 260ms'" +		echo -n    "using 1:6  title 'Above 60ms', " +		echo -n "'' using 1:7  title 'Above 110ms', " +		echo -n "'' using 1:8  title 'Above 160ms', " +		echo -n "'' using 1:9  title 'Above 210ms', " +		echo    "'' using 1:10 title 'Above 260ms'"  	else  		echo "SELECT is wrong" 1>&2  	fi diff --git a/stalinizer.py b/stalinizer.py index 0cecb01..1c6503f 100755 --- a/stalinizer.py +++ b/stalinizer.py @@ -80,19 +80,71 @@ class StateTracker:  class WeightedMean:  	def __init__(self): +		self.samples = list()  		self.total = 0 +		self.total_weighted = 0  		self.weights = 0  	def feed(self, sample, weight): -		self.total += sample * weight; +		self.samples.append((sample, weight)) +		self.total += sample +		self.total_weighted += sample * weight;  		self.weights += weight -	def read(self): +	def mean(self):  		if self.weights != 0: -			return self.total / self.weights +			return self.total/ self.weights  		else:  			return 0 +	def wmean(self): +		if self.weights != 0: +			return self.total_weighted / self.weights +		else: +			return 0 + +	# weighted standard deviation +	# http://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weightsd.pdf +	def wsd(self): +		if len(self.samples) <= 1: +			return 99999 + +		wmean = self.wmean() + +		S = 0 +		for sample in self.samples: +			S += sample[1] * (sample[0] - wmean) ** 2 + +		N = len(self.samples) +		wsd = (S / ((N - 1) / N * self.weights)) ** 0.5; + +		for sample in self.samples: +			print("%d\t%d" % (sample[0], sample[1])) +		print("wsd is %f" % wsd) + +		return wsd + +	# standard deviation of the weighted mean +	def wmsd(self): +		if self.weights == 0: +			return 99999 + +		# sum of squared weights +		sq_weights = 0 +		for sample in self.samples: +			sq_weights += sample[1] ** 2 + +		# unweighted variance +		mean = self.mean() +		var = 0 +		for sample in self.samples: +			var += (sample[0] - mean) ** 2 +		var /= len(self.samples) + +		wmsd = (sq_weights / (self.weights) ** 2 * var) ** 0.5 + +		return wmsd +  class Day:  	def __init__(self, date): @@ -100,7 +152,7 @@ class Day:  		self.pcount_sum = 0  		self.pcount_time = 0  		self.pcount_peak = 0 -		self.pings = list() +		self.pings = WeightedMean()  	def avg_pcount(self):  		return self.pcount_sum / self.pcount_time @@ -108,37 +160,34 @@ class Day:  	def peak_pcount(self):  		return self.pcount_peak -	def ping_stats(self): -		mean = WeightedMean() +	def ping_distrib(self):  		above_60 = 0  		above_110 = 0  		above_160 = 0  		above_210 = 0  		above_260 = 0 -		for ping in self.pings: -			mean.feed(ping[0], ping[1]); +		for sample in self.pings.samples: +			if sample[0] > 60: +				above_60 += sample[1] +			if sample[0] > 110: +				above_110 += sample[1] +			if sample[0] > 160: +				above_160 += sample[1] +			if sample[0] > 210: +				above_210 += sample[1] +			if sample[0] > 260: +				above_260 += sample[1] -			if ping[0] > 60: -				above_60 += ping[1] -			if ping[0] > 110: -				above_110 += ping[1] -			if ping[0] > 160: -				above_160 += ping[1] -			if ping[0] > 210: -				above_210 += ping[1] -			if ping[0] > 260: -				above_260 += ping[1] - -		if len(self.pings): -			above_60 /= mean.weights -			above_110 /= mean.weights -			above_160 /= mean.weights -			above_210 /= mean.weights -			above_260 /= mean.weights +		if len(self.pings.samples): +			above_60 /= self.pings.weights +			above_110 /= self.pings.weights +			above_160 /= self.pings.weights +			above_210 /= self.pings.weights +			above_260 /= self.pings.weights -		return "%f %f%% %f%% %f%% %f%% %f" % (mean.read(), above_60, \ -		       above_110, above_160, above_210, above_260) +		return "%f %f %f %f %f" % (above_60, above_110, above_160, \ +		       above_210, above_260) @@ -172,15 +221,17 @@ class Analyzer:  		if date not in self.days:  			self.days[date] = Day(date) -		self.days[date].pings += pings +		for ping in pings: +			self.days[date].pings.feed(ping[0], ping[1])  	def finish(self):  		for date, day in self.days.items():  			if day.pcount_time < 80000:  				 continue -			print("%s %f %s %d" % (date, day.avg_pcount(), \ -			      day.ping_stats(), day.peak_pcount())) +			print("%s %f %f %f %f %s %d" % (date, day.avg_pcount(), \ +			      day.pings.wmean(), day.pings.wmsd(), day.pings.wsd(), \ +			      day.ping_distrib(), day.peak_pcount()))  		pass  | 
