From 1646077ab8a024a51ee5eec74f0614015fc91d2d Mon Sep 17 00:00:00 2001 From: Paweł Redman Date: Wed, 7 Mar 2018 14:55:29 +0100 Subject: SD and SD of the mean on the ping graph. --- plot.sh | 28 +++++++++------ stalinizer.py | 111 ++++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/plot.sh b/plot.sh index 69f7744..140be84 100755 --- a/plot.sh +++ b/plot.sh @@ -36,24 +36,32 @@ function gnuplot_conf { echo "set title \"Population over time\n$TS\"" echo "set ylabel 'Players'" echo -n "plot '$DATA' " - echo -n "using 1:2 title 'Mean player count', " - echo "'' using 1:9 title 'Peak player count'" + echo -n "using 1:2 title 'Mean player count', " + echo "'' using 1:11 title 'Peak player count'" elif [ "$SELECT" == "ping" ]; then echo "set title \"Mean ping over time\n$TS\"" - echo "set key off" - echo "set ylabel \"Mean ping [ms]\"" - echo -n "plot '$DATA' using 1:3 axes x1y1" + echo "set ylabel \"Ping [ms]\"" + echo "set yrange [0:240]" + echo "set ytics 20" + echo "set bars 0.5" + echo -n "plot '$DATA' " + echo -n "using 1:3:5 title 'Standard deviation' " + echo -n "with yerrorbars pt 0 lc rgb '#C0C0C0'," + echo -n "'' using 1:3:4 title 'Standard deviation of the mean' " + echo -n "with yerrorbars pt 0 lc rgb 'red'," + echo -n "'' using 1:3 title 'Mean ping' " + echo -n "with points pt 2 lc rgb 'blue'" elif [ "$SELECT" == "ping-distrib" ]; then echo "set title \"Ping distribution over time\n$TS\"" echo "set ylabel 'Fraction of players'" echo "set yrange [0:1]" echo "set ytics 0.1" echo -n "plot '$DATA'" - echo -n "using 1:4 title 'Above 60ms', " - echo -n "'' using 1:5 title 'Above 110ms', " - echo -n "'' using 1:6 title 'Above 160ms', " - echo -n "'' using 1:7 title 'Above 210ms', " - echo "'' using 1:8 title 'Above 260ms'" + echo -n "using 1:6 title 'Above 60ms', " + echo -n "'' using 1:7 title 'Above 110ms', " + echo -n "'' using 1:8 title 'Above 160ms', " + echo -n "'' using 1:9 title 'Above 210ms', " + echo "'' using 1:10 title 'Above 260ms'" else echo "SELECT is wrong" 1>&2 fi diff --git a/stalinizer.py b/stalinizer.py index 0cecb01..1c6503f 100755 --- a/stalinizer.py +++ b/stalinizer.py @@ -80,19 +80,71 @@ class StateTracker: class WeightedMean: def __init__(self): + self.samples = list() self.total = 0 + self.total_weighted = 0 self.weights = 0 def feed(self, sample, weight): - self.total += sample * weight; + self.samples.append((sample, weight)) + self.total += sample + self.total_weighted += sample * weight; self.weights += weight - def read(self): + def mean(self): if self.weights != 0: - return self.total / self.weights + return self.total/ self.weights else: return 0 + def wmean(self): + if self.weights != 0: + return self.total_weighted / self.weights + else: + return 0 + + # weighted standard deviation + # http://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weightsd.pdf + def wsd(self): + if len(self.samples) <= 1: + return 99999 + + wmean = self.wmean() + + S = 0 + for sample in self.samples: + S += sample[1] * (sample[0] - wmean) ** 2 + + N = len(self.samples) + wsd = (S / ((N - 1) / N * self.weights)) ** 0.5; + + for sample in self.samples: + print("%d\t%d" % (sample[0], sample[1])) + print("wsd is %f" % wsd) + + return wsd + + # standard deviation of the weighted mean + def wmsd(self): + if self.weights == 0: + return 99999 + + # sum of squared weights + sq_weights = 0 + for sample in self.samples: + sq_weights += sample[1] ** 2 + + # unweighted variance + mean = self.mean() + var = 0 + for sample in self.samples: + var += (sample[0] - mean) ** 2 + var /= len(self.samples) + + wmsd = (sq_weights / (self.weights) ** 2 * var) ** 0.5 + + return wmsd + class Day: def __init__(self, date): @@ -100,7 +152,7 @@ class Day: self.pcount_sum = 0 self.pcount_time = 0 self.pcount_peak = 0 - self.pings = list() + self.pings = WeightedMean() def avg_pcount(self): return self.pcount_sum / self.pcount_time @@ -108,37 +160,34 @@ class Day: def peak_pcount(self): return self.pcount_peak - def ping_stats(self): - mean = WeightedMean() + def ping_distrib(self): above_60 = 0 above_110 = 0 above_160 = 0 above_210 = 0 above_260 = 0 - for ping in self.pings: - mean.feed(ping[0], ping[1]); + for sample in self.pings.samples: + if sample[0] > 60: + above_60 += sample[1] + if sample[0] > 110: + above_110 += sample[1] + if sample[0] > 160: + above_160 += sample[1] + if sample[0] > 210: + above_210 += sample[1] + if sample[0] > 260: + above_260 += sample[1] - if ping[0] > 60: - above_60 += ping[1] - if ping[0] > 110: - above_110 += ping[1] - if ping[0] > 160: - above_160 += ping[1] - if ping[0] > 210: - above_210 += ping[1] - if ping[0] > 260: - above_260 += ping[1] - - if len(self.pings): - above_60 /= mean.weights - above_110 /= mean.weights - above_160 /= mean.weights - above_210 /= mean.weights - above_260 /= mean.weights + if len(self.pings.samples): + above_60 /= self.pings.weights + above_110 /= self.pings.weights + above_160 /= self.pings.weights + above_210 /= self.pings.weights + above_260 /= self.pings.weights - return "%f %f%% %f%% %f%% %f%% %f" % (mean.read(), above_60, \ - above_110, above_160, above_210, above_260) + return "%f %f %f %f %f" % (above_60, above_110, above_160, \ + above_210, above_260) @@ -172,15 +221,17 @@ class Analyzer: if date not in self.days: self.days[date] = Day(date) - self.days[date].pings += pings + for ping in pings: + self.days[date].pings.feed(ping[0], ping[1]) def finish(self): for date, day in self.days.items(): if day.pcount_time < 80000: continue - print("%s %f %s %d" % (date, day.avg_pcount(), \ - day.ping_stats(), day.peak_pcount())) + print("%s %f %f %f %f %s %d" % (date, day.avg_pcount(), \ + day.pings.wmean(), day.pings.wmsd(), day.pings.wsd(), \ + day.ping_distrib(), day.peak_pcount())) pass -- cgit