diff --git a/cmd/lotus-bench/import.go b/cmd/lotus-bench/import.go index f76ef4644..ef572b13b 100644 --- a/cmd/lotus-bench/import.go +++ b/cmd/lotus-bench/import.go @@ -232,21 +232,20 @@ func compStats(vals []float64) (float64, float64) { type stats struct { count float64 mean float64 - dSqr float64 + m2 float64 } func (s *stats) AddPoint(value float64) { - s.count++ - meanDiff := (value - s.mean) / s.count - newMean := s.mean + meanDiff - - dSqrtInc := (value - newMean) * (value - s.mean) - s.dSqr += dSqrtInc - s.mean = newMean + // based on https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm + s.count += 1 + delta := value - s.mean + s.mean += delta / s.count + delta2 := value - s.mean + s.m2 += delta * delta2 } func (s *stats) variance() float64 { - return s.dSqr / (s.count - 1) + return s.m2 / (s.count - 1) } func (s1 *stats) Combine(s2 *stats) { @@ -268,14 +267,12 @@ func (s1 *stats) Combine(s2 *stats) { } newCount := s1.count + s2.count - newMean := s1.count*s1.mean + s2.count*s2.mean - newMean /= newCount - newVar := s1.count * (s1.variance() + (s1.mean-newMean)*(s1.mean-newMean)) - newVar += s2.count * (s2.variance() + (s2.mean-newMean)*(s2.mean-newMean)) - newVar /= newCount + delta := s2.mean - s1.mean + meanDelta := delta * s2.count / newCount + m2 := s1.m2 + s2.m2 + delta*meanDelta*s1.count s1.count = newCount - s1.mean = newMean - s1.dSqr = newVar * (newCount - 1) + s1.mean += meanDelta + s1.m2 = m2 } func tallyGasCharges(charges map[string]*stats, et types.ExecutionTrace) { diff --git a/cmd/lotus-bench/stats_test.go b/cmd/lotus-bench/stats_test.go index 851dc71af..6ade8a4e5 100644 --- a/cmd/lotus-bench/stats_test.go +++ b/cmd/lotus-bench/stats_test.go @@ -9,12 +9,12 @@ import ( func TestStats(t *testing.T) { N := 16 ss := make([]*stats, N) + rng := rand.New(rand.NewSource(1)) for i := 0; i < N; i++ { ss[i] = &stats{} - maxJ := rand.Intn(1000) + maxJ := rng.Intn(1000) for j := 0; j < maxJ; j++ { - ss[i].AddPoint(rand.NormFloat64()*5 + 500) - ss[i].AddPoint(rand.NormFloat64()*5 + 1000) + ss[i].AddPoint(rng.NormFloat64()*5 + 500) } t.Logf("mean: %f, stddev: %f, count %f", ss[i].mean, math.Sqrt(ss[i].variance()), ss[i].count) }