Add quantiles and histogram

Signed-off-by: Jakub Sztandera <kubuxu@protocol.ai>
This commit is contained in:
Jakub Sztandera 2021-06-11 15:35:13 +02:00 committed by Steven Allen
parent 2721279e87
commit 7dd58efb84
7 changed files with 325 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import (
"time" "time"
"github.com/ipfs/go-cid" "github.com/ipfs/go-cid"
"github.com/streadway/quantile"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/go-state-types/big"
@ -21,6 +22,7 @@ import (
"github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/stmgr"
"github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/cmd/lotus-sim/simulation" "github.com/filecoin-project/lotus/cmd/lotus-sim/simulation"
"github.com/filecoin-project/lotus/lib/stati"
) )
func getTotalPower(ctx context.Context, sm *stmgr.StateManager, ts *types.TipSet) (power.Claim, error) { func getTotalPower(ctx context.Context, sm *stmgr.StateManager, ts *types.TipSet) (power.Claim, error) {
@ -177,6 +179,31 @@ var infoCommitGasSimCommand = &cli.Command{
var gasAggMax, proofsAggMax uint64 var gasAggMax, proofsAggMax uint64
var gasSingle, proofsSingle uint64 var gasSingle, proofsSingle uint64
qpoints := []struct{ q, tol float64 }{
{0.01, 0.0005},
{0.05, 0.001},
{0.20, 0.01},
{0.25, 0.01},
{0.30, 0.01},
{0.40, 0.01},
{0.45, 0.01},
{0.50, 0.01},
{0.60, 0.01},
{0.80, 0.01},
{0.95, 0.001},
{0.99, 0.0005},
}
estims := make([]quantile.Estimate, len(qpoints))
for i, p := range qpoints {
estims[i] = quantile.Known(p.q, p.tol)
}
qua := quantile.New(estims...)
hist, err := stati.NewHistogram([]float64{
1, 3, 5, 7, 15, 30, 50, 100, 200, 400, 600, 700, 819})
if err != nil {
return err
}
err = sim.Walk(cctx.Context, cctx.Int64("lookback"), func( err = sim.Walk(cctx.Context, cctx.Int64("lookback"), func(
sm *stmgr.StateManager, ts *types.TipSet, stCid cid.Cid, sm *stmgr.StateManager, ts *types.TipSet, stCid cid.Cid,
messages []*simulation.AppliedMessage, messages []*simulation.AppliedMessage,
@ -203,11 +230,17 @@ var infoCommitGasSimCommand = &cli.Command{
gasAggMax += uint64(m.GasUsed) gasAggMax += uint64(m.GasUsed)
proofsAggMax += c proofsAggMax += c
} }
for i := uint64(0); i < c; i++ {
qua.Add(float64(c))
}
hist.Observe(float64(c))
} }
if m.Method == builtin.MethodsMiner.ProveCommitSector { if m.Method == builtin.MethodsMiner.ProveCommitSector {
gasSingle += uint64(m.GasUsed) gasSingle += uint64(m.GasUsed)
proofsSingle++ proofsSingle++
qua.Add(1)
hist.Observe(1)
} }
} }
@ -220,6 +253,22 @@ var infoCommitGasSimCommand = &cli.Command{
fmt.Printf("Gas usage efficiency in comparison to all 819: %f%%\n", 100*idealGassUsed/float64(gasAgg+gasSingle)) fmt.Printf("Gas usage efficiency in comparison to all 819: %f%%\n", 100*idealGassUsed/float64(gasAgg+gasSingle))
fmt.Printf("Proofs in singles: %d\n", proofsSingle)
fmt.Printf("Proofs in Aggs: %d\n", proofsAgg)
fmt.Printf("Proofs in Aggs(819): %d\n", proofsAggMax)
fmt.Println()
fmt.Println("Quantiles of proofs in given aggregate size:")
for _, p := range qpoints {
fmt.Printf("%.0f%%\t%.0f\n", p.q*100, qua.Get(p.q))
}
fmt.Println()
fmt.Println("Histogram of messages:")
fmt.Printf("Total\t%d\n", hist.Total())
for i, b := range hist.Buckets[1:] {
fmt.Printf("%.0f\t%d\n", b, hist.Get(i))
}
return nil return nil
}, },
} }

1
go.mod
View File

@ -133,6 +133,7 @@ require (
github.com/prometheus/client_golang v1.6.0 github.com/prometheus/client_golang v1.6.0
github.com/raulk/clock v1.1.0 github.com/raulk/clock v1.1.0
github.com/raulk/go-watchdog v1.0.1 github.com/raulk/go-watchdog v1.0.1
github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25
github.com/stretchr/objx v0.2.0 // indirect github.com/stretchr/objx v0.2.0 // indirect
github.com/stretchr/testify v1.7.0 github.com/stretchr/testify v1.7.0
github.com/syndtr/goleveldb v1.0.0 github.com/syndtr/goleveldb v1.0.0

2
go.sum
View File

@ -1513,6 +1513,8 @@ github.com/src-d/envconfig v1.0.0/go.mod h1:Q9YQZ7BKITldTBnoxsE5gOeB5y66RyPXeue/
github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25 h1:7z3LSn867ex6VSaahyKadf4WtSsJIgne6A1WLOAGM8A=
github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25/go.mod h1:lbP8tGiBjZ5YWIc2fzuRpTaz0b/53vT6PEs3QuAWzuU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=

104
lib/stati/covar.go Normal file
View File

@ -0,0 +1,104 @@
package stati
import "math"
// Covar is a streaming accumulator for paired (x, y) samples. It tracks the
// running means, the sums of squared deviations of x and y, and the sum of
// co-deviations, from which variance, covariance, correlation and a linear
// fit y = A*x + B can be derived. The zero value is an empty accumulator.
type Covar struct {
	meanX float64 // running mean of x
	meanY float64 // running mean of y
	c     float64 // co-moment: sum of (x-meanX)*(y-meanY)
	n     float64 // number of points seen
	m2x   float64 // sum of squared deviations of x from meanX
	m2y   float64 // sum of squared deviations of y from meanY
}

// MeanX returns the running mean of the x values.
func (cv *Covar) MeanX() float64 {
	return cv.meanX
}

// MeanY returns the running mean of the y values.
func (cv *Covar) MeanY() float64 {
	return cv.meanY
}

// N returns the number of points accumulated so far.
func (cv *Covar) N() float64 {
	return cv.n
}

// Covariance returns the sample covariance (divisor n-1). The result is not
// meaningful when fewer than two points have been added.
func (cv *Covar) Covariance() float64 {
	return cv.c / (cv.n - 1)
}

// VarianceX returns the sample variance of x (divisor n-1). The result is not
// meaningful when fewer than two points have been added.
func (cv *Covar) VarianceX() float64 {
	return cv.m2x / (cv.n - 1)
}

// StddevX returns the square root of VarianceX.
func (cv *Covar) StddevX() float64 {
	return math.Sqrt(cv.VarianceX())
}

// VarianceY returns the sample variance of y (divisor n-1). The result is not
// meaningful when fewer than two points have been added.
func (cv *Covar) VarianceY() float64 {
	return cv.m2y / (cv.n - 1)
}

// StddevY returns the square root of VarianceY.
func (cv *Covar) StddevY() float64 {
	return math.Sqrt(cv.VarianceY())
}

// AddPoint folds a single (x, y) sample into the accumulator using the
// Welford-style online update.
func (cv *Covar) AddPoint(x, y float64) {
	cv.n++

	dx := x - cv.meanX // deviation from the mean before the update
	cv.meanX += dx / cv.n
	dx2 := x - cv.meanX // deviation from the mean after the update
	cv.m2x += dx * dx2

	dy := y - cv.meanY
	cv.meanY += dy / cv.n
	dy2 := y - cv.meanY
	cv.m2y += dy * dy2

	// The co-moment must pair one pre-update deviation with one post-update
	// deviation, exactly like m2x and m2y above. Multiplying two pre-update
	// deviations would over-count each point by a factor of n/(n-1).
	cv.c += dx * dy2
}

// Combine merges the samples summarised by cov2 into the receiver, as if every
// point added to cov2 had been added to the receiver. cov2 is not modified.
func (cv *Covar) Combine(cov2 *Covar) {
	if cv.n == 0 {
		*cv = *cov2
		return
	}
	if cov2.n == 0 {
		return
	}

	// The pairwise merge below is also exact when either side holds a single
	// point (its m2x, m2y and c are zero), so no further special cases are
	// needed.
	out := Covar{}
	out.n = cv.n + cov2.n

	dx := cv.meanX - cov2.meanX
	out.meanX = cv.meanX - dx*cov2.n/out.n
	out.m2x = cv.m2x + cov2.m2x + dx*dx*cv.n*cov2.n/out.n

	dy := cv.meanY - cov2.meanY
	out.meanY = cv.meanY - dy*cov2.n/out.n
	out.m2y = cv.m2y + cov2.m2y + dy*dy*cv.n*cov2.n/out.n

	out.c = cv.c + cov2.c + dx*dy*cv.n*cov2.n/out.n
	*cv = out
}

// A returns the slope of the least-squares line y = A*x + B, computed as
// Covariance / VarianceX.
func (cv *Covar) A() float64 {
	return cv.Covariance() / cv.VarianceX()
}

// B returns the intercept of the least-squares line y = A*x + B.
func (cv *Covar) B() float64 {
	return cv.meanY - cv.meanX*cv.A()
}

// Correl returns the sample correlation coefficient, computed as
// Covariance / (StddevX * StddevY).
func (cv *Covar) Correl() float64 {
	return cv.Covariance() / cv.StddevX() / cv.StddevY()
}

56
lib/stati/histo.go Normal file
View File

@ -0,0 +1,56 @@
package stati
import (
"math"
"golang.org/x/xerrors"
)
// Histogram counts observations against an ascending list of lower bounds.
//
// Buckets holds the lower bounds with -Inf prepended at index 0. Counts is
// cumulative from the top: Counts[i] is the number of observed values x with
// x >= Buckets[i].
type Histogram struct {
	Buckets []float64
	Counts  []uint64
}

// NewHistogram creates a histogram with buckets defined as:
// {x >= -Inf, x >= buckets[0], x >= buckets[1], ..., x >= buckets[i]}
//
// buckets must be non-empty and non-decreasing; otherwise an error is
// returned. The input slice is copied, not retained.
func NewHistogram(buckets []float64) (*Histogram, error) {
	if len(buckets) == 0 {
		return nil, xerrors.Errorf("empty buckets")
	}
	prev := buckets[0]
	for i, v := range buckets[1:] {
		if v < prev {
			return nil, xerrors.Errorf("bucket at index %d is smaller than previous %f < %f", i+1, v, prev)
		}
		prev = v
	}
	h := &Histogram{
		Buckets: append([]float64{math.Inf(-1)}, buckets...),
		Counts:  make([]uint64, len(buckets)+1),
	}
	return h, nil
}

// Observe records x by incrementing the cumulative count of every bucket
// whose lower bound is <= x.
func (h *Histogram) Observe(x float64) {
	for i, b := range h.Buckets {
		if x < b {
			// Bounds are ascending, so no later bucket can match either.
			break
		}
		h.Counts[i]++
	}
}

// Total returns the number of observations counted in the -Inf bucket, i.e.
// every value recorded through Observe.
func (h *Histogram) Total() uint64 {
	return h.Counts[0]
}

// Get returns the number of observations that fall into the i-th bucket
// passed to NewHistogram, i.e. values x with buckets[i] <= x < buckets[i+1].
// The last bucket has no upper bound. Get(-1) returns the number of
// observations below buckets[0]. Indices outside that range yield 0.
func (h *Histogram) Get(i int) uint64 {
	// Counts is shifted by one relative to the caller's buckets because of
	// the prepended -Inf bucket.
	lo := i + 1
	if lo < 0 || lo >= len(h.Counts) {
		return 0
	}
	if lo == len(h.Counts)-1 {
		// Last bucket: nothing above it to subtract.
		return h.Counts[lo]
	}
	return h.Counts[lo] - h.Counts[lo+1]
}

// GetRatio returns Get(i) as a fraction of Total. It returns 0 when nothing
// has been observed yet.
func (h *Histogram) GetRatio(i int) float64 {
	total := h.Total()
	if total == 0 {
		return 0
	}
	return float64(h.Get(i)) / float64(total)
}

66
lib/stati/meanvar.go Normal file
View File

@ -0,0 +1,66 @@
package stati
import (
"fmt"
"math"
)
// MeanVar is a streaming accumulator for the mean and variance of a series of
// float64 samples. The zero value is an empty accumulator.
type MeanVar struct {
	n    float64 // number of samples seen
	mean float64 // running mean
	m2   float64 // sum of squared deviations from the running mean
}

// AddPoint folds one sample into the accumulator.
//
// Uses Welford's online algorithm, see
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
func (mv *MeanVar) AddPoint(value float64) {
	mv.n++
	before := value - mv.mean // distance to the mean prior to this sample
	mv.mean += before / mv.n
	after := value - mv.mean // distance to the mean including this sample
	mv.m2 += before * after
}

// Mean reports the running mean.
func (mv *MeanVar) Mean() float64 {
	return mv.mean
}

// N reports how many samples have been accumulated.
func (mv *MeanVar) N() float64 {
	return mv.n
}

// Variance reports the sample variance (divisor n-1).
func (mv *MeanVar) Variance() float64 {
	denom := mv.n - 1
	return mv.m2 / denom
}

// Stddev reports the square root of Variance.
func (mv *MeanVar) Stddev() float64 {
	return math.Sqrt(mv.Variance())
}

// String renders the accumulator as "<mean> stddev: <stddev> (<count>)".
func (mv MeanVar) String() string {
	mean, dev, count := mv.Mean(), mv.Stddev(), mv.N()
	return fmt.Sprintf("%f stddev: %f (%.0f)", mean, dev, count)
}

// Combine merges the samples summarised by v2 into the receiver. v2 itself is
// left untouched.
func (mv *MeanVar) Combine(v2 *MeanVar) {
	switch {
	case mv.n == 0:
		// Receiver is empty: adopt the other side wholesale.
		*mv = *v2
	case v2.n == 0:
		// Nothing to merge.
	case mv.n == 1:
		// Receiver is a single sample: replay it into a copy of v2.
		merged := *v2
		merged.AddPoint(mv.mean)
		*mv = merged
	case v2.n == 1:
		// v2 is a single sample: replay it into the receiver.
		mv.AddPoint(v2.mean)
	default:
		// General pairwise merge of two partial summaries.
		total := mv.n + v2.n
		gap := v2.mean - mv.mean
		shift := gap * v2.n / total
		mv.m2 = mv.m2 + v2.m2 + gap*shift*mv.n
		mv.n = total
		mv.mean += shift
	}
}

47
lib/stati/stats_test.go Normal file
View File

@ -0,0 +1,47 @@
package stati
import (
"math/rand"
"testing"
)
// TestMeanVar checks that merging per-chunk accumulators with Combine agrees
// with a single accumulator that was fed every sample directly.
func TestMeanVar(t *testing.T) {
	const (
		N   = 16
		tol = 1e-9
	)
	ss := make([]*MeanVar, N)
	direct := &MeanVar{} // reference: sees every sample in order
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < N; i++ {
		ss[i] = &MeanVar{}
		maxJ := rng.Intn(1000)
		for j := 0; j < maxJ; j++ {
			v := rng.NormFloat64()*5 + 500
			ss[i].AddPoint(v)
			direct.AddPoint(v)
		}
		t.Logf("mean: %f, stddev: %f, count %f", ss[i].mean, ss[i].Stddev(), ss[i].n)
	}

	out := &MeanVar{}
	for i := 0; i < N; i++ {
		out.Combine(ss[i])
		t.Logf("combine: mean: %f, stddev: %f", out.mean, out.Stddev())
	}

	if out.N() != direct.N() {
		t.Errorf("combined count %f, want %f", out.N(), direct.N())
	}
	if d := out.Mean() - direct.Mean(); d > tol || d < -tol {
		t.Errorf("combined mean %f, want %f", out.Mean(), direct.Mean())
	}
	if d := out.Stddev() - direct.Stddev(); d > tol || d < -tol {
		t.Errorf("combined stddev %f, want %f", out.Stddev(), direct.Stddev())
	}
}
// TestCovar builds several Covar accumulators over points on the line
// y = 2x - 1000, merges them with Combine, and checks that the merged sample
// count and means agree with a single accumulator fed every point directly.
// The fitted line and spread figures are logged for inspection.
func TestCovar(t *testing.T) {
	const (
		N   = 16
		tol = 1e-9
	)
	ss := make([]*Covar, N)
	direct := &Covar{} // reference: sees every point in order
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < N; i++ {
		ss[i] = &Covar{}
		maxJ := rng.Intn(1000) + 500
		for j := 0; j < maxJ; j++ {
			x := rng.NormFloat64()*5 + 500
			y := x*2 - 1000
			ss[i].AddPoint(x, y)
			direct.AddPoint(x, y)
		}
		t.Logf("corell: %f, y = %f*x+%f @%.0f", ss[i].Correl(), ss[i].A(), ss[i].B(), ss[i].n)
		t.Logf("\txVar: %f yVar: %f covar: %f", ss[i].StddevX(), ss[i].StddevY(), ss[i].Covariance())
	}

	out := &Covar{}
	for i := 0; i < N; i++ {
		out.Combine(ss[i])
		t.Logf("combine: corell: %f, y = %f*x+%f", out.Correl(), out.A(), out.B())
		t.Logf("\txVar: %f yVar: %f covar: %f", out.StddevX(), out.StddevY(), out.Covariance())
	}

	if out.N() != direct.N() {
		t.Errorf("combined count %f, want %f", out.N(), direct.N())
	}
	if d := out.MeanX() - direct.MeanX(); d > tol || d < -tol {
		t.Errorf("combined meanX %f, want %f", out.MeanX(), direct.MeanX())
	}
	if d := out.MeanY() - direct.MeanY(); d > tol || d < -tol {
		t.Errorf("combined meanY %f, want %f", out.MeanY(), direct.MeanY())
	}
}