diff --git a/cmd/lotus-sim/info.go b/cmd/lotus-sim/info.go index 757c6d6e7..6a37f7258 100644 --- a/cmd/lotus-sim/info.go +++ b/cmd/lotus-sim/info.go @@ -10,6 +10,7 @@ import ( "time" "github.com/ipfs/go-cid" + "github.com/streadway/quantile" "github.com/urfave/cli/v2" "github.com/filecoin-project/go-state-types/big" @@ -21,6 +22,7 @@ import ( "github.com/filecoin-project/lotus/chain/stmgr" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/cmd/lotus-sim/simulation" + "github.com/filecoin-project/lotus/lib/stati" ) func getTotalPower(ctx context.Context, sm *stmgr.StateManager, ts *types.TipSet) (power.Claim, error) { @@ -177,6 +179,31 @@ var infoCommitGasSimCommand = &cli.Command{ var gasAggMax, proofsAggMax uint64 var gasSingle, proofsSingle uint64 + qpoints := []struct{ q, tol float64 }{ + {0.01, 0.0005}, + {0.05, 0.001}, + {0.20, 0.01}, + {0.25, 0.01}, + {0.30, 0.01}, + {0.40, 0.01}, + {0.45, 0.01}, + {0.50, 0.01}, + {0.60, 0.01}, + {0.80, 0.01}, + {0.95, 0.001}, + {0.99, 0.0005}, + } + estims := make([]quantile.Estimate, len(qpoints)) + for i, p := range qpoints { + estims[i] = quantile.Known(p.q, p.tol) + } + qua := quantile.New(estims...) + hist, err := stati.NewHistogram([]float64{ + 1, 3, 5, 7, 15, 30, 50, 100, 200, 400, 600, 700, 819}) + if err != nil { + return err + } + err = sim.Walk(cctx.Context, cctx.Int64("lookback"), func( sm *stmgr.StateManager, ts *types.TipSet, stCid cid.Cid, messages []*simulation.AppliedMessage, @@ -203,11 +230,17 @@ var infoCommitGasSimCommand = &cli.Command{ gasAggMax += uint64(m.GasUsed) proofsAggMax += c } + for i := uint64(0); i < c; i++ { + qua.Add(float64(c)) + } + hist.Observe(float64(c)) } if m.Method == builtin.MethodsMiner.ProveCommitSector { gasSingle += uint64(m.GasUsed) proofsSingle++ + qua.Add(1) + hist.Observe(1) } } @@ -220,6 +253,22 @@ var infoCommitGasSimCommand = &cli.Command{ fmt.Printf("Gas usage efficiency in comparison to all 819: %f%%\n", 100*idealGassUsed/float64(gasAgg+gasSingle)) + fmt.Printf("Proofs in singles: %d\n", proofsSingle) + fmt.Printf("Proofs in Aggs: %d\n", proofsAgg) + fmt.Printf("Proofs in Aggs(819): %d\n", proofsAggMax) + + fmt.Println() + fmt.Println("Quantiles of proofs in given aggregate size:") + for _, p := range qpoints { + fmt.Printf("%.0f%%\t%.0f\n", p.q*100, qua.Get(p.q)) + } + fmt.Println() + fmt.Println("Histogram of messages:") + fmt.Printf("Total\t%d\n", hist.Total()) + for i, b := range hist.Buckets[1:] { + fmt.Printf("%.0f\t%d\n", b, hist.Get(i)) + } + return nil }, } diff --git a/go.mod b/go.mod index 411522a36..5bf9094f0 100644 --- a/go.mod +++ b/go.mod @@ -133,6 +133,7 @@ require ( github.com/prometheus/client_golang v1.6.0 github.com/raulk/clock v1.1.0 github.com/raulk/go-watchdog v1.0.1 + github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25 github.com/stretchr/objx v0.2.0 // indirect github.com/stretchr/testify v1.7.0 github.com/syndtr/goleveldb v1.0.0 diff --git a/go.sum b/go.sum index 5573587fd..cfc1d4221 100644 --- a/go.sum +++ b/go.sum @@ -1513,6 +1513,8 @@ github.com/src-d/envconfig v1.0.0/go.mod h1:Q9YQZ7BKITldTBnoxsE5gOeB5y66RyPXeue/ github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= +github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25 h1:7z3LSn867ex6VSaahyKadf4WtSsJIgne6A1WLOAGM8A= +github.com/streadway/quantile v0.0.0-20150917103942-b0c588724d25/go.mod h1:lbP8tGiBjZ5YWIc2fzuRpTaz0b/53vT6PEs3QuAWzuU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= diff --git a/lib/stati/covar.go b/lib/stati/covar.go new file mode 100644 index 000000000..c92fd8b74 --- /dev/null +++ b/lib/stati/covar.go @@ -0,0 +1,104 @@ +package stati + +import "math" + +type Covar struct { + meanX float64 + meanY float64 + c float64 + n float64 + m2x float64 + m2y float64 +} + +func (cov1 *Covar) MeanX() float64 { + return cov1.meanX +} + +func (cov1 *Covar) MeanY() float64 { + return cov1.meanY +} + +func (cov1 *Covar) N() float64 { + return cov1.n +} + +func (cov1 *Covar) Covariance() float64 { + return cov1.c / (cov1.n - 1) +} + +func (cov1 *Covar) VarianceX() float64 { + return cov1.m2x / (cov1.n - 1) +} + +func (cov1 *Covar) StddevX() float64 { + return math.Sqrt(cov1.VarianceX()) +} + +func (cov1 *Covar) VarianceY() float64 { + return cov1.m2y / (cov1.n - 1) +} + +func (cov1 *Covar) StddevY() float64 { + return math.Sqrt(cov1.VarianceY()) +} + +func (cov1 *Covar) AddPoint(x, y float64) { + cov1.n++ + + dx := x - cov1.meanX + cov1.meanX += dx / cov1.n + dx2 := x - cov1.meanX + cov1.m2x += dx * dx2 + + dy := y - cov1.meanY + cov1.meanY += dy / cov1.n + dy2 := y - cov1.meanY + cov1.m2y += dy * dy2 + + cov1.c += dx * dy +} + +func (cov1 *Covar) Combine(cov2 *Covar) { + if cov1.n == 0 { + *cov1 = *cov2 + return + } + if cov2.n == 0 { + return + } + + if cov1.n == 1 { + cpy := *cov2 + cpy.AddPoint(cov2.meanX, cov2.meanY) + *cov1 = cpy + return + } + if cov2.n == 1 { + cov1.AddPoint(cov2.meanX, cov2.meanY) + } + + out := Covar{} + out.n = cov1.n + cov2.n + + dx := cov1.meanX - cov2.meanX + out.meanX = cov1.meanX - dx*cov2.n/out.n + out.m2x = cov1.m2x + cov2.m2x + dx*dx*cov1.n*cov2.n/out.n + + dy := cov1.meanY - cov2.meanY + out.meanY = cov1.meanY - dy*cov2.n/out.n + out.m2y = cov1.m2y + cov2.m2y + dy*dy*cov1.n*cov2.n/out.n + + out.c = cov1.c + cov2.c + dx*dy*cov1.n*cov2.n/out.n + *cov1 = out +} + +func (cov1 *Covar) A() float64 { + return cov1.Covariance() / cov1.VarianceX() +} +func (cov1 *Covar) B() float64 { + return cov1.meanY - cov1.meanX*cov1.A() +} +func (cov1 *Covar) Correl() float64 { + return cov1.Covariance() / cov1.StddevX() / cov1.StddevY() +} diff --git a/lib/stati/histo.go b/lib/stati/histo.go new file mode 100644 index 000000000..3c410c0d0 --- /dev/null +++ b/lib/stati/histo.go @@ -0,0 +1,56 @@ +package stati + +import ( + "math" + + "golang.org/x/xerrors" +) + +type Histogram struct { + Buckets []float64 + Counts []uint64 +} + +// NewHistogram creates a histograme with buckets defined as: +// {x > -Inf, x >= buckets[0], x >= buckets[1], ..., x >= buckets[i]} +func NewHistogram(buckets []float64) (*Histogram, error) { + if len(buckets) == 0 { + return nil, xerrors.Errorf("empty buckets") + } + prev := buckets[0] + for i, v := range buckets[1:] { + if v < prev { + return nil, xerrors.Errorf("bucket at index %d is smaller than previous %f < %f", i+1, v, prev) + } + prev = v + } + h := &Histogram{ + Buckets: append([]float64{math.Inf(-1)}, buckets...), + Counts: make([]uint64, len(buckets)+1), + } + return h, nil +} + +func (h *Histogram) Observe(x float64) { + for i, b := range h.Buckets { + if x >= b { + h.Counts[i]++ + } else { + break + } + } +} + +func (h *Histogram) Total() uint64 { + return h.Counts[0] +} + +func (h *Histogram) Get(i int) uint64 { + if i >= len(h.Counts)-2 { + return h.Counts[i] + } + return h.Counts[i+1] - h.Counts[i+2] +} +func (h *Histogram) GetRatio(i int) float64 { + return float64(h.Get(i)) / float64(h.Total()) +} diff --git a/lib/stati/meanvar.go b/lib/stati/meanvar.go new file mode 100644 index 000000000..b77aaa638 --- /dev/null +++ b/lib/stati/meanvar.go @@ -0,0 +1,66 @@ +package stati + +import ( + "fmt" + "math" +) + +type MeanVar struct { + n float64 + mean float64 + m2 float64 +} + +func (v1 *MeanVar) AddPoint(value float64) { + // based on https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm + v1.n++ + delta := value - v1.mean + v1.mean += delta / v1.n + delta2 := value - v1.mean + v1.m2 += delta * delta2 +} + +func (v1 *MeanVar) Mean() float64 { + return v1.mean +} +func (v1 *MeanVar) N() float64 { + return v1.n +} +func (v1 *MeanVar) Variance() float64 { + return v1.m2 / (v1.n - 1) +} +func (v1 *MeanVar) Stddev() float64 { + return math.Sqrt(v1.Variance()) +} + +func (v1 MeanVar) String() string { + return fmt.Sprintf("%f stddev: %f (%.0f)", v1.Mean(), v1.Stddev(), v1.N()) +} + +func (v1 *MeanVar) Combine(v2 *MeanVar) { + if v1.n == 0 { + *v1 = *v2 + return + } + if v2.n == 0 { + return + } + if v1.n == 1 { + cpy := *v2 + cpy.AddPoint(v1.mean) + *v1 = cpy + return + } + if v2.n == 1 { + v1.AddPoint(v2.mean) + return + } + + newCount := v1.n + v2.n + delta := v2.mean - v1.mean + meanDelta := delta * v2.n / newCount + m2 := v1.m2 + v2.m2 + delta*meanDelta*v1.n + v1.n = newCount + v1.mean += meanDelta + v1.m2 = m2 +} diff --git a/lib/stati/stats_test.go b/lib/stati/stats_test.go new file mode 100644 index 000000000..fa92913b6 --- /dev/null +++ b/lib/stati/stats_test.go @@ -0,0 +1,47 @@ +package stati + +import ( + "math/rand" + "testing" +) + +func TestMeanVar(t *testing.T) { + N := 16 + ss := make([]*MeanVar, N) + rng := rand.New(rand.NewSource(1)) + for i := 0; i < N; i++ { + ss[i] = &MeanVar{} + maxJ := rng.Intn(1000) + for j := 0; j < maxJ; j++ { + ss[i].AddPoint(rng.NormFloat64()*5 + 500) + } + t.Logf("mean: %f, stddev: %f, count %f", ss[i].mean, ss[i].Stddev(), ss[i].n) + } + out := &MeanVar{} + for i := 0; i < N; i++ { + out.Combine(ss[i]) + t.Logf("combine: mean: %f, stddev: %f", out.mean, out.Stddev()) + } +} + +func TestCovar(t *testing.T) { + N := 16 + ss := make([]*Covar, N) + rng := rand.New(rand.NewSource(1)) + for i := 0; i < N; i++ { + ss[i] = &Covar{} + maxJ := rng.Intn(1000) + 500 + for j := 0; j < maxJ; j++ { + x := rng.NormFloat64()*5 + 500 + ss[i].AddPoint(x, x*2-1000) + } + t.Logf("corell: %f, y = %f*x+%f @%.0f", ss[i].Correl(), ss[i].A(), ss[i].B(), ss[i].n) + t.Logf("\txVar: %f yVar: %f covar: %f", ss[i].StddevX(), ss[i].StddevY(), ss[i].Covariance()) + } + out := &Covar{} + for i := 0; i < N; i++ { + out.Combine(ss[i]) + t.Logf("combine: corell: %f, y = %f*x+%f", out.Correl(), out.A(), out.B()) + t.Logf("\txVar: %f yVar: %f covar: %f", out.StddevX(), out.StddevY(), out.Covariance()) + } +}