From c0fce07889773fa90b12b51c6ad6384882d22a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Nov 2022 19:51:18 +0100 Subject: [PATCH] sched: Metrics around scheduling workload --- metrics/metrics.go | 24 ++++++++++++++++++++++++ storage/sealer/sched_assigner_common.go | 5 +++++ 2 files changed, 29 insertions(+) diff --git a/metrics/metrics.go b/metrics/metrics.go index fb40d97e2..685293640 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -22,6 +22,8 @@ var workMillisecondsDistribution = view.Distribution( 350*60_000, 400*60_000, 600*60_000, 800*60_000, 1000*60_000, 1300*60_000, 1800*60_000, 4000*60_000, 10000*60_000, // intel PC1 range ) +var queueSizeDistribution = view.Distribution(0, 1, 2, 3, 5, 7, 10, 15, 25, 35, 50, 70, 90, 130, 200, 300, 500, 1000, 2000, 5000, 10000) + // Global Tags var ( // common @@ -140,6 +142,8 @@ var ( SchedAssignerCandidatesDuration = stats.Float64("sched/assigner_cycle_candidates_ms", "Duration of scheduler assigner candidate matching step", stats.UnitMilliseconds) SchedAssignerWindowSelectionDuration = stats.Float64("sched/assigner_cycle_window_select_ms", "Duration of scheduler window selection step", stats.UnitMilliseconds) SchedAssignerSubmitDuration = stats.Float64("sched/assigner_cycle_submit_ms", "Duration of scheduler window submit step", stats.UnitMilliseconds) + SchedCycleOpenWindows = stats.Int64("sched/assigner_cycle_open_window_count", "Number of open windows in scheduling cycles", stats.UnitDimensionless) + SchedCycleQueueSize = stats.Int64("sched/assigner_cycle_task_queue_entry_count", "Number of task queue entries in scheduling cycles", stats.UnitDimensionless) DagStorePRInitCount = stats.Int64("dagstore/pr_init_count", "PieceReader init count", stats.UnitDimensionless) DagStorePRBytesRequested = stats.Int64("dagstore/pr_requested_bytes", "PieceReader requested bytes", stats.UnitBytes) @@ -449,6 +453,22 @@ var ( Measure: SchedAssignerSubmitDuration, Aggregation: 
defaultMillisecondsDistribution, } + SchedCycleOpenWindowsView = &view.View{ + Measure: SchedCycleOpenWindows, + Aggregation: queueSizeDistribution, + } + SchedCycleQueueSizeView = &view.View{ + Measure: SchedCycleQueueSize, + Aggregation: queueSizeDistribution, + } + SchedCycleLastOpenWindowsView = &view.View{ + Measure: SchedCycleOpenWindows, + Aggregation: view.LastValue(), + } + SchedCycleLastQueueSizeView = &view.View{ + Measure: SchedCycleQueueSize, + Aggregation: view.LastValue(), + } DagStorePRInitCountView = &view.View{ Measure: DagStorePRInitCount, @@ -736,6 +756,10 @@ var MinerNodeViews = append([]*view.View{ SchedAssignerCandidatesDurationView, SchedAssignerWindowSelectionDurationView, SchedAssignerSubmitDurationView, + SchedCycleOpenWindowsView, + SchedCycleQueueSizeView, + SchedCycleLastQueueSizeView, + SchedCycleLastOpenWindowsView, DagStorePRInitCountView, DagStorePRBytesRequestedView, diff --git a/storage/sealer/sched_assigner_common.go b/storage/sealer/sched_assigner_common.go index b6197cf2d..fb0a0f147 100644 --- a/storage/sealer/sched_assigner_common.go +++ b/storage/sealer/sched_assigner_common.go @@ -6,6 +6,8 @@ import ( "sort" "sync" + "go.opencensus.io/stats" + "github.com/filecoin-project/lotus/metrics" ) @@ -38,6 +40,9 @@ func (a *AssignerCommon) TrySched(sh *Scheduler) { windowsLen := len(sh.OpenWindows) queueLen := sh.SchedQueue.Len() + stats.Record(sh.mctx, metrics.SchedCycleOpenWindows.M(int64(windowsLen))) + stats.Record(sh.mctx, metrics.SchedCycleQueueSize.M(int64(queueLen))) + log.Debugf("SCHED %d queued; %d open windows", queueLen, windowsLen) if windowsLen == 0 || queueLen == 0 {