Merge pull request #10933 from filecoin-project/feat/fvm-concurrency-alert

feat: alert: Add FVM_CONCURRENCY alert
This commit is contained in:
Łukasz Magiera 2023-05-31 20:14:45 +02:00 committed by GitHub
commit 6e7dc9532a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 0 deletions

View File

@ -89,6 +89,7 @@ const (
// health checks
CheckFDLimit
CheckFvmConcurrency
LegacyMarketsEOL
// libp2p
@ -165,6 +166,7 @@ func defaults() []Option {
Override(new(dtypes.NodeStartTime), FromVal(dtypes.NodeStartTime(time.Now()))),
Override(CheckFDLimit, modules.CheckFdLimit(build.DefaultFDLimit)),
Override(CheckFvmConcurrency, modules.CheckFvmConcurrency()),
Override(new(system.MemoryConstraints), modules.MemoryConstraints),
Override(InitMemoryWatchdog, modules.MemoryWatchdog),

View File

@ -1,6 +1,9 @@
package modules
import (
"os"
"strconv"
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/filecoin-project/lotus/lib/ulimit"
)
@ -42,6 +45,35 @@ func LegacyMarketsEOL(al *alerting.Alerting) {
})
}
// CheckFvmConcurrency returns a health-check hook that inspects the
// LOTUS_FVM_CONCURRENCY environment variable.
//
// The hook does nothing when the variable is unset. Otherwise it registers the
// "process"/"fvm-concurrency" alert type and raises it in two situations:
//   - the value cannot be parsed as an integer (the parse error is attached);
//   - the parsed value is greater than 24.
//
// The alert type is only registered when one of those conditions holds.
func CheckFvmConcurrency() func(al *alerting.Alerting) {
	return func(al *alerting.Alerting) {
		raw, isSet := os.LookupEnv("LOTUS_FVM_CONCURRENCY")
		if !isSet {
			return
		}

		concurrency, parseErr := strconv.Atoi(raw)

		switch {
		case parseErr != nil:
			// Set, but not a valid integer: report the parse failure.
			alertType := al.AddAlertType("process", "fvm-concurrency")
			al.Raise(alertType, map[string]string{
				"message": "LOTUS_FVM_CONCURRENCY is not an integer",
				"error":   parseErr.Error(),
			})

		case concurrency > 24:
			// Valid integer, but above the threshold this check warns about.
			alertType := al.AddAlertType("process", "fvm-concurrency")
			details := map[string]interface{}{
				"message":     "LOTUS_FVM_CONCURRENCY is set to a high value that can cause chain sync panics on network migrations/upgrades",
				"set_value":   concurrency,
				"recommended": "24 or less during network upgrades",
			}
			al.Raise(alertType, details)
		}
	}
}
// TODO: More things:
// * Space in repo dirs (taking into account mounts)
// * Miner