diff --git a/node/builder.go b/node/builder.go index af604983d..10d366d56 100644 --- a/node/builder.go +++ b/node/builder.go @@ -89,6 +89,7 @@ const ( // health checks CheckFDLimit + CheckFvmConcurrency LegacyMarketsEOL // libp2p @@ -165,6 +166,7 @@ func defaults() []Option { Override(new(dtypes.NodeStartTime), FromVal(dtypes.NodeStartTime(time.Now()))), Override(CheckFDLimit, modules.CheckFdLimit(build.DefaultFDLimit)), + Override(CheckFvmConcurrency, modules.CheckFvmConcurrency()), Override(new(system.MemoryConstraints), modules.MemoryConstraints), Override(InitMemoryWatchdog, modules.MemoryWatchdog), diff --git a/node/modules/alerts.go b/node/modules/alerts.go index df6b76435..fc375b0df 100644 --- a/node/modules/alerts.go +++ b/node/modules/alerts.go @@ -1,6 +1,9 @@ package modules import ( + "os" + "strconv" + "github.com/filecoin-project/lotus/journal/alerting" "github.com/filecoin-project/lotus/lib/ulimit" ) @@ -42,6 +45,35 @@ func LegacyMarketsEOL(al *alerting.Alerting) { }) } +func CheckFvmConcurrency() func(al *alerting.Alerting) { + return func(al *alerting.Alerting) { + fvmConcurrency, ok := os.LookupEnv("LOTUS_FVM_CONCURRENCY") + if !ok { + return + } + + fvmConcurrencyVal, err := strconv.Atoi(fvmConcurrency) + if err != nil { + alert := al.AddAlertType("process", "fvm-concurrency") + al.Raise(alert, map[string]string{ + "message": "LOTUS_FVM_CONCURRENCY is not an integer", + "error": err.Error(), + }) + return + } + + // Raise alert if LOTUS_FVM_CONCURRENCY is set to a high value + if fvmConcurrencyVal >= 24 { + alert := al.AddAlertType("process", "fvm-concurrency") + al.Raise(alert, map[string]interface{}{ + "message": "LOTUS_FVM_CONCURRENCY is set to a high value that can cause chain sync panics on network migrations/upgrades", + "set_value": fvmConcurrencyVal, + "recommended": "23 or less during network upgrades", + }) + } + } +} + // TODO: More things: // * Space in repo dirs (taking into account mounts) // * Miner