2021-08-17 12:39:36 +00:00
|
|
|
package modules
|
|
|
|
|
|
|
|
import (
|
2023-10-30 09:36:15 +00:00
|
|
|
"net"
|
2023-05-31 13:39:03 +00:00
|
|
|
"os"
|
|
|
|
"strconv"
|
2023-10-30 09:36:15 +00:00
|
|
|
"syscall"
|
2023-05-31 13:39:03 +00:00
|
|
|
|
2021-08-17 12:39:36 +00:00
|
|
|
"github.com/filecoin-project/lotus/journal/alerting"
|
|
|
|
"github.com/filecoin-project/lotus/lib/ulimit"
|
|
|
|
)
|
|
|
|
|
|
|
|
func CheckFdLimit(min uint64) func(al *alerting.Alerting) {
|
|
|
|
return func(al *alerting.Alerting) {
|
2021-08-26 13:59:54 +00:00
|
|
|
soft, _, err := ulimit.GetLimit()
|
|
|
|
|
|
|
|
if err == ulimit.ErrUnsupported {
|
|
|
|
log.Warn("FD limit monitoring not available")
|
2021-08-17 12:39:36 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
alert := al.AddAlertType("process", "fd-limit")
|
|
|
|
if err != nil {
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "failed to get FD limit",
|
|
|
|
"error": err.Error(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if soft < min {
|
|
|
|
al.Raise(alert, map[string]interface{}{
|
|
|
|
"message": "soft FD limit is low",
|
|
|
|
"soft_limit": soft,
|
|
|
|
"recommended_min": min,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-30 09:36:15 +00:00
|
|
|
func CheckUDPBufferSize(wanted int) func(al *alerting.Alerting) {
|
|
|
|
return func(al *alerting.Alerting) {
|
|
|
|
conn, err := net.Dial("udp", "localhost:0")
|
|
|
|
if err != nil {
|
|
|
|
alert := al.AddAlertType("process", "udp-buffer-size")
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "Failed to create UDP connection",
|
|
|
|
"error": err.Error(),
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
2023-10-30 09:59:59 +00:00
|
|
|
defer func() {
|
|
|
|
if err := conn.Close(); err != nil {
|
|
|
|
log.Warnf("Failed to close connection: %s", err)
|
|
|
|
}
|
|
|
|
}()
|
2023-10-30 09:36:15 +00:00
|
|
|
|
|
|
|
udpConn, ok := conn.(*net.UDPConn)
|
|
|
|
if !ok {
|
|
|
|
alert := al.AddAlertType("process", "udp-buffer-size")
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "Failed to cast connection to UDPConn",
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
file, err := udpConn.File()
|
|
|
|
if err != nil {
|
|
|
|
alert := al.AddAlertType("process", "udp-buffer-size")
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "Failed to get file descriptor from UDPConn",
|
|
|
|
"error": err.Error(),
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
2023-10-30 09:59:59 +00:00
|
|
|
defer func() {
|
|
|
|
if err := file.Close(); err != nil {
|
|
|
|
log.Warnf("Failed to close file: %s", err)
|
|
|
|
}
|
|
|
|
}()
|
2023-10-30 09:36:15 +00:00
|
|
|
|
|
|
|
size, err := syscall.GetsockoptInt(int(file.Fd()), syscall.SOL_SOCKET, syscall.SO_RCVBUF)
|
|
|
|
if err != nil {
|
|
|
|
alert := al.AddAlertType("process", "udp-buffer-size")
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "Failed to get UDP buffer size",
|
|
|
|
"error": err.Error(),
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if size < wanted {
|
|
|
|
alert := al.AddAlertType("process", "udp-buffer-size")
|
|
|
|
al.Raise(alert, map[string]interface{}{
|
|
|
|
"message": "UDP buffer size is low",
|
|
|
|
"current_size": size,
|
|
|
|
"wanted_size": wanted,
|
|
|
|
"help": "See https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes for details.",
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-31 13:39:03 +00:00
|
|
|
func CheckFvmConcurrency() func(al *alerting.Alerting) {
|
|
|
|
return func(al *alerting.Alerting) {
|
|
|
|
fvmConcurrency, ok := os.LookupEnv("LOTUS_FVM_CONCURRENCY")
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
fvmConcurrencyVal, err := strconv.Atoi(fvmConcurrency)
|
|
|
|
if err != nil {
|
|
|
|
alert := al.AddAlertType("process", "fvm-concurrency")
|
|
|
|
al.Raise(alert, map[string]string{
|
|
|
|
"message": "LOTUS_FVM_CONCURRENCY is not an integer",
|
|
|
|
"error": err.Error(),
|
|
|
|
})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Raise alert if LOTUS_FVM_CONCURRENCY is set to a high value
|
2023-05-31 13:43:26 +00:00
|
|
|
if fvmConcurrencyVal > 24 {
|
2023-05-31 13:39:03 +00:00
|
|
|
alert := al.AddAlertType("process", "fvm-concurrency")
|
|
|
|
al.Raise(alert, map[string]interface{}{
|
|
|
|
"message": "LOTUS_FVM_CONCURRENCY is set to a high value that can cause chain sync panics on network migrations/upgrades",
|
|
|
|
"set_value": fvmConcurrencyVal,
|
2023-05-31 13:43:26 +00:00
|
|
|
"recommended": "24 or less during network upgrades",
|
2023-05-31 13:39:03 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-17 12:39:36 +00:00
|
|
|
// TODO: More things:
|
|
|
|
// * Space in repo dirs (taking into account mounts)
|
|
|
|
// * Miner
|
|
|
|
// * Faulted partitions
|
|
|
|
// * Low balances
|
|
|
|
// * Market provider
|
|
|
|
// * Reachability
|
|
|
|
// * on-chain config
|
|
|
|
// * Low memory (maybe)
|
|
|
|
// * Network / sync issues
|