Merge pull request #5101 from filecoin-project/raulk/memory-watchdog
introduce memory watchdog; LOTUS_MAX_HEAP
This commit is contained in:
commit
dc06d30f52
2
go.mod
2
go.mod
@ -22,6 +22,7 @@ require (
|
||||
github.com/drand/kyber v1.1.4
|
||||
github.com/dustin/go-humanize v1.0.0
|
||||
github.com/elastic/go-sysinfo v1.3.0
|
||||
github.com/elastic/gosigar v0.12.0
|
||||
github.com/fatih/color v1.9.0
|
||||
github.com/filecoin-project/filecoin-ffi v0.30.4-0.20200910194244-f640612a1a1f
|
||||
github.com/filecoin-project/go-address v0.0.5-0.20201103152444-f2023ef3f5bb
|
||||
@ -123,6 +124,7 @@ require (
|
||||
github.com/polydawn/refmt v0.0.0-20190809202753-05966cbd336a
|
||||
github.com/prometheus/client_golang v1.6.0
|
||||
github.com/raulk/clock v1.1.0
|
||||
github.com/raulk/go-watchdog v0.0.1
|
||||
github.com/stretchr/testify v1.6.1
|
||||
github.com/supranational/blst v0.1.1
|
||||
github.com/syndtr/goleveldb v1.0.0
|
||||
|
8
go.sum
8
go.sum
@ -219,6 +219,8 @@ github.com/elastic/go-sysinfo v1.3.0 h1:eb2XFGTMlSwG/yyU9Y8jVAYLIzU2sFzWXwo2gmet
|
||||
github.com/elastic/go-sysinfo v1.3.0/go.mod h1:i1ZYdU10oLNfRzq4vq62BEwD2fH8KaWh6eh0ikPT9F0=
|
||||
github.com/elastic/go-windows v1.0.0 h1:qLURgZFkkrYyTTkvYpsZIgf83AUsdIHfvlJaqaZ7aSY=
|
||||
github.com/elastic/go-windows v1.0.0/go.mod h1:TsU0Nrp7/y3+VwE82FoZF8gC/XFg/Elz6CcloAxnPgU=
|
||||
github.com/elastic/gosigar v0.12.0 h1:AsdhYCJlTudhfOYQyFNgx+fIVTfrDO0V1ST0vHgiapU=
|
||||
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
|
||||
github.com/ema/qdisc v0.0.0-20190904071900-b82c76788043/go.mod h1:ix4kG2zvdUd8kEKSW0ZTr1XLks0epFpI4j745DXxlNE=
|
||||
github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g=
|
||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
@ -1311,6 +1313,8 @@ github.com/prometheus/procfs v0.1.0 h1:jhMy6QXfi3y2HEzFoyuCj40z4OZIIHHPtFyCMftmv
|
||||
github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
|
||||
github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y=
|
||||
github.com/raulk/clock v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0=
|
||||
github.com/raulk/go-watchdog v0.0.1 h1:q0ad0fanW8uaLRTvxQ0RfdADBiKa6CL6NMByhB0vpBs=
|
||||
github.com/raulk/go-watchdog v0.0.1/go.mod h1:dIvQcKy0laxuHGda1ms8/2T9wE3ZJRbz9bxEO7c0q1M=
|
||||
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
@ -1680,6 +1684,7 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20180202135801-37707fdb30a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
@ -1794,6 +1799,7 @@ golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200711155855-7342f9734a7d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
|
||||
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3 h1:r3P/5xOq/dK1991B65Oy6E1fRF/2d/fSYZJ/fXGVfJc=
|
||||
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
|
||||
golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696 h1:Bfazo+enXJET5SbHeh95NtxabJF6fJ9r/jpfRJgd3j4=
|
||||
golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
@ -1877,6 +1883,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
|
||||
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
|
||||
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
|
||||
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
@ -1923,6 +1930,7 @@ launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80
|
||||
modernc.org/cc v1.0.0 h1:nPibNuDEx6tvYrUAtvDTTw98rx5juGsa5zuDnKwEEQQ=
|
||||
modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw=
|
||||
modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8=
|
||||
modernc.org/golex v1.0.0 h1:wWpDlbK8ejRfSyi0frMyhilD3JBvtcx2AdGDnU+JtsE=
|
||||
modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk=
|
||||
modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM=
|
||||
modernc.org/golex v1.0.1/go.mod h1:QCA53QtsT1NdGkaZZkF5ezFwk4IXh4BGNafAARTC254=
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/chain/vm"
|
||||
"github.com/filecoin-project/lotus/chain/wallet"
|
||||
"github.com/filecoin-project/lotus/node/hello"
|
||||
"github.com/filecoin-project/lotus/system"
|
||||
|
||||
logging "github.com/ipfs/go-log"
|
||||
ci "github.com/libp2p/go-libp2p-core/crypto"
|
||||
@ -111,8 +112,10 @@ const (
|
||||
// the system starts, so that it's available for all other components.
|
||||
InitJournalKey = invoke(iota)
|
||||
|
||||
// libp2p
|
||||
// System processes.
|
||||
InitMemoryWatchdog
|
||||
|
||||
// libp2p
|
||||
PstoreAddSelfKeysKey
|
||||
StartListeningKey
|
||||
BootstrapKey
|
||||
@ -174,6 +177,9 @@ func defaults() []Option {
|
||||
Override(new(journal.DisabledEvents), journal.EnvDisabledEvents),
|
||||
Override(new(journal.Journal), modules.OpenFilesystemJournal),
|
||||
|
||||
Override(new(system.MemoryConstraints), modules.MemoryConstraints),
|
||||
Override(InitMemoryWatchdog, modules.MemoryWatchdog),
|
||||
|
||||
Override(new(helpers.MetricsCtx), func() context.Context {
|
||||
return metricsi.CtxScope(context.Background(), "lotus")
|
||||
}),
|
||||
|
@ -6,12 +6,15 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/gbrlsnchs/jwt/v3"
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
"github.com/libp2p/go-libp2p-core/peer"
|
||||
"github.com/libp2p/go-libp2p-core/peerstore"
|
||||
record "github.com/libp2p/go-libp2p-record"
|
||||
"go.uber.org/fx"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/filecoin-project/go-jsonrpc/auth"
|
||||
@ -24,9 +27,26 @@ import (
|
||||
"github.com/filecoin-project/lotus/node/config"
|
||||
"github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
"github.com/filecoin-project/lotus/node/repo"
|
||||
"github.com/filecoin-project/lotus/system"
|
||||
"github.com/raulk/go-watchdog"
|
||||
)
|
||||
|
||||
var log = logging.Logger("modules")
|
||||
const (
|
||||
// EnvWatchdogDisabled is an escape hatch to disable the watchdog explicitly
|
||||
// in case an OS/kernel appears to report incorrect information. The
|
||||
// watchdog will be disabled if the value of this env variable is 1.
|
||||
EnvWatchdogDisabled = "LOTUS_DISABLE_WATCHDOG"
|
||||
)
|
||||
|
||||
const (
|
||||
JWTSecretName = "auth-jwt-private" //nolint:gosec
|
||||
KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec
|
||||
)
|
||||
|
||||
var (
|
||||
log = logging.Logger("modules")
|
||||
logWatchdog = logging.Logger("watchdog")
|
||||
)
|
||||
|
||||
type Genesis func() (*types.BlockHeader, error)
|
||||
|
||||
@ -37,8 +57,58 @@ func RecordValidator(ps peerstore.Peerstore) record.Validator {
|
||||
}
|
||||
}
|
||||
|
||||
const JWTSecretName = "auth-jwt-private" //nolint:gosec
|
||||
const KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec
|
||||
// MemoryConstraints returns the memory constraints configured for this system.
|
||||
func MemoryConstraints() system.MemoryConstraints {
|
||||
constraints := system.GetMemoryConstraints()
|
||||
log.Infow("memory limits initialized",
|
||||
"max_mem_heap", constraints.MaxHeapMem,
|
||||
"total_system_mem", constraints.TotalSystemMem,
|
||||
"effective_mem_limit", constraints.EffectiveMemLimit)
|
||||
return constraints
|
||||
}
|
||||
|
||||
// MemoryWatchdog starts the memory watchdog, applying the computed resource
|
||||
// constraints.
|
||||
func MemoryWatchdog(lc fx.Lifecycle, constraints system.MemoryConstraints) {
|
||||
if os.Getenv(EnvWatchdogDisabled) == "1" {
|
||||
log.Infof("memory watchdog is disabled via %s", EnvWatchdogDisabled)
|
||||
return
|
||||
}
|
||||
|
||||
cfg := watchdog.MemConfig{
|
||||
Resolution: 5 * time.Second,
|
||||
Policy: &watchdog.WatermarkPolicy{
|
||||
Watermarks: []float64{0.50, 0.60, 0.70, 0.85, 0.90, 0.925, 0.95},
|
||||
EmergencyWatermark: 0.95,
|
||||
},
|
||||
Logger: logWatchdog,
|
||||
}
|
||||
|
||||
// if user has set max heap limit, apply it. Otherwise, fall back to total
|
||||
// system memory constraint.
|
||||
if maxHeap := constraints.MaxHeapMem; maxHeap != 0 {
|
||||
log.Infof("memory watchdog will apply max heap constraint: %d bytes", maxHeap)
|
||||
cfg.Limit = maxHeap
|
||||
cfg.Scope = watchdog.ScopeHeap
|
||||
} else {
|
||||
log.Infof("max heap size not provided; memory watchdog will apply total system memory constraint: %d bytes", constraints.TotalSystemMem)
|
||||
cfg.Limit = constraints.TotalSystemMem
|
||||
cfg.Scope = watchdog.ScopeSystem
|
||||
}
|
||||
|
||||
err, stop := watchdog.Memory(cfg)
|
||||
if err != nil {
|
||||
log.Warnf("failed to instantiate memory watchdog: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
lc.Append(fx.Hook{
|
||||
OnStop: func(ctx context.Context) error {
|
||||
stop()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
type JwtPayload struct {
|
||||
Allow []auth.Permission
|
||||
|
@ -21,9 +21,8 @@ func BadgerBlockstoreOptions(domain BlockstoreDomain, path string, readonly bool
|
||||
opts.DetectConflicts = false
|
||||
|
||||
// This is to optimize the database on close so it can be opened
|
||||
// read-only and efficiently queried. We don't do that and hanging on
|
||||
// stop isn't nice.
|
||||
opts.CompactL0OnClose = false
|
||||
// read-only and efficiently queried.
|
||||
opts.CompactL0OnClose = true
|
||||
|
||||
// The alternative is "crash on start and tell the user to fix it". This
|
||||
// will truncate corrupt and unsynced data, which we don't guarantee to
|
||||
|
63
system/resources.go
Normal file
63
system/resources.go
Normal file
@ -0,0 +1,63 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/elastic/gosigar"
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
logSystem = logging.Logger("system")
|
||||
)
|
||||
|
||||
// EnvMaximumHeap is name of the environment variable with which the user can
|
||||
// specify a maximum heap size to abide by. The value of the env variable should
|
||||
// be in bytes, or a human-readable size such as 32GiB (IEC) or 32GB (SI).
|
||||
const EnvMaximumHeap = "LOTUS_MAX_HEAP"
|
||||
|
||||
// MemoryConstraints describes the memory limits that Lotus and the Go runtime
// are expected to stay within. Components can consult it when sizing
// memory-hungry structures such as caches.
type MemoryConstraints struct {
	// MaxHeapMem is the heap ceiling, in bytes, requested by the user via the
	// LOTUS_MAX_HEAP environment variable. Zero means no ceiling was set.
	MaxHeapMem uint64

	// TotalSystemMem is the total system memory, in bytes, as reported by
	// go-sigar. Zero means the total could not be determined.
	TotalSystemMem uint64

	// EffectiveMemLimit is the limit, in bytes, that is actually in force.
	//
	// It is chosen by precedence:
	//  1. MaxHeapMem, when non-zero.
	//  2. TotalSystemMem, when non-zero.
	//  3. Zero, meaning no limit is known.
	EffectiveMemLimit uint64
}
|
||||
|
||||
// GetMemoryConstraints returns the memory constraints for this process.
|
||||
func GetMemoryConstraints() (ret MemoryConstraints) {
|
||||
var mem gosigar.Mem
|
||||
if err := mem.Get(); err != nil {
|
||||
logSystem.Warnf("failed to acquire total system memory: %s", err)
|
||||
} else {
|
||||
ret.TotalSystemMem = mem.Total
|
||||
ret.EffectiveMemLimit = mem.Total
|
||||
}
|
||||
|
||||
if v := os.Getenv(EnvMaximumHeap); v != "" {
|
||||
bytes, err := humanize.ParseBytes(v)
|
||||
if err != nil {
|
||||
logSystem.Warnf("failed to parse %s env variable with value %s: %s; ignoring max heap limit", EnvMaximumHeap, v, err)
|
||||
} else {
|
||||
ret.MaxHeapMem = bytes
|
||||
ret.EffectiveMemLimit = bytes
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
Loading…
Reference in New Issue
Block a user