diff --git a/go.mod b/go.mod index 6b62ec67d..801042f71 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/drand/kyber v1.1.4 github.com/dustin/go-humanize v1.0.0 github.com/elastic/go-sysinfo v1.3.0 + github.com/elastic/gosigar v0.12.0 github.com/fatih/color v1.9.0 github.com/filecoin-project/filecoin-ffi v0.30.4-0.20200910194244-f640612a1a1f github.com/filecoin-project/go-address v0.0.5-0.20201103152444-f2023ef3f5bb @@ -123,6 +124,7 @@ require ( github.com/polydawn/refmt v0.0.0-20190809202753-05966cbd336a github.com/prometheus/client_golang v1.6.0 github.com/raulk/clock v1.1.0 + github.com/raulk/go-watchdog v0.0.1 github.com/stretchr/testify v1.6.1 github.com/supranational/blst v0.1.1 github.com/syndtr/goleveldb v1.0.0 diff --git a/go.sum b/go.sum index 623aa7b6f..3d8dad54b 100644 --- a/go.sum +++ b/go.sum @@ -219,6 +219,8 @@ github.com/elastic/go-sysinfo v1.3.0 h1:eb2XFGTMlSwG/yyU9Y8jVAYLIzU2sFzWXwo2gmet github.com/elastic/go-sysinfo v1.3.0/go.mod h1:i1ZYdU10oLNfRzq4vq62BEwD2fH8KaWh6eh0ikPT9F0= github.com/elastic/go-windows v1.0.0 h1:qLURgZFkkrYyTTkvYpsZIgf83AUsdIHfvlJaqaZ7aSY= github.com/elastic/go-windows v1.0.0/go.mod h1:TsU0Nrp7/y3+VwE82FoZF8gC/XFg/Elz6CcloAxnPgU= +github.com/elastic/gosigar v0.12.0 h1:AsdhYCJlTudhfOYQyFNgx+fIVTfrDO0V1ST0vHgiapU= +github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/ema/qdisc v0.0.0-20190904071900-b82c76788043/go.mod h1:ix4kG2zvdUd8kEKSW0ZTr1XLks0epFpI4j745DXxlNE= github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -1311,6 +1313,8 @@ github.com/prometheus/procfs v0.1.0 h1:jhMy6QXfi3y2HEzFoyuCj40z4OZIIHHPtFyCMftmv github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y= github.com/raulk/clock 
v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0= +github.com/raulk/go-watchdog v0.0.1 h1:q0ad0fanW8uaLRTvxQ0RfdADBiKa6CL6NMByhB0vpBs= +github.com/raulk/go-watchdog v0.0.1/go.mod h1:dIvQcKy0laxuHGda1ms8/2T9wE3ZJRbz9bxEO7c0q1M= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -1680,6 +1684,7 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180202135801-37707fdb30a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1794,6 +1799,7 @@ golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200711155855-7342f9734a7d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3 h1:r3P/5xOq/dK1991B65Oy6E1fRF/2d/fSYZJ/fXGVfJc= golang.org/x/tools 
v0.0.0-20200827010519-17fd2f27a9e3/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696 h1:Bfazo+enXJET5SbHeh95NtxabJF6fJ9r/jpfRJgd3j4= golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -1877,6 +1883,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1923,6 +1930,7 @@ launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80 modernc.org/cc v1.0.0 h1:nPibNuDEx6tvYrUAtvDTTw98rx5juGsa5zuDnKwEEQQ= modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8= +modernc.org/golex v1.0.0 h1:wWpDlbK8ejRfSyi0frMyhilD3JBvtcx2AdGDnU+JtsE= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM= modernc.org/golex v1.0.1/go.mod h1:QCA53QtsT1NdGkaZZkF5ezFwk4IXh4BGNafAARTC254= diff --git a/node/builder.go b/node/builder.go index 5efd4aa3c..70ce981ad 100644 --- a/node/builder.go +++ b/node/builder.go @@ -15,6 +15,7 @@ import ( "github.com/filecoin-project/lotus/chain/vm" "github.com/filecoin-project/lotus/chain/wallet" "github.com/filecoin-project/lotus/node/hello" + 
"github.com/filecoin-project/lotus/system" logging "github.com/ipfs/go-log" ci "github.com/libp2p/go-libp2p-core/crypto" @@ -111,8 +112,10 @@ const ( // the system starts, so that it's available for all other components. InitJournalKey = invoke(iota) - // libp2p + // System processes. + InitMemoryWatchdog + // libp2p PstoreAddSelfKeysKey StartListeningKey BootstrapKey @@ -174,6 +177,9 @@ func defaults() []Option { Override(new(journal.DisabledEvents), journal.EnvDisabledEvents), Override(new(journal.Journal), modules.OpenFilesystemJournal), + Override(new(system.MemoryConstraints), modules.MemoryConstraints), + Override(InitMemoryWatchdog, modules.MemoryWatchdog), + Override(new(helpers.MetricsCtx), func() context.Context { return metricsi.CtxScope(context.Background(), "lotus") }), diff --git a/node/modules/core.go b/node/modules/core.go index 259c1ba3a..794a9dafe 100644 --- a/node/modules/core.go +++ b/node/modules/core.go @@ -6,12 +6,15 @@ import ( "errors" "io" "io/ioutil" + "os" + "time" "github.com/gbrlsnchs/jwt/v3" logging "github.com/ipfs/go-log/v2" "github.com/libp2p/go-libp2p-core/peer" "github.com/libp2p/go-libp2p-core/peerstore" record "github.com/libp2p/go-libp2p-record" + "go.uber.org/fx" "golang.org/x/xerrors" "github.com/filecoin-project/go-jsonrpc/auth" @@ -24,9 +27,26 @@ import ( "github.com/filecoin-project/lotus/node/config" "github.com/filecoin-project/lotus/node/modules/dtypes" "github.com/filecoin-project/lotus/node/repo" + "github.com/filecoin-project/lotus/system" + "github.com/raulk/go-watchdog" ) -var log = logging.Logger("modules") +const ( + // EnvWatchdogDisabled is an escape hatch to disable the watchdog explicitly + // in case an OS/kernel appears to report incorrect information. The + // watchdog will be disabled if the value of this env variable is 1. 
+ EnvWatchdogDisabled = "LOTUS_DISABLE_WATCHDOG" +) + +const ( + JWTSecretName = "auth-jwt-private" //nolint:gosec + KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec +) + +var ( + log = logging.Logger("modules") + logWatchdog = logging.Logger("watchdog") +) type Genesis func() (*types.BlockHeader, error) @@ -37,8 +57,58 @@ func RecordValidator(ps peerstore.Peerstore) record.Validator { } } -const JWTSecretName = "auth-jwt-private" //nolint:gosec -const KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec +// MemoryConstraints returns the memory constraints configured for this system. +func MemoryConstraints() system.MemoryConstraints { + constraints := system.GetMemoryConstraints() + log.Infow("memory limits initialized", + "max_mem_heap", constraints.MaxHeapMem, + "total_system_mem", constraints.TotalSystemMem, + "effective_mem_limit", constraints.EffectiveMemLimit) + return constraints +} + +// MemoryWatchdog starts the memory watchdog, applying the computed resource +// constraints. +func MemoryWatchdog(lc fx.Lifecycle, constraints system.MemoryConstraints) { + if os.Getenv(EnvWatchdogDisabled) == "1" { + log.Infof("memory watchdog is disabled via %s", EnvWatchdogDisabled) + return + } + + cfg := watchdog.MemConfig{ + Resolution: 5 * time.Second, + Policy: &watchdog.WatermarkPolicy{ + Watermarks: []float64{0.50, 0.60, 0.70, 0.85, 0.90, 0.925, 0.95}, + EmergencyWatermark: 0.95, + }, + Logger: logWatchdog, + } + + // if user has set max heap limit, apply it. Otherwise, fall back to total + // system memory constraint. 
+ if maxHeap := constraints.MaxHeapMem; maxHeap != 0 { + log.Infof("memory watchdog will apply max heap constraint: %d bytes", maxHeap) + cfg.Limit = maxHeap + cfg.Scope = watchdog.ScopeHeap + } else { + log.Infof("max heap size not provided; memory watchdog will apply total system memory constraint: %d bytes", constraints.TotalSystemMem) + cfg.Limit = constraints.TotalSystemMem + cfg.Scope = watchdog.ScopeSystem + } + + err, stop := watchdog.Memory(cfg) + if err != nil { + log.Warnf("failed to instantiate memory watchdog: %s", err) + return + } + + lc.Append(fx.Hook{ + OnStop: func(ctx context.Context) error { + stop() + return nil + }, + }) +} type JwtPayload struct { Allow []auth.Permission diff --git a/node/repo/blockstore_opts.go b/node/repo/blockstore_opts.go index d8d852d84..775b41266 100644 --- a/node/repo/blockstore_opts.go +++ b/node/repo/blockstore_opts.go @@ -21,9 +21,8 @@ func BadgerBlockstoreOptions(domain BlockstoreDomain, path string, readonly bool opts.DetectConflicts = false // This is to optimize the database on close so it can be opened - // read-only and efficiently queried. We don't do that and hanging on - // stop isn't nice. - opts.CompactL0OnClose = false + // read-only and efficiently queried. + opts.CompactL0OnClose = true // The alternative is "crash on start and tell the user to fix it". This // will truncate corrupt and unsynced data, which we don't guarantee to diff --git a/system/resources.go b/system/resources.go new file mode 100644 index 000000000..4c0d38943 --- /dev/null +++ b/system/resources.go @@ -0,0 +1,63 @@ +package system + +import ( + "os" + + "github.com/dustin/go-humanize" + "github.com/elastic/gosigar" + logging "github.com/ipfs/go-log/v2" +) + +var ( + logSystem = logging.Logger("system") +) + +// EnvMaximumHeap is name of the environment variable with which the user can +// specify a maximum heap size to abide by. The value of the env variable should +// be in bytes, or in SI bytes (e.g. 32GiB). 
+const EnvMaximumHeap = "LOTUS_MAX_HEAP"
+
+// MemoryConstraints represents resource constraints that Lotus and the go
+// runtime should abide by. It is a singleton object that's populated on
+// initialization, and can be used by components for size calculations
+// (e.g. caches).
+type MemoryConstraints struct {
+	// MaxHeapMem is the maximum heap memory that has been set by the user
+	// through the LOTUS_MAX_HEAP env variable. If zero, there is no max heap
+	// limit set.
+	MaxHeapMem uint64
+
+	// TotalSystemMem is the total system memory as reported by go-sigar. If
+	// zero, it was impossible to determine the total system memory.
+	TotalSystemMem uint64
+
+	// EffectiveMemLimit is the memory limit in effect, in bytes.
+	//
+	// In order of precedence:
+	//  1. MaxHeapMem if non-zero.
+	//  2. TotalSystemMem if non-zero.
+	//  3. Zero (no known limit).
+	EffectiveMemLimit uint64
+}
+
+// GetMemoryConstraints returns the memory constraints for this process.
+//
+// TotalSystemMem is read through gosigar; if that fails, a warning is logged
+// and the field stays zero. MaxHeapMem is parsed from the LOTUS_MAX_HEAP env
+// variable when it is set; an unparseable value is logged and ignored.
+// EffectiveMemLimit follows the precedence documented on MemoryConstraints.
+func GetMemoryConstraints() (ret MemoryConstraints) {
+	var mem gosigar.Mem
+	if err := mem.Get(); err != nil {
+		logSystem.Warnf("failed to acquire total system memory: %s", err)
+	} else {
+		ret.TotalSystemMem = mem.Total
+		ret.EffectiveMemLimit = mem.Total
+	}
+
+	if v := os.Getenv(EnvMaximumHeap); v != "" {
+		bytes, err := humanize.ParseBytes(v)
+		if err != nil {
+			logSystem.Warnf("failed to parse %s env variable with value %s: %s; ignoring max heap limit", EnvMaximumHeap, v, err)
+		} else if bytes != 0 {
+			// A zero max heap means "no limit set": keep the total system
+			// memory fallback instead of clobbering EffectiveMemLimit with 0.
+			ret.MaxHeapMem = bytes
+			ret.EffectiveMemLimit = bytes
+		}
+	}
+	return ret
+}