Merge pull request #5101 from filecoin-project/raulk/memory-watchdog

introduce memory watchdog; LOTUS_MAX_HEAP
This commit is contained in:
Łukasz Magiera 2020-12-03 15:46:09 +01:00 committed by GitHub
commit dc06d30f52
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 155 additions and 7 deletions

2
go.mod
View File

@ -22,6 +22,7 @@ require (
github.com/drand/kyber v1.1.4
github.com/dustin/go-humanize v1.0.0
github.com/elastic/go-sysinfo v1.3.0
github.com/elastic/gosigar v0.12.0
github.com/fatih/color v1.9.0
github.com/filecoin-project/filecoin-ffi v0.30.4-0.20200910194244-f640612a1a1f
github.com/filecoin-project/go-address v0.0.5-0.20201103152444-f2023ef3f5bb
@ -123,6 +124,7 @@ require (
github.com/polydawn/refmt v0.0.0-20190809202753-05966cbd336a
github.com/prometheus/client_golang v1.6.0
github.com/raulk/clock v1.1.0
github.com/raulk/go-watchdog v0.0.1
github.com/stretchr/testify v1.6.1
github.com/supranational/blst v0.1.1
github.com/syndtr/goleveldb v1.0.0

8
go.sum
View File

@ -219,6 +219,8 @@ github.com/elastic/go-sysinfo v1.3.0 h1:eb2XFGTMlSwG/yyU9Y8jVAYLIzU2sFzWXwo2gmet
github.com/elastic/go-sysinfo v1.3.0/go.mod h1:i1ZYdU10oLNfRzq4vq62BEwD2fH8KaWh6eh0ikPT9F0=
github.com/elastic/go-windows v1.0.0 h1:qLURgZFkkrYyTTkvYpsZIgf83AUsdIHfvlJaqaZ7aSY=
github.com/elastic/go-windows v1.0.0/go.mod h1:TsU0Nrp7/y3+VwE82FoZF8gC/XFg/Elz6CcloAxnPgU=
github.com/elastic/gosigar v0.12.0 h1:AsdhYCJlTudhfOYQyFNgx+fIVTfrDO0V1ST0vHgiapU=
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/ema/qdisc v0.0.0-20190904071900-b82c76788043/go.mod h1:ix4kG2zvdUd8kEKSW0ZTr1XLks0epFpI4j745DXxlNE=
github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@ -1311,6 +1313,8 @@ github.com/prometheus/procfs v0.1.0 h1:jhMy6QXfi3y2HEzFoyuCj40z4OZIIHHPtFyCMftmv
github.com/prometheus/procfs v0.1.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/raulk/clock v1.1.0 h1:dpb29+UKMbLqiU/jqIJptgLR1nn23HLgMY0sTCDza5Y=
github.com/raulk/clock v1.1.0/go.mod h1:3MpVxdZ/ODBQDxbN+kzshf5OSZwPjtMDx6BBXBmOeY0=
github.com/raulk/go-watchdog v0.0.1 h1:q0ad0fanW8uaLRTvxQ0RfdADBiKa6CL6NMByhB0vpBs=
github.com/raulk/go-watchdog v0.0.1/go.mod h1:dIvQcKy0laxuHGda1ms8/2T9wE3ZJRbz9bxEO7c0q1M=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
@ -1680,6 +1684,7 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180202135801-37707fdb30a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -1794,6 +1799,7 @@ golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200711155855-7342f9734a7d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3 h1:r3P/5xOq/dK1991B65Oy6E1fRF/2d/fSYZJ/fXGVfJc=
golang.org/x/tools v0.0.0-20200827010519-17fd2f27a9e3/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696 h1:Bfazo+enXJET5SbHeh95NtxabJF6fJ9r/jpfRJgd3j4=
golang.org/x/tools v0.0.0-20201112185108-eeaa07dd7696/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
@ -1877,6 +1883,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@ -1923,6 +1930,7 @@ launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80
modernc.org/cc v1.0.0 h1:nPibNuDEx6tvYrUAtvDTTw98rx5juGsa5zuDnKwEEQQ=
modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw=
modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8=
modernc.org/golex v1.0.0 h1:wWpDlbK8ejRfSyi0frMyhilD3JBvtcx2AdGDnU+JtsE=
modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk=
modernc.org/golex v1.0.1 h1:EYKY1a3wStt0RzHaH8mdSRNg78Ub0OHxYfCRWw35YtM=
modernc.org/golex v1.0.1/go.mod h1:QCA53QtsT1NdGkaZZkF5ezFwk4IXh4BGNafAARTC254=

View File

@ -15,6 +15,7 @@ import (
"github.com/filecoin-project/lotus/chain/vm"
"github.com/filecoin-project/lotus/chain/wallet"
"github.com/filecoin-project/lotus/node/hello"
"github.com/filecoin-project/lotus/system"
logging "github.com/ipfs/go-log"
ci "github.com/libp2p/go-libp2p-core/crypto"
@ -111,8 +112,10 @@ const (
// the system starts, so that it's available for all other components.
InitJournalKey = invoke(iota)
// libp2p
// System processes.
InitMemoryWatchdog
// libp2p
PstoreAddSelfKeysKey
StartListeningKey
BootstrapKey
@ -174,6 +177,9 @@ func defaults() []Option {
Override(new(journal.DisabledEvents), journal.EnvDisabledEvents),
Override(new(journal.Journal), modules.OpenFilesystemJournal),
Override(new(system.MemoryConstraints), modules.MemoryConstraints),
Override(InitMemoryWatchdog, modules.MemoryWatchdog),
Override(new(helpers.MetricsCtx), func() context.Context {
return metricsi.CtxScope(context.Background(), "lotus")
}),

View File

@ -6,12 +6,15 @@ import (
"errors"
"io"
"io/ioutil"
"os"
"time"
"github.com/gbrlsnchs/jwt/v3"
logging "github.com/ipfs/go-log/v2"
"github.com/libp2p/go-libp2p-core/peer"
"github.com/libp2p/go-libp2p-core/peerstore"
record "github.com/libp2p/go-libp2p-record"
"go.uber.org/fx"
"golang.org/x/xerrors"
"github.com/filecoin-project/go-jsonrpc/auth"
@ -24,9 +27,26 @@ import (
"github.com/filecoin-project/lotus/node/config"
"github.com/filecoin-project/lotus/node/modules/dtypes"
"github.com/filecoin-project/lotus/node/repo"
"github.com/filecoin-project/lotus/system"
"github.com/raulk/go-watchdog"
)
var log = logging.Logger("modules")
const (
// EnvWatchdogDisabled is an escape hatch to disable the watchdog explicitly
// in case an OS/kernel appears to report incorrect information. The
// watchdog will be disabled if the value of this env variable is 1.
EnvWatchdogDisabled = "LOTUS_DISABLE_WATCHDOG"
)
const (
JWTSecretName = "auth-jwt-private" //nolint:gosec
KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec
)
var (
log = logging.Logger("modules")
logWatchdog = logging.Logger("watchdog")
)
type Genesis func() (*types.BlockHeader, error)
@ -37,8 +57,58 @@ func RecordValidator(ps peerstore.Peerstore) record.Validator {
}
}
const JWTSecretName = "auth-jwt-private" //nolint:gosec
const KTJwtHmacSecret = "jwt-hmac-secret" //nolint:gosec
// MemoryConstraints returns the memory constraints configured for this system.
func MemoryConstraints() system.MemoryConstraints {
constraints := system.GetMemoryConstraints()
log.Infow("memory limits initialized",
"max_mem_heap", constraints.MaxHeapMem,
"total_system_mem", constraints.TotalSystemMem,
"effective_mem_limit", constraints.EffectiveMemLimit)
return constraints
}
// MemoryWatchdog starts the memory watchdog, applying the computed resource
// constraints.
func MemoryWatchdog(lc fx.Lifecycle, constraints system.MemoryConstraints) {
if os.Getenv(EnvWatchdogDisabled) == "1" {
log.Infof("memory watchdog is disabled via %s", EnvWatchdogDisabled)
return
}
cfg := watchdog.MemConfig{
Resolution: 5 * time.Second,
Policy: &watchdog.WatermarkPolicy{
Watermarks: []float64{0.50, 0.60, 0.70, 0.85, 0.90, 0.925, 0.95},
EmergencyWatermark: 0.95,
},
Logger: logWatchdog,
}
// if user has set max heap limit, apply it. Otherwise, fall back to total
// system memory constraint.
if maxHeap := constraints.MaxHeapMem; maxHeap != 0 {
log.Infof("memory watchdog will apply max heap constraint: %d bytes", maxHeap)
cfg.Limit = maxHeap
cfg.Scope = watchdog.ScopeHeap
} else {
log.Infof("max heap size not provided; memory watchdog will apply total system memory constraint: %d bytes", constraints.TotalSystemMem)
cfg.Limit = constraints.TotalSystemMem
cfg.Scope = watchdog.ScopeSystem
}
err, stop := watchdog.Memory(cfg)
if err != nil {
log.Warnf("failed to instantiate memory watchdog: %s", err)
return
}
lc.Append(fx.Hook{
OnStop: func(ctx context.Context) error {
stop()
return nil
},
})
}
type JwtPayload struct {
Allow []auth.Permission

View File

@ -21,9 +21,8 @@ func BadgerBlockstoreOptions(domain BlockstoreDomain, path string, readonly bool
opts.DetectConflicts = false
// This is to optimize the database on close so it can be opened
// read-only and efficiently queried. We don't do that and hanging on
// stop isn't nice.
opts.CompactL0OnClose = false
// read-only and efficiently queried.
opts.CompactL0OnClose = true
// The alternative is "crash on start and tell the user to fix it". This
// will truncate corrupt and unsynced data, which we don't guarantee to

63
system/resources.go Normal file
View File

@ -0,0 +1,63 @@
package system
import (
"os"
"github.com/dustin/go-humanize"
"github.com/elastic/gosigar"
logging "github.com/ipfs/go-log/v2"
)
var (
logSystem = logging.Logger("system")
)
// EnvMaximumHeap is name of the environment variable with which the user can
// specify a maximum heap size to abide by. The value of the env variable should
// be in bytes, or in SI bytes (e.g. 32GiB).
const EnvMaximumHeap = "LOTUS_MAX_HEAP"
// MemoryConstraints represents resource constraints that Lotus and the go
// runtime should abide by. It is a singleton object that's populated on
// initialization, and can be used by components for size calculations
// (e.g. caches).
type MemoryConstraints struct {
// MaxHeapMem is the maximum heap memory that has been set by the user
// through the LOTUS_MAX_HEAP env variable. If zero, there is no max heap
// limit set.
MaxHeapMem uint64
// TotalSystemMem is the total system memory as reported by go-sigar. If
// zero, it was impossible to determine the total system memory.
TotalSystemMem uint64
// EffectiveMemLimit is the memory limit in effect, in bytes.
//
// In order of precedence:
// 1. MaxHeapMem if non-zero.
// 2. TotalSystemMem if non-zero.
// 3. Zero (no known limit).
EffectiveMemLimit uint64
}
// GetMemoryConstraints returns the memory constraints for this process.
func GetMemoryConstraints() (ret MemoryConstraints) {
var mem gosigar.Mem
if err := mem.Get(); err != nil {
logSystem.Warnf("failed to acquire total system memory: %s", err)
} else {
ret.TotalSystemMem = mem.Total
ret.EffectiveMemLimit = mem.Total
}
if v := os.Getenv(EnvMaximumHeap); v != "" {
bytes, err := humanize.ParseBytes(v)
if err != nil {
logSystem.Warnf("failed to parse %s env variable with value %s: %s; ignoring max heap limit", EnvMaximumHeap, v, err)
} else {
ret.MaxHeapMem = bytes
ret.EffectiveMemLimit = bytes
}
}
return ret
}