Merge pull request #7108 from filecoin-project/feat/alerting

Simple alert system; FD limit alerts
This commit is contained in:
Łukasz Magiera 2021-08-26 16:34:04 +02:00 committed by GitHub
commit 55dbbf5f0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 581 additions and 21 deletions

View File

@ -5,6 +5,7 @@ import (
"fmt"
apitypes "github.com/filecoin-project/lotus/api/types"
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/google/uuid"
@ -33,6 +34,10 @@ type Common interface {
LogList(context.Context) ([]string, error) //perm:write
LogSetLevel(context.Context, string, string) error //perm:write
// LogAlerts returns list of all, active and inactive alerts tracked by the
// node
LogAlerts(ctx context.Context) ([]alerting.Alert, error) //perm:admin
// MethodGroup: Common
// Version provides information about API provider

View File

@ -24,6 +24,7 @@ import (
apitypes "github.com/filecoin-project/lotus/api/types"
miner "github.com/filecoin-project/lotus/chain/actors/builtin/miner"
types "github.com/filecoin-project/lotus/chain/types"
alerting "github.com/filecoin-project/lotus/journal/alerting"
marketevents "github.com/filecoin-project/lotus/markets/loggers"
dtypes "github.com/filecoin-project/lotus/node/modules/dtypes"
imports "github.com/filecoin-project/lotus/node/repo/imports"
@ -995,6 +996,21 @@ func (mr *MockFullNodeMockRecorder) ID(arg0 interface{}) *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*MockFullNode)(nil).ID), arg0)
}
// LogAlerts mocks base method.
func (m *MockFullNode) LogAlerts(arg0 context.Context) ([]alerting.Alert, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "LogAlerts", arg0)
ret0, _ := ret[0].([]alerting.Alert)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// LogAlerts indicates an expected call of LogAlerts.
func (mr *MockFullNodeMockRecorder) LogAlerts(arg0 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LogAlerts", reflect.TypeOf((*MockFullNode)(nil).LogAlerts), arg0)
}
// LogList mocks base method.
func (m *MockFullNode) LogList(arg0 context.Context) ([]string, error) {
m.ctrl.T.Helper()

View File

@ -27,6 +27,7 @@ import (
"github.com/filecoin-project/lotus/extern/sector-storage/stores"
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
"github.com/filecoin-project/lotus/extern/storage-sealing/sealiface"
"github.com/filecoin-project/lotus/journal/alerting"
marketevents "github.com/filecoin-project/lotus/markets/loggers"
"github.com/filecoin-project/lotus/node/modules/dtypes"
"github.com/filecoin-project/lotus/node/repo/imports"
@ -63,6 +64,8 @@ type CommonStruct struct {
Discover func(p0 context.Context) (apitypes.OpenRPCDocument, error) `perm:"read"`
LogAlerts func(p0 context.Context) ([]alerting.Alert, error) `perm:"admin"`
LogList func(p0 context.Context) ([]string, error) `perm:"write"`
LogSetLevel func(p0 context.Context, p1 string, p2 string) error `perm:"write"`
@ -946,6 +949,17 @@ func (s *CommonStub) Discover(p0 context.Context) (apitypes.OpenRPCDocument, err
return *new(apitypes.OpenRPCDocument), ErrNotSupported
}
func (s *CommonStruct) LogAlerts(p0 context.Context) ([]alerting.Alert, error) {
if s.Internal.LogAlerts == nil {
return *new([]alerting.Alert), ErrNotSupported
}
return s.Internal.LogAlerts(p0)
}
func (s *CommonStub) LogAlerts(p0 context.Context) ([]alerting.Alert, error) {
return *new([]alerting.Alert), ErrNotSupported
}
func (s *CommonStruct) LogList(p0 context.Context) ([]string, error) {
if s.Internal.LogList == nil {
return *new([]string), ErrNotSupported

View File

@ -23,6 +23,7 @@ import (
apitypes "github.com/filecoin-project/lotus/api/types"
miner "github.com/filecoin-project/lotus/chain/actors/builtin/miner"
types "github.com/filecoin-project/lotus/chain/types"
alerting "github.com/filecoin-project/lotus/journal/alerting"
marketevents "github.com/filecoin-project/lotus/markets/loggers"
dtypes "github.com/filecoin-project/lotus/node/modules/dtypes"
imports "github.com/filecoin-project/lotus/node/repo/imports"
@ -950,6 +951,21 @@ func (mr *MockFullNodeMockRecorder) ID(arg0 interface{}) *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*MockFullNode)(nil).ID), arg0)
}
// LogAlerts mocks base method.
func (m *MockFullNode) LogAlerts(arg0 context.Context) ([]alerting.Alert, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "LogAlerts", arg0)
ret0, _ := ret[0].([]alerting.Alert)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// LogAlerts indicates an expected call of LogAlerts.
func (mr *MockFullNodeMockRecorder) LogAlerts(arg0 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LogAlerts", reflect.TypeOf((*MockFullNode)(nil).LogAlerts), arg0)
}
// LogList mocks base method.
func (m *MockFullNode) LogList(arg0 context.Context) ([]string, error) {
m.ctrl.T.Helper()

6
build/limits.go Normal file
View File

@ -0,0 +1,6 @@
package build
var (
DefaultFDLimit uint64 = 16 << 10
MinerFDLimit uint64 = 100_000
)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -2,7 +2,9 @@ package cli
import (
"fmt"
"time"
"github.com/fatih/color"
"github.com/urfave/cli/v2"
"golang.org/x/xerrors"
)
@ -13,6 +15,7 @@ var LogCmd = &cli.Command{
Subcommands: []*cli.Command{
LogList,
LogSetLevel,
LogAlerts,
},
}
@ -100,3 +103,51 @@ var LogSetLevel = &cli.Command{
return nil
},
}
var LogAlerts = &cli.Command{
Name: "alerts",
Usage: "Get alert states",
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "all",
Usage: "get all (active and inactive) alerts",
},
},
Action: func(cctx *cli.Context) error {
api, closer, err := GetAPI(cctx)
if err != nil {
return err
}
defer closer()
ctx := ReqContext(cctx)
alerts, err := api.LogAlerts(ctx)
if err != nil {
return xerrors.Errorf("getting alerts: %w", err)
}
all := cctx.Bool("all")
for _, alert := range alerts {
if !all && !alert.Active {
continue
}
active := color.RedString("active ")
if !alert.Active {
active = color.GreenString("inactive")
}
fmt.Printf("%s %s:%s\n", active, alert.Type.System, alert.Type.Subsystem)
if alert.LastResolved != nil {
fmt.Printf(" last resolved at %s; reason: %s\n", alert.LastResolved.Time.Truncate(time.Millisecond), alert.LastResolved.Message)
}
if alert.LastActive != nil {
fmt.Printf(" %s %s; reason: %s\n", color.YellowString("last raised at"), alert.LastActive.Time.Truncate(time.Millisecond), alert.LastActive.Message)
}
}
return nil
},
}

View File

@ -32,6 +32,7 @@ import (
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
"github.com/filecoin-project/lotus/chain/types"
lcli "github.com/filecoin-project/lotus/cli"
"github.com/filecoin-project/lotus/journal/alerting"
)
var infoCmd = &cli.Command{
@ -116,6 +117,21 @@ func infoCmdAct(cctx *cli.Context) error {
fmt.Println()
alerts, err := minerApi.LogAlerts(ctx)
if err != nil {
return xerrors.Errorf("getting alerts: %w", err)
}
activeAlerts := make([]alerting.Alert, 0)
for _, alert := range alerts {
if alert.Active {
activeAlerts = append(activeAlerts, alert)
}
}
if len(activeAlerts) > 0 {
fmt.Printf("%s (check %s)\n", color.RedString("⚠ %d Active alerts", len(activeAlerts)), color.YellowString("lotus-miner log alerts"))
}
err = handleMiningInfo(ctx, cctx, fullapi, minerApi)
if err != nil {
return err

View File

@ -51,6 +51,7 @@ import (
sealing "github.com/filecoin-project/lotus/extern/storage-sealing"
"github.com/filecoin-project/lotus/genesis"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/journal/fsjournal"
storageminer "github.com/filecoin-project/lotus/miner"
"github.com/filecoin-project/lotus/node/modules"
"github.com/filecoin-project/lotus/node/modules/dtypes"
@ -479,7 +480,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api v1api.FullNode
return err
}
j, err := journal.OpenFSJournal(lr, journal.EnvDisabledEvents())
j, err := fsjournal.OpenFSJournal(lr, journal.EnvDisabledEvents())
if err != nil {
return fmt.Errorf("failed to open filesystem journal: %w", err)
}

View File

@ -37,6 +37,7 @@ import (
lcli "github.com/filecoin-project/lotus/cli"
"github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/journal/fsjournal"
"github.com/filecoin-project/lotus/lib/peermgr"
"github.com/filecoin-project/lotus/lib/ulimit"
"github.com/filecoin-project/lotus/metrics"
@ -476,7 +477,7 @@ func ImportChain(ctx context.Context, r repo.Repo, fname string, snapshot bool)
return err
}
j, err := journal.OpenFSJournal(lr, journal.EnvDisabledEvents())
j, err := fsjournal.OpenFSJournal(lr, journal.EnvDisabledEvents())
if err != nil {
return xerrors.Errorf("failed to open journal: %w", err)
}

View File

@ -44,6 +44,7 @@
* [I](#I)
* [ID](#ID)
* [Log](#Log)
* [LogAlerts](#LogAlerts)
* [LogList](#LogList)
* [LogSetLevel](#LogSetLevel)
* [Market](#Market)
@ -664,6 +665,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
## Log
### LogAlerts
Perms: admin
Inputs: `null`
Response: `null`
### LogList

View File

@ -71,6 +71,7 @@
* [I](#I)
* [ID](#ID)
* [Log](#Log)
* [LogAlerts](#LogAlerts)
* [LogList](#LogList)
* [LogSetLevel](#LogSetLevel)
* [Market](#Market)
@ -1816,6 +1817,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
## Log
### LogAlerts
Perms: admin
Inputs: `null`
Response: `null`
### LogList

View File

@ -74,6 +74,7 @@
* [I](#I)
* [ID](#ID)
* [Log](#Log)
* [LogAlerts](#LogAlerts)
* [LogList](#LogList)
* [LogSetLevel](#LogSetLevel)
* [Market](#Market)
@ -1880,6 +1881,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
## Log
### LogAlerts
Perms: admin
Inputs: `null`
Response: `null`
### LogList

View File

@ -506,6 +506,7 @@ USAGE:
COMMANDS:
list List log systems
set-level Set log level
alerts Get alert states
help, h Shows a list of commands or help for one command
OPTIONS:
@ -561,6 +562,20 @@ OPTIONS:
```
### lotus-miner log alerts
```
NAME:
lotus-miner log alerts - Get alert states
USAGE:
lotus-miner log alerts [command options] [arguments...]
OPTIONS:
--all get all (active and inactive) alerts (default: false)
--help, -h show help (default: false)
```
## lotus-miner wait-api
```
NAME:

View File

@ -2366,6 +2366,7 @@ USAGE:
COMMANDS:
list List log systems
set-level Set log level
alerts Get alert states
help, h Shows a list of commands or help for one command
OPTIONS:
@ -2421,6 +2422,20 @@ OPTIONS:
```
### lotus log alerts
```
NAME:
lotus log alerts - Get alert states
USAGE:
lotus log alerts [command options] [arguments...]
OPTIONS:
--all get all (active and inactive) alerts (default: false)
--help, -h show help (default: false)
```
## lotus wait-api
```
NAME:

161
journal/alerting/alerts.go Normal file
View File

@ -0,0 +1,161 @@
package alerting
import (
"encoding/json"
"sort"
"sync"
"time"
"github.com/filecoin-project/lotus/journal"
logging "github.com/ipfs/go-log/v2"
)
var log = logging.Logger("alerting")
// Alerting provides simple stateful alert system. Consumers can register alerts,
// which can be raised and resolved.
//
// When an alert is raised or resolved, a related journal entry is recorded.
type Alerting struct {
j journal.Journal
lk sync.Mutex
alerts map[AlertType]Alert
}
// AlertType is a unique alert identifier
type AlertType struct {
System, Subsystem string
}
// AlertEvent contains information about alert state transition
type AlertEvent struct {
Type string // either 'raised' or 'resolved'
Message json.RawMessage
Time time.Time
}
type Alert struct {
Type AlertType
Active bool
LastActive *AlertEvent // NOTE: pointer for nullability, don't mutate the referenced object!
LastResolved *AlertEvent
journalType journal.EventType
}
func NewAlertingSystem(j journal.Journal) *Alerting {
return &Alerting{
j: j,
alerts: map[AlertType]Alert{},
}
}
func (a *Alerting) AddAlertType(system, subsystem string) AlertType {
a.lk.Lock()
defer a.lk.Unlock()
at := AlertType{
System: system,
Subsystem: subsystem,
}
if _, exists := a.alerts[at]; exists {
return at
}
et := a.j.RegisterEventType(system, subsystem)
a.alerts[at] = Alert{
Type: at,
Active: false,
journalType: et,
}
return at
}
func (a *Alerting) update(at AlertType, message interface{}, upd func(Alert, json.RawMessage) Alert) {
a.lk.Lock()
defer a.lk.Unlock()
alert, ok := a.alerts[at]
if !ok {
log.Errorw("unknown alert", "type", at, "message", message)
}
rawMsg, err := json.Marshal(message)
if err != nil {
log.Errorw("marshaling alert message failed", "type", at, "error", err)
rawMsg, err = json.Marshal(&struct {
AlertError string
}{
AlertError: err.Error(),
})
log.Errorw("marshaling marshaling error failed", "type", at, "error", err)
}
a.alerts[at] = upd(alert, rawMsg)
}
// Raise marks the alert condition as active and records related event in the journal
func (a *Alerting) Raise(at AlertType, message interface{}) {
log.Errorw("alert raised", "type", at, "message", message)
a.update(at, message, func(alert Alert, rawMsg json.RawMessage) Alert {
alert.Active = true
alert.LastActive = &AlertEvent{
Type: "raised",
Message: rawMsg,
Time: time.Now(),
}
a.j.RecordEvent(alert.journalType, func() interface{} {
return alert.LastActive
})
return alert
})
}
// Resolve marks the alert condition as resolved and records related event in the journal
func (a *Alerting) Resolve(at AlertType, message interface{}) {
log.Errorw("alert resolved", "type", at, "message", message)
a.update(at, message, func(alert Alert, rawMsg json.RawMessage) Alert {
alert.Active = false
alert.LastResolved = &AlertEvent{
Type: "resolved",
Message: rawMsg,
Time: time.Now(),
}
a.j.RecordEvent(alert.journalType, func() interface{} {
return alert.LastResolved
})
return alert
})
}
// GetAlerts returns all registered (active and inactive) alerts
func (a *Alerting) GetAlerts() []Alert {
a.lk.Lock()
defer a.lk.Unlock()
out := make([]Alert, 0, len(a.alerts))
for _, alert := range a.alerts {
out = append(out, alert)
}
sort.Slice(out, func(i, j int) bool {
if out[i].Type.System != out[j].Type.System {
return out[i].Type.System < out[j].Type.System
}
return out[i].Type.Subsystem < out[j].Type.Subsystem
})
return out
}

View File

@ -0,0 +1,61 @@
package alerting
import (
"encoding/json"
"testing"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/require"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/journal/mockjournal"
)
func TestAlerting(t *testing.T) {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
j := mockjournal.NewMockJournal(mockCtrl)
a := NewAlertingSystem(j)
j.EXPECT().RegisterEventType("s1", "b1").Return(journal.EventType{System: "s1", Event: "b1"})
al1 := a.AddAlertType("s1", "b1")
j.EXPECT().RegisterEventType("s2", "b2").Return(journal.EventType{System: "s2", Event: "b2"})
al2 := a.AddAlertType("s2", "b2")
l := a.GetAlerts()
require.Len(t, l, 2)
require.Equal(t, al1, l[0].Type)
require.Equal(t, al2, l[1].Type)
for _, alert := range l {
require.False(t, alert.Active)
require.Nil(t, alert.LastActive)
require.Nil(t, alert.LastResolved)
}
j.EXPECT().RecordEvent(a.alerts[al1].journalType, gomock.Any())
a.Raise(al1, "test")
for _, alert := range l { // check for no magic mutations
require.False(t, alert.Active)
require.Nil(t, alert.LastActive)
require.Nil(t, alert.LastResolved)
}
l = a.GetAlerts()
require.Len(t, l, 2)
require.Equal(t, al1, l[0].Type)
require.Equal(t, al2, l[1].Type)
require.True(t, l[0].Active)
require.NotNil(t, l[0].LastActive)
require.Equal(t, "raised", l[0].LastActive.Type)
require.Equal(t, json.RawMessage(`"test"`), l[0].LastActive.Message)
require.Nil(t, l[0].LastResolved)
require.False(t, l[1].Active)
require.Nil(t, l[1].LastActive)
require.Nil(t, l[1].LastResolved)
}

View File

@ -1,4 +1,4 @@
package journal
package fsjournal
import (
"encoding/json"
@ -6,17 +6,21 @@ import (
"os"
"path/filepath"
logging "github.com/ipfs/go-log/v2"
"golang.org/x/xerrors"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/node/repo"
)
var log = logging.Logger("fsjournal")
const RFC3339nocolon = "2006-01-02T150405Z0700"
// fsJournal is a basic journal backed by files on a filesystem.
type fsJournal struct {
EventTypeRegistry
journal.EventTypeRegistry
dir string
sizeLimit int64
@ -24,7 +28,7 @@ type fsJournal struct {
fi *os.File
fSize int64
incoming chan *Event
incoming chan *journal.Event
closing chan struct{}
closed chan struct{}
@ -32,17 +36,17 @@ type fsJournal struct {
// OpenFSJournal constructs a rolling filesystem journal, with a default
// per-file size limit of 1GiB.
func OpenFSJournal(lr repo.LockedRepo, disabled DisabledEvents) (Journal, error) {
func OpenFSJournal(lr repo.LockedRepo, disabled journal.DisabledEvents) (journal.Journal, error) {
dir := filepath.Join(lr.Path(), "journal")
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("failed to mk directory %s for file journal: %w", dir, err)
}
f := &fsJournal{
EventTypeRegistry: NewEventTypeRegistry(disabled),
EventTypeRegistry: journal.NewEventTypeRegistry(disabled),
dir: dir,
sizeLimit: 1 << 30,
incoming: make(chan *Event, 32),
incoming: make(chan *journal.Event, 32),
closing: make(chan struct{}),
closed: make(chan struct{}),
}
@ -56,7 +60,7 @@ func OpenFSJournal(lr repo.LockedRepo, disabled DisabledEvents) (Journal, error)
return f, nil
}
func (f *fsJournal) RecordEvent(evtType EventType, supplier func() interface{}) {
func (f *fsJournal) RecordEvent(evtType journal.EventType, supplier func() interface{}) {
defer func() {
if r := recover(); r != nil {
log.Warnf("recovered from panic while recording journal event; type=%s, err=%v", evtType, r)
@ -67,7 +71,7 @@ func (f *fsJournal) RecordEvent(evtType EventType, supplier func() interface{})
return
}
je := &Event{
je := &journal.Event{
EventType: evtType,
Timestamp: build.Clock.Now(),
Data: supplier(),
@ -85,7 +89,7 @@ func (f *fsJournal) Close() error {
return nil
}
func (f *fsJournal) putEvent(evt *Event) error {
func (f *fsJournal) putEvent(evt *journal.Event) error {
b, err := json.Marshal(evt)
if err != nil {
return err

View File

@ -0,0 +1,75 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: github.com/filecoin-project/lotus/journal (interfaces: Journal)
// Package mockjournal is a generated GoMock package.
package mockjournal
import (
reflect "reflect"
journal "github.com/filecoin-project/lotus/journal"
gomock "github.com/golang/mock/gomock"
)
// MockJournal is a mock of Journal interface.
type MockJournal struct {
ctrl *gomock.Controller
recorder *MockJournalMockRecorder
}
// MockJournalMockRecorder is the mock recorder for MockJournal.
type MockJournalMockRecorder struct {
mock *MockJournal
}
// NewMockJournal creates a new mock instance.
func NewMockJournal(ctrl *gomock.Controller) *MockJournal {
mock := &MockJournal{ctrl: ctrl}
mock.recorder = &MockJournalMockRecorder{mock}
return mock
}
// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockJournal) EXPECT() *MockJournalMockRecorder {
return m.recorder
}
// Close mocks base method.
func (m *MockJournal) Close() error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "Close")
ret0, _ := ret[0].(error)
return ret0
}
// Close indicates an expected call of Close.
func (mr *MockJournalMockRecorder) Close() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockJournal)(nil).Close))
}
// RecordEvent mocks base method.
func (m *MockJournal) RecordEvent(arg0 journal.EventType, arg1 func() interface{}) {
m.ctrl.T.Helper()
m.ctrl.Call(m, "RecordEvent", arg0, arg1)
}
// RecordEvent indicates an expected call of RecordEvent.
func (mr *MockJournalMockRecorder) RecordEvent(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RecordEvent", reflect.TypeOf((*MockJournal)(nil).RecordEvent), arg0, arg1)
}
// RegisterEventType mocks base method.
func (m *MockJournal) RegisterEventType(arg0, arg1 string) journal.EventType {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "RegisterEventType", arg0, arg1)
ret0, _ := ret[0].(journal.EventType)
return ret0
}
// RegisterEventType indicates an expected call of RegisterEventType.
func (mr *MockJournalMockRecorder) RegisterEventType(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RegisterEventType", reflect.TypeOf((*MockJournal)(nil).RegisterEventType), arg0, arg1)
}

View File

@ -4,12 +4,8 @@ import (
"fmt"
"strings"
"time"
logging "github.com/ipfs/go-log/v2"
)
var log = logging.Logger("journal")
var (
// DefaultDisabledEvents lists the journal events disabled by
// default, usually because they are considered noisy.
@ -69,6 +65,8 @@ func (et EventType) Enabled() bool {
return et.safe && et.enabled
}
//go:generate go run github.com/golang/mock/mockgen -destination=mockjournal/journal.go -package=mockjournal . Journal
// Journal represents an audit trail of system actions.
//
// Every entry is tagged with a timestamp, a system name, and an event name.

View File

@ -3,11 +3,14 @@ package ulimit
// from go-ipfs
import (
"errors"
"fmt"
"os"
"strconv"
"syscall"
"github.com/filecoin-project/lotus/build"
logging "github.com/ipfs/go-log/v2"
)
@ -25,8 +28,15 @@ var (
// minimum file descriptor limit before we complain
const minFds = 2048
// default max file descriptor limit.
const maxFds = 16 << 10
var ErrUnsupported = errors.New("unsupported")
func GetLimit() (uint64, uint64, error) {
if getLimit == nil {
return 0, 0, ErrUnsupported
}
return getLimit()
}
// userMaxFDs returns the value of LOTUS_FD_MAX
func userMaxFDs() uint64 {
@ -55,13 +65,13 @@ func ManageFdLimit() (changed bool, newLimit uint64, err error) {
return false, 0, nil
}
targetLimit := uint64(maxFds)
targetLimit := build.DefaultFDLimit
userLimit := userMaxFDs()
if userLimit > 0 {
targetLimit = userLimit
}
soft, hard, err := getLimit()
soft, hard, err := GetLimit()
if err != nil {
return false, 0, err
}

View File

@ -8,6 +8,8 @@ import (
"strings"
"syscall"
"testing"
"github.com/filecoin-project/lotus/build"
)
func TestManageFdLimit(t *testing.T) {
@ -16,7 +18,7 @@ func TestManageFdLimit(t *testing.T) {
t.Errorf("Cannot manage file descriptors")
}
if maxFds != uint64(16<<10) {
if build.DefaultFDLimit != uint64(16<<10) {
t.Errorf("Maximum file descriptors default value changed")
}
}

View File

@ -28,10 +28,12 @@ import (
"go.uber.org/fx"
"golang.org/x/xerrors"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/beacon"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/extern/sector-storage/stores"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/filecoin-project/lotus/lib/peermgr"
_ "github.com/filecoin-project/lotus/lib/sigs/bls"
_ "github.com/filecoin-project/lotus/lib/sigs/secp"
@ -82,6 +84,9 @@ const (
// System processes.
InitMemoryWatchdog
// health checks
CheckFDLimit
// libp2p
PstoreAddSelfKeysKey
StartListeningKey
@ -146,6 +151,9 @@ func defaults() []Option {
// global system journal.
Override(new(journal.DisabledEvents), journal.EnvDisabledEvents),
Override(new(journal.Journal), modules.OpenFilesystemJournal),
Override(new(*alerting.Alerting), alerting.NewAlertingSystem),
Override(CheckFDLimit, modules.CheckFdLimit(build.DefaultFDLimit)),
Override(new(system.MemoryConstraints), modules.MemoryConstraints),
Override(InitMemoryWatchdog, modules.MemoryWatchdog),

View File

@ -15,6 +15,7 @@ import (
storage2 "github.com/filecoin-project/specs-storage/storage"
"github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/gen"
"github.com/filecoin-project/lotus/chain/gen/slashfilter"
sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage"
@ -74,6 +75,8 @@ func ConfigStorageMiner(c interface{}) Option {
return Options(
ConfigCommon(&cfg.Common, enableLibp2pNode),
Override(CheckFDLimit, modules.CheckFdLimit(build.MinerFDLimit)), // recommend at least 100k FD limit to miners
Override(new(api.MinerSubsystems), modules.ExtractEnabledMinerSubsystems(cfg.Subsystems)),
Override(new(stores.LocalStorage), From(new(repo.LockedRepo))),
Override(new(*stores.Local), modules.LocalStorage),

View File

@ -10,9 +10,11 @@ import (
"golang.org/x/xerrors"
"github.com/filecoin-project/go-jsonrpc/auth"
"github.com/filecoin-project/lotus/api"
apitypes "github.com/filecoin-project/lotus/api/types"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/filecoin-project/lotus/node/modules/dtypes"
)
@ -21,6 +23,7 @@ var session = uuid.New()
type CommonAPI struct {
fx.In
Alerting *alerting.Alerting
APISecret *dtypes.APIAlg
ShutdownChan dtypes.ShutdownChan
}
@ -72,6 +75,10 @@ func (a *CommonAPI) LogSetLevel(ctx context.Context, subsystem, level string) er
return logging.SetLogLevel(subsystem, level)
}
func (a *CommonAPI) LogAlerts(ctx context.Context) ([]alerting.Alert, error) {
return a.Alerting.GetAlerts(), nil
}
func (a *CommonAPI) Shutdown(ctx context.Context) error {
a.ShutdownChan <- struct{}{}
return nil

44
node/modules/alerts.go Normal file
View File

@ -0,0 +1,44 @@
package modules
import (
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/filecoin-project/lotus/lib/ulimit"
)
func CheckFdLimit(min uint64) func(al *alerting.Alerting) {
return func(al *alerting.Alerting) {
soft, _, err := ulimit.GetLimit()
if err == ulimit.ErrUnsupported {
log.Warn("FD limit monitoring not available")
return
}
alert := al.AddAlertType("process", "fd-limit")
if err != nil {
al.Raise(alert, map[string]string{
"message": "failed to get FD limit",
"error": err.Error(),
})
}
if soft < min {
al.Raise(alert, map[string]interface{}{
"message": "soft FD limit is low",
"soft_limit": soft,
"recommended_min": min,
})
}
}
}
// TODO: More things:
// * Space in repo dirs (taking into account mounts)
// * Miner
// * Faulted partitions
// * Low balances
// * Market provider
// * Reachability
// * on-chain config
// * Low memory (maybe)
// * Network / sync issues

View File

@ -30,6 +30,7 @@ import (
"github.com/filecoin-project/lotus/chain/sub"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/journal/fsjournal"
"github.com/filecoin-project/lotus/lib/peermgr"
marketevents "github.com/filecoin-project/lotus/markets/loggers"
"github.com/filecoin-project/lotus/node/hello"
@ -237,7 +238,7 @@ func RandomSchedule(p RandomBeaconParams, _ dtypes.AfterGenesisSet) (beacon.Sche
}
func OpenFilesystemJournal(lr repo.LockedRepo, lc fx.Lifecycle, disabled journal.DisabledEvents) (journal.Journal, error) {
jrnl, err := journal.OpenFSJournal(lr, disabled)
jrnl, err := fsjournal.OpenFSJournal(lr, disabled)
if err != nil {
return nil, err
}