Merge pull request #7108 from filecoin-project/feat/alerting
Simple alert system; FD limit alerts
This commit is contained in:
commit
55dbbf5f0c
@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
apitypes "github.com/filecoin-project/lotus/api/types"
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
@ -33,6 +34,10 @@ type Common interface {
|
||||
LogList(context.Context) ([]string, error) //perm:write
|
||||
LogSetLevel(context.Context, string, string) error //perm:write
|
||||
|
||||
// LogAlerts returns list of all, active and inactive alerts tracked by the
|
||||
// node
|
||||
LogAlerts(ctx context.Context) ([]alerting.Alert, error) //perm:admin
|
||||
|
||||
// MethodGroup: Common
|
||||
|
||||
// Version provides information about API provider
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
apitypes "github.com/filecoin-project/lotus/api/types"
|
||||
miner "github.com/filecoin-project/lotus/chain/actors/builtin/miner"
|
||||
types "github.com/filecoin-project/lotus/chain/types"
|
||||
alerting "github.com/filecoin-project/lotus/journal/alerting"
|
||||
marketevents "github.com/filecoin-project/lotus/markets/loggers"
|
||||
dtypes "github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
imports "github.com/filecoin-project/lotus/node/repo/imports"
|
||||
@ -995,6 +996,21 @@ func (mr *MockFullNodeMockRecorder) ID(arg0 interface{}) *gomock.Call {
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*MockFullNode)(nil).ID), arg0)
|
||||
}
|
||||
|
||||
// LogAlerts mocks base method.
|
||||
func (m *MockFullNode) LogAlerts(arg0 context.Context) ([]alerting.Alert, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "LogAlerts", arg0)
|
||||
ret0, _ := ret[0].([]alerting.Alert)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// LogAlerts indicates an expected call of LogAlerts.
|
||||
func (mr *MockFullNodeMockRecorder) LogAlerts(arg0 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LogAlerts", reflect.TypeOf((*MockFullNode)(nil).LogAlerts), arg0)
|
||||
}
|
||||
|
||||
// LogList mocks base method.
|
||||
func (m *MockFullNode) LogList(arg0 context.Context) ([]string, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
@ -27,6 +27,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/extern/sector-storage/stores"
|
||||
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
|
||||
"github.com/filecoin-project/lotus/extern/storage-sealing/sealiface"
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
marketevents "github.com/filecoin-project/lotus/markets/loggers"
|
||||
"github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
"github.com/filecoin-project/lotus/node/repo/imports"
|
||||
@ -63,6 +64,8 @@ type CommonStruct struct {
|
||||
|
||||
Discover func(p0 context.Context) (apitypes.OpenRPCDocument, error) `perm:"read"`
|
||||
|
||||
LogAlerts func(p0 context.Context) ([]alerting.Alert, error) `perm:"admin"`
|
||||
|
||||
LogList func(p0 context.Context) ([]string, error) `perm:"write"`
|
||||
|
||||
LogSetLevel func(p0 context.Context, p1 string, p2 string) error `perm:"write"`
|
||||
@ -946,6 +949,17 @@ func (s *CommonStub) Discover(p0 context.Context) (apitypes.OpenRPCDocument, err
|
||||
return *new(apitypes.OpenRPCDocument), ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *CommonStruct) LogAlerts(p0 context.Context) ([]alerting.Alert, error) {
|
||||
if s.Internal.LogAlerts == nil {
|
||||
return *new([]alerting.Alert), ErrNotSupported
|
||||
}
|
||||
return s.Internal.LogAlerts(p0)
|
||||
}
|
||||
|
||||
func (s *CommonStub) LogAlerts(p0 context.Context) ([]alerting.Alert, error) {
|
||||
return *new([]alerting.Alert), ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *CommonStruct) LogList(p0 context.Context) ([]string, error) {
|
||||
if s.Internal.LogList == nil {
|
||||
return *new([]string), ErrNotSupported
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
apitypes "github.com/filecoin-project/lotus/api/types"
|
||||
miner "github.com/filecoin-project/lotus/chain/actors/builtin/miner"
|
||||
types "github.com/filecoin-project/lotus/chain/types"
|
||||
alerting "github.com/filecoin-project/lotus/journal/alerting"
|
||||
marketevents "github.com/filecoin-project/lotus/markets/loggers"
|
||||
dtypes "github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
imports "github.com/filecoin-project/lotus/node/repo/imports"
|
||||
@ -950,6 +951,21 @@ func (mr *MockFullNodeMockRecorder) ID(arg0 interface{}) *gomock.Call {
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*MockFullNode)(nil).ID), arg0)
|
||||
}
|
||||
|
||||
// LogAlerts mocks base method.
|
||||
func (m *MockFullNode) LogAlerts(arg0 context.Context) ([]alerting.Alert, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "LogAlerts", arg0)
|
||||
ret0, _ := ret[0].([]alerting.Alert)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// LogAlerts indicates an expected call of LogAlerts.
|
||||
func (mr *MockFullNodeMockRecorder) LogAlerts(arg0 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LogAlerts", reflect.TypeOf((*MockFullNode)(nil).LogAlerts), arg0)
|
||||
}
|
||||
|
||||
// LogList mocks base method.
|
||||
func (m *MockFullNode) LogList(arg0 context.Context) ([]string, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
6
build/limits.go
Normal file
6
build/limits.go
Normal file
@ -0,0 +1,6 @@
|
||||
package build
|
||||
|
||||
var (
|
||||
DefaultFDLimit uint64 = 16 << 10
|
||||
MinerFDLimit uint64 = 100_000
|
||||
)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
51
cli/log.go
51
cli/log.go
@ -2,7 +2,9 @@ package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/fatih/color"
|
||||
"github.com/urfave/cli/v2"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
@ -13,6 +15,7 @@ var LogCmd = &cli.Command{
|
||||
Subcommands: []*cli.Command{
|
||||
LogList,
|
||||
LogSetLevel,
|
||||
LogAlerts,
|
||||
},
|
||||
}
|
||||
|
||||
@ -100,3 +103,51 @@ var LogSetLevel = &cli.Command{
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
var LogAlerts = &cli.Command{
|
||||
Name: "alerts",
|
||||
Usage: "Get alert states",
|
||||
Flags: []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "all",
|
||||
Usage: "get all (active and inactive) alerts",
|
||||
},
|
||||
},
|
||||
Action: func(cctx *cli.Context) error {
|
||||
api, closer, err := GetAPI(cctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer closer()
|
||||
|
||||
ctx := ReqContext(cctx)
|
||||
|
||||
alerts, err := api.LogAlerts(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("getting alerts: %w", err)
|
||||
}
|
||||
|
||||
all := cctx.Bool("all")
|
||||
|
||||
for _, alert := range alerts {
|
||||
if !all && !alert.Active {
|
||||
continue
|
||||
}
|
||||
|
||||
active := color.RedString("active ")
|
||||
if !alert.Active {
|
||||
active = color.GreenString("inactive")
|
||||
}
|
||||
|
||||
fmt.Printf("%s %s:%s\n", active, alert.Type.System, alert.Type.Subsystem)
|
||||
if alert.LastResolved != nil {
|
||||
fmt.Printf(" last resolved at %s; reason: %s\n", alert.LastResolved.Time.Truncate(time.Millisecond), alert.LastResolved.Message)
|
||||
}
|
||||
if alert.LastActive != nil {
|
||||
fmt.Printf(" %s %s; reason: %s\n", color.YellowString("last raised at"), alert.LastActive.Time.Truncate(time.Millisecond), alert.LastActive.Message)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
lcli "github.com/filecoin-project/lotus/cli"
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
)
|
||||
|
||||
var infoCmd = &cli.Command{
|
||||
@ -116,6 +117,21 @@ func infoCmdAct(cctx *cli.Context) error {
|
||||
|
||||
fmt.Println()
|
||||
|
||||
alerts, err := minerApi.LogAlerts(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("getting alerts: %w", err)
|
||||
}
|
||||
|
||||
activeAlerts := make([]alerting.Alert, 0)
|
||||
for _, alert := range alerts {
|
||||
if alert.Active {
|
||||
activeAlerts = append(activeAlerts, alert)
|
||||
}
|
||||
}
|
||||
if len(activeAlerts) > 0 {
|
||||
fmt.Printf("%s (check %s)\n", color.RedString("⚠ %d Active alerts", len(activeAlerts)), color.YellowString("lotus-miner log alerts"))
|
||||
}
|
||||
|
||||
err = handleMiningInfo(ctx, cctx, fullapi, minerApi)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -51,6 +51,7 @@ import (
|
||||
sealing "github.com/filecoin-project/lotus/extern/storage-sealing"
|
||||
"github.com/filecoin-project/lotus/genesis"
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/journal/fsjournal"
|
||||
storageminer "github.com/filecoin-project/lotus/miner"
|
||||
"github.com/filecoin-project/lotus/node/modules"
|
||||
"github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
@ -479,7 +480,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api v1api.FullNode
|
||||
return err
|
||||
}
|
||||
|
||||
j, err := journal.OpenFSJournal(lr, journal.EnvDisabledEvents())
|
||||
j, err := fsjournal.OpenFSJournal(lr, journal.EnvDisabledEvents())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open filesystem journal: %w", err)
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ import (
|
||||
lcli "github.com/filecoin-project/lotus/cli"
|
||||
"github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper"
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/journal/fsjournal"
|
||||
"github.com/filecoin-project/lotus/lib/peermgr"
|
||||
"github.com/filecoin-project/lotus/lib/ulimit"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
@ -476,7 +477,7 @@ func ImportChain(ctx context.Context, r repo.Repo, fname string, snapshot bool)
|
||||
return err
|
||||
}
|
||||
|
||||
j, err := journal.OpenFSJournal(lr, journal.EnvDisabledEvents())
|
||||
j, err := fsjournal.OpenFSJournal(lr, journal.EnvDisabledEvents())
|
||||
if err != nil {
|
||||
return xerrors.Errorf("failed to open journal: %w", err)
|
||||
}
|
||||
|
@ -44,6 +44,7 @@
|
||||
* [I](#I)
|
||||
* [ID](#ID)
|
||||
* [Log](#Log)
|
||||
* [LogAlerts](#LogAlerts)
|
||||
* [LogList](#LogList)
|
||||
* [LogSetLevel](#LogSetLevel)
|
||||
* [Market](#Market)
|
||||
@ -664,6 +665,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
|
||||
## Log
|
||||
|
||||
|
||||
### LogAlerts
|
||||
|
||||
|
||||
Perms: admin
|
||||
|
||||
Inputs: `null`
|
||||
|
||||
Response: `null`
|
||||
|
||||
### LogList
|
||||
|
||||
|
||||
|
@ -71,6 +71,7 @@
|
||||
* [I](#I)
|
||||
* [ID](#ID)
|
||||
* [Log](#Log)
|
||||
* [LogAlerts](#LogAlerts)
|
||||
* [LogList](#LogList)
|
||||
* [LogSetLevel](#LogSetLevel)
|
||||
* [Market](#Market)
|
||||
@ -1816,6 +1817,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
|
||||
## Log
|
||||
|
||||
|
||||
### LogAlerts
|
||||
|
||||
|
||||
Perms: admin
|
||||
|
||||
Inputs: `null`
|
||||
|
||||
Response: `null`
|
||||
|
||||
### LogList
|
||||
|
||||
|
||||
|
@ -74,6 +74,7 @@
|
||||
* [I](#I)
|
||||
* [ID](#ID)
|
||||
* [Log](#Log)
|
||||
* [LogAlerts](#LogAlerts)
|
||||
* [LogList](#LogList)
|
||||
* [LogSetLevel](#LogSetLevel)
|
||||
* [Market](#Market)
|
||||
@ -1880,6 +1881,15 @@ Response: `"12D3KooWGzxzKZYveHXtpG6AsrUJBcWxHBFS2HsEoGTxrMLvKXtf"`
|
||||
## Log
|
||||
|
||||
|
||||
### LogAlerts
|
||||
|
||||
|
||||
Perms: admin
|
||||
|
||||
Inputs: `null`
|
||||
|
||||
Response: `null`
|
||||
|
||||
### LogList
|
||||
|
||||
|
||||
|
@ -506,6 +506,7 @@ USAGE:
|
||||
COMMANDS:
|
||||
list List log systems
|
||||
set-level Set log level
|
||||
alerts Get alert states
|
||||
help, h Shows a list of commands or help for one command
|
||||
|
||||
OPTIONS:
|
||||
@ -561,6 +562,20 @@ OPTIONS:
|
||||
|
||||
```
|
||||
|
||||
### lotus-miner log alerts
|
||||
```
|
||||
NAME:
|
||||
lotus-miner log alerts - Get alert states
|
||||
|
||||
USAGE:
|
||||
lotus-miner log alerts [command options] [arguments...]
|
||||
|
||||
OPTIONS:
|
||||
--all get all (active and inactive) alerts (default: false)
|
||||
--help, -h show help (default: false)
|
||||
|
||||
```
|
||||
|
||||
## lotus-miner wait-api
|
||||
```
|
||||
NAME:
|
||||
|
@ -2366,6 +2366,7 @@ USAGE:
|
||||
COMMANDS:
|
||||
list List log systems
|
||||
set-level Set log level
|
||||
alerts Get alert states
|
||||
help, h Shows a list of commands or help for one command
|
||||
|
||||
OPTIONS:
|
||||
@ -2421,6 +2422,20 @@ OPTIONS:
|
||||
|
||||
```
|
||||
|
||||
### lotus log alerts
|
||||
```
|
||||
NAME:
|
||||
lotus log alerts - Get alert states
|
||||
|
||||
USAGE:
|
||||
lotus log alerts [command options] [arguments...]
|
||||
|
||||
OPTIONS:
|
||||
--all get all (active and inactive) alerts (default: false)
|
||||
--help, -h show help (default: false)
|
||||
|
||||
```
|
||||
|
||||
## lotus wait-api
|
||||
```
|
||||
NAME:
|
||||
|
161
journal/alerting/alerts.go
Normal file
161
journal/alerting/alerts.go
Normal file
@ -0,0 +1,161 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
)
|
||||
|
||||
var log = logging.Logger("alerting")
|
||||
|
||||
// Alerting provides simple stateful alert system. Consumers can register alerts,
|
||||
// which can be raised and resolved.
|
||||
//
|
||||
// When an alert is raised or resolved, a related journal entry is recorded.
|
||||
type Alerting struct {
|
||||
j journal.Journal
|
||||
|
||||
lk sync.Mutex
|
||||
alerts map[AlertType]Alert
|
||||
}
|
||||
|
||||
// AlertType is a unique alert identifier
|
||||
type AlertType struct {
|
||||
System, Subsystem string
|
||||
}
|
||||
|
||||
// AlertEvent contains information about alert state transition
|
||||
type AlertEvent struct {
|
||||
Type string // either 'raised' or 'resolved'
|
||||
Message json.RawMessage
|
||||
Time time.Time
|
||||
}
|
||||
|
||||
type Alert struct {
|
||||
Type AlertType
|
||||
Active bool
|
||||
|
||||
LastActive *AlertEvent // NOTE: pointer for nullability, don't mutate the referenced object!
|
||||
LastResolved *AlertEvent
|
||||
|
||||
journalType journal.EventType
|
||||
}
|
||||
|
||||
func NewAlertingSystem(j journal.Journal) *Alerting {
|
||||
return &Alerting{
|
||||
j: j,
|
||||
|
||||
alerts: map[AlertType]Alert{},
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Alerting) AddAlertType(system, subsystem string) AlertType {
|
||||
a.lk.Lock()
|
||||
defer a.lk.Unlock()
|
||||
|
||||
at := AlertType{
|
||||
System: system,
|
||||
Subsystem: subsystem,
|
||||
}
|
||||
|
||||
if _, exists := a.alerts[at]; exists {
|
||||
return at
|
||||
}
|
||||
|
||||
et := a.j.RegisterEventType(system, subsystem)
|
||||
|
||||
a.alerts[at] = Alert{
|
||||
Type: at,
|
||||
Active: false,
|
||||
journalType: et,
|
||||
}
|
||||
|
||||
return at
|
||||
}
|
||||
|
||||
func (a *Alerting) update(at AlertType, message interface{}, upd func(Alert, json.RawMessage) Alert) {
|
||||
a.lk.Lock()
|
||||
defer a.lk.Unlock()
|
||||
|
||||
alert, ok := a.alerts[at]
|
||||
if !ok {
|
||||
log.Errorw("unknown alert", "type", at, "message", message)
|
||||
}
|
||||
|
||||
rawMsg, err := json.Marshal(message)
|
||||
if err != nil {
|
||||
log.Errorw("marshaling alert message failed", "type", at, "error", err)
|
||||
rawMsg, err = json.Marshal(&struct {
|
||||
AlertError string
|
||||
}{
|
||||
AlertError: err.Error(),
|
||||
})
|
||||
log.Errorw("marshaling marshaling error failed", "type", at, "error", err)
|
||||
}
|
||||
|
||||
a.alerts[at] = upd(alert, rawMsg)
|
||||
}
|
||||
|
||||
// Raise marks the alert condition as active and records related event in the journal
|
||||
func (a *Alerting) Raise(at AlertType, message interface{}) {
|
||||
log.Errorw("alert raised", "type", at, "message", message)
|
||||
|
||||
a.update(at, message, func(alert Alert, rawMsg json.RawMessage) Alert {
|
||||
alert.Active = true
|
||||
alert.LastActive = &AlertEvent{
|
||||
Type: "raised",
|
||||
Message: rawMsg,
|
||||
Time: time.Now(),
|
||||
}
|
||||
|
||||
a.j.RecordEvent(alert.journalType, func() interface{} {
|
||||
return alert.LastActive
|
||||
})
|
||||
|
||||
return alert
|
||||
})
|
||||
}
|
||||
|
||||
// Resolve marks the alert condition as resolved and records related event in the journal
|
||||
func (a *Alerting) Resolve(at AlertType, message interface{}) {
|
||||
log.Errorw("alert resolved", "type", at, "message", message)
|
||||
|
||||
a.update(at, message, func(alert Alert, rawMsg json.RawMessage) Alert {
|
||||
alert.Active = false
|
||||
alert.LastResolved = &AlertEvent{
|
||||
Type: "resolved",
|
||||
Message: rawMsg,
|
||||
Time: time.Now(),
|
||||
}
|
||||
|
||||
a.j.RecordEvent(alert.journalType, func() interface{} {
|
||||
return alert.LastResolved
|
||||
})
|
||||
|
||||
return alert
|
||||
})
|
||||
}
|
||||
|
||||
// GetAlerts returns all registered (active and inactive) alerts
|
||||
func (a *Alerting) GetAlerts() []Alert {
|
||||
a.lk.Lock()
|
||||
defer a.lk.Unlock()
|
||||
|
||||
out := make([]Alert, 0, len(a.alerts))
|
||||
for _, alert := range a.alerts {
|
||||
out = append(out, alert)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool {
|
||||
if out[i].Type.System != out[j].Type.System {
|
||||
return out[i].Type.System < out[j].Type.System
|
||||
}
|
||||
|
||||
return out[i].Type.Subsystem < out[j].Type.Subsystem
|
||||
})
|
||||
|
||||
return out
|
||||
}
|
61
journal/alerting/alerts_test.go
Normal file
61
journal/alerting/alerts_test.go
Normal file
@ -0,0 +1,61 @@
|
||||
package alerting
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/mock/gomock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/journal/mockjournal"
|
||||
)
|
||||
|
||||
func TestAlerting(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
j := mockjournal.NewMockJournal(mockCtrl)
|
||||
|
||||
a := NewAlertingSystem(j)
|
||||
|
||||
j.EXPECT().RegisterEventType("s1", "b1").Return(journal.EventType{System: "s1", Event: "b1"})
|
||||
al1 := a.AddAlertType("s1", "b1")
|
||||
|
||||
j.EXPECT().RegisterEventType("s2", "b2").Return(journal.EventType{System: "s2", Event: "b2"})
|
||||
al2 := a.AddAlertType("s2", "b2")
|
||||
|
||||
l := a.GetAlerts()
|
||||
require.Len(t, l, 2)
|
||||
require.Equal(t, al1, l[0].Type)
|
||||
require.Equal(t, al2, l[1].Type)
|
||||
|
||||
for _, alert := range l {
|
||||
require.False(t, alert.Active)
|
||||
require.Nil(t, alert.LastActive)
|
||||
require.Nil(t, alert.LastResolved)
|
||||
}
|
||||
|
||||
j.EXPECT().RecordEvent(a.alerts[al1].journalType, gomock.Any())
|
||||
a.Raise(al1, "test")
|
||||
|
||||
for _, alert := range l { // check for no magic mutations
|
||||
require.False(t, alert.Active)
|
||||
require.Nil(t, alert.LastActive)
|
||||
require.Nil(t, alert.LastResolved)
|
||||
}
|
||||
|
||||
l = a.GetAlerts()
|
||||
require.Len(t, l, 2)
|
||||
require.Equal(t, al1, l[0].Type)
|
||||
require.Equal(t, al2, l[1].Type)
|
||||
|
||||
require.True(t, l[0].Active)
|
||||
require.NotNil(t, l[0].LastActive)
|
||||
require.Equal(t, "raised", l[0].LastActive.Type)
|
||||
require.Equal(t, json.RawMessage(`"test"`), l[0].LastActive.Message)
|
||||
require.Nil(t, l[0].LastResolved)
|
||||
|
||||
require.False(t, l[1].Active)
|
||||
require.Nil(t, l[1].LastActive)
|
||||
require.Nil(t, l[1].LastResolved)
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package journal
|
||||
package fsjournal
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
@ -6,17 +6,21 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/node/repo"
|
||||
)
|
||||
|
||||
var log = logging.Logger("fsjournal")
|
||||
|
||||
const RFC3339nocolon = "2006-01-02T150405Z0700"
|
||||
|
||||
// fsJournal is a basic journal backed by files on a filesystem.
|
||||
type fsJournal struct {
|
||||
EventTypeRegistry
|
||||
journal.EventTypeRegistry
|
||||
|
||||
dir string
|
||||
sizeLimit int64
|
||||
@ -24,7 +28,7 @@ type fsJournal struct {
|
||||
fi *os.File
|
||||
fSize int64
|
||||
|
||||
incoming chan *Event
|
||||
incoming chan *journal.Event
|
||||
|
||||
closing chan struct{}
|
||||
closed chan struct{}
|
||||
@ -32,17 +36,17 @@ type fsJournal struct {
|
||||
|
||||
// OpenFSJournal constructs a rolling filesystem journal, with a default
|
||||
// per-file size limit of 1GiB.
|
||||
func OpenFSJournal(lr repo.LockedRepo, disabled DisabledEvents) (Journal, error) {
|
||||
func OpenFSJournal(lr repo.LockedRepo, disabled journal.DisabledEvents) (journal.Journal, error) {
|
||||
dir := filepath.Join(lr.Path(), "journal")
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to mk directory %s for file journal: %w", dir, err)
|
||||
}
|
||||
|
||||
f := &fsJournal{
|
||||
EventTypeRegistry: NewEventTypeRegistry(disabled),
|
||||
EventTypeRegistry: journal.NewEventTypeRegistry(disabled),
|
||||
dir: dir,
|
||||
sizeLimit: 1 << 30,
|
||||
incoming: make(chan *Event, 32),
|
||||
incoming: make(chan *journal.Event, 32),
|
||||
closing: make(chan struct{}),
|
||||
closed: make(chan struct{}),
|
||||
}
|
||||
@ -56,7 +60,7 @@ func OpenFSJournal(lr repo.LockedRepo, disabled DisabledEvents) (Journal, error)
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func (f *fsJournal) RecordEvent(evtType EventType, supplier func() interface{}) {
|
||||
func (f *fsJournal) RecordEvent(evtType journal.EventType, supplier func() interface{}) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Warnf("recovered from panic while recording journal event; type=%s, err=%v", evtType, r)
|
||||
@ -67,7 +71,7 @@ func (f *fsJournal) RecordEvent(evtType EventType, supplier func() interface{})
|
||||
return
|
||||
}
|
||||
|
||||
je := &Event{
|
||||
je := &journal.Event{
|
||||
EventType: evtType,
|
||||
Timestamp: build.Clock.Now(),
|
||||
Data: supplier(),
|
||||
@ -85,7 +89,7 @@ func (f *fsJournal) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fsJournal) putEvent(evt *Event) error {
|
||||
func (f *fsJournal) putEvent(evt *journal.Event) error {
|
||||
b, err := json.Marshal(evt)
|
||||
if err != nil {
|
||||
return err
|
75
journal/mockjournal/journal.go
Normal file
75
journal/mockjournal/journal.go
Normal file
@ -0,0 +1,75 @@
|
||||
// Code generated by MockGen. DO NOT EDIT.
|
||||
// Source: github.com/filecoin-project/lotus/journal (interfaces: Journal)
|
||||
|
||||
// Package mockjournal is a generated GoMock package.
|
||||
package mockjournal
|
||||
|
||||
import (
|
||||
reflect "reflect"
|
||||
|
||||
journal "github.com/filecoin-project/lotus/journal"
|
||||
gomock "github.com/golang/mock/gomock"
|
||||
)
|
||||
|
||||
// MockJournal is a mock of Journal interface.
|
||||
type MockJournal struct {
|
||||
ctrl *gomock.Controller
|
||||
recorder *MockJournalMockRecorder
|
||||
}
|
||||
|
||||
// MockJournalMockRecorder is the mock recorder for MockJournal.
|
||||
type MockJournalMockRecorder struct {
|
||||
mock *MockJournal
|
||||
}
|
||||
|
||||
// NewMockJournal creates a new mock instance.
|
||||
func NewMockJournal(ctrl *gomock.Controller) *MockJournal {
|
||||
mock := &MockJournal{ctrl: ctrl}
|
||||
mock.recorder = &MockJournalMockRecorder{mock}
|
||||
return mock
|
||||
}
|
||||
|
||||
// EXPECT returns an object that allows the caller to indicate expected use.
|
||||
func (m *MockJournal) EXPECT() *MockJournalMockRecorder {
|
||||
return m.recorder
|
||||
}
|
||||
|
||||
// Close mocks base method.
|
||||
func (m *MockJournal) Close() error {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "Close")
|
||||
ret0, _ := ret[0].(error)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// Close indicates an expected call of Close.
|
||||
func (mr *MockJournalMockRecorder) Close() *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockJournal)(nil).Close))
|
||||
}
|
||||
|
||||
// RecordEvent mocks base method.
|
||||
func (m *MockJournal) RecordEvent(arg0 journal.EventType, arg1 func() interface{}) {
|
||||
m.ctrl.T.Helper()
|
||||
m.ctrl.Call(m, "RecordEvent", arg0, arg1)
|
||||
}
|
||||
|
||||
// RecordEvent indicates an expected call of RecordEvent.
|
||||
func (mr *MockJournalMockRecorder) RecordEvent(arg0, arg1 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RecordEvent", reflect.TypeOf((*MockJournal)(nil).RecordEvent), arg0, arg1)
|
||||
}
|
||||
|
||||
// RegisterEventType mocks base method.
|
||||
func (m *MockJournal) RegisterEventType(arg0, arg1 string) journal.EventType {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "RegisterEventType", arg0, arg1)
|
||||
ret0, _ := ret[0].(journal.EventType)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// RegisterEventType indicates an expected call of RegisterEventType.
|
||||
func (mr *MockJournalMockRecorder) RegisterEventType(arg0, arg1 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RegisterEventType", reflect.TypeOf((*MockJournal)(nil).RegisterEventType), arg0, arg1)
|
||||
}
|
@ -4,12 +4,8 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
)
|
||||
|
||||
var log = logging.Logger("journal")
|
||||
|
||||
var (
|
||||
// DefaultDisabledEvents lists the journal events disabled by
|
||||
// default, usually because they are considered noisy.
|
||||
@ -69,6 +65,8 @@ func (et EventType) Enabled() bool {
|
||||
return et.safe && et.enabled
|
||||
}
|
||||
|
||||
//go:generate go run github.com/golang/mock/mockgen -destination=mockjournal/journal.go -package=mockjournal . Journal
|
||||
|
||||
// Journal represents an audit trail of system actions.
|
||||
//
|
||||
// Every entry is tagged with a timestamp, a system name, and an event name.
|
||||
|
@ -3,11 +3,14 @@ package ulimit
|
||||
// from go-ipfs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
|
||||
logging "github.com/ipfs/go-log/v2"
|
||||
)
|
||||
|
||||
@ -25,8 +28,15 @@ var (
|
||||
// minimum file descriptor limit before we complain
|
||||
const minFds = 2048
|
||||
|
||||
// default max file descriptor limit.
|
||||
const maxFds = 16 << 10
|
||||
var ErrUnsupported = errors.New("unsupported")
|
||||
|
||||
func GetLimit() (uint64, uint64, error) {
|
||||
if getLimit == nil {
|
||||
return 0, 0, ErrUnsupported
|
||||
}
|
||||
|
||||
return getLimit()
|
||||
}
|
||||
|
||||
// userMaxFDs returns the value of LOTUS_FD_MAX
|
||||
func userMaxFDs() uint64 {
|
||||
@ -55,13 +65,13 @@ func ManageFdLimit() (changed bool, newLimit uint64, err error) {
|
||||
return false, 0, nil
|
||||
}
|
||||
|
||||
targetLimit := uint64(maxFds)
|
||||
targetLimit := build.DefaultFDLimit
|
||||
userLimit := userMaxFDs()
|
||||
if userLimit > 0 {
|
||||
targetLimit = userLimit
|
||||
}
|
||||
|
||||
soft, hard, err := getLimit()
|
||||
soft, hard, err := GetLimit()
|
||||
if err != nil {
|
||||
return false, 0, err
|
||||
}
|
||||
|
@ -8,6 +8,8 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
)
|
||||
|
||||
func TestManageFdLimit(t *testing.T) {
|
||||
@ -16,7 +18,7 @@ func TestManageFdLimit(t *testing.T) {
|
||||
t.Errorf("Cannot manage file descriptors")
|
||||
}
|
||||
|
||||
if maxFds != uint64(16<<10) {
|
||||
if build.DefaultFDLimit != uint64(16<<10) {
|
||||
t.Errorf("Maximum file descriptors default value changed")
|
||||
}
|
||||
}
|
||||
|
@ -28,10 +28,12 @@ import (
|
||||
"go.uber.org/fx"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/chain/beacon"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
"github.com/filecoin-project/lotus/extern/sector-storage/stores"
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
"github.com/filecoin-project/lotus/lib/peermgr"
|
||||
_ "github.com/filecoin-project/lotus/lib/sigs/bls"
|
||||
_ "github.com/filecoin-project/lotus/lib/sigs/secp"
|
||||
@ -82,6 +84,9 @@ const (
|
||||
// System processes.
|
||||
InitMemoryWatchdog
|
||||
|
||||
// health checks
|
||||
CheckFDLimit
|
||||
|
||||
// libp2p
|
||||
PstoreAddSelfKeysKey
|
||||
StartListeningKey
|
||||
@ -146,6 +151,9 @@ func defaults() []Option {
|
||||
// global system journal.
|
||||
Override(new(journal.DisabledEvents), journal.EnvDisabledEvents),
|
||||
Override(new(journal.Journal), modules.OpenFilesystemJournal),
|
||||
Override(new(*alerting.Alerting), alerting.NewAlertingSystem),
|
||||
|
||||
Override(CheckFDLimit, modules.CheckFdLimit(build.DefaultFDLimit)),
|
||||
|
||||
Override(new(system.MemoryConstraints), modules.MemoryConstraints),
|
||||
Override(InitMemoryWatchdog, modules.MemoryWatchdog),
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
storage2 "github.com/filecoin-project/specs-storage/storage"
|
||||
|
||||
"github.com/filecoin-project/lotus/api"
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/chain/gen"
|
||||
"github.com/filecoin-project/lotus/chain/gen/slashfilter"
|
||||
sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage"
|
||||
@ -74,6 +75,8 @@ func ConfigStorageMiner(c interface{}) Option {
|
||||
return Options(
|
||||
ConfigCommon(&cfg.Common, enableLibp2pNode),
|
||||
|
||||
Override(CheckFDLimit, modules.CheckFdLimit(build.MinerFDLimit)), // recommend at least 100k FD limit to miners
|
||||
|
||||
Override(new(api.MinerSubsystems), modules.ExtractEnabledMinerSubsystems(cfg.Subsystems)),
|
||||
Override(new(stores.LocalStorage), From(new(repo.LockedRepo))),
|
||||
Override(new(*stores.Local), modules.LocalStorage),
|
||||
|
@ -10,9 +10,11 @@ import (
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/filecoin-project/go-jsonrpc/auth"
|
||||
|
||||
"github.com/filecoin-project/lotus/api"
|
||||
apitypes "github.com/filecoin-project/lotus/api/types"
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
"github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
)
|
||||
|
||||
@ -21,6 +23,7 @@ var session = uuid.New()
|
||||
type CommonAPI struct {
|
||||
fx.In
|
||||
|
||||
Alerting *alerting.Alerting
|
||||
APISecret *dtypes.APIAlg
|
||||
ShutdownChan dtypes.ShutdownChan
|
||||
}
|
||||
@ -72,6 +75,10 @@ func (a *CommonAPI) LogSetLevel(ctx context.Context, subsystem, level string) er
|
||||
return logging.SetLogLevel(subsystem, level)
|
||||
}
|
||||
|
||||
func (a *CommonAPI) LogAlerts(ctx context.Context) ([]alerting.Alert, error) {
|
||||
return a.Alerting.GetAlerts(), nil
|
||||
}
|
||||
|
||||
func (a *CommonAPI) Shutdown(ctx context.Context) error {
|
||||
a.ShutdownChan <- struct{}{}
|
||||
return nil
|
||||
|
44
node/modules/alerts.go
Normal file
44
node/modules/alerts.go
Normal file
@ -0,0 +1,44 @@
|
||||
package modules
|
||||
|
||||
import (
|
||||
"github.com/filecoin-project/lotus/journal/alerting"
|
||||
"github.com/filecoin-project/lotus/lib/ulimit"
|
||||
)
|
||||
|
||||
func CheckFdLimit(min uint64) func(al *alerting.Alerting) {
|
||||
return func(al *alerting.Alerting) {
|
||||
soft, _, err := ulimit.GetLimit()
|
||||
|
||||
if err == ulimit.ErrUnsupported {
|
||||
log.Warn("FD limit monitoring not available")
|
||||
return
|
||||
}
|
||||
|
||||
alert := al.AddAlertType("process", "fd-limit")
|
||||
if err != nil {
|
||||
al.Raise(alert, map[string]string{
|
||||
"message": "failed to get FD limit",
|
||||
"error": err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if soft < min {
|
||||
al.Raise(alert, map[string]interface{}{
|
||||
"message": "soft FD limit is low",
|
||||
"soft_limit": soft,
|
||||
"recommended_min": min,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: More things:
|
||||
// * Space in repo dirs (taking into account mounts)
|
||||
// * Miner
|
||||
// * Faulted partitions
|
||||
// * Low balances
|
||||
// * Market provider
|
||||
// * Reachability
|
||||
// * on-chain config
|
||||
// * Low memory (maybe)
|
||||
// * Network / sync issues
|
@ -30,6 +30,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/chain/sub"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
"github.com/filecoin-project/lotus/journal"
|
||||
"github.com/filecoin-project/lotus/journal/fsjournal"
|
||||
"github.com/filecoin-project/lotus/lib/peermgr"
|
||||
marketevents "github.com/filecoin-project/lotus/markets/loggers"
|
||||
"github.com/filecoin-project/lotus/node/hello"
|
||||
@ -237,7 +238,7 @@ func RandomSchedule(p RandomBeaconParams, _ dtypes.AfterGenesisSet) (beacon.Sche
|
||||
}
|
||||
|
||||
func OpenFilesystemJournal(lr repo.LockedRepo, lc fx.Lifecycle, disabled journal.DisabledEvents) (journal.Journal, error) {
|
||||
jrnl, err := journal.OpenFSJournal(lr, disabled)
|
||||
jrnl, err := fsjournal.OpenFSJournal(lr, disabled)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user