From b534ab9d3c8aa8f4a4f86e91cd089465701118ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Tue, 11 Aug 2020 14:28:00 +0100 Subject: [PATCH] panic recovery in MaybeRecordEvent; handle nil current tipsets in wdpost. --- journal/fs.go | 3 --- journal/types.go | 16 ++++++++++++++++ miner/journal_events.go | 1 - storage/wdpost_run.go | 36 ++++++++++++------------------------ storage/wdpost_sched.go | 21 +++++++++++++++------ 5 files changed, 43 insertions(+), 34 deletions(-) delete mode 100644 miner/journal_events.go diff --git a/journal/fs.go b/journal/fs.go index 91b0d1e6d..73a2fff43 100644 --- a/journal/fs.go +++ b/journal/fs.go @@ -8,15 +8,12 @@ import ( "sync" "time" - logging "github.com/ipfs/go-log" "golang.org/x/xerrors" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/node/repo" ) -var log = logging.Logger("journal") - // fsJournal is a basic journal backed by files on a filesystem. type fsJournal struct { EventTypeFactory diff --git a/journal/types.go b/journal/types.go index 6b6d53c08..6183e6cfa 100644 --- a/journal/types.go +++ b/journal/types.go @@ -3,8 +3,12 @@ package journal import ( "sync" "time" + + logging "github.com/ipfs/go-log" ) +var log = logging.Logger("journal") + var ( // DefaultDisabledEvents lists the journal events disabled by // default, usually because they are considered noisy. @@ -30,6 +34,10 @@ type EventType struct { safe bool } +func (et EventType) String() string { + return et.System + ":" + et.Event +} + // Enabled returns whether this event type is enabled in the journaling // subsystem. Users are advised to check this before actually attempting to // add a journal entry, as it helps bypass object construction for events that @@ -84,9 +92,17 @@ type Event struct { // enabled, and if so, it calls the supplier to create the event and // subsequently journal.RecordEvent on the provided journal to record it. // +// It also recovers from panics raised when calling the supplier function. +// // This is safe to call with a nil Journal, either because the value is nil, // or because a journal obtained through NilJournal() is in use. func MaybeRecordEvent(journal Journal, evtType EventType, supplier func() interface{}) { + defer func() { + if r := recover(); r != nil { + log.Warnf("recovered from panic while recording journal event; type=%s, err=%v", evtType, r) + } + }() + if journal == nil || journal == nilj { return } diff --git a/miner/journal_events.go b/miner/journal_events.go deleted file mode 100644 index ab865910b..000000000 --- a/miner/journal_events.go +++ /dev/null @@ -1 +0,0 @@ -package miner diff --git a/storage/wdpost_run.go b/storage/wdpost_run.go index 6750042cf..0ec4ac1f6 100644 --- a/storage/wdpost_run.go +++ b/storage/wdpost_run.go @@ -27,13 +27,11 @@ var errNoPartitions = errors.New("no partitions") func (s *WindowPoStScheduler) failPost(err error, deadline *miner.DeadlineInfo) { journal.MaybeRecordEvent(s.jrnl, s.wdPoStEvtType, func() interface{} { - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "failed", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), Error: err, - } + }) }) log.Errorf("TODO") @@ -51,12 +49,10 @@ func (s *WindowPoStScheduler) doPost(ctx context.Context, deadline *miner.Deadli s.activeDeadline = deadline journal.MaybeRecordEvent(s.jrnl, s.wdPoStEvtType, func() interface{} { - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "started", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), - } + }) }) go func() { @@ -82,12 +78,10 @@ func (s *WindowPoStScheduler) doPost(ctx context.Context, deadline *miner.Deadli } journal.MaybeRecordEvent(s.jrnl, s.wdPoStEvtType, func() interface{} { - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "succeeded", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), - } + }) }) }() } @@ -153,16 +147,14 @@ func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uin mcid = sm.Cid() } - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "recoveries_processed", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), Recoveries: &WindowPoStEvt_Recoveries{ Declarations: params.Recoveries, MessageCID: mcid, }, - } + }) }) for partIdx, partition := range partitions { @@ -260,16 +252,14 @@ func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64, if sm != nil { mcid = sm.Cid() } - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "faults_processed", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), Faults: &WindowPoStEvt_Faults{ Declarations: params.Faults, MessageCID: mcid, }, - } + }) }) for partIdx, partition := range partitions { @@ -517,16 +507,14 @@ func (s *WindowPoStScheduler) submitPost(ctx context.Context, proof *miner.Submi mcid = sm.Cid() } - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "proofs_processed", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), Proofs: &WindowPoStEvt_Proofs{ Partitions: proof.Partitions, MessageCID: mcid, }, - } + }) }) enc, aerr := actors.SerializeParams(proof) diff --git a/storage/wdpost_sched.go b/storage/wdpost_sched.go index 8344097dc..2c55b93ef 100644 --- a/storage/wdpost_sched.go +++ b/storage/wdpost_sched.go @@ -140,12 +140,13 @@ func (s *WindowPoStScheduler) Run(ctx context.Context) { log.Errorf("expected first notif to have len = 1") continue } - if changes[0].Type != store.HCCurrent { + chg := changes[0] + if chg.Type != store.HCCurrent { log.Errorf("expected first notif to tell current ts") continue } - if err := s.update(ctx, changes[0].Val); err != nil { + if err := s.update(ctx, chg.Val); err != nil { log.Errorf("%+v", err) } @@ -248,12 +249,10 @@ func (s *WindowPoStScheduler) abortActivePoSt() { s.abort() journal.MaybeRecordEvent(s.jrnl, s.wdPoStEvtType, func() interface{} { - return WindowPoStEvt{ + return s.enrichWithTipset(WindowPoStEvt{ State: "abort", Deadline: s.activeDeadline, - Height: s.cur.Height(), - TipSet: s.cur.Cids(), - } + }) }) log.Warnf("Aborting Window PoSt (Deadline: %+v)", s.activeDeadline) @@ -262,3 +261,13 @@ func (s *WindowPoStScheduler) abortActivePoSt() { s.activeDeadline = nil s.abort = nil } + +// enrichWithTipset enriches a WindowPoStEvt with tipset information, +// if available. +func (s *WindowPoStScheduler) enrichWithTipset(evt WindowPoStEvt) WindowPoStEvt { + if s.cur != nil { + evt.Height = s.cur.Height() + evt.TipSet = s.cur.Cids() + } + return evt +}