From 6284fad33e421104b81ac9bccbc749a9d17d1ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= <magik6k@gmail.com> Date: Fri, 3 Apr 2020 19:45:48 +0200 Subject: [PATCH] fsm: Implement handlers for Commit errors --- api/api_storage.go | 2 +- build/params_shared.go | 4 +-- storage/sealing/checks.go | 30 ++++++++++++++++-- storage/sealing/fsm.go | 39 +++++++++++++++-------- storage/sealing/fsm_events.go | 4 +++ storage/sealing/states.go | 4 +-- storage/sealing/states_failed.go | 53 ++++++++++++++++++++++++++++++-- 7 files changed, 113 insertions(+), 23 deletions(-) diff --git a/api/api_storage.go b/api/api_storage.go index 33f136810..8208d049c 100644 --- a/api/api_storage.go +++ b/api/api_storage.go @@ -35,7 +35,7 @@ const ( FailedUnrecoverable SectorState = "FailedUnrecoverable" SealFailed SectorState = "SealFailed" PreCommitFailed SectorState = "PreCommitFailed" - SealCommitFailed SectorState = "SealCommitFailed" + ComputeProofFailed SectorState = "ComputeProofFailed" CommitFailed SectorState = "CommitFailed" PackingFailed SectorState = "PackingFailed" Faulty SectorState = "Faulty" // sector is corrupted or gone for some reason diff --git a/build/params_shared.go b/build/params_shared.go index 82798a5ca..1dae1ec09 100644 --- a/build/params_shared.go +++ b/build/params_shared.go @@ -60,10 +60,10 @@ const FallbackPoStConfidence = 6 const SealRandomnessLookback = Finality // Epochs -const SealRandomnessLookbackLimit = SealRandomnessLookback + 2000 +const SealRandomnessLookbackLimit = SealRandomnessLookback + 2000 // TODO: Get from specs-actors // Maximum lookback that randomness can be sourced from for a seal proof submission -const MaxSealLookback = SealRandomnessLookbackLimit + 2000 +const MaxSealLookback = SealRandomnessLookbackLimit + 2000 // TODO: Get from specs-actors // ///// // Mining diff --git a/storage/sealing/checks.go b/storage/sealing/checks.go index d43cd82df..cc68459ba 100644 --- a/storage/sealing/checks.go +++ 
b/storage/sealing/checks.go @@ -11,6 +11,7 @@ import ( "github.com/filecoin-project/go-address" "github.com/filecoin-project/specs-actors/actors/builtin" "github.com/filecoin-project/specs-actors/actors/builtin/market" + "github.com/filecoin-project/specs-actors/actors/crypto" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/actors" @@ -29,6 +30,8 @@ type ErrExpiredDeals struct{ error } type ErrBadCommD struct{ error } type ErrExpiredTicket struct{ error } +type ErrBadSeed struct{ error } + // checkPieces validates that: // - Each piece han a corresponding on chain deal // - Piece commitments match with on chain deals @@ -69,9 +72,9 @@ func checkPieces(ctx context.Context, si SectorInfo, api sealingApi) error { return nil } -// checkSeal checks that data commitment generated in the sealing process +// checkPrecommit checks that data commitment generated in the sealing process // matches pieces, and that the seal ticket isn't expired -func checkSeal(ctx context.Context, maddr address.Address, si SectorInfo, api sealingApi) (err error) { +func checkPrecommit(ctx context.Context, maddr address.Address, si SectorInfo, api sealingApi) (err error) { head, err := api.ChainHead(ctx) if err != nil { return &ErrApi{xerrors.Errorf("getting chain head: %w", err)} @@ -116,5 +119,26 @@ func checkSeal(ctx context.Context, maddr address.Address, si SectorInfo, api se } return nil - +} + +func checkCommit(ctx context.Context, si SectorInfo, api sealingApi) (err error) { + head, err := api.ChainHead(ctx) + if err != nil { + return &ErrApi{xerrors.Errorf("getting chain head: %w", err)} + } + + if si.Seed.Epoch == 0 { + return &ErrBadSeed{xerrors.Errorf("seed epoch was not set")} + } + + rand, err := api.ChainGetRandomness(ctx, head.Key(), crypto.DomainSeparationTag_InteractiveSealChallengeSeed, si.Seed.Epoch, nil) + if err != nil { + return &ErrApi{xerrors.Errorf("failed to get randomness for computing seal proof: %w", err)} + } + + if string(rand) 
!= string(si.Seed.Value) { + return &ErrBadSeed{xerrors.Errorf("seed has changed")} + } + + return nil } diff --git a/storage/sealing/fsm.go b/storage/sealing/fsm.go index 93aa4c1e9..f3f79d763 100644 --- a/storage/sealing/fsm.go +++ b/storage/sealing/fsm.go @@ -77,6 +77,14 @@ var fsmPlanners = map[api.SectorState]func(events []statemachine.Event, state *S on(SectorRetryWaitSeed{}, api.WaitSeed), on(SectorSealPreCommitFailed{}, api.SealFailed), ), + api.ComputeProofFailed: planOne( + on(SectorRetryComputeProof{}, api.Committing), + ), + api.CommitFailed: planOne( + on(SectorSealPreCommitFailed{}, api.SealFailed), + on(SectorRetryWaitSeed{}, api.WaitSeed), + on(SectorRetryComputeProof{}, api.Committing), + ), api.Faulty: planOne( on(SectorFaultReported{}, api.FaultReported), @@ -129,15 +137,20 @@ func (m *Sealing) plan(events []statemachine.Event, state *SectorInfo) (func(sta | | | v *<- PreCommit1 <--> SealFailed - | | - | v - * PreCommitting <--> PreCommitFailed - | | ^ - | v | - *<- WaitSeed ----------/ - | ||| - | vvv v--> SealCommitFailed - *<- Committing + | | ^^^ + | v ||| + *<- PreCommit2 -------/|| + | | || + | v /-------/| + * PreCommitting <-----+---> PreCommitFailed + | | | ^ + | v | | + *<- WaitSeed -----------+-----/ + | ||| ^ | + | ||| \--------*-----/ + | ||| | + | vvv v----+----> ComputeProofFailed + *<- Committing | | | ^--> CommitFailed | v ^ *<- CommitWait ---/ @@ -181,10 +194,10 @@ func (m *Sealing) plan(events []statemachine.Event, state *SectorInfo) (func(sta return m.handleSealFailed, nil case api.PreCommitFailed: return m.handlePreCommitFailed, nil - case api.SealCommitFailed: - log.Warnf("sector %d entered unimplemented state 'SealCommitFailed'", state.SectorID) + case api.ComputeProofFailed: + return m.handleComputeProofFailed, nil case api.CommitFailed: - log.Warnf("sector %d entered unimplemented state 'CommitFailed'", state.SectorID) + return m.handleCommitFailed, nil // Faults case api.Faulty: @@ -224,7 +237,7 @@ func 
planCommitting(events []statemachine.Event, state *SectorInfo) error { state.State = api.Committing return nil case SectorComputeProofFailed: - state.State = api.SealCommitFailed + state.State = api.ComputeProofFailed case SectorSealPreCommitFailed: state.State = api.CommitFailed case SectorCommitFailed: diff --git a/storage/sealing/fsm_events.go b/storage/sealing/fsm_events.go index e7c9a69b7..3aac561e3 100644 --- a/storage/sealing/fsm_events.go +++ b/storage/sealing/fsm_events.go @@ -166,6 +166,10 @@ type SectorRetryWaitSeed struct{} func (evt SectorRetryWaitSeed) apply(state *SectorInfo) {} +type SectorRetryComputeProof struct{} + +func (evt SectorRetryComputeProof) apply(state *SectorInfo) {} + // Faults type SectorFaulty struct{} diff --git a/storage/sealing/states.go b/storage/sealing/states.go index 69f36d314..07287b9e3 100644 --- a/storage/sealing/states.go +++ b/storage/sealing/states.go @@ -94,7 +94,7 @@ func (m *Sealing) handlePreCommit2(ctx statemachine.Context, sector SectorInfo) } func (m *Sealing) handlePreCommitting(ctx statemachine.Context, sector SectorInfo) error { - if err := checkSeal(ctx.Context(), m.maddr, sector, m.api); err != nil { + if err := checkPrecommit(ctx.Context(), m.maddr, sector, m.api); err != nil { switch err.(type) { case *ErrApi: log.Errorf("handlePreCommitting: api error, not proceeding: %+v", err) @@ -104,7 +104,7 @@ func (m *Sealing) handlePreCommitting(ctx statemachine.Context, sector SectorInf case *ErrExpiredTicket: return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("ticket expired: %w", err)}) default: - return xerrors.Errorf("checkSeal sanity check error: %w", err) + return xerrors.Errorf("checkPrecommit sanity check error: %w", err) } } diff --git a/storage/sealing/states_failed.go b/storage/sealing/states_failed.go index 8c0cc46f8..538cebca4 100644 --- a/storage/sealing/states_failed.go +++ b/storage/sealing/states_failed.go @@ -17,6 +17,8 @@ import ( const minRetryTime = 1 * time.Minute func 
failedCooldown(ctx statemachine.Context, sector SectorInfo) error { + // TODO: Exponential backoff when we see consecutive failures + retryStart := time.Unix(int64(sector.Log[len(sector.Log)-1].Timestamp), 0).Add(minRetryTime) if len(sector.Log) > 0 && !time.Now().After(retryStart) { log.Infof("%s(%d), waiting %s before retrying", sector.State, sector.SectorID, time.Until(retryStart)) @@ -74,7 +76,7 @@ func (m *Sealing) handleSealFailed(ctx statemachine.Context, sector SectorInfo) } func (m *Sealing) handlePreCommitFailed(ctx statemachine.Context, sector SectorInfo) error { - if err := checkSeal(ctx.Context(), m.maddr, sector, m.api); err != nil { + if err := checkPrecommit(ctx.Context(), m.maddr, sector, m.api); err != nil { switch err.(type) { case *ErrApi: log.Errorf("handlePreCommitFailed: api error, not proceeding: %+v", err) @@ -84,7 +86,7 @@ func (m *Sealing) handlePreCommitFailed(ctx statemachine.Context, sector SectorI case *ErrExpiredTicket: return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("ticket expired error: %w", err)}) default: - return xerrors.Errorf("checkSeal sanity check error: %w", err) + return xerrors.Errorf("checkPrecommit sanity check error: %w", err) } } @@ -119,3 +121,50 @@ func (m *Sealing) handlePreCommitFailed(ctx statemachine.Context, sector SectorI return ctx.Send(SectorRetryPreCommit{}) } + +func (m *Sealing) handleComputeProofFailed(ctx statemachine.Context, sector SectorInfo) error { + // TODO: Check sector files + + if err := failedCooldown(ctx, sector); err != nil { + return err + } + + return ctx.Send(SectorRetryComputeProof{}) +} + +func (m *Sealing) handleCommitFailed(ctx statemachine.Context, sector SectorInfo) error { + if err := checkPrecommit(ctx.Context(), m.maddr, sector, m.api); err != nil { + switch err.(type) { + case *ErrApi: + log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err) + return nil + case *ErrBadCommD: + return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("bad CommD error: 
%w", err)}) + case *ErrExpiredTicket: + return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("ticket expired error: %w", err)}) + default: + return xerrors.Errorf("checkPrecommit sanity check error: %w", err) + } + } + + if err := checkCommit(ctx.Context(), sector, m.api); err != nil { + switch err.(type) { + case *ErrApi: + log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err) + return nil + case *ErrBadSeed: + log.Errorf("seed changed, will retry: %+v", err) + return ctx.Send(SectorRetryWaitSeed{}) + default: + return xerrors.Errorf("checkCommit sanity check error: %w", err) + } + } + + // TODO: Check sector files + + if err := failedCooldown(ctx, sector); err != nil { + return err + } + + return ctx.Send(SectorRetryComputeProof{}) +}