lotus/storage/sealing/states_failed.go

171 lines
5.3 KiB
Go
Raw Normal View History

package sealing
import (
2020-01-23 17:34:04 +00:00
"bytes"
"time"
2020-03-06 18:59:08 +00:00
"github.com/filecoin-project/go-statemachine"
2020-02-12 07:44:20 +00:00
"github.com/filecoin-project/specs-actors/actors/builtin/miner"
"github.com/filecoin-project/specs-actors/actors/util/adt"
2020-01-23 17:34:04 +00:00
"golang.org/x/xerrors"
2020-02-12 07:44:20 +00:00
"github.com/filecoin-project/lotus/api/apibstore"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
)
const minRetryTime = 1 * time.Minute
func failedCooldown(ctx statemachine.Context, sector SectorInfo) error {
// TODO: Exponential backoff when we see consecutive failures
retryStart := time.Unix(int64(sector.Log[len(sector.Log)-1].Timestamp), 0).Add(minRetryTime)
if len(sector.Log) > 0 && !time.Now().After(retryStart) {
2020-04-03 16:24:45 +00:00
log.Infof("%s(%d), waiting %s before retrying", sector.State, sector.SectorID, time.Until(retryStart))
select {
case <-time.After(time.Until(retryStart)):
case <-ctx.Context().Done():
return ctx.Context().Err()
}
}
return nil
}
2020-02-12 07:44:20 +00:00
func (m *Sealing) checkPreCommitted(ctx statemachine.Context, sector SectorInfo) (*miner.SectorPreCommitOnChainInfo, bool) {
act, err := m.api.StateGetActor(ctx.Context(), m.maddr, types.EmptyTSK)
if err != nil {
log.Errorf("handleSealFailed(%d): temp error: %+v", sector.SectorID, err)
2020-01-23 17:34:04 +00:00
return nil, true
}
2020-01-23 17:34:04 +00:00
st, err := m.api.ChainReadObj(ctx.Context(), act.Head)
if err != nil {
log.Errorf("handleSealFailed(%d): temp error: %+v", sector.SectorID, err)
2020-01-23 17:34:04 +00:00
return nil, true
}
2020-02-25 20:35:15 +00:00
var state miner.State
2020-01-23 17:34:04 +00:00
if err := state.UnmarshalCBOR(bytes.NewReader(st)); err != nil {
log.Errorf("handleSealFailed(%d): temp error: unmarshaling miner state: %+v", sector.SectorID, err)
return nil, true
}
2020-02-12 07:44:20 +00:00
var pci miner.SectorPreCommitOnChainInfo
precommits := adt.AsMap(store.ActorStore(ctx.Context(), apibstore.NewAPIBlockstore(m.api)), state.PreCommittedSectors)
2020-02-13 00:10:07 +00:00
if _, err := precommits.Get(adt.UIntKey(uint64(sector.SectorID)), &pci); err != nil {
2020-02-12 07:44:20 +00:00
log.Error(err)
return nil, true
2020-01-23 17:34:04 +00:00
}
2020-02-12 07:44:20 +00:00
return &pci, false
2020-01-23 17:34:04 +00:00
}
func (m *Sealing) handleSealFailed(ctx statemachine.Context, sector SectorInfo) error {
if _, is := m.checkPreCommitted(ctx, sector); is {
// TODO: Remove this after we can re-precommit
return nil // noop, for now
}
if err := failedCooldown(ctx, sector); err != nil {
return err
}
return ctx.Send(SectorRetrySeal{})
}
2020-01-23 17:34:04 +00:00
func (m *Sealing) handlePreCommitFailed(ctx statemachine.Context, sector SectorInfo) error {
if err := checkPrecommit(ctx.Context(), m.maddr, sector, m.api); err != nil {
2020-01-23 17:34:04 +00:00
switch err.(type) {
case *ErrApi:
log.Errorf("handlePreCommitFailed: api error, not proceeding: %+v", err)
return nil
2020-04-03 16:54:01 +00:00
case *ErrBadCommD: // TODO: Should this just back to packing? (not really needed since handlePreCommit1 will do that too)
return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("bad CommD error: %w", err)})
2020-01-23 17:34:04 +00:00
case *ErrExpiredTicket:
2020-04-03 16:54:01 +00:00
return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("ticket expired error: %w", err)})
2020-01-23 17:34:04 +00:00
default:
return xerrors.Errorf("checkPrecommit sanity check error: %w", err)
2020-01-23 17:34:04 +00:00
}
}
if pci, is := m.checkPreCommitted(ctx, sector); is && pci != nil {
if sector.PreCommitMessage != nil {
log.Warn("sector %d is precommitted on chain, but we don't have precommit message", sector.SectorID)
return nil // TODO: SeedWait needs this currently
}
2020-02-27 00:42:39 +00:00
if pci.Info.SealedCID != *sector.CommR {
log.Warn("sector %d is precommitted on chain, with different CommR: %x != %x", sector.SectorID, pci.Info.SealedCID, sector.CommR)
2020-01-23 17:34:04 +00:00
return nil // TODO: remove when the actor allows re-precommit
}
// TODO: we could compare more things, but I don't think we really need to
// CommR tells us that CommD (and CommPs), and the ticket are all matching
if err := failedCooldown(ctx, sector); err != nil {
return err
}
return ctx.Send(SectorRetryWaitSeed{})
}
if sector.PreCommitMessage != nil {
log.Warn("retrying precommit even though the message failed to apply")
}
if err := failedCooldown(ctx, sector); err != nil {
return err
}
return ctx.Send(SectorRetryPreCommit{})
}
func (m *Sealing) handleComputeProofFailed(ctx statemachine.Context, sector SectorInfo) error {
// TODO: Check sector files
if err := failedCooldown(ctx, sector); err != nil {
return err
}
return ctx.Send(SectorRetryComputeProof{})
}
func (m *Sealing) handleCommitFailed(ctx statemachine.Context, sector SectorInfo) error {
if err := checkPrecommit(ctx.Context(), m.maddr, sector, m.api); err != nil {
switch err.(type) {
case *ErrApi:
log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err)
return nil
case *ErrBadCommD:
return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("bad CommD error: %w", err)})
case *ErrExpiredTicket:
return ctx.Send(SectorSealPreCommitFailed{xerrors.Errorf("ticket expired error: %w", err)})
default:
return xerrors.Errorf("checkPrecommit sanity check error: %w", err)
}
}
if err := checkCommit(ctx.Context(), sector, m.api); err != nil {
switch err.(type) {
case *ErrApi:
log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err)
return nil
case *ErrBadSeed:
log.Errorf("seed changed, will retry: %+v", err)
return ctx.Send(SectorRetryWaitSeed{})
default:
return xerrors.Errorf("checkCommit sanity check error: %w", err)
}
}
// TODO: Check sector files
if err := failedCooldown(ctx, sector); err != nil {
return err
}
return ctx.Send(SectorRetryComputeProof{})
}