445 lines
15 KiB
Go
445 lines
15 KiB
Go
package sealing
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-multierror"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/filecoin-project/lotus/chain/actors/builtin/market"
|
|
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
|
|
|
|
"github.com/filecoin-project/go-state-types/abi"
|
|
"github.com/filecoin-project/go-state-types/exitcode"
|
|
"github.com/filecoin-project/go-statemachine"
|
|
|
|
"github.com/filecoin-project/go-commp-utils/zerocomm"
|
|
)
|
|
|
|
const minRetryTime = 1 * time.Minute
|
|
|
|
func failedCooldown(ctx statemachine.Context, sector SectorInfo) error {
|
|
// TODO: Exponential backoff when we see consecutive failures
|
|
|
|
retryStart := time.Unix(int64(sector.Log[len(sector.Log)-1].Timestamp), 0).Add(minRetryTime)
|
|
if len(sector.Log) > 0 && !time.Now().After(retryStart) {
|
|
log.Infof("%s(%d), waiting %s before retrying", sector.State, sector.SectorNumber, time.Until(retryStart))
|
|
select {
|
|
case <-time.After(time.Until(retryStart)):
|
|
case <-ctx.Context().Done():
|
|
return ctx.Context().Err()
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (m *Sealing) checkPreCommitted(ctx statemachine.Context, sector SectorInfo) (*miner.SectorPreCommitOnChainInfo, bool) {
|
|
tok, _, err := m.api.ChainHead(ctx.Context())
|
|
if err != nil {
|
|
log.Errorf("handleSealPrecommit1Failed(%d): temp error: %+v", sector.SectorNumber, err)
|
|
return nil, false
|
|
}
|
|
|
|
info, err := m.api.StateSectorPreCommitInfo(ctx.Context(), m.maddr, sector.SectorNumber, tok)
|
|
if err != nil {
|
|
log.Errorf("handleSealPrecommit1Failed(%d): temp error: %+v", sector.SectorNumber, err)
|
|
return nil, false
|
|
}
|
|
|
|
return info, true
|
|
}
|
|
|
|
func (m *Sealing) handleSealPrecommit1Failed(ctx statemachine.Context, sector SectorInfo) error {
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetrySealPreCommit1{})
|
|
}
|
|
|
|
func (m *Sealing) handleSealPrecommit2Failed(ctx statemachine.Context, sector SectorInfo) error {
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
if sector.PreCommit2Fails > 3 {
|
|
return ctx.Send(SectorRetrySealPreCommit1{})
|
|
}
|
|
|
|
return ctx.Send(SectorRetrySealPreCommit2{})
|
|
}
|
|
|
|
func (m *Sealing) handlePreCommitFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
tok, height, err := m.api.ChainHead(ctx.Context())
|
|
if err != nil {
|
|
log.Errorf("handlePreCommitFailed: api error, not proceeding: %+v", err)
|
|
return nil
|
|
}
|
|
|
|
if sector.PreCommitMessage != nil {
|
|
mw, err := m.api.StateSearchMsg(ctx.Context(), *sector.PreCommitMessage)
|
|
if err != nil {
|
|
// API error
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryPreCommitWait{})
|
|
}
|
|
|
|
if mw == nil {
|
|
// API error in precommit
|
|
return ctx.Send(SectorRetryPreCommitWait{})
|
|
}
|
|
|
|
switch mw.Receipt.ExitCode {
|
|
case exitcode.Ok:
|
|
// API error in PreCommitWait
|
|
return ctx.Send(SectorRetryPreCommitWait{})
|
|
case exitcode.SysErrOutOfGas:
|
|
// API error in PreCommitWait AND gas estimator guessed a wrong number in PreCommit
|
|
return ctx.Send(SectorRetryPreCommit{})
|
|
default:
|
|
// something else went wrong
|
|
}
|
|
}
|
|
|
|
if err := checkPrecommit(ctx.Context(), m.Address(), sector, tok, height, m.api); err != nil {
|
|
switch err.(type) {
|
|
case *ErrApi:
|
|
log.Errorf("handlePreCommitFailed: api error, not proceeding: %+v", err)
|
|
return nil
|
|
case *ErrBadCommD: // TODO: Should this just back to packing? (not really needed since handlePreCommit1 will do that too)
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("bad CommD error: %w", err)})
|
|
case *ErrExpiredTicket:
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("ticket expired error: %w", err)})
|
|
case *ErrBadTicket:
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("bad expired: %w", err)})
|
|
case *ErrInvalidDeals:
|
|
log.Warnf("invalid deals in sector %d: %v", sector.SectorNumber, err)
|
|
return ctx.Send(SectorInvalidDealIDs{Return: RetPreCommitFailed})
|
|
case *ErrExpiredDeals:
|
|
return ctx.Send(SectorDealsExpired{xerrors.Errorf("sector deals expired: %w", err)})
|
|
case *ErrNoPrecommit:
|
|
return ctx.Send(SectorRetryPreCommit{})
|
|
case *ErrPrecommitOnChain:
|
|
// noop
|
|
case *ErrSectorNumberAllocated:
|
|
log.Errorf("handlePreCommitFailed: sector number already allocated, not proceeding: %+v", err)
|
|
// TODO: check if the sector is committed (not sure how we'd end up here)
|
|
// TODO: check on-chain state, adjust local sector number counter to not give out allocated numbers
|
|
return nil
|
|
default:
|
|
return xerrors.Errorf("checkPrecommit sanity check error: %w", err)
|
|
}
|
|
}
|
|
|
|
if pci, is := m.checkPreCommitted(ctx, sector); is && pci != nil {
|
|
if sector.PreCommitMessage == nil {
|
|
log.Warnf("sector %d is precommitted on chain, but we don't have precommit message", sector.SectorNumber)
|
|
return ctx.Send(SectorPreCommitLanded{TipSet: tok})
|
|
}
|
|
|
|
if pci.Info.SealedCID != *sector.CommR {
|
|
log.Warnf("sector %d is precommitted on chain, with different CommR: %x != %x", sector.SectorNumber, pci.Info.SealedCID, sector.CommR)
|
|
return nil // TODO: remove when the actor allows re-precommit
|
|
}
|
|
|
|
// TODO: we could compare more things, but I don't think we really need to
|
|
// CommR tells us that CommD (and CommPs), and the ticket are all matching
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryWaitSeed{})
|
|
}
|
|
|
|
if sector.PreCommitMessage != nil {
|
|
log.Warn("retrying precommit even though the message failed to apply")
|
|
}
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryPreCommit{})
|
|
}
|
|
|
|
func (m *Sealing) handleComputeProofFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
// TODO: Check sector files
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
if sector.InvalidProofs > 1 {
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("consecutive compute fails")})
|
|
}
|
|
|
|
return ctx.Send(SectorRetryComputeProof{})
|
|
}
|
|
|
|
func (m *Sealing) handleCommitFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
tok, height, err := m.api.ChainHead(ctx.Context())
|
|
if err != nil {
|
|
log.Errorf("handleCommitting: api error, not proceeding: %+v", err)
|
|
return nil
|
|
}
|
|
|
|
if sector.CommitMessage != nil {
|
|
mw, err := m.api.StateSearchMsg(ctx.Context(), *sector.CommitMessage)
|
|
if err != nil {
|
|
// API error
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryCommitWait{})
|
|
}
|
|
|
|
if mw == nil {
|
|
// API error in commit
|
|
return ctx.Send(SectorRetryCommitWait{})
|
|
}
|
|
|
|
switch mw.Receipt.ExitCode {
|
|
case exitcode.Ok:
|
|
// API error in CcommitWait
|
|
return ctx.Send(SectorRetryCommitWait{})
|
|
case exitcode.SysErrOutOfGas:
|
|
// API error in CommitWait AND gas estimator guessed a wrong number in SubmitCommit
|
|
return ctx.Send(SectorRetrySubmitCommit{})
|
|
default:
|
|
// something else went wrong
|
|
}
|
|
}
|
|
|
|
if err := checkPrecommit(ctx.Context(), m.maddr, sector, tok, height, m.api); err != nil {
|
|
switch err.(type) {
|
|
case *ErrApi:
|
|
log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err)
|
|
return nil
|
|
case *ErrBadCommD:
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("bad CommD error: %w", err)})
|
|
case *ErrExpiredTicket:
|
|
return ctx.Send(SectorTicketExpired{xerrors.Errorf("ticket expired error, removing sector: %w", err)})
|
|
case *ErrBadTicket:
|
|
return ctx.Send(SectorTicketExpired{xerrors.Errorf("expired ticket, removing sector: %w", err)})
|
|
case *ErrInvalidDeals:
|
|
log.Warnf("invalid deals in sector %d: %v", sector.SectorNumber, err)
|
|
return ctx.Send(SectorInvalidDealIDs{Return: RetCommitFailed})
|
|
case *ErrExpiredDeals:
|
|
return ctx.Send(SectorDealsExpired{xerrors.Errorf("sector deals expired: %w", err)})
|
|
case nil:
|
|
return ctx.Send(SectorChainPreCommitFailed{xerrors.Errorf("no precommit: %w", err)})
|
|
case *ErrPrecommitOnChain:
|
|
// noop, this is expected
|
|
case *ErrSectorNumberAllocated:
|
|
// noop, already committed?
|
|
default:
|
|
return xerrors.Errorf("checkPrecommit sanity check error (%T): %w", err, err)
|
|
}
|
|
}
|
|
|
|
if err := m.checkCommit(ctx.Context(), sector, sector.Proof, tok); err != nil {
|
|
switch err.(type) {
|
|
case *ErrApi:
|
|
log.Errorf("handleCommitFailed: api error, not proceeding: %+v", err)
|
|
return nil
|
|
case *ErrBadSeed:
|
|
log.Errorf("seed changed, will retry: %+v", err)
|
|
return ctx.Send(SectorRetryWaitSeed{})
|
|
case *ErrInvalidProof:
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
if sector.InvalidProofs > 0 {
|
|
return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("consecutive invalid proofs")})
|
|
}
|
|
|
|
return ctx.Send(SectorRetryInvalidProof{})
|
|
case *ErrPrecommitOnChain:
|
|
log.Errorf("no precommit on chain, will retry: %+v", err)
|
|
return ctx.Send(SectorRetryPreCommitWait{})
|
|
case *ErrNoPrecommit:
|
|
return ctx.Send(SectorRetryPreCommit{})
|
|
case *ErrInvalidDeals:
|
|
log.Warnf("invalid deals in sector %d: %v", sector.SectorNumber, err)
|
|
return ctx.Send(SectorInvalidDealIDs{Return: RetCommitFailed})
|
|
case *ErrExpiredDeals:
|
|
return ctx.Send(SectorDealsExpired{xerrors.Errorf("sector deals expired: %w", err)})
|
|
case *ErrCommitWaitFailed:
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryCommitWait{})
|
|
default:
|
|
return xerrors.Errorf("checkCommit sanity check error (%T): %w", err, err)
|
|
}
|
|
}
|
|
|
|
// TODO: Check sector files
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryComputeProof{})
|
|
}
|
|
|
|
func (m *Sealing) handleFinalizeFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
// TODO: Check sector files
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRetryFinalize{})
|
|
}
|
|
|
|
func (m *Sealing) handleRemoveFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorRemove{})
|
|
}
|
|
|
|
func (m *Sealing) handleTerminateFailed(ctx statemachine.Context, sector SectorInfo) error {
|
|
// ignoring error as it's most likely an API error - `pci` will be nil, and we'll go back to
|
|
// the Terminating state after cooldown. If the API is still failing, well get back to here
|
|
// with the error in SectorInfo log.
|
|
pci, _ := m.api.StateSectorPreCommitInfo(ctx.Context(), m.maddr, sector.SectorNumber, nil)
|
|
if pci != nil {
|
|
return nil // pause the fsm, needs manual user action
|
|
}
|
|
|
|
if err := failedCooldown(ctx, sector); err != nil {
|
|
return err
|
|
}
|
|
|
|
return ctx.Send(SectorTerminate{})
|
|
}
|
|
|
|
func (m *Sealing) handleDealsExpired(ctx statemachine.Context, sector SectorInfo) error {
|
|
// First make vary sure the sector isn't committed
|
|
si, err := m.api.StateSectorGetInfo(ctx.Context(), m.maddr, sector.SectorNumber, nil)
|
|
if err != nil {
|
|
return xerrors.Errorf("getting sector info: %w", err)
|
|
}
|
|
if si != nil {
|
|
// TODO: this should never happen, but in case it does, try to go back to
|
|
// the proving state after running some checks
|
|
return xerrors.Errorf("sector is committed on-chain, but we're in DealsExpired")
|
|
}
|
|
|
|
if sector.PreCommitInfo == nil {
|
|
// TODO: Create a separate state which will remove those pieces, and go back to PC1
|
|
log.Errorf("non-precommitted sector with expired deals, can't recover from this yet")
|
|
}
|
|
|
|
// Not much to do here, we can't go back in time to commit this sector
|
|
return ctx.Send(SectorRemove{})
|
|
}
|
|
|
|
func (m *Sealing) handleRecoverDealIDs(ctx statemachine.Context, sector SectorInfo) error {
|
|
tok, height, err := m.api.ChainHead(ctx.Context())
|
|
if err != nil {
|
|
return xerrors.Errorf("getting chain head: %w", err)
|
|
}
|
|
|
|
var toFix []int
|
|
paddingPieces := 0
|
|
|
|
for i, p := range sector.Pieces {
|
|
// if no deal is associated with the piece, ensure that we added it as
|
|
// filler (i.e. ensure that it has a zero PieceCID)
|
|
if p.DealInfo == nil {
|
|
exp := zerocomm.ZeroPieceCommitment(p.Piece.Size.Unpadded())
|
|
if !p.Piece.PieceCID.Equals(exp) {
|
|
return xerrors.Errorf("sector %d piece %d had non-zero PieceCID %+v", sector.SectorNumber, i, p.Piece.PieceCID)
|
|
}
|
|
paddingPieces++
|
|
continue
|
|
}
|
|
|
|
proposal, err := m.api.StateMarketStorageDealProposal(ctx.Context(), p.DealInfo.DealID, tok)
|
|
if err != nil {
|
|
log.Warnf("getting deal %d for piece %d: %+v", p.DealInfo.DealID, i, err)
|
|
toFix = append(toFix, i)
|
|
continue
|
|
}
|
|
|
|
if proposal.Provider != m.maddr {
|
|
log.Warnf("piece %d (of %d) of sector %d refers deal %d with wrong provider: %s != %s", i, len(sector.Pieces), sector.SectorNumber, p.DealInfo.DealID, proposal.Provider, m.maddr)
|
|
toFix = append(toFix, i)
|
|
continue
|
|
}
|
|
|
|
if proposal.PieceCID != p.Piece.PieceCID {
|
|
log.Warnf("piece %d (of %d) of sector %d refers deal %d with wrong PieceCID: %x != %x", i, len(sector.Pieces), sector.SectorNumber, p.DealInfo.DealID, p.Piece.PieceCID, proposal.PieceCID)
|
|
toFix = append(toFix, i)
|
|
continue
|
|
}
|
|
|
|
if p.Piece.Size != proposal.PieceSize {
|
|
log.Warnf("piece %d (of %d) of sector %d refers deal %d with different size: %d != %d", i, len(sector.Pieces), sector.SectorNumber, p.DealInfo.DealID, p.Piece.Size, proposal.PieceSize)
|
|
toFix = append(toFix, i)
|
|
continue
|
|
}
|
|
|
|
if height >= proposal.StartEpoch {
|
|
// TODO: check if we are in an early enough state (before precommit), try to remove the offending pieces
|
|
// (tricky as we have to 'defragment' the sector while doing that, and update piece references for retrieval)
|
|
return xerrors.Errorf("can't fix sector deals: piece %d (of %d) of sector %d refers expired deal %d - should start at %d, head %d", i, len(sector.Pieces), sector.SectorNumber, p.DealInfo.DealID, proposal.StartEpoch, height)
|
|
}
|
|
}
|
|
|
|
failed := map[int]error{}
|
|
updates := map[int]abi.DealID{}
|
|
for _, i := range toFix {
|
|
p := sector.Pieces[i]
|
|
|
|
if p.DealInfo.PublishCid == nil {
|
|
// TODO: check if we are in an early enough state try to remove this piece
|
|
log.Errorf("can't fix sector deals: piece %d (of %d) of sector %d has nil DealInfo.PublishCid (refers to deal %d)", i, len(sector.Pieces), sector.SectorNumber, p.DealInfo.DealID)
|
|
// Not much to do here (and this can only happen for old spacerace sectors)
|
|
return ctx.Send(SectorRemove{})
|
|
}
|
|
|
|
var dp *market.DealProposal
|
|
if p.DealInfo.DealProposal != nil {
|
|
mdp := market.DealProposal(*p.DealInfo.DealProposal)
|
|
dp = &mdp
|
|
}
|
|
res, err := m.dealInfo.GetCurrentDealInfo(ctx.Context(), tok, dp, *p.DealInfo.PublishCid)
|
|
if err != nil {
|
|
failed[i] = xerrors.Errorf("getting current deal info for piece %d: %w", i, err)
|
|
}
|
|
|
|
updates[i] = res.DealID
|
|
}
|
|
|
|
if len(failed) > 0 {
|
|
var merr error
|
|
for _, e := range failed {
|
|
merr = multierror.Append(merr, e)
|
|
}
|
|
|
|
if len(failed)+paddingPieces == len(sector.Pieces) {
|
|
log.Errorf("removing sector %d: all deals expired or unrecoverable: %+v", sector.SectorNumber, merr)
|
|
return ctx.Send(SectorRemove{})
|
|
}
|
|
|
|
// todo: try to remove bad pieces (hard; see the todo above)
|
|
return xerrors.Errorf("failed to recover some deals: %w", merr)
|
|
}
|
|
|
|
// Not much to do here, we can't go back in time to commit this sector
|
|
return ctx.Send(SectorUpdateDealIDs{Updates: updates})
|
|
}
|