2020-01-23 15:38:01 +00:00
package sealing
import (
"time"
2021-02-22 20:03:51 +00:00
"github.com/hashicorp/go-multierror"
2020-01-23 17:34:04 +00:00
"golang.org/x/xerrors"
2020-09-28 21:25:58 +00:00
"github.com/filecoin-project/lotus/chain/actors/builtin/market"
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/exitcode"
2020-04-06 18:07:26 +00:00
"github.com/filecoin-project/go-statemachine"
2020-08-27 19:04:43 +00:00
2020-11-20 00:28:18 +00:00
"github.com/filecoin-project/go-commp-utils/zerocomm"
2020-01-23 15:38:01 +00:00
)
// minRetryTime is the minimum amount of time that must separate the timestamp
// of a sector's most recent log entry from the next retry attempt; it is the
// delay enforced by failedCooldown.
const minRetryTime = 1 * time . Minute
func failedCooldown ( ctx statemachine . Context , sector SectorInfo ) error {
2020-04-03 17:45:48 +00:00
// TODO: Exponential backoff when we see consecutive failures
2020-01-23 15:38:01 +00:00
retryStart := time . Unix ( int64 ( sector . Log [ len ( sector . Log ) - 1 ] . Timestamp ) , 0 ) . Add ( minRetryTime )
if len ( sector . Log ) > 0 && ! time . Now ( ) . After ( retryStart ) {
2020-04-06 22:31:33 +00:00
log . Infof ( "%s(%d), waiting %s before retrying" , sector . State , sector . SectorNumber , time . Until ( retryStart ) )
2020-01-23 15:38:01 +00:00
select {
case <- time . After ( time . Until ( retryStart ) ) :
case <- ctx . Context ( ) . Done ( ) :
return ctx . Context ( ) . Err ( )
}
}
return nil
}
2020-02-12 07:44:20 +00:00
func ( m * Sealing ) checkPreCommitted ( ctx statemachine . Context , sector SectorInfo ) ( * miner . SectorPreCommitOnChainInfo , bool ) {
2021-08-18 10:43:44 +00:00
tok , _ , err := m . Api . ChainHead ( ctx . Context ( ) )
2020-01-23 15:38:01 +00:00
if err != nil {
2020-06-04 15:29:31 +00:00
log . Errorf ( "handleSealPrecommit1Failed(%d): temp error: %+v" , sector . SectorNumber , err )
2020-09-01 06:18:02 +00:00
return nil , false
2020-01-23 15:38:01 +00:00
}
2021-08-18 10:43:44 +00:00
info , err := m . Api . StateSectorPreCommitInfo ( ctx . Context ( ) , m . maddr , sector . SectorNumber , tok )
2020-01-23 15:38:01 +00:00
if err != nil {
2020-06-04 15:29:31 +00:00
log . Errorf ( "handleSealPrecommit1Failed(%d): temp error: %+v" , sector . SectorNumber , err )
2020-09-01 06:18:02 +00:00
return nil , false
2020-01-23 17:34:04 +00:00
}
2020-09-01 06:18:02 +00:00
return info , true
2020-01-23 17:34:04 +00:00
}
2020-06-04 15:29:31 +00:00
func ( m * Sealing ) handleSealPrecommit1Failed ( ctx statemachine . Context , sector SectorInfo ) error {
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
2020-01-23 15:38:01 +00:00
}
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorRetrySealPreCommit1 { } )
}
func ( m * Sealing ) handleSealPrecommit2Failed ( ctx statemachine . Context , sector SectorInfo ) error {
2020-01-23 15:38:01 +00:00
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
2020-09-10 03:52:21 +00:00
if sector . PreCommit2Fails > 3 {
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorRetrySealPreCommit1 { } )
}
return ctx . Send ( SectorRetrySealPreCommit2 { } )
2020-01-23 15:38:01 +00:00
}
2020-01-23 17:34:04 +00:00
func ( m * Sealing ) handlePreCommitFailed ( ctx statemachine . Context , sector SectorInfo ) error {
2021-08-18 10:43:44 +00:00
tok , height , err := m . Api . ChainHead ( ctx . Context ( ) )
2020-04-09 17:34:07 +00:00
if err != nil {
log . Errorf ( "handlePreCommitFailed: api error, not proceeding: %+v" , err )
return nil
}
2020-11-03 17:46:33 +00:00
if sector . PreCommitMessage != nil {
2021-08-18 10:43:44 +00:00
mw , err := m . Api . StateSearchMsg ( ctx . Context ( ) , * sector . PreCommitMessage )
2020-11-03 17:46:33 +00:00
if err != nil {
// API error
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryPreCommitWait { } )
}
if mw == nil {
// API error in precommit
return ctx . Send ( SectorRetryPreCommitWait { } )
}
switch mw . Receipt . ExitCode {
case exitcode . Ok :
// API error in PreCommitWait
return ctx . Send ( SectorRetryPreCommitWait { } )
case exitcode . SysErrOutOfGas :
// API error in PreCommitWait AND gas estimator guessed a wrong number in PreCommit
return ctx . Send ( SectorRetryPreCommit { } )
default :
// something else went wrong
}
}
2021-08-18 10:43:44 +00:00
if err := checkPrecommit ( ctx . Context ( ) , m . Address ( ) , sector , tok , height , m . Api ) ; err != nil {
2020-01-23 17:34:04 +00:00
switch err . ( type ) {
case * ErrApi :
log . Errorf ( "handlePreCommitFailed: api error, not proceeding: %+v" , err )
return nil
2020-04-03 16:54:01 +00:00
case * ErrBadCommD : // TODO: Should this just back to packing? (not really needed since handlePreCommit1 will do that too)
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorSealPreCommit1Failed { xerrors . Errorf ( "bad CommD error: %w" , err ) } )
2020-01-23 17:34:04 +00:00
case * ErrExpiredTicket :
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorSealPreCommit1Failed { xerrors . Errorf ( "ticket expired error: %w" , err ) } )
2020-06-02 21:45:28 +00:00
case * ErrBadTicket :
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorSealPreCommit1Failed { xerrors . Errorf ( "bad expired: %w" , err ) } )
2020-08-27 11:51:13 +00:00
case * ErrInvalidDeals :
2020-08-27 19:04:43 +00:00
log . Warnf ( "invalid deals in sector %d: %v" , sector . SectorNumber , err )
2020-08-27 21:14:46 +00:00
return ctx . Send ( SectorInvalidDealIDs { Return : RetPreCommitFailed } )
2020-08-27 11:51:13 +00:00
case * ErrExpiredDeals :
return ctx . Send ( SectorDealsExpired { xerrors . Errorf ( "sector deals expired: %w" , err ) } )
2020-08-05 01:30:58 +00:00
case * ErrNoPrecommit :
return ctx . Send ( SectorRetryPreCommit { } )
2020-06-02 21:45:28 +00:00
case * ErrPrecommitOnChain :
// noop
2020-08-18 16:02:13 +00:00
case * ErrSectorNumberAllocated :
log . Errorf ( "handlePreCommitFailed: sector number already allocated, not proceeding: %+v" , err )
// TODO: check if the sector is committed (not sure how we'd end up here)
2020-08-27 11:51:13 +00:00
// TODO: check on-chain state, adjust local sector number counter to not give out allocated numbers
2020-08-18 16:02:13 +00:00
return nil
2020-01-23 17:34:04 +00:00
default :
2020-04-03 17:45:48 +00:00
return xerrors . Errorf ( "checkPrecommit sanity check error: %w" , err )
2020-01-23 17:34:04 +00:00
}
}
if pci , is := m . checkPreCommitted ( ctx , sector ) ; is && pci != nil {
2020-09-01 06:18:02 +00:00
if sector . PreCommitMessage == nil {
2020-11-24 11:09:48 +00:00
log . Warnf ( "sector %d is precommitted on chain, but we don't have precommit message" , sector . SectorNumber )
2020-06-02 21:45:28 +00:00
return ctx . Send ( SectorPreCommitLanded { TipSet : tok } )
2020-01-23 17:34:04 +00:00
}
2020-02-27 00:42:39 +00:00
if pci . Info . SealedCID != * sector . CommR {
2021-07-05 10:51:15 +00:00
log . Warnf ( "sector %d is precommitted on chain, with different CommR: %s != %s" , sector . SectorNumber , pci . Info . SealedCID , sector . CommR )
2020-01-23 17:34:04 +00:00
return nil // TODO: remove when the actor allows re-precommit
}
// TODO: we could compare more things, but I don't think we really need to
// CommR tells us that CommD (and CommPs), and the ticket are all matching
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryWaitSeed { } )
}
if sector . PreCommitMessage != nil {
log . Warn ( "retrying precommit even though the message failed to apply" )
}
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryPreCommit { } )
}
2020-04-03 17:45:48 +00:00
func ( m * Sealing ) handleComputeProofFailed ( ctx statemachine . Context , sector SectorInfo ) error {
// TODO: Check sector files
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
2020-06-02 20:30:40 +00:00
if sector . InvalidProofs > 1 {
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorSealPreCommit1Failed { xerrors . Errorf ( "consecutive compute fails" ) } )
2020-06-02 20:30:40 +00:00
}
2020-04-03 17:45:48 +00:00
return ctx . Send ( SectorRetryComputeProof { } )
}
func ( m * Sealing ) handleCommitFailed ( ctx statemachine . Context , sector SectorInfo ) error {
2021-08-18 10:43:44 +00:00
tok , _ , err := m . Api . ChainHead ( ctx . Context ( ) )
2020-04-09 17:34:07 +00:00
if err != nil {
log . Errorf ( "handleCommitting: api error, not proceeding: %+v" , err )
return nil
}
2020-11-03 17:46:33 +00:00
if sector . CommitMessage != nil {
2021-08-18 10:43:44 +00:00
mw , err := m . Api . StateSearchMsg ( ctx . Context ( ) , * sector . CommitMessage )
2020-11-03 17:46:33 +00:00
if err != nil {
// API error
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryCommitWait { } )
}
if mw == nil {
// API error in commit
return ctx . Send ( SectorRetryCommitWait { } )
}
switch mw . Receipt . ExitCode {
case exitcode . Ok :
// API error in CcommitWait
return ctx . Send ( SectorRetryCommitWait { } )
case exitcode . SysErrOutOfGas :
// API error in CommitWait AND gas estimator guessed a wrong number in SubmitCommit
return ctx . Send ( SectorRetrySubmitCommit { } )
default :
// something else went wrong
}
}
2020-04-09 17:34:07 +00:00
if err := m . checkCommit ( ctx . Context ( ) , sector , sector . Proof , tok ) ; err != nil {
2020-04-03 17:45:48 +00:00
switch err . ( type ) {
case * ErrApi :
log . Errorf ( "handleCommitFailed: api error, not proceeding: %+v" , err )
return nil
case * ErrBadSeed :
log . Errorf ( "seed changed, will retry: %+v" , err )
return ctx . Send ( SectorRetryWaitSeed { } )
2020-04-04 01:50:05 +00:00
case * ErrInvalidProof :
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
if sector . InvalidProofs > 0 {
2020-06-04 15:29:31 +00:00
return ctx . Send ( SectorSealPreCommit1Failed { xerrors . Errorf ( "consecutive invalid proofs" ) } )
2020-04-04 01:50:05 +00:00
}
return ctx . Send ( SectorRetryInvalidProof { } )
2020-06-17 15:19:36 +00:00
case * ErrPrecommitOnChain :
log . Errorf ( "no precommit on chain, will retry: %+v" , err )
return ctx . Send ( SectorRetryPreCommitWait { } )
2020-08-05 01:30:58 +00:00
case * ErrNoPrecommit :
return ctx . Send ( SectorRetryPreCommit { } )
2020-08-27 11:51:13 +00:00
case * ErrInvalidDeals :
2020-08-27 19:04:43 +00:00
log . Warnf ( "invalid deals in sector %d: %v" , sector . SectorNumber , err )
2020-08-27 21:14:46 +00:00
return ctx . Send ( SectorInvalidDealIDs { Return : RetCommitFailed } )
2020-08-27 11:51:13 +00:00
case * ErrExpiredDeals :
return ctx . Send ( SectorDealsExpired { xerrors . Errorf ( "sector deals expired: %w" , err ) } )
2020-08-18 16:02:13 +00:00
case * ErrCommitWaitFailed :
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryCommitWait { } )
2020-04-03 17:45:48 +00:00
default :
2020-08-05 01:30:58 +00:00
return xerrors . Errorf ( "checkCommit sanity check error (%T): %w" , err , err )
2020-04-03 17:45:48 +00:00
}
}
// TODO: Check sector files
if err := failedCooldown ( ctx , sector ) ; err != nil {
return err
}
return ctx . Send ( SectorRetryComputeProof { } )
}
2020-06-03 21:42:13 +00:00
// handleFinalizeFailed waits out the retry cooldown and then asks the state
// machine to retry finalization. A cooldown error is returned unchanged.
func (m *Sealing) handleFinalizeFailed(ctx statemachine.Context, sector SectorInfo) error {
	// TODO: Check sector files

	cooldownErr := failedCooldown(ctx, sector)
	if cooldownErr != nil {
		return cooldownErr
	}

	return ctx.Send(SectorRetryFinalize{})
}
2020-08-27 11:51:13 +00:00
2020-08-27 21:59:01 +00:00
// handleRemoveFailed waits out the retry cooldown and then re-issues the
// sector removal. A cooldown error is returned unchanged.
func (m *Sealing) handleRemoveFailed(ctx statemachine.Context, sector SectorInfo) error {
	cooldownErr := failedCooldown(ctx, sector)
	if cooldownErr != nil {
		return cooldownErr
	}

	return ctx.Send(SectorRemove{})
}
2021-01-14 15:13:32 +00:00
func ( m * Sealing ) handleTerminateFailed ( ctx statemachine . Context , sector SectorInfo ) error {
2021-01-14 14:46:57 +00:00
// ignoring error as it's most likely an API error - `pci` will be nil, and we'll go back to
// the Terminating state after cooldown. If the API is still failing, well get back to here
// with the error in SectorInfo log.
2021-08-18 10:43:44 +00:00
pci , _ := m . Api . StateSectorPreCommitInfo ( ctx . Context ( ) , m . maddr , sector . SectorNumber , nil )
2021-01-14 14:46:57 +00:00
if pci != nil {
return nil // pause the fsm, needs manual user action
}
2021-01-14 15:13:32 +00:00
if err := failedCooldown ( ctx , sector ) ; err != nil {
2021-01-12 23:42:01 +00:00
return err
}
return ctx . Send ( SectorTerminate { } )
}
2020-08-27 11:51:13 +00:00
func ( m * Sealing ) handleDealsExpired ( ctx statemachine . Context , sector SectorInfo ) error {
// First make vary sure the sector isn't committed
2021-08-18 10:43:44 +00:00
si , err := m . Api . StateSectorGetInfo ( ctx . Context ( ) , m . maddr , sector . SectorNumber , nil )
2020-08-27 11:51:13 +00:00
if err != nil {
return xerrors . Errorf ( "getting sector info: %w" , err )
}
if si != nil {
// TODO: this should never happen, but in case it does, try to go back to
// the proving state after running some checks
return xerrors . Errorf ( "sector is committed on-chain, but we're in DealsExpired" )
}
if sector . PreCommitInfo == nil {
// TODO: Create a separate state which will remove those pieces, and go back to PC1
2020-08-27 20:41:35 +00:00
log . Errorf ( "non-precommitted sector with expired deals, can't recover from this yet" )
2020-08-27 11:51:13 +00:00
}
// Not much to do here, we can't go back in time to commit this sector
return ctx . Send ( SectorRemove { } )
}
2020-08-27 19:04:43 +00:00
2021-08-18 10:43:44 +00:00
func ( m * Sealing ) HandleRecoverDealIDs ( ctx Context , sector SectorInfo ) error {
tok , height , err := m . Api . ChainHead ( ctx . Context ( ) )
2020-08-27 19:04:43 +00:00
if err != nil {
return xerrors . Errorf ( "getting chain head: %w" , err )
}
var toFix [ ] int
2021-02-22 20:03:51 +00:00
paddingPieces := 0
2020-08-27 19:04:43 +00:00
for i , p := range sector . Pieces {
// if no deal is associated with the piece, ensure that we added it as
// filler (i.e. ensure that it has a zero PieceCID)
if p . DealInfo == nil {
exp := zerocomm . ZeroPieceCommitment ( p . Piece . Size . Unpadded ( ) )
if ! p . Piece . PieceCID . Equals ( exp ) {
return xerrors . Errorf ( "sector %d piece %d had non-zero PieceCID %+v" , sector . SectorNumber , i , p . Piece . PieceCID )
}
2021-02-22 20:03:51 +00:00
paddingPieces ++
2020-08-27 19:04:43 +00:00
continue
}
2021-08-18 10:43:44 +00:00
proposal , err := m . Api . StateMarketStorageDealProposal ( ctx . Context ( ) , p . DealInfo . DealID , tok )
2020-08-27 19:04:43 +00:00
if err != nil {
2020-08-28 09:43:56 +00:00
log . Warnf ( "getting deal %d for piece %d: %+v" , p . DealInfo . DealID , i , err )
2020-08-27 19:04:43 +00:00
toFix = append ( toFix , i )
continue
}
if proposal . Provider != m . maddr {
2020-08-28 09:43:56 +00:00
log . Warnf ( "piece %d (of %d) of sector %d refers deal %d with wrong provider: %s != %s" , i , len ( sector . Pieces ) , sector . SectorNumber , p . DealInfo . DealID , proposal . Provider , m . maddr )
2020-08-27 19:04:43 +00:00
toFix = append ( toFix , i )
continue
}
if proposal . PieceCID != p . Piece . PieceCID {
2021-07-05 10:51:15 +00:00
log . Warnf ( "piece %d (of %d) of sector %d refers deal %d with wrong PieceCID: %s != %s" , i , len ( sector . Pieces ) , sector . SectorNumber , p . DealInfo . DealID , p . Piece . PieceCID , proposal . PieceCID )
2020-08-27 19:04:43 +00:00
toFix = append ( toFix , i )
continue
}
if p . Piece . Size != proposal . PieceSize {
2020-08-28 09:43:56 +00:00
log . Warnf ( "piece %d (of %d) of sector %d refers deal %d with different size: %d != %d" , i , len ( sector . Pieces ) , sector . SectorNumber , p . DealInfo . DealID , p . Piece . Size , proposal . PieceSize )
2020-08-27 19:04:43 +00:00
toFix = append ( toFix , i )
continue
}
if height >= proposal . StartEpoch {
// TODO: check if we are in an early enough state (before precommit), try to remove the offending pieces
// (tricky as we have to 'defragment' the sector while doing that, and update piece references for retrieval)
return xerrors . Errorf ( "can't fix sector deals: piece %d (of %d) of sector %d refers expired deal %d - should start at %d, head %d" , i , len ( sector . Pieces ) , sector . SectorNumber , p . DealInfo . DealID , proposal . StartEpoch , height )
}
}
2021-02-22 20:03:51 +00:00
failed := map [ int ] error { }
2020-08-27 19:04:43 +00:00
updates := map [ int ] abi . DealID { }
for _ , i := range toFix {
p := sector . Pieces [ i ]
if p . DealInfo . PublishCid == nil {
// TODO: check if we are in an early enough state try to remove this piece
2020-11-24 11:09:48 +00:00
log . Errorf ( "can't fix sector deals: piece %d (of %d) of sector %d has nil DealInfo.PublishCid (refers to deal %d)" , i , len ( sector . Pieces ) , sector . SectorNumber , p . DealInfo . DealID )
2020-08-27 19:04:43 +00:00
// Not much to do here (and this can only happen for old spacerace sectors)
return ctx . Send ( SectorRemove { } )
}
2021-01-25 10:28:39 +00:00
var dp * market . DealProposal
if p . DealInfo . DealProposal != nil {
mdp := market . DealProposal ( * p . DealInfo . DealProposal )
dp = & mdp
2020-08-27 19:04:43 +00:00
}
2021-08-18 10:43:44 +00:00
res , err := m . DealInfo . GetCurrentDealInfo ( ctx . Context ( ) , tok , dp , * p . DealInfo . PublishCid )
2021-01-25 10:28:39 +00:00
if err != nil {
2021-02-22 20:03:51 +00:00
failed [ i ] = xerrors . Errorf ( "getting current deal info for piece %d: %w" , i , err )
2021-09-17 12:55:56 +00:00
continue
2020-08-27 19:04:43 +00:00
}
2021-08-18 10:43:44 +00:00
if res . MarketDeal . Proposal . PieceCID != p . Piece . PieceCID {
failed [ i ] = xerrors . Errorf ( "recovered piece (%d) deal in sector %d (dealid %d) has different PieceCID %s != %s" , i , sector . SectorNumber , p . DealInfo . DealID , p . Piece . PieceCID , res . MarketDeal . Proposal . PieceCID )
continue
}
2021-01-25 10:28:39 +00:00
updates [ i ] = res . DealID
2020-08-27 19:04:43 +00:00
}
2021-02-22 20:03:51 +00:00
if len ( failed ) > 0 {
var merr error
for _ , e := range failed {
merr = multierror . Append ( merr , e )
}
if len ( failed ) + paddingPieces == len ( sector . Pieces ) {
log . Errorf ( "removing sector %d: all deals expired or unrecoverable: %+v" , sector . SectorNumber , merr )
return ctx . Send ( SectorRemove { } )
}
// todo: try to remove bad pieces (hard; see the todo above)
2021-08-18 10:43:44 +00:00
// for now removing sectors is probably better than having them stuck in RecoverDealIDs
// and expire anyways
log . Errorf ( "removing sector %d: deals expired or unrecoverable: %+v" , sector . SectorNumber , merr )
return ctx . Send ( SectorRemove { } )
2021-02-22 20:03:51 +00:00
}
2020-08-27 19:04:43 +00:00
// Not much to do here, we can't go back in time to commit this sector
return ctx . Send ( SectorUpdateDealIDs { Updates : updates } )
}