lotus/storage/wdpost_run.go

918 lines
28 KiB
Go
Raw Normal View History

package storage
import (
"bytes"
"context"
2019-11-28 18:08:10 +00:00
"time"
2020-05-28 15:45:34 +00:00
"github.com/filecoin-project/go-bitfield"
"github.com/filecoin-project/specs-storage/storage"
2020-05-28 15:45:34 +00:00
"github.com/filecoin-project/go-address"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/big"
"github.com/filecoin-project/go-state-types/crypto"
"github.com/filecoin-project/go-state-types/dline"
"github.com/filecoin-project/go-state-types/network"
2020-07-20 13:45:17 +00:00
"github.com/ipfs/go-cid"
2019-11-28 18:08:10 +00:00
"go.opencensus.io/trace"
"golang.org/x/xerrors"
2020-10-08 01:09:33 +00:00
proof2 "github.com/filecoin-project/specs-actors/v2/actors/runtime/proof"
"github.com/filecoin-project/specs-actors/v3/actors/runtime/proof"
2020-09-21 22:24:45 +00:00
"github.com/filecoin-project/lotus/api"
2020-05-26 03:43:17 +00:00
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/actors"
2020-09-17 02:34:13 +00:00
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
2020-10-08 01:09:33 +00:00
"github.com/filecoin-project/lotus/chain/actors/policy"
"github.com/filecoin-project/lotus/chain/messagepool"
"github.com/filecoin-project/lotus/chain/types"
)
2021-05-14 20:00:13 +00:00
// recordPoStFailure records a failure in the journal.
func (s *WindowPoStScheduler) recordPoStFailure(err error, ts *types.TipSet, deadline *dline.Info) {
s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStScheduler], func() interface{} {
c := evtCommon{Error: err}
if ts != nil {
c.Deadline = deadline
c.Height = ts.Height()
c.TipSet = ts.Cids()
}
return WdPoStSchedulerEvt{
evtCommon: c,
State: SchedulerStateFaulted,
}
2020-07-20 13:45:17 +00:00
})
2021-02-11 11:00:26 +00:00
log.Errorf("Got err %+v - TODO handle errors", err)
2020-04-07 19:55:34 +00:00
/*s.failLk.Lock()
2019-12-08 19:48:17 +00:00
if eps > s.failed {
s.failed = eps
}
2020-04-07 19:55:34 +00:00
s.failLk.Unlock()*/
2019-12-08 19:48:17 +00:00
}
// recordProofsEvent journals a proofs-processed event for the given
// partitions and message CID. It is also used for the no-op case (no
// partitions, undefined CID).
func (s *WindowPoStScheduler) recordProofsEvent(partitions []miner.PoStPartition, mcid cid.Cid) {
	evtType := s.evtTypes[evtTypeWdPoStProofs]
	s.journal.RecordEvent(evtType, func() interface{} {
		evt := &WdPoStProofsProcessedEvt{
			evtCommon:  s.getEvtCommon(nil),
			Partitions: partitions,
			MessageCID: mcid,
		}
		return evt
	})
}
// startGeneratePoST kicks off the process of generating a PoST
func (s *WindowPoStScheduler) startGeneratePoST(
ctx context.Context,
ts *types.TipSet,
deadline *dline.Info,
completeGeneratePoST CompleteGeneratePoSTCb,
) context.CancelFunc {
ctx, abort := context.WithCancel(ctx)
go func() {
defer abort()
s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStScheduler], func() interface{} {
return WdPoStSchedulerEvt{
evtCommon: s.getEvtCommon(nil),
State: SchedulerStateStarted,
}
})
posts, err := s.runGeneratePoST(ctx, ts, deadline)
completeGeneratePoST(posts, err)
}()
2020-07-20 13:45:17 +00:00
return abort
}
// runGeneratePoST generates the PoST
func (s *WindowPoStScheduler) runGeneratePoST(
ctx context.Context,
ts *types.TipSet,
deadline *dline.Info,
) ([]miner.SubmitWindowedPoStParams, error) {
ctx, span := trace.StartSpan(ctx, "WindowPoStScheduler.generatePoST")
defer span.End()
2021-05-14 20:00:13 +00:00
posts, err := s.runPoStCycle(ctx, *deadline, ts)
if err != nil {
2021-05-14 20:00:13 +00:00
log.Errorf("runPoStCycle failed: %+v", err)
return nil, err
}
if len(posts) == 0 {
s.recordProofsEvent(nil, cid.Undef)
}
return posts, nil
}
// startSubmitPoST kicks off the process of submitting PoST in a new
// goroutine and returns immediately. On success a "succeeded" scheduler event
// is journaled; in every case completeSubmitPoST is called with the outcome.
// The returned CancelFunc cancels the context the submission runs under; it
// is also invoked automatically once the goroutine finishes.
func (s *WindowPoStScheduler) startSubmitPoST(
	ctx context.Context,
	ts *types.TipSet,
	deadline *dline.Info,
	posts []miner.SubmitWindowedPoStParams,
	completeSubmitPoST CompleteSubmitPoSTCb,
) context.CancelFunc {
	submitCtx, cancel := context.WithCancel(ctx)

	submit := func() {
		defer cancel()

		err := s.runSubmitPoST(submitCtx, ts, deadline, posts)
		if err != nil {
			completeSubmitPoST(err)
			return
		}

		s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStScheduler], func() interface{} {
			succeeded := WdPoStSchedulerEvt{
				evtCommon: s.getEvtCommon(nil),
				State:     SchedulerStateSucceeded,
			}
			return succeeded
		})
		completeSubmitPoST(err)
	}
	go submit()

	return cancel
}
// runSubmitPoST submits PoST
func (s *WindowPoStScheduler) runSubmitPoST(
ctx context.Context,
ts *types.TipSet,
deadline *dline.Info,
posts []miner.SubmitWindowedPoStParams,
) error {
if len(posts) == 0 {
return nil
}
ctx, span := trace.StartSpan(ctx, "WindowPoStScheduler.submitPoST")
defer span.End()
// Get randomness from tickets
// use the challenge epoch if we've upgraded to network version 4
// (actors version 2). We want to go back as far as possible to be safe.
commEpoch := deadline.Open
if ver, err := s.api.StateNetworkVersion(ctx, types.EmptyTSK); err != nil {
log.Errorw("failed to get network version to determine PoSt epoch randomness lookback", "error", err)
} else if ver >= network.Version4 {
commEpoch = deadline.Challenge
}
commRand, err := s.api.ChainGetRandomnessFromTickets(ctx, ts.Key(), crypto.DomainSeparationTag_PoStChainCommit, commEpoch, nil)
if err != nil {
err = xerrors.Errorf("failed to get chain randomness from tickets for windowPost (ts=%d; deadline=%d): %w", ts.Height(), commEpoch, err)
log.Errorf("submitPoStMessage failed: %+v", err)
return err
}
var submitErr error
for i := range posts {
// Add randomness to PoST
post := &posts[i]
post.ChainCommitEpoch = commEpoch
post.ChainCommitRand = commRand
// Submit PoST
sm, submitErr := s.submitPoStMessage(ctx, post)
if submitErr != nil {
log.Errorf("submit window post failed: %+v", submitErr)
} else {
s.recordProofsEvent(post.Partitions, sm.Cid())
}
}
2020-07-20 13:45:17 +00:00
return submitErr
}
func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.BitField, tsk types.TipSetKey) (bitfield.BitField, error) {
2020-05-16 21:50:50 +00:00
mid, err := address.IDFromAddress(s.actor)
if err != nil {
return bitfield.BitField{}, err
2020-05-16 21:50:50 +00:00
}
sectorInfos, err := s.api.StateMinerSectors(ctx, s.actor, &check, tsk)
if err != nil {
return bitfield.BitField{}, err
}
2020-05-16 21:50:50 +00:00
sectors := make(map[abi.SectorNumber]struct{})
var tocheck []storage.SectorRef
for _, info := range sectorInfos {
sectors[info.SectorNumber] = struct{}{}
tocheck = append(tocheck, storage.SectorRef{
ProofType: info.SealProof,
ID: abi.SectorID{
Miner: abi.ActorID(mid),
Number: info.SectorNumber,
},
})
}
2020-05-16 21:50:50 +00:00
2020-12-01 23:32:01 +00:00
bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, nil)
2020-05-16 21:50:50 +00:00
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err)
2020-05-16 21:50:50 +00:00
}
2020-11-26 07:02:43 +00:00
for id := range bad {
delete(sectors, id.Number)
2020-05-16 21:50:50 +00:00
}
2020-05-29 19:36:04 +00:00
log.Warnw("Checked sectors", "checked", len(tocheck), "good", len(sectors))
2020-05-16 21:50:50 +00:00
sbf := bitfield.New()
for s := range sectors {
sbf.Set(uint64(s))
2020-05-16 21:50:50 +00:00
}
return sbf, nil
2020-05-29 19:36:04 +00:00
}
// declareRecoveries identifies sectors that were previously marked as faulty
// for our miner, but are now recovered (i.e. are now provable again) and
// still not reported as such.
//
// It then reports the recovery on chain via a `DeclareFaultsRecovered`
// message to our miner actor.
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, recoveries are declared in preparation for those
// sectors to be proven.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) {
ctx, span := trace.StartSpan(ctx, "storage.declareRecoveries")
2020-07-14 17:10:31 +00:00
defer span.End()
2020-05-29 19:36:04 +00:00
2020-07-20 13:45:17 +00:00
faulty := uint64(0)
2020-09-14 21:40:52 +00:00
params := &miner.DeclareFaultsRecoveredParams{
Recoveries: []miner.RecoveryDeclaration{},
2020-05-29 19:36:04 +00:00
}
2020-07-14 17:10:31 +00:00
for partIdx, partition := range partitions {
unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors)
2020-07-14 17:10:31 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err)
2020-07-14 17:10:31 +00:00
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
uc, err := unrecovered.Count()
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err)
2020-07-14 17:10:31 +00:00
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
if uc == 0 {
continue
}
2020-07-14 17:10:31 +00:00
faulty += uc
2020-05-29 19:36:04 +00:00
recovered, err := s.checkSectors(ctx, unrecovered, tsk)
2020-07-14 17:10:31 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err)
2020-07-14 17:10:31 +00:00
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
// if all sectors failed to recover, don't declare recoveries
recoveredCount, err := recovered.Count()
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err)
2020-07-14 17:10:31 +00:00
}
2020-07-08 12:35:53 +00:00
2020-07-14 17:10:31 +00:00
if recoveredCount == 0 {
continue
}
2020-07-08 12:35:53 +00:00
2020-09-14 21:40:52 +00:00
params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{
2020-07-14 17:10:31 +00:00
Deadline: dlIdx,
Partition: uint64(partIdx),
Sectors: recovered,
})
}
2020-05-29 19:36:04 +00:00
2020-09-02 18:15:25 +00:00
recoveries := params.Recoveries
if len(recoveries) == 0 {
2020-07-14 17:10:31 +00:00
if faulty != 0 {
log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
}
2020-09-02 18:15:25 +00:00
return recoveries, nil, nil
2020-05-16 21:50:50 +00:00
}
2020-07-14 17:10:37 +00:00
enc, aerr := actors.SerializeParams(params)
2020-05-16 21:50:50 +00:00
if aerr != nil {
2020-09-02 18:15:25 +00:00
return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
2020-05-16 21:50:50 +00:00
}
msg := &types.Message{
To: s.actor,
Method: miner.Methods.DeclareFaultsRecovered,
Params: enc,
Value: types.NewInt(0),
2020-05-16 21:50:50 +00:00
}
2020-08-19 21:25:58 +00:00
spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
2021-05-14 18:49:05 +00:00
if err := s.prepareMessage(ctx, msg, spec); err != nil {
2020-10-21 03:35:18 +00:00
return recoveries, nil, err
}
2020-05-16 21:50:50 +00:00
sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)})
2020-05-16 21:50:50 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err)
2020-05-16 21:50:50 +00:00
}
log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())
2021-04-05 17:56:53 +00:00
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
2020-05-16 21:50:50 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err)
2020-05-16 21:50:50 +00:00
}
2020-05-28 15:45:34 +00:00
if rec.Receipt.ExitCode != 0 {
2020-09-02 18:15:25 +00:00
return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
2020-05-16 21:50:50 +00:00
}
2020-09-02 18:15:25 +00:00
return recoveries, sm, nil
2020-05-16 21:50:50 +00:00
}
// declareFaults identifies the sectors on the specified proving deadline that
// are faulty, and reports the faults on chain via the `DeclareFaults` message
// to our miner actor.
//
// This is always invoked ahead of time, before the deadline for the evaluated
// sectors arrives. That way, faults are declared before a penalty is accrued.
//
// If a declaration is made, it awaits for build.MessageConfidence confirmations
// on chain before returning.
//
// TODO: the waiting should happen in the background. Right now this
// is blocking/delaying the actual generation and submission of WindowPoSts in
// this deadline!
func (s *WindowPoStScheduler) declareFaults(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.FaultDeclaration, *types.SignedMessage, error) {
ctx, span := trace.StartSpan(ctx, "storage.declareFaults")
2020-07-14 17:10:31 +00:00
defer span.End()
2020-06-01 12:49:48 +00:00
2020-07-20 13:45:17 +00:00
bad := uint64(0)
2020-09-14 21:40:52 +00:00
params := &miner.DeclareFaultsParams{
Faults: []miner.FaultDeclaration{},
2020-05-29 19:36:04 +00:00
}
2020-07-14 17:10:31 +00:00
for partIdx, partition := range partitions {
nonFaulty, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors)
if err != nil {
return nil, nil, xerrors.Errorf("determining non faulty sectors: %w", err)
}
good, err := s.checkSectors(ctx, nonFaulty, tsk)
2020-07-14 17:10:31 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("checking sectors: %w", err)
2020-07-14 17:10:31 +00:00
}
newFaulty, err := bitfield.SubtractBitField(nonFaulty, good)
2020-07-14 17:10:31 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err)
2020-07-14 17:10:31 +00:00
}
c, err := newFaulty.Count()
2020-07-14 17:10:31 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err)
2020-07-14 17:10:31 +00:00
}
if c == 0 {
continue
}
bad += c
2020-09-14 21:40:52 +00:00
params.Faults = append(params.Faults, miner.FaultDeclaration{
2020-07-14 17:10:31 +00:00
Deadline: dlIdx,
Partition: uint64(partIdx),
Sectors: newFaulty,
2020-07-14 17:10:31 +00:00
})
2020-05-29 19:36:04 +00:00
}
2019-12-17 22:23:43 +00:00
2020-09-02 18:15:25 +00:00
faults := params.Faults
if len(faults) == 0 {
return faults, nil, nil
2020-05-29 19:36:04 +00:00
}
2019-12-17 22:23:43 +00:00
2020-07-14 17:10:31 +00:00
log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)
2020-05-29 19:36:04 +00:00
enc, aerr := actors.SerializeParams(params)
if aerr != nil {
2020-09-02 18:15:25 +00:00
return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
2020-05-29 19:36:04 +00:00
}
msg := &types.Message{
To: s.actor,
Method: miner.Methods.DeclareFaults,
Params: enc,
Value: types.NewInt(0), // TODO: Is there a fee?
2020-05-29 19:36:04 +00:00
}
2020-08-19 21:25:58 +00:00
spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
2021-05-14 18:49:05 +00:00
if err := s.prepareMessage(ctx, msg, spec); err != nil {
2020-10-21 03:35:18 +00:00
return faults, nil, err
}
2020-05-29 19:36:04 +00:00
2020-08-19 21:25:58 +00:00
sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
2020-05-29 19:36:04 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err)
2020-05-29 19:36:04 +00:00
}
log.Warnw("declare faults Message CID", "cid", sm.Cid())
2021-04-05 17:56:53 +00:00
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
2020-05-29 19:36:04 +00:00
if err != nil {
2020-09-02 18:15:25 +00:00
return faults, sm, xerrors.Errorf("declare faults wait error: %w", err)
2020-05-29 19:36:04 +00:00
}
if rec.Receipt.ExitCode != 0 {
2020-09-02 18:15:25 +00:00
return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
2020-05-29 19:36:04 +00:00
}
2019-12-17 22:23:43 +00:00
2020-09-02 18:15:25 +00:00
return faults, sm, nil
2019-12-17 22:23:43 +00:00
}
2021-05-14 20:00:13 +00:00
// runPoStCycle runs a full cycle of the PoSt process:
2021-05-14 18:45:47 +00:00
//
// 1. performs recovery declarations for the next deadline.
// 2. performs fault declarations for the next deadline.
// 3. computes and submits proofs, batching partitions and making sure they
// don't exceed message capacity.
2021-05-14 20:00:13 +00:00
func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, di dline.Info, ts *types.TipSet) ([]miner.SubmitWindowedPoStParams, error) {
ctx, span := trace.StartSpan(ctx, "storage.runPoStCycle")
defer span.End()
2020-07-14 17:10:37 +00:00
go func() {
2021-05-14 20:00:13 +00:00
// TODO: extract from runPoStCycle, run on fault cutoff boundaries
2020-08-03 16:56:59 +00:00
2020-05-29 19:36:04 +00:00
// check faults / recoveries for the *next* deadline. It's already too
// late to declare them for this deadline
declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines
2020-05-29 19:36:04 +00:00
partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key())
if err != nil {
log.Errorf("getting partitions: %v", err)
2020-07-14 17:10:31 +00:00
return
}
2020-09-02 18:15:25 +00:00
var (
sigmsg *types.SignedMessage
recoveries []miner.RecoveryDeclaration
faults []miner.FaultDeclaration
// optionalCid returns the CID of the message, or cid.Undef is the
// message is nil. We don't need the argument (could capture the
// pointer), but it's clearer and purer like that.
optionalCid = func(sigmsg *types.SignedMessage) cid.Cid {
if sigmsg == nil {
return cid.Undef
}
return sigmsg.Cid()
}
)
if recoveries, sigmsg, err = s.declareRecoveries(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
2020-05-29 19:36:04 +00:00
// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
log.Errorf("checking sector recoveries: %v", err)
}
s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStRecoveries], func() interface{} {
j := WdPoStRecoveriesProcessedEvt{
evtCommon: s.getEvtCommon(err),
Declarations: recoveries,
MessageCID: optionalCid(sigmsg),
}
j.Error = err
return j
2020-09-02 18:15:25 +00:00
})
2020-09-24 21:30:11 +00:00
if ts.Height() > build.UpgradeIgnitionHeight {
return // FORK: declaring faults after ignition upgrade makes no sense
}
if faults, sigmsg, err = s.declareFaults(context.TODO(), declDeadline, partitions, ts.Key()); err != nil {
2020-05-29 19:36:04 +00:00
// TODO: This is also potentially really bad, but we try to post anyways
log.Errorf("checking sector faults: %v", err)
}
2020-09-02 18:15:25 +00:00
s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStFaults], func() interface{} {
return WdPoStFaultsProcessedEvt{
evtCommon: s.getEvtCommon(err),
Declarations: faults,
MessageCID: optionalCid(sigmsg),
}
2020-09-02 18:15:25 +00:00
})
2020-07-14 17:10:31 +00:00
}()
2020-05-16 21:50:50 +00:00
buf := new(bytes.Buffer)
if err := s.actor.MarshalCBOR(buf); err != nil {
return nil, xerrors.Errorf("failed to marshal address to cbor: %w", err)
}
headTs, err := s.api.ChainHead(ctx)
if err != nil {
return nil, xerrors.Errorf("getting current head: %w", err)
}
rand, err := s.api.ChainGetRandomnessFromBeacon(ctx, headTs.Key(), crypto.DomainSeparationTag_WindowedPoStChallengeSeed, di.Challenge, buf.Bytes())
2020-08-11 23:58:35 +00:00
if err != nil {
return nil, xerrors.Errorf("failed to get chain randomness from beacon for window post (ts=%d; deadline=%d): %w", ts.Height(), di, err)
2020-08-11 23:58:35 +00:00
}
// Get the partitions for the given deadline
partitions, err := s.api.StateMinerPartitions(ctx, s.actor, di.Index, ts.Key())
if err != nil {
return nil, xerrors.Errorf("getting partitions: %w", err)
}
nv, err := s.api.StateNetworkVersion(ctx, ts.Key())
if err != nil {
return nil, xerrors.Errorf("getting network version: %w", err)
}
// Split partitions into batches, so as not to exceed the number of sectors
// allowed in a single message
partitionBatches, err := s.batchPartitions(partitions, nv)
if err != nil {
return nil, err
}
// Generate proofs in batches
posts := make([]miner.SubmitWindowedPoStParams, 0, len(partitionBatches))
for batchIdx, batch := range partitionBatches {
batchPartitionStartIdx := 0
for _, batch := range partitionBatches[:batchIdx] {
batchPartitionStartIdx += len(batch)
}
params := miner.SubmitWindowedPoStParams{
Deadline: di.Index,
Partitions: make([]miner.PoStPartition, 0, len(batch)),
Proofs: nil,
}
postSkipped := bitfield.New()
somethingToProve := false
2020-04-21 17:22:53 +00:00
// Retry until we run out of sectors to prove.
for retries := 0; ; retries++ {
2021-07-29 11:51:25 +00:00
skipCount := uint64(0)
var partitions []miner.PoStPartition
2020-10-08 01:09:33 +00:00
var sinfos []proof2.SectorInfo
for partIdx, partition := range batch {
// TODO: Can do this in parallel
toProve, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors)
if err != nil {
return nil, xerrors.Errorf("removing faults from set of sectors to prove: %w", err)
}
toProve, err = bitfield.MergeBitFields(toProve, partition.RecoveringSectors)
if err != nil {
return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err)
}
good, err := s.checkSectors(ctx, toProve, ts.Key())
if err != nil {
return nil, xerrors.Errorf("checking sectors to skip: %w", err)
}
good, err = bitfield.SubtractBitField(good, postSkipped)
if err != nil {
return nil, xerrors.Errorf("toProve - postSkipped: %w", err)
}
skipped, err := bitfield.SubtractBitField(toProve, good)
if err != nil {
return nil, xerrors.Errorf("toProve - good: %w", err)
}
2020-05-29 19:36:04 +00:00
sc, err := skipped.Count()
if err != nil {
return nil, xerrors.Errorf("getting skipped sector count: %w", err)
}
2020-05-29 19:36:04 +00:00
skipCount += sc
2020-09-17 02:34:13 +00:00
ssi, err := s.sectorsForProof(ctx, good, partition.AllSectors, ts)
if err != nil {
return nil, xerrors.Errorf("getting sorted sector info: %w", err)
}
if len(ssi) == 0 {
continue
}
sinfos = append(sinfos, ssi...)
2020-09-18 01:00:31 +00:00
partitions = append(partitions, miner.PoStPartition{
Index: uint64(batchPartitionStartIdx + partIdx),
Skipped: skipped,
})
}
if len(sinfos) == 0 {
// nothing to prove for this batch
break
}
// Generate proof
log.Infow("running window post",
"chain-random", rand,
"deadline", di,
"height", ts.Height(),
"skipped", skipCount)
2020-05-29 19:36:04 +00:00
tsStart := build.Clock.Now()
2020-07-22 09:02:46 +00:00
mid, err := address.IDFromAddress(s.actor)
if err != nil {
return nil, err
}
2021-03-11 07:10:19 +00:00
postOut, ps, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), sinfos, append(abi.PoStRandomness{}, rand...))
elapsed := time.Since(tsStart)
2020-05-29 19:36:04 +00:00
log.Infow("computing window post", "batch", batchIdx, "elapsed", elapsed)
2020-07-22 09:02:46 +00:00
if err == nil {
// If we proved nothing, something is very wrong.
if len(postOut) == 0 {
return nil, xerrors.Errorf("received no proofs back from generate window post")
}
headTs, err := s.api.ChainHead(ctx)
if err != nil {
return nil, xerrors.Errorf("getting current head: %w", err)
}
checkRand, err := s.api.ChainGetRandomnessFromBeacon(ctx, headTs.Key(), crypto.DomainSeparationTag_WindowedPoStChallengeSeed, di.Challenge, buf.Bytes())
2021-03-10 08:19:38 +00:00
if err != nil {
return nil, xerrors.Errorf("failed to get chain randomness from beacon for window post (ts=%d; deadline=%d): %w", ts.Height(), di, err)
}
if !bytes.Equal(checkRand, rand) {
log.Warnw("windowpost randomness changed", "old", rand, "new", checkRand, "ts-height", ts.Height(), "challenge-height", di.Challenge, "tsk", ts.Key())
rand = checkRand
continue
2021-03-10 08:19:38 +00:00
}
// If we generated an incorrect proof, try again.
if correct, err := s.verifier.VerifyWindowPoSt(ctx, proof.WindowPoStVerifyInfo{
2021-03-10 08:19:38 +00:00
Randomness: abi.PoStRandomness(checkRand),
Proofs: postOut,
ChallengedSectors: sinfos,
Prover: abi.ActorID(mid),
}); err != nil {
log.Errorw("window post verification failed", "post", postOut, "error", err)
time.Sleep(5 * time.Second)
continue
} else if !correct {
log.Errorw("generated incorrect window post proof", "post", postOut, "error", err)
continue
}
// Proof generation successful, stop retrying
somethingToProve = true
params.Partitions = partitions
params.Proofs = postOut
break
}
// Proof generation failed, so retry
2020-07-14 17:10:31 +00:00
if len(ps) == 0 {
// If we didn't skip any new sectors, we failed
// for some other reason and we need to abort.
return nil, xerrors.Errorf("running window post failed: %w", err)
}
// TODO: maybe mark these as faulty somewhere?
log.Warnw("generate window post skipped sectors", "sectors", ps, "error", err, "try", retries)
2020-07-17 14:47:16 +00:00
// Explicitly make sure we haven't aborted this PoSt
// (GenerateWindowPoSt may or may not check this).
// Otherwise, we could try to continue proving a
// deadline after the deadline has ended.
if ctx.Err() != nil {
log.Warnw("aborting PoSt due to context cancellation", "error", ctx.Err(), "deadline", di.Index)
return nil, ctx.Err()
}
for _, sector := range ps {
postSkipped.Set(uint64(sector.Number))
}
}
// Nothing to prove for this batch, try the next batch
if !somethingToProve {
continue
}
posts = append(posts, params)
}
2020-07-20 17:21:10 +00:00
return posts, nil
}
func (s *WindowPoStScheduler) batchPartitions(partitions []api.Partition, nv network.Version) ([][]api.Partition, error) {
// We don't want to exceed the number of sectors allowed in a message.
// So given the number of sectors in a partition, work out the number of
// partitions that can be in a message without exceeding sectors per
// message:
// floor(number of sectors allowed in a message / sectors per partition)
// eg:
// max sectors per message 7: ooooooo
// sectors per partition 3: ooo
// partitions per message 2: oooOOO
// <1><2> (3rd doesn't fit)
partitionsPerMsg, err := policy.GetMaxPoStPartitions(nv, s.proofType)
2020-10-08 01:09:33 +00:00
if err != nil {
return nil, xerrors.Errorf("getting sectors per partition: %w", err)
}
// Also respect the AddressedPartitionsMax (which is the same as DeclarationsMax (which is all really just MaxPartitionsPerDeadline))
2021-08-10 17:07:30 +00:00
declMax, err := policy.GetDeclarationsMax(nv)
if err != nil {
return nil, xerrors.Errorf("getting max declarations: %w", err)
}
if partitionsPerMsg > declMax {
partitionsPerMsg = declMax
}
// The number of messages will be:
// ceiling(number of partitions / partitions per message)
batchCount := len(partitions) / partitionsPerMsg
if len(partitions)%partitionsPerMsg != 0 {
batchCount++
}
// Split the partitions into batches
batches := make([][]api.Partition, 0, batchCount)
for i := 0; i < len(partitions); i += partitionsPerMsg {
end := i + partitionsPerMsg
if end > len(partitions) {
end = len(partitions)
}
batches = append(batches, partitions[i:end])
}
return batches, nil
}
2020-10-08 01:09:33 +00:00
func (s *WindowPoStScheduler) sectorsForProof(ctx context.Context, goodSectors, allSectors bitfield.BitField, ts *types.TipSet) ([]proof2.SectorInfo, error) {
sset, err := s.api.StateMinerSectors(ctx, s.actor, &goodSectors, ts.Key())
if err != nil {
2020-04-07 19:55:34 +00:00
return nil, err
}
if len(sset) == 0 {
return nil, nil
}
2020-10-08 01:09:33 +00:00
substitute := proof2.SectorInfo{
SectorNumber: sset[0].SectorNumber,
SealedCID: sset[0].SealedCID,
SealProof: sset[0].SealProof,
}
2020-10-08 01:09:33 +00:00
sectorByID := make(map[uint64]proof2.SectorInfo, len(sset))
for _, sector := range sset {
2020-10-08 01:09:33 +00:00
sectorByID[uint64(sector.SectorNumber)] = proof2.SectorInfo{
SectorNumber: sector.SectorNumber,
SealedCID: sector.SealedCID,
SealProof: sector.SealProof,
}
}
2020-10-08 01:09:33 +00:00
proofSectors := make([]proof2.SectorInfo, 0, len(sset))
if err := allSectors.ForEach(func(sectorNo uint64) error {
if info, found := sectorByID[sectorNo]; found {
proofSectors = append(proofSectors, info)
} else {
proofSectors = append(proofSectors, substitute)
}
return nil
}); err != nil {
return nil, xerrors.Errorf("iterating partition sector bitmap: %w", err)
}
return proofSectors, nil
}
// submitPoStMessage builds a SubmitWindowedPoSt message and submits it to
// the mpool. It doesn't synchronously block on confirmations, but it does
// monitor in the background simply for the purposes of logging.
func (s *WindowPoStScheduler) submitPoStMessage(ctx context.Context, proof *miner.SubmitWindowedPoStParams) (*types.SignedMessage, error) {
ctx, span := trace.StartSpan(ctx, "storage.commitPost")
defer span.End()
2020-07-20 13:45:17 +00:00
var sm *types.SignedMessage
enc, aerr := actors.SerializeParams(proof)
if aerr != nil {
return nil, xerrors.Errorf("could not serialize submit window post parameters: %w", aerr)
}
msg := &types.Message{
To: s.actor,
Method: miner.Methods.SubmitWindowedPoSt,
Params: enc,
Value: types.NewInt(0),
}
2020-08-19 21:25:58 +00:00
spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}
2021-05-14 18:49:05 +00:00
if err := s.prepareMessage(ctx, msg, spec); err != nil {
2020-10-21 03:35:18 +00:00
return nil, err
}
2020-08-19 21:25:58 +00:00
sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
if err != nil {
return nil, xerrors.Errorf("pushing message to mpool: %w", err)
}
2020-04-21 17:22:53 +00:00
log.Infof("Submitted window post: %s", sm.Cid())
2020-01-29 22:47:28 +00:00
go func() {
2021-04-05 17:56:53 +00:00
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true)
2020-01-29 22:47:28 +00:00
if err != nil {
log.Error(err)
return
}
if rec.Receipt.ExitCode == 0 {
return
}
2020-04-21 17:22:53 +00:00
log.Errorf("Submitting window post %s failed: exit %d", sm.Cid(), rec.Receipt.ExitCode)
2020-01-29 22:47:28 +00:00
}()
return sm, nil
}
2021-05-14 18:49:05 +00:00
// prepareMessage prepares a message before sending it, setting:
//
// * the sender (from the AddressSelector, falling back to the worker address if none set)
// * the right gas parameters
func (s *WindowPoStScheduler) prepareMessage(ctx context.Context, msg *types.Message, spec *api.MessageSendSpec) error {
mi, err := s.api.StateMinerInfo(ctx, s.actor, types.EmptyTSK)
if err != nil {
2020-10-21 03:35:18 +00:00
return xerrors.Errorf("error getting miner info: %w", err)
}
2021-05-14 18:49:05 +00:00
// set the worker as a fallback
2020-10-21 03:35:18 +00:00
msg.From = mi.Worker
2021-05-14 18:49:05 +00:00
// (optimal) initial estimation with some overestimation that guarantees
// block inclusion within the next 20 tipsets.
2020-08-19 21:25:58 +00:00
gm, err := s.api.GasEstimateMessageGas(ctx, msg, spec, types.EmptyTSK)
if err != nil {
log.Errorw("estimating gas", "error", err)
2020-10-21 03:35:18 +00:00
return nil
2020-08-19 21:25:58 +00:00
}
*msg = *gm
2021-05-14 18:45:47 +00:00
// calculate a more frugal estimation; premium is estimated to guarantee
// inclusion within 5 tipsets, and fee cap is estimated for inclusion
// within 4 tipsets.
minGasFeeMsg := *msg
2021-04-29 04:56:16 +00:00
minGasFeeMsg.GasPremium, err = s.api.GasEstimateGasPremium(ctx, 5, msg.From, msg.GasLimit, types.EmptyTSK)
if err != nil {
log.Errorf("failed to estimate minimum gas premium: %+v", err)
minGasFeeMsg.GasPremium = msg.GasPremium
}
minGasFeeMsg.GasFeeCap, err = s.api.GasEstimateFeeCap(ctx, &minGasFeeMsg, 4, types.EmptyTSK)
if err != nil {
log.Errorf("failed to estimate minimum gas fee cap: %+v", err)
minGasFeeMsg.GasFeeCap = msg.GasFeeCap
}
2021-05-14 18:45:47 +00:00
// goodFunds = funds needed for optimal inclusion probability.
// minFunds = funds needed for more speculative inclusion probability.
goodFunds := big.Add(msg.RequiredFunds(), msg.Value)
minFunds := big.Min(big.Add(minGasFeeMsg.RequiredFunds(), minGasFeeMsg.Value), goodFunds)
2020-12-02 20:47:45 +00:00
pa, avail, err := s.addrSel.AddressFor(ctx, s.api, mi, api.PoStAddr, goodFunds, minFunds)
if err != nil {
log.Errorw("error selecting address for window post", "error", err)
2020-10-21 03:35:18 +00:00
return nil
}
msg.From = pa
bestReq := big.Add(msg.RequiredFunds(), msg.Value)
if avail.LessThan(bestReq) {
2020-11-19 17:30:53 +00:00
mff := func() (abi.TokenAmount, error) {
return msg.RequiredFunds(), nil
}
messagepool.CapGasFee(mff, msg, &api.MessageSendSpec{MaxFee: big.Min(big.Sub(avail, msg.Value), msg.RequiredFunds())})
}
2020-10-21 03:35:18 +00:00
return nil
2020-08-19 23:26:13 +00:00
}