lotus/storage/wdpost_run.go

491 lines
13 KiB
Go
Raw Normal View History

package storage
import (
"bytes"
"context"
2020-04-22 19:16:19 +00:00
"errors"
2019-11-28 18:08:10 +00:00
"time"
2020-05-28 15:45:34 +00:00
"github.com/filecoin-project/go-bitfield"
"github.com/filecoin-project/go-address"
2020-02-08 02:18:32 +00:00
"github.com/filecoin-project/specs-actors/actors/abi"
2020-02-12 22:12:11 +00:00
"github.com/filecoin-project/specs-actors/actors/builtin"
"github.com/filecoin-project/specs-actors/actors/builtin/miner"
"github.com/filecoin-project/specs-actors/actors/crypto"
2019-11-28 18:08:10 +00:00
"go.opencensus.io/trace"
"golang.org/x/xerrors"
2020-05-26 03:43:17 +00:00
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/actors"
"github.com/filecoin-project/lotus/chain/types"
)
2020-04-22 19:16:19 +00:00
// errNoPartitions is returned by runPost when no sector infos were collected
// for the deadline, i.e. there is nothing to prove. doPost treats it as a
// no-op rather than as a failure.
var errNoPartitions = errors.New("no partitions")
2020-04-20 17:34:08 +00:00
func (s *WindowPoStScheduler) failPost(deadline *miner.DeadlineInfo) {
2020-04-07 19:55:34 +00:00
log.Errorf("TODO")
/*s.failLk.Lock()
2019-12-08 19:48:17 +00:00
if eps > s.failed {
s.failed = eps
}
2020-04-07 19:55:34 +00:00
s.failLk.Unlock()*/
2019-12-08 19:48:17 +00:00
}
2020-04-20 17:34:08 +00:00
func (s *WindowPoStScheduler) doPost(ctx context.Context, deadline *miner.DeadlineInfo, ts *types.TipSet) {
ctx, abort := context.WithCancel(ctx)
s.abort = abort
2020-04-07 19:55:34 +00:00
s.activeDeadline = deadline
go func() {
defer abort()
2020-04-07 17:41:41 +00:00
ctx, span := trace.StartSpan(ctx, "WindowPoStScheduler.doPost")
defer span.End()
2020-04-10 21:07:18 +00:00
proof, err := s.runPost(ctx, *deadline, ts)
2020-04-22 19:16:19 +00:00
switch err {
case errNoPartitions:
return
2020-04-22 19:16:19 +00:00
case nil:
if err := s.submitPost(ctx, proof); err != nil {
log.Errorf("submitPost failed: %+v", err)
s.failPost(deadline)
return
}
default:
log.Errorf("runPost failed: %+v", err)
2020-04-07 19:55:34 +00:00
s.failPost(deadline)
return
}
}()
}
func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check abi.BitField) (abi.BitField, error) {
2020-05-16 21:50:50 +00:00
spt, err := s.proofType.RegisteredSealProof()
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("getting seal proof type: %w", err)
2020-05-16 21:50:50 +00:00
}
mid, err := address.IDFromAddress(s.actor)
if err != nil {
return bitfield.BitField{}, err
2020-05-16 21:50:50 +00:00
}
2020-05-17 01:36:22 +00:00
sectors := make(map[abi.SectorID]struct{})
2020-05-16 21:50:50 +00:00
var tocheck []abi.SectorID
2020-05-29 19:36:04 +00:00
err = check.ForEach(func(snum uint64) error {
2020-05-16 21:50:50 +00:00
s := abi.SectorID{
Miner: abi.ActorID(mid),
Number: abi.SectorNumber(snum),
}
tocheck = append(tocheck, s)
sectors[s] = struct{}{}
return nil
})
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("iterating over bitfield: %w", err)
2020-05-16 21:50:50 +00:00
}
bad, err := s.faultTracker.CheckProvable(ctx, spt, tocheck)
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err)
2020-05-16 21:50:50 +00:00
}
for _, id := range bad {
delete(sectors, id)
}
2020-05-29 19:36:04 +00:00
log.Warnw("Checked sectors", "checked", len(tocheck), "good", len(sectors))
2020-05-16 21:50:50 +00:00
sbf := bitfield.New()
for s := range sectors {
sbf.Set(uint64(s.Number))
2020-05-16 21:50:50 +00:00
}
return sbf, nil
2020-05-29 19:36:04 +00:00
}
2020-07-14 17:10:31 +00:00
func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) error {
ctx, span := trace.StartSpan(ctx, "storage.checkNextRecoveries")
defer span.End()
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
params := &miner.DeclareFaultsRecoveredParams{
Recoveries: []miner.RecoveryDeclaration{},
2020-05-29 19:36:04 +00:00
}
2020-07-14 17:10:31 +00:00
faulty := uint64(0)
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
for partIdx, partition := range partitions {
unrecovered, err := bitfield.SubtractBitField(partition.Faults, partition.Recoveries)
if err != nil {
return xerrors.Errorf("subtracting recovered set from fault set: %w", err)
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
uc, err := unrecovered.Count()
if err != nil {
return xerrors.Errorf("counting unrecovered sectors: %w", err)
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
if uc == 0 {
continue
}
2020-07-14 17:10:31 +00:00
faulty += uc
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
recovered, err := s.checkSectors(ctx, unrecovered)
if err != nil {
return xerrors.Errorf("checking unrecovered sectors: %w", err)
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
// if all sectors failed to recover, don't declare recoveries
recoveredCount, err := recovered.Count()
if err != nil {
return xerrors.Errorf("counting recovered sectors: %w", err)
}
2020-07-08 12:35:53 +00:00
2020-07-14 17:10:31 +00:00
if recoveredCount == 0 {
continue
}
2020-07-08 12:35:53 +00:00
2020-07-14 17:10:31 +00:00
params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{
Deadline: dlIdx,
Partition: uint64(partIdx),
Sectors: recovered,
})
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
if len(params.Recoveries) == 0 {
if faulty != 0 {
log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
}
return nil
2020-05-16 21:50:50 +00:00
}
2020-07-14 17:10:37 +00:00
enc, aerr := actors.SerializeParams(params)
2020-05-16 21:50:50 +00:00
if aerr != nil {
return xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
}
msg := &types.Message{
To: s.actor,
From: s.worker,
Method: builtin.MethodsMiner.DeclareFaultsRecovered,
Params: enc,
Value: types.NewInt(0),
2020-05-16 21:50:50 +00:00
}
sm, err := s.api.MpoolPushMessage(ctx, msg)
if err != nil {
return xerrors.Errorf("pushing message to mpool: %w", err)
}
log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
2020-05-16 21:50:50 +00:00
if err != nil {
return xerrors.Errorf("declare faults recovered wait error: %w", err)
}
2020-05-28 15:45:34 +00:00
if rec.Receipt.ExitCode != 0 {
2020-05-16 21:50:50 +00:00
return xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
}
return nil
}
2020-07-14 17:10:31 +00:00
func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) error {
ctx, span := trace.StartSpan(ctx, "storage.checkNextFaults")
defer span.End()
2020-06-01 12:49:48 +00:00
2020-07-14 17:10:31 +00:00
params := &miner.DeclareFaultsParams{
Faults: []miner.FaultDeclaration{},
2020-05-29 19:36:04 +00:00
}
2020-07-14 17:10:31 +00:00
bad := uint64(0)
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
for partIdx, partition := range partitions {
toCheck, err := partition.ActiveSectors()
if err != nil {
return xerrors.Errorf("getting active sectors: %w", err)
}
2019-12-17 22:23:43 +00:00
2020-07-14 17:10:31 +00:00
good, err := s.checkSectors(ctx, toCheck)
if err != nil {
return xerrors.Errorf("checking sectors: %w", err)
}
faulty, err := bitfield.SubtractBitField(toCheck, good)
if err != nil {
return xerrors.Errorf("calculating faulty sector set: %w", err)
}
c, err := faulty.Count()
if err != nil {
return xerrors.Errorf("counting faulty sectors: %w", err)
}
if c == 0 {
continue
}
bad += c
params.Faults = append(params.Faults, miner.FaultDeclaration{
Deadline: dlIdx,
Partition: uint64(partIdx),
Sectors: faulty,
})
2020-05-29 19:36:04 +00:00
}
2019-12-17 22:23:43 +00:00
2020-07-14 17:10:31 +00:00
if len(params.Faults) == 0 {
2020-05-29 19:36:04 +00:00
return nil
}
2019-12-17 22:23:43 +00:00
2020-07-14 17:10:31 +00:00
log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)
2020-05-29 19:36:04 +00:00
enc, aerr := actors.SerializeParams(params)
if aerr != nil {
return xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
}
msg := &types.Message{
To: s.actor,
From: s.worker,
Method: builtin.MethodsMiner.DeclareFaults,
Params: enc,
Value: types.NewInt(0), // TODO: Is there a fee?
2020-05-29 19:36:04 +00:00
}
sm, err := s.api.MpoolPushMessage(ctx, msg)
if err != nil {
return xerrors.Errorf("pushing message to mpool: %w", err)
}
log.Warnw("declare faults Message CID", "cid", sm.Cid())
2020-06-03 21:42:06 +00:00
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
2020-05-29 19:36:04 +00:00
if err != nil {
return xerrors.Errorf("declare faults wait error: %w", err)
}
if rec.Receipt.ExitCode != 0 {
return xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
}
2019-12-17 22:23:43 +00:00
2020-05-29 19:36:04 +00:00
return nil
2019-12-17 22:23:43 +00:00
}
2020-04-20 17:34:08 +00:00
func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo, ts *types.TipSet) (*miner.SubmitWindowedPoStParams, error) {
ctx, span := trace.StartSpan(ctx, "storage.runPost")
defer span.End()
2020-07-14 17:10:37 +00:00
go func() {
2020-08-03 16:56:59 +00:00
// TODO: extract from runPost, run on fault cutoff boundaries
2020-05-29 19:36:04 +00:00
// check faults / recoveries for the *next* deadline. It's already too
// late to declare them for this deadline
2020-08-03 16:56:59 +00:00
declDeadline := (di.Index + 2) % miner.WPoStPeriodDeadlines
2020-05-29 19:36:04 +00:00
partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key())
2020-07-14 17:10:31 +00:00
if err != nil {
log.Errorf("getting partitions: %v", err)
return
}
if err := s.checkNextRecoveries(context.TODO(), declDeadline, partitions); err != nil {
2020-05-29 19:36:04 +00:00
// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
log.Errorf("checking sector recoveries: %v", err)
}
if err := s.checkNextFaults(context.TODO(), declDeadline, partitions); err != nil {
2020-05-29 19:36:04 +00:00
// TODO: This is also potentially really bad, but we try to post anyways
log.Errorf("checking sector faults: %v", err)
}
2020-07-14 17:10:31 +00:00
}()
2020-05-16 21:50:50 +00:00
buf := new(bytes.Buffer)
if err := s.actor.MarshalCBOR(buf); err != nil {
return nil, xerrors.Errorf("failed to marshal address to cbor: %w", err)
}
2020-08-11 23:58:35 +00:00
rand, err := s.api.ChainGetRandomnessFromBeacon(ctx, ts.Key(), crypto.DomainSeparationTag_WindowedPoStChallengeSeed, di.Challenge, buf.Bytes())
if err != nil {
return nil, xerrors.Errorf("failed to get chain randomness for windowPost (ts=%d; deadline=%d): %w", ts.Height(), di, err)
}
commEpoch := di.Open
2020-08-11 23:58:35 +00:00
commRand, err := s.api.ChainGetRandomnessFromTickets(ctx, ts.Key(), crypto.DomainSeparationTag_PoStChainCommit, commEpoch, nil)
if err != nil {
return nil, xerrors.Errorf("failed to get chain randomness for windowPost (ts=%d; deadline=%d): %w", ts.Height(), di, err)
}
2020-07-14 17:10:31 +00:00
partitions, err := s.api.StateMinerPartitions(ctx, s.actor, di.Index, ts.Key())
if err != nil {
2020-07-14 17:10:31 +00:00
return nil, xerrors.Errorf("getting partitions: %w", err)
}
2020-07-14 17:10:31 +00:00
params := &miner.SubmitWindowedPoStParams{
2020-08-11 23:58:35 +00:00
Deadline: di.Index,
Partitions: make([]miner.PoStPartition, 0, len(partitions)),
Proofs: nil,
ChainCommitEpoch: commEpoch,
ChainCommitRand: commRand,
}
2020-07-14 17:10:31 +00:00
var sinfos []abi.SectorInfo
sidToPart := map[abi.SectorNumber]uint64{}
skipCount := uint64(0)
2020-04-21 17:22:53 +00:00
2020-07-14 17:10:31 +00:00
for partIdx, partition := range partitions {
// TODO: Can do this in parallel
toProve, err := partition.ActiveSectors()
if err != nil {
return nil, xerrors.Errorf("getting active sectors: %w", err)
}
2020-04-21 17:22:53 +00:00
2020-07-14 17:10:31 +00:00
toProve, err = bitfield.MergeBitFields(toProve, partition.Recoveries)
if err != nil {
return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err)
}
2020-07-14 17:10:31 +00:00
good, err := s.checkSectors(ctx, toProve)
if err != nil {
return nil, xerrors.Errorf("checking sectors to skip: %w", err)
}
2020-07-14 17:10:31 +00:00
skipped, err := bitfield.SubtractBitField(toProve, good)
if err != nil {
return nil, xerrors.Errorf("toProve - good: %w", err)
}
2020-07-14 17:10:31 +00:00
sc, err := skipped.Count()
if err != nil {
return nil, xerrors.Errorf("getting skipped sector count: %w", err)
}
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
skipCount += sc
2020-05-29 19:36:04 +00:00
2020-07-14 17:10:31 +00:00
ssi, err := s.sectorInfo(ctx, good, ts)
if err != nil {
return nil, xerrors.Errorf("getting sorted sector info: %w", err)
}
2020-05-29 19:36:04 +00:00
2020-07-22 09:02:46 +00:00
if len(ssi) == 0 {
continue
}
2020-07-14 17:10:31 +00:00
sinfos = append(sinfos, ssi...)
for _, si := range ssi {
sidToPart[si.SectorNumber] = uint64(partIdx)
}
params.Partitions = append(params.Partitions, miner.PoStPartition{
2020-07-14 17:10:37 +00:00
Index: uint64(partIdx),
2020-07-14 17:10:31 +00:00
Skipped: skipped,
})
2020-03-07 05:26:00 +00:00
}
2020-07-17 14:47:16 +00:00
if len(sinfos) == 0 {
// nothing to prove..
return nil, errNoPartitions
}
2020-04-07 17:41:41 +00:00
log.Infow("running windowPost",
2020-01-21 15:01:42 +00:00
"chain-random", rand,
"deadline", di,
2020-05-29 19:36:04 +00:00
"height", ts.Height(),
"skipped", skipCount)
2020-07-10 14:43:14 +00:00
tsStart := build.Clock.Now()
2020-07-14 17:10:31 +00:00
log.Infow("generating windowPost", "sectors", len(sinfos))
2020-01-21 15:01:42 +00:00
2020-03-17 20:19:52 +00:00
mid, err := address.IDFromAddress(s.actor)
if err != nil {
return nil, err
}
2020-07-14 17:10:31 +00:00
postOut, postSkipped, err := s.prover.GenerateWindowPoSt(ctx, abi.ActorID(mid), sinfos, abi.PoStRandomness(rand))
if err != nil {
return nil, xerrors.Errorf("running post failed: %w", err)
}
2020-04-10 21:07:18 +00:00
if len(postOut) == 0 {
return nil, xerrors.Errorf("received proofs back from generate window post")
}
2020-07-20 17:21:10 +00:00
params.Proofs = postOut
for _, sector := range postSkipped {
2020-07-14 17:10:31 +00:00
params.Partitions[sidToPart[sector.Number]].Skipped.Set(uint64(sector.Number))
}
elapsed := time.Since(tsStart)
2020-04-21 17:22:53 +00:00
log.Infow("submitting window PoSt", "elapsed", elapsed)
2020-07-14 17:10:31 +00:00
return params, nil
}
func (s *WindowPoStScheduler) sectorInfo(ctx context.Context, deadlineSectors abi.BitField, ts *types.TipSet) ([]abi.SectorInfo, error) {
sset, err := s.api.StateMinerSectors(ctx, s.actor, &deadlineSectors, false, ts.Key())
if err != nil {
2020-04-07 19:55:34 +00:00
return nil, err
}
2020-02-27 21:45:31 +00:00
sbsi := make([]abi.SectorInfo, len(sset))
for k, sector := range sset {
2020-02-27 21:45:31 +00:00
sbsi[k] = abi.SectorInfo{
2020-06-15 16:30:49 +00:00
SectorNumber: sector.ID,
2020-06-26 13:13:18 +00:00
SealedCID: sector.Info.SealedCID,
SealProof: sector.Info.SealProof,
}
}
2020-02-27 21:45:31 +00:00
return sbsi, nil
}
func (s *WindowPoStScheduler) submitPost(ctx context.Context, proof *miner.SubmitWindowedPoStParams) error {
ctx, span := trace.StartSpan(ctx, "storage.commitPost")
defer span.End()
enc, aerr := actors.SerializeParams(proof)
if aerr != nil {
return xerrors.Errorf("could not serialize submit post parameters: %w", aerr)
}
msg := &types.Message{
To: s.actor,
From: s.worker,
Method: builtin.MethodsMiner.SubmitWindowedPoSt,
Params: enc,
Value: types.NewInt(1000), // currently hard-coded late fee in actor, returned if not late
}
// TODO: consider maybe caring about the output
sm, err := s.api.MpoolPushMessage(ctx, msg)
if err != nil {
return xerrors.Errorf("pushing message to mpool: %w", err)
}
2020-04-21 17:22:53 +00:00
log.Infof("Submitted window post: %s", sm.Cid())
2020-01-29 22:47:28 +00:00
go func() {
2020-06-03 21:42:06 +00:00
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
2020-01-29 22:47:28 +00:00
if err != nil {
log.Error(err)
return
}
if rec.Receipt.ExitCode == 0 {
return
}
2020-04-21 17:22:53 +00:00
log.Errorf("Submitting window post %s failed: exit %d", sm.Cid(), rec.Receipt.ExitCode)
2020-01-29 22:47:28 +00:00
}()
return nil
}