lotus/storage/wdpost_sched.go

260 lines
6.2 KiB
Go
Raw Normal View History

package storage
import (
"context"
"time"
2020-09-10 06:30:47 +00:00
"github.com/filecoin-project/go-state-types/dline"
"golang.org/x/xerrors"
"github.com/filecoin-project/go-address"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/dline"
2020-03-06 05:30:47 +00:00
"github.com/filecoin-project/specs-storage/storage"
2020-01-13 20:47:27 +00:00
"github.com/filecoin-project/lotus/api"
2020-07-10 14:43:14 +00:00
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
2020-08-17 13:39:33 +00:00
sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage"
2020-07-20 13:45:17 +00:00
"github.com/filecoin-project/lotus/journal"
"github.com/filecoin-project/lotus/node/config"
"go.opencensus.io/trace"
)
// StartConfidence is added to a deadline's Open value in update; a window
// PoSt is only started once the new tipset's height is strictly greater than
// that sum.
const StartConfidence = 4 // TODO: config
2020-04-07 17:41:41 +00:00
type WindowPoStScheduler struct {
2020-04-29 18:06:05 +00:00
api storageMinerApi
2020-08-12 17:47:00 +00:00
feeCfg config.MinerFeeConfig
2020-04-29 18:06:05 +00:00
prover storage.Prover
faultTracker sectorstorage.FaultTracker
2020-06-15 16:30:49 +00:00
proofType abi.RegisteredPoStProof
2020-04-29 18:06:05 +00:00
partitionSectors uint64
actor address.Address
worker address.Address
cur *types.TipSet
// if a post is in progress, this indicates for which ElectionPeriodStart
activeDeadline *dline.Info
2020-04-10 21:07:18 +00:00
abort context.CancelFunc
2019-12-08 19:48:17 +00:00
evtTypes [4]journal.EventType
2019-12-08 19:48:17 +00:00
2020-07-20 13:45:17 +00:00
// failed abi.ChainEpoch // eps
// failLk sync.Mutex
}
func NewWindowedPoStScheduler(api storageMinerApi, fc config.MinerFeeConfig, sb storage.Prover, ft sectorstorage.FaultTracker, actor address.Address, worker address.Address) (*WindowPoStScheduler, error) {
mi, err := api.StateMinerInfo(context.TODO(), actor, types.EmptyTSK)
if err != nil {
return nil, xerrors.Errorf("getting sector size: %w", err)
}
2020-04-29 18:06:05 +00:00
rt, err := mi.SealProofType.RegisteredWindowPoStProof()
if err != nil {
return nil, err
}
2020-04-29 18:06:05 +00:00
return &WindowPoStScheduler{
api: api,
2020-08-12 17:47:00 +00:00
feeCfg: fc,
2020-04-29 18:06:05 +00:00
prover: sb,
2020-05-16 21:50:50 +00:00
faultTracker: ft,
2020-04-29 18:06:05 +00:00
proofType: rt,
partitionSectors: mi.WindowPoStPartitionSectors,
2020-04-29 18:06:05 +00:00
actor: actor,
worker: worker,
evtTypes: [...]journal.EventType{
evtTypeWdPoStScheduler: journal.J.RegisterEventType("wdpost", "scheduler"),
evtTypeWdPoStProofs: journal.J.RegisterEventType("wdpost", "proofs_processed"),
evtTypeWdPoStRecoveries: journal.J.RegisterEventType("wdpost", "recoveries_processed"),
evtTypeWdPoStFaults: journal.J.RegisterEventType("wdpost", "faults_processed"),
},
2020-04-29 18:06:05 +00:00
}, nil
2020-04-07 19:55:34 +00:00
}
func deadlineEquals(a, b *dline.Info) bool {
2020-04-20 17:34:08 +00:00
if a == nil || b == nil {
return b == a
}
2020-04-07 19:55:34 +00:00
2020-04-20 17:34:08 +00:00
return a.PeriodStart == b.PeriodStart && a.Index == b.Index && a.Challenge == b.Challenge
}
2020-04-07 17:41:41 +00:00
func (s *WindowPoStScheduler) Run(ctx context.Context) {
defer s.abortActivePoSt()
var notifs <-chan []*api.HeadChange
var err error
var gotCur bool
// not fine to panic after this point
for {
if notifs == nil {
notifs, err = s.api.ChainNotify(ctx)
if err != nil {
2020-08-18 19:08:20 +00:00
log.Errorf("ChainNotify error: %+v", err)
2020-07-10 14:43:14 +00:00
build.Clock.Sleep(10 * time.Second)
continue
}
gotCur = false
}
select {
case changes, ok := <-notifs:
if !ok {
2020-04-07 17:41:41 +00:00
log.Warn("WindowPoStScheduler notifs channel closed")
notifs = nil
continue
}
if !gotCur {
if len(changes) != 1 {
log.Errorf("expected first notif to have len = 1")
continue
}
chg := changes[0]
if chg.Type != store.HCCurrent {
log.Errorf("expected first notif to tell current ts")
continue
}
if err := s.update(ctx, chg.Val); err != nil {
log.Errorf("%+v", err)
}
gotCur = true
continue
}
2020-04-07 17:41:41 +00:00
ctx, span := trace.StartSpan(ctx, "WindowPoStScheduler.headChange")
var lowest, highest *types.TipSet = s.cur, nil
for _, change := range changes {
2020-01-18 15:00:22 +00:00
if change.Val == nil {
log.Errorf("change.Val was nil")
}
switch change.Type {
case store.HCRevert:
lowest = change.Val
case store.HCApply:
highest = change.Val
}
}
if err := s.revert(ctx, lowest); err != nil {
2020-04-28 17:39:35 +00:00
log.Error("handling head reverts in windowPost sched: %+v", err)
}
if err := s.update(ctx, highest); err != nil {
2020-04-28 17:39:35 +00:00
log.Error("handling head updates in windowPost sched: %+v", err)
}
span.End()
case <-ctx.Done():
return
}
}
}
2020-04-07 17:41:41 +00:00
func (s *WindowPoStScheduler) revert(ctx context.Context, newLowest *types.TipSet) error {
if s.cur == newLowest {
return nil
}
s.cur = newLowest
newDeadline, err := s.api.StateMinerProvingDeadline(ctx, s.actor, newLowest.Key())
if err != nil {
return err
}
2020-04-20 17:34:08 +00:00
if !deadlineEquals(s.activeDeadline, newDeadline) {
s.abortActivePoSt()
}
return nil
}
2020-04-07 17:41:41 +00:00
func (s *WindowPoStScheduler) update(ctx context.Context, new *types.TipSet) error {
2020-01-18 15:00:22 +00:00
if new == nil {
2020-04-07 17:41:41 +00:00
return xerrors.Errorf("no new tipset in WindowPoStScheduler.update")
2020-01-18 15:00:22 +00:00
}
di, err := s.api.StateMinerProvingDeadline(ctx, s.actor, new.Key())
if err != nil {
return err
}
2020-04-20 17:34:08 +00:00
if deadlineEquals(s.activeDeadline, di) {
return nil // already working on this deadline
}
2020-04-28 17:13:46 +00:00
if !di.PeriodStarted() {
return nil // not proving anything yet
}
s.abortActivePoSt()
// TODO: wait for di.Challenge here, will give us ~10min more to compute windowpost
// (Need to get correct deadline above, which is tricky)
if di.Open+StartConfidence >= new.Height() {
log.Info("not starting windowPost yet, waiting for startconfidence", di.Open, di.Open+StartConfidence, new.Height())
2020-04-07 19:55:34 +00:00
return nil
}
2020-04-07 19:55:34 +00:00
/*s.failLk.Lock()
2019-12-08 19:48:17 +00:00
if s.failed > 0 {
s.failed = 0
s.activeEPS = 0
}
2020-04-07 19:55:34 +00:00
s.failLk.Unlock()*/
2020-04-21 17:22:53 +00:00
log.Infof("at %d, doPost for P %d, dd %d", new.Height(), di.PeriodStart, di.Index)
2020-04-07 19:55:34 +00:00
s.doPost(ctx, di, new)
return nil
}
2020-04-07 17:41:41 +00:00
func (s *WindowPoStScheduler) abortActivePoSt() {
2020-04-07 19:55:34 +00:00
if s.activeDeadline == nil {
return // noop
}
if s.abort != nil {
s.abort()
journal.J.RecordEvent(s.evtTypes[evtTypeWdPoStScheduler], func() interface{} {
return WdPoStSchedulerEvt{
evtCommon: s.getEvtCommon(nil),
State: SchedulerStateAborted,
}
2020-07-21 12:02:51 +00:00
})
2020-07-20 13:45:17 +00:00
2020-07-21 12:02:51 +00:00
log.Warnf("Aborting Window PoSt (Deadline: %+v)", s.activeDeadline)
}
2020-04-07 19:55:34 +00:00
s.activeDeadline = nil
s.abort = nil
}
// getEvtCommon builds the attributes shared by all WdPoSt journal events from
// the scheduler's state. Error is always set to err; Deadline, Height and
// TipSet are filled in only when a current tipset is known.
func (s *WindowPoStScheduler) getEvtCommon(err error) evtCommon {
	if s.cur == nil {
		return evtCommon{Error: err}
	}

	return evtCommon{
		Error:    err,
		Deadline: s.activeDeadline,
		Height:   s.cur.Height(),
		TipSet:   s.cur.Cids(),
	}
}