shuffle code to journal wdpost events.
This commit is contained in:
parent
0b6a182a44
commit
ac152abc75
@ -151,41 +151,24 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check abi.BitFie
|
|||||||
return sbf, nil
|
return sbf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) error {
|
func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) {
|
||||||
ctx, span := trace.StartSpan(ctx, "storage.checkNextRecoveries")
|
ctx, span := trace.StartSpan(ctx, "storage.checkNextRecoveries")
|
||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
var sm *types.SignedMessage
|
|
||||||
faulty := uint64(0)
|
faulty := uint64(0)
|
||||||
params := &miner.DeclareFaultsRecoveredParams{
|
params := &miner.DeclareFaultsRecoveredParams{
|
||||||
Recoveries: []miner.RecoveryDeclaration{},
|
Recoveries: []miner.RecoveryDeclaration{},
|
||||||
}
|
}
|
||||||
|
|
||||||
defer journal.J.RecordEvent(s.wdPoStEvtType, func() interface{} {
|
|
||||||
var mcid cid.Cid
|
|
||||||
if sm != nil {
|
|
||||||
mcid = sm.Cid()
|
|
||||||
}
|
|
||||||
|
|
||||||
return s.enrichWithTipset(WindowPoStEvt{
|
|
||||||
State: "recoveries_processed",
|
|
||||||
Deadline: s.activeDeadline,
|
|
||||||
Recoveries: &WindowPoStEvt_Recoveries{
|
|
||||||
Declarations: params.Recoveries,
|
|
||||||
MessageCID: mcid,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
for partIdx, partition := range partitions {
|
for partIdx, partition := range partitions {
|
||||||
unrecovered, err := bitfield.SubtractBitField(partition.Faults, partition.Recoveries)
|
unrecovered, err := bitfield.SubtractBitField(partition.Faults, partition.Recoveries)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("subtracting recovered set from fault set: %w", err)
|
return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
uc, err := unrecovered.Count()
|
uc, err := unrecovered.Count()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("counting unrecovered sectors: %w", err)
|
return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if uc == 0 {
|
if uc == 0 {
|
||||||
@ -196,13 +179,13 @@ func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uin
|
|||||||
|
|
||||||
recovered, err := s.checkSectors(ctx, unrecovered)
|
recovered, err := s.checkSectors(ctx, unrecovered)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("checking unrecovered sectors: %w", err)
|
return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// if all sectors failed to recover, don't declare recoveries
|
// if all sectors failed to recover, don't declare recoveries
|
||||||
recoveredCount, err := recovered.Count()
|
recoveredCount, err := recovered.Count()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("counting recovered sectors: %w", err)
|
return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if recoveredCount == 0 {
|
if recoveredCount == 0 {
|
||||||
@ -216,17 +199,18 @@ func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uin
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(params.Recoveries) == 0 {
|
recoveries := params.Recoveries
|
||||||
|
if len(recoveries) == 0 {
|
||||||
if faulty != 0 {
|
if faulty != 0 {
|
||||||
log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
|
log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return recoveries, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
enc, aerr := actors.SerializeParams(params)
|
enc, aerr := actors.SerializeParams(params)
|
||||||
if aerr != nil {
|
if aerr != nil {
|
||||||
return xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
|
return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr)
|
||||||
}
|
}
|
||||||
|
|
||||||
msg := &types.Message{
|
msg := &types.Message{
|
||||||
@ -241,67 +225,51 @@ func (s *WindowPoStScheduler) checkNextRecoveries(ctx context.Context, dlIdx uin
|
|||||||
|
|
||||||
sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)})
|
sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("pushing message to mpool: %w", err)
|
return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())
|
log.Warnw("declare faults recovered Message CID", "cid", sm.Cid())
|
||||||
|
|
||||||
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
|
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("declare faults recovered wait error: %w", err)
|
return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if rec.Receipt.ExitCode != 0 {
|
if rec.Receipt.ExitCode != 0 {
|
||||||
return xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
|
return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return recoveries, sm, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) error {
|
func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64, partitions []*miner.Partition) ([]miner.FaultDeclaration, *types.SignedMessage, error) {
|
||||||
ctx, span := trace.StartSpan(ctx, "storage.checkNextFaults")
|
ctx, span := trace.StartSpan(ctx, "storage.checkNextFaults")
|
||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
var sm *types.SignedMessage
|
|
||||||
bad := uint64(0)
|
bad := uint64(0)
|
||||||
params := &miner.DeclareFaultsParams{
|
params := &miner.DeclareFaultsParams{
|
||||||
Faults: []miner.FaultDeclaration{},
|
Faults: []miner.FaultDeclaration{},
|
||||||
}
|
}
|
||||||
|
|
||||||
defer journal.J.RecordEvent(s.wdPoStEvtType, func() interface{} {
|
|
||||||
var mcid cid.Cid
|
|
||||||
if sm != nil {
|
|
||||||
mcid = sm.Cid()
|
|
||||||
}
|
|
||||||
return s.enrichWithTipset(WindowPoStEvt{
|
|
||||||
State: "faults_processed",
|
|
||||||
Deadline: s.activeDeadline,
|
|
||||||
Faults: &WindowPoStEvt_Faults{
|
|
||||||
Declarations: params.Faults,
|
|
||||||
MessageCID: mcid,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
for partIdx, partition := range partitions {
|
for partIdx, partition := range partitions {
|
||||||
toCheck, err := partition.ActiveSectors()
|
toCheck, err := partition.ActiveSectors()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("getting active sectors: %w", err)
|
return nil, nil, xerrors.Errorf("getting active sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
good, err := s.checkSectors(ctx, toCheck)
|
good, err := s.checkSectors(ctx, toCheck)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("checking sectors: %w", err)
|
return nil, nil, xerrors.Errorf("checking sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
faulty, err := bitfield.SubtractBitField(toCheck, good)
|
faulty, err := bitfield.SubtractBitField(toCheck, good)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("calculating faulty sector set: %w", err)
|
return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
c, err := faulty.Count()
|
c, err := faulty.Count()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("counting faulty sectors: %w", err)
|
return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if c == 0 {
|
if c == 0 {
|
||||||
@ -317,15 +285,16 @@ func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64,
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(params.Faults) == 0 {
|
faults := params.Faults
|
||||||
return nil
|
if len(faults) == 0 {
|
||||||
|
return faults, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)
|
log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad)
|
||||||
|
|
||||||
enc, aerr := actors.SerializeParams(params)
|
enc, aerr := actors.SerializeParams(params)
|
||||||
if aerr != nil {
|
if aerr != nil {
|
||||||
return xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
|
return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr)
|
||||||
}
|
}
|
||||||
|
|
||||||
msg := &types.Message{
|
msg := &types.Message{
|
||||||
@ -340,21 +309,21 @@ func (s *WindowPoStScheduler) checkNextFaults(ctx context.Context, dlIdx uint64,
|
|||||||
|
|
||||||
sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
|
sm, err := s.api.MpoolPushMessage(ctx, msg, spec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("pushing message to mpool: %w", err)
|
return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Warnw("declare faults Message CID", "cid", sm.Cid())
|
log.Warnw("declare faults Message CID", "cid", sm.Cid())
|
||||||
|
|
||||||
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
|
rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("declare faults wait error: %w", err)
|
return faults, sm, xerrors.Errorf("declare faults wait error: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if rec.Receipt.ExitCode != 0 {
|
if rec.Receipt.ExitCode != 0 {
|
||||||
return xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
|
return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return faults, sm, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo, ts *types.TipSet) (*miner.SubmitWindowedPoStParams, error) {
|
func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo, ts *types.TipSet) (*miner.SubmitWindowedPoStParams, error) {
|
||||||
@ -374,15 +343,53 @@ func (s *WindowPoStScheduler) runPost(ctx context.Context, di miner.DeadlineInfo
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.checkNextRecoveries(context.TODO(), declDeadline, partitions); err != nil {
|
var (
|
||||||
|
sigmsg *types.SignedMessage
|
||||||
|
recoveries []miner.RecoveryDeclaration
|
||||||
|
faults []miner.FaultDeclaration
|
||||||
|
|
||||||
|
// optionalCid returns the CID of the message, or cid.Undef is the
|
||||||
|
// message is nil. We don't need the argument (could capture the
|
||||||
|
// pointer), but it's clearer and purer like that.
|
||||||
|
optionalCid = func(sigmsg *types.SignedMessage) cid.Cid {
|
||||||
|
if sigmsg == nil {
|
||||||
|
return cid.Undef
|
||||||
|
}
|
||||||
|
return sigmsg.Cid()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if recoveries, sigmsg, err = s.checkNextRecoveries(context.TODO(), declDeadline, partitions); err != nil {
|
||||||
// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
|
// TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse
|
||||||
log.Errorf("checking sector recoveries: %v", err)
|
log.Errorf("checking sector recoveries: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.checkNextFaults(context.TODO(), declDeadline, partitions); err != nil {
|
journal.J.RecordEvent(s.wdPoStEvtType, func() interface{} {
|
||||||
|
return s.enrichWithTipset(WindowPoStEvt{
|
||||||
|
State: "recoveries_processed",
|
||||||
|
Deadline: s.activeDeadline,
|
||||||
|
Recoveries: &WindowPoStEvt_Recoveries{
|
||||||
|
Declarations: recoveries,
|
||||||
|
MessageCID: optionalCid(sigmsg),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
if faults, sigmsg, err = s.checkNextFaults(context.TODO(), declDeadline, partitions); err != nil {
|
||||||
// TODO: This is also potentially really bad, but we try to post anyways
|
// TODO: This is also potentially really bad, but we try to post anyways
|
||||||
log.Errorf("checking sector faults: %v", err)
|
log.Errorf("checking sector faults: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
journal.J.RecordEvent(s.wdPoStEvtType, func() interface{} {
|
||||||
|
return s.enrichWithTipset(WindowPoStEvt{
|
||||||
|
State: "faults_processed",
|
||||||
|
Deadline: s.activeDeadline,
|
||||||
|
Faults: &WindowPoStEvt_Faults{
|
||||||
|
Declarations: faults,
|
||||||
|
MessageCID: optionalCid(sigmsg),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
}()
|
}()
|
||||||
|
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
|
Loading…
Reference in New Issue
Block a user